// Source: mirror of https://github.com/lianthony/NT4.0 (257 lines, 8.8 KiB)
// TITLE("Compute Checksum")
//++
//
// Copyright (c) 1994  IBM Corporation
//
// Module Name:
//
//    xsum.s
//
// Abstract:
//
//    This module implements a function to compute the checksum of a buffer.
//
// Author:
//
//    David N. Cutler (davec) 27-Jan-1992
//
// Environment:
//
//    User mode.
//
// Revision History:
//
//    Michael W. Thomas 02/14/94    Converted from MIPS
//    Peter L. Johnston 07/19/94    Updated for Daytona Lvl 734 and
//                                  optimized for PowerPC.
//
//--

#include "ksppc.h"
|
|
|
|
SBTTL("Compute Checksum")
//++
//
// ULONG
// tcpxsum (
//    IN ULONG Checksum,
//    IN PUCHAR Source,
//    IN ULONG Length
//    )
//
// Routine Description:
//
//    This function computes the checksum of the specified buffer.
//
//    N.B. The checksum is the 16 bit checksum of the 16 bit aligned
//         buffer. If the buffer is not 16 bit aligned the first byte is
//         moved to high order position to be added to the correct half.
//
// Arguments:
//
//    Checksum (r3) - Supplies the initial checksum value.
//
//    Source (r4) - Supplies a pointer to the checksum buffer.
//
//    Length (r5) - Supplies the length of the buffer in bytes.
//
// Return Value:
//
//    The computed checksum is returned as the function value.
//
//    If Length is zero the routine returns immediately and r.3 still
//    holds the caller's Checksum, unmodified and unfolded.
//
// Implementation Notes:
//
//    r.6        - partial checksum of the buffer (33 bits with XER[CA]
//                 while the 4 byte adde chain is running).
//    r.7 - r.11 - temporaries.  r.4 and r.5 are modified as the buffer
//                 is consumed.  CTR counts 32 byte sets.
//    CR0        - parity of the (remaining) length, then "count reached
//                 zero" results from subic.
//    CR bit 7   - copy of the low bit of the original Source address,
//                 i.e. set when the buffer started on an odd address.
//                 It is tested at "combine" to decide whether the partial
//                 checksum must be byte swapped.  mtcrf masks 0x01 and 0x03
//                 only rewrite CR fields 6 and 7, so this bit survives.
//    CR6/CR7    - low bits of the address (alignment tests), later the
//                 low 8 bits of the remaining length (run length tests).
//
//    The "max current checksum" remarks track an upper bound on r.6.
//    The bound quoted after the final combine appears to assume that
//    the incoming Checksum fits in 16 bits.
//
//--

        LEAF_ENTRY(tcpxsum)

        cmpwi   r.5, 0                  // check if bytes to checksum
        mtcrf   0x01, r.4               // set up for alignment check
                                        // (CR bits 28..31 = low 4 address bits)
        li      r.6, 0                  // initialize partial checksum
        beqlr-                          // return if no bytes to checksum

        andi.   r.7, r.5, 1             // check if length is even
        crmove  7, 31                   // remember original alignment
        bf      31, evenalign           // jif 16 bit aligned

//
// Initialize the checksum to the first byte shifted up a byte.
//
// One byte is consumed here, so the parity of the remaining length is the
// opposite of the parity computed above; CR0[eq] is inverted to match.
//

        lbz     r.6, 0(r.4)             // get first byte of buffer
        subi    r.5, r.5, 1             // reduce count of bytes to checksum
        cmpwi   cr.6, r.5, 0            // check if done
        crnot   eq, eq                  // invert odd/even length check
        addi    r.4, r.4, 1             // advance buffer address
        mtcrf   0x01, r.4               // reset 32 bit alignment check
        slwi    r.6, r.6, 8             // shift byte up in computed checksum
                                        // max current checksum is 0x0ff00
        beq     cr.6, combine           // jif no more bytes to checksum

evenalign:

//
// Check if the length of the buffer is an even number of bytes.
//
// If the buffer is not an even number of bytes, add the last byte to the
// computed checksum.
//

        beq     evenlength              // jif remaining length is even
        subic.  r.5, r.5, 1             // reduce count of bytes to checksum
        lbzx    r.7, r.4, r.5           // get last byte from buffer
        add     r.6, r.6, r.7           // add last byte to computed checksum
                                        // max current checksum is 0x0ffff
        beq     combine                 // jif no more bytes in buffer

evenlength:

//
// Check if we are 4 byte aligned, if not add first 2 byte word into
// checksum so the buffer is then 4 byte aligned.
//

        bf      30, fourbytealigned     // jif 4 byte aligned

        lhz     r.7, 0(r.4)             // get 2 byte word
        subic.  r.5, r.5, 2             // reduce length
        addi    r.4, r.4, 2             // bump address
        add     r.6, r.6, r.7           // add 2 bytes to computed checksum
                                        // max current checksum is 0x1fffe
        beq     combine                 // jif no more bytes to checksum

//
// Attempt to sum the remainder of the buffer in sets of 32 bytes.  This
// should achieve 2 bytes per clock on 601 and 603, and 3.2 bytes per clock
// on 604.  (A separate implementation will be required to take advantage
// of 64 bit loads on the 620).
//
// The low 8 bits of the remaining length are copied into CR6/CR7 so that
// CR bits 27, 28, 29 and 30 select the trailing 16, 8, 4 and 2 byte runs.
// Every add from here to "fold" is an adde, so the carry out of one add
// is added back in by the next.
//

fourbytealigned:

        srwi.   r.7, r.5, 5             // get count of 32 byte sets
        mtcrf   0x03, r.5               // break length into block for
                                        // various run lengths.
        subi    r.4, r.4, 4             // adjust buffer address for lwzu
        mtctr   r.7                     // loop count = number of 32 byte sets
        addic   r.6, r.6, 0             // clear carry bit
        beq     try16                   // jif no 32 byte sets

do32:   lwz     r.8, 4(r.4)             // get 1st 4 bytes in set
        lwz     r.9, 8(r.4)             // get 2nd 4
        adde    r.6, r.6, r.8           // add 1st 4 to checksum
        lwz     r.10, 12(r.4)           // get 3rd 4
        adde    r.6, r.6, r.9           // add 2nd 4
        lwz     r.11, 16(r.4)           // get 4th 4
        adde    r.6, r.6, r.10          // add 3rd 4
        lwz     r.8, 20(r.4)            // get 5th 4
        adde    r.6, r.6, r.11          // add 4th 4
        lwz     r.9, 24(r.4)            // get 6th 4
        adde    r.6, r.6, r.8           // add 5th 4
        lwz     r.10, 28(r.4)           // get 7th 4
        adde    r.6, r.6, r.9           // add 6th 4
        lwzu    r.11, 32(r.4)           // get 8th 4 and update address
        adde    r.6, r.6, r.10          // add 7th 4
        adde    r.6, r.6, r.11          // add 8th 4
        bdnz    do32                    // loop until all 32 byte sets are summed

try16:  bf      27, try8                // jif no 16 byte block

        lwz     r.8, 4(r.4)             // get 1st 4
        lwz     r.9, 8(r.4)             // get 2nd 4
        adde    r.6, r.6, r.8           // add 1st 4
        lwz     r.10, 12(r.4)           // get 3rd 4
        adde    r.6, r.6, r.9           // add 2nd 4
        lwzu    r.11, 16(r.4)           // get 4th 4 and update address
        adde    r.6, r.6, r.10          // add 3rd 4
        adde    r.6, r.6, r.11          // add 4th 4

try8:   bf      28, try4                // jif no 8 byte block
        lwz     r.8, 4(r.4)             // get 1st 4
        lwzu    r.9, 8(r.4)             // get 2nd 4 and update address
        adde    r.6, r.6, r.8           // add 1st 4
        adde    r.6, r.6, r.9           // add 2nd 4

try4:   bf      29, try2                // jif no 4 byte block
        lwzu    r.8, 4(r.4)             // get 4 bytes and update address
        adde    r.6, r.6, r.8           // add 4 bytes

try2:   bf      30, fold                // jif no 2 byte block

//
// At this point, r.4 is pointing at the last 4 byte block processed (or
// not processed if there were no 4 byte blocks).  We need to add 4 to the
// address when we pull the last two bytes.
//

        lhz     r.8, 4(r.4)             // get last two bytes
        adde    r.6, r.6, r.8           // add last two bytes

//
// Collapse 33 bit (1 carry bit, 32 bits in r.6) into 17 bit checksum.
//

fold:   rlwinm  r.7, r.6, 16, 0xffff    // get 16 most significant bits (upper)
        rlwinm  r.6, r.6, 0, 0xffff     // get least significant 16 bits (lower)
        adde    r.6, r.6, r.7           // upper + lower + carry
                                        // max current checksum is 0x1ffff

//
// Combine input checksum and partial checksum.
//
// If the input buffer was byte aligned, then word swap bytes in computed
// checksum before combination with input checksum.
//

combine:

        bf      7, waseven              // jif original alignment was 16 bit

//
// Swap bytes within upper and lower halves.
// eg: AA BB CC DD becomes BB AA DD CC
//
// As the current maximum partial checksum is 0x1ffff don't worry about AA.
// ie: want BB 00 DD CC
//

        rlwimi  r.6, r.6, 16, 0xff000000 // r.6 = CC BB CC DD
        rlwinm  r.6, r.6, 8, 0xff00ffff  // r.6 = BB 00 DD CC

waseven:

        add     r.3, r.3, r.6           // combine checksums
                                        // max current checksum is 0x101fffe
        rotlwi  r.4, r.3, 16            // swap checksum words
        add     r.3, r.3, r.4           // add words with carry into high word
        srwi    r.3, r.3, 16            // extract final checksum

        LEAF_EXIT(tcpxsum)

//
// Debug symbol records for this object, emitted by hand.
//
// Each record below starts with a 16 bit length that counts the bytes
// which follow it, then a 16 bit record type (named in the trailing
// remark).  Names are stored as a length byte followed by the characters.
// NOTE(review): the leading ".ualong 1" is presumably the symbol format
// signature - confirm against the CodeView specification.
//

        .debug$S
        .ualong         1

//
// Length 15 = 2 (type) + 4 + 1 + 8 ("xsum.obj").
//

        .uashort        15
        .uashort        0x9             # S_OBJNAME
        .ualong         0
        .byte           8, "xsum.obj"

//
// Length 24 = 2 (type) + 4 single bytes + 1 + 17 ("PowerPC Assembler").
//

        .uashort        24
        .uashort        0x1             # S_COMPILE
        .byte           0x42            # Target processor = PPC 604
        .byte           3               # Language = ASM
        .byte           0
        .byte           0
        .byte           17, "PowerPC Assembler"

//
// Length 43 = 2 (type) + 7 * 4 + 2 + 2 + 1 + 1 + 7 ("tcpxsum").
// The two tcpxsum.end-..tcpxsum expressions both evaluate to the size of
// the routine in bytes; [secoff]/[secnum] give its section offset and
// section number.
//

        .uashort        43
        .uashort        0x205           # S_GPROC32
        .ualong         0
        .ualong         0
        .ualong         0
        .ualong         tcpxsum.end-..tcpxsum
        .ualong         0
        .ualong         tcpxsum.end-..tcpxsum
        .ualong         [secoff]..tcpxsum
        .uashort        [secnum]..tcpxsum
        .uashort        0x1000
        .byte           0x00
        .byte           7, "tcpxsum"

//
// Length 2 = the type word only.
//

        .uashort        2, 0x6          # S_END