Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

257 lines
8.8 KiB

// TITLE("Compute Checksum")
//++
//
// Copyright (c) 1994 IBM Corporation
//
// Module Name:
//
// xsum.s
//
// Abstract:
//
// This module implement a function to compute the checksum of a buffer.
//
// Author:
//
// David N. Cutler (davec) 27-Jan-1992
//
// Environment:
//
// User mode.
//
// Revision History:
//
// Michael W. Thomas 02/14/94 Converted from MIPS
// Peter L. Johnston 07/19/94 Updated for Daytona Lvl 734 and
// optimized for PowerPC.
//
//--
#include "ksppc.h"
SBTTL("Compute Checksum")
//++
//
// ULONG
// tcpxsum (
// IN ULONG Checksum,
// IN PUCHAR Source,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function computes the checksum of the specified buffer.
//
// N.B. The checksum is the 16 bit checksum of the 16 bit aligned
// buffer. If the buffer is not 16 bit aligned the first byte is
// moved to high order position to be added to the correct half.
//
// Arguments:
//
// Checksum (r3) - Supplies the initial checksum value.
//
// Source (r4) - Supplies a pointer to the checksum buffer.
//
// Length (r5) - Supplies the length of the buffer in bytes.
//
// Return Value:
//
// The computed checksum is returned as the function value.
//
//--
LEAF_ENTRY(tcpxsum)
cmpwi r.5, 0 // check if bytes to checksum
mtcrf 0x01, r.4 // set up for alignment check
li r.6, 0 // initialize partial checksum
beqlr- // return if no bytes to checksum
andi. r.7, r.5, 1 // check if length is even
crmove 7, 31 // remember original alignment
bf 31, evenalign // jif 16 bit aligned
//
// Initialize the checksum to the first byte shifted up a byte.
//
lbz r.6, 0(r.4) // get first byte of buffer
subi r.5, r.5, 1 // reduce count of bytes to checksum
cmpwi cr.6, r.5, 0 // check if done
crnot eq, eq // invert odd/even length check
addi r.4, r.4, 1 // advance buffer address
mtcrf 0x01, r.4 // reset 32 bit alignment check
slwi r.6, r.6, 8 // shift byte up in computed checksum
// max current checksum is 0x0ff00
beq cr.6, combine // jif no more bytes to checksum
evenalign:
//
// Check if the length of the buffer is an even number of bytes.
//
// If the buffer is not an even number of bytes, add the last byte to the
// computed checksum.
//
beq evenlength
subic. r.5, r.5, 1 // reduce count of bytes to checksum
lbzx r.7, r.4, r.5 // get last byte from buffer
add r.6, r.6, r.7 // add last byte to computed checksum
// max current checksum is 0x0ffff
beq combine // jif no more bytes in buffer
evenlength:
//
// Check if we are 4 byte aligned, if not add first 2 byte word into
// checksum so the buffer is then 4 byte aligned.
//
bf 30, fourbytealigned // jif 4 byte aligned
lhz r.7, 0(r.4) // get 2 byte word
subic. r.5, r.5, 2 // reduce length
addi r.4, r.4, 2 // bump address
add r.6, r.6, r.7 // add 2 bytes to computed checksum
// max current checksum is 0x1fffe
beq combine // jif no more bytes to checksum
//
// Attempt to sum the remainder of the buffer in sets of 32 bytes. This
// should achieve 2 bytes per clock on 601 and 603, and 3.2 bytes per clock
// on 604. (A seperate implementation will be required to take advantage
// of 64 bit loads on the 620).
//
fourbytealigned:
srwi. r.7, r.5, 5 // get count of 32 byte sets
mtcrf 0x03, r.5 // break length into block for
// various run lengths.
subi r.4, r.4, 4 // adjust buffer address for lwzu
mtctr r.7
addic r.6, r.6, 0 // clear carry bit
beq try16 // jif no 32 byte sets
do32: lwz r.8, 4(r.4) // get 1st 4 bytes in set
lwz r.9, 8(r.4) // get 2nd 4
adde r.6, r.6, r.8 // add 1st 4 to checksum
lwz r.10, 12(r.4) // get 3rd 4
adde r.6, r.6, r.9 // add 2nd 4
lwz r.11, 16(r.4) // get 4th 4
adde r.6, r.6, r.10 // add 3rd 4
lwz r.8, 20(r.4) // get 5th 4
adde r.6, r.6, r.11 // add 4th 4
lwz r.9, 24(r.4) // get 6th 4
adde r.6, r.6, r.8 // add 5th 4
lwz r.10, 28(r.4) // get 7th 4
adde r.6, r.6, r.9 // add 6th 4
lwzu r.11, 32(r.4) // get 8th 4 and update address
adde r.6, r.6, r.10 // add 7th 4
adde r.6, r.6, r.11 // add 8th 4
bdnz do32
try16: bf 27, try8 // jif no 16 byte block
lwz r.8, 4(r.4) // get 1st 4
lwz r.9, 8(r.4) // get 2nd 4
adde r.6, r.6, r.8 // add 1st 4
lwz r.10, 12(r.4) // get 3rd 4
adde r.6, r.6, r.9 // add 2nd 4
lwzu r.11, 16(r.4) // get 4th 4 and update address
adde r.6, r.6, r.10 // add 3rd 4
adde r.6, r.6, r.11 // add 4th 4
try8: bf 28, try4 // jif no 8 byte block
lwz r.8, 4(r.4) // get 1st 4
lwzu r.9, 8(r.4) // get 2nd 4 and update address
adde r.6, r.6, r.8 // add 1st 4
adde r.6, r.6, r.9 // add 2nd 4
try4: bf 29, try2 // jif no 4 byte block
lwzu r.8, 4(r.4) // get 4 bytes and update address
adde r.6, r.6, r.8
try2: bf 30, fold // jif no 2 byte block
//
// At this point, r.4 is pointing at the last 4 byte block processed (or
// not processed if there were no 4 byte blocks). We need to add when we
// pull the last two bytes.
//
lhz r.8, 4(r.4) // get last two bytes
adde r.6, r.6, r.8 // add last two bytes
//
// Collapse 33 bit (1 carry bit, 32 bits in r.6) into 17 bit checksum.
//
fold: rlwinm r.7, r.6, 16, 0xffff // get 16 most significant bits (upper)
rlwinm r.6, r.6, 0, 0xffff // get least significant 16 bits (lower)
adde r.6, r.6, r.7 // upper + lower + carry
// max current checksum is 0x1ffff
//
// Combine input checksum and partial checksum.
//
// If the input buffer was byte aligned, then word swap bytes in computed
// checksum before combination with input chewcksum.
//
combine:
bf 7, waseven // jif original alignment was 16 bit
//
// Swap bytes within upper and lower halves.
// eg: AA BB CC DD becomes BB AA DD CC
//
// As the current maximum partial checksum is 0x1ffff don't worry about AA.
// ie: want BB 00 DD CC
//
rlwimi r.6, r.6, 16, 0xff000000// r.7 = CC BB CC DD
rlwinm r.6, r.6, 8, 0xff00ffff// r.7 = BB 00 DD CC
waseven:
add r.3, r.3, r.6 // combine checksums
// max current checksum is 0x101fffe
rotlwi r.4, r.3, 16 // swap checksum words
add r.3, r.3, r.4 // add words with carry into high word
srwi r.3, r.3, 16 // extract final checksum
LEAF_EXIT(tcpxsum)
.debug$S
.ualong 1
.uashort 15
.uashort 0x9 # S_OBJNAME
.ualong 0
.byte 8, "xsum.obj"
.uashort 24
.uashort 0x1 # S_COMPILE
.byte 0x42 # Target processor = PPC 604
.byte 3 # Language = ASM
.byte 0
.byte 0
.byte 17, "PowerPC Assembler"
.uashort 43
.uashort 0x205 # S_GPROC32
.ualong 0
.ualong 0
.ualong 0
.ualong tcpxsum.end-..tcpxsum
.ualong 0
.ualong tcpxsum.end-..tcpxsum
.ualong [secoff]..tcpxsum
.uashort [secnum]..tcpxsum
.uashort 0x1000
.byte 0x00
.byte 7, "tcpxsum"
.uashort 2, 0x6 # S_END