//      TITLE("Compute Checksum")
//++
//
// Copyright (c) 1994  IBM Corporation
//
// Module Name:
//
//    xsum.s
//
// Abstract:
//
//    This module implements a function to compute the checksum of a buffer.
//
// Author:
//
//    David N. Cutler (davec) 27-Jan-1992
//
// Environment:
//
//    User mode.
//
// Revision History:
//
//    Michael W. Thomas   02/14/94    Converted from MIPS
//    Peter L. Johnston   07/19/94    Updated for Daytona Lvl 734 and
//                                    optimized for PowerPC.
//
//--

#include "ksppc.h"

        SBTTL("Compute Checksum")
//++
//
// ULONG
// tcpxsum (
//    IN ULONG Checksum,
//    IN PUCHAR Source,
//    IN ULONG Length
//    )
//
// Routine Description:
//
//    This function computes the checksum of the specified buffer.
//
//    N.B. The checksum is the 16 bit checksum of the 16 bit aligned
//         buffer. If the buffer is not 16 bit aligned the first byte is
//         moved to high order position to be added to the correct half.
//
// Arguments:
//
//    Checksum (r3) - Supplies the initial checksum value.
//
//    Source (r4) - Supplies a pointer to the checksum buffer.
//
//    Length (r5) - Supplies the length of the buffer in bytes.
//
// Return Value:
//
//    The computed checksum is returned as the function value.
//
//    N.B. When Length is zero the routine returns immediately and r.3
//         still holds the caller's Checksum exactly as passed in, i.e.
//         it is NOT folded to 16 bits as it is on every other path.
//         NOTE(review): confirm callers never rely on a folded result
//         for an empty buffer.
//
// Register usage inside the routine:
//
//    r.3         - input checksum; final result
//    r.4         - current buffer address (biased by -4 once the word
//                  loops start so that lwzu can be used)
//    r.5         - number of bytes still to be summed
//    r.6         - partial checksum being accumulated
//    r.7         - scratch (length parity, loop count, single loads)
//    r.8 - r.11  - data words loaded from the buffer
//    CTR         - number of 32 byte sets to process
//    XER[CA]     - carry chained through the adde instructions
//    CR0         - "eq" tracks whether the remaining length is even
//    CR bit 7    - copy of the low bit of the original Source address
//                  (set means the buffer started on an odd byte)
//    CR6, CR7    - low bits of the address, later of the length, loaded
//                  with mtcrf and tested bit by bit with bf
//
//--

        LEAF_ENTRY(tcpxsum)

        cmpwi   r.5, 0                  // check if bytes to checksum
        mtcrf   0x01, r.4               // set up for alignment check
                                        // (low 4 address bits -> CR bits 28..31)
        li      r.6, 0                  // initialize partial checksum
        beqlr-                          // return if no bytes to checksum
        andi.   r.7, r.5, 1             // check if length is even
                                        // (CR0 eq set when length is even)
        crmove  7, 31                   // remember original alignment
        bf      31, evenalign           // jif 16 bit aligned

//
// Initialize the checksum to the first byte shifted up a byte.
//
// One byte is consumed here, so the odd/even sense of the remaining length
// flips; crnot keeps CR0 eq meaning "remaining length is even".
//

        lbz     r.6, 0(r.4)             // get first byte of buffer
        subi    r.5, r.5, 1             // reduce count of bytes to checksum
        cmpwi   cr.6, r.5, 0            // check if done
        crnot   eq, eq                  // invert odd/even length check
        addi    r.4, r.4, 1             // advance buffer address
        mtcrf   0x01, r.4               // reset 32 bit alignment check
        slwi    r.6, r.6, 8             // shift byte up in computed checksum
                                        // max current checksum is 0x0ff00
        beq     cr.6, combine           // jif no more bytes to checksum

evenalign:

//
// Check if the length of the buffer is an even number of bytes.
//
// If the buffer is not an even number of bytes, add the last byte to the
// computed checksum.
//

        beq     evenlength              // jif remaining length is even
        subic.  r.5, r.5, 1             // reduce count of bytes to checksum
        lbzx    r.7, r.4, r.5           // get last byte from buffer
        add     r.6, r.6, r.7           // add last byte to computed checksum
                                        // max current checksum is 0x0ffff
        beq     combine                 // jif no more bytes in buffer

evenlength:

//
// Check if we are 4 byte aligned, if not add first 2 byte word into
// checksum so the buffer is then 4 byte aligned.
//
// CR bit 30 holds bit 1 of the (now 16 bit aligned) buffer address.
//

        bf      30, fourbytealigned     // jif 4 byte aligned

        lhz     r.7, 0(r.4)             // get 2 byte word
        subic.  r.5, r.5, 2             // reduce length
        addi    r.4, r.4, 2             // bump address
        add     r.6, r.6, r.7           // add 2 bytes to computed checksum
                                        // max current checksum is 0x1fffe
        beq     combine                 // jif no more bytes to checksum

//
// Attempt to sum the remainder of the buffer in sets of 32 bytes.  This
// should achieve 2 bytes per clock on 601 and 603, and 3.2 bytes per clock
// on 604.  (A separate implementation will be required to take advantage
// of 64 bit loads on the 620).
//
// The low 8 bits of the remaining length are copied into CR6/CR7 so the
// tail can be dispatched with single bit tests:
//
//      CR bit 27 - a 16 byte block remains
//      CR bit 28 - an 8 byte block remains
//      CR bit 29 - a 4 byte block remains
//      CR bit 30 - a 2 byte block remains
//
// From here to "fold" every addition is an adde, so the carry out of each
// 32 bit add is carried into the next one.
//

fourbytealigned:

        srwi.   r.7, r.5, 5             // get count of 32 byte sets
        mtcrf   0x03, r.5               // break length into block for
                                        // various run lengths.
        subi    r.4, r.4, 4             // adjust buffer address for lwzu
        mtctr   r.7                     // CTR = number of 32 byte sets
        addic   r.6, r.6, 0             // clear carry bit
        beq     try16                   // jif no 32 byte sets

do32:
        lwz     r.8, 4(r.4)             // get 1st 4 bytes in set
        lwz     r.9, 8(r.4)             // get 2nd 4
        adde    r.6, r.6, r.8           // add 1st 4 to checksum
        lwz     r.10, 12(r.4)           // get 3rd 4
        adde    r.6, r.6, r.9           // add 2nd 4
        lwz     r.11, 16(r.4)           // get 4th 4
        adde    r.6, r.6, r.10          // add 3rd 4
        lwz     r.8, 20(r.4)            // get 5th 4
        adde    r.6, r.6, r.11          // add 4th 4
        lwz     r.9, 24(r.4)            // get 6th 4
        adde    r.6, r.6, r.8           // add 5th 4
        lwz     r.10, 28(r.4)           // get 7th 4
        adde    r.6, r.6, r.9           // add 6th 4
        lwzu    r.11, 32(r.4)           // get 8th 4 and update address
        adde    r.6, r.6, r.10          // add 7th 4
        adde    r.6, r.6, r.11          // add 8th 4
        bdnz    do32                    // loop while 32 byte sets remain

try16:
        bf      27, try8                // jif no 16 byte block

        lwz     r.8, 4(r.4)             // get 1st 4
        lwz     r.9, 8(r.4)             // get 2nd 4
        adde    r.6, r.6, r.8           // add 1st 4
        lwz     r.10, 12(r.4)           // get 3rd 4
        adde    r.6, r.6, r.9           // add 2nd 4
        lwzu    r.11, 16(r.4)           // get 4th 4 and update address
        adde    r.6, r.6, r.10          // add 3rd 4
        adde    r.6, r.6, r.11          // add 4th 4

try8:
        bf      28, try4                // jif no 8 byte block

        lwz     r.8, 4(r.4)             // get 1st 4
        lwzu    r.9, 8(r.4)             // get 2nd 4 and update address
        adde    r.6, r.6, r.8           // add 1st 4
        adde    r.6, r.6, r.9           // add 2nd 4

try4:
        bf      29, try2                // jif no 4 byte block

        lwzu    r.8, 4(r.4)             // get 4 bytes and update address
        adde    r.6, r.6, r.8           // add 4 bytes

try2:
        bf      30, fold                // jif no 2 byte block

//
// At this point, r.4 is pointing at the last 4 byte block processed (or
// not processed if there were no 4 byte blocks).  We need to add 4 when we
// pull the last two bytes.
//

        lhz     r.8, 4(r.4)             // get last two bytes
        adde    r.6, r.6, r.8           // add last two bytes

//
// Collapse 33 bit (1 carry bit, 32 bits in r.6) into 17 bit checksum.
//

fold:

        rlwinm  r.7, r.6, 16, 0xffff    // get 16 most significant bits (upper)
        rlwinm  r.6, r.6, 0, 0xffff     // get least significant 16 bits (lower)
        adde    r.6, r.6, r.7           // upper + lower + carry
                                        // max current checksum is 0x1ffff

//
// Combine input checksum and partial checksum.
//
// If the input buffer was byte aligned, then word swap bytes in computed
// checksum before combination with input checksum.
//
// CR bit 7 was set from the low bit of the original Source address at
// entry and has not been modified since.
//

combine:

        bf      7, waseven              // jif original alignment was 16 bit

//
// Swap bytes within upper and lower halves.
//      eg:     AA BB CC DD becomes BB AA DD CC
//
// As the current maximum partial checksum is 0x1ffff don't worry about AA.
//      ie:     want BB 00 DD CC
//

        rlwimi  r.6, r.6, 16, 0xff000000 // r.6 = CC BB CC DD
        rlwinm  r.6, r.6, 8, 0xff00ffff  // r.6 = BB 00 DD CC

waseven:

        add     r.3, r.3, r.6           // combine checksums
                                        // max current checksum is 0x101fffe
                                        // NOTE(review): that bound assumes the
                                        // incoming Checksum fits in 16 bits;
                                        // no carry out of this add is kept
        rotlwi  r.4, r.3, 16            // swap checksum words
        add     r.3, r.3, r.4           // add words with carry into high word
        srwi    r.3, r.3, 16            // extract final checksum

        LEAF_EXIT(tcpxsum)

//
// Hand built debug symbol records for this routine.  The .uashort that
// opens each record is the count of bytes that follow it in that record.
//

        .debug$S
        .ualong         1

        .uashort        15
        .uashort        0x9             # S_OBJNAME
        .ualong         0
        .byte           8, "xsum.obj"

        .uashort        24
        .uashort        0x1             # S_COMPILE
        .byte           0x42            # Target processor = PPC 604
        .byte           3               # Language = ASM
        .byte           0
        .byte           0
        .byte           17, "PowerPC Assembler"

        .uashort        43
        .uashort        0x205           # S_GPROC32
        .ualong         0
        .ualong         0
        .ualong         0
        .ualong         tcpxsum.end-..tcpxsum
        .ualong         0
        .ualong         tcpxsum.end-..tcpxsum
        .ualong         [secoff]..tcpxsum
        .uashort        [secnum]..tcpxsum
        .uashort        0x1000
        .byte           0x00
        .byte           7, "tcpxsum"

        .uashort        2, 0x6          # S_END