Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1458 lines
48 KiB

#if defined(R4000)
// TITLE("LZ Decompression")
//++
//
// Copyright (c) 1994 Microsoft Corporation
//
// Module Name:
//
// lznt1m.s
//
// Abstract:
//
// This module implements the decompression engine needed
// to support file system compression.
//
// Author:
//
// Mark Enstrom (marke) 21-Nov-1994
//
// Environment:
//
// Any mode.
//
// Revision History:
//
//--
#include "ksmips.h"
// #define FORMAT412 0
// #define FORMAT511 1
// #define FORMAT610 2
// #define FORMAT79 3
// #define FORMAT88 4
// #define FORMAT97 5
// #define FORMAT106 6
// #define FORMAT115 7
// #define FORMAT124 8
//
// 4/12 5/11 6/10 7/9 8/8 9/7 10/6 11/5 12/4
//
// ULONG FormatMaxLength[] = { 4098, 2050, 1026, 514, 258, 130, 66, 34, 18 };
// ULONG FormatMaxDisplacement[] = { 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 };
//
// width table for LZ length and offset encoding
//
#define STATUS_BAD_COMPRESSION_BUFFER 0xC0000242
#define SIZE_OF_HEADER 2
.text
//
// define stack based storage
//
.struct 0
LzS0: .space 4 // saved internal registers s0
LzFrameLength: // Length of Stack frame
.space 4*4 // parameter 0-3 space
LzFinal:.space 4 // argument FinalUncompressedChunkSize
SBTTL("LZKMFastDecompressChunk")
//++
//
// NTSTATUS
// LZKMFastDecompressChunk (
// OUT PUCHAR UncompressedBuffer,
// IN PUCHAR EndOfUncompressedBufferPlus1,
// IN PUCHAR CompressedBuffer,
// IN PUCHAR EndOfCompressedBufferPlus1,
// OUT PULONG FinalUncompressedChunkSize
// )
//
// Routine Description:
//
// This function decodes a stream of compression tokens and places the
// resultant output into the destination buffer. The format of the input
// is described ..\lznt1.c. As the input is decoded, checks are made to
// ensure that no data is read past the end of the compressed input buffer
// and that no data is stored past the end of the output buffer. Violations
// indicate corrupt input and are indicated by a status return.
//
// The following code takes advantage of three distinct observations.
// First, literal tokens occur at least twice as often as copy tokens.
// This argues for having a "fall-through" being the case where a literal
// token is found. We structure the main decomposition loop in eight
// pieces where the first piece is a sequence of literal-test fall-throughs
// and the remainder are a copy token followed by 7,6,...,0 literal-test
// fall-throughs. Each test examines a particular bit in the tag byte
// and jumps to the relevant code piece.
//
// The second observation involves performing bounds checking only
// when needed. Bounds checking the compressed buffer need only be done
// when fetching the tag byte. If there is not enough room left in the
// input for a tag byte and 8 (worst case) copy tokens, a branch is made
// to a second loop that handles a byte-by-byte "safe" copy to finish
// up the decompression. Similarly, at the head of the loop a check is
// made to ensure that there is enough room in the output buffer for 8
// literal bytes. If not enough room is left, then the second loop is
// used. Finally, after performing each copy, the output-buffer check
// is made as well since a copy may take the destination pointer
// arbitrarily close to the end of the destination.
//
// The third observation is an examination of CPU time while disk
// decompression is in progress. CPU utilization is only less than
// 25% peak. This means this routine should be written to minimize
// latency instead of bandwidth. For this reason, taken branches are
// avoided at the cost of code size and loop unrolling is not done.
//
// Arguments:
//
// a0 - UncompressedBuffer - Pointer to start of destination buffer
// a1 - EndOfUncompressedBufferPlus1 - One byte beyond uncompressed buffer
// a2 - CompressedBuffer - Pointer to buffer of compressed data
// a3 - EndOfCompressedBufferPlus1 - One byte beyond compressed buffer
// (sp) - FinalUncompressedChunkSize - return bytes written to
// UncompressedBuffer
//
// Return Value:
//
// None
//
//--
LEAF_ENTRY(LZKMFastDecompressChunk)
//
// save internal registers
//
subu sp,sp,LzFrameLength
sw s0,LzS0(sp)
//
// make copy of UncompressedBuffer for
// current output pointer
//
move t4,a0
//
// skip chunk header in CompressedBuffer
//
add a2,a2,SIZE_OF_HEADER
//
// Initialize variables used in keeping track of the
// LZ Copy Token format. t9 is used to store the maximum
// displacement for each phase of LZ decoding
// (see explanation of format in lzkm.c). This displacement
// is added to the start of the CompressedBuffer address
// so that a boundary crossing can be detected.
//
li t9,0x10 // t9 = Max Displacement for LZ
add t8,t9,a0 // t8 = Format boundary
li t7,0xffff >> 4 // t7 = length mask
li t6,12 // t6 = offset shift count
//
// Initialize variables to track safe copy limits for
// CompressedBuffer and UncopmressedBuffer. This allows
// execution of the quick Flag check below without
// checking for crossing the end of either buffer.
// From CompressedBuffer, one input pass includes 1 flag byte
// and up to 8 two byte copy tokens ( 1+2*8).
// To the un-compressed buffer, 8 literal bytes may be written,
// any copy-token bits set will cause an explicit length check
// in the LzCopy section
//
subu v0,a1,8 // safe end of UncompressedBuffer
subu v1,a3,1+2*8 // safe end of CompressedBuffer
Top:
//
// make sure safe copy can be performed for at least 8 literal bytes
//
bgt a2,v1,SafeCheckStart // safe check
bgt t4,v0,SafeCheckStart // safe check
lbu s0,0(a2) // load flag byte
//
// fall-through for copying 8 bytes. Must set noreorder
// so that the lbu following the bltz will stay in the delay
// slot of the branch.
//
.set noreorder
sll t0,s0,31-0 // shift proper flag bit into sign bit
bltz t0,LzCopy0 // if sign bit is set, go to copy routine
lbu t1,1(a2) // load literal or CopyToken[0]
sll t0,s0,31-1 // shift proper flag bit into sign bit
sb t1,0(t4) // store literal byte to dst
bltz t0,LzCopy1 // if sign bit is set, go to copy routine
lbu t1,2(a2) // load literal or CopyToken[0]
sll t0,s0,31-2 // shift proper flag bit into sign bit
sb t1,1(t4) // store literal byte to dst
bltz t0,LzCopy2 // if sign bit is set, go to copy routine
lbu t1,3(a2) // load literal or CopyToken[0]
sll t0,s0,31-3 // shift proper flag bit into sign bit
sb t1,2(t4) // store literal byte to dst
bltz t0,LzCopy3 // if sign bit is set, go to copy routine
lbu t1,4(a2) // load literal or CopyToken[0]
sll t0,s0,31-4 // shift proper flag bit into sign bit
sb t1,3(t4) // store literal byte to dst
bltz t0,LzCopy4 // if sign bit is set, go to copy routine
lbu t1,5(a2) // load literal or CopyToken[0]
sll t0,s0,31-5 // shift proper flag bit into sign bit
sb t1,4(t4) // store literal byte to dst
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
lbu t1,6(a2) // load literal or CopyToken[0]
sll t0,s0,31-6 // shift proper flag bit into sign bit
sb t1,5(t4) // store literal byte to dst
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
lbu t1,7(a2) // load literal or CopyToken[0]
sll t0,s0,31-7 // shift proper flag bit into sign bit
sb t1,6(t4) // store literal byte to dst
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
lbu t1,8(a2) // load literal or CopyToken[0]
addu a2,a2,9 // inc src addr
sb t1,7(t4)
.set reorder
add t4,t4,8
b Top
LzCopy0:
//
// LzCopy0
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,2(a2) // load second byte of copy token
add a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
10:
sub t0,t8,t4 // if t4 < t8 then
bgez t0,20f // branch around boundry adjust
sll t9,t9,1 // next length boundary
add t8,t9,a0 // t8 = next offset boundary
srl t7,t7,1 // reduce width of length mask
subu t6,t6,1 // reduce shift count to isolate offset
b 10b // re-check boundry
20:
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
add t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
add t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
add t3,t4,t0 // CurrentPointer + Length
ble t3,a1,10f // Fix length if it would over-run buffer
subu t0,a1,t4 // calc new lenght, up to end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
beq t0,zero,30f // skip if length = 0
add t2,t4,t0 // t2 = end address
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
add t4,t4,1 // inc dst addr
add t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,7 // seven bits left in current flag byte
add a2,a2,2 // Make a2 point to next src byte
srl s0,s0,1 // shift flag byte into next position
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy. Continue flag check at position 1
//
subu t4,t4,1 // unbias output pointer
sll t0,s0,31-1 // rotate flag bit into sign position
.set noreorder
bltz t0,LzCopy1 // if sign bit is set, go to copy routine
lbu t1,2(a2) // load literal or CopyToken[0]
sll t0,s0,31-2 // shift proper flag bit into sign bit
sb t1,1(t4) // store literal byte to dst
bltz t0,LzCopy2 // if sign bit is set, go to copy routine
lbu t1,3(a2) // load literal or CopyToken[0]
sll t0,s0,31-3 // shift proper flag bit into sign bit
sb t1,2(t4) // store literal byte to dst
bltz t0,LzCopy3 // if sign bit is set, go to copy routine
lbu t1,4(a2) // load literal or CopyToken[0]
sll t0,s0,31-4 // shift proper flag bit into sign bit
sb t1,3(t4) // store literal byte to dst
bltz t0,LzCopy4 // if sign bit is set, go to copy routine
lbu t1,5(a2) // load literal or CopyToken[0]
sll t0,s0,31-5 // shift proper flag bit into sign bit
sb t1,4(t4) // store literal byte to dst
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
lbu t1,6(a2) // load literal or CopyToken[0]
sll t0,s0,31-6 // shift proper flag bit into sign bit
sb t1,5(t4) // store literal byte to dst
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
lbu t1,7(a2) // load literal or CopyToken[0]
sll t0,s0,31-7 // shift proper flag bit into sign bit
sb t1,6(t4) // store literal byte to dst
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
lbu t1,8(a2) // load literal or CopyToken[0]
addu a2,a2,9 // inc src addr
sb t1,7(t4)
.set reorder
add t4,t4,8
b Top
LzCopy1:
//
// LzCopy1
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,3(a2) // load second byte of copy token
add t4,t4,1 // mov t4 to point to byte 1
add a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
10:
sub t0,t8,t4 // if t4 < t8 then
bgez t0,20f // branch around boundry adjust
sll t9,t9,1 // next length boundary
add t8,t9,a0 // t8 = next offset boundary
srl t7,t7,1 // reduce width of length mask
subu t6,t6,1 // reduce shift count to isolate offset
b 10b // re-check boundry
20:
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
add t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
add t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
add t3,t4,t0 // CurrentPointer + Length
ble t3,a1,10f // Fix length if it would over-run buffer
subu t0,a1,t4 // calc new lenght, up to end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
beq t0,zero,30f // skip if length = 0
add t2,t4,t0 // t2 = end address
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
add t4,t4,1 // inc dst addr
add t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,6 // six bits left in current flag byte
add a2,a2,3 // Make a2 point to next src byte
srl s0,s0,2 // shift flag byte into position
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy. Continue flag check at position 2
//
subu t4,t4,2 // un-bias input pointer
sll t0,s0,31-2 // rotate flag into position for sign check
.set noreorder
bltz t0,LzCopy2 // if sign bit is set, go to copy routine
lbu t1,3(a2) // load literal or CopyToken[0]
sll t0,s0,31-3 // shift proper flag bit into sign bit
sb t1,2(t4) // store literal byte to dst
bltz t0,LzCopy3 // if sign bit is set, go to copy routine
lbu t1,4(a2) // load literal or CopyToken[0]
sll t0,s0,31-4 // shift proper flag bit into sign bit
sb t1,3(t4) // store literal byte to dst
bltz t0,LzCopy4 // if sign bit is set, go to copy routine
lbu t1,5(a2) // load literal or CopyToken[0]
sll t0,s0,31-5 // shift proper flag bit into sign bit
sb t1,4(t4) // store literal byte to dst
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
lbu t1,6(a2) // load literal or CopyToken[0]
sll t0,s0,31-6 // shift proper flag bit into sign bit
sb t1,5(t4) // store literal byte to dst
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
lbu t1,7(a2) // load literal or CopyToken[0]
sll t0,s0,31-7 // shift proper flag bit into sign bit
sb t1,6(t4) // store literal byte to dst
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
lbu t1,8(a2) // load literal or CopyToken[0]
addu a2,a2,9 // inc src addr
sb t1,7(t4)
.set reorder
add t4,t4,8
b Top
LzCopy2:
//
// LzCopy2
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,4(a2) // load second byte of copy token
add t4,t4,2 // mov t4 to point to byte 1
add a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
10:
sub t0,t8,t4 // if t4 < t8 then
bgez t0,20f // branch around boundry adjust
sll t9,t9,1 // next length boundary
add t8,t9,a0 // t8 = next offset boundary
srl t7,t7,1 // reduce width of length mask
subu t6,t6,1 // reduce shift count to isolate offset
b 10b // re-check boundry
20:
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
add t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
add t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
add t3,t4,t0 // CurrentPointer + Length
ble t3,a1,10f // Fix length if it would over-run buffer
subu t0,a1,t4 // calc new lenght, up to end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
beq t0,zero,30f // skip if length = 0
add t2,t4,t0 // t2 = end address
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
add t4,t4,1 // inc dst addr
add t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,5 // five bits left in current flag byte
add a2,a2,4 // Make a2 point to next src byte
srl s0,s0,3 // shift flag byte into positin
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy
// continue flag check at position 1 (could duplicate LzQuick switch 1-7 here)
//
subu t4,t4,3 // un-bias output pointer
sll t0,s0,31-3 // rotate flag into position for sign check
.set noreorder
bltz t0,LzCopy3 // if sign bit is set, go to copy routine
lbu t1,4(a2) // load literal or CopyToken[0]
sll t0,s0,31-4 // shift proper flag bit into sign bit
sb t1,3(t4) // store literal byte to dst
bltz t0,LzCopy4 // if sign bit is set, go to copy routine
lbu t1,5(a2) // load literal or CopyToken[0]
sll t0,s0,31-5 // shift proper flag bit into sign bit
sb t1,4(t4) // store literal byte to dst
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
lbu t1,6(a2) // load literal or CopyToken[0]
sll t0,s0,31-6 // shift proper flag bit into sign bit
sb t1,5(t4) // store literal byte to dst
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
lbu t1,7(a2) // load literal or CopyToken[0]
sll t0,s0,31-7 // shift proper flag bit into sign bit
sb t1,6(t4) // store literal byte to dst
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
lbu t1,8(a2) // load literal or CopyToken[0]
addu a2,a2,9 // inc src addr
sb t1,7(t4)
.set reorder
add t4,t4,8
b Top
LzCopy3:
//
// LzCopy3
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,5(a2) // load second byte of copy token
add t4,t4,3 // mov t4 to point to byte 1
add a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
10:
sub t0,t8,t4 // if t4 < t8 then
bgez t0,20f // branch around boundry adjust
sll t9,t9,1 // next length boundary
add t8,t9,a0 // t8 = next offset boundary
srl t7,t7,1 // reduce width of length mask
subu t6,t6,1 // reduce shift count to isolate offset
b 10b // re-check boundry
20:
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
add t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
add t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
add t3,t4,t0 // CurrentPointer + Length
ble t3,a1,10f // Fix length if it would over-run buffer
subu t0,a1,t4 // calc new lenght, up to end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
beq t0,zero,30f // skip if length = 0
add t2,t4,t0 // t2 = end address
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
add t4,t4,1 // inc dst addr
add t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,4 // four bits left in current flag byte
add a2,a2,5 // Make a2 point to next src byte
srl s0,s0,4 // shift flag byte into positin
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy
// continue flag check at position 1 (could duplicate LzQuick switch 1-7 here)
//
subu t4,t4,4 // un-bias output pointer
sll t0,s0,31-4 // rotate flag into position for sign check
.set noreorder
bltz t0,LzCopy4 // if sign bit is set, go to copy routine
lbu t1,5(a2) // load literal or CopyToken[0]
sll t0,s0,31-5 // shift proper flag bit into sign bit
sb t1,4(t4) // store literal byte to dst
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
lbu t1,6(a2) // load literal or CopyToken[0]
sll t0,s0,31-6 // shift proper flag bit into sign bit
sb t1,5(t4) // store literal byte to dst
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
lbu t1,7(a2) // load literal or CopyToken[0]
sll t0,s0,31-7 // shift proper flag bit into sign bit
sb t1,6(t4) // store literal byte to dst
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
lbu t1,8(a2) // load literal or CopyToken[0]
addu a2,a2,9 // inc src addr
sb t1,7(t4)
.set reorder
add t4,t4,8
b Top
LzCopy4:
//
// LzCopy4
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,6(a2) // load second byte of copy token
add t4,t4,4 // mov t4 to point to byte 1
add a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
10:
sub t0,t8,t4 // if t4 < t8 then
bgez t0,20f // branch around boundry adjust
sll t9,t9,1 // next length boundary
add t8,t9,a0 // t8 = next offset boundary
srl t7,t7,1 // reduce width of length mask
subu t6,t6,1 // reduce shift count to isolate offset
b 10b // re-check boundry
20:
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
add t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
add t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
add t3,t4,t0 // CurrentPointer + Length
ble t3,a1,10f // Fix length if it would over-run buffer
subu t0,a1,t4 // calc new lenght, up to end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
beq t0,zero,30f // skip if length = 0
add t2,t4,t0 // t2 = end address
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
add t4,t4,1 // inc dst addr
add t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,3 // three bits left in current flag byte
add a2,a2,6 // Make a2 point to next src byte
srl s0,s0,5 // shift flag byte into positin
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy
// continue flag check at position 1 (could duplicate LzQuick switch 1-7 here)
//
subu t4,t4,5 // un-bias output pointer
sll t0,s0,31-5 // rotate flag into position for sign check
.set noreorder
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
lbu t1,6(a2) // load literal or CopyToken[0]
sll t0,s0,31-6 // shift proper flag bit into sign bit
sb t1,5(t4) // store literal byte to dst
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
lbu t1,7(a2) // load literal or CopyToken[0]
sll t0,s0,31-7 // shift proper flag bit into sign bit
sb t1,6(t4) // store literal byte to dst
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
lbu t1,8(a2) // load literal or CopyToken[0]
addu a2,a2,9 // inc src addr
sb t1,7(t4)
.set reorder
add t4,t4,8
b Top
LzCopy5:
//
// LzCopy5
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,7(a2) // load second byte of copy token
add t4,t4,5 // mov t4 to point to byte 1
add a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
10:
sub t0,t8,t4 // if t4 < t8 then
bgez t0,20f // branch around boundry adjust
sll t9,t9,1 // next length boundary
add t8,t9,a0 // t8 = next offset boundary
srl t7,t7,1 // reduce width of length mask
subu t6,t6,1 // reduce shift count to isolate offset
b 10b // re-check boundry
20:
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
add t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
add t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
add t3,t4,t0 // CurrentPointer + Length
ble t3,a1,10f // Fix length if it would over-run buffer
subu t0,a1,t4 // calc new lenght, up to end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
beq t0,zero,30f // skip if length = 0
add t2,t4,t0 // t2 = end address
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
add t4,t4,1 // inc dst addr
add t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,2 // two bits left in current flag byte
add a2,a2,7 // Make a2 point to next src byte
srl s0,s0,6 // shift flag byte so that next bit is in positin 0
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy
// continue flag check at position 1 (could duplicate LzQuick switch 1-7 here)
//
subu t4,t4,6 // un-bias output pointer
sll t0,s0,31-6 // rotate flag into position for sign check
.set noreorder
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
lbu t1,7(a2) // load literal or CopyToken[0]
sll t0,s0,31-7 // shift proper flag bit into sign bit
sb t1,6(t4) // store literal byte to dst
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
lbu t1,8(a2) // load literal or CopyToken[0]
addu a2,a2,9 // inc src addr
sb t1,7(t4)
.set reorder
add t4,t4,8
b Top
LzCopy6:
//
// LzCopy6
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,8(a2) // load second byte of copy token
add t4,t4,6 // mov t4 to point to byte 1
add a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
10:
sub t0,t8,t4 // if t4 < t8 then
bgez t0,20f // branch around boundry adjust
sll t9,t9,1 // next length boundary
add t8,t9,a0 // t8 = next offset boundary
srl t7,t7,1 // reduce width of length mask
subu t6,t6,1 // reduce shift count to isolate offset
b 10b // re-check boundry
20:
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
add t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
add t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
add t3,t4,t0 // CurrentPointer + Length
ble t3,a1,10f // Fix length if it would over-run buffer
subu t0,a1,t4 // calc new lenght, up to end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
beq t0,zero,30f // skip if length = 0
add t2,t4,t0 // t2 = end address
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
add t4,t4,1 // inc dst addr
add t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,1 // one bit left in current flag byte
add a2,a2,8 // Make a2 point to next src byte
srl s0,s0,7 // shift flag byte into position
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy
// continue flag check at position 1 (could duplicate LzQuick switch 1-7 here)
//
subu t4,t4,7 // un-bias output pointer
sll t0,s0,31-7 // rotate flag into position for sign check
.set noreorder
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
lbu t1,8(a2) // load literal or CopyToken[0]
addu a2,a2,9 // inc src addr
sb t1,7(t4)
.set reorder
add t4,t4,8
b Top
LzCopy7:
//
// LzCopy7
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
// This routine is special since it is for the last bit in the flag
// byte. The InputPointer(a2) and OutputPointer(t4) are biased at
// the top of this segment and don't need to be biased again
//
//
lbu t2,9(a2) // load second byte of copy token
add t4,t4,7 // mov t4 to point to byte 7
add a2,a2,10 // a2 points to next actual src byte
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
10:
sub t0,t8,t4 // if t4 < t8 then
bgez t0,20f // branch around boundry adjust
sll t9,t9,1 // next length boundary
add t8,t9,a0 // t8 = next offset boundary
srl t7,t7,1 // reduce width of length mask
subu t6,t6,1 // reduce shift count to isolate offset
b 10b // re-check boundry
20:
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
add t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
add t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
add t3,t4,t0 // CurrentPointer + Length
ble t3,a1,10f // Fix length if it would over-run buffer
subu t0,a1,t4 // calc new lenght, up to end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
beq t0,zero,30f // skip if length = 0
add t2,t4,t0 // t2 = end address
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
add t4,t4,1 // inc dst addr
add t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
bgt t4,v0,SafeCheckStart // branch to safe-copy setup
//
// t4 and a2 are alreadt corrected
// jump back tostart of quick loop
//
b Top
//
// Near the end of either compressed or uncompressed buffers,
// check buffer limits before any load or store
//
SafeCheckStart:
beq a2,a3,LzSuccess // check for end of CompressedBuffer
lbu s0,0(a2) // load next flag byte
add a2,a2,1 // inc src addr to literal/CopyFlag[0]
li t5,8 // loop count
SafeCheckLoop:
beq a2,a3,LzSuccess // check for end of CompressedBuffer
beq t4,a1,LzSuccess // check for end of UncompressedBuffer
sll t0,s0,31 // shift flag bit into sign bit
.set noreorder
bltz t0,LzSafeCopy // if sign bit, go to safe copy routine
lbu t1,0(a2) // load literal or CopyToken[0]
.set reorder
add a2,a2,1 // inc CompressedBuffer adr
sb t1,0(t4) // store literal byte
add t4,t4,1 // inc UncompressedBuffer
SafeCheckReentry:
srl s0,s0,1 // move next bit into position
add t5,t5,-1
bne t5,zero,SafeCheckLoop // check for more bits in flag byte
b SafeCheckStart // get next flag byte
LzSafeCopy:
//
// LzSafeCopy
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,1(a2) // load second byte of copy token
add a2,a2,2 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
10:
sub t0,t8,t4 // if t4 < t8 then
bgez t0,20f // branch around boundry adjust
sll t9,t9,1 // next length boundary
add t8,t9,a0 // t8 = next offset boundary
srl t7,t7,1 // reduce width of length mask
subu t6,t6,1 // reduce shift count to isolate offset
b 10b // re-check boundry
20:
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
add t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
add t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
add t3,t4,t0 // CurrentPointer + Length
ble t3,a1,10f // Fix length if it would over-run buffer
subu t0,a1,t4 // calc new lenght, up to end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
beq t0,zero,30f // skip if length = 0
add t2,t4,t0 // t2 = end address
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
add t4,t4,1 // inc dst addr
add t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess // Done
b SafeCheckReentry // Not done yet, continue with flag check
LzSuccess:
//
// calculate how many bytes have been moved to the uncompressed
// buffer, then set good return value
//
lw t0,LzFinal(sp) // address of variable to receive length
subu t1,t4,a0 // bytes stored
beq t0,zero,10f // don't store if this is NULL
sw t1,0(t0) // store length
10:
move v0,zero // STATUS_SUCCESS
LzComplete:
lw s0,LzS0(sp)
addu sp,sp,LzFrameLength
j ra
//
// fatal error in compressed data format
//
LzCompressError:
li v0,STATUS_BAD_COMPRESSION_BUFFER
b LzComplete
.end LZKMFastDecompressChunk
#endif