Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1325 lines
45 KiB

// TITLE("LZ Decompression")
//++
//
// Copyright (c) 1994 Microsoft Corporation
//
// Module Name:
//
// lznt1m.s
//
// Abstract:
//
// This module implements the decompression engine needed
// to support file system compression.
//
// Author:
//
// Mark Enstrom (marke) 21-Nov-1994
//
// Environment:
//
// Any mode.
//
// Revision History:
//
//--
#include "ksmips.h"
// #define FORMAT412 0
// #define FORMAT511 1
// #define FORMAT610 2
// #define FORMAT79 3
// #define FORMAT88 4
// #define FORMAT97 5
// #define FORMAT106 6
// #define FORMAT115 7
// #define FORMAT124 8
//
// 4/12 5/11 6/10 7/9 8/8 9/7 10/6 11/5 12/4
//
// ULONG FormatMaxLength[] = { 4098, 2050, 1026, 514, 258, 130, 66, 34, 18 };
// ULONG FormatMaxDisplacement[] = { 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 };
//
// width table for LZ length and offset encoding
//
SBTTL("LZNT1DecompressChunk")
//++
//
// NTSTATUS
// LZNT1DecompressChunk (
// OUT PUCHAR UncompressedBuffer,
// IN PUCHAR EndOfUncompressedBufferPlus1,
// IN PUCHAR CompressedBuffer,
// IN PUCHAR EndOfCompressedBufferPlus1,
// OUT PULONG FinalUncompressedChunkSize
// )
//
// Routine Description:
//
// This function decodes a stream of compression tokens and places the
// resultant output into the destination buffer. The format of the input
// is described ..\lznt1.c. As the input is decoded, checks are made to
// ensure that no data is read past the end of the compressed input buffer
// and that no data is stored past the end of the output buffer. Violations
// indicate corrupt input and are indicated by a status return.
//
// The following code takes advantage of three distinct observations.
// First, literal tokens occur at least twice as often as copy tokens.
// This argues for having a "fall-through" being the case where a literal
// token is found. We structure the main decomposition loop in eight
// pieces where the first piece is a sequence of literal-test fall-throughs
// and the remainder are a copy token followed by 7,6,...,0 literal-test
// fall-throughs. Each test examines a particular bit in the tag byte
// and jumps to the relevant code piece.
//
// The second observation involves performing bounds checking only
// when needed. Bounds checking the compressed buffer need only be done
// when fetching the tag byte. If there is not enough room left in the
// input for a tag byte and 8 (worst case) copy tokens, a branch is made
// to a second loop that handles a byte-by-byte "safe" copy to finish
// up the decompression. Similarly, at the head of the loop a check is
// made to ensure that there is enough room in the output buffer for 8
// literal bytes. If not enough room is left, then the second loop is
// used. Finally, after performing each copy, the output-buffer check
// is made as well since a copy may take the destination pointer
// arbitrarily close to the end of the destination.
//
// The third observation is an examination of CPU time while disk
// decompression is in progress. CPU utilization is only less than
// 25% peak. This means this routine should be written to minimize
// latency instead of bandwidth. For this reason, taken branches are
// avoided at the cost of code size and loop unrolling is not done.
//
// Arguments:
//
// a0 - UncompressedBuffer - Pointer to start of destination buffer
// a1 - EndOfUncompressedBufferPlus1 - One byte beyond uncompressed buffer
// a2 - CompressedBuffer - Pointer to buffer of compressed data (this pointer
// has been adjusted to point past the chunk header)
// a3 - EndOfCompressedBufferPlus1 - One byte beyond compressed buffer
// (sp) - FinalUncompressedChunkSize - return bytes written to
// UncompressedBuffer
//
// Return Value:
//
// None
//
//--
.struct 0
LzS0: .space 4 // saved internal register s0
LzRA: .space 4 // saved internal register ra
.space 4*2 // fill to keep stack 16-byte aligned
LzFrameLength: // Length of Stack frame
.space 4*4 // parameter 0-3 space
LzFinal:.space 4 // argument FinalUncompressedChunkSize
NESTED_ENTRY(LZNT1DecompressChunk, LzFrameLength, zero)
subu sp,sp,LzFrameLength
sw s0,LzS0(sp)
sw ra,LzRA(sp)
PROLOGUE_END
//
// make copy of UncompressedBuffer for
// current output pointer
//
move t4,a0
//
// Initialize variables used in keeping track of the
// LZ Copy Token format. t9 is used to store the maximum
// displacement for each phase of LZ decoding
// (see explanation of format in LZNT1.c). This displacement
// is added to the start of the CompressedBuffer address
// so that a boundary crossing can be detected.
//
li t9,0x10 // t9 = Max Displacement for LZ
addu t8,t9,a0 // t8 = Format boundary
li t7,0xffff >> 4 // t7 = length mask
li t6,12 // t6 = offset shift count
//
// Initialize variables to track safe copy limits for
// CompressedBuffer and UncopmressedBuffer. This allows
// execution of the quick Flag check below without
// checking for crossing the end of either buffer.
// From CompressedBuffer, one input pass includes 1 flag byte
// and up to 8 two byte copy tokens ( 1+2*8).
// To the un-compressed buffer, 8 literal bytes may be written,
// any copy-token bits set will cause an explicit length check
// in the LzCopy section
//
subu v0,a1,8 // safe end of UncompressedBuffer
subu v1,a3,1+2*8 // safe end of CompressedBuffer
Top:
//
// make sure safe copy can be performed for at least 8 literal bytes
//
bgt a2,v1,SafeCheckStart // safe check
bgt t4,v0,SafeCheckStart // safe check
lbu s0,0(a2) // load flag byte
//
// fall-through for copying 8 bytes.
//
sll t0,s0,31-0 // shift proper flag bit into sign bit
lbu t1,1(a2) // load literal or CopyToken[0]
bltz t0,LzCopy0 // if sign bit is set, go to copy routine
sb t1,0(t4) // store literal byte to dst
sll t0,s0,31-1 // shift proper flag bit into sign bit
lbu t1,2(a2) // load literal or CopyToken[0]
bltz t0,LzCopy1 // if sign bit is set, go to copy routine
sb t1,1(t4) // store literal byte to dst
sll t0,s0,31-2 // shift proper flag bit into sign bit
lbu t1,3(a2) // load literal or CopyToken[0]
bltz t0,LzCopy2 // if sign bit is set, go to copy routine
sb t1,2(t4) // store literal byte to dst
sll t0,s0,31-3 // shift proper flag bit into sign bit
lbu t1,4(a2) // load literal or CopyToken[0]
bltz t0,LzCopy3 // if sign bit is set, go to copy routine
sb t1,3(t4) // store literal byte to dst
sll t0,s0,31-4 // shift proper flag bit into sign bit
lbu t1,5(a2) // load literal or CopyToken[0]
bltz t0,LzCopy4 // if sign bit is set, go to copy routine
sb t1,4(t4) // store literal byte to dst
sll t0,s0,31-5 // shift proper flag bit into sign bit
lbu t1,6(a2) // load literal or CopyToken[0]
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
sb t1,5(t4) // store literal byte to dst
sll t0,s0,31-6 // shift proper flag bit into sign bit
lbu t1,7(a2) // load literal or CopyToken[0]
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
sb t1,6(t4) // store literal byte to dst
sll t0,s0,31-7 // shift proper flag bit into sign bit
lbu t1,8(a2) // load literal or CopyToken[0]
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
sb t1,7(t4)
addu a2,a2,9 // inc src addr
addu t4,t4,8
b Top
LzCopy0:
//
// LzCopy0
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,2(a2) // load second byte of copy token
addu a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
subu t0,t8,t4 // if t4 < t8 then
bltzal t0,LzAdjustBoundary // branch to boundry adjust and link return
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
addu t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
addu t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
addu t2,t4,t0 // CurrentPointer + Length (End Address)
ble t2,a1,10f // Fix length if it would over-run buffer
move t2,a1 // new length is end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
addu t4,t4,1 // inc dst addr
addu t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,7 // seven bits left in current flag byte
addu a2,a2,2 // Make a2 point to next src byte
srl s0,s0,1 // shift flag byte into next position
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy. Continue flag check at position 1
//
subu t4,t4,1 // unbias output pointer
sll t0,s0,31-1 // rotate flag bit into sign position
lbu t1,2(a2) // load literal or CopyToken[0]
bltz t0,LzCopy1 // if sign bit is set, go to copy routine
sb t1,1(t4) // store literal byte to dst
sll t0,s0,31-2 // shift proper flag bit into sign bit
lbu t1,3(a2) // load literal or CopyToken[0]
bltz t0,LzCopy2 // if sign bit is set, go to copy routine
sb t1,2(t4) // store literal byte to dst
sll t0,s0,31-3 // shift proper flag bit into sign bit
lbu t1,4(a2) // load literal or CopyToken[0]
bltz t0,LzCopy3 // if sign bit is set, go to copy routine
sb t1,3(t4) // store literal byte to dst
sll t0,s0,31-4 // shift proper flag bit into sign bit
lbu t1,5(a2) // load literal or CopyToken[0]
bltz t0,LzCopy4 // if sign bit is set, go to copy routine
sb t1,4(t4) // store literal byte to dst
sll t0,s0,31-5 // shift proper flag bit into sign bit
lbu t1,6(a2) // load literal or CopyToken[0]
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
sb t1,5(t4) // store literal byte to dst
sll t0,s0,31-6 // shift proper flag bit into sign bit
lbu t1,7(a2) // load literal or CopyToken[0]
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
sb t1,6(t4) // store literal byte to dst
sll t0,s0,31-7 // shift proper flag bit into sign bit
lbu t1,8(a2) // load literal or CopyToken[0]
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
sb t1,7(t4)
addu a2,a2,9 // inc src addr
addu t4,t4,8
b Top
LzCopy1:
//
// LzCopy1
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,3(a2) // load second byte of copy token
addu t4,t4,1 // mov t4 to point to byte 1
addu a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
subu t0,t8,t4 // if t4 < t8 then
bltzal t0,LzAdjustBoundary // branch to boundry adjust and link return
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
addu t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
addu t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
addu t2,t4,t0 // CurrentPointer + Length (End Address)
ble t2,a1,10f // Fix length if it would over-run buffer
move t2,a1 // new length is end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
addu t4,t4,1 // inc dst addr
addu t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,6 // six bits left in current flag byte
addu a2,a2,3 // Make a2 point to next src byte
srl s0,s0,2 // shift flag byte into position
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy. Continue flag check at position 2
//
subu t4,t4,2 // un-bias input pointer
sll t0,s0,31-2 // rotate flag into position for sign check
lbu t1,3(a2) // load literal or CopyToken[0]
bltz t0,LzCopy2 // if sign bit is set, go to copy routine
sb t1,2(t4) // store literal byte to dst
sll t0,s0,31-3 // shift proper flag bit into sign bit
lbu t1,4(a2) // load literal or CopyToken[0]
bltz t0,LzCopy3 // if sign bit is set, go to copy routine
sb t1,3(t4) // store literal byte to dst
sll t0,s0,31-4 // shift proper flag bit into sign bit
lbu t1,5(a2) // load literal or CopyToken[0]
bltz t0,LzCopy4 // if sign bit is set, go to copy routine
sb t1,4(t4) // store literal byte to dst
sll t0,s0,31-5 // shift proper flag bit into sign bit
lbu t1,6(a2) // load literal or CopyToken[0]
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
sb t1,5(t4) // store literal byte to dst
sll t0,s0,31-6 // shift proper flag bit into sign bit
lbu t1,7(a2) // load literal or CopyToken[0]
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
sb t1,6(t4) // store literal byte to dst
sll t0,s0,31-7 // shift proper flag bit into sign bit
lbu t1,8(a2) // load literal or CopyToken[0]
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
sb t1,7(t4)
addu a2,a2,9 // inc src addr
addu t4,t4,8
b Top
LzCopy2:
//
// LzCopy2
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,4(a2) // load second byte of copy token
addu t4,t4,2 // mov t4 to point to byte 1
addu a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
subu t0,t8,t4 // if t4 < t8 then
bltzal t0,LzAdjustBoundary // branch to boundry adjust and link return
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
addu t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
addu t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
addu t2,t4,t0 // CurrentPointer + Length (End Address)
ble t2,a1,10f // Fix length if it would over-run buffer
move t2,a1 // new length is end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
addu t4,t4,1 // inc dst addr
addu t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,5 // five bits left in current flag byte
addu a2,a2,4 // Make a2 point to next src byte
srl s0,s0,3 // shift flag byte into positin
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy
// continue flag check at position 1 (could duplicate LzQuick switch 1-7 here)
//
subu t4,t4,3 // un-bias output pointer
sll t0,s0,31-3 // rotate flag into position for sign check
lbu t1,4(a2) // load literal or CopyToken[0]
bltz t0,LzCopy3 // if sign bit is set, go to copy routine
sb t1,3(t4) // store literal byte to dst
sll t0,s0,31-4 // shift proper flag bit into sign bit
lbu t1,5(a2) // load literal or CopyToken[0]
bltz t0,LzCopy4 // if sign bit is set, go to copy routine
sb t1,4(t4) // store literal byte to dst
sll t0,s0,31-5 // shift proper flag bit into sign bit
lbu t1,6(a2) // load literal or CopyToken[0]
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
sb t1,5(t4) // store literal byte to dst
sll t0,s0,31-6 // shift proper flag bit into sign bit
lbu t1,7(a2) // load literal or CopyToken[0]
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
sb t1,6(t4) // store literal byte to dst
sll t0,s0,31-7 // shift proper flag bit into sign bit
lbu t1,8(a2) // load literal or CopyToken[0]
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
sb t1,7(t4)
addu a2,a2,9 // inc src addr
addu t4,t4,8
b Top
LzCopy3:
//
// LzCopy3
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,5(a2) // load second byte of copy token
addu t4,t4,3 // mov t4 to point to byte 1
addu a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
subu t0,t8,t4 // if t4 < t8 then
bltzal t0,LzAdjustBoundary // branch to boundry adjust and link return
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
addu t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
addu t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
addu t2,t4,t0 // CurrentPointer + Length (End Address)
ble t2,a1,10f // Fix length if it would over-run buffer
move t2,a1 // new length is end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
addu t4,t4,1 // inc dst addr
addu t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,4 // four bits left in current flag byte
addu a2,a2,5 // Make a2 point to next src byte
srl s0,s0,4 // shift flag byte into positin
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy
// continue flag check at position 1 (could duplicate LzQuick switch 1-7 here)
//
subu t4,t4,4 // un-bias output pointer
sll t0,s0,31-4 // rotate flag into position for sign check
lbu t1,5(a2) // load literal or CopyToken[0]
bltz t0,LzCopy4 // if sign bit is set, go to copy routine
sb t1,4(t4) // store literal byte to dst
sll t0,s0,31-5 // shift proper flag bit into sign bit
lbu t1,6(a2) // load literal or CopyToken[0]
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
sb t1,5(t4) // store literal byte to dst
sll t0,s0,31-6 // shift proper flag bit into sign bit
lbu t1,7(a2) // load literal or CopyToken[0]
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
sb t1,6(t4) // store literal byte to dst
sll t0,s0,31-7 // shift proper flag bit into sign bit
lbu t1,8(a2) // load literal or CopyToken[0]
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
sb t1,7(t4)
addu a2,a2,9 // inc src addr
addu t4,t4,8
b Top
LzCopy4:
//
// LzCopy4
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,6(a2) // load second byte of copy token
addu t4,t4,4 // mov t4 to point to byte 1
addu a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
subu t0,t8,t4 // if t4 < t8 then
bltzal t0,LzAdjustBoundary // branch to boundry adjust and link return
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
addu t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
addu t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
addu t2,t4,t0 // CurrentPointer + Length (End Address)
ble t2,a1,10f // Fix length if it would over-run buffer
move t2,a1 // new length is end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
addu t4,t4,1 // inc dst addr
addu t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,3 // three bits left in current flag byte
addu a2,a2,6 // Make a2 point to next src byte
srl s0,s0,5 // shift flag byte so that next bit is in positin 0
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy
// continue flag check at position 1 (could duplicate LzQuick switch 1-7 here)
//
subu t4,t4,5 // un-bias output pointer
sll t0,s0,31-5 // rotate flag into position for sign check
lbu t1,6(a2) // load literal or CopyToken[0]
bltz t0,LzCopy5 // if sign bit is set, go to copy routine
sb t1,5(t4) // store literal byte to dst
sll t0,s0,31-6 // shift proper flag bit into sign bit
lbu t1,7(a2) // load literal or CopyToken[0]
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
sb t1,6(t4) // store literal byte to dst
sll t0,s0,31-7 // shift proper flag bit into sign bit
lbu t1,8(a2) // load literal or CopyToken[0]
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
sb t1,7(t4)
addu a2,a2,9 // inc src addr
addu t4,t4,8
b Top
LzCopy5:
//
// LzCopy5
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,7(a2) // load second byte of copy token
addu t4,t4,5 // mov t4 to point to byte 1
addu a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
subu t0,t8,t4 // if t4 < t8 then
bltzal t0,LzAdjustBoundary // branch to boundry adjust and link return
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
addu t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
addu t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
addu t2,t4,t0 // CurrentPointer + Length (End Address)
ble t2,a1,10f // Fix length if it would over-run buffer
move t2,a1 // new length is end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
addu t4,t4,1 // inc dst addr
addu t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,2 // two bits left in current flag byte
addu a2,a2,7 // Make a2 point to next src byte
srl s0,s0,6 // shift flag byte so that next bit is in positin 0
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy
// continue flag check at position 1 (could duplicate LzQuick switch 1-7 here)
//
subu t4,t4,6 // un-bias output pointer
sll t0,s0,31-6 // rotate flag into position for sign check
lbu t1,7(a2) // load literal or CopyToken[0]
bltz t0,LzCopy6 // if sign bit is set, go to copy routine
sb t1,6(t4) // store literal byte to dst
sll t0,s0,31-7 // shift proper flag bit into sign bit
lbu t1,8(a2) // load literal or CopyToken[0]
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
sb t1,7(t4)
addu a2,a2,9 // inc src addr
addu t4,t4,8
b Top
LzCopy6:
//
// LzCopy6
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
lbu t2,8(a2) // load second byte of copy token
addu t4,t4,6 // mov t4 to point to byte 1
addu a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
subu t0,t8,t4 // if t4 < t8 then
bltzal t0,LzAdjustBoundary // branch to boundry adjust and link return
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
addu t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
addu t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
addu t2,t4,t0 // CurrentPointer + Length (End Address)
ble t2,a1,10f // Fix length if it would over-run buffer
move t2,a1 // new length is end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
addu t4,t4,1 // inc dst addr
addu t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
ble t4,v0,10f // skip if still in safe boundry
li t5,1 // one bit left in current flag byte
addu a2,a2,8 // Make a2 point to next src byte
srl s0,s0,7 // shift flag byte into position
b SafeCheckLoop
10:
//
// adjust t4 back to position it would be if this was a liternal byte
// copy
// continue flag check at position 1 (could duplicate LzQuick switch 1-7 here)
//
subu t4,t4,7 // un-bias output pointer
sll t0,s0,31-7 // rotate flag into position for sign check
lbu t1,8(a2) // load literal or CopyToken[0]
bltz t0,LzCopy7 // if sign bit is set, go to copy routine
sb t1,7(t4)
addu a2,a2,9 // inc src addr
addu t4,t4,8
b Top
LzCopy7:
//
// LzCopy7
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of current flag byte
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
// This routine is special since it is for the last bit in the flag
// byte. The InputPointer(a2) and OutputPointer(t4) are biased at
// the top of this segment and don't need to be biased again
//
//
lbu t2,9(a2) // load second byte of copy token
addu t4,t4,7 // mov t4 to point to byte 7
addu a2,a2,10 // a2 points to next actual src byte
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
subu t0,t8,t4 // if t4 < t8 then
bltzal t0,LzAdjustBoundary // branch to boundry adjust and link return
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
addu t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
addu t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
addu t2,t4,t0 // CurrentPointer + Length (End Address)
ble t2,a1,10f // Fix length if it would over-run buffer
move t2,a1 // new length is end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
addu t4,t4,1 // inc dst addr
addu t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess
//
// if t4 > Safe end of uncomressed buffer, then jump to the
// safe (slow) routine to do safety check before every load/store
//
bgt t4,v0,SafeCheckStart // branch to safe-copy setup
//
// t4 and a2 are alreadt corrected
// jump back tostart of quick loop
//
b Top
//
// Near the end of either compressed or uncompressed buffers,
// check buffer limits before any load or store
//
SafeCheckStart:
beq a2,a3,LzSuccess // check for end of CompressedBuffer
lbu s0,0(a2) // load next flag byte
addu a2,a2,1 // inc src addr to literal/CopyFlag[0]
li t5,8 // loop count
SafeCheckLoop:
beq a2,a3,LzSuccess // check for end of CompressedBuffer
beq t4,a1,LzSuccess // check for end of UncompressedBuffer
sll t0,s0,31 // shift flag bit into sign bit
lbu t1,0(a2) // load literal or CopyToken[0]
addu a2,a2,1 // inc CompressedBuffer adr
bltz t0,LzSafeCopy // if sign bit, go to safe copy routine
sb t1,0(t4) // store literal byte
addu t4,t4,1 // inc UncompressedBuffer
SafeCheckReentry:
srl s0,s0,1 // move next bit into position
addu t5,t5,-1
bne t5,zero,SafeCheckLoop // check for more bits in flag byte
b SafeCheckStart // get next flag byte
LzSafeCopy:
//
// LzSafeCopy
//
// t1 - CopyToken[0]
// a2 - CompressedBuffer address of CopyToken[1]
// t4 - UncomressedBuffer address at start of flag byte check
// s0 - Flag byte
//
// load copy token, (first byte already loaded in delay slot),
// then combine into a 16 bit field
//
beq a2,a3,LzCompressError // check for end of CompressedBuffer, error case
lbu t2,0(a2) // load second byte of copy token
addu a2,a2,1 // fix-up src addr for return to switch
sll t2,t2,8 // shift second byte into high 16
or t2,t1,t2 // combine
//
// Check for a breach of the format boundary.
//
subu t0,t8,t4 // if t4 < t8 then
bltzal t0,LzAdjustBoundary // branch to boundry adjust and link return
//
// Extract offset and length from copy token
//
and t0,t2,t7 // t0 = length from field
addu t0,t0,3 // t0 = real length
srl t1,t2,t6 // t1 = offset
addu t1,t1,1 // t1 = real offset
//
// Make sure offset doesn't go below start of uncompressed buffer
//
subu t2,t4,a0 // t2 = current offset into output buffer
bgt t1,t2,LzCompressError // error in compressed data
//
// check if length will not go up to or beyond actual uncompressed buffer length
//
addu t2,t4,t0 // CurrentPointer + Length
ble t2,a1,10f // Fix length if it would over-run buffer
move t2,a1 // new length is end of buffer
10:
//
// copy t0 bytes bytes from [t4-t1] to [t4]
//
subu t3,t4,t1 // t1 = OutputPointer - Offset
20:
lbu t0,0(t3) // load src
sb t0,0(t4) // store to dst
addu t4,t4,1 // inc dst addr
addu t3,t3,1 // inc src addr
bne t4,t2,20b // loop till done
//
// if t4 = a1, then we are up to the end of the uncompressed buffer.
// return success
//
30:
beq t4,a1,LzSuccess // Done
b SafeCheckReentry // Not done yet, continue with flag check
LzSuccess:
//
// calculate how many bytes have been moved to the uncompressed
// buffer, then set good return value
//
lw t0,LzFinal(sp) // address of variable to receive length
subu t1,t4,a0 // bytes stored
sw t1,0(t0) // store length
10:
li v0,STATUS_SUCCESS // STATUS_SUCCESS
LzComplete:
lw ra,LzRA(sp)
lw s0,LzS0(sp)
addu sp,sp,LzFrameLength
j ra
//
// fatal error in compressed data format
//
LzCompressError:
li v0,STATUS_BAD_COMPRESSION_BUFFER
b LzComplete
//
// at least one format boundry has been crossed, set up new bouandry
// then jump back to the check routine to make sure new boundry is
// correct
//
LzAdjustBoundary:
sll t9,t9,1 // next length boundary
addu t8,t9,a0 // t8 = next offset boundary
srl t7,t7,1 // reduce width of length mask
subu t6,t6,1 // reduce shift count to isolate offset
subu t0,t8,t4 // if t4 < t8 then
bltz t0,LzAdjustBoundary // branch to boundry adjust, re-check
j ra
.end LZNT1DecompressChunk