mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1117 lines
33 KiB
1117 lines
33 KiB
/*
|
|
|
|
Decoding is splitted into two phases:
|
|
|
|
1. Fast decoding. Check bounds rarely (when loading new tag and after copying
|
|
of a long string) and switch into Careful only when we are too close to the
|
|
end of input or output buffer.
|
|
|
|
2. Careful decoding. Before performing any memory access all bounds are checked
|
|
to make sure no buffer overrun or underrun will happen. Careful decoding is
|
|
usually 1.5 times slower than Fast one, but only last several hundred bytes
|
|
are decoded this way; all the rest is decoded Fast.
|
|
|
|
As long as decoding code is essentially the same except for bounds checks that
|
|
differ in Fast and Careful mode, in order to avoid code duplication this file
|
|
is included twice with different setting of CAREFUL macro (first it is 0, then
|
|
1).
|
|
|
|
Run "cl -EP xdecode.c >xdecode.pp" to see actual code.
|
|
|
|
*/
|
|
|
|
#if _MSC_FULL_VER >= 13008827 && defined(_M_IX86)
|
|
#pragma warning(disable:4731) // EBP modified with inline asm
|
|
#endif
|
|
|
|
#if CAREFUL
|
|
|
|
#define LABEL(label) careful_##label
|
|
#define CAREFUL_OK_IF(cond) if (cond) RET_OK
|
|
#define CAREFUL_ERR_IF(cond) if (cond) RET_ERR
|
|
#define CAREFUL_EOF_IF(cond) if (cond) goto ret_ok_eof;
|
|
#define CAREFUL_IF(cond, label) label:
|
|
|
|
#if CODING & (CODING_HUFF_LEN | CODING_HUFF_PTR | CODING_HUFF_ALL)
|
|
#define START careful_start:
|
|
#else
|
|
#define START
|
|
#endif
|
|
|
|
#else /* !CAREFUL */
|
|
|
|
#define LABEL(label) label
|
|
#define CAREFUL_OK_IF(cond)
|
|
#define CAREFUL_ERR_IF(cond)
|
|
#define CAREFUL_EOF_IF(cond)
|
|
#define CAREFUL_IF(cond, label) if (cond) goto label
|
|
#define START start:
|
|
|
|
static void do_decode (decode_info *info)
|
|
{
|
|
#endif
|
|
|
|
|
|
|
|
/* ----------------------- CODING_HUFF_ALL ------------------------ */
|
|
/* --------------- */
|
|
|
|
// C code: 26.3 MB/s, asm code: 32.3 MB/s at P3-500
|
|
|
|
#if CODING == CODING_HUFF_ALL
|
|
|
|
#ifndef i386
|
|
|
|
#if !CAREFUL
|
|
ubitmask4 Mask;
|
|
bits_t Bits, bits;
|
|
xint len;
|
|
uxint ofs;
|
|
uchar *dst = info->dst.beg;
|
|
const uchar *src = info->src.beg;
|
|
|
|
Mask = * (__unaligned ubitmask2 *) src; src += sizeof (ubitmask2);
|
|
Mask <<= sizeof (ubitmask2) * 8;
|
|
Mask += * (__unaligned ubitmask2 *) src; src += sizeof (ubitmask2);
|
|
Bits = 8 * sizeof (ubitmask2);
|
|
|
|
if (src >= info->src.careful || dst >= info->dst.careful)
|
|
goto careful_next;
|
|
goto LABEL(next);
|
|
#endif /* CAREFUL */
|
|
|
|
LABEL(decode_more):
|
|
// too close to end of buffer? -- switch to careful mode...
|
|
CAREFUL_IF (src >= info->src.careful, decode_more1);
|
|
CAREFUL_IF (dst >= info->dst.careful, decode_more2);
|
|
|
|
// input buffer ovverrun? -- corrupted compressed data
|
|
CAREFUL_ERR_IF (src >= info->src.end_bitmask2);
|
|
|
|
// read 16 bits more and update Mask&Bits respectively
|
|
bits = Bits;
|
|
ofs = * (__unaligned ubitmask2 *) src;
|
|
bits = (bits_t) (-bits);
|
|
src += sizeof (ubitmask2);
|
|
ofs <<= bits;
|
|
Bits += 8 * sizeof (ubitmask2);
|
|
Mask += (ubitmask4) ofs;
|
|
|
|
if ((len -= 256) >= 0)
|
|
goto LABEL (pointer);
|
|
|
|
for (;;)
|
|
{
|
|
CAREFUL_OK_IF (dst >= info->dst.stop);
|
|
*dst = (uchar) len; // copy literal byte to output
|
|
ofs = (uxint) (Mask >> (8 * sizeof (Mask) - DECODE_BITS));
|
|
++dst;
|
|
len = ((int16 *) info->table)[ofs];
|
|
bits = 15;
|
|
|
|
if (len < 0)
|
|
goto LABEL(long_codeword);
|
|
|
|
// short codeword -- already decoded
|
|
|
|
bits &= len; // bits = # of bit used in Mask
|
|
len >>= 4; // len = token
|
|
|
|
Mask <<= bits; // update Mask&Bits
|
|
Bits = (bits_t) (Bits - bits); // read more bits if necessary
|
|
if (Bits < 0)
|
|
goto LABEL (decode_more);
|
|
|
|
if ((len -= 256) < 0) // (len -= 256) < 0 ? literal : pointer
|
|
continue;
|
|
goto LABEL (pointer);
|
|
|
|
LABEL(next): // decode next token via lookup table
|
|
ofs = (uxint) (Mask >> (8 * sizeof (Mask) - DECODE_BITS));
|
|
len = ((int16 *) info->table)[ofs];
|
|
bits = 15;
|
|
|
|
if (len >= 0)
|
|
{
|
|
// short codeword -- already decoded
|
|
|
|
bits &= len; // bits = # of bit used in Mask
|
|
len >>= 4; // len = token
|
|
|
|
Mask <<= bits; // update Mask&Bits
|
|
Bits = (bits_t) (Bits - bits); // read more bits if necessary
|
|
if (Bits < 0)
|
|
goto LABEL (decode_more);
|
|
|
|
if ((len -= 256) < 0) // (len -= 256) < 0 ? literal : pointer
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
LABEL (long_codeword):
|
|
// long codeword -- decode bit by bit
|
|
|
|
Mask <<= DECODE_BITS; // DECODE_BITS alreay parsed
|
|
|
|
do
|
|
{
|
|
len += ((bitmask4) Mask) < 0; // len += MSB (Mask)
|
|
Mask <<= 1; // 1 more bit was used
|
|
len = ((int16 *) info->table)[len + 0x8000];
|
|
}
|
|
while (len < 0);
|
|
|
|
bits &= len; // bits = # of bit used in Mask
|
|
len >>= 4; // len = token
|
|
|
|
Bits = (bits_t) (Bits - bits); // read more bits if necessary
|
|
if (Bits < 0)
|
|
goto LABEL (decode_more);
|
|
|
|
if ((len -= 256) < 0) // (len -= 256) < 0 ? literal : pointer
|
|
continue;
|
|
}
|
|
|
|
LABEL(pointer):
|
|
|
|
CAREFUL_EOF_IF (dst >= info->dst.stop);
|
|
|
|
bits = (bits_t) (len >> MAX_LENGTH_LOG); // # of bits in offset
|
|
ofs = (uxint) ((Mask >> 1) | (((ubitmask4) 1) << (8 * sizeof (Mask) - 1)));
|
|
|
|
Mask <<= bits; // update Mask and Bits
|
|
Bits = (bits_t) (Bits - bits);
|
|
|
|
bits ^= 8 * sizeof (ofs) - 1; // bits = 31 - bits
|
|
len &= MAX_LENGTH - 1; // run length - MIN_MATCH
|
|
ofs >>= bits; // ofs = (1<<bits) | (Mask<<(32-bits))
|
|
|
|
info->src.last = src; // save src
|
|
ofs = (uxint) (- (xint) ofs); // ofs = real negative offset
|
|
|
|
#if !CAREFUL && 8-MIN_MATCH < MAX_LENGTH-1
|
|
if (len <= 8-MIN_MATCH)
|
|
{
|
|
src = dst + ofs; // src = beginning of string
|
|
|
|
#ifdef _M_IX86 // unligned access is faster only on x86
|
|
if (ofs < ~2)
|
|
{
|
|
if (src < info->dst.beg) // buffer underrun? -- corrupted data
|
|
RET_ERR;
|
|
|
|
ofs = ((__unaligned uint32 *) src)[0]; // copy 8 bytes
|
|
((__unaligned uint32 *) dst)[0] = ofs;
|
|
ofs = ((__unaligned uint32 *) src)[1];
|
|
((__unaligned uint32 *) dst)[1] = ofs;
|
|
|
|
src = info->src.last; // restore src
|
|
dst = dst + len + MIN_MATCH; // dst = next output position
|
|
|
|
if (Bits >= 0) // have enough Bits in Mask? -- proceed further
|
|
goto LABEL (next);
|
|
goto LABEL (mask_more); // otherwise, read more bits
|
|
}
|
|
#endif
|
|
|
|
if (src < info->dst.beg) // buffer underrun? -- corrupted data
|
|
RET_ERR;
|
|
|
|
COPY_8_BYTES (dst, src); // copy 8 bytes one by one
|
|
// NB: dst & src may overlap
|
|
src = info->src.last; // restore src
|
|
dst = dst + len + MIN_MATCH; // dst = next output position
|
|
|
|
if (Bits >= 0) // have enough Bits in Mask? -- proceed further
|
|
goto LABEL (next);
|
|
goto LABEL (mask_more); // otherwise, read more bits
|
|
}
|
|
#endif /* CAREFUL */
|
|
|
|
if (len == MAX_LENGTH - 1) // long length? -- decode it
|
|
{
|
|
// if input data overrun then compressed data corrupted
|
|
CAREFUL_ERR_IF (src >= info->src.end);
|
|
|
|
len = *src++ + (MAX_LENGTH-1);
|
|
if (len == 255 + MAX_LENGTH-1)
|
|
{
|
|
CAREFUL_ERR_IF (src >= info->src.end_1);
|
|
len = * (__unaligned uint16 *) src;
|
|
src += 2;
|
|
if (len < 255 + MAX_LENGTH-1) // length should be large enough
|
|
RET_ERR;
|
|
}
|
|
|
|
info->src.last = src; // save input buffer pointer
|
|
}
|
|
|
|
len += MIN_MATCH; // len = actual length
|
|
src = dst + ofs; // src = pointer to the beginning of string
|
|
dst += len; // dst = last output position
|
|
|
|
if (src < info->dst.beg) // buffer underrun? -- corrupted data
|
|
RET_ERR;
|
|
|
|
#if !CAREFUL
|
|
if (dst >= info->dst.careful)
|
|
goto careful_check_overrun;
|
|
#else
|
|
careful_check_overrun:
|
|
if (dst > info->dst.stop) // more to copy than necessary?
|
|
{
|
|
dst -= len; // dst = first output position
|
|
len = info->dst.stop - dst; // len = max length to copy
|
|
COPY_BLOCK_SLOW (dst, src, len); // copy last run
|
|
src = info->src.last; // restore input buffer pointer
|
|
RET_OK; // OK but no EOF mark was found
|
|
}
|
|
#endif
|
|
|
|
dst -= len; // dst = first output position
|
|
|
|
COPY_BLOCK_SLOW (dst, src, len); // input & output may overlap -- copy byte by byte
|
|
|
|
src = info->src.last; // restore input buffer pointer
|
|
|
|
CAREFUL_IF (dst >= info->dst.careful, copy1);
|
|
|
|
if (Bits >= 0) // enough Bits in Mask?
|
|
goto LABEL (next); // decode next token
|
|
|
|
#if !CAREFUL && 8-MIN_MATCH < MAX_LENGTH-1
|
|
LABEL(mask_more):
|
|
#endif
|
|
|
|
// have 2 more bytes in input buffer?
|
|
CAREFUL_ERR_IF (src >= info->src.end_bitmask2);
|
|
|
|
// read 16 bits more and update Mask&Bits respectively
|
|
bits = Bits;
|
|
ofs = * (__unaligned ubitmask2 *) src;
|
|
bits = (bits_t) (-bits);
|
|
src += sizeof (ubitmask2);
|
|
ofs <<= bits;
|
|
Bits += 8 * sizeof (ubitmask2);
|
|
Mask += (ubitmask4) ofs;
|
|
|
|
goto LABEL (next); // decode next token
|
|
|
|
} /* of for(;;) */
|
|
|
|
#if CAREFUL
|
|
ret_ok_eof:
|
|
if (dst == info->dst.end && len == 0)
|
|
info->eof = 1;
|
|
ret_ok:
|
|
info->src.last = src;
|
|
info->dst.last = dst;
|
|
info->result = 1;
|
|
return;
|
|
|
|
ret_err:
|
|
info->result = 0;
|
|
return;
|
|
#endif /* CAREFUL */
|
|
|
|
|
|
#else /* ---------------------- defined i386 --------------------- */
|
|
|
|
#if !CAREFUL
|
|
|
|
__asm
|
|
{
|
|
mov eax,info ; save info
|
|
|
|
push esi ; save registers
|
|
push edi
|
|
push edx
|
|
push ecx
|
|
push ebx
|
|
push ebp
|
|
|
|
mov ebp,eax ; (ebp) = info
|
|
mov ebx,[ebp].src.beg ; (ebx) = src
|
|
mov edi,[ebp].dst.beg ; (edx) = dst
|
|
|
|
xor esi,esi ; initialize Mask
|
|
mov si,[ebx]
|
|
shl esi,16
|
|
mov si,[ebx+2]
|
|
add ebx,4
|
|
|
|
mov ch,16 ; (ch) = Bits = 16
|
|
|
|
cmp ebx,[ebp].src.careful ; too close to the end of src buffer?
|
|
jae careful_next ; yes, be careful...
|
|
cmp edi,[ebp].dst.careful ; too close to the end of dst buffer?
|
|
jae careful_next ; yes, be careful...
|
|
|
|
jmp LABEL (next)
|
|
#endif /* CAREFUL */
|
|
|
|
LABEL(literal):
|
|
#if CAREFUL
|
|
cmp edi,[ebp].dst.stop ; decoded as much as asked?
|
|
jae ret_ok ; done, but no EOF mark
|
|
#endif
|
|
mov edx,esi ; (edx) = Mask
|
|
mov [edi],al ; store literal byte
|
|
shr edx,32-DECODE_BITS
|
|
inc edi ; (edi) = next output position
|
|
movsx eax,word ptr [ebp+edx*2].table ; (eax) = respective decode table entry
|
|
mov cl,15 ; (cl) = 15
|
|
test eax,eax ; need further decoding? (= codelen > DECODE_BITS?)
|
|
jl LABEL(long_code) ; yes, do it
|
|
|
|
and cl,al ; (cl) = # of bits used in mask
|
|
shr eax,4 ; (eax) = token
|
|
shl esi,cl ; (esi) = resulting mask
|
|
|
|
sub ch,cl ; (ch) = # of available bits left in dx
|
|
jl LABEL(decode_more) ; if ch < 0 need to read more bits
|
|
|
|
sub eax,256 ; (eax) = token - 256
|
|
jl LABEL(literal) ; if < 0 then al = code of literal
|
|
jmp LABEL(pointer) ; otherwise it's pointer
|
|
|
|
LABEL(next):
|
|
mov edx,esi ; (edx) = Mask
|
|
mov cl,15 ; (cl) = 15
|
|
shr edx,32-DECODE_BITS ; (edx) = DECODE_BITS most significant bits of Mask
|
|
movsx eax,word ptr [ebp+edx*2].table ; (eax) = respective decode table entry
|
|
test eax,eax ; need further decoding?
|
|
jl LABEL(long_code) ; yes, continue
|
|
|
|
and cl,al ; (cl) = # of bits used in mask
|
|
shr eax,4 ; (eax) = current token
|
|
shl esi,cl ; (esi) = resulting mask
|
|
|
|
sub ch,cl ; (ch) = # of available bits left in dx
|
|
jl LABEL(decode_more) ; if ch < 0 need to read more bits
|
|
|
|
sub eax,256 ; (eax) = token - 256
|
|
jl LABEL(literal) ; if < 0 then al = code of literal
|
|
jmp LABEL(pointer) ; otherwise it's pointer
|
|
|
|
LABEL(long_code):
|
|
shl esi, DECODE_BITS ; DECODE_BITS were used; remove them
|
|
|
|
LABEL(next_bit):
|
|
add esi,esi ; Mask <<= 1 (and get carry)
|
|
adc eax,0 ; eax += (old Mask < 0)
|
|
movsx eax,word ptr [ebp+eax*2+0x10000].table ; (eax) = token
|
|
test eax,eax ; need further decoding?
|
|
jl LABEL(next_bit) ; yes, continue
|
|
|
|
and cl,al ; (cl) = # of bits used in mask
|
|
shr eax,4 ; (eax) = token
|
|
|
|
sub ch,cl ; (ch) = # of available bits left in Mask
|
|
jl LABEL(decode_more) ; if ch < 0 need to read more bits
|
|
|
|
sub eax,256 ; (eax) = token - 256
|
|
jl LABEL(literal) ; if < 0 then al = code of literal
|
|
jmp LABEL(pointer) ; otherwise it's pointer
|
|
|
|
|
|
LABEL(decode_more):
|
|
#if !CAREFUL
|
|
cmp ebx,[ebp].src.careful ; too close to the end of src buffer?
|
|
jae careful_decode_more ; yes, be careful...
|
|
cmp edi,[ebp].dst.careful ; too close to the end of dst buffer?
|
|
jae careful_decode_more ; yes, be careful...
|
|
#else
|
|
cmp ebx,[ebp].src.end_bitmask2 ; buffer overrun?
|
|
jae error ; yes, error...
|
|
#endif
|
|
|
|
mov cl,ch ; (cl) = (# of have - # of used)
|
|
xor edx,edx
|
|
mov dx,[ebx] ; (edx) = next 16 bits
|
|
neg cl ; (cl) = # unused bits in Mask
|
|
add ebx,2 ; (ebx) = ptr to next token
|
|
shl edx,cl ; (edx) = 16 aligned on required boundary
|
|
add ch,16 ; (ch) = # of free bits in Mask
|
|
add esi,edx ; (esi) = Mask + next 16 bits
|
|
|
|
sub eax,256 ; (eax) = token - 256
|
|
jl LABEL(literal) ; if < 0 then al = code of literal
|
|
|
|
LABEL(pointer):
|
|
#if CAREFUL
|
|
cmp edi,[ebp].dst.stop ; reached end of buffer?
|
|
jae ret_ok_eof ; yes, done, and probably EOF (check later)
|
|
#endif
|
|
|
|
mov cl,al ; prepare to obtain # of bits in offset
|
|
mov edx,esi ; (edx) = mask
|
|
|
|
shr cl,MAX_LENGTH_LOG ; (cl) = # of bits in offset
|
|
or edx,1 ; set less significant bit
|
|
|
|
shl esi,cl ; (esi) = (Mask << cl)
|
|
sub ch,cl ; (ch) = # of bits left in mask
|
|
|
|
ror edx,1 ; (edx) = (Mask >> 1) | 0x80000000
|
|
xor cl,31 ; (cl) = 31 - (# of bits in mask)
|
|
|
|
and eax,MAX_LENGTH-1 ; (eax) = length - MIN_MATCH
|
|
shr edx,cl ; (edx) = (1 << #) + (Mask >> (32-#)) = offset
|
|
|
|
push esi ; save mask
|
|
neg edx ; (edx) = negative offset
|
|
|
|
#if !CAREFUL && 8-MIN_MATCH < MAX_LENGTH-1
|
|
cmp eax,8-MIN_MATCH ; length > 8?
|
|
ja LABEL(long_string)
|
|
|
|
lea esi, [edi+edx] ; esi = beginning of string
|
|
|
|
cmp edx,-3 ; offset < 4?
|
|
jae LABEL(copy_by_one) ; yes, copy byte by byte
|
|
|
|
cmp esi, [ebp].dst.beg ; output buffer underrun?
|
|
jb error_pop ; yes, corrupted data
|
|
|
|
mov edx,[esi] ; get first 4 bytes
|
|
mov [edi],edx ; store them
|
|
mov edx,[esi+4] ; get next 4 byte
|
|
mov [edi+4],edx ; store them
|
|
|
|
pop esi ; restore mask
|
|
lea edi,[edi+eax+MIN_MATCH] ; (edi) = next output location
|
|
|
|
test ch,ch ; have enough bits in Mask?
|
|
jge LABEL(next) ; yes, proceed further
|
|
jmp LABEL(mask_more) ; no, need to read in more bits
|
|
|
|
LABEL(copy_by_one):
|
|
cmp esi, [ebp].dst.beg ; output buffer underrun?
|
|
jb error_pop ; yes, corrupted data
|
|
|
|
mov dl,[esi] ; copy 8 bytes by one
|
|
mov [edi],dl ; NB: no readahead is allowed here
|
|
mov dl,[esi+1] ; because source and destination
|
|
mov [edi+1],dl ; may overlap
|
|
mov dl,[esi+2]
|
|
mov [edi+2],dl
|
|
mov dl,[esi+3]
|
|
mov [edi+3],dl
|
|
mov dl,[esi+4]
|
|
mov [edi+4],dl
|
|
mov dl,[esi+5]
|
|
mov [edi+5],dl
|
|
mov dl,[esi+6]
|
|
mov [edi+6],dl
|
|
mov dl,[esi+7]
|
|
mov [edi+7],dl
|
|
|
|
pop esi ; restore mask
|
|
lea edi,[edi+eax+MIN_MATCH] ; (edi) = next output location
|
|
|
|
test ch,ch ; have enough bits in Mask?
|
|
jge LABEL(next) ; yes, proceed further
|
|
jmp LABEL(mask_more) ; no, need to read in more bits
|
|
|
|
LABEL(long_string):
|
|
#endif /* CAREFUL */
|
|
|
|
cmp eax,MAX_LENGTH-1 ; long length?
|
|
je LABEL(long_length) ; yes, decode it
|
|
|
|
LABEL(long_length_done):
|
|
lea esi,[edi+edx] ; (esi) = source pointer
|
|
add eax,MIN_MATCH ; (edx) = length
|
|
lea edx,[esi+eax] ; (eax) = last output position
|
|
|
|
cmp esi,[ebp].dst.beg ; output buffer underrun?
|
|
jb error_pop ; yes, corrupted data
|
|
|
|
xchg eax,ecx ; (ecx) = length, (ah) = bit counter
|
|
|
|
#if !CAREFUL
|
|
cmp edx,[ebp].dst.careful ; too close to the end of buffer?
|
|
jae careful_check_overrun ; yes, be careful
|
|
#else
|
|
careful_check_overrun:
|
|
cmp edx,[ebp].dst.stop ; too much to output?
|
|
jbe careful_no_overrun ; yes, adjust length
|
|
|
|
sub edx,[ebp].dst.stop ; (edx) = excess
|
|
sub ecx,edx ; (ecx) = exact length
|
|
|
|
rep movsb ; copy bytes
|
|
|
|
pop esi ; restore mask
|
|
jmp ret_ok ; OK, but not EOF
|
|
careful_no_overrun:
|
|
#endif
|
|
|
|
rep movsb ; copy bytes
|
|
|
|
mov ch,ah ; restore byte counter
|
|
pop esi ; restore Mask
|
|
|
|
#if !CAREFUL
|
|
cmp edi,[ebp].dst.careful ; too close to the end of input buffer?
|
|
jae careful_copy ; yes, switch into careful mode
|
|
#else
|
|
careful_copy:
|
|
#endif
|
|
|
|
test ch,ch ; have enough bits in Mask?
|
|
jge LABEL(next) ; yes, proceed further
|
|
|
|
#if CAREFUL
|
|
cmp ebx,[ebp].src.end_bitmask2 ; input buffer overrun?
|
|
jae error
|
|
#else
|
|
LABEL(mask_more):
|
|
#endif
|
|
|
|
mov cl,ch ; (cl) = (# of have - # of used)
|
|
xor edx,edx
|
|
mov dx,[ebx] ; (edx) = next 16 bits
|
|
neg cl ; (cl) = # unused bits in Mask
|
|
add ebx,2 ; (ebx) = ptr to next token
|
|
shl edx,cl ; (edx) = 16 aligned on required boundary
|
|
add ch,16 ; (ch) = # of free bits in Mask
|
|
add esi,edx ; (esi) = Mask + next 16 bits
|
|
|
|
jmp LABEL(next) ; decode next token
|
|
|
|
|
|
LABEL(long_length):
|
|
#if CAREFUL
|
|
cmp ebx,[ebp].src.end ; input buffer overrun?
|
|
jae error_pop ; yes, corrupted data
|
|
#endif
|
|
|
|
xor eax,eax
|
|
mov al,[ebx] ; (eax) = next byte
|
|
inc ebx ; (ebx) = ptr to next token
|
|
cmp al,255 ; (eax) == 255?
|
|
lea eax,[eax+MAX_LENGTH-1] ; (eax) = next byte + MAX_LENGTH-1
|
|
jne LABEL(long_length_done) ; no, length decoded
|
|
|
|
#if CAREFUL
|
|
cmp ebx,[ebp].src.end_1 ; input buffer overrun?
|
|
jae error_pop ; yes, corrupted data
|
|
#endif
|
|
xor eax,eax
|
|
mov ax,[ebx] ; (eax) = next word
|
|
add ebx,2 ; (ebx) = ptr to next token
|
|
cmp ax,255+MAX_LENGTH-1 ; length should be long enough
|
|
jae LABEL(long_length_done)
|
|
jmp error
|
|
|
|
|
|
#if CAREFUL
|
|
error_pop:
|
|
pop eax ; pop Mask saved on stack
|
|
|
|
error:
|
|
xor eax,eax ; decode error: return 0
|
|
jmp ret_common
|
|
|
|
ret_ok_eof:
|
|
cmp edi,[ebp].dst.end
|
|
jne ret_ok
|
|
|
|
test eax,eax
|
|
jne ret_ok ; eof iff eax == 0
|
|
mov eax,1
|
|
mov [ebp].eof,eax
|
|
|
|
ret_ok:
|
|
mov eax,1 ; no [obvious] error: return 0
|
|
|
|
ret_common:
|
|
mov [ebp].result, eax ; store result
|
|
mov [ebp].src.last,ebx ; save last value of source ptr
|
|
mov [ebp].dst.last,edi ; save last value of destination ptr
|
|
|
|
pop ebp ; restore registers we used
|
|
pop ebx
|
|
pop ecx
|
|
pop edx
|
|
pop edi
|
|
pop esi ; and return
|
|
} /* end of __asm */
|
|
#endif /* CAREFUL */
|
|
|
|
#endif /* i386 */
|
|
|
|
#endif /* -------------------- CODING_HUFF_ALL ------------------ */
|
|
|
|
|
|
|
|
|
|
/* ----------------------- CODING_DIRECT2 ------------------------ */
|
|
/* -------------- */
|
|
|
|
#if CODING == CODING_DIRECT2
|
|
|
|
#ifndef i386
|
|
|
|
// C code: 73 MB/s at P3-500; asm code 80.5 MB/s
|
|
|
|
/*
|
|
Pseudocode:
|
|
----------
|
|
length = NextWord ();
|
|
offset = length >> DIRECT2_LEN_LOG;
|
|
length &= DIRECT2_MAX_LEN;
|
|
if (length == DIRECT2_MAX_LEN)
|
|
{
|
|
length = NextQuad ();
|
|
if (length == 15)
|
|
{
|
|
length = NextByte ();
|
|
if (length == 255)
|
|
length = NextWord () - 15 - DIRECT2_MAX_LEN;
|
|
length += 15;
|
|
}
|
|
length += DIRECT2_MAX_LEN;
|
|
}
|
|
length += MIN_MATCH;
|
|
++offset;
|
|
memcpy (dst, dst - offset, length);
|
|
dst += length;
|
|
*/
|
|
|
|
#if !CAREFUL
|
|
tag_t bmask = 0;
|
|
xint ofs, len;
|
|
const uchar *ptr = 0;
|
|
uchar *dst = info->dst.beg;
|
|
const uchar *src = info->src.beg;
|
|
|
|
goto start;
|
|
#endif /* !CAREFUL */
|
|
|
|
LABEL (next):
|
|
if (bmask >= 0) do // while MSB(bmask) == 0
|
|
{
|
|
bmask <<= 1;
|
|
LABEL (copy_byte):
|
|
CAREFUL_OK_IF (dst >= info->dst.stop);
|
|
CAREFUL_ERR_IF (src >= info->src.end);
|
|
*dst++ = *src++; // copy next byte
|
|
} while (bmask >= 0);
|
|
|
|
if ((bmask <<= 1) == 0) // if bmask == 0 reload it
|
|
{
|
|
START;
|
|
CAREFUL_IF (src >= info->src.careful || dst >= info->dst.careful, restart);
|
|
CAREFUL_ERR_IF (src >= info->src.end_tag);
|
|
bmask = * (__unaligned tag_t *) src;
|
|
src += sizeof (tag_t);
|
|
if (bmask >= 0)
|
|
{
|
|
bmask = (bmask << 1) + 1;
|
|
goto LABEL (copy_byte);
|
|
}
|
|
bmask = (bmask << 1) + 1;
|
|
}
|
|
|
|
#if !CAREFUL
|
|
assert (dst < info->dst.end - 8);
|
|
#endif
|
|
|
|
CAREFUL_EOF_IF (dst >= info->dst.stop);
|
|
CAREFUL_ERR_IF (src >= info->src.end_1);
|
|
|
|
ofs = * (__unaligned uint16 *) src;
|
|
src += 2;
|
|
len = ofs;
|
|
ofs >>= DIRECT2_LEN_LOG;
|
|
len &= DIRECT2_MAX_LEN;
|
|
ofs = ~ofs;
|
|
|
|
#if !CAREFUL && (8 - MIN_MATCH < DIRECT2_MAX_LEN)
|
|
if (len <= 8 - MIN_MATCH)
|
|
{
|
|
const uchar *src1 = dst + ofs;
|
|
|
|
#ifdef _M_IX86 // unaligned access is faster only on x86
|
|
if (ofs < ~2)
|
|
{
|
|
if (src1 < info->dst.beg) RET_ERR; // check for buffer underrun
|
|
|
|
ofs = ((__unaligned uint32 *) src1)[0]; // quickly copy 8 bytes
|
|
((__unaligned uint32 *) dst)[0] = ofs;
|
|
ofs = ((__unaligned uint32 *) src1)[1];
|
|
((__unaligned uint32 *) dst)[1] = ofs;
|
|
|
|
dst += len + MIN_MATCH; // dst = next output position
|
|
goto LABEL (next); // decode next token
|
|
}
|
|
#endif
|
|
|
|
if (src1 < info->dst.beg) RET_ERR; // check for buffer overrun
|
|
|
|
COPY_8_BYTES (dst, src1);
|
|
|
|
dst += len + MIN_MATCH;
|
|
goto LABEL (next);
|
|
}
|
|
#endif
|
|
|
|
if (len == DIRECT2_MAX_LEN) // decode long length
|
|
{
|
|
if (ptr == 0)
|
|
{
|
|
CAREFUL_ERR_IF (src >= info->src.end);
|
|
ptr = src;
|
|
len = *src++ & 15;
|
|
}
|
|
else
|
|
{
|
|
len = *ptr >> 4;
|
|
ptr = 0;
|
|
}
|
|
if (len == 15)
|
|
{
|
|
CAREFUL_ERR_IF (src >= info->src.end);
|
|
len = *src++;
|
|
if (len == 255)
|
|
{
|
|
CAREFUL_ERR_IF (src >= info->src.end_1);
|
|
len = * (__unaligned uint16 *) src;
|
|
src += 2;
|
|
if (len < 255 + 15 + DIRECT2_MAX_LEN) RET_ERR;
|
|
len += MIN_MATCH;
|
|
goto LABEL (done_len);
|
|
}
|
|
len += 15;
|
|
}
|
|
len += DIRECT2_MAX_LEN + MIN_MATCH;
|
|
goto LABEL (done_len);
|
|
}
|
|
len += MIN_MATCH;
|
|
LABEL (done_len):
|
|
|
|
info->src.last = src;
|
|
src = dst + ofs;
|
|
|
|
#if !CAREFUL
|
|
if (dst + len >= info->dst.careful)
|
|
goto careful_copy_tail;
|
|
#else
|
|
careful_copy_tail:
|
|
if (dst + len > info->dst.stop)
|
|
{
|
|
if (src < info->dst.beg) RET_ERR;
|
|
len = (xint) (info->dst.stop - dst);
|
|
assert (len >= 0);
|
|
COPY_BLOCK_SLOW (dst, src, len);
|
|
src = info->src.last;
|
|
RET_OK;
|
|
}
|
|
#endif /* !CAREFUL */
|
|
|
|
if (src < info->dst.beg) RET_ERR;
|
|
|
|
COPY_BLOCK_SLOW (dst, src, len); // copy block
|
|
|
|
src = info->src.last; // restore input buffer ptr
|
|
goto LABEL (next);
|
|
|
|
#if CAREFUL
|
|
ret_ok_eof:
|
|
if (dst == info->dst.end)
|
|
info->eof = 1;
|
|
|
|
ret_ok:
|
|
info->src.last = src;
|
|
info->dst.last = dst;
|
|
info->result = 1;
|
|
return;
|
|
|
|
ret_err:
|
|
info->result = 0;
|
|
return;
|
|
#endif /* CAREFUL */
|
|
|
|
#else /* ------------------------- i386 ---------------------------- */
|
|
|
|
#if !CAREFUL
|
|
__asm
|
|
{
|
|
mov eax,info // save info
|
|
|
|
push ebx // save registers
|
|
push ecx
|
|
push edx
|
|
push esi
|
|
push edi
|
|
push ebp
|
|
|
|
#define PTR dword ptr [esp]
|
|
#define DST_STOP dword ptr [esp+4*1]
|
|
#define DST_CAREFUL dword ptr [esp+4*2]
|
|
#define SRC_CAREFUL dword ptr [esp+4*3]
|
|
#define SRC_END dword ptr [esp+4*4]
|
|
#define SRC_END_1 dword ptr [esp+4*5]
|
|
#define SRC_END_TAG dword ptr [esp+4*6]
|
|
#define INFO dword ptr [esp+4*7]
|
|
#define LOCALS 8
|
|
|
|
sub esp,4*LOCALS // make room for locals
|
|
|
|
mov edx,[eax].dst.stop
|
|
mov DST_STOP,edx
|
|
mov edx,[eax].dst.careful
|
|
mov DST_CAREFUL,edx
|
|
mov edx,[eax].src.careful
|
|
mov SRC_CAREFUL,edx
|
|
mov edx,[eax].src.end
|
|
mov SRC_END,edx
|
|
mov edx,[eax].src.end_1
|
|
mov SRC_END_1,edx
|
|
mov edx,[eax].src.end_tag
|
|
mov SRC_END_TAG,edx
|
|
xor edx,edx // ptr = 0
|
|
mov PTR,edx
|
|
mov INFO,eax
|
|
|
|
|
|
mov edx,[eax].dst.beg
|
|
mov ebp,edx
|
|
|
|
mov edi,[eax].dst.beg
|
|
mov ebx,[eax].src.beg
|
|
|
|
xor eax,eax // bmask = 0
|
|
|
|
jmp start
|
|
|
|
#endif /* !CAREFUL */
|
|
|
|
align 16
|
|
LABEL (literal_1):
|
|
mov [edi-1],cl
|
|
LABEL (literal):
|
|
#if CAREFUL
|
|
cmp edi,DST_STOP
|
|
jae ret_ok // recoded everything?
|
|
cmp ebx,SRC_END
|
|
jae ret_err
|
|
#endif
|
|
|
|
inc edi
|
|
mov cl,[ebx] // copy next byte
|
|
inc ebx
|
|
add eax,eax // check most significant bit
|
|
jnc LABEL (literal_1)
|
|
mov [edi-1],cl
|
|
jz LABEL (start) // need reloading?
|
|
|
|
LABEL (pointer):
|
|
#if CAREFUL
|
|
cmp edi,DST_STOP // decoded all the stuff? -- done
|
|
jae ret_ok_eof
|
|
cmp ebx,SRC_END_1
|
|
jae ret_err
|
|
#endif
|
|
|
|
xor edx,edx
|
|
mov dx,[ebx]
|
|
mov ecx,edx
|
|
shr edx,DIRECT2_LEN_LOG
|
|
add ebx,2
|
|
not edx // edx = -offset
|
|
and ecx,DIRECT2_MAX_LEN // ecx = length - MIN_LENGTH
|
|
lea esi,[edi+edx]
|
|
|
|
#if !CAREFUL && (8 - MIN_MATCH < DIRECT2_MAX_LEN)
|
|
cmp cl,8 - MIN_MATCH // length > 8?
|
|
ja LABEL (long_length)
|
|
|
|
cmp esi,ebp // output buffer underrun?
|
|
jb ret_err
|
|
|
|
cmp edx,-3
|
|
mov edx,[esi]
|
|
jae LABEL (byte_by_byte)
|
|
|
|
mov [edi],edx
|
|
mov edx,[esi+4]
|
|
mov [edi+4],edx
|
|
|
|
lea edi,[edi+ecx+MIN_MATCH]
|
|
add eax,eax
|
|
|
|
jnc LABEL (literal)
|
|
jnz LABEL (pointer)
|
|
jmp LABEL (start)
|
|
|
|
LABEL (byte_by_byte):
|
|
add ecx,MIN_MATCH
|
|
rep movsb
|
|
|
|
add eax,eax
|
|
jnc LABEL (literal)
|
|
jnz LABEL (pointer)
|
|
jmp LABEL (start)
|
|
|
|
LABEL (long_length):
|
|
#endif /* !CAREFUL && (8 - MIN_MATCH < DIRECT2_MAX_LEN) */
|
|
|
|
cmp esi,ebp // output buffer underrun?
|
|
jb ret_err
|
|
|
|
mov edx,PTR
|
|
cmp cl,DIRECT2_MAX_LEN
|
|
jne LABEL (done_len)
|
|
|
|
test edx,edx
|
|
je LABEL (ptr_zero)
|
|
|
|
xor ecx,ecx
|
|
mov cl,[edx]
|
|
xor edx,edx
|
|
shr ecx,4
|
|
jmp LABEL(done_quad)
|
|
|
|
LABEL (ptr_zero):
|
|
#if CAREFUL
|
|
cmp ebx,SRC_END
|
|
jae ret_err
|
|
#endif
|
|
xor ecx,ecx
|
|
mov cl,[ebx]
|
|
mov edx,ebx
|
|
and ecx,15
|
|
inc ebx
|
|
|
|
LABEL(done_quad):
|
|
mov PTR,edx
|
|
cmp cl,15
|
|
lea ecx,[ecx+DIRECT2_MAX_LEN]
|
|
je LABEL(len255)
|
|
|
|
LABEL(done_len):
|
|
lea edx,[edi+ecx+MIN_MATCH] // edx = end of copy
|
|
add ecx,MIN_MATCH
|
|
|
|
#if !CAREFUL
|
|
cmp edx,DST_CAREFUL // too close to end of buffer?
|
|
jae careful_copy_tail
|
|
#else
|
|
careful_copy_tail:
|
|
cmp edx,DST_STOP // ahead of output buffer?
|
|
jbe LABEL (checked_eob)
|
|
mov ecx,DST_STOP
|
|
sub ecx,edi // ecx = corrected length
|
|
rep movsb // copy substring
|
|
|
|
jmp ret_ok // no errors, no EOF mark
|
|
LABEL (checked_eob):
|
|
#endif
|
|
|
|
rep movsb // copy substring
|
|
|
|
add eax,eax
|
|
jnc LABEL (literal)
|
|
jnz LABEL (pointer)
|
|
|
|
align 16
|
|
LABEL (start):
|
|
#if !CAREFUL
|
|
cmp ebx,SRC_CAREFUL // too close to end of buffer(s)?
|
|
jae careful_start // be careful if so
|
|
cmp edi,DST_CAREFUL
|
|
jae careful_start
|
|
#else
|
|
cmp ebx,SRC_END_TAG // input buffer overrun? -- corrupted data
|
|
jae ret_err
|
|
#endif
|
|
mov eax,[ebx]
|
|
add ebx,4
|
|
test eax,eax
|
|
lea eax,[eax+eax+1]
|
|
jns LABEL (literal)
|
|
jmp LABEL (pointer)
|
|
|
|
LABEL(len255):
|
|
|
|
#if CAREFUL
|
|
cmp ebx,SRC_END
|
|
jae ret_err
|
|
#endif
|
|
xor ecx,ecx
|
|
mov cl,[ebx]
|
|
inc ebx
|
|
cmp cl,255
|
|
lea ecx,[ecx+15+DIRECT2_MAX_LEN]
|
|
jne LABEL(done_len)
|
|
|
|
#if CAREFUL
|
|
cmp ebx,SRC_END_1
|
|
jae ret_err
|
|
#endif
|
|
xor ecx,ecx
|
|
mov cx,[ebx]
|
|
add ebx,2
|
|
cmp ecx,255 + 15 + DIRECT2_MAX_LEN
|
|
jae LABEL (done_len)
|
|
|
|
#if CAREFUL
|
|
ret_err:
|
|
xor eax,eax
|
|
jmp ret_common
|
|
|
|
ret_ok_eof:
|
|
mov ebx,INFO
|
|
mov eax,1
|
|
cmp edi,[ebx].dst.end
|
|
jne ret_ok
|
|
mov [ebx].eof,eax
|
|
|
|
ret_ok:
|
|
mov eax,1
|
|
mov ebx,INFO
|
|
mov [ebx].src.last,ebx
|
|
mov [ebx].dst.last,edi
|
|
|
|
ret_common:
|
|
MOV ebx,INFO
|
|
mov [ebx].result,eax
|
|
|
|
add esp,4*LOCALS
|
|
|
|
pop ebp
|
|
pop edi
|
|
pop esi
|
|
pop edx
|
|
pop ecx
|
|
pop ebx
|
|
} /* __asm */
|
|
#endif /* CAREFUL */
|
|
|
|
#endif /* i386 */
|
|
|
|
#endif /* ----------------- CODING == CODING_DIRECT2 --------------- */
|
|
|
|
|
|
|
|
/* --------------------------- End of code ------------------------- */
|
|
/* ----------- */
|
|
|
|
#if CAREFUL
|
|
} /* end of "do_decode" */
|
|
#endif /* CAREFUL */
|
|
|
|
|
|
#undef CAREFUL
|
|
#undef LABEL
|
|
#undef CAREFUL_LABEL
|
|
#undef CAREFUL_OK_IF
|
|
#undef CAREFUL_ERR_IF
|
|
#undef CAREFUL_EOF_IF
|
|
#undef CAREFUL_IF
|
|
#undef START
|
|
#undef FAST_COPY_DONE
|