mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
519 lines
9.8 KiB
519 lines
9.8 KiB
;
|
|
; tableasm.asm
|
|
;
|
|
; Assembly version of make_table()
|
|
;
|
|
; jforbes 07/20/96
|
|
;
|
|
; Note, this is not optimised for the Pentium at all; very few
|
|
; instructions will execute two at a time.
|
|
;
|
|
TITLE   TABLEASM.ASM
        .386P
        .model  FLAT

        PUBLIC  _make_table

;       COMDAT @_make_table
_TEXT   SEGMENT

;-----------------------------------------------------------------------
; Byte offsets (relative to esp after the prologue of _make_table) of
; the procedure's stack-resident working storage.
;
; The three arrays are 18 dwords (72 bytes) each, i.e. indices 0..17.
; Each remaining slot is one dword.  $last is the total frame size and
; is the amount subtracted from / added back to esp.
;-----------------------------------------------------------------------

; --- arrays: 18 dwords apiece ---
$start      =   0                   ; start[0..17]
$weight     =   $start  + 18*4      ; weight[0..17]
$count      =   $weight + 18*4      ; count[0..17]

; --- scalars: one dword apiece ---
$nchar      =   $count     + 18*4   ; nchar argument, masked to 16 bits
$bitlen     =   $nchar     + 4      ; bitlen pointer argument
$ch         =   $bitlen    + 4      ; slot reserved for ch
$leftright  =   $ch        + 4      ; leftright pointer argument
$avail      =   $leftright + 4      ; avail counter
$k          =   $avail     + 4      ; k (start[len] of a long code)
$table      =   $k         + 4      ; table pointer argument
$tablebits  =   $table     + 4      ; tablebits argument, masked to 8 bits
$jutbits    =   $tablebits + 4      ; 16 - tablebits
$context    =   $jutbits   + 4      ; context pointer argument

$last       =   $context   + 4      ; size of the whole frame in bytes
|
|
_make_table PROC NEAR                           ; COMDAT

;-----------------------------------------------------------------------
; void make_table(
;       t_decoder_context  *context,            [esp + $parms +  0]
;       USHORT              nchar,              [esp + $parms +  4]
;       UBYTE              *bitlen,             [esp + $parms +  8]
;       USHORT              tablebits,          [esp + $parms + 12]
;       short              *table,              [esp + $parms + 16]
;       short              *leftright)          [esp + $parms + 20]
;
; Builds a lookup table from the per-symbol code lengths in bitlen[]:
;   * symbols whose length is <= tablebits are written directly into
;     table[] (one entry per matching tablebits-wide prefix);
;   * longer codes are reached through negative table[] entries that
;     index left/right word pairs in leftright[], allocated from
;     index nchar upwards.
;
; Arguments are read from the stack; "ret 0" leaves them for the caller
; to remove.  nchar is masked to 16 bits and tablebits to 8 bits.
;
; Returns: eax = 1 on success, eax = 0 if the lengths do not describe
;          a valid table.
;
; Registers: ebx, ecx, edx, ebp, esi, edi are saved and restored.
;            eax carries the return value.
;
; Notes:
;   - "rep stos" is used without touching the direction flag; it is
;     assumed to be clear on entry (TODO confirm against the callers).
;   - context is copied to a local but not otherwise used here.
;   - u / v / NP remarks are the original author's Pentium pairing
;     notes.
;
; Frame layout (offsets from esp after the prologue):
;       start   [esp]
;       weight  [esp+72]
;       count   [esp+144]
;-----------------------------------------------------------------------

; 6 regs * 4 = 24 bytes
        push    ebx
        push    ecx
        push    edx
        push    ebp
        push    esi
        push    edi

        sub     esp, $last

; How to reach the parameters: skip the $last bytes of locals, the
; 24 bytes of pushed registers and the 4 byte return address.
$parms = $last+28

        mov     eax, [esp + $parms + 4]
        and     eax, 65535                      ; nchar is a USHORT
        mov     [esp + $nchar], eax

        mov     eax, [esp + $parms]
        mov     [esp + $context], eax

        mov     eax, [esp + $parms + 8]
        mov     [esp + $bitlen], eax

        mov     eax, [esp + $parms + 12]
        and     eax, 255                        ; keep low 8 bits of tablebits
        mov     [esp + $tablebits], eax

        mov     eax, [esp + $parms + 16]
        mov     [esp + $table], eax

        mov     eax, [esp + $parms + 20]
        mov     [esp + $leftright], eax


; for (i = 1; i <= 16; i++)
;     count[i] = 0;

; clear 64 bytes starting at &count[1]
        xor     eax, eax
        lea     edi, [esp + $count + 4]
        mov     ecx, 16
        rep stosd


; for (i = 0; i < nchar; i++)
;     count[bitlen[i]]++;
;
; Done in reverse, i = nchar-1 down to 0.
;
; NOTE(review): bitlen[i] is used as an index without a range check;
; count[] has 18 entries, so a value above 17 would increment memory
; past the array.  Presumably the caller guarantees small lengths --
; confirm.
        mov     ecx, [esp + $nchar]             ; u
        mov     esi, [esp + $bitlen]            ; v

        xor     ebx, ebx                        ; u
        dec     ecx                             ; v ecx = i

; The loop below is a do-while.  When nchar == 0, ecx is now -1 and the
; body would read bitlen[-1]; skip it so that, as in the C loop above,
; nothing is counted.
        js      SHORT loop1_done

loop1:
        mov     bl, [esi + ecx]                 ; bl = bitlen[i]
        inc     DWORD PTR [esp + $count + ebx*4] ; NP

        dec     ecx                             ; u
        jge     SHORT loop1                     ; v

loop1_done:


; start[1] = 0;
;
; for (i = 1; i <= 16; i++)
;     start[i + 1] = start[i] + (count[i] << (16 - i));
;
        lea     ebp, [esp + $start + 4]         ; u ebp = &start[1]
        lea     esi, [esp + $count + 4]         ; v esi = &count[1]

        xor     edx, edx                        ; u edx = start[i]
        mov     ecx, 15                         ; v ecx = 16 - i

        mov     [ebp], edx                      ; u start[1] = 0
        nop                                     ; v

loop2:
        mov     eax, [esi]                      ; u eax = count[i]
        add     ebp, 4                          ; v

        shl     eax, cl                         ; u
        add     esi, 4                          ; v

        add     eax, edx                        ; u edx = start[i]
                                                ; stall

        mov     [ebp], eax                      ; u start[i+1]
        mov     edx, eax                        ; v edx <- start[i+1]

        dec     ecx                             ; u
        jge     SHORT loop2                     ; v


; if (start[17] != 65536)
        mov     edx, [esp + 68 + $start]        ; 68 = 17*4
        cmp     edx, 65536
        jne     not_65536


; jutbits = 16 - tablebits;
;
; for (i = 1; i <= tablebits; i++)
; {
;     start[i] >>= jutbits;
;     weight[i] = 1 << (tablebits - i);
; }

        mov     edx, [esp + $tablebits]         ; u edx = tablebits
        mov     eax, 1                          ; v eax = i

        lea     ecx, [edx - 1]                  ; u ecx = tablebits - i(=1)
        mov     ebp, eax                        ; v ebp = 1

        shl     ebp, cl                         ; u ebp = 1 << (tablebits - i)
        mov     ebx, ecx                        ; v ebx = tablebits - i(=1)

        mov     cl, 16                          ; upper bits of ecx are zero
        sub     ecx, edx                        ; ecx = jutbits = 16 - tablebits
        mov     [esp + $jutbits], ecx

loop3:
        shr     DWORD PTR [esp + $start + eax*4], cl    ; u start[i] >>= jutbits
        mov     DWORD PTR [esp + $weight + eax*4], ebp  ; v weight[i]

        shr     ebp, 1                          ; u next weight is half
        inc     eax                             ; v i++

        cmp     eax, edx                        ; u
        jle     SHORT loop3                     ; v


; while (i <= 16)
; {
;     weight[i] = 1 << (16 - i);
;     i++;
; }

        cmp     al, 16                          ; u
        jg      SHORT exit_loop4                ; v

loop4:
        mov     ecx, 16                         ; u
        mov     ebx, 1                          ; v

        sub     ecx, eax                        ; u ecx = 16 - i
        inc     eax                             ; v WAR ok

        shl     ebx, cl                         ; u ebx = 1 << (16 - i)
        mov     DWORD PTR [esp + $weight + eax*4 - 4], ebx ; v (-4: eax already incremented)

        cmp     al, 16                          ; u
        jle     SHORT loop4                     ; v

exit_loop4:


; i = start[tablebits+1] >> jutbits

; ecx = jutbits
        mov     ecx, [esp + $jutbits]

; edx = tablebits
        mov     edx, [esp + $tablebits]

; eax = start[tablebits+1]
        mov     eax, [esp + $start + 4 + edx*4]

; eax = start[tablebits+1] >> jutbits
        shr     eax, cl

; if (i != 65536)
        cmp     eax, 65536
        je      SHORT i_is_zero


;
; memset(&table[i], 0, sizeof(ushort)*((1 << tablebits)-i);
;

; ecx = tablebits
        mov     ecx, edx

; edx = 1 << tablebits
        mov     edx, 1
        shl     edx, cl

; edx = (1 << tablebits) - i
        sub     edx, eax

; count = (1 << tablebits) - i words
        mov     ecx, edx

; dest = edi = &table[i]
        mov     edi, [esp + $table]
        lea     edi, [edi + eax*2]

; value = 0
        xor     eax, eax

        rep stosw


i_is_zero:

;
; avail = nchar;
;
        mov     eax, [esp + $nchar]             ; u
        xor     edi, edi                        ; v edi = ch

;
; for (ch = 0; ch < nchar; ch++)
;
        mov     [esp + $avail], eax             ; u
        jmp     SHORT main_loop                 ; v


; Failure exit placed here so the main loop can reach it with a
; short jump.
bad_table2:
        xor     eax, eax                        ; return failure
        jmp     cleanup


main_loop:

; if ((len = bitlen[ch]) == 0)
;     continue;

; eax = &bitlen[0]
        mov     eax, [esp + $bitlen]

; ebp = len = bitlen[ch]
        movzx   ebp, BYTE PTR [eax + edi]

; if (len == 0)
;     continue
        test    ebp, ebp
        jz      loop_bottom


; nextcode = start[len] + weight[len];

; ebx = start[len]
        mov     ebx, [esp + $start + ebp*4]     ; u
        mov     ecx, [esp + $tablebits]         ; v ecx = tablebits

; ebx = nextcode = start[len] + weight[len]
        mov     eax, ebx                        ; u eax = start[len]
        add     ebx, [esp + $weight + ebp*4]    ; v WAR ok

; if (len <= tablebits)
        cmp     ebp, ecx                        ; u
        jg      SHORT len_g_tablebits           ; v

; if (nextcode > (1 << tablebits))
;     bad_table();

; edx = 1 << tablebits
        mov     edx, 1

        shl     edx, cl                         ; u
        mov     ecx, ebx                        ; v ecx = nextcode

; if (nextcode > (1 << tablebits))
        cmp     ebx, edx                        ; u
        jg      SHORT bad_table2                ; v


; for (i = start[len]; i < nextcode; i++)
;     table[i] = ch;

; ecx = nextcode - start[len]
        sub     ecx, eax                        ; u
        add     eax, eax                        ; v WAR ok (eax = start[len] * 2)

; eax = &table[ start[len] ]
        add     eax, [esp + $table]             ; u

; start[len] = nextcode (moved up)
        mov     [esp + $start + ebp*4], ebx     ; v


; For this loop:
;   eax = &table[ start[len] ]
;   edi = ch
;   ecx = nextcode - start[len]
;
loop6:
        mov     WORD PTR [eax], di              ; table[i] = ch
        add     eax, 2                          ; i++

        dec     ecx
        jnz     SHORT loop6


; ch++
        inc     edi                             ; moved up

; loop bottom (duplicated here for the short-code path)
        cmp     edi, [esp + $nchar]
        jl      SHORT main_loop

        mov     eax, 1                          ; success
        jmp     cleanup


;
; len > tablebits
;
; on entry: eax = start[len]
;           ebx = nextcode
;           ecx = tablebits
;           ebp = len
;
len_g_tablebits:

        mov     esi, ebp                        ; u esi = len
        mov     edx, eax                        ; v edx = start[len]

        sub     esi, ecx                        ; u esi = len - tablebits
        add     cl, 16                          ; v

; edx = k << tablebits
; shift left another 16 because we want to use a DWORD
; for testing the negative bit
        shl     edx, cl                         ; u
        mov     [esp + $k], eax                 ; v k = start[len]

; start[len] = nextcode;
        mov     [esp + $start + ebp*4], ebx     ; u
        nop                                     ; v

; p = &table[k >> jutbits];
        mov     ecx, [esp + $jutbits]           ; u ecx = jutbits
        mov     ebx, [esp + $k]                 ; v ebx = k

        shr     ebx, cl                         ; u ebx = k >> jutbits
        mov     eax, [esp + $table]             ; v

        lea     ebx, [eax + ebx*2]              ; u ebx = p = &table[k >> jutbits]
        mov     ebp, [esp + $avail]             ; v ebp = avail

; Walk one level per remaining code bit (esi = len - tablebits),
; creating a left/right pair wherever the current slot is empty.
bottom_loop:

; if (*p == 0)

; eax = &leftright[0]
        mov     eax, [esp + $leftright]

; ecx = *p
        movsx   ecx, WORD PTR [ebx]             ; NP

; *p == 0 ?
        test    ecx, ecx                        ; u
        jne     SHORT p_not_zero                ; v

; left_right[avail*2] = left_right[avail*2+1] = 0;
; *p = -avail;
; avail++;

        mov     WORD PTR [ebx], bp              ; *p = avail (negated below)

; sets left and right to zero (remember that ecx == 0); one dword
; store covers both words of the pair
        mov     [eax + ebp*4], ecx              ; u
        inc     ebp                             ; v avail++

; *p = -avail (the value stored before the increment)
        neg     WORD PTR [ebx]


p_not_zero:

; if ((signed short) k < 0)
;     p = &right[-(*p)];
; else
;     p = &left[-(*p)];

; ecx = -(*p)
        movsx   ecx, WORD PTR [ebx]
        neg     ecx

; ebx = p = &ptr[-(*p)]  (start of the 4-byte left/right pair)
        lea     ebx, [ecx*4 + eax]

; if (k becomes -ve when we shift out a bit)
        add     edx, edx                        ; carry = next code bit
        jnc     SHORT go_left

; right: second word of the pair
        add     ebx, 2

go_left:

        dec     esi                             ; i--
        jnz     SHORT bottom_loop


; *p = ch;
        mov     WORD PTR [ebx], di


; store avail
        mov     [esp + $avail], ebp


loop_bottom:

; ch++
        inc     edi

        cmp     edi, [esp + $nchar]
        jl      main_loop

        mov     eax, 1                          ; success


; Common exit: eax already holds the return value.
cleanup:
        add     esp, $last

        pop     edi
        pop     esi
        pop     ebp
        pop     edx
        pop     ecx
        pop     ebx

        ret     0


; start[17] != 65536.  An all-zero total (edx == 0) is accepted and
; yields a zeroed table; any other total is a failure.
not_65536:
        test    edx, edx
        jnz     SHORT bad_table

; memset(table, 0, sizeof(ushort)*(1<<tablebits))
;
; NOTE(review): the dword count is 1 << (tablebits - 1), which assumes
; tablebits >= 1 -- confirm against the callers.
        xor     eax, eax
        mov     edi, [esp + $table]

        mov     edx, 1
        mov     ecx, [esp + $tablebits]
        dec     ecx                             ; subtract 1 because we're doing STOSD

        shl     edx, cl                         ; edx := (1 << tablebits) / 2 dwords

        mov     ecx, edx                        ; store in ecx

        rep stosd

        mov     eax, 1                          ; success
        jmp     SHORT cleanup


bad_table:
        xor     eax, eax                        ; failure
        jmp     cleanup

_make_table ENDP
|
|
_TEXT ENDS ; closes the _TEXT segment that contains _make_table
|
|
END ; end of the assembly source; nothing after this is assembled
|