Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1156 lines
34 KiB

  1. /* ------------------------------------------------------------------------ */
  2. /* */
  3. /* Copyright (c) Microsoft Corporation, 2000-2001. All rights reserved. */
  4. /* Copyright (c) Andrew Kadatch, 1991-2001. All rights reserved. */
  5. /* */
  6. /* Microsoft Confidential -- do not redistribute. */
  7. /* */
  8. /* ------------------------------------------------------------------------ */
  9. /*
  10. Decoding is split into two phases:
  11. 1. Fast decoding. Check bounds rarely (when loading new tag and after copying
  12. of a long string) and switch into Careful only when we are too close to the
  13. end of input or output buffer.
  14. 2. Careful decoding. Before performing any memory access all bounds are checked
  15. to make sure no buffer overrun or underrun will happen. Careful decoding is
  16. usually 1.5 times slower than Fast one, but only last several hundred bytes
  17. are decoded this way; all the rest is decoded Fast.
  18. As long as decoding code is essentially the same except for bounds checks that
  19. differ in Fast and Careful mode, in order to avoid code duplication this file
  20. is included twice with different setting of CAREFUL macro (first it is 0, then
  21. 1).
  22. Run "cl -EP xdecode.c >xdecode.pp" to see actual code.
  23. */
  24. #if CAREFUL
  25. #define LABEL(label) careful_##label
  26. #define CAREFUL_OK_IF(cond) if (cond) RET_OK
  27. #define CAREFUL_ERR_IF(cond) if (cond) RET_ERR
  28. #define CAREFUL_EOF_IF(cond) if (cond) goto ret_ok_eof;
  29. #define CAREFUL_IF(cond, label) label:
  30. #if CODING & (CODING_HUFF_LEN | CODING_HUFF_PTR | CODING_HUFF_ALL)
  31. #define START careful_start:
  32. #else
  33. #define START
  34. #endif
  35. #else /* !CAREFUL */
  36. #define LABEL(label) label
  37. #define CAREFUL_OK_IF(cond)
  38. #define CAREFUL_ERR_IF(cond)
  39. #define CAREFUL_EOF_IF(cond)
  40. #define CAREFUL_IF(cond, label) if (cond) goto label
  41. #define START start:
  42. static void do_decode (decode_info *info)
  43. {
  44. #endif
  45. /* ----------------------- CODING_HUFF_ALL ------------------------ */
  46. /* --------------- */
  47. // C code: 26.3 MB/s, asm code: 32.3 MB/s at P3-500
  48. #if CODING == CODING_HUFF_ALL
  49. #ifndef i386
  50. #if !CAREFUL
  51. ubitmask4 Mask;
  52. bits_t Bits, bits;
  53. xint len;
  54. uxint ofs;
  55. uchar *dst = info->dst.beg;
  56. const uchar *src = info->src.beg;
  57. Mask = * (__unaligned ubitmask2 *) src; src += sizeof (ubitmask2);
  58. Mask <<= sizeof (ubitmask2) * 8;
  59. Mask += * (__unaligned ubitmask2 *) src; src += sizeof (ubitmask2);
  60. Bits = 8 * sizeof (ubitmask2);
  61. if (src >= info->src.careful || dst >= info->dst.careful)
  62. goto careful_next;
  63. goto LABEL(next);
  64. #endif /* CAREFUL */
  65. LABEL(decode_more):
  66. // too close to end of buffer? -- switch to careful mode...
  67. CAREFUL_IF (src >= info->src.careful, decode_more1);
  68. CAREFUL_IF (dst >= info->dst.careful, decode_more2);
  69. // input buffer ovverrun? -- corrupted compressed data
  70. CAREFUL_ERR_IF (src >= info->src.end_bitmask2);
  71. // read 16 bits more and update Mask&Bits respectively
  72. bits = Bits;
  73. ofs = * (__unaligned ubitmask2 *) src;
  74. bits = (bits_t) (-bits);
  75. src += sizeof (ubitmask2);
  76. ofs <<= bits;
  77. Bits += 8 * sizeof (ubitmask2);
  78. Mask += (ubitmask4) ofs;
  79. if ((len -= 256) >= 0)
  80. goto LABEL (pointer);
  81. for (;;)
  82. {
  83. CAREFUL_OK_IF (dst >= info->dst.stop);
  84. *dst = (uchar) len; // copy literal byte to output
  85. ofs = (uxint) (Mask >> (8 * sizeof (Mask) - DECODE_BITS));
  86. ++dst;
  87. len = ((int16 *) info->table)[ofs];
  88. bits = 15;
  89. if (len < 0)
  90. goto LABEL(long_codeword);
  91. // short codeword -- already decoded
  92. bits &= len; // bits = # of bit used in Mask
  93. len >>= 4; // len = token
  94. Mask <<= bits; // update Mask&Bits
  95. Bits = (bits_t) (Bits - bits); // read more bits if necessary
  96. if (Bits < 0)
  97. goto LABEL (decode_more);
  98. if ((len -= 256) < 0) // (len -= 256) < 0 ? literal : pointer
  99. continue;
  100. goto LABEL (pointer);
  101. LABEL(next): // decode next token via lookup table
  102. ofs = (uxint) (Mask >> (8 * sizeof (Mask) - DECODE_BITS));
  103. len = ((int16 *) info->table)[ofs];
  104. bits = 15;
  105. if (len >= 0)
  106. {
  107. // short codeword -- already decoded
  108. bits &= len; // bits = # of bit used in Mask
  109. len >>= 4; // len = token
  110. Mask <<= bits; // update Mask&Bits
  111. Bits = (bits_t) (Bits - bits); // read more bits if necessary
  112. if (Bits < 0)
  113. goto LABEL (decode_more);
  114. if ((len -= 256) < 0) // (len -= 256) < 0 ? literal : pointer
  115. continue;
  116. }
  117. else
  118. {
  119. LABEL (long_codeword):
  120. // long codeword -- decode bit by bit
  121. Mask <<= DECODE_BITS; // DECODE_BITS alreay parsed
  122. do
  123. {
  124. len += ((bitmask4) Mask) < 0; // len += MSB (Mask)
  125. Mask <<= 1; // 1 more bit was used
  126. len = ((int16 *) info->table)[len + 0x8000];
  127. }
  128. while (len < 0);
  129. bits &= len; // bits = # of bit used in Mask
  130. len >>= 4; // len = token
  131. Bits = (bits_t) (Bits - bits); // read more bits if necessary
  132. if (Bits < 0)
  133. goto LABEL (decode_more);
  134. if ((len -= 256) < 0) // (len -= 256) < 0 ? literal : pointer
  135. continue;
  136. }
  137. LABEL(pointer):
  138. CAREFUL_EOF_IF (dst >= info->dst.stop);
  139. bits = (bits_t) (len >> MAX_LENGTH_LOG); // # of bits in offset
  140. ofs = (uxint) ((Mask >> 1) | (((ubitmask4) 1) << (8 * sizeof (Mask) - 1)));
  141. Mask <<= bits; // update Mask and Bits
  142. Bits = (bits_t) (Bits - bits);
  143. bits ^= 8 * sizeof (ofs) - 1; // bits = 31 - bits
  144. len &= MAX_LENGTH - 1; // run length - MIN_MATCH
  145. ofs >>= bits; // ofs = (1<<bits) | (Mask<<(32-bits))
  146. info->src.last = src; // save src
  147. ofs = (uxint) (- (xint) ofs); // ofs = real negative offset
  148. #if !CAREFUL && 8-MIN_MATCH < MAX_LENGTH-1
  149. if (len <= 8-MIN_MATCH)
  150. {
  151. src = dst + (xint) ofs; // src = beginning of string
  152. #ifdef _M_IX86 // unligned access is faster only on x86
  153. if (ofs < ~2)
  154. {
  155. if (src < info->dst.beg) // buffer underrun? -- corrupted data
  156. RET_ERR;
  157. ofs = ((__unaligned uint32 *) src)[0]; // copy 8 bytes
  158. ((__unaligned uint32 *) dst)[0] = ofs;
  159. ofs = ((__unaligned uint32 *) src)[1];
  160. ((__unaligned uint32 *) dst)[1] = ofs;
  161. src = info->src.last; // restore src
  162. dst = dst + len + MIN_MATCH; // dst = next output position
  163. if (Bits >= 0) // have enough Bits in Mask? -- proceed further
  164. goto LABEL (next);
  165. goto LABEL (mask_more); // otherwise, read more bits
  166. }
  167. #endif
  168. if (src < info->dst.beg) // buffer underrun? -- corrupted data
  169. RET_ERR;
  170. COPY_8_BYTES (dst, src); // copy 8 bytes one by one
  171. // NB: dst & src may overlap
  172. src = info->src.last; // restore src
  173. dst = dst + len + MIN_MATCH; // dst = next output position
  174. if (Bits >= 0) // have enough Bits in Mask? -- proceed further
  175. goto LABEL (next);
  176. goto LABEL (mask_more); // otherwise, read more bits
  177. }
  178. #endif /* CAREFUL */
  179. if (len == MAX_LENGTH - 1) // long length? -- decode it
  180. {
  181. // if input data overrun then compressed data corrupted
  182. CAREFUL_ERR_IF (src >= info->src.end);
  183. len = *src++ + (MAX_LENGTH-1);
  184. if (len == 255 + MAX_LENGTH-1)
  185. {
  186. CAREFUL_ERR_IF (src >= info->src.end_1);
  187. len = * (__unaligned uint16 *) src;
  188. src += 2;
  189. if (len < 255 + MAX_LENGTH-1) // length should be large enough
  190. RET_ERR;
  191. }
  192. info->src.last = src; // save input buffer pointer
  193. }
  194. len += MIN_MATCH; // len = actual length
  195. src = dst + (xint) ofs; // src = pointer to the beginning of string
  196. dst += len; // dst = last output position
  197. if (src < info->dst.beg) // buffer underrun? -- corrupted data
  198. RET_ERR;
  199. #if !CAREFUL
  200. if (dst >= info->dst.careful)
  201. goto careful_check_overrun;
  202. #else
  203. careful_check_overrun:
  204. if (dst > info->dst.stop) // more to copy than necessary?
  205. {
  206. dst -= len; // dst = first output position
  207. len = (xint) (info->dst.stop - dst); // len = max length to copy
  208. COPY_BLOCK_SLOW (dst, src, len); // copy last run
  209. src = info->src.last; // restore input buffer pointer
  210. RET_OK; // OK but no EOF mark was found
  211. }
  212. #endif
  213. dst -= len; // dst = first output position
  214. COPY_BLOCK_SLOW (dst, src, len); // input & output may overlap -- copy byte by byte
  215. src = info->src.last; // restore input buffer pointer
  216. CAREFUL_IF (dst >= info->dst.careful, copy1);
  217. if (Bits >= 0) // enough Bits in Mask?
  218. goto LABEL (next); // decode next token
  219. #if !CAREFUL && 8-MIN_MATCH < MAX_LENGTH-1
  220. LABEL(mask_more):
  221. #endif
  222. // have 2 more bytes in input buffer?
  223. CAREFUL_ERR_IF (src >= info->src.end_bitmask2);
  224. // read 16 bits more and update Mask&Bits respectively
  225. bits = Bits;
  226. ofs = * (__unaligned ubitmask2 *) src;
  227. bits = (bits_t) (-bits);
  228. src += sizeof (ubitmask2);
  229. ofs <<= bits;
  230. Bits += 8 * sizeof (ubitmask2);
  231. Mask += (ubitmask4) ofs;
  232. goto LABEL (next); // decode next token
  233. } /* of for(;;) */
  234. #if CAREFUL
  235. ret_ok_eof:
  236. if (dst == info->dst.end && len == 0)
  237. info->eof = 1;
  238. ret_ok:
  239. info->src.last = src;
  240. info->dst.last = dst;
  241. info->result = 1;
  242. return;
  243. ret_err:
  244. info->result = 0;
  245. return;
  246. #endif /* CAREFUL */
  247. #else /* ---------------------- defined i386 --------------------- */
  248. #if !CAREFUL
  249. __asm
  250. {
  ; Fast-pass entry: save the registers used below, load the working
  ; registers from *info and preload Mask with the first 32 input bits.
  ; Register roles set up here:
  ;   (ebp) = info, (ebx) = src, (edi) = dst, (esi) = Mask, (ch) = Bits
  251. mov eax,info ; save info
  252. push esi ; save registers
  253. push edi
  254. push edx
  255. push ecx
  256. push ebx
  257. push ebp
  258. mov ebp,eax ; (ebp) = info
  259. mov ebx,[ebp].src.beg ; (ebx) = src
  260. mov edi,[ebp].dst.beg ; (edi) = dst
  261. xor esi,esi ; initialize Mask
  262. mov si,[ebx] ; first 16-bit word goes to the upper half of Mask
  263. shl esi,16
  264. mov si,[ebx+2] ; second 16-bit word goes to the lower half of Mask
  265. add ebx,4 ; (ebx) = src past the two words just read
  266. mov ch,16 ; (ch) = Bits = 16
  267. cmp ebx,[ebp].src.careful ; too close to the end of src buffer?
  268. jae careful_next ; yes, be careful...
  269. cmp edi,[ebp].dst.careful ; too close to the end of dst buffer?
  270. jae careful_next ; yes, be careful...
  271. jmp LABEL (next)
  272. #endif /* CAREFUL */
  273. LABEL(literal):
  274. #if CAREFUL
  275. cmp edi,[ebp].dst.stop ; decoded as much as asked?
  276. jae ret_ok ; done, but no EOF mark
  277. #endif
  278. mov edx,esi ; (edx) = Mask
  279. mov [edi],al ; store literal byte
  280. shr edx,32-DECODE_BITS
  281. inc edi ; (edi) = next output position
  282. movsx eax,word ptr [ebp+edx*2].table ; (eax) = respective decode table entry
  283. mov cl,15 ; (cl) = 15
  284. test eax,eax ; need further decoding? (= codelen > DECODE_BITS?)
  285. jl LABEL(long_code) ; yes, do it
  286. and cl,al ; (cl) = # of bits used in mask
  287. shr eax,4 ; (eax) = token
  288. shl esi,cl ; (esi) = resulting mask
  289. sub ch,cl ; (ch) = # of available bits left in dx
  290. jl LABEL(decode_more) ; if ch < 0 need to read more bits
  291. sub eax,256 ; (eax) = token - 256
  292. jl LABEL(literal) ; if < 0 then al = code of literal
  293. jmp LABEL(pointer) ; otherwise it's pointer
  294. LABEL(next):
  295. mov edx,esi ; (edx) = Mask
  296. mov cl,15 ; (cl) = 15
  297. shr edx,32-DECODE_BITS ; (edx) = DECODE_BITS most significant bits of Mask
  298. movsx eax,word ptr [ebp+edx*2].table ; (eax) = respective decode table entry
  299. test eax,eax ; need further decoding?
  300. jl LABEL(long_code) ; yes, continue
  301. and cl,al ; (cl) = # of bits used in mask
  302. shr eax,4 ; (eax) = current token
  303. shl esi,cl ; (esi) = resulting mask
  304. sub ch,cl ; (ch) = # of available bits left in dx
  305. jl LABEL(decode_more) ; if ch < 0 need to read more bits
  306. sub eax,256 ; (eax) = token - 256
  307. jl LABEL(literal) ; if < 0 then al = code of literal
  308. jmp LABEL(pointer) ; otherwise it's pointer
  309. LABEL(long_code):
  310. shl esi, DECODE_BITS ; DECODE_BITS were used; remove them
  311. LABEL(next_bit):
  312. add esi,esi ; Mask <<= 1 (and get carry)
  313. adc eax,0 ; eax += (old Mask < 0)
  314. movsx eax,word ptr [ebp+eax*2+0x10000].table ; (eax) = token
  315. test eax,eax ; need further decoding?
  316. jl LABEL(next_bit) ; yes, continue
  317. and cl,al ; (cl) = # of bits used in mask
  318. shr eax,4 ; (eax) = token
  319. sub ch,cl ; (ch) = # of available bits left in Mask
  320. jl LABEL(decode_more) ; if ch < 0 need to read more bits
  321. sub eax,256 ; (eax) = token - 256
  322. jl LABEL(literal) ; if < 0 then al = code of literal
  323. jmp LABEL(pointer) ; otherwise it's pointer
  324. LABEL(decode_more):
  325. #if !CAREFUL
  326. cmp ebx,[ebp].src.careful ; too close to the end of src buffer?
  327. jae careful_decode_more ; yes, be careful...
  328. cmp edi,[ebp].dst.careful ; too close to the end of dst buffer?
  329. jae careful_decode_more ; yes, be careful...
  330. #else
  331. cmp ebx,[ebp].src.end_bitmask2 ; buffer overrun?
  332. jae LABEL(error_1) ; yes, error...
  333. #endif
  334. mov cl,ch ; (cl) = (# of have - # of used)
  335. xor edx,edx
  336. mov dx,[ebx] ; (edx) = next 16 bits
  337. neg cl ; (cl) = # unused bits in Mask
  338. add ebx,2 ; (ebx) = ptr to next token
  339. shl edx,cl ; (edx) = 16 aligned on required boundary
  340. add ch,16 ; (ch) = # of free bits in Mask
  341. add esi,edx ; (esi) = Mask + next 16 bits
  342. sub eax,256 ; (eax) = token - 256
  343. jl LABEL(literal) ; if < 0 then al = code of literal
  344. LABEL(pointer):
  345. #if CAREFUL
  346. cmp edi,[ebp].dst.stop ; reached end of buffer?
  347. jae ret_ok_eof ; yes, done, and probably EOF (check later)
  348. #endif
  349. mov cl,al ; prepare to obtain # of bits in offset
  350. mov edx,esi ; (edx) = mask
  351. shr cl,MAX_LENGTH_LOG ; (cl) = # of bits in offset
  352. or edx,1 ; set less significant bit
  353. shl esi,cl ; (esi) = (Mask << cl)
  354. sub ch,cl ; (ch) = # of bits left in mask
  355. ror edx,1 ; (edx) = (Mask >> 1) | 0x80000000
  356. xor cl,31 ; (cl) = 31 - (# of bits in mask)
  357. and eax,MAX_LENGTH-1 ; (eax) = length - MIN_MATCH
  358. shr edx,cl ; (edx) = (1 << #) + (Mask >> (32-#)) = offset
  359. push esi ; save mask
  360. neg edx ; (edx) = negative offset
  361. #if !CAREFUL && 8-MIN_MATCH < MAX_LENGTH-1
  362. cmp eax,8-MIN_MATCH ; length > 8?
  363. ja LABEL(long_string)
  364. lea esi, [edi+edx] ; esi = beginning of string
  365. cmp edx,-3 ; offset < 4?
  366. jae LABEL(copy_by_one) ; yes, copy byte by byte
  367. cmp esi, [ebp].dst.beg ; output buffer underrun?
  368. jb LABEL(error_pop_1) ; yes, corrupted data
  369. mov edx,[esi] ; get first 4 bytes
  370. mov [edi],edx ; store them
  371. mov edx,[esi+4] ; get next 4 byte
  372. mov [edi+4],edx ; store them
  373. pop esi ; restore mask
  374. lea edi,[edi+eax+MIN_MATCH] ; (edi) = next output location
  375. test ch,ch ; have enough bits in Mask?
  376. jge LABEL(next) ; yes, proceed further
  377. jmp LABEL(mask_more) ; no, need to read in more bits
  378. LABEL(copy_by_one):
  379. cmp esi, [ebp].dst.beg ; output buffer underrun?
  380. jb LABEL(error_pop_2) ; yes, corrupted data
  381. mov dl,[esi] ; copy 8 bytes by one
  382. mov [edi],dl ; NB: no readahead is allowed here
  383. mov dl,[esi+1] ; because source and destination
  384. mov [edi+1],dl ; may overlap
  385. mov dl,[esi+2]
  386. mov [edi+2],dl
  387. mov dl,[esi+3]
  388. mov [edi+3],dl
  389. mov dl,[esi+4]
  390. mov [edi+4],dl
  391. mov dl,[esi+5]
  392. mov [edi+5],dl
  393. mov dl,[esi+6]
  394. mov [edi+6],dl
  395. mov dl,[esi+7]
  396. mov [edi+7],dl
  397. pop esi ; restore mask
  398. lea edi,[edi+eax+MIN_MATCH] ; (edi) = next output location
  399. test ch,ch ; have enough bits in Mask?
  400. jge LABEL(next) ; yes, proceed further
  401. jmp LABEL(mask_more) ; no, need to read in more bits
  402. LABEL(long_string):
  403. #endif /* CAREFUL */
  404. cmp eax,MAX_LENGTH-1 ; long length?
  405. je LABEL(long_length) ; yes, decode it
  ; Copy a matched string of known length.
  ; In:  (eax) = length - MIN_MATCH, (edx) = negative offset, (edi) = dst,
  ;      (ch) = Bits, Mask saved on top of the stack.
  ; Out: (edi) = next output position, (esi) = Mask, (ch) = Bits.
  ; The bounds checks must be made against the END OF THE DESTINATION
  ; (dst + length), exactly as the C version does; checking the end of the
  ; source (which lies below dst) would let the copy run past dst.stop.
  406. LABEL(long_length_done):
  407. lea esi,[edi+edx] ; (esi) = source pointer
  408. add eax,MIN_MATCH ; (eax) = length
  409. lea edx,[edi+eax] ; (edx) = last output position (dst + length)
  410. cmp esi,[ebp].dst.beg ; output buffer underrun?
  411. jb LABEL(error_pop_3) ; yes, corrupted data
  412. xchg eax,ecx ; (ecx) = length, (ah) = bit counter
  413. #if !CAREFUL
  414. cmp edx,[ebp].dst.careful ; copy ends too close to the end of buffer?
  415. jae careful_check_overrun ; yes, be careful
  416. #else
  417. careful_check_overrun:
  418. cmp edx,[ebp].dst.stop ; too much to output?
  419. jbe careful_no_overrun ; no, copy the whole string
  420. sub edx,[ebp].dst.stop ; (edx) = excess beyond dst.stop
  421. sub ecx,edx ; (ecx) = exact length = dst.stop - dst
  422. rep movsb ; copy bytes
  423. pop esi ; restore mask
  424. jmp ret_ok ; OK, but not EOF
  425. careful_no_overrun:
  426. #endif
  427. rep movsb ; copy bytes
  428. mov ch,ah ; restore bit counter
  429. pop esi ; restore Mask
  430. #if !CAREFUL
  431. cmp edi,[ebp].dst.careful ; too close to the end of output buffer?
  432. jae careful_copy ; yes, switch into careful mode
  433. #else
  434. careful_copy:
  435. #endif
  436. test ch,ch ; have enough bits in Mask?
  437. jge LABEL(next) ; yes, proceed further
  438. #if CAREFUL
  439. cmp ebx,[ebp].src.end_bitmask2 ; input buffer overrun?
  440. jae LABEL(error_2)
  441. #else
  442. LABEL(mask_more):
  443. #endif
  444. mov cl,ch ; (cl) = (# of have - # of used)
  445. xor edx,edx
  446. mov dx,[ebx] ; (edx) = next 16 bits
  447. neg cl ; (cl) = # unused bits in Mask
  448. add ebx,2 ; (ebx) = ptr to next token
  449. shl edx,cl ; (edx) = 16 aligned on required boundary
  450. add ch,16 ; (ch) = # of free bits in Mask
  451. add esi,edx ; (esi) = Mask + next 16 bits
  452. jmp LABEL(next) ; decode next token
  453. LABEL(long_length):
  454. #if CAREFUL
  455. cmp ebx,[ebp].src.end ; input buffer overrun?
  456. jae LABEL(error_pop_4) ; yes, corrupted data
  457. #endif
  458. xor eax,eax
  459. mov al,[ebx] ; (eax) = next byte
  460. inc ebx ; (ebx) = ptr to next token
  461. cmp al,255 ; (eax) == 255?
  462. lea eax,[eax+MAX_LENGTH-1] ; (eax) = next byte + MAX_LENGTH-1
  463. jne LABEL(long_length_done) ; no, length decoded
  464. #if CAREFUL
  465. cmp ebx,[ebp].src.end_1 ; input buffer overrun?
  466. jae LABEL(error_pop_5) ; yes, corrupted data
  467. #endif
  468. xor eax,eax
  469. mov ax,[ebx] ; (eax) = next word
  470. add ebx,2 ; (ebx) = ptr to next token
  471. cmp ax,255+MAX_LENGTH-1 ; length should be long enough
  472. jae LABEL(long_length_done)
  473. jmp LABEL(error_3)
  474. #if CAREFUL
  475. #ifndef DEBUG_LABEL
  476. #if DEBUG
  477. #define DEBUG_LABEL(label) label: mov eax, eax
  478. #else
  479. #define DEBUG_LABEL(label) label:
  480. #endif /* DEBUG */
  481. #endif /* DEBUG_LABEL */
  482. DEBUG_LABEL(error_pop_1)
  483. DEBUG_LABEL(error_pop_2)
  484. DEBUG_LABEL(error_pop_3)
  485. DEBUG_LABEL(careful_error_pop_3)
  486. DEBUG_LABEL(careful_error_pop_4)
  487. DEBUG_LABEL(careful_error_pop_5)
  488. pop eax ; pop Mask saved on stack
  489. DEBUG_LABEL(careful_error_1)
  490. DEBUG_LABEL(careful_error_2)
  491. DEBUG_LABEL(error_3)
  492. DEBUG_LABEL(careful_error_3)
  493. xor eax,eax ; decode error: return 0
  494. jmp ret_common
  ; Common exit paths. (eax) becomes info->result: 1 = success, 0 = error.
  495. ret_ok_eof:
  496. cmp edi,[ebp].dst.end ; output pointer exactly at dst.end?
  497. jne ret_ok ; no, plain success without EOF
  498. test eax,eax
  499. jne ret_ok ; eof iff eax == 0
  500. mov eax,1
  501. mov [ebp].eof,eax ; info->eof = 1
  502. ret_ok:
  503. mov eax,1 ; no [obvious] error: return 1
  504. ret_common:
  505. mov [ebp].result, eax ; store result
  506. mov [ebp].src.last,ebx ; save last value of source ptr
  507. mov [ebp].dst.last,edi ; save last value of destination ptr
  508. pop ebp ; restore registers we used
  509. pop ebx
  510. pop ecx
  511. pop edx
  512. pop edi
  513. pop esi ; and return
  514. } /* end of __asm */
  515. #endif /* CAREFUL */
  516. #endif /* i386 */
  517. #endif /* -------------------- CODING_HUFF_ALL ------------------ */
  518. /* ----------------------- CODING_DIRECT2 ------------------------ */
  519. /* -------------- */
  520. #if CODING == CODING_DIRECT2
  521. #ifndef i386
  522. // C code: 73 MB/s at P3-500; asm code 80.5 MB/s
  523. /*
  524. Pseudocode:
  525. ----------
  526. length = NextWord ();
  527. offset = length >> DIRECT2_LEN_LOG;
  528. length &= DIRECT2_MAX_LEN;
  529. if (length == DIRECT2_MAX_LEN)
  530. {
  531. length = NextQuad ();
  532. if (length == 15)
  533. {
  534. length = NextByte ();
  535. if (length == 255)
  536. length = NextWord () - 15 - DIRECT2_MAX_LEN;
  537. length += 15;
  538. }
  539. length += DIRECT2_MAX_LEN;
  540. }
  541. length += MIN_MATCH;
  542. ++offset;
  543. memcpy (dst, dst - offset, length);
  544. dst += length;
  545. */
  546. #if !CAREFUL
  547. tag_t bmask = 0;
  548. xint ofs, len;
  549. const uchar *ptr = 0;
  550. uchar *dst = info->dst.beg;
  551. const uchar *src = info->src.beg;
  552. goto start;
  553. #endif /* !CAREFUL */
  554. LABEL (next):
  555. if (bmask >= 0) do // while MSB(bmask) == 0
  556. {
  557. bmask <<= 1;
  558. LABEL (copy_byte):
  559. CAREFUL_OK_IF (dst >= info->dst.stop);
  560. CAREFUL_ERR_IF (src >= info->src.end);
  561. *dst++ = *src++; // copy next byte
  562. } while (bmask >= 0);
  563. if ((bmask <<= 1) == 0) // if bmask == 0 reload it
  564. {
  565. START;
  566. CAREFUL_IF (src >= info->src.careful || dst >= info->dst.careful, restart);
  567. CAREFUL_ERR_IF (src >= info->src.end_tag);
  568. bmask = * (__unaligned tag_t *) src;
  569. src += sizeof (tag_t);
  570. if (bmask >= 0)
  571. {
  572. bmask = (bmask << 1) + 1;
  573. goto LABEL (copy_byte);
  574. }
  575. bmask = (bmask << 1) + 1;
  576. }
  577. #if !CAREFUL
  578. assert (dst < info->dst.end - 8);
  579. #endif
  580. CAREFUL_EOF_IF (dst >= info->dst.stop);
  581. CAREFUL_ERR_IF (src >= info->src.end_1);
  582. ofs = * (__unaligned uint16 *) src;
  583. src += 2;
  584. len = ofs;
  585. ofs >>= DIRECT2_LEN_LOG;
  586. len &= DIRECT2_MAX_LEN;
  587. ofs = ~ofs;
  588. #if !CAREFUL && (8 - MIN_MATCH < DIRECT2_MAX_LEN)
  589. if (len <= 8 - MIN_MATCH)
  590. {
  591. const uchar *src1 = dst + ofs;
  592. #ifdef _M_IX86 // unaligned access is faster only on x86
  593. if (ofs < ~2)
  594. {
  595. if (src1 < info->dst.beg) RET_ERR; // check for buffer underrun
  596. ofs = ((__unaligned uint32 *) src1)[0]; // quickly copy 8 bytes
  597. ((__unaligned uint32 *) dst)[0] = ofs;
  598. ofs = ((__unaligned uint32 *) src1)[1];
  599. ((__unaligned uint32 *) dst)[1] = ofs;
  600. dst += len + MIN_MATCH; // dst = next output position
  601. goto LABEL (next); // decode next token
  602. }
  603. #endif
  604. if (src1 < info->dst.beg) RET_ERR; // check for buffer overrun
  605. COPY_8_BYTES (dst, src1);
  606. dst += len + MIN_MATCH;
  607. goto LABEL (next);
  608. }
  609. #endif
  610. if (len == DIRECT2_MAX_LEN) // decode long length
  611. {
  612. if (ptr == 0)
  613. {
  614. CAREFUL_ERR_IF (src >= info->src.end);
  615. ptr = src;
  616. len = *src++ & 15;
  617. }
  618. else
  619. {
  620. len = *ptr >> 4;
  621. ptr = 0;
  622. }
  623. if (len == 15)
  624. {
  625. CAREFUL_ERR_IF (src >= info->src.end);
  626. len = *src++;
  627. if (len == 255)
  628. {
  629. CAREFUL_ERR_IF (src >= info->src.end_1);
  630. len = * (__unaligned uint16 *) src;
  631. src += 2;
  632. if (len < 255 + 15 + DIRECT2_MAX_LEN) RET_ERR;
  633. len += MIN_MATCH;
  634. goto LABEL (done_len);
  635. }
  636. len += 15;
  637. }
  638. len += DIRECT2_MAX_LEN + MIN_MATCH;
  639. goto LABEL (done_len);
  640. }
  641. len += MIN_MATCH;
  642. LABEL (done_len):
  643. info->src.last = src;
  644. src = dst + ofs;
  645. #if !CAREFUL
  646. if (dst + len >= info->dst.careful)
  647. goto careful_copy_tail;
  648. #else
  649. careful_copy_tail:
  650. if (dst + len > info->dst.stop)
  651. {
  652. if (src < info->dst.beg) RET_ERR;
  653. len = (xint) (info->dst.stop - dst);
  654. assert (len >= 0);
  655. COPY_BLOCK_SLOW (dst, src, len);
  656. src = info->src.last;
  657. RET_OK;
  658. }
  659. #endif /* !CAREFUL */
  660. if (src < info->dst.beg) RET_ERR;
  661. COPY_BLOCK_SLOW (dst, src, len); // copy block
  662. src = info->src.last; // restore input buffer ptr
  663. goto LABEL (next);
  664. #if CAREFUL
  665. ret_ok_eof:
  666. if (dst == info->dst.end)
  667. info->eof = 1;
  668. ret_ok:
  669. info->src.last = src;
  670. info->dst.last = dst;
  671. info->result = 1;
  672. return;
  673. ret_err:
  674. info->result = 0;
  675. return;
  676. #endif /* CAREFUL */
  677. #else /* ------------------------- i386 ---------------------------- */
  678. #if !CAREFUL
  679. __asm
  680. {
  681. mov eax,info // save info
  682. push ebx // save registers
  683. push ecx
  684. push edx
  685. push esi
  686. push edi
  687. push ebp
  688. #define PTR dword ptr [esp]
  689. #define DST_STOP dword ptr [esp+4*1]
  690. #define DST_CAREFUL dword ptr [esp+4*2]
  691. #define SRC_CAREFUL dword ptr [esp+4*3]
  692. #define SRC_END dword ptr [esp+4*4]
  693. #define SRC_END_1 dword ptr [esp+4*5]
  694. #define SRC_END_TAG dword ptr [esp+4*6]
  695. #define INFO dword ptr [esp+4*7]
  696. #define LOCALS 8
  697. sub esp,4*LOCALS // make room for locals
  698. mov edx,[eax].dst.stop
  699. mov DST_STOP,edx
  700. mov edx,[eax].dst.careful
  701. mov DST_CAREFUL,edx
  702. mov edx,[eax].src.careful
  703. mov SRC_CAREFUL,edx
  704. mov edx,[eax].src.end
  705. mov SRC_END,edx
  706. mov edx,[eax].src.end_1
  707. mov SRC_END_1,edx
  708. mov edx,[eax].src.end_tag
  709. mov SRC_END_TAG,edx
  710. xor edx,edx // ptr = 0
  711. mov PTR,edx
  712. mov INFO,eax
  713. mov edx,[eax].dst.beg
  714. mov ebp,edx
  715. mov edi,[eax].dst.beg
  716. mov ebx,[eax].src.beg
  717. xor eax,eax // bmask = 0
  718. jmp start
  719. #endif /* !CAREFUL */
  720. align 16
  721. LABEL (literal_1):
  722. mov [edi-1],cl
  723. LABEL (literal):
  724. #if CAREFUL
  725. cmp edi,DST_STOP
  726. jae ret_ok // recoded everything?
  727. cmp ebx,SRC_END
  728. jae LABEL(ret_err_1)
  729. #endif
  730. inc edi
  731. mov cl,[ebx] // copy next byte
  732. inc ebx
  733. add eax,eax // check most significant bit
  734. jnc LABEL (literal_1)
  735. mov [edi-1],cl
  736. jz LABEL (start) // need reloading?
  737. LABEL (pointer):
  738. #if CAREFUL
  739. cmp edi,DST_STOP // decoded all the stuff? -- done
  740. jae ret_ok_eof
  741. cmp ebx,SRC_END_1
  742. jae LABEL(ret_err_2)
  743. #endif
  744. xor edx,edx
  745. mov dx,[ebx]
  746. mov ecx,edx
  747. shr edx,DIRECT2_LEN_LOG
  748. add ebx,2
  749. not edx // edx = -offset
  750. and ecx,DIRECT2_MAX_LEN // ecx = length - MIN_LENGTH
  751. lea esi,[edi+edx]
  752. #if !CAREFUL && (8 - MIN_MATCH < DIRECT2_MAX_LEN)
  753. cmp cl,8 - MIN_MATCH // length > 8?
  754. ja LABEL (long_length)
  755. cmp esi,ebp // output buffer underrun?
  756. jb LABEL(ret_err_3)
  757. cmp edx,-3
  758. mov edx,[esi]
  759. jae LABEL (byte_by_byte)
  760. mov [edi],edx
  761. mov edx,[esi+4]
  762. mov [edi+4],edx
  763. lea edi,[edi+ecx+MIN_MATCH]
  764. add eax,eax
  765. jnc LABEL (literal)
  766. jnz LABEL (pointer)
  767. jmp LABEL (start)
  768. LABEL (byte_by_byte):
  769. add ecx,MIN_MATCH
  770. rep movsb
  771. add eax,eax
  772. jnc LABEL (literal)
  773. jnz LABEL (pointer)
  774. jmp LABEL (start)
  775. LABEL (long_length):
  776. #endif /* !CAREFUL && (8 - MIN_MATCH < DIRECT2_MAX_LEN) */
  777. cmp esi,ebp // output buffer underrun?
  778. jb LABEL(ret_err_4)
  779. mov edx,PTR
  780. cmp cl,DIRECT2_MAX_LEN
  781. jne LABEL (done_len)
  782. test edx,edx
  783. je LABEL (ptr_zero)
  784. xor ecx,ecx
  785. mov cl,[edx]
  786. xor edx,edx
  787. shr ecx,4
  788. jmp LABEL(done_quad)
  789. LABEL (ptr_zero):
  790. #if CAREFUL
  791. cmp ebx,SRC_END
  792. jae LABEL(ret_err_5)
  793. #endif
  794. xor ecx,ecx
  795. mov cl,[ebx]
  796. mov edx,ebx
  797. and ecx,15
  798. inc ebx
  799. LABEL(done_quad):
  800. mov PTR,edx
  801. cmp cl,15
  802. lea ecx,[ecx+DIRECT2_MAX_LEN]
  803. je LABEL(len255)
  804. LABEL(done_len):
  805. lea edx,[edi+ecx+MIN_MATCH] // edx = end of copy
  806. add ecx,MIN_MATCH
  807. #if !CAREFUL
  808. cmp edx,DST_CAREFUL // too close to end of buffer?
  809. jae careful_copy_tail
  810. #else
  811. careful_copy_tail:
  812. cmp edx,DST_STOP // ahead of output buffer?
  813. jbe LABEL (checked_eob)
  814. mov ecx,DST_STOP
  815. sub ecx,edi // ecx = corrected length
  816. rep movsb // copy substring
  817. jmp ret_ok // no errors, no EOF mark
  818. LABEL (checked_eob):
  819. #endif
  820. rep movsb // copy substring
  821. add eax,eax
  822. jnc LABEL (literal)
  823. jnz LABEL (pointer)
  824. align 16
  825. LABEL (start):
  826. #if !CAREFUL
  827. cmp ebx,SRC_CAREFUL // too close to end of buffer(s)?
  828. jae careful_start // be careful if so
  829. cmp edi,DST_CAREFUL
  830. jae careful_start
  831. #else
  832. cmp ebx,SRC_END_TAG // input buffer overrun? -- corrupted data
  833. jae LABEL(ret_err_6)
  834. #endif
  835. mov eax,[ebx]
  836. add ebx,4
  837. test eax,eax
  838. lea eax,[eax+eax+1]
  839. jns LABEL (literal)
  840. jmp LABEL (pointer)
  // Decode an extended match length (the 4-bit length field was 15).
  // In:  (ebx) = src. Out: (ecx) = length - MIN_MATCH, (ebx) advanced.
  // A following byte below 255 gives length = byte + 15 + DIRECT2_MAX_LEN;
  // a byte of 255 means a 16-bit length follows, which must be at least
  // 255 + 15 + DIRECT2_MAX_LEN or the compressed data is corrupted.
  841. LABEL(len255):
  842. #if CAREFUL
  843. cmp ebx,SRC_END // input buffer overrun?
  844. jae LABEL(ret_err_7)
  845. #endif
  846. xor ecx,ecx
  847. mov cl,[ebx] // ecx = next byte
  848. inc ebx
  849. cmp cl,255 // escape to 16-bit length?
  850. lea ecx,[ecx+15+DIRECT2_MAX_LEN] // lea keeps the flags of the cmp
  851. jne LABEL(done_len) // no, length decoded
  852. #if CAREFUL
  853. cmp ebx,SRC_END_1 // two more input bytes available?
  854. jae LABEL(ret_err_7)
  855. #endif
  856. xor ecx,ecx
  857. mov cx,[ebx] // ecx = 16-bit length
  858. add ebx,2
  859. cmp ecx,255 + 15 + DIRECT2_MAX_LEN // length should be large enough
  860. jae LABEL (done_len)
  // Length too short: corrupted data. The jump must be explicit, because in
  // the fast pass no error label follows this point and execution would
  // otherwise fall through into the first code of the careful pass.
  jmp careful_ret_err_7
  861. #if CAREFUL
  862. #ifndef DEBUG_LABEL
  863. #if DEBUG
  864. #define DEBUG_LABEL(label) label: mov eax, eax
  865. #else
  866. #define DEBUG_LABEL(label) label:
  867. #endif /* DEBUG */
  868. #endif /* DEBUG_LABEL */
  869. DEBUG_LABEL(careful_ret_err_1)
  870. DEBUG_LABEL(careful_ret_err_2)
  871. DEBUG_LABEL(ret_err_3)
  872. DEBUG_LABEL(ret_err_4)
  873. DEBUG_LABEL(careful_ret_err_4)
  874. DEBUG_LABEL(careful_ret_err_5)
  875. DEBUG_LABEL(careful_ret_err_6)
  876. DEBUG_LABEL(careful_ret_err_7)
  877. xor eax,eax
  878. jmp ret_common
  879. ret_ok_eof:
  880. mov ecx,INFO
  881. mov eax,1
  882. cmp edi,[ecx].dst.end
  883. jne ret_ok
  884. mov [ecx].eof,eax
  885. ret_ok:
  886. mov eax,1
  887. mov ecx,INFO
  888. mov [ecx].src.last,ebx
  889. mov [ecx].dst.last,edi
  890. ret_common:
  891. MOV ecx,INFO
  892. mov [ecx].result,eax
  893. add esp,4*LOCALS
  894. pop ebp
  895. pop edi
  896. pop esi
  897. pop edx
  898. pop ecx
  899. pop ebx
  900. } /* __asm */
  901. #endif /* CAREFUL */
  902. #endif /* i386 */
  903. #endif /* ----------------- CODING == CODING_DIRECT2 --------------- */
  904. /* --------------------------- End of code ------------------------- */
  905. /* ----------- */
  906. #if CAREFUL
  907. } /* end of "do_decode" */
  908. #endif /* CAREFUL */
  909. #undef CAREFUL
  910. #undef LABEL
  911. #undef CAREFUL_LABEL
  912. #undef CAREFUL_OK_IF
  913. #undef CAREFUL_ERR_IF
  914. #undef CAREFUL_EOF_IF
  915. #undef CAREFUL_IF
  916. #undef START
  917. #undef FAST_COPY_DONE