Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1164 lines
36 KiB

  1. /* ------------------------------------------------------------------------ */
  2. /* */
  3. /* Copyright (c) Microsoft Corporation, 2000-2002. All rights reserved. */
  4. /* Copyright (c) Andrew Kadatch, 1991-2002. All rights reserved. */
  5. /* */
  6. /* Microsoft Confidential -- do not redistribute. */
  7. /* */
  8. /* ------------------------------------------------------------------------ */
  9. /*
  10. Decoding is split into two phases:
  11. 1. Fast decoding. Check bounds rarely (when loading new tag and after copying
  12. of a long string) and switch into Careful only when we are too close to the
  13. end of input or output buffer.
  14. 2. Careful decoding. Before performing any memory access all bounds are checked
  15. to make sure no buffer overrun or underrun will happen. Careful decoding is
  16. usually 1.5 times slower than Fast one, but only last several hundred bytes
  17. are decoded this way; all the rest is decoded Fast.
  18. As long as decoding code is essentially the same except for bounds checks that
  19. differ in Fast and Careful mode, in order to avoid code duplication this file
  20. is included twice with different setting of CAREFUL macro (first it is 0, then
  21. 1).
  22. Run "cl -EP xdecode.c >xdecode.pp" to see actual code.
  23. */
  24. #if CAREFUL
  25. #define LABEL(label) careful_##label
  26. #define CAREFUL_OK_IF(cond) if (cond) RET_OK
  27. #define CAREFUL_ERR_IF(cond) if (cond) RET_ERR
  28. #define CAREFUL_EOF_IF(cond) if (cond) goto ret_ok_eof;
  29. #define CAREFUL_IF(cond, label) label:
  30. #if CODING & (CODING_HUFF_LEN | CODING_HUFF_PTR | CODING_HUFF_ALL)
  31. #define START careful_start:
  32. #else
  33. #define START
  34. #endif
  35. #else /* !CAREFUL */
  36. #define LABEL(label) label
  37. #define CAREFUL_OK_IF(cond)
  38. #define CAREFUL_ERR_IF(cond)
  39. #define CAREFUL_EOF_IF(cond)
  40. #define CAREFUL_IF(cond, label) if (cond) goto label
  41. #define START start:
  42. static void do_decode (decode_info *info)
  43. {
  44. #endif
  45. /* ----------------------- CODING_HUFF_ALL ------------------------ */
  46. /* --------------- */
  47. // C code: 26.3 MB/s, asm code: 32.3 MB/s at P3-500
  48. #if CODING == CODING_HUFF_ALL
  49. #ifndef i386
  50. #if !CAREFUL
  51. ubitmask4 Mask;
  52. bits_t Bits, bits;
  53. xint len;
  54. uxint ofs;
  55. uchar *dst = info->dst.beg;
  56. const uchar *src = info->src.beg;
  57. Mask = * (__unaligned ubitmask2 *) src; src += sizeof (ubitmask2);
  58. Mask <<= sizeof (ubitmask2) * 8;
  59. Mask += * (__unaligned ubitmask2 *) src; src += sizeof (ubitmask2);
  60. Bits = 8 * sizeof (ubitmask2);
  61. if (src >= info->src.careful || dst >= info->dst.careful)
  62. goto careful_next;
  63. goto LABEL(next);
  64. #endif /* CAREFUL */
  65. LABEL(decode_more):
  // Refill point: reached when Bits went negative after a codeword was consumed.
  // len still holds the decoded token (not yet reduced by 256).
  // In the fast pass CAREFUL_IF is a conditional goto to the label defined by the
  // careful pass; in the careful pass it expands to that bare label.
  66. // too close to end of buffer? -- switch to careful mode...
  67. CAREFUL_IF (src >= info->src.careful, decode_more1);
  68. CAREFUL_IF (dst >= info->dst.careful, decode_more2);
  69. // input buffer overrun? -- corrupted compressed data
  70. CAREFUL_ERR_IF (src >= info->src.end_bitmask2);
  71. // read 16 bits more and update Mask&Bits respectively
  72. bits = Bits;
  73. ofs = * (__unaligned ubitmask2 *) src;
  74. bits = (bits_t) (-bits); // Bits < 0 here, so -Bits is the left shift for the new word
  75. src += sizeof (ubitmask2);
  76. ofs <<= bits; // place the new 16 bits right below the bits still held in Mask
  77. Bits += 8 * sizeof (ubitmask2);
  78. Mask += (ubitmask4) ofs;
  79. if ((len -= 256) >= 0) // (len -= 256) >= 0 ? pointer : literal (falls into the loop)
  80. goto LABEL (pointer);
  81. for (;;)
  82. {
  83. CAREFUL_OK_IF (dst >= info->dst.stop);
  84. *dst = (uchar) len; // copy literal byte to output
  85. ofs = (uxint) (Mask >> (8 * sizeof (Mask) - DECODE_BITS));
  86. ++dst;
  87. len = ((int16 *) info->table)[ofs];
  88. bits = 15;
  89. if (len < 0)
  90. goto LABEL(long_codeword);
  91. // short codeword -- already decoded
  92. bits &= len; // bits = # of bit used in Mask
  93. len >>= 4; // len = token
  94. Mask <<= bits; // update Mask&Bits
  95. Bits = (bits_t) (Bits - bits); // read more bits if necessary
  96. if (Bits < 0)
  97. goto LABEL (decode_more);
  98. if ((len -= 256) < 0) // (len -= 256) < 0 ? literal : pointer
  99. continue;
  100. goto LABEL (pointer);
  101. LABEL(next): // decode next token via lookup table
  102. ofs = (uxint) (Mask >> (8 * sizeof (Mask) - DECODE_BITS));
  103. len = ((int16 *) info->table)[ofs];
  104. bits = 15;
  105. if (len >= 0)
  106. {
  107. // short codeword -- already decoded
  108. bits &= len; // bits = # of bit used in Mask
  109. len >>= 4; // len = token
  110. Mask <<= bits; // update Mask&Bits
  111. Bits = (bits_t) (Bits - bits); // read more bits if necessary
  112. if (Bits < 0)
  113. goto LABEL (decode_more);
  114. if ((len -= 256) < 0) // (len -= 256) < 0 ? literal : pointer
  115. continue;
  116. }
  117. else
  118. {
  119. LABEL (long_codeword):
  120. // long codeword -- decode bit by bit
  // A negative table entry means the codeword is longer than DECODE_BITS; the
  // entry (plus the next input bit) indexes a further entry at offset 0x8000.
  121. Mask <<= DECODE_BITS; // DECODE_BITS already parsed
  122. do
  123. {
  124. len += ((bitmask4) Mask) < 0; // len += MSB (Mask)
  125. Mask <<= 1; // 1 more bit was used
  126. len = ((int16 *) info->table)[len + 0x8000];
  127. }
  128. while (len < 0);
  // Unlike the short-codeword path there is no "Mask <<= bits" below: every bit
  // of the codeword was already shifted out of Mask above.
  129. bits &= len; // bits = # of bit used in Mask
  130. len >>= 4; // len = token
  131. Bits = (bits_t) (Bits - bits); // read more bits if necessary
  132. if (Bits < 0)
  133. goto LABEL (decode_more);
  134. if ((len -= 256) < 0) // (len -= 256) < 0 ? literal : pointer
  135. continue;
  136. }
  137. LABEL(pointer):
  138. CAREFUL_EOF_IF (dst >= info->dst.stop);
  139. bits = (bits_t) (len >> MAX_LENGTH_LOG); // # of bits in offset
  140. ofs = (uxint) ((Mask >> 1) | (((ubitmask4) 1) << (8 * sizeof (Mask) - 1)));
  141. Mask <<= bits; // update Mask and Bits
  142. Bits = (bits_t) (Bits - bits);
  143. bits ^= 8 * sizeof (ofs) - 1; // bits = 31 - bits
  144. len &= MAX_LENGTH - 1; // run length - MIN_MATCH
  145. ofs >>= bits; // ofs = (1<<bits) | (Mask<<(32-bits))
  146. info->src.last = src; // save src
  147. ofs = (uxint) (- (xint) ofs); // ofs = real negative offset
  148. #if !CAREFUL && 8-MIN_MATCH < MAX_LENGTH-1
  // Fast pass only: a match of at most 8 bytes is copied as a fixed 8-byte block
  // and dst is then advanced by the real length (len + MIN_MATCH).
  149. if (len <= 8-MIN_MATCH)
  150. {
  151. src = dst + (xint) ofs; // src = beginning of string
  152. #if defined (i386) || defined (i386compat) // unaligned access is faster only on x86
  // ofs holds the negated offset; "ofs < ~2" selects distances of at least 4
  // (assuming uxint is unsigned -- TODO confirm), where two 4-byte copies are safe.
  153. if (ofs < ~2)
  154. {
  155. if (src < info->dst.beg) // buffer underrun? -- corrupted data
  156. RET_ERR;
  157. ofs = ((__unaligned uint32 *) src)[0]; // copy 8 bytes
  158. ((__unaligned uint32 *) dst)[0] = ofs;
  159. ofs = ((__unaligned uint32 *) src)[1];
  160. ((__unaligned uint32 *) dst)[1] = ofs;
  161. src = info->src.last; // restore src
  162. dst = dst + len + MIN_MATCH; // dst = next output position
  163. if (Bits >= 0) // have enough Bits in Mask? -- proceed further
  164. goto LABEL (next);
  165. goto LABEL (mask_more); // otherwise, read more bits
  166. }
  167. #endif
  168. if (src < info->dst.beg) // buffer underrun? -- corrupted data
  169. RET_ERR;
  170. COPY_8_BYTES (dst, src); // copy 8 bytes one by one
  171. // NB: dst & src may overlap
  172. src = info->src.last; // restore src
  173. dst = dst + len + MIN_MATCH; // dst = next output position
  174. if (Bits >= 0) // have enough Bits in Mask? -- proceed further
  175. goto LABEL (next);
  176. goto LABEL (mask_more); // otherwise, read more bits
  177. }
  178. #endif /* CAREFUL */
  179. if (len == MAX_LENGTH - 1) // long length? -- decode it
  180. {
  181. // if input data overrun then compressed data corrupted
  182. CAREFUL_ERR_IF (src >= info->src.end);
  183. len = *src++ + (MAX_LENGTH-1);
  184. if (len == 255 + MAX_LENGTH-1)
  185. {
  186. CAREFUL_ERR_IF (src >= info->src.end_1);
  187. len = * (__unaligned uint16 *) src;
  188. src += 2;
  189. if (len < 255 + MAX_LENGTH-1) // length should be large enough
  190. RET_ERR;
  191. }
  192. info->src.last = src; // save input buffer pointer
  193. }
  194. len += MIN_MATCH; // len = actual length
  195. src = dst + (xint) ofs; // src = pointer to the beginning of string
  196. dst += len; // dst = last output position
  197. if (src < info->dst.beg) // buffer underrun? -- corrupted data
  198. RET_ERR;
  199. #if !CAREFUL
  200. if (dst >= info->dst.careful)
  201. goto careful_check_overrun;
  202. #else
  203. careful_check_overrun:
  204. if (dst > info->dst.stop) // more to copy than necessary?
  205. {
  206. dst -= len; // dst = first output position
  207. len = (xint) (info->dst.stop - dst); // len = max length to copy
  208. COPY_BLOCK_SLOW (dst, src, len); // copy last run
  209. src = info->src.last; // restore input buffer pointer
  210. RET_OK; // OK but no EOF mark was found
  211. }
  212. #endif
  213. dst -= len; // dst = first output position
  214. COPY_BLOCK_SLOW (dst, src, len); // input & output may overlap -- copy byte by byte
  215. src = info->src.last; // restore input buffer pointer
  216. CAREFUL_IF (dst >= info->dst.careful, copy1);
  217. if (Bits >= 0) // enough Bits in Mask?
  218. goto LABEL (next); // decode next token
  219. #if !CAREFUL && 8-MIN_MATCH < MAX_LENGTH-1
  220. LABEL(mask_more):
  221. #endif
  222. // too close to end of buffer? -- switch to careful mode...
  223. CAREFUL_IF (src >= info->src.careful, decode_more3);
  224. CAREFUL_IF (dst >= info->dst.careful, decode_more4);
  225. // have 2 more bytes in input buffer?
  226. CAREFUL_ERR_IF (src >= info->src.end_bitmask2);
  227. // read 16 bits more and update Mask&Bits respectively
  228. bits = Bits;
  229. ofs = * (__unaligned ubitmask2 *) src;
  230. bits = (bits_t) (-bits);
  231. src += sizeof (ubitmask2);
  232. ofs <<= bits;
  233. Bits += 8 * sizeof (ubitmask2);
  234. Mask += (ubitmask4) ofs;
  235. goto LABEL (next); // decode next token
  236. } /* of for(;;) */
  237. #if CAREFUL
  238. ret_ok_eof:
  239. if (dst == info->dst.end && len == 0)
  240. info->eof = 1;
  241. ret_ok:
  242. info->src.last = src;
  243. info->dst.last = dst;
  244. info->result = 1;
  245. return;
  246. ret_err:
  247. info->result = 0;
  248. return;
  249. #endif /* CAREFUL */
  250. #else /* ---------------------- defined i386 --------------------- */
  251. #if !CAREFUL
  252. __asm
  253. {
  254. mov eax,info ; save info
  ; Register roles set up here: (ebp) = info, (ebx) = src, (edi) = dst,
  ; (esi) = Mask, (ch) = Bits. Six registers are pushed here and popped at ret_common.
  255. push esi ; save registers
  256. push edi
  257. push edx
  258. push ecx
  259. push ebx
  260. push ebp
  261. mov ebp,eax ; (ebp) = info
  262. mov ebx,[ebp].src.beg ; (ebx) = src
  263. mov edi,[ebp].dst.beg ; (edi) = dst
  264. xor esi,esi ; initialize Mask
  265. mov si,[ebx] ; first 16-bit word goes to the high half of Mask
  266. shl esi,16
  267. mov si,[ebx+2] ; second 16-bit word goes to the low half of Mask
  268. add ebx,4
  269. mov ch,16 ; (ch) = Bits = 16
  270. cmp ebx,[ebp].src.careful ; too close to the end of src buffer?
  271. jae careful_next ; yes, be careful...
  272. cmp edi,[ebp].dst.careful ; too close to the end of dst buffer?
  273. jae careful_next ; yes, be careful...
  274. jmp LABEL (next)
  275. #endif /* CAREFUL */
  276. align 16
  277. LABEL(literal):
  278. #if CAREFUL
  279. cmp edi,[ebp].dst.stop ; decoded as much as asked?
  280. jae ret_ok ; done, but no EOF mark
  281. #endif
  282. mov edx,esi ; (edx) = Mask
  283. mov [edi],al ; store literal byte
  284. shr edx,32-DECODE_BITS
  285. inc edi ; (edi) = next output position
  286. movsx eax,word ptr [ebp+edx*2].table ; (eax) = respective decode table entry
  287. mov cl,15 ; (cl) = 15
  288. test eax,eax ; need further decoding? (= codelen > DECODE_BITS?)
  289. jl LABEL(long_code) ; yes, do it
  290. and cl,al ; (cl) = # of bits used in mask
  291. shr eax,4 ; (eax) = token
  292. shl esi,cl ; (esi) = resulting mask
  293. sub ch,cl ; (ch) = # of available bits left in dx
  294. jl LABEL(decode_more) ; if ch < 0 need to read more bits
  295. sub eax,256 ; (eax) = token - 256
  296. jl LABEL(literal) ; if < 0 then al = code of literal
  297. jmp LABEL(pointer) ; otherwise it's pointer
  298. LABEL(next):
  299. mov edx,esi ; (edx) = Mask
  300. mov cl,15 ; (cl) = 15
  301. shr edx,32-DECODE_BITS ; (edx) = DECODE_BITS most significant bits of Mask
  302. movsx eax,word ptr [ebp+edx*2].table ; (eax) = respective decode table entry
  303. test eax,eax ; need further decoding?
  304. jl LABEL(long_code) ; yes, continue
  305. and cl,al ; (cl) = # of bits used in mask
  306. shr eax,4 ; (eax) = current token
  307. shl esi,cl ; (esi) = resulting mask
  308. sub ch,cl ; (ch) = # of available bits left in dx
  309. jl LABEL(decode_more) ; if ch < 0 need to read more bits
  310. sub eax,256 ; (eax) = token - 256
  311. jl LABEL(literal) ; if < 0 then al = code of literal
  312. jmp LABEL(pointer) ; otherwise it's pointer
  313. LABEL(long_code):
  314. shl esi, DECODE_BITS ; DECODE_BITS were used; remove them
  315. LABEL(next_bit):
  316. add esi,esi ; Mask <<= 1 (and get carry)
  317. adc eax,0 ; eax += (old Mask < 0)
  318. movsx eax,word ptr [ebp+eax*2+0x10000].table ; (eax) = token
  319. test eax,eax ; need further decoding?
  320. jl LABEL(next_bit) ; yes, continue
  321. and cl,al ; (cl) = # of bits used in mask
  322. shr eax,4 ; (eax) = token
  323. sub ch,cl ; (ch) = # of available bits left in Mask
  324. jl LABEL(decode_more) ; if ch < 0 need to read more bits
  325. sub eax,256 ; (eax) = token - 256
  326. jl LABEL(literal) ; if < 0 then al = code of literal
  327. jmp LABEL(pointer) ; otherwise it's pointer
  328. LABEL(decode_more):
  329. #if !CAREFUL
  330. cmp ebx,[ebp].src.careful ; too close to the end of src buffer?
  331. jae careful_decode_more ; yes, be careful...
  332. cmp edi,[ebp].dst.careful ; too close to the end of dst buffer?
  333. jae careful_decode_more ; yes, be careful...
  334. #else
  335. cmp ebx,[ebp].src.end_bitmask2 ; buffer overrun?
  336. jae LABEL(error_1) ; yes, error...
  337. #endif
  338. mov cl,ch ; (cl) = (# of have - # of used)
  339. xor edx,edx
  340. mov dx,[ebx] ; (edx) = next 16 bits
  341. neg cl ; (cl) = # unused bits in Mask
  342. add ebx,2 ; (ebx) = ptr to next token
  343. shl edx,cl ; (edx) = 16 aligned on required boundary
  344. add ch,16 ; (ch) = # of free bits in Mask
  345. add esi,edx ; (esi) = Mask + next 16 bits
  346. sub eax,256 ; (eax) = token - 256
  347. jl LABEL(literal) ; if < 0 then al = code of literal
  348. LABEL(pointer):
  349. #if CAREFUL
  350. cmp edi,[ebp].dst.stop ; reached end of buffer?
  351. jae ret_ok_eof ; yes, done, and probably EOF (check later)
  352. #endif
  353. mov cl,al ; prepare to obtain # of bits in offset
  354. mov edx,esi ; (edx) = mask
  355. shr cl,MAX_LENGTH_LOG ; (cl) = # of bits in offset
  356. or edx,1 ; set less significant bit
  357. shl esi,cl ; (esi) = (Mask << cl)
  358. sub ch,cl ; (ch) = # of bits left in mask
  359. ror edx,1 ; (edx) = (Mask >> 1) | 0x80000000
  360. xor cl,31 ; (cl) = 31 - (# of bits in mask)
  361. and eax,MAX_LENGTH-1 ; (eax) = length - MIN_MATCH
  362. shr edx,cl ; (edx) = (1 << #) + (Mask >> (32-#)) = offset
  363. push esi ; save mask
  364. neg edx ; (edx) = negative offset
  365. #if !CAREFUL && 8-MIN_MATCH < MAX_LENGTH-1
  366. cmp eax,8-MIN_MATCH ; length > 8?
  367. ja LABEL(long_string)
  368. lea esi, [edi+edx] ; esi = beginning of string
  369. cmp edx,-3 ; offset < 4?
  370. jae LABEL(copy_by_one) ; yes, copy byte by byte
  371. cmp esi, [ebp].dst.beg ; output buffer underrun?
  372. jb LABEL(error_pop_1) ; yes, corrupted data
  373. mov edx,[esi] ; get first 4 bytes
  374. mov [edi],edx ; store them
  375. mov edx,[esi+4] ; get next 4 byte
  376. mov [edi+4],edx ; store them
  377. pop esi ; restore mask
  378. lea edi,[edi+eax+MIN_MATCH] ; (edi) = next output location
  379. test ch,ch ; have enough bits in Mask?
  380. jge LABEL(next) ; yes, proceed further
  381. jmp LABEL(mask_more) ; no, need to read in more bits
  382. LABEL(copy_by_one):
  383. cmp esi, [ebp].dst.beg ; output buffer underrun?
  384. jb LABEL(error_pop_2) ; yes, corrupted data
  385. mov dl,[esi] ; copy 8 bytes by one
  386. mov [edi],dl ; NB: no readahead is allowed here
  387. mov dl,[esi+1] ; because source and destination
  388. mov [edi+1],dl ; may overlap
  389. mov dl,[esi+2]
  390. mov [edi+2],dl
  391. mov dl,[esi+3]
  392. mov [edi+3],dl
  393. mov dl,[esi+4]
  394. mov [edi+4],dl
  395. mov dl,[esi+5]
  396. mov [edi+5],dl
  397. mov dl,[esi+6]
  398. mov [edi+6],dl
  399. mov dl,[esi+7]
  400. mov [edi+7],dl
  401. pop esi ; restore mask
  402. lea edi,[edi+eax+MIN_MATCH] ; (edi) = next output location
  403. test ch,ch ; have enough bits in Mask?
  404. jge LABEL(next) ; yes, proceed further
  405. jmp LABEL(mask_more) ; no, need to read in more bits
  406. LABEL(long_string):
  407. #endif /* CAREFUL */
  408. cmp eax,MAX_LENGTH-1 ; long length?
  409. je LABEL(long_length) ; yes, decode it
  410. LABEL(long_length_done):
  ; On entry: (eax) = length - MIN_MATCH, (edx) = negative offset,
  ; (ch) = Bits, and the Mask is saved on the stack.
  411. lea esi,[edi+edx] ; (esi) = source pointer
  412. add eax,MIN_MATCH ; (eax) = length
  413. lea edx,[edi+eax] ; (edx) = last output position
  414. cmp esi,[ebp].dst.beg ; output buffer underrun?
  415. jb LABEL(error_pop_3) ; yes, corrupted data
  416. xchg eax,ecx ; (ecx) = length, (ah) = bit counter
  417. #if !CAREFUL
  418. cmp edx,[ebp].dst.careful ; too close to the end of buffer?
  419. jae careful_check_overrun ; yes, be careful
  420. #else
  421. careful_check_overrun:
  422. cmp edx,[ebp].dst.stop ; too much to output?
  423. jbe careful_no_overrun ; no, copy the whole string
  424. sub edx,[ebp].dst.stop ; (edx) = excess
  425. sub ecx,edx ; (ecx) = exact length
  426. rep movsb ; copy bytes
  427. pop esi ; restore mask
  428. jmp ret_ok ; OK, but not EOF
  429. careful_no_overrun:
  430. #endif
  431. rep movsb ; copy bytes
  432. mov ch,ah ; restore bit counter
  433. pop esi ; restore Mask
  434. #if !CAREFUL
  435. cmp edi,[ebp].dst.careful ; too close to the end of output buffer?
  436. jae careful_copy ; yes, switch into careful mode
  437. #else
  438. careful_copy:
  439. #endif
  440. test ch,ch ; have enough bits in Mask?
  441. jge LABEL(next) ; yes, proceed further
  442. LABEL(mask_more):
  443. #if !CAREFUL
  444. cmp ebx,[ebp].src.careful ; too close to the end of src buffer?
  445. jae careful_mask_more ; yes, be careful...
  446. cmp edi,[ebp].dst.careful ; too close to the end of dst buffer?
  447. jae careful_mask_more ; yes, be careful...
  448. #else
  449. cmp ebx,[ebp].src.end_bitmask2 ; input buffer overrun?
  450. jae LABEL(error_2) ; yes, error...
  451. #endif
  452. mov cl,ch ; (cl) = (# of have - # of used)
  453. xor edx,edx
  454. mov dx,[ebx] ; (edx) = next 16 bits
  455. neg cl ; (cl) = # unused bits in Mask
  456. add ebx,2 ; (ebx) = ptr to next token
  457. shl edx,cl ; (edx) = 16 aligned on required boundary
  458. add ch,16 ; (ch) = # of free bits in Mask
  459. add esi,edx ; (esi) = Mask + next 16 bits
  460. jmp LABEL(next) ; decode next token
  461. LABEL(long_length):
  462. #if CAREFUL
  463. cmp ebx,[ebp].src.end ; input buffer overrun?
  464. jae LABEL(error_pop_4) ; yes, corrupted data
  465. #endif
  466. xor eax,eax
  467. mov al,[ebx] ; (eax) = next byte
  468. inc ebx ; (ebx) = ptr to next token
  469. cmp al,255 ; (eax) == 255?
  470. lea eax,[eax+MAX_LENGTH-1] ; (eax) = next byte + MAX_LENGTH-1
  471. jne LABEL(long_length_done) ; no, length decoded
  472. #if CAREFUL
  473. cmp ebx,[ebp].src.end_1 ; input buffer overrun?
  474. jae LABEL(error_pop_5) ; yes, corrupted data
  475. #endif
  476. xor eax,eax
  477. mov ax,[ebx] ; (eax) = next word
  478. add ebx,2 ; (ebx) = ptr to next token
  479. cmp ax,255+MAX_LENGTH-1 ; length should be long enough
  480. jae LABEL(long_length_done)
  481. jmp LABEL(error_3)
  482. #if CAREFUL
  483. #ifndef DEBUG_LABEL
  484. #if DEBUG
  485. #define DEBUG_LABEL(label) label: mov eax, eax
  486. #else
  487. #define DEBUG_LABEL(label) label:
  488. #endif /* DEBUG */
  489. #endif /* DEBUG_LABEL */
  490. DEBUG_LABEL(error_pop_1)
  491. DEBUG_LABEL(error_pop_2)
  492. DEBUG_LABEL(error_pop_3)
  493. DEBUG_LABEL(careful_error_pop_3)
  494. DEBUG_LABEL(careful_error_pop_4)
  495. DEBUG_LABEL(careful_error_pop_5)
  496. pop eax ; pop Mask saved on stack
  497. DEBUG_LABEL(careful_error_1)
  498. DEBUG_LABEL(careful_error_2)
  499. DEBUG_LABEL(error_3)
  500. DEBUG_LABEL(careful_error_3)
  501. xor eax,eax ; decode error: return 0
  502. jmp ret_common
  503. ret_ok_eof:
  ; Reached from the pointer path with (eax) = token - 256.
  ; EOF is flagged only if output ended exactly at dst.end and that value is 0.
  504. cmp edi,[ebp].dst.end
  505. jne ret_ok
  506. test eax,eax
  507. jne ret_ok ; eof iff eax == 0
  508. mov eax,1
  509. mov [ebp].eof,eax
  510. ret_ok:
  511. mov eax,1 ; no [obvious] error: return 1
  512. ret_common:
  ; (eax) = result (1 = ok, 0 = error). The pops below mirror the pushes at
  ; function entry, so the stack must be balanced when this label is reached.
  513. mov [ebp].result, eax ; store result
  514. mov [ebp].src.last,ebx ; save last value of source ptr
  515. mov [ebp].dst.last,edi ; save last value of destination ptr
  516. pop ebp ; restore registers we used
  517. pop ebx
  518. pop ecx
  519. pop edx
  520. pop edi
  521. pop esi ; and return
  522. } /* end of __asm */
  523. #endif /* CAREFUL */
  524. #endif /* i386 */
  525. #endif /* -------------------- CODING_HUFF_ALL ------------------ */
  526. /* ----------------------- CODING_DIRECT2 ------------------------ */
  527. /* -------------- */
  528. #if CODING == CODING_DIRECT2
  529. #ifndef i386
  530. // C code: 73 MB/s at P3-500; asm code 80.5 MB/s
  531. /*
  532. Pseudocode:
  533. ----------
  534. length = NextWord ();
  535. offset = length >> DIRECT2_LEN_LOG;
  536. length &= DIRECT2_MAX_LEN;
  537. if (length == DIRECT2_MAX_LEN)
  538. {
  539. length = NextQuad ();
  540. if (length == 15)
  541. {
  542. length = NextByte ();
  543. if (length == 255)
  544. length = NextWord () - 15 - DIRECT2_MAX_LEN;
  545. length += 15;
  546. }
  547. length += DIRECT2_MAX_LEN;
  548. }
  549. length += MIN_MATCH;
  550. ++offset;
  551. memcpy (dst, dst - offset, length);
  552. dst += length;
  553. */
  554. #if !CAREFUL
  555. tag_t bmask = 0;
  556. xint ofs, len;
  557. const uchar *ptr = 0;
  558. uchar *dst = info->dst.beg;
  559. const uchar *src = info->src.beg;
  560. goto start;
  561. #endif /* !CAREFUL */
  562. LABEL (copy_byte):
  563. CAREFUL_OK_IF (dst >= info->dst.stop);
  564. CAREFUL_ERR_IF (src >= info->src.end);
  565. *dst++ = *src++; // copy next byte
  566. LABEL (next):
  567. if (bmask >= 0) do // while MSB(bmask) == 0
  568. {
  569. bmask <<= 1;
  570. CAREFUL_OK_IF (dst >= info->dst.stop);
  571. CAREFUL_ERR_IF (src >= info->src.end);
  572. *dst++ = *src++; // copy next byte
  573. } while (bmask >= 0);
  574. if ((bmask <<= 1) == 0) // if bmask == 0 reload it
  575. {
  576. START;
  577. CAREFUL_IF (src >= info->src.careful || dst >= info->dst.careful, restart);
  578. CAREFUL_ERR_IF (src >= info->src.end_tag);
  579. bmask = * (__unaligned tag_t *) src;
  580. src += sizeof (tag_t);
  581. if (bmask >= 0)
  582. {
  583. bmask = (bmask << 1) + 1;
  584. goto LABEL (copy_byte);
  585. }
  586. bmask = (bmask << 1) + 1;
  587. }
  588. #if !CAREFUL
  589. assert (dst < info->dst.end - 8);
  590. #endif
  591. CAREFUL_EOF_IF (dst >= info->dst.stop);
  592. CAREFUL_ERR_IF (src >= info->src.end_1);
  593. ofs = * (__unaligned uint16 *) src;
  594. src += 2;
  595. len = ofs;
  596. ofs >>= DIRECT2_LEN_LOG;
  597. len &= DIRECT2_MAX_LEN;
  598. ofs = ~ofs;
  599. #if !CAREFUL && (8 - MIN_MATCH < DIRECT2_MAX_LEN)
  // Fast pass only: a match of at most 8 bytes is copied as a fixed 8-byte block
  // and dst is then advanced by the real length (len + MIN_MATCH).
  600. if (len <= 8 - MIN_MATCH)
  601. {
  602. const uchar *src1 = dst + ofs;
  603. #if defined (i386) || defined (i386compat) // unaligned access is faster only on x86
  // ofs is negative here (it was complemented above); ofs < ~2 means ofs <= -4,
  // i.e. the source lies at least 4 bytes behind dst.
  604. if (ofs < ~2)
  605. {
  606. if (src1 < info->dst.beg) RET_ERR; // check for buffer underrun
  607. ofs = ((__unaligned uint32 *) src1)[0]; // quickly copy 8 bytes
  608. ((__unaligned uint32 *) dst)[0] = ofs;
  609. ofs = ((__unaligned uint32 *) src1)[1];
  610. ((__unaligned uint32 *) dst)[1] = ofs;
  611. dst += len + MIN_MATCH; // dst = next output position
  612. goto LABEL (next); // decode next token
  613. }
  614. #endif
  615. if (src1 < info->dst.beg) RET_ERR; // check for buffer underrun
  616. COPY_8_BYTES (dst, src1);
  617. dst += len + MIN_MATCH;
  618. goto LABEL (next);
  619. }
  620. #endif
  621. if (len == DIRECT2_MAX_LEN) // decode long length
  622. {
  623. if (ptr == 0)
  624. {
  625. CAREFUL_ERR_IF (src >= info->src.end);
  626. ptr = src;
  627. len = *src++ & 15;
  628. }
  629. else
  630. {
  631. len = *ptr >> 4;
  632. ptr = 0;
  633. }
  634. if (len == 15)
  635. {
  636. CAREFUL_ERR_IF (src >= info->src.end);
  637. len = *src++;
  638. if (len == 255)
  639. {
  640. CAREFUL_ERR_IF (src >= info->src.end_1);
  641. len = * (__unaligned uint16 *) src;
  642. src += 2;
  643. if (len < 255 + 15 + DIRECT2_MAX_LEN) RET_ERR;
  644. len += MIN_MATCH;
  645. goto LABEL (done_len);
  646. }
  647. len += 15;
  648. }
  649. len += DIRECT2_MAX_LEN + MIN_MATCH;
  650. goto LABEL (done_len);
  651. }
  652. len += MIN_MATCH;
  653. LABEL (done_len):
  654. info->src.last = src;
  655. src = dst + ofs;
  656. #if !CAREFUL
  657. if (dst + len >= info->dst.careful)
  658. goto careful_copy_tail;
  659. #else
  660. careful_copy_tail:
  661. if (dst + len > info->dst.stop)
  662. {
  663. if (src < info->dst.beg) RET_ERR;
  664. len = (xint) (info->dst.stop - dst);
  665. assert (len >= 0);
  666. COPY_BLOCK_SLOW (dst, src, len);
  667. src = info->src.last;
  668. RET_OK;
  669. }
  670. #endif /* !CAREFUL */
  671. if (src < info->dst.beg) RET_ERR;
  672. COPY_BLOCK_SLOW (dst, src, len); // copy block
  673. src = info->src.last; // restore input buffer ptr
  674. goto LABEL (next);
  675. #if CAREFUL
  676. ret_ok_eof:
  677. if (dst == info->dst.end)
  678. info->eof = 1;
  679. ret_ok:
  680. info->src.last = src;
  681. info->dst.last = dst;
  682. info->result = 1;
  683. return;
  684. ret_err:
  685. info->result = 0;
  686. return;
  687. #endif /* CAREFUL */
  688. #else /* ------------------------- i386 ---------------------------- */
  689. #if !CAREFUL
  690. __asm
  691. {
  692. mov eax,info // save info
  693. push ebx // save registers
  694. push ecx
  695. push edx
  696. push esi
  697. push edi
  698. push ebp
  699. #define PTR dword ptr [esp]
  700. #define DST_STOP dword ptr [esp+4*1]
  701. #define DST_CAREFUL dword ptr [esp+4*2]
  702. #define SRC_CAREFUL dword ptr [esp+4*3]
  703. #define SRC_END dword ptr [esp+4*4]
  704. #define SRC_END_1 dword ptr [esp+4*5]
  705. #define SRC_END_TAG dword ptr [esp+4*6]
  706. #define INFO dword ptr [esp+4*7]
  707. #define LOCALS 8
  708. sub esp,4*LOCALS // make room for locals
  709. mov edx,[eax].dst.stop
  710. mov DST_STOP,edx
  711. mov edx,[eax].dst.careful
  712. mov DST_CAREFUL,edx
  713. mov edx,[eax].src.careful
  714. mov SRC_CAREFUL,edx
  715. mov edx,[eax].src.end
  716. mov SRC_END,edx
  717. mov edx,[eax].src.end_1
  718. mov SRC_END_1,edx
  719. mov edx,[eax].src.end_tag
  720. mov SRC_END_TAG,edx
  721. xor edx,edx // ptr = 0
  722. mov PTR,edx
  723. mov INFO,eax
  724. mov edx,[eax].dst.beg
  725. mov ebp,edx
  726. mov edi,[eax].dst.beg
  727. mov ebx,[eax].src.beg
  728. xor eax,eax // bmask = 0
  729. jmp start
  730. #endif /* !CAREFUL */
  731. align 16
  732. LABEL (literal_1):
  // Register roles in this decoder: (eax) = bmask, (ebx) = src, (edi) = dst,
  // (ebp) = dst.beg. literal_1 stores the byte fetched at "literal" and loops.
  733. mov [edi],cl
  734. inc edi
  735. LABEL (literal):
  736. #if CAREFUL
  737. cmp edi,DST_STOP
  738. jae ret_ok // decoded everything?
  739. cmp ebx,SRC_END
  740. jae LABEL(ret_err_1)
  741. #endif
  742. mov cl,[ebx] // copy next byte
  743. add eax,eax // check most significant bit
  // mov and lea do not modify flags, so CF/ZF from the add above are still
  // valid at the jnc and jz below.
  744. lea ebx, [ebx+1]
  745. jnc LABEL (literal_1)
  746. mov [edi],cl
  747. lea edi, [edi+1]
  748. jz LABEL (start) // need reloading?
  749. LABEL (pointer):
  750. #if CAREFUL
  751. cmp edi,DST_STOP // decoded all the stuff? -- done
  752. jae ret_ok_eof
  753. cmp ebx,SRC_END_1
  754. jae LABEL(ret_err_2)
  755. #endif
  756. movzx edx, word ptr [ebx]
  757. mov ecx,edx
  758. shr edx,DIRECT2_LEN_LOG
  759. add ebx,2
  760. not edx // edx = -offset
  761. and ecx,DIRECT2_MAX_LEN // ecx = length - MIN_MATCH
  762. lea esi,[edi+edx]
  763. #if !CAREFUL && (8 - MIN_MATCH < DIRECT2_MAX_LEN)
  764. cmp cl,8 - MIN_MATCH // length > 8?
  765. ja LABEL (long_length)
  766. cmp esi,ebp // output buffer underrun?
  767. jb LABEL(ret_err_3)
  768. cmp edx,-3
  769. mov edx,[esi]
  770. jae LABEL (byte_by_byte)
  771. mov [edi],edx
  772. mov edx,[esi+4]
  773. mov [edi+4],edx
  774. lea edi,[edi+ecx+MIN_MATCH]
  775. add eax,eax
  776. jnc LABEL (literal)
  777. jnz LABEL (pointer)
  778. jmp LABEL (start)
  779. LABEL (byte_by_byte):
  780. add ecx,MIN_MATCH
  781. rep movsb
  782. add eax,eax
  783. jnc LABEL (literal)
  784. jnz LABEL (pointer)
  785. jmp LABEL (start)
  786. LABEL (long_length):
  787. #endif /* !CAREFUL && (8 - MIN_MATCH < DIRECT2_MAX_LEN) */
  788. cmp esi,ebp // output buffer underrun?
  789. jb LABEL(ret_err_4)
  790. mov edx,PTR
  791. cmp cl,DIRECT2_MAX_LEN
  792. jne LABEL (done_len)
  793. test edx,edx
  794. je LABEL (ptr_zero)
  795. movzx ecx, byte ptr [edx]
  796. xor edx,edx
  797. shr ecx,4
  798. jmp LABEL(done_quad)
  799. LABEL (ptr_zero):
  800. #if CAREFUL
  801. cmp ebx,SRC_END
  802. jae LABEL(ret_err_5)
  803. #endif
  804. movzx ecx, byte ptr [ebx]
  805. mov edx,ebx
  806. and ecx,15
  807. inc ebx
  808. LABEL(done_quad):
  809. mov PTR,edx
  810. cmp cl,15
  811. lea ecx,[ecx+DIRECT2_MAX_LEN]
  812. je LABEL(len255)
  813. LABEL(done_len):
  814. lea edx,[edi+ecx+MIN_MATCH] // edx = end of copy
  815. add ecx,MIN_MATCH
  816. #if !CAREFUL
  817. cmp edx,DST_CAREFUL // too close to end of buffer?
  818. jae careful_copy_tail
  819. #else
  820. careful_copy_tail:
  821. cmp edx,DST_STOP // ahead of output buffer?
  822. jbe LABEL (checked_eob)
  823. mov ecx,DST_STOP
  824. sub ecx,edi // ecx = corrected length
  825. rep movsb // copy substring
  826. jmp ret_ok // no errors, no EOF mark
  827. LABEL (checked_eob):
  828. #endif
  829. rep movsb // copy substring
  830. add eax,eax
  831. jnc LABEL (literal)
  832. jnz LABEL (pointer)
  833. align 16
  834. LABEL (start):
  835. #if !CAREFUL
  836. cmp ebx,SRC_CAREFUL // too close to end of buffer(s)?
  837. jae careful_start // be careful if so
  838. cmp edi,DST_CAREFUL
  839. jae careful_start
  840. #else
  841. cmp ebx,SRC_END_TAG // input buffer overrun? -- corrupted data
  842. jae LABEL(ret_err_6)
  843. #endif
  844. mov eax,[ebx]
  845. add ebx,4
  846. test eax,eax
  847. lea eax,[eax+eax+1]
  848. jns LABEL (literal)
  849. jmp LABEL (pointer)
  850. LABEL(len255):
  // Extended length: one more byte follows; if it is 255, a 16-bit length follows.
  // On exit to done_len, (ecx) = length - MIN_MATCH.
  851. #if CAREFUL
  852. cmp ebx,SRC_END
  853. jae LABEL(ret_err_7)
  854. #endif
  855. movzx ecx, byte ptr [ebx]
  856. inc ebx
  857. cmp cl,255
  858. lea ecx,[ecx+15+DIRECT2_MAX_LEN] // lea keeps the flags of the cmp above
  859. jne LABEL(done_len)
  860. #if CAREFUL
  861. cmp ebx,SRC_END_1
  862. jae LABEL(ret_err_7)
  863. #endif
  864. movzx ecx, word ptr [ebx]
  865. add ebx,2
  866. cmp ecx,255 + 15 + DIRECT2_MAX_LEN
  867. jae LABEL (done_len)
  // 16-bit length too small -- corrupted data. The jump must be explicit: in the
  // fast pass nothing follows this point, so execution would otherwise fall
  // through into the first label of the careful pass instead of failing.
  jmp LABEL (ret_err_4)
  868. #if CAREFUL
  869. #ifndef DEBUG_LABEL
  870. #if DEBUG
  871. #define DEBUG_LABEL(label) label: mov eax, eax
  872. #else
  873. #define DEBUG_LABEL(label) label:
  874. #endif /* DEBUG */
  875. #endif /* DEBUG_LABEL */
  876. DEBUG_LABEL(careful_ret_err_1)
  877. DEBUG_LABEL(careful_ret_err_2)
  878. DEBUG_LABEL(ret_err_3)
  879. DEBUG_LABEL(ret_err_4)
  880. DEBUG_LABEL(careful_ret_err_4)
  881. DEBUG_LABEL(careful_ret_err_5)
  882. DEBUG_LABEL(careful_ret_err_6)
  883. DEBUG_LABEL(careful_ret_err_7)
  884. xor eax,eax
  885. jmp ret_common
  886. ret_ok_eof:
  887. mov ecx,INFO
  888. mov eax,1
  889. cmp edi,[ecx].dst.end
  890. jne ret_ok
  891. mov [ecx].eof,eax
  892. ret_ok:
  893. mov eax,1
  894. mov ecx,INFO
  895. mov [ecx].src.last,ebx
  896. mov [ecx].dst.last,edi
  897. ret_common:
  898. MOV ecx,INFO
  899. mov [ecx].result,eax
  900. add esp,4*LOCALS
  901. pop ebp
  902. pop edi
  903. pop esi
  904. pop edx
  905. pop ecx
  906. pop ebx
  907. } /* __asm */
  908. #endif /* CAREFUL */
  909. #endif /* i386 */
  910. #endif /* ----------------- CODING == CODING_DIRECT2 --------------- */
  911. /* --------------------------- End of code ------------------------- */
  912. /* ----------- */
  913. #if CAREFUL
  914. } /* end of "do_decode" */
  915. #endif /* CAREFUL */
  916. #undef CAREFUL
  917. #undef LABEL
  918. #undef CAREFUL_LABEL
  919. #undef CAREFUL_OK_IF
  920. #undef CAREFUL_ERR_IF
  921. #undef CAREFUL_EOF_IF
  922. #undef CAREFUL_IF
  923. #undef START
  924. #undef FAST_COPY_DONE