Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

601 lines
19 KiB

  /* On 32-bit x86 builds with a sufficiently new MSVC, silence warning C4731:
     the inline-assembly encoder further down uses EBP as a working register. */
  1. #if _MSC_FULL_VER >= 13008827 && defined(_M_IX86)
  2. #pragma warning(disable:4731) // EBP modified with inline asm
  3. #endif
  4. #if CHAIN >= 2
  /*
   * find_match -- walk the chain of earlier positions linked through
   * p->x.z_next[] and look for a match with the data at v.orig.pos that is
   * longer than the current v.match.len.
   *
   *   p   compressor state. `v` is not defined in this part of the file;
   *       presumably it is a macro that reaches per-stream state through p
   *       -- TODO confirm.
   *
   * Nothing is returned. Whenever a longer match is found, v.match.len and
   * v.match.pos are updated in place (see SET_LENGTH below). The caller is
   * expected to have seeded v.match.len with the length that must be beaten.
   */
  5. INLINE void find_match (prs *p)
  6. {
  7. const uchar *p1;
  /* n = position being matched; k and m alternate as cursors along the chain */
  8. xint k, n, m;
  9. #if CHAIN >= 3
  /* search budget: one unit is spent per unrolled group of 8 candidates */
  10. xint chain = v.chain;
  11. #endif
  /* Link slot 0 to the current position. Presumably chains end at index 0, so a
     walk that runs off the end arrives at n itself, passes the 2-byte filter
     trivially and is stopped by the `k == n` test below -- TODO confirm. */
  12. p->x.z_next[0] = (z_index_t) (k = n = v.orig.pos);
  13. do
  14. {
  15. m = p->x.z_next[k];
  16. {
  /* p1 is biased by (match.len - 1), so p1 + x addresses the two bytes at
     offsets match.len-1 and match.len from position x. c holds those two bytes
     for the current position; a candidate must agree on them to be able to
     beat the current best length. */
  17. uint16 c = *(__unaligned uint16 *)((p1 = v.orig.ptr + v.match.len - 1) + n);
  18. #if CHAIN >= 3
  19. do
  20. {
  21. if (--chain < 0)
  22. return;
  23. #endif
  /* Unrolled chain walk: each line prefetches the next link into the other
     cursor and then tests the candidate just reached against c. */
  24. k = p->x.z_next[m]; if (*(__unaligned uint16 *) (p1 + m) == c) goto same_m;
  25. m = p->x.z_next[k]; if (*(__unaligned uint16 *) (p1 + k) == c) goto same_k;
  26. k = p->x.z_next[m]; if (*(__unaligned uint16 *) (p1 + m) == c) goto same_m;
  27. m = p->x.z_next[k]; if (*(__unaligned uint16 *) (p1 + k) == c) goto same_k;
  28. k = p->x.z_next[m]; if (*(__unaligned uint16 *) (p1 + m) == c) goto same_m;
  29. m = p->x.z_next[k]; if (*(__unaligned uint16 *) (p1 + k) == c) goto same_k;
  30. k = p->x.z_next[m]; if (*(__unaligned uint16 *) (p1 + m) == c) goto same_m;
  31. m = p->x.z_next[k]; if (*(__unaligned uint16 *) (p1 + k) == c) goto same_k;
  32. #if CHAIN < 3
  /* No budget loop in this configuration: try one more candidate, then give up. */
  33. if (*(__unaligned uint16 *) (p1 + m) == c) goto same_m;
  34. return;
  35. #else
  36. }
  37. while (1);
  38. #endif
  39. same_m:
  40. k = m;
  41. same_k:
  /* k is now the candidate position that passed the 2-byte filter */
  42. if (k == n)
  43. return;
  44. #if MAX_OFFSET < BUFF_SIZE_LOG
  /* candidate is farther back than the offset limit (1 << MAX_OFFSET): stop */
  45. if (n - k >= (1 << MAX_OFFSET))
  46. return;
  47. #endif
  48. }
  49. {
  50. const uchar *p2;
  /* from here p1 -> data at the current position, p2 -> data at the candidate */
  51. p1 = v.orig.ptr;
  52. p2 = p1 + k;
  53. p1 += n;
  /* compare the first four bytes in one go; m receives their XOR */
  54. if ((m = *(__unaligned uint32 *)p2 ^ *(__unaligned uint32 *)p1) != 0)
  55. {
  56. #if MIN_MATCH <= 3
  /* Low 24 bits of the XOR are zero (the first three bytes in memory on a
     little-endian target): record a 3-byte match, but only if nothing longer
     than 2 is known yet and the three bytes lie within v.orig.end. */
  57. if ((m & 0xffffff) == 0 && v.match.len <= 2 && p1 + 3 <= v.orig.end)
  58. {
  59. v.match.len = 3;
  60. v.match.pos = k;
  61. }
  62. #endif
  63. goto cont;
  64. }
  /* At least 4 bytes agree. While p1 <= v.orig.end_16, compare 16 bytes per
     iteration; the first pass enters at entry4 because bytes 0..3 are already
     known to be equal. */
  65. if (p1 <= v.orig.end_16)
  66. {
  67. goto entry4;
  68. do
  69. {
  /* X(i): on a mismatch at byte i, advance p1 to the mismatch and leave via chk */
  70. #define X(i) if (p1[i] != p2[i]) {p1 += i; goto chk;}
  71. X(0); X(1); X(2); X(3);
  72. entry4:
  73. X(4); X(5); X(6); X(7); X(8);
  74. X(9); X(10); X(11); X(12); X(13); X(14); X(15);
  75. #undef X
  76. p1 += 16; p2 += 16;
  77. }
  78. while (p1 <= v.orig.end_16);
  79. }
  /* tail: compare byte by byte up to v.orig.end */
  80. while (p1 != v.orig.end)
  81. {
  82. if (*p1 != *p2)
  83. goto chk;
  84. ++p1;
  85. ++p2;
  86. }
  /* SET_LENGTH: turn n into the match length (p1 - v.orig.ptr) - n and, if it
     beats v.match.len, record the length together with candidate position k.
     n is overwritten here and restored at `cont`. */
  87. #define SET_LENGTH() \
  88. n = -n; \
  89. n += (xint) (p1 - v.orig.ptr); \
  90. if (n > v.match.len) \
  91. { \
  92. v.match.len = n; \
  93. v.match.pos = k; \
  94. }
  /* the comparison reached v.orig.end: record the length and stop searching */
  95. SET_LENGTH ();
  96. return;
  97. }
  98. chk:
  99. SET_LENGTH ();
  100. cont:
  /* restore n (SET_LENGTH clobbered it); k still holds the last candidate, so
     the next iteration resumes the walk from z_next[k] */
  101. n = v.orig.pos;
  102. }
  /* the outer loop repeats only when CHAIN >= 3; otherwise the body runs once */
  103. while (CHAIN >= 3);
  104. }
  /*
   * encode_pass1 (CHAIN >= 2 variant) -- step through the input from v.orig.pos
   * until v.orig.stop is reached, emitting either a literal byte or a match
   * pointer at each step.
   *
   *   p   compressor state; output is produced through write_lit / write_ptr
   *       starting at v.temp.ptr, and the advanced output pointer is stored
   *       back into v.temp.ptr on exit.
   *
   * v.orig.pos is advanced by 1 per literal and by v.match.len per match.
   */
  105. static void encode_pass1 (prs *p)
  106. {
  107. uchar *ptr = v.temp.ptr;
  108. do
  109. {
  /* z_next[pos] == 0: no chain to follow from this position, emit a literal
     (v.match.len is deliberately left untouched on this path) */
  110. if (p->x.z_next[v.orig.pos] == 0)
  111. goto literal;
  /* seed the length to beat, then search the chain */
  112. v.match.len = MIN_MATCH-1;
  113. find_match (p);
  114. if (v.match.len <= MIN_MATCH-1)
  115. {
  116. literal:
  /* write_lit's result is not assigned, so presumably it is a macro that
     advances ptr itself -- TODO confirm */
  117. write_lit (p, ptr, v.orig.ptr[v.orig.pos]);
  118. v.orig.pos += 1;
  119. }
  120. else
  121. {
  /* emit (distance back to the match, match length) and skip the matched bytes */
  122. ptr = write_ptr (p, ptr, v.orig.pos - v.match.pos, v.match.len);
  123. v.orig.pos += v.match.len;
  124. }
  125. }
  126. while (v.orig.pos < v.orig.stop);
  127. v.temp.ptr = ptr;
  128. }
  129. #endif /* CHAIN >= 2 */
  130. #if CHAIN < 2
  131. #if CODING != CODING_DIRECT2 || !defined (i386)
  /*
   * encode_pass1 (CHAIN < 2, portable C variant) -- single-candidate encoder
   * pass. For each input position the 3-byte hash selects one earlier
   * candidate; if its first three bytes agree the match is extended and
   * emitted with write_ptr, otherwise the byte is emitted with write_lit.
   *
   *   p   compressor state. With CHAIN <= 0 the table p->x.q_last[] holds
   *       pointers; otherwise p->x.z_next[] holds positions relative to
   *       v.orig.ptr.
   *
   * On exit v.orig.pos and v.temp.ptr are updated; v.orig.ptr_stop is set on
   * entry from v.orig.stop.
   */
  132. static void encode_pass1 (prs *p)
  133. {
  /* b = current input pointer, b1 = candidate pointer (later reused as the
     insertion cursor), stop = end of the range to encode */
  134. const uchar *b, *b1, *stop;
  135. uchar *ptr;
  136. #if CHAIN > 0
  /* pos mirrors b as an index (asserted below: pos == b - v.orig.ptr) */
  137. xint pos = v.orig.pos;
  138. #endif
  139. b = v.orig.ptr;
  140. v.orig.ptr_stop = stop = b + v.orig.stop;
  141. b += v.orig.pos;
  142. ptr = v.temp.ptr;
  /* not at the start of the buffer: skip the unconditional first literal */
  143. if (b != v.orig.ptr)
  144. goto literal_entry;
  145. for (;;)
  146. {
  /* literal loop: emit *b, then examine the candidate for the next position */
  147. do
  148. {
  149. #if MAX_OFFSET < BUFF_SIZE_LOG
  150. next:
  151. #endif
  152. write_lit (p, ptr, *b);
  153. ++b;
  154. #if CHAIN > 0
  155. ++pos;
  156. #endif
  157. literal_entry:
  158. if (b >= stop)
  159. goto ret;
  160. {
  /* fetch the previous occupant of the hash slot as candidate and store the
     current position in its place */
  161. uxint h;
  162. #if CHAIN <= 0
  163. h = Q_HASH_SUM (b);
  164. b1 = p->x.q_last[h];
  165. p->x.q_last[h] = b;
  166. #else
  167. assert (pos == b - v.orig.ptr);
  168. h = Z_HASH_SUM (b);
  169. b1 = v.orig.ptr + p->x.z_next[h];
  170. p->x.z_next[h] = (z_index_t) pos;
  171. #endif
  172. }
  173. #if MAX_OFFSET < BUFF_SIZE_LOG
  /* candidate is at least (1 << MAX_OFFSET) bytes back: treat as no match */
  174. if (b1 <= b - (1 << MAX_OFFSET))
  175. goto next;
  176. #endif
  177. }
  /* keep emitting literals until a non-null candidate agrees on 3 bytes */
  178. while (b1 == 0 || b1[0] != b[0] || b1[1] != b[1] || b1[2] != b[2]);
  179. assert (v.orig.ptr + v.orig.size - b > 7);
  180. {
  /* b0 remembers where the match starts; b and b1 advance in lockstep */
  181. const uchar *b0 = b;
  /* fast path enters the unrolled loop at byte 3 (bytes 0..2 already agree);
     otherwise fall back to the byte-wise loop, which rechecks from byte 0 */
  182. if (b <= v.orig.end_16)
  183. goto match_entry_3;
  184. goto match_careful;
  185. do
  186. {
  /* X(i): on a mismatch at byte i, advance both pointers to it and finish */
  187. #define X(i) if (b1[i] != b[i]) {b += i; b1 += i; goto eval_len;}
  188. X(0); X(1); X(2);
  189. match_entry_3:
  190. X(3); X(4); X(5); X(6); X(7);
  191. X(8); X(9); X(10); X(11);
  192. X(12); X(13); X(14); X(15);
  193. #undef X
  194. b += 16; b1 += 16;
  195. }
  196. while (b <= v.orig.end_16);
  197. match_careful:
  198. while (b != v.orig.end && *b1 == *b)
  199. {
  200. ++b;
  201. ++b1;
  202. }
  203. eval_len:
  /* compile-time guard: this code path refuses to build if BUFF_SIZE_LOG > 16 */
  204. #if BUFF_SIZE_LOG > 16
  205. #error
  206. #endif
  /* emit (distance = b - b1, length = b - b0) */
  207. ptr = write_ptr (p, ptr, (xint)(b - b1), (xint)(b - b0));
  /* b1 becomes the insertion cursor; b0 itself was stored in the table when
     it was hashed above, so insertion resumes at b0 + 1 */
  208. b1 = b0;
  209. }
  210. ++b1;
  211. #if CHAIN > 0
  212. ++pos;
  213. #endif
  /* Match ends beyond v.orig.end_3: insert only positions below end_3,
     presumably so that hashing never reads past the buffer -- TODO confirm
     against the Q_HASH_SUM / Z_HASH_SUM definitions. */
  214. if (b > v.orig.end_3)
  215. {
  216. while (b1 < v.orig.end_3)
  217. {
  218. #if CHAIN <= 0
  219. p->x.q_last[Q_HASH_SUM (b1)] = b1;
  220. #else
  221. assert (pos == b1 - v.orig.ptr);
  222. p->x.z_next[Z_HASH_SUM (b1)] = (z_index_t) pos;
  223. ++pos;
  224. #endif
  225. ++b1;
  226. }
  /* NOTE(review): with CHAIN > 0, pos stops at the last inserted position and
     may be behind b here; harmless only if b >= stop always holds on this
     path -- confirm the relation between v.orig.stop and v.orig.end_3. */
  227. goto literal_entry;
  228. }
  /* normal case: insert every remaining position covered by the match */
  229. do
  230. {
  231. #if CHAIN <= 0
  232. p->x.q_last[Q_HASH_SUM (b1)] = b1;
  233. #else
  234. assert (pos == b1 - v.orig.ptr);
  235. p->x.z_next[Z_HASH_SUM (b1)] = (z_index_t) pos;
  236. ++pos;
  237. #endif
  238. ++b1;
  239. }
  240. while (b1 != b);
  241. goto literal_entry;
  242. }
  243. ret:
  /* publish the final input position and output pointer */
  244. v.orig.pos = (xint)(b - v.orig.ptr);
  245. v.temp.ptr = ptr;
  246. }
  247. #else /* CODING != CODING_DIRECT2 */
  /*
   * encode_pass1 (CHAIN < 2, CODING_DIRECT2, i386) -- x86 inline-assembly
   * encoder pass. For each input position it hashes three bytes, takes the
   * previous occupant of the hash slot as the only candidate, and emits either
   * a literal byte or a packed 16-bit (offset, length) pointer, with longer
   * lengths spilling into a shared nibble, a byte and finally a 16-bit word.
   *
   *   PrsPtr   compressor state; fields are reached through the V macro.
   *
   * Register roles inside the __asm block:
   *   edx (PRS)   state pointer, loaded from PrsPtr
   *   esi         current input pointer (b)
   *   edi         candidate pointer (b1) / scratch
   *   ebx         output pointer (V.temp.ptr)
   *   ebp (TAG)   tag bit accumulator (V.temp.tag_mask); the caller's EBP is
   *               saved with push/pop around the whole block
   *   eax, ecx    scratch
   *
   * Tag bits are shifted into TAG one per emitted item (0 = literal via
   * `add TAG,TAG`, 1 = pointer via `stc` + `adc TAG,TAG`). WRITE_TAG_MASK
   * resets TAG to 1, so a carry out of the shift means the word is full.
   *
   * On exit V.temp.ptr, V.temp.tag_mask and V.orig.pos are written back.
   */
  248. static void encode_pass1 (prs *PrsPtr)
  249. {
  250. #define PRS edx
  251. #define TAG ebp
  252. #define TAGW bp
  253. // access to prs structure fields
  254. #define V [PRS - SIZE prs] prs.c
  255. // Flush the full tag word: store TAG at V.temp.tag_ptr, reserve the next 4 output bytes as the new tag slot, reset TAG to 1; spoils ecx
  256. #define WRITE_TAG_MASK() \
  257. __asm mov ecx, V.temp.tag_ptr \
  258. __asm mov V.temp.tag_ptr, ebx \
  259. __asm add ebx, 4 \
  260. __asm mov [ecx], TAG \
  261. __asm mov TAG, 1
  262. #if CHAIN <= 0
  263. // access to respective hash table entry
  264. #define Q_HTABLE(idx) dword ptr [PRS + idx*4] prs.x.q_last
  265. // evaluate hash sum of [esi] on eax; spoils eax, ecx, edi
  266. #define Q_HASH_SUM_ASM() \
  267. __asm movzx eax, byte ptr [esi] \
  268. __asm movzx ecx, byte ptr [esi+1] \
  269. __asm movzx edi, byte ptr [esi+2] \
  270. __asm lea ecx, [ecx + eax * (1 << (Q_HASH_SH1 - Q_HASH_SH2))] \
  271. __asm lea eax, [edi + ecx * (1 << Q_HASH_SH2)]
  272. #else
  273. // access to respective hash table entry
  274. #define Z_HTABLE(idx) word ptr [PRS + idx*2] prs.x.z_next
  275. // evaluate hash sum of [esi] on eax; spoils eax, ecx, edi
  276. #define Z_HASH_SUM_ASM() \
  277. __asm movzx eax, byte ptr [esi] \
  278. __asm movzx ecx, byte ptr [esi+1] \
  279. __asm movzx edi, byte ptr [esi+2] \
  280. __asm movzx eax, word ptr z_hash_map[eax*2] \
  281. __asm movzx ecx, word ptr z_hash_map[ecx*2][512] \
  282. __asm movzx edi, word ptr z_hash_map[edi*2][1024] \
  283. __asm xor eax, ecx \
  284. __asm xor eax, edi
  285. #endif /* CHAIN <= 0 */
  286. __asm
  287. {
  288. push ebp // save ebp
  289. mov PRS, PrsPtr // PRS = PrsPtr (globally); loaded before EBP is reused as TAG
  290. // esi = b
  291. // edi = b1
  292. // ebx = V.prs.temp.ptr
  293. // TAG = V.temp.tag_mask
  294. mov esi, V.orig.ptr // obtain b, b1, temp.ptr, and temp.mask
  295. mov eax, V.orig.stop
  296. add eax, esi
  297. mov V.orig.ptr_stop, eax // and set orig.ptr_stop by orig.stop
  298. add esi, V.orig.pos
  299. mov ebx, V.temp.ptr
  300. mov TAG, V.temp.tag_mask
  301. cmp esi, V.orig.ptr // at beginning of buffer?
  302. jne write_literal_entry // no: go look for a match; yes: fall through and write a literal immediately
  303. write_literal:
  304. mov al, [esi] // read the literal
  305. inc ebx // shift dst ptr in advance
  306. inc esi // shift src ptr to next character
  307. mov [ebx-1], al // emit literal
  308. add TAG, TAG // write tag bit 0
  309. jc write_literal_tag_new // save tag word if it is full
  310. write_literal_tag_done:
  311. write_literal_entry:
  312. cmp esi, V.orig.ptr_stop // processed everything?
  313. jae pass1_stop // yes, stop
  314. #if CHAIN <= 0
  315. Q_HASH_SUM_ASM () // evaluate hash sum
  316. #if MAX_OFFSET < BUFF_SIZE_LOG
  317. lea ecx, [esi - (1 << MAX_OFFSET) + 1] // min. allowed left bound
  318. #endif
  319. mov edi, Q_HTABLE (eax) // edi = candidate ptr
  320. mov Q_HTABLE (eax), esi // save current ptr
  321. #else
  322. Z_HASH_SUM_ASM () // evaluate hash sum
  323. mov ecx,V.orig.ptr
  // 32-bit destination: MOVZX has no documented word-to-word form, and edi is
  // used as a full 32-bit offset by the add below
  324. movzx edi, Z_HTABLE (eax) // edi = offset to candidate ptr
  325. sub esi, ecx // esi = offset to current ptr
  326. add edi, ecx // edi = candidate ptr
  327. mov Z_HTABLE (eax), si // store current ptr offset
  328. add esi, ecx // restore current ptr
  329. #if MAX_OFFSET < BUFF_SIZE_LOG
  330. lea ecx, [esi - (1 << MAX_OFFSET) + 1] // min. allowed left bound
  331. #endif
  332. #endif /* CHAIN <= 0 */
  333. #if MAX_OFFSET < BUFF_SIZE_LOG
  334. cmp edi, ecx // candidate is in window?
  335. js write_literal // no, then emit literal
  336. #endif
  337. test edi, edi // is it NULL?
  338. jz write_literal // emit literal if so
  339. mov eax, [esi] // get first 4 src bytes
  340. sub eax, [edi] // diff them with first 4 candidate bytes
  341. je length_4 // if no diff then match is at least 4 bytes
  342. test eax, 0xffffff // is there any difference in first 3 bytes?
  343. jne write_literal // if yes emit literal
  344. mov ecx, 3 // save match ptr of length ECX
  345. sub edi, esi // edi = -offset
  // write_small_ptr: in  esi = match start, ecx = length, edi = -offset
  //                  out eax = end of match, esi = match start + 1
  346. write_small_ptr:
  347. lea eax, [esi+ecx] // eax = end of src match
  348. not edi // edi = offset-1
  349. add ebx, 2 // adjust output ptr in advance
  350. shl edi, DIRECT2_LEN_LOG // make room for length
  351. inc esi // esi = next substring (current already inserted)
  352. lea edi, [edi + ecx - MIN_MATCH] // combine offset and short length
  353. stc // set carry bit (the mov below leaves flags alone)
  354. mov [ebx-2], di // save packed pointer
  355. adc TAG, TAG // write tag bit 1
  356. jc write_pointer_tag_new // write tag word when it is full
  357. write_pointer_tag_done:
  358. cmp eax, V.orig.end_3 // is it too close to end of buffer?
  359. ja insert_tail // if yes process it specially, avoiding read overrun
  360. #if CHAIN <= 0
  361. push TAG // save tag_mask
  362. mov TAG, eax // TAG = end-of-match (eax is spoiled by the hash macro)
  363. insert_all:
  364. Q_HASH_SUM_ASM () // evaluate hash sum
  365. mov Q_HTABLE (eax), esi // save current ptr
  366. inc esi // shift to next position
  367. cmp esi, TAG // inserted all substrings in the match?
  368. jne insert_all // continue until finished
  369. pop TAG // restore tag_mask value
  370. jmp write_literal_entry // process next substring
  371. #else
  372. push TAG // save tag_mask
  373. push eax // save end-of-match
  374. mov TAG, esi // TAG = current ptr
  375. sub TAG, V.orig.ptr // TAG = current ptr offset
  376. insert_all:
  377. Z_HASH_SUM_ASM () // evaluate hash sum
  378. mov Z_HTABLE (eax), TAGW // save current offset
  379. inc esi // shift to next position
  380. inc TAG // increase offset
  381. cmp esi, [esp] // inserted all substrings in the match?
  382. jne insert_all // continue until finished
  383. pop eax // remove end-of-match ptr from the stack
  384. pop TAG // restore tag_mask
  385. jmp write_literal_entry // process next substring
  386. #endif /* CHAIN <= 0 */
  387. length_4:
  388. #define KNOWN_LENGTH 4 // we know that first 4 bytes match
  389. #if DIRECT2_MAX_LEN + MIN_MATCH >= 8
  390. mov eax, [esi+4] // fetch next 4 bytes
  391. sub eax, [edi+4] // get the diff between src and candidate
  392. jz length_8 // do long compare if 8+ bytes match
  393. bsf ecx, eax // ecx = # of first non-zero bit
  394. sub edi, esi // edi = -offset
  395. shr ecx, 3 // ecx = # of first non-zero byte
  396. not edi // edi = offset-1
  397. add ecx, 4 // plus previous 4 matching bytes = match length
  398. add ebx, 2 // adjust output ptr in advance
  399. lea eax, [esi+ecx] // eax = end of src match
  400. shl edi, DIRECT2_LEN_LOG // make room for length
  401. inc esi // esi = next substring (current already inserted)
  402. lea edi,[edi+ecx-MIN_MATCH] // combine offset and short length
  403. stc // set carry bit
  404. mov [ebx-2], di // save packed pointer
  405. adc TAG, TAG // write tag bit 1
  406. jnc write_pointer_tag_done // tag word not full yet: continue
  407. WRITE_TAG_MASK ()
  408. jmp write_pointer_tag_done
  409. length_8:
  410. #undef KNOWN_LENGTH
  411. #define KNOWN_LENGTH 8 // we know that first 8 bytes match
  412. #endif /* DIRECT2_MAX_LEN + MIN_MATCH >= 8 */
  413. mov eax, esi // eax = beginning of the string
  414. mov ecx, V.orig.end // ecx = end of buffer
  415. add esi, KNOWN_LENGTH // shift to first untested src byte
  416. add edi, KNOWN_LENGTH // shift to first untested candidate
  417. sub ecx, esi // ecx = max compare length (ZF set here if it is 0)
  418. rep cmpsb // compare src and candidate
  419. je match_complete // if eq then match till end of buffer
  420. match_complete_done:
  421. lea ecx, [esi-1] // ecx = end of match
  422. sub edi, esi // edi = -offset
  423. sub ecx, eax // ecx = match length
  424. mov esi, eax // esi = src ptr
  425. cmp ecx, DIRECT2_MAX_LEN+MIN_MATCH // small length?
  426. jb write_small_ptr // write ptr if so
  427. not edi // edi = offset-1
  428. lea eax, [esi+ecx] // eax = end of match
  429. shl edi, DIRECT2_LEN_LOG // make room for length
  430. sub ecx, DIRECT2_MAX_LEN+MIN_MATCH // decrease the length
  431. add edi, DIRECT2_MAX_LEN // mark length as long
  432. push eax // save end of match
  433. mov [ebx], di // write packed pointer
  434. mov al, cl // al = (ecx <= 15 ? cl : 15)
  435. cmp ecx, 15
  436. jbe match_less_15
  437. mov al, 15
  438. match_less_15:
  439. mov edi, V.stat.ptr // edi = quad_ptr
  440. add ebx, 2 // wrote 2 bytes, move output ptr
  441. test edi, edi // if quad_ptr != NULL write upper 4 bits
  442. jne match_have_ptr
  443. mov V.stat.ptr, ebx // remember this byte: its upper 4 bits are still free
  444. mov [ebx], al // write lower 4 bits
  445. inc ebx // wrote 1 byte, move output ptr
  446. jmp match_done_ptr // continue execution
  447. match_have_ptr:
  448. shl al, 4 // will write into upper 4 bits
  449. mov dword ptr V.stat.ptr, 0 // no more space in this quad_bit[0]
  450. or [edi], al // write upper 4 bits
  451. match_done_ptr:
  452. sub ecx, 15 // adjusted length < 15?
  453. jae match_long_long_length // if not continue encoding
  454. match_finish_2:
  455. inc esi // shift to next output position
  456. pop eax // restore eax = end-of-match
  457. stc // set carry flag
  458. adc TAG, TAG // write tag bit 1
  459. jnc write_pointer_tag_done // continue execution if do not need to flush
  460. write_pointer_tag_new: // write tag word and return to pointers
  461. WRITE_TAG_MASK ()
  462. jmp write_pointer_tag_done
  463. match_long_long_length:
  464. mov [ebx], cl // write the length as a byte
  465. inc ebx // move output ptr
  466. cmp ecx, 255 // adjusted length fits in byte?
  467. jb match_finish_2 // if so ptr is written
  468. add ecx, DIRECT2_MAX_LEN+15 // restore full length - MIN_MATCH
  469. mov byte ptr [ebx-1], 255 // mark byte length as "to be continued"
  470. mov [ebx], cx // write full length
  471. add ebx, 2 // move output ptr
  472. jmp match_finish_2
  473. write_literal_tag_new: // write tag word and return to literals
  474. WRITE_TAG_MASK ()
  475. jmp write_literal_tag_done
  476. match_complete: // cmpsb compared till end of buffer
  477. inc esi // increase esi so that esi-1 is the end of match, as after a mismatch
  478. inc edi // increase edi to keep edi-esi unchanged
  479. jmp match_complete_done // resume execution
  480. insert_tail:
  481. push eax // save end-of-match
  482. jmp insert_tail_1
  483. insert_tail_next:
  484. #if CHAIN <= 0
  485. Q_HASH_SUM_ASM () // evaluate hash sum
  486. mov Q_HTABLE (eax), esi // insert current src pointer
  487. #else
  488. Z_HASH_SUM_ASM () // evaluate hash sum
  489. mov ecx, esi
  490. sub ecx, V.orig.ptr // ecx = current ptr offset
  491. mov Z_HTABLE (eax), cx // save offset in hash table
  492. #endif /* CHAIN <= 0 */
  493. inc esi // and move it to next substring
  494. insert_tail_1: // end of match exceeds end_3 -- be careful
  495. cmp esi, V.orig.end_3 // inserted up to end_3?
  496. jb insert_tail_next // if not continue
  497. pop esi // esi = end of match
  498. jmp write_literal_entry
  499. pass1_stop:
  500. mov V.temp.ptr, ebx // save register variables
  501. mov V.temp.tag_mask, TAG
  502. sub esi, V.orig.ptr
  503. mov V.orig.pos, esi
  504. pop ebp // restore ebp
  505. } /* __asm */
  506. }
  /* Remove the helper macros that the inline-assembly encoder defined. */
  507. #undef V
  508. #undef PRS
  509. #undef TAG
  510. #undef TAGW
  511. #undef Q_HTABLE
  512. #undef Q_HASH_SUM_ASM
  513. #undef Z_HTABLE
  514. #undef Z_HASH_SUM_ASM
  /* NOTE(review): WRITE_SMALL_PTR is not defined anywhere in the visible source,
     while WRITE_TAG_MASK is defined by the assembly encoder and never
     #undef'd -- confirm which name was intended here. */
  515. #undef WRITE_SMALL_PTR
  516. #undef KNOWN_LENGTH
  517. #endif /* CODING != CODING_DIRECT2 */
  518. #endif /* CHAIN < 2 */
  /* CHAIN, find_match and encode_pass1 are undefined at the end of the file;
     presumably an including file defines them before each inclusion so that
     several encoder variants can be generated -- TODO confirm. */
  519. #undef CHAIN
  520. #undef find_match
  521. #undef encode_pass1