Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1468 lines
40 KiB

  1. /******************************Module*Header*******************************\
  2. * Module Name: srcblt16.cxx
  3. *
  4. * This contains the bitmap simulation functions that blt to a 16 bit/pel
  5. * DIB surface.
  6. *
  7. * Created: 07-Feb-1991 19:27:49
  8. * Author: Patrick Haluptzok patrickh
  9. *
  10. * Copyright (c) 1990-1999 Microsoft Corporation
  11. *
  12. \**************************************************************************/
  13. #include "precomp.hxx"
  14. // Turn off validations
  15. #if 1
  16. // While the #if above is hard-wired to 1, no build calls any verification code:
  17. #define VERIFYS16D16(psb)
  18. #define VERIFYS24D16(psb)
  19. #define VERIFYS32D16(psb)
  20. #else
  21. // On checked builds, verify the RGB conversions:
  22. VOID VERIFYS16D16(PBLTINFO psb)
  23. {
  24. // We assume we are doing left to right top to bottom blting
  25. // If it was on the same surface it would be the identity case.
  26. ASSERTGDI(psb->xDir == 1, "vSrcCopyS16D16 - direction not left to right");
  27. ASSERTGDI(psb->yDir == 1, "vSrcCopyS16D16 - direction not up to down");
  28. // These are our holding variables
  29. PUSHORT pusSrcTemp;
  30. PUSHORT pusDstTemp;
  31. ULONG cxTemp;
  32. PUSHORT pusSrc = (PUSHORT) (psb->pjSrc + (2 * psb->xSrcStart));
  33. PUSHORT pusDst = (PUSHORT) (psb->pjDst + (2 * psb->xDstStart));
  34. ULONG cx = psb->cx;
  35. ULONG cy = psb->cy;
  36. XLATE *pxlo = psb->pxlo;
  37. ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");
  38. while(1)
  39. {
  40. pusSrcTemp = pusSrc;
  41. pusDstTemp = pusDst;
  42. cxTemp = cx;
  43. while(cxTemp--)
  44. {
  45. if (*(pusDstTemp++) != (USHORT) (pxlo->ulTranslate((ULONG) *(pusSrcTemp++))))
  46. RIP("RGB mis-match");
  47. }
  48. if (--cy)
  49. {
  50. pusSrc = (PUSHORT) (((PBYTE) pusSrc) + psb->lDeltaSrc);
  51. pusDst = (PUSHORT) (((PBYTE) pusDst) + psb->lDeltaDst);
  52. }
  53. else
  54. break;
  55. }
  56. }
  57. VOID VERIFYS24D16(PBLTINFO psb)
  58. {
  59. // We assume we are doing left to right top to bottom blting
  60. ASSERTGDI(psb->xDir == 1, "vSrcCopyS24D16 - direction not left to right");
  61. ASSERTGDI(psb->yDir == 1, "vSrcCopyS24D16 - direction not up to down");
  62. // These are our holding variables
  63. ULONG ulDink; // variable to dink around with the bytes in
  64. PBYTE pjSrcTemp;
  65. PUSHORT pusDstTemp;
  66. ULONG cxTemp;
  67. PBYTE pjSrc = psb->pjSrc + (3 * psb->xSrcStart);
  68. PUSHORT pusDst = (PUSHORT) (psb->pjDst + (2 * psb->xDstStart));
  69. ULONG cx = psb->cx;
  70. ULONG cy = psb->cy;
  71. XLATE *pxlo = psb->pxlo;
  72. ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");
  73. while(1)
  74. {
  75. pjSrcTemp = pjSrc;
  76. pusDstTemp = pusDst;
  77. cxTemp = cx;
  78. while(cxTemp--)
  79. {
  80. ulDink = *(pjSrcTemp + 2);
  81. ulDink = ulDink << 8;
  82. ulDink |= (ULONG) *(pjSrcTemp + 1);
  83. ulDink = ulDink << 8;
  84. ulDink |= (ULONG) *pjSrcTemp;
  85. if (*pusDstTemp != (USHORT) (pxlo->ulTranslate(ulDink)))
  86. RIP("RGB mis-match");
  87. pusDstTemp++;
  88. pjSrcTemp += 3;
  89. }
  90. if (--cy)
  91. {
  92. pjSrc += psb->lDeltaSrc;
  93. pusDst = (PUSHORT) (((PBYTE) pusDst) + psb->lDeltaDst);
  94. }
  95. else
  96. break;
  97. }
  98. }
  99. VOID VERIFYS32D16(PBLTINFO psb)
  100. {
  101. // We assume we are doing left to right top to bottom blting.
  102. ASSERTGDI(psb->xDir == 1, "vSrcCopyS32D16 - direction not left to right");
  103. ASSERTGDI(psb->yDir == 1, "vSrcCopyS32D16 - direction not up to down");
  104. // These are our holding variables
  105. PULONG pulSrcTemp;
  106. PUSHORT pusDstTemp;
  107. ULONG cxTemp;
  108. PULONG pulSrc = (PULONG) (psb->pjSrc + (4 * psb->xSrcStart));
  109. PUSHORT pusDst = (PUSHORT) (psb->pjDst + (2 * psb->xDstStart));
  110. ULONG cx = psb->cx;
  111. ULONG cy = psb->cy;
  112. XLATE *pxlo = psb->pxlo;
  113. ULONG ulLastSrcPel;
  114. USHORT usLastDstPel;
  115. ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");
  116. usLastDstPel = (USHORT) (pxlo->ulTranslate(ulLastSrcPel = *pulSrc));
  117. while(1)
  118. {
  119. pulSrcTemp = pulSrc;
  120. pusDstTemp = pusDst;
  121. cxTemp = cx;
  122. while(cxTemp--)
  123. {
  124. ULONG ulTemp;
  125. if ((ulTemp = *(pulSrcTemp)) != ulLastSrcPel)
  126. {
  127. ulLastSrcPel = ulTemp;
  128. usLastDstPel = (USHORT) (pxlo->ulTranslate(ulLastSrcPel));
  129. }
  130. if (*pusDstTemp++ != usLastDstPel)
  131. RIP("RGB mis-match");
  132. pulSrcTemp++;
  133. }
  134. if (--cy)
  135. {
  136. pulSrc = (PULONG) (((PBYTE) pulSrc) + psb->lDeltaSrc);
  137. pusDst = (PUSHORT) (((PBYTE) pusDst) + psb->lDeltaDst);
  138. }
  139. else
  140. break;
  141. }
  142. }
  143. #endif
  144. /*******************Public*Routine*****************\
  145. * vSrcCopyS1D16
  146. *
  147. * There are three main loops in this function.
  148. *
  149. * The first loop deals with the full byte part mapping
  150. * the Dst while fetching/shifting the matching 8 bits
  151. * from the Src.
  152. *
  153. * The second loop deals with the left starting
  154. * pixels.
  155. *
  156. * The third loop deals with the ending pixels.
  157. *
  158. * For the full bytes, we walk thru Src one byte at a time
  159. * and expand to Dst 8 words at a time. Dst is
  160. * DWORD aligned.
  161. *
  162. * We expand the starting/ending pixels one bit
  163. * at a time.
  164. *
  165. * History:
  166. * 17-Oct-1994 -by- Lingyun Wang [lingyunw]
  167. * Wrote it.
  168. *
  169. \**************************************************/
  170. VOID vSrcCopyS1D16(PBLTINFO psb)
  171. {
  172. BYTE jSrc; // holds a source byte
  173. INT iDst; // Position in the first 8 Dst words
  174. INT iSrc; // bit position in the first Src byte
  175. PBYTE pjDst; // pointer to the Src bytes
  176. PBYTE pjSrc; // pointer to the Dst bytes
  177. LONG xSrcEnd = psb->xSrcEnd;
  178. LONG cy; // number of rows
  179. LONG cx; // number of pixels
  180. BYTE alignL; // alignment bits to the left
  181. BYTE alignR; // alignment bits to the right
  182. LONG cibytes; //number of full 8 bytes dealed with
  183. BOOL bNextByte;
  184. LONG xDstEnd = psb->xDstStart+psb->cx;
  185. LONG lDeltaDst;
  186. LONG lDeltaSrc;
  187. USHORT ausTable[2];
  188. ULONG ulB = (ULONG)(psb->pxlo->pulXlate[0]);
  189. ULONG uF = (ULONG)(psb->pxlo->pulXlate[1]);
  190. USHORT usB = (USHORT)(psb->pxlo->pulXlate[0]);
  191. USHORT usF = (USHORT)(psb->pxlo->pulXlate[1]);
  192. ULONG aulTable[4];
  193. INT count;
  194. BOOL bNextSrc = TRUE;
  195. // We assume we are doing left to right top to bottom blting
  196. ASSERTGDI(psb->xDir == 1, "vSrcCopyS1D16 - direction not left to right");
  197. ASSERTGDI(psb->yDir == 1, "vSrcCopyS1D16 - direction not up to down");
  198. ASSERTGDI(psb->cy != 0, "ERROR: Src Move cy == 0");
  199. //DbgPrint ("vsrccopys1d16\n");
  200. // Generate aulTable. 4 entries.
  201. // Each 2 bits will be an index to the aulTable
  202. // which translates to a 32 bit ULONG
  203. ULONG ulValB = ulB;
  204. ULONG ulValF = uF;
  205. ulValB = (ulValB << 16) | ulValB;
  206. ulValF = (ulValF << 16) | ulValF;
  207. aulTable[0] = ulValB; //0 0
  208. aulTable[1] = (ulValF<<16) | (ulValB>>16); //1 0
  209. aulTable[2] = (ulValB<<16) | (ulValF>>16); //0 1
  210. aulTable[3] = ulValF ; //1 1
  211. // Generate ausTable.
  212. // Two entries. This table used when dealing
  213. // with begin and end parts.
  214. ausTable[0] = usB;
  215. ausTable[1] = usF;
  216. //Get Src and Dst start positions
  217. iSrc = psb->xSrcStart & 0x0007;
  218. iDst = psb->xDstStart & 0x0007;
  219. if (iSrc < iDst)
  220. alignL = 8 - (iDst - iSrc);
  221. else
  222. alignL = iSrc - iDst;
  223. alignR = 8 - alignL;
  224. cx=psb->cx;
  225. lDeltaDst = psb->lDeltaDst;
  226. lDeltaSrc = psb->lDeltaSrc;
  227. // if there is a next 8 words
  228. bNextByte = !((xDstEnd>>3) ==
  229. (psb->xDstStart>>3));
  230. // if Src and Dst are aligned, use a separete loop
  231. // to obtain better performance;
  232. // If not, we shift the Src bytes to match with
  233. // the Dst 8 bytes (2 dwords) one at a time
  234. if (bNextByte)
  235. {
  236. long iStrideSrc;
  237. long iStrideDst;
  238. PBYTE pjSrcEnd;
  239. // Get first Dst full 8 words
  240. pjDst = psb->pjDst + 2*((psb->xDstStart+7)&~0x07);
  241. // Get the Src byte that matches the first Dst
  242. // full 8 bytes
  243. pjSrc = psb->pjSrc + ((psb->xSrcStart+((8-iDst)&0x07)) >> 3);
  244. //Get the number of full 8 words
  245. cibytes = (xDstEnd>>3)-((psb->xDstStart+7)>>3);
  246. //the increment to the full byte on the next scan line
  247. iStrideDst = lDeltaDst - cibytes*16;
  248. iStrideSrc = lDeltaSrc - cibytes;
  249. // deal with our special case
  250. cy = psb->cy;
  251. if (!alignL)
  252. {
  253. while (cy--)
  254. {
  255. pjSrcEnd = pjSrc + cibytes;
  256. while (pjSrc != pjSrcEnd)
  257. {
  258. jSrc = *pjSrc++;
  259. *(PULONG) (pjDst + 0) = aulTable[(jSrc >> 6) & 0x03];
  260. *(PULONG) (pjDst + 4) = aulTable[(jSrc >> 4) & 0x03];
  261. *(PULONG) (pjDst + 8) = aulTable[(jSrc >> 2)& 0x03];
  262. *(PULONG) (pjDst + 12) = aulTable[jSrc & 0x03];
  263. pjDst +=16;
  264. }
  265. pjDst += iStrideDst;
  266. pjSrc += iStrideSrc;
  267. }
  268. } //end of if (!alignL)
  269. // Here comes our general case for the main full
  270. // bytes part
  271. else // if not aligned
  272. {
  273. BYTE jRem; //remainder
  274. while (cy--)
  275. {
  276. jRem = *pjSrc << alignL;
  277. pjSrcEnd = pjSrc + cibytes;
  278. while (pjSrc != pjSrcEnd)
  279. {
  280. jSrc = ((*(++pjSrc))>>alignR) | jRem;
  281. *(PULONG) (pjDst + 0) = aulTable[(jSrc >> 6) & 0x03];
  282. *(PULONG) (pjDst + 4) = aulTable[(jSrc >> 4) & 0x03];
  283. *(PULONG) (pjDst + 8) = aulTable[(jSrc >> 2)& 0x03];
  284. *(PULONG) (pjDst + 12) = aulTable[jSrc & 0x03];
  285. pjDst +=16;
  286. //next remainder
  287. jRem = *pjSrc << alignL;
  288. }
  289. // go to the beginging full byte of
  290. // next scan line
  291. pjDst += iStrideDst;
  292. pjSrc += iStrideSrc;
  293. }
  294. } //else
  295. } //if
  296. // End of our dealing with the full bytes
  297. //Deal with the starting pixels
  298. if (!bNextByte)
  299. {
  300. count = cx;
  301. bNextSrc = ((iSrc+cx) > 8);
  302. }
  303. else
  304. count = 8-iDst;
  305. if (iDst | !bNextByte)
  306. {
  307. PBYTE pjDstTemp;
  308. PBYTE pjDstEnd;
  309. pjDst = psb->pjDst + 2*psb->xDstStart;
  310. pjSrc = psb->pjSrc + (psb->xSrcStart>>3);
  311. cy = psb->cy;
  312. if (iSrc > iDst)
  313. {
  314. if (bNextSrc)
  315. {
  316. while (cy--)
  317. {
  318. jSrc = *pjSrc << alignL;
  319. jSrc |= *(pjSrc+1) >> alignR;
  320. jSrc <<= iDst;
  321. pjDstTemp = pjDst;
  322. pjDstEnd = pjDst + count*2;
  323. while (pjDstTemp != pjDstEnd)
  324. {
  325. *(PUSHORT) pjDstTemp = ausTable[(jSrc&0x80)>>7];
  326. jSrc <<= 1;
  327. pjDstTemp += 2;
  328. }
  329. pjDst += lDeltaDst;
  330. pjSrc += lDeltaSrc;
  331. }
  332. }
  333. else
  334. {
  335. while (cy--)
  336. {
  337. jSrc = *pjSrc << alignL;
  338. jSrc <<= iDst;
  339. pjDstTemp = pjDst;
  340. pjDstEnd = pjDst + count*2;
  341. while (pjDstTemp != pjDstEnd)
  342. {
  343. *(PUSHORT) pjDstTemp = ausTable[(jSrc&0x80)>>7];
  344. jSrc <<= 1;
  345. pjDstTemp += 2;
  346. }
  347. pjDst += lDeltaDst;
  348. pjSrc += lDeltaSrc;
  349. }
  350. }
  351. }
  352. else //if (iSrc < iDst)
  353. {
  354. while (cy--)
  355. {
  356. jSrc = *pjSrc << iSrc;
  357. pjDstTemp = pjDst;
  358. pjDstEnd = pjDst + 2*count;
  359. while (pjDstTemp != pjDstEnd)
  360. {
  361. *(PUSHORT) pjDstTemp = ausTable[(jSrc&0x80)>>7];
  362. jSrc <<= 1;
  363. pjDstTemp += 2;
  364. }
  365. pjDst += lDeltaDst;
  366. pjSrc += lDeltaSrc;
  367. }
  368. }
  369. } //if
  370. // Deal with the ending pixels
  371. if ((xDstEnd & 0x0007)
  372. && bNextByte)
  373. {
  374. PBYTE pjDstTemp;
  375. PBYTE pjDstEnd;
  376. // Get the last partial bytes on the
  377. // scan line
  378. pjDst = psb->pjDst+2*(xDstEnd&~0x07);
  379. // Get the Src byte that matches the
  380. // right partial Dst 8 bytes
  381. pjSrc = psb->pjSrc + ((psb->xSrcEnd-1) >>3);
  382. // Get the ending position in the last
  383. // Src and Dst bytes
  384. iSrc = (psb->xSrcEnd-1) & 0x0007;
  385. iDst = (xDstEnd-1) & 0x0007;
  386. count = iDst+1;
  387. cy = psb->cy;
  388. if (iSrc >= iDst)
  389. {
  390. while (cy--)
  391. {
  392. jSrc = *pjSrc << alignL;
  393. pjDstTemp = pjDst;
  394. pjDstEnd = pjDst + 2*count;
  395. while (pjDstTemp != pjDstEnd)
  396. {
  397. *(PUSHORT) pjDstTemp = ausTable[(jSrc&0x80)>>7];
  398. jSrc <<= 1;
  399. pjDstTemp += 2;
  400. }
  401. pjDst += lDeltaDst;
  402. pjSrc += lDeltaSrc;
  403. }
  404. }
  405. else if (iSrc < iDst)
  406. {
  407. while (cy--)
  408. {
  409. jSrc = *(pjSrc-1) << alignL;
  410. jSrc |= *pjSrc >> alignR;
  411. pjDstTemp = pjDst;
  412. pjDstEnd = pjDst + 2*count;
  413. while (pjDstTemp != pjDstEnd)
  414. {
  415. *(PUSHORT) pjDstTemp = ausTable[(jSrc&0x80)>>7];
  416. jSrc <<= 1;
  417. pjDstTemp += 2;
  418. }
  419. pjDst += lDeltaDst;
  420. pjSrc += lDeltaSrc;
  421. }
  422. }
  423. } //if
  424. }
  425. /******************************Public*Routine******************************\
  426. * vSrcCopyS4D16
  427. *
  428. *
  429. * History:
  430. * 06-Feb-1991 -by- Patrick Haluptzok patrickh
  431. * Wrote it.
  432. \**************************************************************************/
  433. VOID vSrcCopyS4D16(PBLTINFO psb)
  434. {
  435. // We assume we are doing left to right top to bottom blting
  436. ASSERTGDI(psb->xDir == 1, "vSrcCopyS4D16 - direction not left to right");
  437. ASSERTGDI(psb->yDir == 1, "vSrcCopyS4D16 - direction not up to down");
  438. BYTE jSrc;
  439. LONG i;
  440. PUSHORT pusDst;
  441. PBYTE pjSrc;
  442. PUSHORT pusDstHolder = (PUSHORT) (psb->pjDst + (2 * psb->xDstStart));
  443. PBYTE pjSrcHolder = psb->pjSrc + (psb->xSrcStart >> 1);
  444. ULONG cy = psb->cy;
  445. XLATE *pxlo = psb->pxlo;
  446. PULONG pulXlate = psb->pxlo->pulXlate;
  447. ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");
  448. while(1)
  449. {
  450. pusDst = pusDstHolder;
  451. pjSrc = pjSrcHolder;
  452. i = psb->xSrcStart;
  453. if (i & 0x00000001)
  454. jSrc = *(pjSrc++);
  455. while(i != psb->xSrcEnd)
  456. {
  457. if (i & 0x00000001)
  458. *(pusDst++) = (USHORT) pulXlate[jSrc & 0x0F];
  459. else
  460. {
  461. // We need a new byte
  462. jSrc = *(pjSrc++);
  463. *(pusDst++) = (USHORT) pulXlate[((ULONG) (jSrc & 0xF0)) >> 4];
  464. }
  465. ++i;
  466. }
  467. if (--cy)
  468. {
  469. pjSrcHolder += psb->lDeltaSrc;
  470. pusDstHolder = (PUSHORT) (((PBYTE) pusDstHolder) + psb->lDeltaDst);
  471. }
  472. else
  473. break;
  474. }
  475. }
  476. /******************************Public*Routine******************************\
  477. * vSrcCopyS8D16
  478. *
  479. *
  480. * History:
  481. * 06-Feb-1991 -by- Patrick Haluptzok patrickh
  482. * Wrote it.
  483. \**************************************************************************/
  484. VOID vSrcCopyS8D16(PBLTINFO psb)
  485. {
  486. // We assume we are doing left to right top to bottom blting
  487. ASSERTGDI(psb->xDir == 1, "vSrcCopyS8D16 - direction not left to right");
  488. ASSERTGDI(psb->yDir == 1, "vSrcCopyS8D16 - direction not up to down");
  489. // These are our holding variables
  490. PBYTE pjSrc = psb->pjSrc + psb->xSrcStart;
  491. PBYTE pjDst = psb->pjDst + (2 * psb->xDstStart);
  492. LONG cx = psb->cx;
  493. LONG cy = psb->cy;
  494. XLATE *pxlo = psb->pxlo;
  495. PULONG pulXlate = psb->pxlo->pulXlate;
  496. LONG lSrcSkip = psb->lDeltaSrc - cx;
  497. LONG lDstSkip = psb->lDeltaDst - (cx * 2);
  498. LONG i;
  499. ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");
  500. while(1)
  501. {
  502. i = cx;
  503. // Get 'dword' alignment on the destination:
  504. if (((ULONG_PTR) pjDst) & 2)
  505. {
  506. *((USHORT*) pjDst) = (USHORT) pulXlate[*pjSrc];
  507. pjDst += 2;
  508. pjSrc += 1;
  509. i--;
  510. }
  511. // Now write pixels a dword at a time. This is almost a 2x win
  512. // over doing word writes if we're writing to frame buffer memory
  513. // over the PCI bus on Pentium class systems, because the PCI
  514. // write throughput is so slow:
  515. while(1)
  516. {
  517. i -=2;
  518. if (i < 0)
  519. break;
  520. *((ULONG*) pjDst) = (pulXlate[*(pjSrc)])
  521. | (pulXlate[*(pjSrc + 1)] << 16);
  522. pjDst += 4;
  523. pjSrc += 2;
  524. }
  525. // Take care of the end alignment:
  526. if (i & 1)
  527. {
  528. *((USHORT*) pjDst) = (USHORT) pulXlate[*pjSrc];
  529. pjDst += 2;
  530. pjSrc += 1;
  531. }
  532. if (--cy == 0)
  533. break;
  534. pjSrc += lSrcSkip;
  535. pjDst += lDstSkip;
  536. }
  537. }
  538. /******************************Public*Routine******************************\
  539. * vSrcCopyS16D16
  540. *
  541. *
  542. * History:
  543. * 07-Feb-1991 -by- Patrick Haluptzok patrickh
  544. * Wrote it.
  545. \**************************************************************************/
  546. VOID vSrcCopyS16D16(PBLTINFO psb)
  547. {
  548. // We assume we are doing left to right top to bottom blting
  549. // If it was on the same surface it would be the identity case.
  550. ASSERTGDI(psb->xDir == 1, "vSrcCopyS16D16 - direction not left to right");
  551. ASSERTGDI(psb->yDir == 1, "vSrcCopyS16D16 - direction not up to down");
  552. // These are our holding variables
  553. PBYTE pjSrc = psb->pjSrc + (2 * psb->xSrcStart);
  554. PBYTE pjDst = psb->pjDst + (2 * psb->xDstStart);
  555. ULONG cx = psb->cx;
  556. ULONG cy = psb->cy;
  557. XLATE *pxlo = psb->pxlo;
  558. XEPALOBJ palSrc(pxlo->ppalSrc);
  559. XEPALOBJ palDst(pxlo->ppalDst);
  560. LONG lSrcSkip = psb->lDeltaSrc - (cx * 2);
  561. LONG lDstSkip = psb->lDeltaDst - (cx * 2);
  562. PFN_pfnXlate pfnXlate;
  563. LONG i;
  564. USHORT us;
  565. ULONG ul;
  566. ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");
  567. // Optimize 5-5-5 to 5-6-5.
  568. if (palSrc.bIs555() && palDst.bIs565())
  569. {
  570. while (1)
  571. {
  572. i = cx;
  573. if (((ULONG_PTR) pjDst) & 2)
  574. {
  575. us = *((USHORT*) pjSrc);
  576. *((USHORT*) pjDst) = ((us) & 0x001f)
  577. | ((us << 1) & 0xffc0)
  578. | ((us >> 4) & 0x0020);
  579. pjDst += 2;
  580. pjSrc += 2;
  581. i--;
  582. }
  583. while(1)
  584. {
  585. i -=2;
  586. if (i < 0)
  587. break;
  588. ul = *(UNALIGNED_DWORD_POINTER(pjSrc));
  589. *((ULONG*) pjDst) = ((ul) & 0x001f001f)
  590. | ((ul << 1) & 0xffc0ffc0)
  591. | ((ul >> 4) & 0x00200020);
  592. pjDst += 4;
  593. pjSrc += 4;
  594. }
  595. if (i & 1)
  596. {
  597. us = *((USHORT*) pjSrc);
  598. *((USHORT*) pjDst) = ((us) & 0x001f)
  599. | ((us << 1) & 0xffc0)
  600. | ((us >> 4) & 0x0020);
  601. pjDst += 2;
  602. pjSrc += 2;
  603. }
  604. if (--cy == 0)
  605. break;
  606. pjSrc += lSrcSkip;
  607. pjDst += lDstSkip;
  608. }
  609. VERIFYS16D16(psb);
  610. return;
  611. }
  612. // Optimize 5-6-5 to 5-5-5.
  613. if (palSrc.bIs565() && palDst.bIs555())
  614. {
  615. while (1)
  616. {
  617. i = cx;
  618. if (((ULONG_PTR) pjDst) & 2)
  619. {
  620. us = *((USHORT*) pjSrc);
  621. *((USHORT*) pjDst) = ((us) & 0x001f)
  622. | ((us >> 1) & 0x7fe0);
  623. pjDst += 2;
  624. pjSrc += 2;
  625. i--;
  626. }
  627. while(1)
  628. {
  629. i -=2;
  630. if (i < 0)
  631. break;
  632. ul = *(UNALIGNED_DWORD_POINTER(pjSrc));
  633. *((ULONG*) pjDst) = ((ul) & 0x001f001f)
  634. | ((ul >> 1) & 0x7fe07fe0);
  635. pjDst += 4;
  636. pjSrc += 4;
  637. }
  638. if (i & 1)
  639. {
  640. us = *((USHORT*) pjSrc);
  641. *((USHORT*) pjDst) = ((us) & 0x001f)
  642. | ((us >> 1) & 0x7fe0);
  643. pjDst += 2;
  644. pjSrc += 2;
  645. }
  646. if (--cy == 0)
  647. break;
  648. pjSrc += lSrcSkip;
  649. pjDst += lDstSkip;
  650. }
  651. VERIFYS16D16(psb);
  652. return;
  653. }
  654. // Finally, fall back to the generic case:
  655. pfnXlate = pxlo->pfnXlateBetweenBitfields();
  656. while (1)
  657. {
  658. i = cx;
  659. do {
  660. *((USHORT*) pjDst) = (USHORT) pfnXlate(pxlo, *((USHORT*) pjSrc));
  661. pjDst += 2;
  662. pjSrc += 2;
  663. } while (--i != 0);
  664. if (--cy == 0)
  665. break;
  666. pjSrc += lSrcSkip;
  667. pjDst += lDstSkip;
  668. }
  669. VERIFYS16D16(psb);
  670. }
  671. /******************************Public*Routine******************************\
  672. * vSrcCopyS16D16Identity
  673. *
  674. * This is the special case no translate blting. All the SmDn should have
  675. * them if m==n. Identity xlates only occur amoung matching format bitmaps.
  676. *
  677. * History:
  678. * 06-Feb-1991 -by- Patrick Haluptzok patrickh
  679. * Wrote it.
  680. \**************************************************************************/
  681. VOID vSrcCopyS16D16Identity(PBLTINFO psb)
  682. {
  683. // These are our holding variables
  684. PUSHORT pusSrc = (PUSHORT) (psb->pjSrc + (2 * psb->xSrcStart));
  685. PUSHORT pusDst = (PUSHORT) (psb->pjDst + (2 * psb->xDstStart));
  686. ULONG cx = psb->cx;
  687. ULONG cy = psb->cy;
  688. ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");
  689. if (psb->xDir < 0)
  690. {
  691. pusSrc -= (cx - 1);
  692. pusDst -= (cx - 1);
  693. }
  694. cx = cx << 1;
  695. while(1)
  696. {
  697. if(psb->fSrcAlignedRd)
  698. vSrcAlignCopyMemory((PBYTE)pusDst,(PBYTE)pusSrc,cx);
  699. else
  700. RtlMoveMemory((PVOID)pusDst, (PVOID)pusSrc, cx);
  701. if (--cy)
  702. {
  703. pusSrc = (PUSHORT) (((PBYTE) pusSrc) + psb->lDeltaSrc);
  704. pusDst = (PUSHORT) (((PBYTE) pusDst) + psb->lDeltaDst);
  705. }
  706. else
  707. break;
  708. }
  709. }
  710. /******************************Public*Routine******************************\
  711. * vSrcCopyS24D16
  712. *
  713. *
  714. * History:
  715. * 06-Feb-1991 -by- Patrick Haluptzok patrickh
  716. * Wrote it.
  717. \**************************************************************************/
  718. VOID vSrcCopyS24D16(PBLTINFO psb)
  719. {
  720. // We assume we are doing left to right top to bottom blting
  721. ASSERTGDI(psb->xDir == 1, "vSrcCopyS24D16 - direction not left to right");
  722. ASSERTGDI(psb->yDir == 1, "vSrcCopyS24D16 - direction not up to down");
  723. // These are our holding variables
  724. PBYTE pjSrc = psb->pjSrc + (3 * psb->xSrcStart);
  725. PBYTE pjDst = psb->pjDst + (2 * psb->xDstStart);
  726. ULONG cx = psb->cx;
  727. ULONG cy = psb->cy;
  728. LONG lSrcSkip = psb->lDeltaSrc - (cx * 3);
  729. LONG lDstSkip = psb->lDeltaDst - (cx * 2);
  730. XLATE *pxlo = psb->pxlo;
  731. XEPALOBJ palSrc(pxlo->ppalSrc);
  732. XEPALOBJ palDst(pxlo->ppalDst);
  733. PFN_pfnXlate pfnXlate;
  734. ULONG ul;
  735. ULONG ul0;
  736. ULONG ul1;
  737. LONG i;
  738. ASSERTGDI(cy != 0,
  739. "ERROR: Src Move cy == 0");
  740. ASSERTGDI(((pxlo->flXlate & (XO_TABLE | XO_TO_MONO)) == 0)
  741. && ((pxlo->flPrivate & XLATE_PAL_MANAGED) == 0),
  742. "ERROR: flXlate != 0 or flPrivate != 0");
  743. ASSERTGDI(((XEPALOBJ) pxlo->ppalDst).cEntries() == 0,
  744. "ERROR: cEntries != 0");
  745. ASSERTGDI(palDst.bIsBitfields(),
  746. "ERROR: destination not bitfields");
  747. if (palSrc.bIsBGR())
  748. {
  749. // First, try to optimize BGR to 5-6-5:
  750. if (palDst.bIs565())
  751. {
  752. while (1)
  753. {
  754. i = cx;
  755. if (((ULONG_PTR) pjDst) & 2)
  756. {
  757. ul = ((*(pjSrc) >> 3))
  758. | ((*(pjSrc + 1) << 3) & 0x07e0)
  759. | ((*(pjSrc + 2) << 8) & 0xf800);
  760. *((USHORT*) pjDst) = (USHORT) ul;
  761. pjDst += 2;
  762. pjSrc += 3;
  763. i--;
  764. }
  765. #if defined(_X86_)
  766. _asm {
  767. mov esi, pjSrc
  768. mov edi, pjDst
  769. sub i, 2
  770. js Done_565_Loop
  771. Middle_565_Loop:
  772. movzx eax, byte ptr [esi]
  773. movzx ebx, byte ptr [esi+1]
  774. shr eax, 3
  775. shl ebx, 3
  776. movzx edx, byte ptr [esi+2]
  777. movzx ecx, byte ptr [esi+3]
  778. shl edx, 8
  779. shl ecx, 13
  780. or eax, edx
  781. or ebx, ecx
  782. movzx edx, byte ptr [esi+4]
  783. movzx ecx, byte ptr [esi+5]
  784. shl edx, 19
  785. shl ecx, 24
  786. or eax, edx
  787. or ebx, ecx
  788. and eax, 0x07e0f81f
  789. and ebx, 0xf81f07e0
  790. or eax, ebx
  791. add esi, 6
  792. mov [edi], eax
  793. add edi, 4
  794. sub i, 2
  795. jns Middle_565_Loop
  796. Done_565_Loop:
  797. mov pjSrc, esi
  798. mov pjDst, edi
  799. }
  800. #else
  801. while (1)
  802. {
  803. i -= 2;
  804. if (i < 0)
  805. break;
  806. ul0 = (*(pjSrc) >> 3)
  807. | (*(pjSrc + 2) << 8)
  808. | (*(pjSrc + 4) << 19);
  809. ul1 = (*(pjSrc + 1) << 3)
  810. | (*(pjSrc + 3) << 13)
  811. | (*(pjSrc + 5) << 24);
  812. *((ULONG*) pjDst) = (ul0 & 0x07e0f81f)
  813. | (ul1 & 0xf81f07e0);
  814. pjDst += 4;
  815. pjSrc += 6;
  816. }
  817. #endif
  818. if (i & 1)
  819. {
  820. ul = ((*(pjSrc) >> 3))
  821. | ((*(pjSrc + 1) << 3) & 0x07e0)
  822. | ((*(pjSrc + 2) << 8) & 0xf800);
  823. *((USHORT*) pjDst) = (USHORT) ul;
  824. pjDst += 2;
  825. pjSrc += 3;
  826. }
  827. if (--cy == 0)
  828. break;
  829. pjSrc += lSrcSkip;
  830. pjDst += lDstSkip;
  831. }
  832. VERIFYS24D16(psb);
  833. return;
  834. }
  835. // Next, try to optimize BGR to 5-5-5:
  836. if (palDst.bIs555())
  837. {
  838. while (1)
  839. {
  840. i = cx;
  841. if (((ULONG_PTR) pjDst) & 2)
  842. {
  843. ul = ((*(pjSrc) >> 3))
  844. | ((*(pjSrc + 1) << 2) & 0x03e0)
  845. | ((*(pjSrc + 2) << 7) & 0x7c00);
  846. *((USHORT*) pjDst) = (USHORT) ul;
  847. pjDst += 2;
  848. pjSrc += 3;
  849. i--;
  850. }
  851. #if defined(_X86_)
  852. _asm {
  853. mov esi, pjSrc
  854. mov edi, pjDst
  855. sub i, 2
  856. js Done_555_Loop
  857. Middle_555_Loop:
  858. movzx eax, byte ptr [esi]
  859. movzx ebx, byte ptr [esi+1]
  860. shr eax, 3
  861. shl ebx, 2
  862. movzx edx, byte ptr [esi+2]
  863. movzx ecx, byte ptr [esi+3]
  864. shl edx, 7
  865. shl ecx, 13
  866. or eax, edx
  867. or ebx, ecx
  868. movzx edx, byte ptr [esi+4]
  869. movzx ecx, byte ptr [esi+5]
  870. shl edx, 18
  871. shl ecx, 23
  872. or eax, edx
  873. or ebx, ecx
  874. and eax, 0x03e07c1f
  875. and ebx, 0x7c1f03e0
  876. or eax, ebx
  877. add esi, 6
  878. mov [edi], eax
  879. add edi, 4
  880. sub i, 2
  881. jns Middle_555_Loop
  882. Done_555_Loop:
  883. mov pjSrc, esi
  884. mov pjDst, edi
  885. }
  886. #else
  887. while (1)
  888. {
  889. i -= 2;
  890. if (i < 0)
  891. break;
  892. ul0 = (*(pjSrc) >> 3)
  893. | (*(pjSrc + 2) << 7)
  894. | (*(pjSrc + 4) << 18);
  895. ul1 = (*(pjSrc + 1) << 2)
  896. | (*(pjSrc + 3) << 13)
  897. | (*(pjSrc + 5) << 23);
  898. *((ULONG*) pjDst) = (ul0 & 0x03e07c1f)
  899. | (ul1 & 0x7c1f03e0);
  900. pjDst += 4;
  901. pjSrc += 6;
  902. }
  903. #endif
  904. if (i & 1)
  905. {
  906. ul = ((*(pjSrc) >> 3))
  907. | ((*(pjSrc + 1) << 2) & 0x03e0)
  908. | ((*(pjSrc + 2) << 7) & 0x7c00);
  909. *((USHORT*) pjDst) = (USHORT) ul;
  910. pjDst += 2;
  911. pjSrc += 3;
  912. }
  913. if (--cy == 0)
  914. break;
  915. pjSrc += lSrcSkip;
  916. pjDst += lDstSkip;
  917. }
  918. VERIFYS24D16(psb);
  919. return;
  920. }
  921. }
  922. // Finally, fall back to the generic case:
  923. pfnXlate = pxlo->pfnXlateBetweenBitfields();
  924. while (1)
  925. {
  926. i = cx;
  927. do {
  928. ul = ((ULONG) *(pjSrc))
  929. | ((ULONG) *(pjSrc + 1) << 8)
  930. | ((ULONG) *(pjSrc + 2) << 16);
  931. *((USHORT*) pjDst) = (USHORT) pfnXlate(pxlo, ul);
  932. pjDst += 2;
  933. pjSrc += 3;
  934. } while (--i != 0);
  935. if (--cy == 0)
  936. break;
  937. pjSrc += lSrcSkip;
  938. pjDst += lDstSkip;
  939. }
  940. VERIFYS24D16(psb);
  941. }
  942. /******************************Public*Routine******************************\
  943. * vSrcCopyS32D16
  944. *
  945. *
  946. * History:
  947. * 07-Feb-1991 -by- Patrick Haluptzok patrickh
  948. * Wrote it.
  949. \**************************************************************************/
  950. VOID vSrcCopyS32D16(PBLTINFO psb)
  951. {
  952. // We assume we are doing left to right top to bottom blting.
  953. ASSERTGDI(psb->xDir == 1, "vSrcCopyS32D16 - direction not left to right");
  954. ASSERTGDI(psb->yDir == 1, "vSrcCopyS32D16 - direction not up to down");
  955. // These are our holding variables
  956. PBYTE pjSrc = psb->pjSrc + (4 * psb->xSrcStart);
  957. PBYTE pjDst = psb->pjDst + (2 * psb->xDstStart);
  958. ULONG cx = psb->cx;
  959. ULONG cy = psb->cy;
  960. LONG lSrcSkip = psb->lDeltaSrc - (cx * 4);
  961. LONG lDstSkip = psb->lDeltaDst - (cx * 2);
  962. XLATE *pxlo = psb->pxlo;
  963. XEPALOBJ palSrc(pxlo->ppalSrc);
  964. XEPALOBJ palDst(pxlo->ppalDst);
  965. PFN_pfnXlate pfnXlate;
  966. ULONG ul;
  967. ULONG ul0;
  968. ULONG ul1;
  969. LONG i;
  970. ASSERTGDI(cy != 0, "ERROR: Src Move cy == 0");
  971. if (palSrc.bIsBGR())
  972. {
  973. // First, try to optimize BGR to 5-6-5:
  974. if (palDst.bIs565())
  975. {
  976. while (1)
  977. {
  978. i = cx;
  979. if (((ULONG_PTR) pjDst) & 2)
  980. {
  981. ul = ((*(pjSrc) >> 3))
  982. | ((*(pjSrc + 1) << 3) & 0x07e0)
  983. | ((*(pjSrc + 2) << 8) & 0xf800);
  984. *((USHORT*) pjDst) = (USHORT) ul;
  985. pjDst += 2;
  986. pjSrc += 4;
  987. i--;
  988. }
  989. #if defined(_X86_)
  990. _asm {
  991. mov esi, pjSrc
  992. mov edi, pjDst
  993. sub i, 2
  994. js Done_565_Loop
  995. Middle_565_Loop:
  996. movzx eax, byte ptr [esi]
  997. movzx ebx, byte ptr [esi+1]
  998. shr eax, 3
  999. shl ebx, 3
  1000. movzx edx, byte ptr [esi+2]
  1001. movzx ecx, byte ptr [esi+4]
  1002. shl edx, 8
  1003. shl ecx, 13
  1004. or eax, edx
  1005. or ebx, ecx
  1006. movzx edx, byte ptr [esi+5]
  1007. movzx ecx, byte ptr [esi+6]
  1008. shl edx, 19
  1009. shl ecx, 24
  1010. or eax, edx
  1011. or ebx, ecx
  1012. and eax, 0x07e0f81f
  1013. and ebx, 0xf81f07e0
  1014. or eax, ebx
  1015. add esi, 8
  1016. mov [edi], eax
  1017. add edi, 4
  1018. sub i, 2
  1019. jns Middle_565_Loop
  1020. Done_565_Loop:
  1021. mov pjSrc, esi
  1022. mov pjDst, edi
  1023. }
  1024. #else
  1025. while (1)
  1026. {
  1027. i -= 2;
  1028. if (i < 0)
  1029. break;
  1030. ul0 = (*(pjSrc) >> 3)
  1031. | (*(pjSrc + 2) << 8)
  1032. | (*(pjSrc + 5) << 19);
  1033. ul1 = (*(pjSrc + 1) << 3)
  1034. | (*(pjSrc + 4) << 13)
  1035. | (*(pjSrc + 6) << 24);
  1036. *((ULONG*) pjDst) = (ul0 & 0x07e0f81f)
  1037. | (ul1 & 0xf81f07e0);
  1038. pjDst += 4;
  1039. pjSrc += 8;
  1040. }
  1041. #endif
  1042. if (i & 1)
  1043. {
  1044. ul = ((*(pjSrc) >> 3))
  1045. | ((*(pjSrc + 1) << 3) & 0x07e0)
  1046. | ((*(pjSrc + 2) << 8) & 0xf800);
  1047. *((USHORT*) pjDst) = (USHORT) ul;
  1048. pjDst += 2;
  1049. pjSrc += 4;
  1050. }
  1051. if (--cy == 0)
  1052. break;
  1053. pjSrc += lSrcSkip;
  1054. pjDst += lDstSkip;
  1055. }
  1056. VERIFYS32D16(psb);
  1057. return;
  1058. }
  1059. // Next, try to optimize BGR to 5-5-5:
  1060. if (palDst.bIs555())
  1061. {
  1062. while (1)
  1063. {
  1064. i = cx;
  1065. if (((ULONG_PTR) pjDst) & 2)
  1066. {
  1067. ul = ((*(pjSrc) >> 3))
  1068. | ((*(pjSrc + 1) << 2) & 0x03e0)
  1069. | ((*(pjSrc + 2) << 7) & 0x7c00);
  1070. *((USHORT*) pjDst) = (USHORT) ul;
  1071. pjDst += 2;
  1072. pjSrc += 4;
  1073. i--;
  1074. }
  1075. #if defined(_X86_)
  1076. _asm {
  1077. mov esi, pjSrc
  1078. mov edi, pjDst
  1079. sub i, 2
  1080. js Done_555_Loop
  1081. Middle_555_Loop:
  1082. movzx eax, byte ptr [esi]
  1083. movzx ebx, byte ptr [esi+1]
  1084. shr eax, 3
  1085. shl ebx, 2
  1086. movzx edx, byte ptr [esi+2]
  1087. movzx ecx, byte ptr [esi+4]
  1088. shl edx, 7
  1089. shl ecx, 13
  1090. or eax, edx
  1091. or ebx, ecx
  1092. movzx edx, byte ptr [esi+5]
  1093. movzx ecx, byte ptr [esi+6]
  1094. shl edx, 18
  1095. shl ecx, 23
  1096. or eax, edx
  1097. or ebx, ecx
  1098. and eax, 0x03e07c1f
  1099. and ebx, 0x7c1f03e0
  1100. or eax, ebx
  1101. add esi, 8
  1102. mov [edi], eax
  1103. add edi, 4
  1104. sub i, 2
  1105. jns Middle_555_Loop
  1106. Done_555_Loop:
  1107. mov pjSrc, esi
  1108. mov pjDst, edi
  1109. }
  1110. #else
  1111. while (1)
  1112. {
  1113. i -= 2;
  1114. if (i < 0)
  1115. break;
  1116. ul0 = (*(pjSrc) >> 3)
  1117. | (*(pjSrc + 2) << 7)
  1118. | (*(pjSrc + 5) << 18);
  1119. ul1 = (*(pjSrc + 1) << 2)
  1120. | (*(pjSrc + 4) << 13)
  1121. | (*(pjSrc + 6) << 23);
  1122. *((ULONG*) pjDst) = (ul0 & 0x03e07c1f)
  1123. | (ul1 & 0x7c1f03e0);
  1124. pjDst += 4;
  1125. pjSrc += 8;
  1126. }
  1127. #endif
  1128. if (i & 1)
  1129. {
  1130. ul = ((*(pjSrc) >> 3))
  1131. | ((*(pjSrc + 1) << 2) & 0x03e0)
  1132. | ((*(pjSrc + 2) << 7) & 0x7c00);
  1133. *((USHORT*) pjDst) = (USHORT) ul;
  1134. pjDst += 2;
  1135. pjSrc += 4;
  1136. }
  1137. if (--cy == 0)
  1138. break;
  1139. pjSrc += lSrcSkip;
  1140. pjDst += lDstSkip;
  1141. }
  1142. VERIFYS32D16(psb);
  1143. return;
  1144. }
  1145. }
  1146. // Finally, fall back to the generic case:
  1147. pfnXlate = pxlo->pfnXlateBetweenBitfields();
  1148. while (1)
  1149. {
  1150. i = cx;
  1151. do {
  1152. *((USHORT*) pjDst) = (USHORT) pfnXlate(pxlo, *((ULONG*) pjSrc));
  1153. pjDst += 2;
  1154. pjSrc += 4;
  1155. } while (--i != 0);
  1156. if (--cy == 0)
  1157. break;
  1158. pjSrc += lSrcSkip;
  1159. pjDst += lDstSkip;
  1160. }
  1161. VERIFYS32D16(psb);
  1162. }