Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

708 lines
23 KiB

  1. ;-----------------------------------------------------------------------------
  2. ;
  3. ; Monolith 8. Gouraud No Z buffer 565
  4. ;
  5. ; Globals(ATTENTION Darn multiprocessing.)
  6. ;
  7. ; StackPos - saves stack position
  8. ; uSpans - number of spans
  9. ;
  10. ;
  11. ; This monolith tries to process 4 colors at once.
  12. ; This allows writing of qwords. Since it writes
  13. ; qwords, it is most beneficial if they are aligned.
  14. ; The beginingpixels loop writes pixels until the screen
  15. ; memory is aligned. Then, if there are at least four pixels, the
  16. ; deltacolor values are checked to make sure that they dont overflow
  17. ; when they get multiplied by four. Next, color,
  18. ; color+deltacolor, color+2*deltacolor, and color+3*deltacolor
  19. ; are calculated (Generate four starting pixels). To make color
  20. ; conversion easier, the red, green and blue are all separated
  21. ; into their own registers (Seperate colors). Deltas for each component
  22. ; are also separated (Seperate delta colors). Since all of the
  23. ; components are separated, three additions will be needed to
  24. ; update the color.
  25. ;
  26. ; This allows 565 pixels to be calculated just like it would
  27. ; be done in C except it will generate 4 at a time.
  28. ;
  29. ; int16 red, green, blue
  30. ; color = (red&0xf800) | ((green>>5)&0x07e0) | (blue>>11);
  31. ;
  32. ; So in 16 instructions, four pixels are written to the screen,
  33. ; the color is updated, four pixels are converted from internal
  34. ; to 565 for next pass and dest and count are updated.
  35. ;
  36. ; Register Usage for FourPixelLoop
  37. ; edi - Dest screen pointer
  38. ; mm3 - result of four consecutive color converted 565 colors
  39. ; mm4 - four consecutive red values
  40. ; mm5 - four consecutive green values
  41. ; mm6 - four consecutive blue values
  42. ; mm0 - four delta red values (4 times actual delta red)
  43. ; mm1 - four delta green values (4 times actual delta green)
  44. ; mm2 - four delta blue values (4 times actual delta blue)
  45. ; mm7 - temp used to convert to 565
  46. ;
  47. ; This is the FourPixelLoop
  48. ;
  49. ; movq MMWORD PTR [edi], mm3 ; Write four 565 pixels at once.
  50. ; sub edi, 8
  51. ;
  52. ; paddw mm5, mm1 ; 4 greens plus 4 delta greens
  53. ; paddw mm4, mm0 ; 4 reds plus 4 delta reds
  54. ;
  55. ; movq mm3, mm5 ; Copy green
  56. ; paddw mm6, mm2 ; 4 blues plus 4 delta blues
  57. ;
  58. ; psrlw mm3, 5 ; Shift green to correct location
  59. ; movq mm7, mm4 ; Copy Red
  60. ;
  61. ; pand mm3, MMWORD PTR Val0x07E007E007E007E0 ; Mask off green 6 bits.
  62. ; pand mm7, MMWORD PTR Val0xf800f800f800f800 ; Mask off reds 5 upper bits
  63. ;
  64. ; por mm7, mm3 ; Combine red and green
  65. ; movq mm3, mm6 ; Copy Blue
  66. ;
  67. ; psrlw mm3, 11 ; Move blues 5 upper bits to the bottom.
  68. ; por mm3, mm7
  69. ;
  70. ; dec edx
  71. ; jnz FourPixelLoopRtoL
  72. ;
  73. ; If there are any pixels to write after the four pixel loop,
  74. ; they have already been calculated and eax will determine
  75. ; what needs to be written by its two lower bits.
  76. ;
  77. ; WriteIndividualPixels is called when there are less than
  78. ; four pixels to write after alignment or the deltacolor would
  79. ; have had an overflow.
  80. ;
  81. ;-----------------------------------------------------------------------------
  82. INCLUDE iammx.inc
  83. INCLUDE offs_acp.inc
  84. ; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
  85. ; at the LSB, then six bits of green, then five bits of red.
  86. ;TBD check to see if this value is correct.
  87. COLOR_SHIFT equ 8 ; NOTE(review): not referenced by any instruction shown in this file.
  88. .586
  89. .model flat
  90. ; Big separating lines separate code into span code
  91. ; and loop code. If span and loop are not going to
  92. ; end up being combined then it will be easy to
  93. ; separate the code.
  94. .data
  95. Val0xe000e000e000 dq 00000e000e000e000h ; Top three bits of the three low words, high word zero. Used by the delta overflow check before the four pixel loops.
  96. Val0xf800f800f800f800 dq 0f800f800f800f800h ; Keeps the upper five bits of every word: the red field of four 565 pixels.
  97. Val0x07e007e007e007e0 dq 007e007e007e007e0h ; Keeps bits 5 to 10 of every word: the green field of four 565 pixels.
  98. ; Need externs for all of the variables that are needed for various beads
  99. ; NOTE(review): most of the masks declared below are not referenced by the code shown in this file.
  100. EXTERN MaskGreen565to888:MMWORD
  101. EXTERN MaskBlue565to888:MMWORD
  102. EXTERN MaskRed555to888:MMWORD
  103. EXTERN MaskGreen555to888:MMWORD
  104. EXTERN MaskBlue555to888:MMWORD
  105. EXTERN MaskAlpha1555to8888:MMWORD
  106. EXTERN MaskRed1555to8888:MMWORD
  107. EXTERN MaskGreen1555to8888:MMWORD
  108. EXTERN MaskBlue1555to8888:MMWORD
  109. ; TBD. I think that I want to do 0xffff instead of 0xff. This will
  110. ; have to be checked. There is a value very similar to this in
  111. ; buf write.
  112. EXTERN SetAlphato0xffff:MMWORD
  113. EXTERN SetAlphato0xff:MMWORD
  114. ; TODO These equates are identical to the ones in texread.mas. Maybe they should be in a common .inc file.
  115. RedShift565to888 equ 8
  116. GreenShift565to888 equ 5
  117. BlueShift565to888 equ 3
  118. RedShift555to888 equ 9
  119. GreenShift555to888 equ 6
  120. BlueShift555to888 equ 3
  121. AlphaShift1555to8888 equ 16
  122. RedShift1555to8888 equ 9
  123. GreenShift1555to8888 equ 6
  124. BlueShift1555to8888 equ 3
  125. EXTERN Zero:MMWORD
  126. EXTERN SetAlphato0xff:MMWORD ; Repeats the SetAlphato0xff declaration made a few lines above.
  127. EXTERN u888to565RedBlueMask:MMWORD ; Used by the single pixel 888 to 565 conversion.
  128. EXTERN u888to565GreenMask:MMWORD ; Used by the single pixel 888 to 565 conversion.
  129. EXTERN u888to565Multiplier:MMWORD ; Used by the single pixel 888 to 565 conversion.
  130. EXTERN uVal0x000007ff03ff07ff:MMWORD
  131. EXTERN uVal0x0000078003c00780:MMWORD
  132. EXTERN u888to555RedBlueMask:MMWORD
  133. EXTERN u888to555GreenMask:MMWORD
  134. EXTERN u888to555Multiplier:MMWORD
  135. EXTERN uVal0x000007ff07ff07ff:MMWORD
  136. EXTERN uVal0x0000078007800780:MMWORD
  137. ;-----------------------------------------------------------------------------
  138. ; Span Variables. Both are globals in .data, so the routine is not reentrant (see the header note about multiprocessing).
  139. StackPos dd ? ; esp as it was right after the entry push of ebp; reloaded in the epilogue.
  140. uSpans dd ? ; Spans still to draw for the current primitive.
  141. ;-----------------------------------------------------------------------------
  142. ;-----------------------------------------------------------------------------
  143. ; Loop Variables (none are defined here)
  144. ;-----------------------------------------------------------------------------
  145. .code
  146. PUBLIC _MMXMLRast_8
  147. _MMXMLRast_8: ; Entry point. Walks the primitive list of the context passed as the first stack argument and fills every span with 565 pixels. Returns 0 in eax.
  148. push ebp
  149. mov StackPos, esp ; Remember esp in a global so the epilogue can drop the local area again.
  150. mov eax, esp ; eax = base for fetching the argument: [eax] = saved ebp, [eax+4] = return address, [eax+8] = first argument.
  151. sub esp, 0Ch ; This will need to change if stack frame size changes. The reserved bytes are not referenced by the code shown here.
  152. push ebx
  153. push esi
  154. push edi
  155. xor edi, edi
  156. ; Put pCtx into ebx
  157. mov ebx, [eax+8]
  158. ;PD3DI_RASTPRIM pP = pCtx->pPrim;
  159. mov ecx, [ebx+RASTCTX_pPrim] ; ecx = pP for the whole routine.
  160. ;while (pP)
  161. ;{
  162. PrimLoop:
  163. cmp ecx, 0
  164. je ExitPrimLoop
  165. ;UINT16 uSpans = pP->uSpans;
  166. movzx eax, word ptr [ecx+RASTPRIM_uSpans]
  167. mov uSpans, eax
  168. ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
  169. mov ebp, ecx
  170. add ebp, SIZEOF_RASTPRIM ; ebp = pS. ebp is a plain data pointer here, not a frame pointer.
  171. ;while (uSpans-- > 0)
  172. ;{
  173. SpanLoop:
  174. mov edx, uSpans
  175. mov eax, edx
  176. dec eax
  177. mov uSpans, eax ; Store the decremented count ...
  178. test edx, edx ; ... but test the count from before the decrement.
  179. jle ExitSpanLoop
  180. mov edi, dword ptr [ebp+RASTSPAN_pSurface] ; edi = destination pointer for the first pixel of the span.
  181. movzx eax, word ptr [ebp+RASTSPAN_uPix] ; eax = pixel count. NOTE(review): no path below tests eax before writing the first pixel, so uPix == 0 looks unsafe - confirm callers never emit an empty span.
  182. ;if (pP->uFlags & D3DI_RASTPRIM_X_DEC)
  183. ;{
  184. mov edx, [ecx+RASTPRIM_uFlags]
  185. and edx, D3DI_RASTPRIM_X_DEC
  186. test edx, edx
  187. jz LeftToRightSpan ; Flag clear: take the left to right code. Flag set: fall through to the right to left code.
  188. ; RIGHT TO LEFT CASE: the destination pointer moves down in memory.
  189. movq mm0, [ebp+RASTSPAN_uB] ; mm0 = the four 16-bit color words that start at uB (presumably B, G, R, A from low to high word - TODO confirm against the span layout).
  190. ; This case is unnecessary if 16 bit color is always word aligned.
  191. ; It really should be, but it doesnt hurt to be safe for a 2 instruction
  192. ; penalty.
  193. test edi, 1
  194. jnz WriteIndividualPixelsRtoL ; Odd address can never become qword aligned, so use the one pixel loop for the whole span.
  195. beginingpixelsRtoL:
  196. ; Align color. Alignment is very different when drawing Right to Left.
  197. xor edi, 6 ; Simple trick to make alignment code work same as LtoR
  198. ; Could Possibly be better though.
  199. test edi, 7 ;Test to see if we are 4 pixel aligned.
  200. jz SetupFourPixelLoopRtoL ; Zero only when the low three bits of the original edi were 6, i.e. edi is the last word of an aligned qword.
  201. xor edi, 6 ; Make pointer back to what it used to be
  202. ;WritePixel
  203. movq mm5, mm0
  204. psrlw mm5, 8 ; Convert color from 8.8 to 0.8
  205. packuswb mm5, mm5 ; Pack the words to bytes. The same four bytes end up in the high and low dwords.
  206. movq mm3, mm5
  207. pand mm5, MMWORD PTR u888to565RedBlueMask ; Keep the bits selected by the external red/blue mask.
  208. pmaddwd mm5, MMWORD PTR u888to565Multiplier ; Multiply-add the word pairs by the external multiplier (presumably moves red and blue toward their 565 positions - TODO confirm constant).
  209. pand mm3, MMWORD PTR u888to565GreenMask ; Keep the bits selected by the external green mask.
  210. por mm5, mm3
  211. psrld mm5, 5 ; Final shift. The low word is the pixel that gets stored.
  212. movd edx, mm5
  213. mov [edi], dx ; Store one 16-bit pixel.
  214. sub edi, 2 ; Step the destination pointer one pixel to the left
  215. dec eax ; Reduce Pixel count
  216. jz NoMorePixelsRtoL
  217. ;pS->uB += pP->iDBDX; pS->uG += pP->iDGDX;
  218. ;pS->uR += pP->iDRDX; pS->uA += pP->iDADX;
  219. paddw mm0, [ecx+RASTPRIM_iDBDX] ; One paddw steps all four color words by their deltas.
  220. jmp beginingpixelsRtoL
  221. SetupFourPixelLoopRtoL: ; In: edi = span pointer xor 6, eax = pixels left, mm0 = current color. Out: edx = qword count, mm0-mm2 = 4*delta per channel, mm4-mm6 = channel values, mm3 = first four 565 pixels, edi = aligned qword address.
  222. xor edi, 6 ; Make pointer back to what it used to be
  223. ; Only go through trouble of setting up four pixels if we have four pixels.
  224. ; Check to see if there are four pixels left over after aligning pixels.
  225. mov edx, eax
  226. shr edx, 2 ; edx = number of whole four pixel groups
  227. jz WriteIndividualPixelsRtoL
  228. ; The switch to the left most address is made after the overflow check below, because the check can still bail out to WriteIndividualPixelsRtoL, which needs the unmodified pointer.
  229. ; !!! THIS EXTRA SETUP CODE PREVENTS A BUG THAT WOULD HAPPEN VERY SELDOMLY !!!
  230. ; Need to check for potential delta overflow.
  231. ; For example, if there are 5 pixels that change from 255 to 0
  232. ; then the delta would be -255/(5-1) = -63. Since we are doing at least 4 pixels,
  233. ; then we have delta*4 = -63*4 = -252 which doesnt fit in 8 signed bits.
  234. movq mm1, [ecx+RASTPRIM_iDBDX]
  235. movq mm2, mm1
  236. psraw mm1, 15 ; Make sign bit mask for a conditional negate. (Also called Absolute value last I checked.)
  237. pxor mm2, mm1
  238. psubw mm2, mm1 ; value should be between 0-128 in the upper byte of the words. (0-32768 for the word)
  239. pand mm2, MMWORD PTR Val0xe000e000e000 ; Check to see if any of the high three bits are set
  240. packuswb mm2, mm2 ; This will saturate if the high three bits are set. NOTE(review): a word of exactly 8000h packs to zero because the pack treats its source as signed.
  241. movd esi, mm2
  242. test esi, esi
  243. jnz WriteIndividualPixelsRtoL ; Delta too large for the times four step: draw the span one pixel at a time.
  244. xor edi, 6 ; Writing pixels from r to left so address needs to be left most address.
  245. ; Generate four starting color pixels
  246. ; -----------------------------------
  247. ; Put color + 0*delta in mm0. Changed it so that it started in mm0
  248. movq mm1, mm0
  249. paddw mm1, [ecx+RASTPRIM_iDBDX] ; Put color + 1*delta in mm1
  250. movq mm2, mm1
  251. paddw mm2, [ecx+RASTPRIM_iDBDX] ; Put color + 2*delta in mm2
  252. movq mm3, mm2
  253. paddw mm3, [ecx+RASTPRIM_iDBDX] ; Put color + 3*delta in mm3
  254. ; -----------------------------------
  255. ; Seperate colors.
  256. ; The unpack order is reversed compared to the left to right code: the low word of each result comes from mm3 (color + 3*delta) and the high word from mm0, so the first pixel lands at the highest address of the qword.
  257. ; Combine all reds into one mmx reg.
  258. movq mm4, mm3
  259. punpckhwd mm4, mm2
  260. movq mm5, mm1
  261. punpckhwd mm5, mm0
  262. punpckldq mm4, mm5 ; mm4 = word 2 of mm3, mm2, mm1, mm0 (low to high)
  263. ; Combine all greens into one mmx reg.
  264. movq mm5, mm3
  265. punpcklwd mm5, mm2
  266. movq mm6, mm1
  267. punpcklwd mm6, mm0
  268. punpckhdq mm5, mm6 ; mm5 = word 1 of mm3, mm2, mm1, mm0 (low to high)
  269. ; Combine all blues into one mmx reg.
  270. movq mm6, mm3
  271. punpcklwd mm6, mm2
  272. movq mm7, mm1
  273. punpcklwd mm7, mm0
  274. punpckldq mm6, mm7 ; mm6 = word 0 of mm3, mm2, mm1, mm0 (low to high)
  275. ; -----------------------------------
  276. ; Seperate delta colors.
  277. ; -----------------------------------
  278. ; If extra registers are needed, (i.e. Z buffer or dither) then deltas can be saved to memory. Three more regs.
  279. movq mm7, [ecx+RASTPRIM_iDBDX]
  280. psllw mm7, 2 ; Doing 4 pixels at a time so delta must be times 4.
  281. ; Combine all delta reds into one mmx reg.
  282. movq mm0, mm7
  283. punpckhwd mm0, mm7
  284. movq mm1, mm7
  285. punpckhwd mm1, mm7
  286. punpckldq mm0, mm1 ; mm0 = word 2 of the scaled delta in all four words
  287. ; Combine all delta greens into one mmx reg.
  288. movq mm1, mm7
  289. punpcklwd mm1, mm7
  290. movq mm2, mm7
  291. punpcklwd mm2, mm7
  292. punpckhdq mm1, mm2 ; mm1 = word 1 of the scaled delta in all four words
  293. ; Combine all delta blues into one mmx reg.
  294. movq mm2, mm7
  295. punpcklwd mm2, mm7
  296. movq mm3, mm7
  297. punpcklwd mm3, mm7
  298. punpckldq mm2, mm3 ; mm2 = word 0 of the scaled delta in all four words
  299. ; Need to combine for first pixel write.
  300. movq mm3, mm5 ; Copy green
  301. psrlw mm3, 5 ; Shift green to correct location
  302. movq mm7, mm4 ; Copy Red
  303. pand mm3, MMWORD PTR Val0x07E007E007E007E0 ; Mask off green 6 bits.
  304. pand mm7, MMWORD PTR Val0xf800f800f800f800 ; Mask off reds 5 upper bits
  305. por mm7, mm3
  306. movq mm3, mm6
  307. psrlw mm3, 11 ; Move blues 5 upper bits to the bottom.
  308. por mm3, mm7 ; mm3 = four finished 565 pixels for the first store of the loop
  309. FourPixelLoopRtoL: ; Per pass: store the four pixels prepared in mm3, step the channels by 4*delta, convert the next four pixels into mm3. edx = passes left.
  310. movq MMWORD PTR [edi], mm3 ; Write four 565 pixels at once.
  311. sub edi, 8 ; Next group is one qword lower in memory.
  312. paddw mm5, mm1 ; 4 greens plus 4 delta greens
  313. paddw mm4, mm0 ; 4 reds plus 4 delta reds
  314. movq mm3, mm5 ; Copy green
  315. paddw mm6, mm2 ; 4 blues plus 4 delta blues
  316. psrlw mm3, 5 ; Shift green to correct location
  317. movq mm7, mm4 ; Copy Red
  318. pand mm3, MMWORD PTR Val0x07E007E007E007E0 ; Mask off green 6 bits.
  319. pand mm7, MMWORD PTR Val0xf800f800f800f800 ; Mask off reds 5 upper bits
  320. por mm7, mm3 ; Combine red and green
  321. movq mm3, mm6 ; Copy Blue
  322. psrlw mm3, 11 ; Move blues 5 upper bits to the bottom.
  323. por mm3, mm7 ; mm3 = next four pixels. They are still valid when the loop exits and are used for the leftover pixels.
  324. dec edx
  325. jnz FourPixelLoopRtoL
  326. LastPixelsRtoL: ; Writes the 0 to 3 pixels left after the four pixel loop. eax bits 0-1 = how many, edi = qword address the next loop pass would have used.
  327. ; These can be written from values in mm3: it already holds the next four pixels, next pixel in the high word.
  328. test eax, 2
  329. jz OnePixelLeftRtoL
  330. movq mm1, mm3
  331. psrlq mm1, 32 ; mm1 low dword = the two right most pixels
  332. movd MMWORD PTR [edi+4], mm1 ; Store them in the upper half of the qword.
  333. sub edi, 4
  334. psllq mm3, 32 ; Move the third pixel up into the high word, which is where OnePixelLeftRtoL reads from. The old code shifted mm0 here, left mm3 untouched and so wrote the first pixel a second time.
  335. OnePixelLeftRtoL:
  336. test eax, 1
  337. jz NoMorePixelsRtoL
  338. psrlq mm3, 48 ; Bring the high word down to the low word.
  339. movd edx, mm3
  340. mov word ptr [edi+6], dx ; edi+6 is the top word of the qword, or the word just left of the pair stored above.
  341. jmp NoMorePixelsRtoL
  342. WriteIndividualPixelsRtoL: ; One pixel per pass, moving down in memory. In: edi = destination, eax = pixels left, mm0 = current color, ecx = pP.
  343. movq mm5, mm0
  344. psrlw mm5, 8 ; Convert color from 8.8 to 0.8
  345. packuswb mm5, mm5 ; Pack the words to bytes. The same four bytes end up in the high and low dwords.
  346. movq mm3, mm5
  347. pand mm5, MMWORD PTR u888to565RedBlueMask ; Keep the bits selected by the external red/blue mask.
  348. pmaddwd mm5, MMWORD PTR u888to565Multiplier ; Multiply-add the word pairs by the external multiplier.
  349. pand mm3, MMWORD PTR u888to565GreenMask ; Keep the bits selected by the external green mask.
  350. por mm5, mm3
  351. psrld mm5, 5 ; Final shift. The low word is the pixel that gets stored.
  352. movd edx, mm5
  353. mov [edi], dx ; Store one 16-bit pixel.
  354. sub edi, 2 ; One pixel to the left
  355. ;pS->uB += pP->iDBDX; pS->uG += pP->iDGDX;
  356. ;pS->uR += pP->iDRDX; pS->uA += pP->iDADX;
  357. paddw mm0, [ecx+RASTPRIM_iDBDX] ; One paddw steps all four color words by their deltas.
  358. dec eax
  359. jnz WriteIndividualPixelsRtoL ; The count is tested only after a pixel has been written.
  360. NoMorePixelsRtoL:
  361. jmp DoneSpanDirif ; Skip over the left to right code.
  362. ;else
  363. ;{
  364. LeftToRightSpan:
  365. ; NORMAL LEFT TO RIGHT CASE: the destination pointer moves up in memory.
  366. movq mm0, [ebp+RASTSPAN_uB] ; mm0 = the four 16-bit color words that start at uB (presumably B, G, R, A from low to high word - TODO confirm against the span layout).
  367. ; This case is unnecessary if 16 bit color is always word aligned.
  368. ; It really should be, but it doesnt hurt to be safe for a 2 instruction
  369. ; penalty.
  370. test edi, 1
  371. jnz WriteIndividualPixelsLtoR ; Odd address can never become qword aligned, so use the one pixel loop for the whole span.
  372. beginingpixelsLtoR:
  373. test edi, 7 ;Test to see if we are 4 pixel aligned.
  374. jz SetupFourPixelLoopLtoR
  375. ;WritePixel
  376. movq mm5, mm0
  377. psrlw mm5, 8 ; Convert color from 8.8 to 0.8
  378. packuswb mm5, mm5 ; Pack the words to bytes. The same four bytes end up in the high and low dwords.
  379. movq mm3, mm5
  380. pand mm5, MMWORD PTR u888to565RedBlueMask ; Keep the bits selected by the external red/blue mask.
  381. pmaddwd mm5, MMWORD PTR u888to565Multiplier ; Multiply-add the word pairs by the external multiplier (presumably moves red and blue toward their 565 positions - TODO confirm constant).
  382. pand mm3, MMWORD PTR u888to565GreenMask ; Keep the bits selected by the external green mask.
  383. por mm5, mm3
  384. psrld mm5, 5 ; Final shift. The low word is the pixel that gets stored.
  385. movd edx, mm5
  386. mov [edi], dx ; Store one 16-bit pixel.
  387. add edi, 2 ; Increase destination pointer
  388. dec eax ; Reduce Pixel count
  389. jz NoMorePixelsLtoR
  390. ;pS->uB += pP->iDBDX; pS->uG += pP->iDGDX;
  391. ;pS->uR += pP->iDRDX; pS->uA += pP->iDADX;
  392. paddw mm0, [ecx+RASTPRIM_iDBDX] ; One paddw steps all four color words by their deltas.
  393. jmp beginingpixelsLtoR
  394. SetupFourPixelLoopLtoR: ; In: edi = qword aligned destination, eax = pixels left, mm0 = current color. Out: edx = qword count, mm0-mm2 = 4*delta per channel, mm4-mm6 = channel values, mm3 = first four 565 pixels.
  395. ; Only go through trouble of setting up four pixels if we have four pixels.
  396. mov edx, eax
  397. shr edx, 2 ; edx = number of whole four pixel groups
  398. jz WriteIndividualPixelsLtoR
  399. ; !!! THIS EXTRA SETUP CODE PREVENTS A BUG THAT WOULD HAPPEN VERY SELDOMLY !!!
  400. ; Need to check for potential delta overflow.
  401. ; For example, if there are 5 pixels that change from 255 to 0
  402. ; then the delta would be -255/(5-1) = -63. Since we are doing at least 4 pixels,
  403. ; then we have delta*4 = -63*4 = -252 which doesnt fit in 8 signed bits.
  404. movq mm1, [ecx+RASTPRIM_iDBDX]
  405. movq mm2, mm1
  406. psraw mm1, 15 ; Make sign bit mask for a conditional negate. (Also called Absolute value last I checked.)
  407. pxor mm2, mm1
  408. psubw mm2, mm1 ; value should be between 0-128 in the upper byte of the words. (0-32768 for the word)
  409. pand mm2, MMWORD PTR Val0xe000e000e000 ; Check to see if any of the high three bits are set
  410. packuswb mm2, mm2 ; This will saturate if the high three bits are set. NOTE(review): a word of exactly 8000h packs to zero because the pack treats its source as signed.
  411. movd esi, mm2
  412. test esi, esi
  413. jnz WriteIndividualPixelsLtoR ; Delta too large for the times four step: draw the span one pixel at a time.
  414. ; -----------------------------------
  415. ; Generate four starting color pixels
  416. ; -----------------------------------
  417. ; Put color + 0*delta in mm0. Changed it so that it started in mm0
  418. movq mm1, mm0
  419. paddw mm1, [ecx+RASTPRIM_iDBDX] ; Put color + 1*delta in mm1
  420. movq mm2, mm1
  421. paddw mm2, [ecx+RASTPRIM_iDBDX] ; Put color + 2*delta in mm2
  422. movq mm3, mm2
  423. paddw mm3, [ecx+RASTPRIM_iDBDX] ; Put color + 3*delta in mm3
  424. ; -----------------------------------
  425. ; Seperate colors.
  426. ; The low word of each result comes from mm0 (color + 0*delta) and the high word from mm3, so the first pixel lands at the lowest address of the qword.
  427. ; Combine all reds into one mmx reg.
  428. movq mm4, mm0
  429. punpckhwd mm4, mm1
  430. movq mm5, mm2
  431. punpckhwd mm5, mm3
  432. punpckldq mm4, mm5 ; mm4 = word 2 of mm0, mm1, mm2, mm3 (low to high)
  433. ; Combine all greens into one mmx reg.
  434. movq mm5, mm0
  435. punpcklwd mm5, mm1
  436. movq mm6, mm2
  437. punpcklwd mm6, mm3
  438. punpckhdq mm5, mm6 ; mm5 = word 1 of mm0, mm1, mm2, mm3 (low to high)
  439. ; Combine all blues into one mmx reg.
  440. movq mm6, mm0
  441. punpcklwd mm6, mm1
  442. movq mm7, mm2
  443. punpcklwd mm7, mm3
  444. punpckldq mm6, mm7 ; mm6 = word 0 of mm0, mm1, mm2, mm3 (low to high)
  445. ; -----------------------------------
  446. ; Seperate delta colors.
  447. ; -----------------------------------
  448. ; If extra registers are needed, (i.e. Z buffer or dither) then deltas can be saved to memory. Three more regs.
  449. movq mm7, [ecx+RASTPRIM_iDBDX]
  450. psllw mm7, 2 ; Doing 4 pixels at a time so delta must be times 4.
  451. ; Combine all delta reds into one mmx reg.
  452. movq mm0, mm7
  453. punpckhwd mm0, mm7
  454. movq mm1, mm7
  455. punpckhwd mm1, mm7
  456. punpckldq mm0, mm1 ; mm0 = word 2 of the scaled delta in all four words
  457. ; Combine all delta greens into one mmx reg.
  458. movq mm1, mm7
  459. punpcklwd mm1, mm7
  460. movq mm2, mm7
  461. punpcklwd mm2, mm7
  462. punpckhdq mm1, mm2 ; mm1 = word 1 of the scaled delta in all four words
  463. ; Combine all delta blues into one mmx reg.
  464. movq mm2, mm7
  465. punpcklwd mm2, mm7
  466. movq mm3, mm7
  467. punpcklwd mm3, mm7
  468. punpckldq mm2, mm3 ; mm2 = word 0 of the scaled delta in all four words
  469. ; Need to combine for first pixel write.
  470. movq mm3, mm5 ; Copy green
  471. psrlw mm3, 5 ; Shift green to correct location
  472. movq mm7, mm4 ; Copy Red
  473. pand mm3, MMWORD PTR Val0x07E007E007E007E0 ; Mask off green 6 bits.
  474. pand mm7, MMWORD PTR Val0xf800f800f800f800 ; Mask off reds 5 upper bits
  475. por mm7, mm3
  476. movq mm3, mm6
  477. psrlw mm3, 11 ; Move blues 5 upper bits to the bottom.
  478. por mm3, mm7 ; mm3 = four finished 565 pixels for the first store of the loop
  479. FourPixelLoopLtoR: ; Per pass: store the four pixels prepared in mm3, step the channels by 4*delta, convert the next four pixels into mm3. edx = passes left.
  480. movq MMWORD PTR [edi], mm3 ; Write four 565 pixels at once.
  481. add edi, 8 ; Next group is one qword higher in memory.
  482. paddw mm5, mm1 ; 4 greens plus 4 delta greens
  483. paddw mm4, mm0 ; 4 reds plus 4 delta reds
  484. movq mm3, mm5 ; Copy green
  485. paddw mm6, mm2 ; 4 blues plus 4 delta blues
  486. psrlw mm3, 5 ; Shift green to correct location
  487. movq mm7, mm4 ; Copy Red
  488. pand mm3, MMWORD PTR Val0x07E007E007E007E0 ; Mask off green 6 bits.
  489. pand mm7, MMWORD PTR Val0xf800f800f800f800 ; Mask off reds 5 upper bits
  490. por mm7, mm3 ; Combine red and green
  491. movq mm3, mm6 ; Copy Blue
  492. psrlw mm3, 11 ; Move blues 5 upper bits to the bottom.
  493. por mm3, mm7 ; mm3 = next four pixels. They are still valid when the loop exits and are used for the leftover pixels.
  494. dec edx
  495. jnz FourPixelLoopLtoR
  496. LastPixelsLtoR: ; Writes the 0 to 3 pixels left after the four pixel loop. eax bits 0-1 = how many, edi = address of the next pixel.
  497. ; These can be written from values in mm3: it already holds the next four pixels, next pixel in the low word.
  498. test eax, 2
  499. jz OnePixelLeftLtoR
  500. movd MMWORD PTR [edi], mm3 ; Store the next two pixels.
  501. add edi, 4
  502. psrlq mm3, 32 ; Bring the third pixel down to the low word for OnePixelLeftLtoR. The old code shifted mm0 here, left mm3 untouched and so wrote the first pixel a second time.
  503. OnePixelLeftLtoR:
  504. test eax, 1
  505. jz NoMorePixelsLtoR
  506. movd edx, mm3
  507. mov word ptr [edi], dx ; Store the last pixel.
  508. jmp NoMorePixelsLtoR
  509. WriteIndividualPixelsLtoR: ; One pixel per pass, moving up in memory. In: edi = destination, eax = pixels left, mm0 = current color, ecx = pP.
  510. movq mm5, mm0
  511. psrlw mm5, 8 ; Convert color from 8.8 to 0.8
  512. packuswb mm5, mm5 ; Pack the words to bytes. The same four bytes end up in the high and low dwords.
  513. movq mm3, mm5
  514. pand mm5, MMWORD PTR u888to565RedBlueMask ; Keep the bits selected by the external red/blue mask.
  515. pmaddwd mm5, MMWORD PTR u888to565Multiplier ; Multiply-add the word pairs by the external multiplier.
  516. pand mm3, MMWORD PTR u888to565GreenMask ; Keep the bits selected by the external green mask.
  517. por mm5, mm3
  518. psrld mm5, 5 ; Final shift. The low word is the pixel that gets stored.
  519. movd edx, mm5
  520. mov [edi], dx ; Store one 16-bit pixel.
  521. add edi, 2 ; One pixel to the right
  522. ;pS->uB += pP->iDBDX; pS->uG += pP->iDGDX;
  523. ;pS->uR += pP->iDRDX; pS->uA += pP->iDADX;
  524. paddw mm0, [ecx+RASTPRIM_iDBDX] ; One paddw steps all four color words by their deltas.
  525. dec eax
  526. jnz WriteIndividualPixelsLtoR ; The count is tested only after a pixel has been written.
  527. NoMorePixelsLtoR:
  528. ;}
  529. DoneSpanDirif: ; Both span directions meet here once the span is drawn.
  530. ; Setup Code Ends
  531. ; ----------------------------------------------------------------------------------------------------------------
  532. ; Loop Code Begins
  533. ExitPixelLoop: ; No jump to this label appears in the code shown here.
  534. ; Loop code ends
  535. ;-----------------------------------------------------------------------------
  536. ; LoopAny code ends here
  537. ;-----------------------------------------------------------------------------
  538. ;pS++;
  539. add ebp, SIZEOF_RASTSPAN ; ebp = next span of the current primitive
  540. ;}
  541. jmp SpanLoop
  542. ExitSpanLoop:
  543. ;pP = pP->pNext;
  544. mov ecx, [ecx+RASTPRIM_pNext]
  545. ;}
  546. jmp PrimLoop
  547. ExitPrimLoop:
  548. ;_asm{
  549. emms ; Leave MMX state before returning to the caller.
  550. ;}
  551. ;return S_OK;
  552. xor eax, eax ; Return value 0
  553. ;}
  554. pop edi
  555. pop esi
  556. pop ebx
  557. mov esp, StackPos ; Drop the 0Ch byte local area by reloading the esp saved at entry.
  558. pop ebp
  559. ret
  560. END