Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

445 lines
14 KiB

  1. dnl----------------------------------------------------------------------------
  2. dnl
  3. dnl x86 assembly code generating macros for attribute handlers.
  4. dnl
  5. dnl Copyright (C) Microsoft Corporation, 1997.
  6. dnl
  7. dnl----------------------------------------------------------------------------
  8. dnl
  9. dnl d_AddAttrsCode
  10. dnl
  11. dnl Macro to generate fld/fadd/fstp for each argument with
  12. dnl pipelining across all arguments.
  13. dnl Achieves complete pipelining with four arguments, so
  14. dnl care should be taken to batch up at least four things.
  15. dnl A max of seven things should be added to avoid FP stack overflow.
  16. dnl
  dnl d_AddAttrsCodeLoop(pos, remaining, offsets...) is the recursive worker.
  dnl   $1     1-based position of the offset loaded at this level.
  dnl   $2     number of offsets still to load, this level included; it is
  dnl          also the 1-based position of the offset stored at this level.
  dnl   $3...  the full offset list, handed unchanged to every level.
  dnl Each level emits fld [ecx+off] and fadd [edx+off] for offset $1,
  dnl recurses with $1 + 1 and $2 - 1 while $2 > 1, and then, on the way
  dnl back out, emits fstp [ecx+off] for offset $2.  All loads are therefore
  dnl emitted before the first store, and stores come out in list order.
  dnl When $1 > $2 an fxch st($1 - $2) is emitted ahead of the store to
  dnl bring the sum for offset $2 to the top of the FP stack.
  dnl ecx is both read and written; edx is only read.
  dnl NOTE(review): d_Nth1 and d_shift are defined elsewhere.  This assumes
  dnl d_Nth1(n, list...) yields the n-th (1-based) element and d_shift drops
  dnl the first argument - confirm against their definitions.
  dnl
  17. define(`d_AddAttrsCodeLoop',
  18. ` fld DWORD PTR [ecx+d_Nth1($1, d_shift(d_shift($@)))]
  19. fadd DWORD PTR [edx+d_Nth1($1, d_shift(d_shift($@)))]
  20. ifelse(eval($2 > 1), `1',
  21. `d_AddAttrsCodeLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
  22. ifelse(eval($1 > $2), `1',
  23. ` fxch st(eval($1 - $2))
  24. ')dnl
  25. fstp DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
  26. ')dnl
  dnl
  dnl d_AddAttrsCode(offsets...)
  dnl Entry point for the unscaled FP adder.  Expands d_AddAttrsCodeLoop
  dnl starting at position 1, with $# as the offset count, followed by the
  dnl caller-supplied offset list.
  dnl
  27. define(`d_AddAttrsCode', `d_AddAttrsCodeLoop(`1', $#, $@)')dnl
  dnl
  dnl d_AddUVowCode
  dnl
  dnl Emits a run-time loop that adds the ATTRSET_fUoW and ATTRSET_fVoW
  dnl entries found at edx into the same entries at ecx, one DWORD of each
  dnl per iteration, indexed by eax * 4.
  dnl The counter eax starts at zero and the loop exits when it equals the
  dnl DWORD at RCTX_cActTex inside the context reached by loading pStpCtx
  dnl and then the pointer at SCTX_pCtx (presumably the number of active
  dnl textures - confirm against the context definitions).
  dnl
  dnl $1 and $2 are used only to build the Loop/Done label names, so
  dnl expansions with different arguments get different labels.
  dnl Clobbers eax.  ebx is pushed before and popped after the loop.
  dnl The FP stack is left as it was found; two entries are used at peak.
  dnl
  28. define(`d_AddUVowCode',
  29. ` ; Just in case ebx is used
  30. push ebx
  31. mov ebx, pStpCtx
  32. mov ebx, [ebx + SCTX_pCtx]
  33. xor eax, eax
  34. Loop$1$2:
  35. cmp eax, DWORD PTR[ebx + RCTX_cActTex]
  36. je Done$1$2
  37. fld DWORD PTR [ecx + ATTRSET_fUoW + eax * 4]
  38. fadd DWORD PTR [edx+ATTRSET_fUoW + eax * 4]
  39. fld DWORD PTR [ecx + ATTRSET_fVoW + eax * 4]
  40. fadd DWORD PTR [edx+ATTRSET_fVoW + eax * 4]
  41. fstp DWORD PTR [ecx+ATTRSET_fVoW + eax * 4]
  42. fstp DWORD PTR [ecx+ATTRSET_fUoW + eax * 4]
  43. inc eax
  44. jmp Loop$1$2
  45. Done$1$2:
  46. pop ebx
  47. ')dnl
  48. dnl
  49. dnl d_AddScaledAttrsCode
  50. dnl
  51. dnl Macro to generate fld/fmul/fadd/fstp for each argument with
  52. dnl pipelining across all arguments.
  53. dnl Achieves complete pipelining with four arguments, so
  54. dnl care should be taken to batch up at least four things.
  55. dnl A max of seven things should be added to avoid FP stack overflow.
  56. dnl
  dnl d_AddScaledAttrsLoadLoop(pos, remaining, offsets...) is the recursive
  dnl worker for the load, scale and add phase.
  dnl   $1     1-based position of the offset loaded from edx at this level.
  dnl   $2     number of offsets still to load, this level included; it is
  dnl          also the 1-based position of the ecx offset added at this
  dnl          level.
  dnl   $3...  the full offset list, handed unchanged to every level.
  dnl Each level emits fld [edx+off] and fmul fScaleVal for offset $1 and
  dnl recurses with $1 + 1 and $2 - 1 while $2 > 1.  On the way back out it
  dnl emits an fxch (st($1 - 1), or st($2 - 1) at the outermost level where
  dnl $1 is 1), then fld [ecx+off] for offset $2 and faddp st(1), st(0).
  dnl Nothing is stored here: one sum per offset is left on the FP stack,
  dnl and the extra fld means peak depth is the offset count plus one.
  dnl NOTE(review): d_Nth1 and d_shift are defined elsewhere.  This assumes
  dnl d_Nth1(n, list...) yields the n-th (1-based) element and d_shift drops
  dnl the first argument - confirm against their definitions.
  dnl
  57. define(`d_AddScaledAttrsLoadLoop',
  58. ` fld DWORD PTR [edx+d_Nth1($1, d_shift(d_shift($@)))]
  59. fmul fScaleVal
  60. ifelse(eval($2 > 1), `1',
  61. `d_AddScaledAttrsLoadLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
  62. ifelse(eval($1 > 1), `1',
  63. ` fxch st(decr($1))
  64. ',
  65. ` fxch st(decr($2))
  66. ')dnl
  67. fld DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
  68. faddp st(1), st(0)
  69. ')dnl
  dnl
  dnl d_AddScaledAttrsStoreLoop(pos, remaining, offsets...)
  dnl Emits one fstp [ecx+off] per offset for values already sitting on the
  dnl FP stack.
  dnl   $1     level counter; d_AddScaledAttrsCode starts it at 0.
  dnl   $2     number of offsets still to store, this level included; it is
  dnl          also the 1-based position of the offset stored at this level.
  dnl   $3...  the full offset list, handed unchanged to every level.
  dnl The macro recurses before emitting anything, so the store for the
  dnl first offset is emitted first and the store for the last offset last.
  dnl Ahead of a store it emits fxch st($1 - $2) when $1 > $2, and
  dnl fxch st(1) when $1 is 1.
  dnl NOTE(review): the fxch pattern appears to rely on the stack order left
  dnl behind by d_AddScaledAttrsLoadLoop - confirm before using this macro
  dnl on its own.
  dnl
  70. define(`d_AddScaledAttrsStoreLoop',
  71. `ifelse(eval($2 > 1), `1',
  72. `d_AddScaledAttrsStoreLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
  73. ifelse(eval($1 > $2), `1',
  74. ` fxch st(eval($1 - $2))
  75. ')dnl
  76. ifelse($1, `1',
  77. ` fxch st(1)
  78. ')dnl
  79. fstp DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
  80. ')dnl
  dnl
  dnl d_AddScaledAttrsCode(offsets...)
  dnl Entry point for the scaled FP adder.  Expands
  dnl d_AddScaledAttrsLoadLoop starting at position 1 and then
  dnl d_AddScaledAttrsStoreLoop starting at position 0, both with $# as the
  dnl offset count and the same caller-supplied offset list.
  dnl
  81. define(`d_AddScaledAttrsCode',
  82. `d_AddScaledAttrsLoadLoop(`1', $#, $@)dnl
  83. d_AddScaledAttrsStoreLoop(`0', $#, $@)')dnl
  dnl
  dnl d_AddScaledUVowCode
  dnl
  dnl Emits a run-time loop that, for each index, multiplies the
  dnl ATTRSET_fUoW and ATTRSET_fVoW entries found at edx by fScaleVal and
  dnl adds the products into the same entries at ecx.  Entries are DWORDs
  dnl indexed by eax * 4.
  dnl The counter eax starts at zero and the loop exits when it equals the
  dnl DWORD at RCTX_cActTex inside the context reached by loading pStpCtx
  dnl and then the pointer at SCTX_pCtx (presumably the number of active
  dnl textures - confirm against the context definitions).
  dnl
  dnl $1 and $2 are used only to build the LoopScaled/DoneScaled label
  dnl names, so expansions with different arguments get different labels.
  dnl Clobbers eax.  ebx is pushed before and popped after the loop.
  dnl The trailing comments on the FP lines show the FP stack contents,
  dnl top of stack first, after that instruction.
  dnl
  84. define(`d_AddScaledUVowCode',
  85. ` ; Just in case ebx is used
  86. push ebx
  87. mov ebx, pStpCtx
  88. mov ebx, [ebx + SCTX_pCtx]
  89. xor eax, eax
  90. LoopScaled$1$2:
  91. cmp eax, DWORD PTR[ebx + RCTX_cActTex]
  92. je DoneScaled$1$2
  93. fld DWORD PTR [edx+ATTRSET_fUoW + eax * 4]
  94. fmul fScaleVal ; fU*fScale
  95. fld DWORD PTR [edx+ATTRSET_fVoW + eax * 4]
  96. fmul fScaleVal ; fV*fScale fU*fScale
  97. fld DWORD PTR [ecx+ATTRSET_fVoW + eax * 4] ; fVc fV*fScale fU*fScale
  98. faddp st(1), st(0) ; fVc+fV*fScale fU*fScale
  99. fxch st(1) ; fU*fScale fVc+fV*fScale
  100. fld DWORD PTR [ecx+ATTRSET_fUoW + eax * 4] ; fUc fU*fScale fVc+fV*fScale
  101. faddp st(1), st(0) ; fUc+fU*fScale fVc+fV*fScale
  102. fstp DWORD PTR [ecx+ATTRSET_fUoW + eax * 4] ; fVc+fV*fScale
  103. fstp DWORD PTR [ecx+ATTRSET_fVoW + eax * 4]
  104. inc eax
  105. jmp LoopScaled$1$2
  106. DoneScaled$1$2:
  107. pop ebx
  108. ')dnl
  109. dnl
  110. dnl d_AddFloatAttrsBody
  111. dnl
  112. dnl Generates the body of an FP attribute adder routine.
  113. dnl Attributes are processed in cache order as much as possible.
  114. dnl
  115. dnl $1 is one of Z_Diff, Z_Diff_Spec, Z_Diff_Tex, Z_Diff_Spec_Tex,
  116. dnl Z_DIdx, Z_DIdx_Tex, Z_Tex.
  117. dnl
  dnl Register use in the emitted code: ecx is the base of the ATTRSET that
  dnl is updated in place, edx is the base of the ATTRSET supplying the
  dnl amounts to add, and eax is scratch.
  dnl
  dnl The integer part always runs: ATTRSET_ipSurface and ATTRSET_ipZ are
  dnl read from edx and added into ATTRSET_pSurface and ATTRSET_pZ at ecx.
  dnl
  dnl The FP part is chosen by comparing $1, as an exact string, with each
  dnl of the names above; the matching branch expands d_AddAttrsCode over
  dnl the listed ATTRSET_f* offsets.  A $1 that matches none of the names
  dnl emits only the pointer adds.  The branches whose name ends in Tex
  dnl also expand d_AddUVowCode with $1 and Float as its label arguments.
  dnl
  118. define(`d_AddFloatAttrsBody',
  119. `
  120. ; Add surface pointers.
  121. mov eax, [edx+ATTRSET_ipSurface]
  122. add eax, [ecx+ATTRSET_pSurface]
  123. mov [ecx+ATTRSET_pSurface], eax
  124. mov eax, [edx+ATTRSET_ipZ]
  125. add eax, [ecx+ATTRSET_pZ]
  126. mov [ecx+ATTRSET_pZ], eax
  127. ; Do FP additions.
  128. ifelse(`$1', `Z_Diff',
  129. `d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fB',
  130. `ATTRSET_fG', `ATTRSET_fR',
  131. `ATTRSET_fA')dnl
  132. ')dnl
  133. ifelse(`$1', `Z_Diff_Spec',
  134. `d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fB',
  135. `ATTRSET_fG', `ATTRSET_fR')dnl
  136. d_AddAttrsCode(`ATTRSET_fA', `ATTRSET_fBS',
  137. `ATTRSET_fGS', `ATTRSET_fRS')dnl
  138. ')dnl
  139. ifelse(`$1', `Z_Diff_Tex',
  140. `d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW',
  141. `ATTRSET_fB', `ATTRSET_fG',
  142. `ATTRSET_fR', `ATTRSET_fA')dnl
  143. d_AddUVowCode($1, `Float')
  144. ')dnl
  145. ifelse(`$1', `Z_Diff_Spec_Tex',
  146. `d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW',
  147. `ATTRSET_fB')dnl
  148. d_AddUVowCode($1, `Float')
  149. d_AddAttrsCode(`ATTRSET_fG', `ATTRSET_fR',
  150. `ATTRSET_fA', `ATTRSET_fBS',
  151. `ATTRSET_fGS', `ATTRSET_fRS')dnl
  152. ')dnl
  153. ifelse(`$1', `Z_DIdx',
  154. `d_AddAttrsCode(`ATTRSET_fZ',
  155. `ATTRSET_fDIdx', `ATTRSET_fDIdxA')dnl
  156. ')dnl
  157. ifelse(`$1', `Z_DIdx_Tex',
  158. `d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW',
  159. `ATTRSET_fDIdx', `ATTRSET_fDIdxA')dnl
  160. d_AddUVowCode($1, `Float')
  161. ')dnl
  162. ifelse(`$1', `Z_Tex',
  163. `d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW')dnl
  164. d_AddUVowCode($1, `Float')
  165. ')dnl
  166. ')dnl
  167. dnl
  168. dnl d_AddFixedAttrsBody
  169. dnl
  170. dnl Generates the body of a fixed attribute adder routine.
  171. dnl Attributes are processed in cache order as much as possible.
  172. dnl
  173. dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex in any mix.
  174. dnl
  dnl Register use in the emitted code: ecx is the base of the ATTRSET that
  dnl is updated in place, edx is the base of the ATTRSET supplying the
  dnl amounts to add, and eax is scratch.  Every field is handled as a
  dnl DWORD: load from edx, add the value at ecx, store back to ecx.
  dnl
  dnl Always emitted: ipSurface/ipZ from edx added into pSurface/pZ at ecx.
  dnl Then one section per substring found in $1, in this order:
  dnl   Z     iZ
  dnl   Tex   iOoW, then a run-time loop over iUoW and iVoW indexed by
  dnl         4 * edi.  edi counts up from zero until it equals the DWORD at
  dnl         RCTX_cActTex in the context reached through pStpCtx and
  dnl         SCTX_pCtx.  ebx and edi are pushed before the loop and popped
  dnl         after it.
  dnl   Diff  iB, iG, iR, iA
  dnl   Spec  iBS, iGS, iRS
  dnl   DIdx  iDIdx, iDIdxA
  dnl
  dnl $1 is also pasted into the LoopFixed/DoneFixed label names.
  dnl NOTE(review): assumes d_index, defined elsewhere, returns a negative
  dnl value when the substring is absent - confirm.
  dnl NOTE(review): d_FillSpanFixedAttrsBody builds the same LoopFixed$1 and
  dnl DoneFixed$1 label names.  Confirm the two are never expanded with the
  dnl same $1 inside one label scope.
  dnl
  175. define(`d_AddFixedAttrsBody',
  176. `
  177. ; Add surface pointers.
  178. mov eax, [edx+ATTRSET_ipSurface]
  179. add eax, [ecx+ATTRSET_pSurface]
  180. mov [ecx+ATTRSET_pSurface], eax
  181. mov eax, [edx+ATTRSET_ipZ]
  182. add eax, [ecx+ATTRSET_pZ]
  183. mov [ecx+ATTRSET_pZ], eax
  184. ; Add attributes.
  185. ifelse(eval(d_index(`$1', `Z') >= 0), `1',
  186. `
  187. mov eax, [edx+ATTRSET_iZ]
  188. add eax, [ecx+ATTRSET_iZ]
  189. mov [ecx+ATTRSET_iZ], eax
  190. ')dnl
  191. ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
  192. `
  193. mov eax, [edx+ATTRSET_iOoW]
  194. add eax, [ecx+ATTRSET_iOoW]
  195. mov [ecx+ATTRSET_iOoW], eax
  196. ; Just in case ebx and edi are used
  197. push ebx
  198. push edi
  199. mov ebx, pStpCtx
  200. mov ebx, [ebx + SCTX_pCtx]
  201. xor edi, edi
  202. LoopFixed$1:
  203. cmp edi, DWORD PTR[ebx + RCTX_cActTex]
  204. je DoneFixed$1
  205. mov eax, [edx+ATTRSET_iUoW + 4 * edi]
  206. add eax, [ecx+ATTRSET_iUoW + 4 * edi]
  207. mov [ecx+ATTRSET_iUoW + 4 * edi], eax
  208. mov eax, [edx+ATTRSET_iVoW + 4 * edi]
  209. add eax, [ecx+ATTRSET_iVoW + 4 * edi]
  210. mov [ecx+ATTRSET_iVoW + 4 * edi], eax
  211. inc edi
  212. jmp LoopFixed$1
  213. DoneFixed$1:
  214. pop edi
  215. pop ebx
  216. ')dnl
  217. ifelse(eval(d_index(`$1', `Diff') >= 0), `1',
  218. `
  219. mov eax, [edx+ATTRSET_iB]
  220. add eax, [ecx+ATTRSET_iB]
  221. mov [ecx+ATTRSET_iB], eax
  222. mov eax, [edx+ATTRSET_iG]
  223. add eax, [ecx+ATTRSET_iG]
  224. mov [ecx+ATTRSET_iG], eax
  225. mov eax, [edx+ATTRSET_iR]
  226. add eax, [ecx+ATTRSET_iR]
  227. mov [ecx+ATTRSET_iR], eax
  228. mov eax, [edx+ATTRSET_iA]
  229. add eax, [ecx+ATTRSET_iA]
  230. mov [ecx+ATTRSET_iA], eax
  231. ')dnl
  232. ifelse(eval(d_index(`$1', `Spec') >= 0), `1',
  233. `
  234. mov eax, [edx+ATTRSET_iBS]
  235. add eax, [ecx+ATTRSET_iBS]
  236. mov [ecx+ATTRSET_iBS], eax
  237. mov eax, [edx+ATTRSET_iGS]
  238. add eax, [ecx+ATTRSET_iGS]
  239. mov [ecx+ATTRSET_iGS], eax
  240. mov eax, [edx+ATTRSET_iRS]
  241. add eax, [ecx+ATTRSET_iRS]
  242. mov [ecx+ATTRSET_iRS], eax
  243. ')dnl
  244. ifelse(eval(d_index(`$1', `DIdx') >= 0), `1',
  245. `
  246. mov eax, [edx+ATTRSET_iDIdx]
  247. add eax, [ecx+ATTRSET_iDIdx]
  248. mov [ecx+ATTRSET_iDIdx], eax
  249. mov eax, [edx+ATTRSET_iDIdxA]
  250. add eax, [ecx+ATTRSET_iDIdxA]
  251. mov [ecx+ATTRSET_iDIdxA], eax
  252. ')dnl
  253. ')dnl
  254. dnl
  255. dnl d_FillSpanFloatAttrsBody
  256. dnl
  257. dnl Generates the body of a FP span filler routine.
  258. dnl Suboptimal cache ordering due to attempt to overlap OoW divide with
  259. dnl integer ops.
  260. dnl
  261. dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex in any mix.
  262. dnl
  dnl Register use in the emitted code: ecx is the base of the source
  dnl ATTRSET (only read), edx is the base of the destination RASTSPAN
  dnl (only written), and eax is scratch.  FP values are converted with
  dnl fist/fistp, so the result depends on the FPU rounding mode in effect.
  dnl
  dnl Emitted in this order:
  dnl   Tex   fld fOoW, fist to iOoW, then fdivr against OOW_W_SCALE.  The
  dnl         quotient stays on the FP stack across the pointer copies.
  dnl   all   pSurface and pZ copied through eax.
  dnl   Tex   eax is zeroed and stored as one DWORD at iLOD; fistp writes
  dnl         the pending quotient to iW.  A run-time loop then converts
  dnl         fUoW and fVoW (4-byte stride) into RASTSPAN_UVoW at an 8-byte
  dnl         stride, U at +0 and V at +4.  The loop reuses the zeroed eax
  dnl         as its counter and stops when eax equals the DWORD at
  dnl         RCTX_cActTex in the context reached through pStpCtx and
  dnl         SCTX_pCtx.  ebx is pushed before the loop and popped after it.
  dnl   Z     fZ to uZ.
  dnl   Diff  fB and fR are stored straight to uB and uR as DWORDs; fG and
  dnl         fA go through the scratch DWORD iVal and only the low WORD is
  dnl         copied to uG and uA.  Each WORD store is emitted after the
  dnl         DWORD store that precedes its field.
  dnl   Spec  fBS and fRS stored as DWORDs to uBS and uRS; fGS goes through
  dnl         iVal and its low WORD is copied to uGS.
  dnl   DIdx  fDIdx to iIdx and fDIdxA to iIdxA.
  dnl
  dnl $1 is also pasted into the LoopFloat/DoneFloat label names.
  dnl NOTE(review): assumes d_index, defined elsewhere, returns a negative
  dnl value when the substring is absent - confirm.
  dnl
  263. define(`d_FillSpanFloatAttrsBody',
  264. `ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
  265. `
  266. fld DWORD PTR [ecx+ATTRSET_fOoW]
  267. fist DWORD PTR [edx+RASTSPAN_iOoW]
  268. fdivr DWORD PTR OOW_W_SCALE
  269. ')dnl
  270. ; Set surface pointers.
  271. mov eax, [ecx+ATTRSET_pSurface]
  272. mov [edx+RASTSPAN_pSurface], eax
  273. mov eax, [ecx+ATTRSET_pZ]
  274. mov [edx+RASTSPAN_pZ], eax
  275. ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
  276. `
  277. ; Clears both iLOD and iDLOD.
  278. xor eax, eax
  279. mov [edx+RASTSPAN_iLOD], eax
  280. fistp DWORD PTR [edx+RASTSPAN_iW]
  281. ; Just in case ebx is used
  282. push ebx
  283. mov ebx, pStpCtx
  284. mov ebx, [ebx + SCTX_pCtx]
  285. LoopFloat$1:
  286. cmp eax, DWORD PTR[ebx + RCTX_cActTex]
  287. je DoneFloat$1
  288. fld DWORD PTR [ecx+ATTRSET_fUoW + 4 * eax]
  289. fistp DWORD PTR [edx+RASTSPAN_UVoW + 8 * eax]
  290. fld DWORD PTR [ecx+ATTRSET_fVoW + 4 * eax]
  291. fistp DWORD PTR [edx+RASTSPAN_UVoW + 8 * eax + 4]
  292. inc eax
  293. jmp LoopFloat$1
  294. DoneFloat$1:
  295. pop ebx
  296. ')dnl
  297. ifelse(eval(d_index(`$1', `Z') >= 0), `1',
  298. `
  299. fld DWORD PTR [ecx+ATTRSET_fZ]
  300. fistp DWORD PTR [edx+RASTSPAN_uZ]
  301. ')dnl
  302. ifelse(eval(d_index(`$1', `Diff') >= 0), `1',
  303. `
  304. ; Directly store DWORD-aligned fields, then whack in
  305. ; half DWORDs.
  306. ; ATTENTION - 8.8 color fields could use the FP fixing trick
  307. ; to use fstp instead of fistp. Adds could be overlapped
  308. ; so itd be free cycles?
  309. fld DWORD PTR [ecx+ATTRSET_fG]
  310. fistp DWORD PTR iVal
  311. fld DWORD PTR [ecx+ATTRSET_fB]
  312. fistp DWORD PTR [edx+RASTSPAN_uB]
  313. mov ax, WORD PTR iVal
  314. fld DWORD PTR [ecx+ATTRSET_fA]
  315. fistp DWORD PTR iVal
  316. mov WORD PTR [edx+RASTSPAN_uG], ax
  317. fld DWORD PTR [ecx+ATTRSET_fR]
  318. fistp DWORD PTR [edx+RASTSPAN_uR]
  319. mov ax, WORD PTR iVal
  320. mov WORD PTR [edx+RASTSPAN_uA], ax
  321. ')dnl
  322. ifelse(eval(d_index(`$1', `Spec') >= 0), `1',
  323. `
  324. fld DWORD PTR [ecx+ATTRSET_fGS]
  325. fistp DWORD PTR iVal
  326. fld DWORD PTR [ecx+ATTRSET_fBS]
  327. fistp DWORD PTR [edx+RASTSPAN_uBS]
  328. mov ax, WORD PTR iVal
  329. mov WORD PTR [edx+RASTSPAN_uGS], ax
  330. ; Trashes uFog, but thats OK because fog isnt getting used.
  331. fld DWORD PTR [ecx+ATTRSET_fRS]
  332. fistp DWORD PTR [edx+RASTSPAN_uRS]
  333. ')dnl
  334. ifelse(eval(d_index(`$1', `DIdx') >= 0), `1',
  335. `
  336. fld DWORD PTR [ecx+ATTRSET_fDIdx]
  337. fistp DWORD PTR [edx+RASTSPAN_iIdx]
  338. fld DWORD PTR [ecx+ATTRSET_fDIdxA]
  339. fistp DWORD PTR [edx+RASTSPAN_iIdxA]
  340. ')dnl
  341. ')dnl
  342. dnl
  343. dnl d_FillSpanFixedAttrsBody
  344. dnl
  345. dnl Generates the body of a fixed span filler routine.
  346. dnl Cache ordered except for the overlap of the OoW divide.
  347. dnl
  348. dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex in any mix.
  349. dnl
  dnl Register use in the emitted code: ecx is the base of the source
  dnl ATTRSET (only read), edx is the base of the destination RASTSPAN
  dnl (only written), and eax is scratch.
  dnl
  dnl Emitted in this order:
  dnl   Tex   fild iOoW, then fdivr against OOW_W_SCALE.  The quotient stays
  dnl         on the FP stack until the final fistp below, so every integer
  dnl         copy in between is emitted while the divide is pending.
  dnl   all   pSurface and pZ copied through eax.
  dnl   Z     uZ copied as a DWORD.
  dnl   Tex   eax is zeroed and stored as one DWORD at iLOD; iOoW is copied.
  dnl         A run-time loop then copies iUoW and iVoW (4-byte stride) into
  dnl         RASTSPAN_UVoW at an 8-byte stride, U at +0 and V at +4.  edi
  dnl         counts up from zero until it equals the DWORD at RCTX_cActTex
  dnl         in the context reached through pStpCtx and SCTX_pCtx.  ebx and
  dnl         edi are pushed before the loop and popped after it.
  dnl   Diff  uB and uR copied as DWORDs first, then uG and uA copied as
  dnl         16-bit values through ax.
  dnl   Spec  uBS and uRS copied as DWORDs, then uGS as a 16-bit value.
  dnl   DIdx  uDIdx to iIdx and uDIdxA to iIdxA.
  dnl   Tex   fistp writes the pending quotient to iW.
  dnl
  dnl $1 is also pasted into the LoopFixed/DoneFixed label names.
  dnl NOTE(review): assumes d_index, defined elsewhere, returns a negative
  dnl value when the substring is absent - confirm.
  dnl NOTE(review): d_AddFixedAttrsBody builds the same LoopFixed$1 and
  dnl DoneFixed$1 label names.  Confirm the two are never expanded with the
  dnl same $1 inside one label scope.
  dnl
  350. define(`d_FillSpanFixedAttrsBody',
  351. `ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
  352. `
  353. fild DWORD PTR [ecx+ATTRSET_iOoW]
  354. fdivr DWORD PTR OOW_W_SCALE
  355. ')dnl
  356. ; Set surface pointers.
  357. mov eax, [ecx+ATTRSET_pSurface]
  358. mov [edx+RASTSPAN_pSurface], eax
  359. mov eax, [ecx+ATTRSET_pZ]
  360. mov [edx+RASTSPAN_pZ], eax
  361. ifelse(eval(d_index(`$1', `Z') >= 0), `1',
  362. `
  363. mov eax, [ecx+ATTRSET_uZ]
  364. mov [edx+RASTSPAN_uZ], eax
  365. ')dnl
  366. ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
  367. `
  368. ; Clears both iLOD and iDLOD.
  369. xor eax, eax
  370. mov [edx+RASTSPAN_iLOD], eax
  371. mov eax, [ecx+ATTRSET_iOoW]
  372. mov [edx+RASTSPAN_iOoW], eax
  373. ; Just in case ebx and edi are used
  374. push ebx
  375. push edi
  376. mov ebx, pStpCtx
  377. mov ebx, [ebx + SCTX_pCtx]
  378. xor edi, edi
  379. LoopFixed$1:
  380. cmp edi, DWORD PTR[ebx + RCTX_cActTex]
  381. je DoneFixed$1
  382. mov eax, [ecx+ATTRSET_iUoW + 4 * edi]
  383. mov [edx+RASTSPAN_UVoW + 8 * edi], eax
  384. mov eax, [ecx+ATTRSET_iVoW + 4 * edi]
  385. mov [edx+RASTSPAN_UVoW + 8 * edi + 4], eax
  386. inc edi
  387. jmp LoopFixed$1
  388. DoneFixed$1:
  389. pop edi
  390. pop ebx
  391. ')dnl
  392. ifelse(eval(d_index(`$1', `Diff') >= 0), `1',
  393. `
  394. ; Directly store DWORD-aligned fields, then whack in
  395. ; half DWORDs.
  396. ; ATTENTION - Keep word pairs shifted and OR together to store
  397. ; as DWORDs instead?
  398. mov eax, [ecx+ATTRSET_uB]
  399. mov [edx+RASTSPAN_uB], eax
  400. mov eax, [ecx+ATTRSET_uR]
  401. mov [edx+RASTSPAN_uR], eax
  402. mov ax, [ecx+ATTRSET_uG]
  403. mov [edx+RASTSPAN_uG], ax
  404. mov ax, [ecx+ATTRSET_uA]
  405. mov [edx+RASTSPAN_uA], ax
  406. ')dnl
  407. ifelse(eval(d_index(`$1', `Spec') >= 0), `1',
  408. `
  409. mov eax, [ecx+ATTRSET_uBS]
  410. mov [edx+RASTSPAN_uBS], eax
  411. mov eax, [ecx+ATTRSET_uRS]
  412. mov [edx+RASTSPAN_uRS], eax
  413. mov ax, [ecx+ATTRSET_uGS]
  414. mov [edx+RASTSPAN_uGS], ax
  415. ')dnl
  416. ifelse(eval(d_index(`$1', `DIdx') >= 0), `1',
  417. `
  418. mov eax, [ecx+ATTRSET_uDIdx]
  419. mov [edx+RASTSPAN_iIdx], eax
  420. mov eax, [ecx+ATTRSET_uDIdxA]
  421. mov [edx+RASTSPAN_iIdxA], eax
  422. ')dnl
  423. ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
  424. `
  425. fistp DWORD PTR [edx+RASTSPAN_iW]
  426. ')dnl
  427. ')dnl