Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

387 lines
12 KiB

  dnl----------------------------------------------------------------------------
dnl
dnl x86 assembly code generating macros for attribute handlers.
dnl
dnl Copyright (C) Microsoft Corporation, 1997.
dnl
dnl----------------------------------------------------------------------------
  dnl
dnl d_AddAttrsCode(attr1, attr2, ...)
dnl
dnl Macro to generate fld/fadd/fstp for each argument with
dnl pipelining across all arguments. For every attribute offset given,
dnl the emitted code computes  [ecx+attr] += [edx+attr]  on DWORD floats.
dnl Achieves complete pipelining with four arguments, so
dnl care should be taken to batch up at least four things.
dnl A max of seven things should be added to avoid FP stack overflow
dnl (all the sums sit on the x87 stack at once before the first store).
dnl
dnl d_AddAttrsCodeLoop(pos, left, attr1, attr2, ...) is the recursive worker:
dnl   $1    position of the attribute loaded at this level, counting from 1.
dnl   $2    number of attributes still to load, counting this one. The
dnl         entry point passes its own argument count here.
dnl   $3..  the attribute list, handed down unchanged at every level.
dnl Going down, each level emits fld [ecx+...] / fadd [edx+...] for
dnl attribute number $1. Coming back up, each level emits the fstp for
dnl attribute number $2, so the stores come out in argument order starting
dnl with the first argument. When $1 > $2 an fxch st($1 - $2) is emitted
dnl first to bring the sum being stored to the top of the stack.
dnl
dnl d_Nth1 and d_shift are defined elsewhere. Presumably d_Nth1(n, list)
dnl picks the n-th list element counting from 1 and d_shift drops the first
dnl element - confirm against their definitions.
dnl
define(`d_AddAttrsCodeLoop',
`        fld     DWORD PTR [ecx+d_Nth1($1, d_shift(d_shift($@)))]
        fadd    DWORD PTR [edx+d_Nth1($1, d_shift(d_shift($@)))]
ifelse(eval($2 > 1), `1',
`d_AddAttrsCodeLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
ifelse(eval($1 > $2), `1',
`        fxch    st(eval($1 - $2))
')dnl
        fstp    DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
')dnl
define(`d_AddAttrsCode', `d_AddAttrsCodeLoop(`1', $#, $@)')dnl
  dnl
dnl d_AddScaledAttrsCode(attr1, attr2, ...)
dnl
dnl Macro to generate fld/fmul/fadd/fstp for each argument with
dnl pipelining across all arguments. For every attribute offset given,
dnl the emitted code computes  [ecx+attr] += [edx+attr] * fScaleVal
dnl on DWORD floats. fScaleVal is a memory operand that is not defined
dnl here and has to exist where the macro is expanded.
dnl Achieves complete pipelining with four arguments, so
dnl care should be taken to batch up at least four things.
dnl A max of seven things should be added to avoid FP stack overflow
dnl (at peak the stack holds one value per argument plus one freshly
dnl loaded [ecx+attr]).
dnl
dnl The work is split into two recursive phases that share the FP stack:
dnl
dnl d_AddScaledAttrsLoadLoop(pos, left, attr1, ...)
dnl   $1 counts up from 1, $2 counts down from the argument count, and the
dnl   attribute list is handed down unchanged.
dnl   Going down, each level emits fld [edx+...] / fmul fScaleVal for
dnl   attribute number $1. Coming back up, each level emits an fxch
dnl   (st($1 - 1) when $1 > 1, otherwise st($2 - 1)) followed by
dnl   fld [ecx+...] / faddp st(1), st(0) for attribute number $2.
dnl   All the sums are left on the FP stack.
dnl
dnl d_AddScaledAttrsStoreLoop(pos, left, attr1, ...)
dnl   $1 counts up from 0 here (not 1), $2 counts down from the argument
dnl   count. Nothing is emitted going down. Coming back up, each level
dnl   emits an optional fxch (st($1 - $2) when $1 > $2, st(1) when $1 is 1)
dnl   and then the fstp for attribute number $2, so the stores come out in
dnl   argument order and the FP stack ends up balanced.
dnl
dnl The two fxch patterns depend on each other. Change them only together.
dnl
dnl d_Nth1 and d_shift are defined elsewhere. Presumably d_Nth1(n, list)
dnl picks the n-th list element counting from 1 and d_shift drops the first
dnl element - confirm against their definitions.
dnl
define(`d_AddScaledAttrsLoadLoop',
`        fld     DWORD PTR [edx+d_Nth1($1, d_shift(d_shift($@)))]
        fmul    fScaleVal
ifelse(eval($2 > 1), `1',
`d_AddScaledAttrsLoadLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
ifelse(eval($1 > 1), `1',
`        fxch    st(decr($1))
',
`        fxch    st(decr($2))
')dnl
        fld     DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
        faddp   st(1), st(0)
')dnl
define(`d_AddScaledAttrsStoreLoop',
`ifelse(eval($2 > 1), `1',
`d_AddScaledAttrsStoreLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
ifelse(eval($1 > $2), `1',
`        fxch    st(eval($1 - $2))
')dnl
ifelse($1, `1',
`        fxch    st(1)
')dnl
        fstp    DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
')dnl
define(`d_AddScaledAttrsCode',
`d_AddScaledAttrsLoadLoop(`1', $#, $@)dnl
d_AddScaledAttrsStoreLoop(`0', $#, $@)')dnl
  dnl
dnl d_AddFloatAttrsBody(variant)
dnl
dnl Generates the body of an FP attribute adder routine.
dnl Attributes are processed in cache order as much as possible.
dnl
dnl The emitted code adds the attribute set addressed by edx into the
dnl attribute set addressed by ecx:
dnl   - two integer adds for the surface pointers,
dnl       [ecx+ATTRSET_pSurface] += [edx+ATTRSET_ipSurface]
dnl       [ecx+ATTRSET_pZ]       += [edx+ATTRSET_ipZ]
dnl     using eax as scratch;
dnl   - float adds, generated by d_AddAttrsCode, for the attributes picked
dnl     by $1. Each line below is one d_AddAttrsCode batch.
dnl
dnl $1 is matched as an exact string and is one of
dnl   Z_Diff            fZ fB fG fR fA
dnl   Z_Diff_Spec       fZ fB fG fR
dnl                     fA fBS fGS fRS
dnl   Z_Diff_Tex1       fZ fOoW fUoW1 fVoW1
dnl                     fB fG fR fA
dnl   Z_Diff_Spec_Tex1  fZ fOoW fUoW1 fVoW1 fB
dnl                     fG fR fA fBS fGS fRS
dnl   Z_Tex1_Tex2       fZ fOoW fUoW1 fVoW1 fUoW2 fVoW2
dnl   Z_DIdx            fZ fDIdx fDIdxA
dnl   Z_DIdx_Tex1       fZ fOoW fUoW1 fVoW1 fDIdx fDIdxA
dnl   Z_Tex1            fZ fOoW fUoW1 fVoW1
dnl Any other value emits only the surface pointer adds.
dnl
dnl Comments placed inside the quoted body must not contain apostrophes,
dnl since an apostrophe closes the m4 quote.
dnl
define(`d_AddFloatAttrsBody',
`
        ; Add surface pointers.
        mov     eax, [edx+ATTRSET_ipSurface]
        add     eax, [ecx+ATTRSET_pSurface]
        mov     [ecx+ATTRSET_pSurface], eax
        mov     eax, [edx+ATTRSET_ipZ]
        add     eax, [ecx+ATTRSET_pZ]
        mov     [ecx+ATTRSET_pZ], eax
        ; Do FP additions.
ifelse(`$1', `Z_Diff',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fB',
                `ATTRSET_fG', `ATTRSET_fR',
                `ATTRSET_fA')dnl
')dnl
ifelse(`$1', `Z_Diff_Spec',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fB',
                `ATTRSET_fG', `ATTRSET_fR')dnl
d_AddAttrsCode(`ATTRSET_fA', `ATTRSET_fBS',
               `ATTRSET_fGS', `ATTRSET_fRS')dnl
')dnl
ifelse(`$1', `Z_Diff_Tex1',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW',
                `ATTRSET_fUoW1', `ATTRSET_fVoW1')dnl
d_AddAttrsCode(`ATTRSET_fB', `ATTRSET_fG',
               `ATTRSET_fR', `ATTRSET_fA')dnl
')dnl
ifelse(`$1', `Z_Diff_Spec_Tex1',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW',
                `ATTRSET_fUoW1', `ATTRSET_fVoW1',
                `ATTRSET_fB')dnl
d_AddAttrsCode(`ATTRSET_fG', `ATTRSET_fR',
               `ATTRSET_fA', `ATTRSET_fBS',
               `ATTRSET_fGS', `ATTRSET_fRS')dnl
')dnl
ifelse(`$1', `Z_Tex1_Tex2',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW',
                `ATTRSET_fUoW1', `ATTRSET_fVoW1',
                `ATTRSET_fUoW2', `ATTRSET_fVoW2')dnl
')dnl
ifelse(`$1', `Z_DIdx',
`d_AddAttrsCode(`ATTRSET_fZ',
                `ATTRSET_fDIdx', `ATTRSET_fDIdxA')dnl
')dnl
ifelse(`$1', `Z_DIdx_Tex1',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW',
                `ATTRSET_fUoW1', `ATTRSET_fVoW1',
                `ATTRSET_fDIdx', `ATTRSET_fDIdxA')dnl
')dnl
ifelse(`$1', `Z_Tex1',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW',
                `ATTRSET_fUoW1', `ATTRSET_fVoW1')dnl
')dnl
')dnl
  dnl
dnl d_AddFixedAttrsBody(variant)
dnl
dnl Generates the body of a fixed attribute adder routine.
dnl Attributes are processed in cache order as much as possible.
dnl
dnl The emitted code adds the attribute set addressed by edx into the
dnl attribute set addressed by ecx with plain 32-bit integer
dnl load / add / store triples, using eax as scratch. The surface pointers
dnl ([ecx+ATTRSET_pSurface] += [edx+ATTRSET_ipSurface], same for pZ / ipZ)
dnl are always added. The remaining groups are selected by $1.
dnl
dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex1 and Tex2 in any mix.
dnl Groups are emitted in this fixed order whenever their substring occurs:
dnl   Z      iZ
dnl   Tex    iOoW iUoW1 iVoW1   (the test is for Tex, so Tex1 or Tex2 match)
dnl   Tex2   iUoW2 iVoW2
dnl   Diff   iB iG iR iA
dnl   Spec   iBS iGS iRS
dnl   DIdx   iDIdx iDIdxA
dnl
dnl d_index is defined elsewhere. The tests below rely on it yielding a
dnl negative number when the substring is absent - presumably it wraps the
dnl m4 index builtin, confirm against its definition.
dnl
define(`d_AddFixedAttrsBody',
`
        ; Add surface pointers.
        mov     eax, [edx+ATTRSET_ipSurface]
        add     eax, [ecx+ATTRSET_pSurface]
        mov     [ecx+ATTRSET_pSurface], eax
        mov     eax, [edx+ATTRSET_ipZ]
        add     eax, [ecx+ATTRSET_pZ]
        mov     [ecx+ATTRSET_pZ], eax
        ; Add attributes.
ifelse(eval(d_index(`$1', `Z') >= 0), `1',
`
        mov     eax, [edx+ATTRSET_iZ]
        add     eax, [ecx+ATTRSET_iZ]
        mov     [ecx+ATTRSET_iZ], eax
')dnl
ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
        mov     eax, [edx+ATTRSET_iOoW]
        add     eax, [ecx+ATTRSET_iOoW]
        mov     [ecx+ATTRSET_iOoW], eax
        mov     eax, [edx+ATTRSET_iUoW1]
        add     eax, [ecx+ATTRSET_iUoW1]
        mov     [ecx+ATTRSET_iUoW1], eax
        mov     eax, [edx+ATTRSET_iVoW1]
        add     eax, [ecx+ATTRSET_iVoW1]
        mov     [ecx+ATTRSET_iVoW1], eax
')dnl
ifelse(eval(d_index(`$1', `Tex2') >= 0), `1',
`
        mov     eax, [edx+ATTRSET_iUoW2]
        add     eax, [ecx+ATTRSET_iUoW2]
        mov     [ecx+ATTRSET_iUoW2], eax
        mov     eax, [edx+ATTRSET_iVoW2]
        add     eax, [ecx+ATTRSET_iVoW2]
        mov     [ecx+ATTRSET_iVoW2], eax
')dnl
ifelse(eval(d_index(`$1', `Diff') >= 0), `1',
`
        mov     eax, [edx+ATTRSET_iB]
        add     eax, [ecx+ATTRSET_iB]
        mov     [ecx+ATTRSET_iB], eax
        mov     eax, [edx+ATTRSET_iG]
        add     eax, [ecx+ATTRSET_iG]
        mov     [ecx+ATTRSET_iG], eax
        mov     eax, [edx+ATTRSET_iR]
        add     eax, [ecx+ATTRSET_iR]
        mov     [ecx+ATTRSET_iR], eax
        mov     eax, [edx+ATTRSET_iA]
        add     eax, [ecx+ATTRSET_iA]
        mov     [ecx+ATTRSET_iA], eax
')dnl
ifelse(eval(d_index(`$1', `Spec') >= 0), `1',
`
        mov     eax, [edx+ATTRSET_iBS]
        add     eax, [ecx+ATTRSET_iBS]
        mov     [ecx+ATTRSET_iBS], eax
        mov     eax, [edx+ATTRSET_iGS]
        add     eax, [ecx+ATTRSET_iGS]
        mov     [ecx+ATTRSET_iGS], eax
        mov     eax, [edx+ATTRSET_iRS]
        add     eax, [ecx+ATTRSET_iRS]
        mov     [ecx+ATTRSET_iRS], eax
')dnl
ifelse(eval(d_index(`$1', `DIdx') >= 0), `1',
`
        mov     eax, [edx+ATTRSET_iDIdx]
        add     eax, [ecx+ATTRSET_iDIdx]
        mov     [ecx+ATTRSET_iDIdx], eax
        mov     eax, [edx+ATTRSET_iDIdxA]
        add     eax, [ecx+ATTRSET_iDIdxA]
        mov     [ecx+ATTRSET_iDIdxA], eax
')dnl
')dnl
  dnl
dnl d_FillSpanFloatAttrsBody(variant)
dnl
dnl Generates the body of a FP span filler routine.
dnl Suboptimal cache ordering due to attempt to overlap OoW divide with
dnl integer ops.
dnl
dnl The emitted code reads float attributes from the set addressed by ecx
dnl (ATTRSET_ offsets), converts them to integers with fist / fistp, and
dnl writes them to the span addressed by edx (RASTSPAN_ offsets). The two
dnl surface pointers are always copied through eax. eax / ax is the only
dnl integer register written.
dnl
dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex1 and Tex2 in any mix.
dnl Pieces are emitted in this order whenever their substring occurs:
dnl   Tex    (before the pointer copies) fld fOoW, fist to iOoW without
dnl          popping, then fdivr OOW_W_SCALE so the divide is started first.
dnl   Tex    (after the pointer copies) zero the DWORD at iLOD, fistp the
dnl          divide result to iW, then convert fUoW1 and fVoW1.
dnl          The test is for Tex, so Tex1 or Tex2 both match.
dnl   Tex2   fUoW2, fVoW2.
dnl   Z      fZ to uZ.
dnl   Diff   fB and fR are converted straight into uB and uR with DWORD
dnl          stores. fG and fA are converted into the scratch DWORD iVal and
dnl          only their low WORD is copied to uG and uA. The WORD store to
dnl          uG is emitted after the DWORD store to uB, and the one to uA
dnl          after the DWORD store to uR - presumably uG and uA occupy the
dnl          upper halves of those DWORDs, confirm against the span layout.
dnl   Spec   same scheme for fBS (DWORD) and fGS (WORD via iVal). fRS is
dnl          stored as a full DWORD to uRS.
dnl   DIdx   fDIdx to iIdx, fDIdxA to iIdxA.
dnl
dnl OOW_W_SCALE and iVal are memory operands that are not defined here.
dnl d_index is defined elsewhere. The tests below rely on it yielding a
dnl negative number when the substring is absent - confirm against its
dnl definition.
dnl
dnl Comments placed inside the quoted body must not contain apostrophes,
dnl since an apostrophe closes the m4 quote.
dnl
define(`d_FillSpanFloatAttrsBody',
`ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
        fld     DWORD PTR [ecx+ATTRSET_fOoW]
        fist    DWORD PTR [edx+RASTSPAN_iOoW]
        fdivr   DWORD PTR OOW_W_SCALE
')dnl
        ; Set surface pointers.
        mov     eax, [ecx+ATTRSET_pSurface]
        mov     [edx+RASTSPAN_pSurface], eax
        mov     eax, [ecx+ATTRSET_pZ]
        mov     [edx+RASTSPAN_pZ], eax
ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
        ; Clears both iLOD and iDLOD.
        xor     eax, eax
        mov     [edx+RASTSPAN_iLOD], eax
        fistp   DWORD PTR [edx+RASTSPAN_iW]
        fld     DWORD PTR [ecx+ATTRSET_fUoW1]
        fistp   DWORD PTR [edx+RASTSPAN_iUoW1]
        fld     DWORD PTR [ecx+ATTRSET_fVoW1]
        fistp   DWORD PTR [edx+RASTSPAN_iVoW1]
')dnl
ifelse(eval(d_index(`$1', `Tex2') >= 0), `1',
`
        fld     DWORD PTR [ecx+ATTRSET_fUoW2]
        fistp   DWORD PTR [edx+RASTSPAN_iUoW2]
        fld     DWORD PTR [ecx+ATTRSET_fVoW2]
        fistp   DWORD PTR [edx+RASTSPAN_iVoW2]
')dnl
ifelse(eval(d_index(`$1', `Z') >= 0), `1',
`
        fld     DWORD PTR [ecx+ATTRSET_fZ]
        fistp   DWORD PTR [edx+RASTSPAN_uZ]
')dnl
ifelse(eval(d_index(`$1', `Diff') >= 0), `1',
`
        ; Directly store DWORD-aligned fields, then whack in
        ; half DWORDs.
        ; ATTENTION - 8.8 color fields could use the FP fixing trick
        ; to use fstp instead of fistp. Adds could be overlapped
        ; so itd be free cycles?
        fld     DWORD PTR [ecx+ATTRSET_fG]
        fistp   DWORD PTR iVal
        fld     DWORD PTR [ecx+ATTRSET_fB]
        fistp   DWORD PTR [edx+RASTSPAN_uB]
        mov     ax, WORD PTR iVal
        fld     DWORD PTR [ecx+ATTRSET_fA]
        fistp   DWORD PTR iVal
        mov     WORD PTR [edx+RASTSPAN_uG], ax
        fld     DWORD PTR [ecx+ATTRSET_fR]
        fistp   DWORD PTR [edx+RASTSPAN_uR]
        mov     ax, WORD PTR iVal
        mov     WORD PTR [edx+RASTSPAN_uA], ax
')dnl
ifelse(eval(d_index(`$1', `Spec') >= 0), `1',
`
        fld     DWORD PTR [ecx+ATTRSET_fGS]
        fistp   DWORD PTR iVal
        fld     DWORD PTR [ecx+ATTRSET_fBS]
        fistp   DWORD PTR [edx+RASTSPAN_uBS]
        mov     ax, WORD PTR iVal
        mov     WORD PTR [edx+RASTSPAN_uGS], ax
        ; Trashes uFog, but thats OK because fog isnt getting used.
        fld     DWORD PTR [ecx+ATTRSET_fRS]
        fistp   DWORD PTR [edx+RASTSPAN_uRS]
')dnl
ifelse(eval(d_index(`$1', `DIdx') >= 0), `1',
`
        fld     DWORD PTR [ecx+ATTRSET_fDIdx]
        fistp   DWORD PTR [edx+RASTSPAN_iIdx]
        fld     DWORD PTR [ecx+ATTRSET_fDIdxA]
        fistp   DWORD PTR [edx+RASTSPAN_iIdxA]
')dnl
')dnl
  dnl
dnl d_FillSpanFixedAttrsBody(variant)
dnl
dnl Generates the body of a fixed span filler routine.
dnl Cache ordered except for the overlap of the OoW divide.
dnl
dnl The emitted code copies integer attributes from the set addressed by
dnl ecx (ATTRSET_ offsets) to the span addressed by edx (RASTSPAN_ offsets)
dnl through eax / ax. The two surface pointers are always copied.
dnl
dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex1 and Tex2 in any mix.
dnl Pieces are emitted in this order whenever their substring occurs:
dnl   Tex    (before anything else) fild iOoW, then fdivr OOW_W_SCALE so
dnl          the divide is started first.
dnl   Z      uZ.
dnl   Tex    zero the DWORD at iLOD, then copy iOoW, iUoW1, iVoW1.
dnl          The test is for Tex, so Tex1 or Tex2 both match.
dnl   Tex2   iUoW2, iVoW2.
dnl   Diff   DWORD copies of uB and uR followed by WORD copies of uG and
dnl          uA. The WORD copies are emitted after the DWORD ones.
dnl   Spec   DWORD copies of uBS and uRS followed by a WORD copy of uGS.
dnl   DIdx   uDIdx to iIdx, uDIdxA to iIdxA.
dnl   Tex    (last) fistp of the divide result to iW, which also pops the
dnl          FP stack back to where it started.
dnl
dnl OOW_W_SCALE is a memory operand that is not defined here.
dnl d_index is defined elsewhere. The tests below rely on it yielding a
dnl negative number when the substring is absent - confirm against its
dnl definition.
dnl
define(`d_FillSpanFixedAttrsBody',
`ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
        fild    DWORD PTR [ecx+ATTRSET_iOoW]
        fdivr   DWORD PTR OOW_W_SCALE
')dnl
        ; Set surface pointers.
        mov     eax, [ecx+ATTRSET_pSurface]
        mov     [edx+RASTSPAN_pSurface], eax
        mov     eax, [ecx+ATTRSET_pZ]
        mov     [edx+RASTSPAN_pZ], eax
ifelse(eval(d_index(`$1', `Z') >= 0), `1',
`
        mov     eax, [ecx+ATTRSET_uZ]
        mov     [edx+RASTSPAN_uZ], eax
')dnl
ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
        ; Clears both iLOD and iDLOD.
        xor     eax, eax
        mov     [edx+RASTSPAN_iLOD], eax
        mov     eax, [ecx+ATTRSET_iOoW]
        mov     [edx+RASTSPAN_iOoW], eax
        mov     eax, [ecx+ATTRSET_iUoW1]
        mov     [edx+RASTSPAN_iUoW1], eax
        mov     eax, [ecx+ATTRSET_iVoW1]
        mov     [edx+RASTSPAN_iVoW1], eax
')dnl
ifelse(eval(d_index(`$1', `Tex2') >= 0), `1',
`
        mov     eax, [ecx+ATTRSET_iUoW2]
        mov     [edx+RASTSPAN_iUoW2], eax
        mov     eax, [ecx+ATTRSET_iVoW2]
        mov     [edx+RASTSPAN_iVoW2], eax
')dnl
ifelse(eval(d_index(`$1', `Diff') >= 0), `1',
`
        ; Directly store DWORD-aligned fields, then whack in
        ; half DWORDs.
        ; ATTENTION - Keep word pairs shifted and OR together to store
        ; as DWORDs instead?
        mov     eax, [ecx+ATTRSET_uB]
        mov     [edx+RASTSPAN_uB], eax
        mov     eax, [ecx+ATTRSET_uR]
        mov     [edx+RASTSPAN_uR], eax
        mov     ax, [ecx+ATTRSET_uG]
        mov     [edx+RASTSPAN_uG], ax
        mov     ax, [ecx+ATTRSET_uA]
        mov     [edx+RASTSPAN_uA], ax
')dnl
ifelse(eval(d_index(`$1', `Spec') >= 0), `1',
`
        mov     eax, [ecx+ATTRSET_uBS]
        mov     [edx+RASTSPAN_uBS], eax
        mov     eax, [ecx+ATTRSET_uRS]
        mov     [edx+RASTSPAN_uRS], eax
        mov     ax, [ecx+ATTRSET_uGS]
        mov     [edx+RASTSPAN_uGS], ax
')dnl
ifelse(eval(d_index(`$1', `DIdx') >= 0), `1',
`
        mov     eax, [ecx+ATTRSET_uDIdx]
        mov     [edx+RASTSPAN_iIdx], eax
        mov     eax, [ecx+ATTRSET_uDIdxA]
        mov     [edx+RASTSPAN_iIdxA], eax
')dnl
ifelse(eval(d_index(`$1', `Tex') >= 0), `1',
`
        fistp   DWORD PTR [edx+RASTSPAN_iW]
')dnl
')dnl