dnl----------------------------------------------------------------------------
dnl
dnl x86 assembly code generating macros for attribute handlers.
dnl
dnl Copyright (C) Microsoft Corporation, 1997.
dnl
dnl----------------------------------------------------------------------------
dnl
dnl d_AddAttrsCode
dnl
dnl Macro to generate fld/fadd/fstp for each argument with
dnl pipelining across all arguments.
dnl Achieves complete pipelining with four arguments, so
dnl care should be taken to batch up at least four things.
dnl A max of seven things should be added to avoid FP stack overflow.
dnl
dnl Arguments are attribute offsets.  For each offset the generated code
dnl computes [ecx+offset] = [ecx+offset] + [edx+offset] on 32-bit floats.
dnl Every sum stays on the x87 stack until all loads have been issued, so
dnl the stack depth reached equals the number of arguments.
dnl
dnl d_AddAttrsCodeLoop is the recursive worker behind d_AddAttrsCode:
dnl   arg 1      1-based position of the offset loaded at this level.
dnl   arg 2      Number of offsets still to load, this one included.  While
dnl              the recursion unwinds it is also the 1-based position of
dnl              the offset stored at this level.
dnl   arg 3...   The complete offset list, handed unchanged to every level.
dnl
dnl Going in, each level emits fld [ecx+offset] / fadd [edx+offset].
dnl Coming back out, each level emits one fstp, preceded by an fxch when
dnl arg 1 is greater than arg 2, which is when the sum to be stored is not
dnl already in st(0).  The stores therefore happen in list order.
dnl Example, three offsets A B C:
dnl   fld A, fadd A, fld B, fadd B, fld C, fadd C,
dnl   fxch st(2), fstp A, fstp B, fstp C
dnl
dnl NOTE(review): this description assumes d_Nth1(n, list...) yields the
dnl n-th list entry counting from one and d_shift drops its first argument.
dnl Both are defined outside the part of the file shown here - confirm.
dnl
define(`d_AddAttrsCodeLoop', `
        fld DWORD PTR [ecx+d_Nth1($1, d_shift(d_shift($@)))]
        fadd DWORD PTR [edx+d_Nth1($1, d_shift(d_shift($@)))]
ifelse(eval($2 > 1), `1',
`d_AddAttrsCodeLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
ifelse(eval($1 > $2), `1', `
        fxch st(eval($1 - $2))
')dnl
        fstp DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
')dnl
define(`d_AddAttrsCode', `d_AddAttrsCodeLoop(`1', $#, $@)')dnl
dnl
dnl d_AddUVowCode
dnl
dnl Generates a run-time loop that adds the ATTRSET_fUoW and ATTRSET_fVoW
dnl float arrays at edx into the same arrays at ecx, one U/V pair per
dnl iteration.  The iteration count is the DWORD at RCTX_cActTex, read
dnl through the pointer stored at SCTX_pCtx inside pStpCtx.
dnl
dnl $1, $2 are only pasted into the label names Loop$1$2 and Done$1$2.
dnl
dnl The generated code overwrites eax and preserves ebx with push/pop.
dnl NOTE(review): the labels are plain assembler labels, so each pair of
dnl arguments must be expanded at most once per label scope - confirm
dnl against the assembler options used by the including file.
dnl
define(`d_AddUVowCode', `
        ; Just in case ebx is used
        push ebx
        mov ebx, pStpCtx
        mov ebx, [ebx + SCTX_pCtx]
        ; eax is the loop counter and the element number for both arrays.
        xor eax, eax
Loop$1$2:
        cmp eax, DWORD PTR[ebx + RCTX_cActTex]
        je Done$1$2
        fld DWORD PTR [ecx + ATTRSET_fUoW + eax * 4]
        fadd DWORD PTR [edx+ATTRSET_fUoW + eax * 4]
        fld DWORD PTR [ecx + ATTRSET_fVoW + eax * 4]
        fadd DWORD PTR [edx+ATTRSET_fVoW + eax * 4]
        ; V was pushed last, so it is popped and stored first.
        fstp DWORD PTR [ecx+ATTRSET_fVoW + eax * 4]
        fstp DWORD PTR [ecx+ATTRSET_fUoW + eax * 4]
        inc eax
        jmp Loop$1$2
Done$1$2:
        pop ebx
')dnl
dnl
dnl d_AddScaledAttrsCode
dnl
dnl Macro to generate fld/fmul/fadd/fstp for each argument with
dnl pipelining across all arguments.
dnl Achieves complete pipelining with four arguments, so
dnl care should be taken to batch up at least four things.
dnl A max of seven things should be added to avoid FP stack overflow.
dnl
dnl Arguments are attribute offsets.  For each offset the generated code
dnl computes [ecx+offset] = [ecx+offset] + [edx+offset] * fScaleVal on
dnl 32-bit floats.  fScaleVal is referenced by name only; it has to be
dnl addressable where the macro is expanded.
dnl
dnl The work is split over two recursive helpers.
dnl
dnl d_AddScaledAttrsLoadLoop(position, remaining, offsets...)
dnl   Going in, emits fld [edx+offset] / fmul fScaleVal for every offset in
dnl   list order.  Coming back out, emits for every offset in list order an
dnl   fxch that brings the matching product to st(0), then
dnl   fld [ecx+offset] / faddp st(1), st(0).  Levels with position greater
dnl   than one exchange with st(position - 1); the outermost level
dnl   exchanges with st(remaining - 1), where remaining is the full count.
dnl   With a single offset that outermost exchange is fxch st(0).
dnl   The fld issued before each faddp makes the stack one entry deeper
dnl   than the number of offsets, which is why seven is the limit on the
dnl   eight-register x87 stack.
dnl
dnl d_AddScaledAttrsStoreLoop(position, remaining, offsets...)
dnl   Started with position 0.  Recurses first and emits only while
dnl   unwinding: one fstp per offset in list order, preceded by an fxch
dnl   when position is greater than remaining and by fxch st(1) when
dnl   position equals 1.  These exchanges undo the order in which the load
dnl   loop left the sums on the stack.
dnl
dnl Example, two offsets A B:
dnl   fld [edx+A], fmul, fld [edx+B], fmul,
dnl   fxch st(1), fld [ecx+A], faddp, fxch st(1), fld [ecx+B], faddp,
dnl   fxch st(1), fstp A, fstp B
dnl
dnl NOTE(review): as for d_AddAttrsCode this assumes d_Nth1 selects the
dnl n-th list entry counting from one and d_shift drops its first
dnl argument; both are defined elsewhere - confirm.
dnl
define(`d_AddScaledAttrsLoadLoop', `
        fld DWORD PTR [edx+d_Nth1($1, d_shift(d_shift($@)))]
        fmul fScaleVal
ifelse(eval($2 > 1), `1',
`d_AddScaledAttrsLoadLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
ifelse(eval($1 > 1), `1', `
        fxch st(decr($1))
', `
        fxch st(decr($2))
')dnl
        fld DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
        faddp st(1), st(0)
')dnl
define(`d_AddScaledAttrsStoreLoop',
`ifelse(eval($2 > 1), `1',
`d_AddScaledAttrsStoreLoop(incr($1), decr($2), d_shift(d_shift($@)))')dnl
ifelse(eval($1 > $2), `1', `
        fxch st(eval($1 - $2))
')dnl
ifelse($1, `1', `
        fxch st(1)
')dnl
        fstp DWORD PTR [ecx+d_Nth1($2, d_shift(d_shift($@)))]
')dnl
define(`d_AddScaledAttrsCode',
`d_AddScaledAttrsLoadLoop(`1', $#, $@)dnl
d_AddScaledAttrsStoreLoop(`0', $#, $@)')dnl
dnl
dnl d_AddScaledUVowCode
dnl
dnl Scaled counterpart of d_AddUVowCode.  Generates a run-time loop that
dnl adds fScaleVal times the ATTRSET_fUoW / ATTRSET_fVoW entries at edx to
dnl the same entries at ecx.  The iteration count is the DWORD at
dnl RCTX_cActTex, read through the pointer stored at SCTX_pCtx inside
dnl pStpCtx.
dnl
dnl $1, $2 are only pasted into the label names LoopScaled$1$2 and
dnl DoneScaled$1$2.
dnl
dnl The generated code overwrites eax and preserves ebx with push/pop.
dnl The trailing comments on the FP instructions show the x87 stack after
dnl the instruction, st(0) first.
dnl
define(`d_AddScaledUVowCode', `
        ; Just in case ebx is used
        push ebx
        mov ebx, pStpCtx
        mov ebx, [ebx + SCTX_pCtx]
        ; eax is the loop counter and the element number for both arrays.
        xor eax, eax
LoopScaled$1$2:
        cmp eax, DWORD PTR[ebx + RCTX_cActTex]
        je DoneScaled$1$2
        fld DWORD PTR [edx+ATTRSET_fUoW + eax * 4]
        fmul fScaleVal                              ; fU*fScale
        fld DWORD PTR [edx+ATTRSET_fVoW + eax * 4]
        fmul fScaleVal                              ; fV*fScale     fU*fScale
        fld DWORD PTR [ecx+ATTRSET_fVoW + eax * 4]  ; fVc           fV*fScale      fU*fScale
        faddp st(1), st(0)                          ; fVc+fV*fScale fU*fScale
        fxch st(1)                                  ; fU*fScale     fVc+fV*fScale
        fld DWORD PTR [ecx+ATTRSET_fUoW + eax * 4]  ; fUc           fU*fScale      fVc+fV*fScale
        faddp st(1), st(0)                          ; fUc+fU*fScale fVc+fV*fScale
        fstp DWORD PTR [ecx+ATTRSET_fUoW + eax * 4] ; fVc+fV*fScale
        fstp DWORD PTR [ecx+ATTRSET_fVoW + eax * 4]
        inc eax
        jmp LoopScaled$1$2
DoneScaled$1$2:
        pop ebx
')dnl
dnl
dnl d_AddFloatAttrsBody
dnl
dnl Generates the body of an FP attribute adder routine.
dnl Attributes are processed in cache order as much as possible.
dnl
dnl $1 is one of Z_Diff, Z_Diff_Spec, Z_Diff_Tex, Z_Diff_Spec_Tex,
dnl Z_DIdx, Z_DIdx_Tex, Z_Tex.
dnl
dnl Register use in the generated code: ecx addresses the attribute set
dnl that is both read and updated, edx addresses the values being added
dnl and is only read.  eax is scratch.
dnl
dnl The two pointer additions (ATTRSET_ipSurface into ATTRSET_pSurface and
dnl ATTRSET_ipZ into ATTRSET_pZ) are always generated.  The FP part is
dnl chosen by exact comparison of $1 against the names above; any other
dnl value of $1 produces the pointer additions only.
dnl
dnl   Z_Diff           fZ fB fG fR fA
dnl   Z_Diff_Spec      fZ fB fG fR, then fA fBS fGS fRS
dnl   Z_Diff_Tex       fZ fOoW fB fG fR fA, then the UoW/VoW loop
dnl   Z_Diff_Spec_Tex  fZ fOoW fB, the UoW/VoW loop,
dnl                    then fG fR fA fBS fGS fRS
dnl   Z_DIdx           fZ fDIdx fDIdxA
dnl   Z_DIdx_Tex       fZ fOoW fDIdx fDIdxA, then the UoW/VoW loop
dnl   Z_Tex            fZ fOoW, then the UoW/VoW loop
dnl
dnl The UoW/VoW loop comes from d_AddUVowCode, whose labels are built from
dnl $1 followed by Float.
dnl
define(`d_AddFloatAttrsBody', `
        ; Add surface pointers.
        mov eax, [edx+ATTRSET_ipSurface]
        add eax, [ecx+ATTRSET_pSurface]
        mov [ecx+ATTRSET_pSurface], eax
        mov eax, [edx+ATTRSET_ipZ]
        add eax, [ecx+ATTRSET_pZ]
        mov [ecx+ATTRSET_pZ], eax

        ; Do FP additions.
ifelse(`$1', `Z_Diff',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fB', `ATTRSET_fG', `ATTRSET_fR',
    `ATTRSET_fA')dnl
')dnl
ifelse(`$1', `Z_Diff_Spec',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fB', `ATTRSET_fG', `ATTRSET_fR')dnl
d_AddAttrsCode(`ATTRSET_fA', `ATTRSET_fBS', `ATTRSET_fGS', `ATTRSET_fRS')dnl
')dnl
ifelse(`$1', `Z_Diff_Tex',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW', `ATTRSET_fB', `ATTRSET_fG',
    `ATTRSET_fR', `ATTRSET_fA')dnl
d_AddUVowCode($1, `Float')
')dnl
ifelse(`$1', `Z_Diff_Spec_Tex',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW', `ATTRSET_fB')dnl
d_AddUVowCode($1, `Float')
d_AddAttrsCode(`ATTRSET_fG', `ATTRSET_fR', `ATTRSET_fA', `ATTRSET_fBS',
    `ATTRSET_fGS', `ATTRSET_fRS')dnl
')dnl
ifelse(`$1', `Z_DIdx',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fDIdx', `ATTRSET_fDIdxA')dnl
')dnl
ifelse(`$1', `Z_DIdx_Tex',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW', `ATTRSET_fDIdx',
    `ATTRSET_fDIdxA')dnl
d_AddUVowCode($1, `Float')
')dnl
ifelse(`$1', `Z_Tex',
`d_AddAttrsCode(`ATTRSET_fZ', `ATTRSET_fOoW')dnl
d_AddUVowCode($1, `Float')
')dnl
')dnl
dnl
dnl d_AddFixedAttrsBody
dnl
dnl Generates the body of a fixed attribute adder routine.
dnl Attributes are processed in cache order as much as possible.
dnl
dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex in any mix.
dnl
dnl Register use in the generated code: ecx addresses the attribute set
dnl that is both read and updated, edx addresses the values being added
dnl and is only read.  eax is scratch.  The Tex section also uses ebx and
dnl edi and preserves both with push/pop.
dnl
dnl The two pointer additions are always generated.  Each further section
dnl is generated when its substring is found in $1 by d_index:
dnl   Z     iZ
dnl   Tex   iOoW, then a run-time loop over iUoW / iVoW with the iteration
dnl         count taken from the DWORD at RCTX_cActTex
dnl   Diff  iB iG iR iA
dnl   Spec  iBS iGS iRS
dnl   DIdx  iDIdx iDIdxA
dnl All additions are 32-bit integer adds.
dnl
dnl NOTE(review): assumes d_index returns a negative value when the
dnl substring is absent; it is defined elsewhere - confirm.
dnl NOTE(review): the Tex loop labels are LoopFixed$1 and DoneFixed$1, the
dnl same names d_FillSpanFixedAttrsBody generates.  Expanding both macros
dnl with the same $1 is only safe if the assembler scopes labels per
dnl procedure - confirm against the including file.
dnl
define(`d_AddFixedAttrsBody', `
        ; Add surface pointers.
        mov eax, [edx+ATTRSET_ipSurface]
        add eax, [ecx+ATTRSET_pSurface]
        mov [ecx+ATTRSET_pSurface], eax
        mov eax, [edx+ATTRSET_ipZ]
        add eax, [ecx+ATTRSET_pZ]
        mov [ecx+ATTRSET_pZ], eax

        ; Add attributes.
ifelse(eval(d_index(`$1', `Z') >= 0), `1', `
        mov eax, [edx+ATTRSET_iZ]
        add eax, [ecx+ATTRSET_iZ]
        mov [ecx+ATTRSET_iZ], eax
')dnl
ifelse(eval(d_index(`$1', `Tex') >= 0), `1', `
        mov eax, [edx+ATTRSET_iOoW]
        add eax, [ecx+ATTRSET_iOoW]
        mov [ecx+ATTRSET_iOoW], eax
        ; Just in case ebx and edi are used
        push ebx
        push edi
        mov ebx, pStpCtx
        mov ebx, [ebx + SCTX_pCtx]
        ; edi is the loop counter; eax stays free as the add scratch.
        xor edi, edi
LoopFixed$1:
        cmp edi, DWORD PTR[ebx + RCTX_cActTex]
        je DoneFixed$1
        mov eax, [edx+ATTRSET_iUoW + 4 * edi]
        add eax, [ecx+ATTRSET_iUoW + 4 * edi]
        mov [ecx+ATTRSET_iUoW + 4 * edi], eax
        mov eax, [edx+ATTRSET_iVoW + 4 * edi]
        add eax, [ecx+ATTRSET_iVoW + 4 * edi]
        mov [ecx+ATTRSET_iVoW + 4 * edi], eax
        inc edi
        jmp LoopFixed$1
DoneFixed$1:
        pop edi
        pop ebx
')dnl
ifelse(eval(d_index(`$1', `Diff') >= 0), `1', `
        mov eax, [edx+ATTRSET_iB]
        add eax, [ecx+ATTRSET_iB]
        mov [ecx+ATTRSET_iB], eax
        mov eax, [edx+ATTRSET_iG]
        add eax, [ecx+ATTRSET_iG]
        mov [ecx+ATTRSET_iG], eax
        mov eax, [edx+ATTRSET_iR]
        add eax, [ecx+ATTRSET_iR]
        mov [ecx+ATTRSET_iR], eax
        mov eax, [edx+ATTRSET_iA]
        add eax, [ecx+ATTRSET_iA]
        mov [ecx+ATTRSET_iA], eax
')dnl
ifelse(eval(d_index(`$1', `Spec') >= 0), `1', `
        mov eax, [edx+ATTRSET_iBS]
        add eax, [ecx+ATTRSET_iBS]
        mov [ecx+ATTRSET_iBS], eax
        mov eax, [edx+ATTRSET_iGS]
        add eax, [ecx+ATTRSET_iGS]
        mov [ecx+ATTRSET_iGS], eax
        mov eax, [edx+ATTRSET_iRS]
        add eax, [ecx+ATTRSET_iRS]
        mov [ecx+ATTRSET_iRS], eax
')dnl
ifelse(eval(d_index(`$1', `DIdx') >= 0), `1', `
        mov eax, [edx+ATTRSET_iDIdx]
        add eax, [ecx+ATTRSET_iDIdx]
        mov [ecx+ATTRSET_iDIdx], eax
        mov eax, [edx+ATTRSET_iDIdxA]
        add eax, [ecx+ATTRSET_iDIdxA]
        mov [ecx+ATTRSET_iDIdxA], eax
')dnl
')dnl
dnl
dnl d_FillSpanFloatAttrsBody
dnl
dnl Generates the body of a FP span filler routine.
dnl Suboptimal cache ordering due to attempt to overlap OoW divide with
dnl integer ops.
dnl
dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex in any mix.
dnl
dnl Register use in the generated code: ecx addresses the attribute set
dnl that is read, edx addresses the RASTSPAN record that is written.  eax
dnl is scratch.  The Tex section also uses ebx and preserves it with
dnl push/pop.  iVal is a DWORD scratch location referenced by name; it has
dnl to be addressable where the macro is expanded.
dnl
dnl Float attributes are converted with fist/fistp, so the rounding follows
dnl the x87 control word in effect at run time.
dnl
dnl Sections, each generated when its substring is found in $1 by d_index
dnl (the surface pointer copy is always generated):
dnl   Tex   Stores fOoW as an integer to RASTSPAN_iOoW without popping it,
dnl         then starts OOW_W_SCALE / fOoW with fdivr.  The quotient is
dnl         stored to RASTSPAN_iW only after the pointer copies and the
dnl         LOD clear have been issued.  A run-time loop then converts the
dnl         fUoW / fVoW entries into interleaved U,V DWORD pairs at
dnl         RASTSPAN_UVoW, with the iteration count taken from the DWORD
dnl         at RCTX_cActTex.
dnl   Z     fZ to RASTSPAN_uZ
dnl   Diff  fB and fR are stored as DWORDs; fG and fA are converted
dnl         through iVal and their low words are written afterwards.
dnl   Spec  fBS and fRS are stored as DWORDs; the low word of fGS is
dnl         written afterwards through iVal.
dnl   DIdx  fDIdx to RASTSPAN_iIdx, fDIdxA to RASTSPAN_iIdxA
dnl
dnl NOTE(review): the word stores after the DWORD stores suggest that uG,
dnl uA and uGS each sit in the upper half of the DWORD at uB, uR and uBS;
dnl the RASTSPAN layout is not visible here - confirm.
dnl NOTE(review): assumes d_index returns a negative value when the
dnl substring is absent; it is defined elsewhere - confirm.
dnl
define(`d_FillSpanFloatAttrsBody',
`ifelse(eval(d_index(`$1', `Tex') >= 0), `1', `
        fld DWORD PTR [ecx+ATTRSET_fOoW]
        fist DWORD PTR [edx+RASTSPAN_iOoW]
        fdivr DWORD PTR OOW_W_SCALE
')dnl
        ; Set surface pointers.
        mov eax, [ecx+ATTRSET_pSurface]
        mov [edx+RASTSPAN_pSurface], eax
        mov eax, [ecx+ATTRSET_pZ]
        mov [edx+RASTSPAN_pZ], eax

ifelse(eval(d_index(`$1', `Tex') >= 0), `1', `
        ; Clears both iLOD and iDLOD.
        xor eax, eax
        mov [edx+RASTSPAN_iLOD], eax
        ; Store the quotient of the divide started at the top.
        fistp DWORD PTR [edx+RASTSPAN_iW]
        ; Just in case ebx is used
        push ebx
        mov ebx, pStpCtx
        mov ebx, [ebx + SCTX_pCtx]
        ; eax is still zero from the clear above and is the loop counter.
LoopFloat$1:
        cmp eax, DWORD PTR[ebx + RCTX_cActTex]
        je DoneFloat$1
        fld DWORD PTR [ecx+ATTRSET_fUoW + 4 * eax]
        fistp DWORD PTR [edx+RASTSPAN_UVoW + 8 * eax]
        fld DWORD PTR [ecx+ATTRSET_fVoW + 4 * eax]
        fistp DWORD PTR [edx+RASTSPAN_UVoW + 8 * eax + 4]
        inc eax
        jmp LoopFloat$1
DoneFloat$1:
        pop ebx
')dnl
ifelse(eval(d_index(`$1', `Z') >= 0), `1', `
        fld DWORD PTR [ecx+ATTRSET_fZ]
        fistp DWORD PTR [edx+RASTSPAN_uZ]
')dnl
ifelse(eval(d_index(`$1', `Diff') >= 0), `1', `
        ; Directly store DWORD-aligned fields, then whack in
        ; half DWORDs.
        ; ATTENTION - 8.8 color fields could use the FP fixing trick
        ; to use fstp instead of fistp.  Adds could be overlapped
        ; so itd be free cycles?
        fld DWORD PTR [ecx+ATTRSET_fG]
        fistp DWORD PTR iVal
        fld DWORD PTR [ecx+ATTRSET_fB]
        fistp DWORD PTR [edx+RASTSPAN_uB]
        ; Fetch G from iVal before the A conversion reuses iVal.
        mov ax, WORD PTR iVal
        fld DWORD PTR [ecx+ATTRSET_fA]
        fistp DWORD PTR iVal
        mov WORD PTR [edx+RASTSPAN_uG], ax
        fld DWORD PTR [ecx+ATTRSET_fR]
        fistp DWORD PTR [edx+RASTSPAN_uR]
        mov ax, WORD PTR iVal
        mov WORD PTR [edx+RASTSPAN_uA], ax
')dnl
ifelse(eval(d_index(`$1', `Spec') >= 0), `1', `
        fld DWORD PTR [ecx+ATTRSET_fGS]
        fistp DWORD PTR iVal
        fld DWORD PTR [ecx+ATTRSET_fBS]
        fistp DWORD PTR [edx+RASTSPAN_uBS]
        mov ax, WORD PTR iVal
        mov WORD PTR [edx+RASTSPAN_uGS], ax
        ; Trashes uFog, but thats OK because fog isnt getting used.
        fld DWORD PTR [ecx+ATTRSET_fRS]
        fistp DWORD PTR [edx+RASTSPAN_uRS]
')dnl
ifelse(eval(d_index(`$1', `DIdx') >= 0), `1', `
        fld DWORD PTR [ecx+ATTRSET_fDIdx]
        fistp DWORD PTR [edx+RASTSPAN_iIdx]
        fld DWORD PTR [ecx+ATTRSET_fDIdxA]
        fistp DWORD PTR [edx+RASTSPAN_iIdxA]
')dnl
')dnl
dnl
dnl d_FillSpanFixedAttrsBody
dnl
dnl Generates the body of a fixed span filler routine.
dnl Cache ordered except for the overlap of the OoW divide.
dnl
dnl $1 contains substrings Z, Diff, Spec, DIdx, Tex in any mix.
dnl
dnl Register use in the generated code: ecx addresses the attribute set
dnl that is read, edx addresses the RASTSPAN record that is written.  eax
dnl is scratch.  The Tex section also uses ebx and edi and preserves both
dnl with push/pop.
dnl
dnl Sections, each generated when its substring is found in $1 by d_index
dnl (the surface pointer copy is always generated):
dnl   Tex   Loads iOoW with fild and starts OOW_W_SCALE / iOoW with fdivr
dnl         as the very first instructions.  The quotient stays on the x87
dnl         stack while every other section runs and is stored to
dnl         RASTSPAN_iW by the last instruction of the body.  In between,
dnl         the Tex copy section clears RASTSPAN_iLOD, copies iOoW and
dnl         runs a loop that copies iUoW / iVoW into interleaved U,V DWORD
dnl         pairs at RASTSPAN_UVoW, with the iteration count taken from
dnl         the DWORD at RCTX_cActTex.
dnl   Z     uZ
dnl   Diff  uB and uR copied as DWORDs, then uG and uA copied as words
dnl   Spec  uBS and uRS copied as DWORDs, then uGS copied as a word
dnl   DIdx  uDIdx to RASTSPAN_iIdx, uDIdxA to RASTSPAN_iIdxA
dnl
dnl NOTE(review): the Tex loop labels are LoopFixed$1 and DoneFixed$1, the
dnl same names d_AddFixedAttrsBody generates.  Expanding both macros with
dnl the same $1 is only safe if the assembler scopes labels per procedure
dnl - confirm against the including file.
dnl NOTE(review): assumes d_index returns a negative value when the
dnl substring is absent; it is defined elsewhere - confirm.
dnl
define(`d_FillSpanFixedAttrsBody',
`ifelse(eval(d_index(`$1', `Tex') >= 0), `1', `
        fild DWORD PTR [ecx+ATTRSET_iOoW]
        fdivr DWORD PTR OOW_W_SCALE
')dnl
        ; Set surface pointers.
        mov eax, [ecx+ATTRSET_pSurface]
        mov [edx+RASTSPAN_pSurface], eax
        mov eax, [ecx+ATTRSET_pZ]
        mov [edx+RASTSPAN_pZ], eax

ifelse(eval(d_index(`$1', `Z') >= 0), `1', `
        mov eax, [ecx+ATTRSET_uZ]
        mov [edx+RASTSPAN_uZ], eax
')dnl
ifelse(eval(d_index(`$1', `Tex') >= 0), `1', `
        ; Clears both iLOD and iDLOD.
        xor eax, eax
        mov [edx+RASTSPAN_iLOD], eax
        mov eax, [ecx+ATTRSET_iOoW]
        mov [edx+RASTSPAN_iOoW], eax
        ; Just in case ebx and edi are used
        push ebx
        push edi
        mov ebx, pStpCtx
        mov ebx, [ebx + SCTX_pCtx]
        ; edi is the loop counter; eax stays free as the copy scratch.
        xor edi, edi
LoopFixed$1:
        cmp edi, DWORD PTR[ebx + RCTX_cActTex]
        je DoneFixed$1
        mov eax, [ecx+ATTRSET_iUoW + 4 * edi]
        mov [edx+RASTSPAN_UVoW + 8 * edi], eax
        mov eax, [ecx+ATTRSET_iVoW + 4 * edi]
        mov [edx+RASTSPAN_UVoW + 8 * edi + 4], eax
        inc edi
        jmp LoopFixed$1
DoneFixed$1:
        pop edi
        pop ebx
')dnl
ifelse(eval(d_index(`$1', `Diff') >= 0), `1', `
        ; Directly store DWORD-aligned fields, then whack in
        ; half DWORDs.
        ; ATTENTION - Keep word pairs shifted and OR together to store
        ; as DWORDs instead?
        mov eax, [ecx+ATTRSET_uB]
        mov [edx+RASTSPAN_uB], eax
        mov eax, [ecx+ATTRSET_uR]
        mov [edx+RASTSPAN_uR], eax
        mov ax, [ecx+ATTRSET_uG]
        mov [edx+RASTSPAN_uG], ax
        mov ax, [ecx+ATTRSET_uA]
        mov [edx+RASTSPAN_uA], ax
')dnl
ifelse(eval(d_index(`$1', `Spec') >= 0), `1', `
        mov eax, [ecx+ATTRSET_uBS]
        mov [edx+RASTSPAN_uBS], eax
        mov eax, [ecx+ATTRSET_uRS]
        mov [edx+RASTSPAN_uRS], eax
        mov ax, [ecx+ATTRSET_uGS]
        mov [edx+RASTSPAN_uGS], ax
')dnl
ifelse(eval(d_index(`$1', `DIdx') >= 0), `1', `
        mov eax, [ecx+ATTRSET_uDIdx]
        mov [edx+RASTSPAN_iIdx], eax
        mov eax, [ecx+ATTRSET_uDIdxA]
        mov [edx+RASTSPAN_iIdxA], eax
')dnl
ifelse(eval(d_index(`$1', `Tex') >= 0), `1', `
        ; Store the quotient of the divide started at the top.
        fistp DWORD PTR [edx+RASTSPAN_iW]
')dnl
')dnl