Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

4507 lines
162 KiB

  1. /*
  2. ** Copyright 1991, 1992, 1993, Silicon Graphics, Inc.
  3. ** All Rights Reserved.
  4. **
  5. ** This is UNPUBLISHED PROPRIETARY SOURCE CODE of Silicon Graphics, Inc.;
  6. ** the contents of this file may not be disclosed to third parties, copied or
  7. ** duplicated in any form, in whole or in part, without the prior written
  8. ** permission of Silicon Graphics, Inc.
  9. **
  10. ** RESTRICTED RIGHTS LEGEND:
  11. ** Use, duplication or disclosure by the Government is subject to restrictions
  12. ** as set forth in subdivision (c)(1)(ii) of the Rights in Technical Data
  13. ** and Computer Software clause at DFARS 252.227-7013, and/or in similar or
  14. ** successor clauses in the FAR, DOD or NASA FAR Supplement. Unpublished -
  15. ** rights reserved under the Copyright Laws of the United States.
  16. */
  17. #include "precomp.h"
  18. #pragma hdrstop
  19. #ifdef _X86_
  20. #define SHADER __GLcontext.polygon.shader
  21. #define GENGCACCEL __GLGENcontext.genAccel
  22. #define SPANDELTA __GLGENcontext.genAccel.spanDelta
  23. #define SPANVALUE __GLGENcontext.genAccel.spanValue
  24. #endif
  25. #define ENABLE_ASM 1
  26. #if DBG
  27. //#define FORCE_NPX_DEBUG 1
  28. #endif
  29. /**************************************************************************\
  30. \**************************************************************************/
  31. /* This routine sets gc->polygon.shader.cfb to gc->drawBuffer */
  32. void FASTCALL __fastGenFillSubTriangle(__GLcontext *gc, GLint iyBottom, GLint iyTop)
  33. {
  34. GLint ixLeft, ixRight;
  35. GLint ixLeftFrac, ixRightFrac;
  36. GLint spanWidth, clipY0, clipY1;
  37. ULONG ulSpanVisibility;
  38. GLint cWalls;
  39. GLint *Walls;
  40. #ifdef NT
  41. __GLstippleWord stackWords[__GL_MAX_STACK_STIPPLE_WORDS];
  42. __GLstippleWord *words;
  43. GLuint maxWidth;
  44. #else
  45. __GLstippleWord words[__GL_MAX_STIPPLE_WORDS];
  46. #endif
  47. BOOL bSurfaceDIB;
  48. BOOL bClipped;
  49. GLint xScr, yScr;
  50. GLint zFails;
  51. __GLzValue *zbuf, z;
  52. GLint r, g, b, s, t;
  53. __GLGENcontext *gengc = (__GLGENcontext *)gc;
  54. __genSpanFunc cSpanFunc = GENACCEL(gc).__fastSpanFuncPtr;
  55. __GLspanFunc zSpanFunc = GENACCEL(gc).__fastZSpanFuncPtr;
  56. int scansize;
  57. #ifdef NT
  58. maxWidth = (gc->transform.clipX1 - gc->transform.clipX0) + 31;
  59. if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS)
  60. {
  61. words = gcTempAlloc(gc, (maxWidth+__GL_STIPPLE_BITS-1)/8);
  62. if (words == NULL)
  63. {
  64. return;
  65. }
  66. }
  67. else
  68. {
  69. words = stackWords;
  70. }
  71. #endif
  72. gc->polygon.shader.stipplePat = words;
  73. scansize = gc->polygon.shader.cfb->buf.outerWidth;
  74. bSurfaceDIB = (gc->polygon.shader.cfb->buf.flags & DIB_FORMAT) != 0;
  75. bClipped = (!(gc->drawBuffer->buf.flags & NO_CLIP)) &&
  76. bSurfaceDIB;
  77. if (bSurfaceDIB)
  78. GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
  79. else
  80. GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
  81. ixLeft = gc->polygon.shader.ixLeft;
  82. ixLeftFrac = gc->polygon.shader.ixLeftFrac;
  83. ixRight = gc->polygon.shader.ixRight;
  84. ixRightFrac = gc->polygon.shader.ixRightFrac;
  85. clipY0 = gc->transform.clipY0;
  86. clipY1 = gc->transform.clipY1;
  87. r = GENACCEL(gc).spanValue.r;
  88. g = GENACCEL(gc).spanValue.g;
  89. b = GENACCEL(gc).spanValue.b;
  90. s = GENACCEL(gc).spanValue.s;
  91. t = GENACCEL(gc).spanValue.t;
  92. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
  93. z = gc->polygon.shader.frag.z;
  94. if( gc->modes.depthBits == 32 )
  95. zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
  96. ixLeft, iyBottom);
  97. else
  98. zbuf = (__GLzValue *)__GL_DEPTH_ADDR(&gc->depthBuffer,
  99. (__GLz16Value*),
  100. ixLeft, iyBottom);
  101. } else if ((gc->polygon.shader.modeFlags & __GL_SHADE_STIPPLE) == 0) {
  102. GLuint w;
  103. if (w = ((gc->transform.clipX1 - gc->transform.clipX0) + 31) >> 3)
  104. RtlFillMemoryUlong(words, w, ~((ULONG)0));
  105. GENACCEL(gc).flags &= ~(HAVE_STIPPLE);
  106. }
  107. //
  108. // render the spans
  109. //
  110. while (iyBottom < iyTop) {
  111. spanWidth = ixRight - ixLeft;
  112. /*
  113. ** Only render spans that have non-zero width and which are
  114. ** not scissored out vertically.
  115. */
  116. if ((spanWidth > 0) && (iyBottom >= clipY0) && (iyBottom < clipY1)) {
  117. gc->polygon.shader.frag.x = ixLeft;
  118. gc->polygon.shader.frag.y = iyBottom;
  119. gc->polygon.shader.zbuf = zbuf;
  120. gc->polygon.shader.frag.z = z;
  121. GENACCEL(gc).spanValue.r = r;
  122. GENACCEL(gc).spanValue.g = g;
  123. GENACCEL(gc).spanValue.b = b;
  124. GENACCEL(gc).spanValue.s = s;
  125. GENACCEL(gc).spanValue.t = t;
  126. // take care of horizontal scissoring
  127. if (!gc->transform.reasonableViewport) {
  128. GLint clipX0 = gc->transform.clipX0;
  129. GLint clipX1 = gc->transform.clipX1;
  130. // see if we skip entire span
  131. if ((ixRight <= clipX0) || (ixLeft >= clipX1))
  132. goto advance;
  133. // now clip right and left
  134. if (ixRight > clipX1)
  135. spanWidth = (clipX1 - ixLeft);
  136. if (ixLeft < clipX0) {
  137. GLuint delta;
  138. delta = clipX0 - ixLeft;
  139. spanWidth -= delta;
  140. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  141. GENACCEL(gc).spanValue.r += delta * GENACCEL(gc).spanDelta.r;
  142. if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
  143. GENACCEL(gc).spanValue.g += delta * GENACCEL(gc).spanDelta.g;
  144. GENACCEL(gc).spanValue.b += delta * GENACCEL(gc).spanDelta.b;
  145. }
  146. }
  147. if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
  148. GENACCEL(gc).spanValue.s += delta * GENACCEL(gc).spanDelta.s;
  149. GENACCEL(gc).spanValue.t += delta * GENACCEL(gc).spanDelta.t;
  150. }
  151. gc->polygon.shader.frag.x = clipX0;
  152. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
  153. if( gc->modes.depthBits == 32 )
  154. gc->polygon.shader.zbuf += delta;
  155. else
  156. (__GLz16Value *)gc->polygon.shader.zbuf += delta;
  157. gc->polygon.shader.frag.z +=
  158. (gc->polygon.shader.dzdx * delta);
  159. }
  160. }
  161. }
  162. // now have span length
  163. gc->polygon.shader.length = spanWidth;
  164. // If a stipple is active, process it first
  165. if (gc->polygon.shader.modeFlags & __GL_SHADE_STIPPLE)
  166. {
  167. // If no pixels are left after stippling and depth
  168. // testing then we can skip the span
  169. // Note that this function handles the no-depth-
  170. // testing case also
  171. gc->polygon.shader.done = GL_FALSE;
  172. if (!(*GENACCEL(gc).__fastStippleDepthTestSpan)(gc) ||
  173. gc->polygon.shader.done)
  174. {
  175. goto advance;
  176. }
  177. GENACCEL(gc).flags |= HAVE_STIPPLE;
  178. }
  179. // Do z-buffering if needed, and short-circuit rest of span
  180. // operations if nothing will be drawn.
  181. else if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
  182. // initially assume no stippling
  183. GENACCEL(gc).flags &= ~(HAVE_STIPPLE);
  184. if ((zFails = (*zSpanFunc)(gc)) == 1)
  185. goto advance;
  186. else if (zFails)
  187. GENACCEL(gc).flags |= HAVE_STIPPLE;
  188. }
  189. if (gc->state.raster.drawBuffer == GL_FRONT_AND_BACK) {
  190. gc->polygon.shader.cfb = &gc->frontBuffer;
  191. xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) +
  192. gc->frontBuffer.buf.xOrigin;
  193. yScr = __GL_UNBIAS_Y(gc, iyBottom) +
  194. gc->frontBuffer.buf.yOrigin;
  195. // If the front buffer is a DIB, we're drawing straight to
  196. // the screen, so we must check clipping.
  197. if ((gc->frontBuffer.buf.flags &
  198. (DIB_FORMAT | NO_CLIP)) == DIB_FORMAT) {
  199. ulSpanVisibility = wglSpanVisible(xScr, yScr, spanWidth,
  200. &cWalls, &Walls);
  201. // If the span is completely visible, we can treat the
  202. // screen as a DIB.
  203. if (ulSpanVisibility == WGL_SPAN_ALL) {
  204. GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
  205. (*cSpanFunc)(gengc);
  206. } else if (ulSpanVisibility == WGL_SPAN_PARTIAL) {
  207. GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
  208. if (GENACCEL(gc).flags & HAVE_STIPPLE)
  209. (*gengc->pfnCopyPixels)(gengc,
  210. gc->polygon.shader.cfb,
  211. xScr, yScr, spanWidth,
  212. FALSE);
  213. GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
  214. (*cSpanFunc)(gengc);
  215. (*gengc->pfnCopyPixels)(gengc,
  216. gc->polygon.shader.cfb,
  217. xScr, yScr, spanWidth,
  218. TRUE);
  219. }
  220. } else {
  221. GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
  222. if (GENACCEL(gc).flags & HAVE_STIPPLE)
  223. (*gengc->pfnCopyPixels)(gengc,
  224. gc->polygon.shader.cfb,
  225. xScr, yScr, spanWidth,
  226. FALSE);
  227. GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
  228. (*cSpanFunc)(gengc);
  229. (*gengc->pfnCopyPixels)(gengc,
  230. gc->polygon.shader.cfb,
  231. xScr, yScr, spanWidth,
  232. TRUE);
  233. }
  234. // The back buffer is always DIB-compatible
  235. gc->polygon.shader.cfb = &gc->backBuffer;
  236. GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
  237. (*cSpanFunc)(gengc);
  238. } else {
  239. if (bClipped) {
  240. xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) +
  241. gc->drawBuffer->buf.xOrigin;
  242. yScr = __GL_UNBIAS_Y(gc, iyBottom) +
  243. gc->drawBuffer->buf.yOrigin;
  244. ulSpanVisibility = wglSpanVisible(xScr, yScr, spanWidth,
  245. &cWalls, &Walls);
  246. if (ulSpanVisibility == WGL_SPAN_ALL) {
  247. GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
  248. (*cSpanFunc)(gengc);
  249. } else if (ulSpanVisibility == WGL_SPAN_PARTIAL) {
  250. GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
  251. if (GENACCEL(gc).flags & HAVE_STIPPLE)
  252. (*gengc->pfnCopyPixels)(gengc,
  253. gc->polygon.shader.cfb,
  254. xScr, yScr, spanWidth,
  255. FALSE);
  256. GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
  257. (*cSpanFunc)(gengc);
  258. (*gengc->pfnCopyPixels)(gengc,
  259. gc->polygon.shader.cfb,
  260. xScr, yScr, spanWidth,
  261. TRUE);
  262. }
  263. } else if (bSurfaceDIB) {
  264. (*cSpanFunc)(gengc);
  265. } else {
  266. xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) +
  267. gc->drawBuffer->buf.xOrigin;
  268. yScr = __GL_UNBIAS_Y(gc, iyBottom) +
  269. gc->drawBuffer->buf.yOrigin;
  270. GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
  271. if (GENACCEL(gc).flags & HAVE_STIPPLE)
  272. (*gengc->pfnCopyPixels)(gengc,
  273. gc->polygon.shader.cfb,
  274. xScr, yScr, spanWidth,
  275. FALSE);
  276. (*cSpanFunc)(gengc);
  277. if (!bSurfaceDIB)
  278. (*gengc->pfnCopyPixels)(gengc,
  279. gc->polygon.shader.cfb,
  280. xScr, yScr, spanWidth,
  281. TRUE);
  282. }
  283. }
  284. }
  285. advance:
  286. GENACCEL(gc).pPix += scansize;
  287. /* Advance right edge fixed point, adjusting for carry */
  288. ixRightFrac += gc->polygon.shader.dxRightFrac;
  289. if (ixRightFrac < 0) {
  290. /* Carry/Borrow'd. Use large step */
  291. ixRight += gc->polygon.shader.dxRightBig;
  292. ixRightFrac &= ~0x80000000;
  293. } else {
  294. ixRight += gc->polygon.shader.dxRightLittle;
  295. }
  296. iyBottom++;
  297. ixLeftFrac += gc->polygon.shader.dxLeftFrac;
  298. if (ixLeftFrac < 0) {
  299. /* Carry/Borrow'd. Use large step */
  300. ixLeft += gc->polygon.shader.dxLeftBig;
  301. ixLeftFrac &= ~0x80000000;
  302. if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
  303. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  304. r += *((GLint *)&gc->polygon.shader.rBig);
  305. g += *((GLint *)&gc->polygon.shader.gBig);
  306. b += *((GLint *)&gc->polygon.shader.bBig);
  307. }
  308. if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
  309. s += *((GLint *)&gc->polygon.shader.sBig);
  310. t += *((GLint *)&gc->polygon.shader.tBig);
  311. }
  312. } else {
  313. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  314. r += *((GLint *)&gc->polygon.shader.rBig);
  315. }
  316. }
  317. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
  318. z += gc->polygon.shader.zBig;
  319. /* The implicit multiply is taken out of the loop */
  320. zbuf = (__GLzValue*)((GLubyte*)zbuf +
  321. gc->polygon.shader.zbufBig);
  322. }
  323. } else {
  324. /* Use small step */
  325. ixLeft += gc->polygon.shader.dxLeftLittle;
  326. if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
  327. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  328. r += *((GLint *)&gc->polygon.shader.rLittle);
  329. g += *((GLint *)&gc->polygon.shader.gLittle);
  330. b += *((GLint *)&gc->polygon.shader.bLittle);
  331. }
  332. if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
  333. s += *((GLint *)&gc->polygon.shader.sLittle);
  334. t += *((GLint *)&gc->polygon.shader.tLittle);
  335. }
  336. } else {
  337. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  338. r += *((GLint *)&gc->polygon.shader.rLittle);
  339. }
  340. }
  341. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
  342. z += gc->polygon.shader.zLittle;
  343. /* The implicit multiply is taken out of the loop */
  344. zbuf = (__GLzValue*)((GLubyte*)zbuf +
  345. gc->polygon.shader.zbufLittle);
  346. }
  347. }
  348. }
  349. gc->polygon.shader.ixLeft = ixLeft;
  350. gc->polygon.shader.ixLeftFrac = ixLeftFrac;
  351. gc->polygon.shader.ixRight = ixRight;
  352. gc->polygon.shader.ixRightFrac = ixRightFrac;
  353. gc->polygon.shader.frag.z = z;
  354. GENACCEL(gc).spanValue.r = r;
  355. GENACCEL(gc).spanValue.g = g;
  356. GENACCEL(gc).spanValue.b = b;
  357. GENACCEL(gc).spanValue.s = s;
  358. GENACCEL(gc).spanValue.t = t;
  359. #ifdef NT
  360. if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS)
  361. {
  362. gcTempFree(gc, words);
  363. }
  364. #endif
  365. }
  366. void FASTCALL __fastGenFillSubTriangleTexRGBA(__GLcontext *gc, GLint iyBottom, GLint iyTop)
  367. {
  368. GLint ixLeft, ixRight;
  369. GLint ixLeftFrac, ixRightFrac;
  370. GLint spanWidth, clipY0, clipY1;
  371. ULONG ulSpanVisibility;
  372. GLint cWalls;
  373. GLint *Walls;
  374. BOOL bSurfaceDIB;
  375. BOOL bClipped;
  376. GLint xScr, yScr;
  377. __GLzValue *zbuf, z;
  378. GLint r, g, b, a, s, t;
  379. __GLfloat qw;
  380. __GLGENcontext *gengc = (__GLGENcontext *)gc;
  381. __genSpanFunc cSpanFunc = GENACCEL(gc).__fastSpanFuncPtr;
  382. int scansize;
  383. BOOL bReadPixels = (gc->state.enables.general & __GL_BLEND_ENABLE) ||
  384. (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST);
  385. #ifdef _MCD_
  386. GLboolean bMcdZ = ((gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) &&
  387. (gengc->pMcdState != NULL) &&
  388. (gengc->pMcdState->pDepthSpan != NULL) &&
  389. (gengc->pMcdState->pMcdSurf != NULL) &&
  390. !(gengc->pMcdState->McdBuffers.mcdDepthBuf.bufFlags & MCDBUF_ENABLED));
  391. #endif
  392. scansize = gc->polygon.shader.cfb->buf.outerWidth;
  393. bSurfaceDIB = (gc->polygon.shader.cfb->buf.flags & DIB_FORMAT) != 0;
  394. bClipped = (!(gc->drawBuffer->buf.flags & NO_CLIP)) &&
  395. bSurfaceDIB;
  396. if (bSurfaceDIB)
  397. GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
  398. else
  399. GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
  400. ixLeft = gc->polygon.shader.ixLeft;
  401. ixLeftFrac = gc->polygon.shader.ixLeftFrac;
  402. ixRight = gc->polygon.shader.ixRight;
  403. ixRightFrac = gc->polygon.shader.ixRightFrac;
  404. clipY0 = gc->transform.clipY0;
  405. clipY1 = gc->transform.clipY1;
  406. r = GENACCEL(gc).spanValue.r;
  407. g = GENACCEL(gc).spanValue.g;
  408. b = GENACCEL(gc).spanValue.b;
  409. a = GENACCEL(gc).spanValue.a;
  410. s = GENACCEL(gc).spanValue.s;
  411. t = GENACCEL(gc).spanValue.t;
  412. qw = gc->polygon.shader.frag.qw;
  413. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
  414. z = gc->polygon.shader.frag.z;
  415. #ifdef _MCD_
  416. if (bMcdZ)
  417. {
  418. zbuf = (__GLzValue *)gengc->pMcdState->pMcdSurf->McdDepthBuf.pv;
  419. }
  420. else
  421. #endif
  422. {
  423. if( gc->modes.depthBits == 32 )
  424. zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
  425. ixLeft, iyBottom);
  426. else
  427. zbuf = (__GLzValue *)__GL_DEPTH_ADDR(&gc->depthBuffer,
  428. (__GLz16Value*),
  429. ixLeft, iyBottom);
  430. }
  431. }
  432. //
  433. // render the spans
  434. //
  435. while (iyBottom < iyTop) {
  436. spanWidth = ixRight - ixLeft;
  437. /*
  438. ** Only render spans that have non-zero width and which are
  439. ** not scissored out vertically.
  440. */
  441. if ((spanWidth > 0) && (iyBottom >= clipY0) && (iyBottom < clipY1)) {
  442. gc->polygon.shader.frag.x = ixLeft;
  443. gc->polygon.shader.frag.y = iyBottom;
  444. gc->polygon.shader.zbuf = zbuf;
  445. gc->polygon.shader.frag.z = z;
  446. GENACCEL(gc).spanValue.r = r;
  447. GENACCEL(gc).spanValue.g = g;
  448. GENACCEL(gc).spanValue.b = b;
  449. GENACCEL(gc).spanValue.a = a;
  450. GENACCEL(gc).spanValue.s = s;
  451. GENACCEL(gc).spanValue.t = t;
  452. gc->polygon.shader.frag.qw = qw;
  453. // take care of horizontal scissoring
  454. if (!gc->transform.reasonableViewport) {
  455. GLint clipX0 = gc->transform.clipX0;
  456. GLint clipX1 = gc->transform.clipX1;
  457. // see if we skip entire span
  458. if ((ixRight <= clipX0) || (ixLeft >= clipX1))
  459. goto advance;
  460. // now clip right and left
  461. if (ixRight > clipX1)
  462. spanWidth = (clipX1 - ixLeft);
  463. if (ixLeft < clipX0) {
  464. GLuint delta;
  465. delta = clipX0 - ixLeft;
  466. spanWidth -= delta;
  467. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  468. GENACCEL(gc).spanValue.r += delta * GENACCEL(gc).spanDelta.r;
  469. GENACCEL(gc).spanValue.g += delta * GENACCEL(gc).spanDelta.g;
  470. GENACCEL(gc).spanValue.b += delta * GENACCEL(gc).spanDelta.b;
  471. GENACCEL(gc).spanValue.a += delta * GENACCEL(gc).spanDelta.a;
  472. }
  473. if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
  474. GENACCEL(gc).spanValue.s += delta * GENACCEL(gc).spanDelta.s;
  475. GENACCEL(gc).spanValue.t += delta * GENACCEL(gc).spanDelta.t;
  476. gc->polygon.shader.frag.qw += delta * gc->polygon.shader.dqwdx;
  477. }
  478. gc->polygon.shader.frag.x = clipX0;
  479. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
  480. if( gc->modes.depthBits == 32 )
  481. gc->polygon.shader.zbuf += delta;
  482. else
  483. (__GLz16Value *)gc->polygon.shader.zbuf += delta;
  484. gc->polygon.shader.frag.z +=
  485. (gc->polygon.shader.dzdx * delta);
  486. }
  487. }
  488. }
  489. // now have span length
  490. gc->polygon.shader.length = spanWidth;
  491. #ifdef _MCD_
  492. // read from driver z buffer into z span buffer
  493. if (bMcdZ) {
  494. GenMcdReadZRawSpan(&gc->depthBuffer, gc->polygon.shader.frag.x,
  495. iyBottom, spanWidth);
  496. }
  497. #endif
  498. if (bClipped) {
  499. xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) +
  500. gc->drawBuffer->buf.xOrigin;
  501. yScr = __GL_UNBIAS_Y(gc, iyBottom) +
  502. gc->drawBuffer->buf.yOrigin;
  503. ulSpanVisibility = wglSpanVisible(xScr, yScr, spanWidth,
  504. &cWalls, &Walls);
  505. if (ulSpanVisibility == WGL_SPAN_ALL) {
  506. GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
  507. (*cSpanFunc)(gengc);
  508. } else if (ulSpanVisibility == WGL_SPAN_PARTIAL) {
  509. GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
  510. if (bReadPixels)
  511. (*gengc->pfnCopyPixels)(gengc,
  512. gc->polygon.shader.cfb,
  513. xScr, yScr, spanWidth,
  514. FALSE);
  515. (*cSpanFunc)(gengc);
  516. (*gengc->pfnCopyPixels)(gengc,
  517. gc->polygon.shader.cfb,
  518. xScr, yScr, spanWidth,
  519. TRUE);
  520. }
  521. } else if (bSurfaceDIB) {
  522. (*cSpanFunc)(gengc);
  523. } else {
  524. xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) +
  525. gc->drawBuffer->buf.xOrigin;
  526. yScr = __GL_UNBIAS_Y(gc, iyBottom) +
  527. gc->drawBuffer->buf.yOrigin;
  528. GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
  529. if (bReadPixels)
  530. (*gengc->pfnCopyPixels)(gengc,
  531. gc->polygon.shader.cfb,
  532. xScr, yScr, spanWidth,
  533. FALSE);
  534. (*cSpanFunc)(gengc);
  535. if (!bSurfaceDIB)
  536. (*gengc->pfnCopyPixels)(gengc,
  537. gc->polygon.shader.cfb,
  538. xScr, yScr, spanWidth,
  539. TRUE);
  540. }
  541. #ifdef _MCD_
  542. // write z span buffer back to driver z buffer
  543. if (bMcdZ) {
  544. GenMcdWriteZRawSpan(&gc->depthBuffer,
  545. gc->polygon.shader.frag.x,
  546. iyBottom, spanWidth);
  547. }
  548. #endif
  549. }
  550. advance:
  551. GENACCEL(gc).pPix += scansize;
  552. /* Advance right edge fixed point, adjusting for carry */
  553. ixRightFrac += gc->polygon.shader.dxRightFrac;
  554. if (ixRightFrac < 0) {
  555. /* Carry/Borrow'd. Use large step */
  556. ixRight += gc->polygon.shader.dxRightBig;
  557. ixRightFrac &= ~0x80000000;
  558. } else {
  559. ixRight += gc->polygon.shader.dxRightLittle;
  560. }
  561. iyBottom++;
  562. ixLeftFrac += gc->polygon.shader.dxLeftFrac;
  563. if (ixLeftFrac < 0) {
  564. /* Carry/Borrow'd. Use large step */
  565. ixLeft += gc->polygon.shader.dxLeftBig;
  566. ixLeftFrac &= ~0x80000000;
  567. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  568. r += *((GLint *)&gc->polygon.shader.rBig);
  569. g += *((GLint *)&gc->polygon.shader.gBig);
  570. b += *((GLint *)&gc->polygon.shader.bBig);
  571. a += *((GLint *)&gc->polygon.shader.aBig);
  572. }
  573. if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
  574. s += *((GLint *)&gc->polygon.shader.sBig);
  575. t += *((GLint *)&gc->polygon.shader.tBig);
  576. qw += gc->polygon.shader.qwBig;
  577. }
  578. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
  579. z += gc->polygon.shader.zBig;
  580. /* The implicit multiply is taken out of the loop */
  581. #ifdef _MCD_
  582. if (!bMcdZ)
  583. #endif
  584. {
  585. zbuf = (__GLzValue*)((GLubyte*)zbuf +
  586. gc->polygon.shader.zbufBig);
  587. }
  588. }
  589. } else {
  590. /* Use small step */
  591. ixLeft += gc->polygon.shader.dxLeftLittle;
  592. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  593. r += *((GLint *)&gc->polygon.shader.rLittle);
  594. g += *((GLint *)&gc->polygon.shader.gLittle);
  595. b += *((GLint *)&gc->polygon.shader.bLittle);
  596. a += *((GLint *)&gc->polygon.shader.aLittle);
  597. }
  598. if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
  599. s += *((GLint *)&gc->polygon.shader.sLittle);
  600. t += *((GLint *)&gc->polygon.shader.tLittle);
  601. qw += gc->polygon.shader.qwLittle;
  602. }
  603. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
  604. z += gc->polygon.shader.zLittle;
  605. /* The implicit multiply is taken out of the loop */
  606. #ifdef _MCD_
  607. if (!bMcdZ)
  608. #endif
  609. {
  610. zbuf = (__GLzValue*)((GLubyte*)zbuf +
  611. gc->polygon.shader.zbufLittle);
  612. }
  613. }
  614. }
  615. }
  616. gc->polygon.shader.ixLeft = ixLeft;
  617. gc->polygon.shader.ixLeftFrac = ixLeftFrac;
  618. gc->polygon.shader.ixRight = ixRight;
  619. gc->polygon.shader.ixRightFrac = ixRightFrac;
  620. gc->polygon.shader.frag.z = z;
  621. gc->polygon.shader.zbuf = zbuf;
  622. GENACCEL(gc).spanValue.r = r;
  623. GENACCEL(gc).spanValue.g = g;
  624. GENACCEL(gc).spanValue.b = b;
  625. GENACCEL(gc).spanValue.a = a;
  626. GENACCEL(gc).spanValue.s = s;
  627. GENACCEL(gc).spanValue.t = t;
  628. gc->polygon.shader.frag.qw = qw;
  629. }
  630. /**************************************************************************\
  631. \**************************************************************************/
  632. void FASTCALL GenDrvFillSubTriangle(__GLcontext *gc, GLint iyBottom, GLint iyTop)
  633. {
  634. GLint ixLeft, ixRight;
  635. GLint ixLeftFrac, ixRightFrac;
  636. GLint spanWidth, clipY0, clipY1;
  637. #ifdef NT
  638. __GLstippleWord stackWords[__GL_MAX_STACK_STIPPLE_WORDS];
  639. __GLstippleWord *words;
  640. GLuint maxWidth;
  641. #else
  642. __GLstippleWord words[__GL_MAX_STIPPLE_WORDS];
  643. #endif
  644. GLint zFails;
  645. __GLzValue *zbuf = NULL, z;
  646. GLint r, g, b, a, s, t;
  647. __GLGENcontext *gengc = (__GLGENcontext *)gc;
  648. __genSpanFunc cSpanFunc = GENACCEL(gc).__fastSpanFuncPtr;
  649. __GLspanFunc zSpanFunc = GENACCEL(gc).__fastZSpanFuncPtr;
  650. #ifdef NT
  651. maxWidth = (gc->transform.clipX1 - gc->transform.clipX0) + 31;
  652. if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS)
  653. {
  654. words = gcTempAlloc(gc, (maxWidth+__GL_STIPPLE_BITS-1)/8);
  655. if (words == NULL)
  656. {
  657. return;
  658. }
  659. }
  660. else
  661. {
  662. words = stackWords;
  663. }
  664. #endif
  665. gc->polygon.shader.stipplePat = words;
  666. gc->polygon.shader.cfb = gc->drawBuffer;
  667. ixLeft = gc->polygon.shader.ixLeft;
  668. ixLeftFrac = gc->polygon.shader.ixLeftFrac;
  669. ixRight = gc->polygon.shader.ixRight;
  670. ixRightFrac = gc->polygon.shader.ixRightFrac;
  671. clipY0 = gc->transform.clipY0;
  672. clipY1 = gc->transform.clipY1;
  673. r = GENACCEL(gc).spanValue.r;
  674. g = GENACCEL(gc).spanValue.g;
  675. b = GENACCEL(gc).spanValue.b;
  676. a = GENACCEL(gc).spanValue.a;
  677. s = GENACCEL(gc).spanValue.s;
  678. t = GENACCEL(gc).spanValue.t;
  679. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
  680. z = gc->polygon.shader.frag.z;
  681. if( gc->modes.depthBits == 32 )
  682. zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
  683. ixLeft, iyBottom);
  684. else
  685. zbuf = (__GLzValue *)__GL_DEPTH_ADDR(&gc->depthBuffer,
  686. (__GLz16Value*),
  687. ixLeft, iyBottom);
  688. } else {
  689. GLuint w;
  690. if (w = ((gc->transform.clipX1 - gc->transform.clipX0) + 31) >> 3)
  691. RtlFillMemoryUlong(words, w, ~((ULONG)0));
  692. GENACCEL(gc).flags &= ~(HAVE_STIPPLE);
  693. }
  694. while (iyBottom < iyTop) {
  695. spanWidth = ixRight - ixLeft;
  696. /*
  697. ** Only render spans that have non-zero width and which are
  698. ** not scissored out vertically.
  699. */
  700. if ((spanWidth > 0) && (iyBottom >= clipY0) && (iyBottom < clipY1)) {
  701. gc->polygon.shader.frag.x = ixLeft;
  702. gc->polygon.shader.frag.y = iyBottom;
  703. gc->polygon.shader.zbuf = zbuf;
  704. gc->polygon.shader.frag.z = z;
  705. GENACCEL(gc).spanValue.r = r;
  706. GENACCEL(gc).spanValue.g = g;
  707. GENACCEL(gc).spanValue.b = b;
  708. GENACCEL(gc).spanValue.a = a;
  709. GENACCEL(gc).spanValue.s = s;
  710. GENACCEL(gc).spanValue.t = t;
  711. // take care of horizontal scissoring
  712. if (!gc->transform.reasonableViewport) {
  713. GLint clipX0 = gc->transform.clipX0;
  714. GLint clipX1 = gc->transform.clipX1;
  715. // see if we skip entire span
  716. if ((ixRight <= clipX0) || (ixLeft >= clipX1))
  717. goto advance;
  718. // now clip right and left
  719. if (ixRight > clipX1)
  720. spanWidth = (clipX1 - ixLeft);
  721. if (ixLeft < clipX0) {
  722. GLuint delta;
  723. delta = clipX0 - ixLeft;
  724. spanWidth -= delta;
  725. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  726. GENACCEL(gc).spanValue.r += delta * GENACCEL(gc).spanDelta.r;
  727. if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
  728. GENACCEL(gc).spanValue.g += delta * GENACCEL(gc).spanDelta.g;
  729. GENACCEL(gc).spanValue.b += delta * GENACCEL(gc).spanDelta.b;
  730. }
  731. }
  732. if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
  733. GENACCEL(gc).spanValue.s += delta * GENACCEL(gc).spanDelta.s;
  734. GENACCEL(gc).spanValue.t += delta * GENACCEL(gc).spanDelta.t;
  735. }
  736. gc->polygon.shader.frag.x = clipX0;
  737. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
  738. if( gc->modes.depthBits == 32 )
  739. gc->polygon.shader.zbuf += delta;
  740. else
  741. (__GLz16Value *)gc->polygon.shader.zbuf += delta;
  742. gc->polygon.shader.frag.z +=
  743. (gc->polygon.shader.dzdx * delta);
  744. }
  745. }
  746. }
  747. // now have span length
  748. gc->polygon.shader.length = spanWidth;
  749. // Do z-buffering if needed, and short-circuit rest of span
  750. // operations if nothing will be drawn.
  751. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
  752. // initially assume no stippling
  753. GENACCEL(gc).flags &= ~(HAVE_STIPPLE);
  754. if ((zFails = (*zSpanFunc)(gc)) == 1)
  755. goto advance;
  756. else if (zFails)
  757. GENACCEL(gc).flags |= HAVE_STIPPLE;
  758. }
  759. (*cSpanFunc)(gengc);
  760. }
  761. advance:
  762. /* Advance right edge fixed point, adjusting for carry */
  763. ixRightFrac += gc->polygon.shader.dxRightFrac;
  764. if (ixRightFrac < 0) {
  765. /* Carry/Borrow'd. Use large step */
  766. ixRight += gc->polygon.shader.dxRightBig;
  767. ixRightFrac &= ~0x80000000;
  768. } else {
  769. ixRight += gc->polygon.shader.dxRightLittle;
  770. }
  771. iyBottom++;
  772. ixLeftFrac += gc->polygon.shader.dxLeftFrac;
  773. if (ixLeftFrac < 0) {
  774. /* Carry/Borrow'd. Use large step */
  775. ixLeft += gc->polygon.shader.dxLeftBig;
  776. ixLeftFrac &= ~0x80000000;
  777. if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
  778. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  779. r += *((GLint *)&gc->polygon.shader.rBig);
  780. g += *((GLint *)&gc->polygon.shader.gBig);
  781. b += *((GLint *)&gc->polygon.shader.bBig);
  782. a += *((GLint *)&gc->polygon.shader.aBig);
  783. }
  784. if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
  785. s += *((GLint *)&gc->polygon.shader.sBig);
  786. t += *((GLint *)&gc->polygon.shader.tBig);
  787. }
  788. } else {
  789. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  790. r += *((GLint *)&gc->polygon.shader.rBig);
  791. }
  792. }
  793. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
  794. z += gc->polygon.shader.zBig;
  795. /* The implicit multiply is taken out of the loop */
  796. zbuf = (__GLzValue*)((GLubyte*)zbuf +
  797. gc->polygon.shader.zbufBig);
  798. }
  799. } else {
  800. /* Use small step */
  801. ixLeft += gc->polygon.shader.dxLeftLittle;
  802. if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
  803. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  804. r += *((GLint *)&gc->polygon.shader.rLittle);
  805. g += *((GLint *)&gc->polygon.shader.gLittle);
  806. b += *((GLint *)&gc->polygon.shader.bLittle);
  807. a += *((GLint *)&gc->polygon.shader.aLittle);
  808. }
  809. if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
  810. s += *((GLint *)&gc->polygon.shader.sLittle);
  811. t += *((GLint *)&gc->polygon.shader.tLittle);
  812. }
  813. } else {
  814. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  815. r += *((GLint *)&gc->polygon.shader.rLittle);
  816. }
  817. }
  818. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
  819. z += gc->polygon.shader.zLittle;
  820. /* The implicit multiply is taken out of the loop */
  821. zbuf = (__GLzValue*)((GLubyte*)zbuf +
  822. gc->polygon.shader.zbufLittle);
  823. }
  824. }
  825. }
  826. gc->polygon.shader.ixLeft = ixLeft;
  827. gc->polygon.shader.ixLeftFrac = ixLeftFrac;
  828. gc->polygon.shader.ixRight = ixRight;
  829. gc->polygon.shader.ixRightFrac = ixRightFrac;
  830. gc->polygon.shader.frag.z = z;
  831. GENACCEL(gc).spanValue.r = r;
  832. GENACCEL(gc).spanValue.g = g;
  833. GENACCEL(gc).spanValue.b = b;
  834. GENACCEL(gc).spanValue.a = a;
  835. GENACCEL(gc).spanValue.s = s;
  836. GENACCEL(gc).spanValue.t = t;
  837. #ifdef NT
  838. if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS)
  839. {
  840. gcTempFree(gc, words);
  841. }
  842. #endif
  843. }
  844. /**************************************************************************\
  845. \**************************************************************************/
  846. void GenSnapXLeft(__GLcontext *gc, __GLfloat xLeft, __GLfloat dxdyLeft)
  847. {
  848. GLint ixLeft, ixLeftFrac;
  849. ixLeft = __GL_VERTEX_FLOAT_TO_INT(xLeft);
  850. ixLeftFrac = __GL_VERTEX_PROMOTED_FRACTION(xLeft) + 0x40000000;
  851. gc->polygon.shader.ixLeftFrac = ixLeftFrac & ~0x80000000;
  852. gc->polygon.shader.ixLeft = ixLeft + (((GLuint) ixLeftFrac) >> 31);
  853. /* Compute big and little steps */
  854. gc->polygon.shader.dxLeftLittle = FTOL(dxdyLeft);
  855. gc->polygon.shader.dxLeftFrac =
  856. FLT_FRACTION(dxdyLeft - gc->polygon.shader.dxLeftLittle);
  857. if (gc->polygon.shader.dxLeftFrac < 0) {
  858. gc->polygon.shader.dxLeftBig = gc->polygon.shader.dxLeftLittle - 1;
  859. } else {
  860. gc->polygon.shader.dxLeftBig = gc->polygon.shader.dxLeftLittle + 1;
  861. }
  862. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
  863. /*
  864. ** Compute the big and little depth buffer steps. We walk the
  865. ** memory pointers for the depth buffer along the edge of the
  866. ** triangle as we walk the edge. This way we don't have to
  867. ** recompute the buffer address as we go.
  868. */
  869. if (gc->depthBuffer.buf.elementSize == 2) {
  870. gc->polygon.shader.zbufLittle =
  871. (gc->depthBuffer.buf.outerWidth +
  872. gc->polygon.shader.dxLeftLittle) << 1;
  873. gc->polygon.shader.zbufBig =
  874. (gc->depthBuffer.buf.outerWidth +
  875. gc->polygon.shader.dxLeftBig) << 1;
  876. } else {
  877. gc->polygon.shader.zbufLittle =
  878. (gc->depthBuffer.buf.outerWidth +
  879. gc->polygon.shader.dxLeftLittle) << 2;
  880. gc->polygon.shader.zbufBig =
  881. (gc->depthBuffer.buf.outerWidth +
  882. gc->polygon.shader.dxLeftBig) << 2;
  883. }
  884. }
  885. }
  886. /**************************************************************************\
  887. \**************************************************************************/
  888. void GenSnapXRight(__GLcontext *gc, __GLfloat xRight, __GLfloat dxdyRight)
  889. {
  890. GLint ixRight, ixRightFrac;
  891. ixRight = __GL_VERTEX_FLOAT_TO_INT(xRight);
  892. ixRightFrac = __GL_VERTEX_PROMOTED_FRACTION(xRight) + 0x40000000;
  893. gc->polygon.shader.ixRightFrac = ixRightFrac & ~0x80000000;
  894. gc->polygon.shader.ixRight = ixRight + (((GLuint) ixRightFrac) >> 31);
  895. /* Compute big and little steps */
  896. gc->polygon.shader.dxRightLittle = FTOL(dxdyRight);
  897. gc->polygon.shader.dxRightFrac =
  898. FLT_FRACTION(dxdyRight - gc->polygon.shader.dxRightLittle);
  899. if (gc->polygon.shader.dxRightFrac < 0) {
  900. gc->polygon.shader.dxRightBig = gc->polygon.shader.dxRightLittle - 1;
  901. } else {
  902. gc->polygon.shader.dxRightBig = gc->polygon.shader.dxRightLittle + 1;
  903. }
  904. }
  905. /**************************************************************************\
  906. \**************************************************************************/
  /*
  ** Set up the initial span values and the per-scanline edge steps for
  ** color and depth.
  **
  **   gc  - context; results are written to gc->polygon.shader and to
  **         GENACCEL(gc).spanValue
  **   a   - vertex supplying the base window.z and color
  **   fdx - multiplier applied to the d/dx gradients for the initial values
  **   fdy - multiplier applied to the d/dy gradients for the initial values
  **         (presumably the offset from vertex a to the first sample
  **         position - confirm against the callers)
  **
  ** For each parameter p that is set up:
  **   initial value = a's value + fdx * dpdx + fdy * dpdy
  **   pLittle       = dpdy + dxLeftLittle * dpdx
  **   pBig          = pLittle + per-pixel delta   if dxLeftBig > dxLeftLittle
  **                   pLittle - per-pixel delta   otherwise
  ** Colors are only set up when __GL_SHADE_SMOOTH is set (r, g, b with
  ** __GL_SHADE_RGB, otherwise r only); depth only when __GL_SHADE_DEPTH_ITER
  ** is set.  Depth also includes the polygon offset when
  ** __GL_POLYGON_OFFSET_FILL_ENABLE is on, and is scaled by
  ** GENACCEL(gc).zScale.
  **
  ** There are two implementations: x86 inline assembly when
  ** _X86_ && ENABLE_ASM, and portable C otherwise.
  */
  907. void __fastGenSetInitialParameters(
  908. __GLcontext *gc,
  909. const __GLvertex *a,
  910. __GLfloat fdx,
  911. __GLfloat fdy)
  912. {
  /* Local shorthands.  Neither macro is #undef'd inside this function. */
  913. #define sh gc->polygon.shader
  914. #define bPolygonOffset \
  915. (gc->state.enables.general & __GL_POLYGON_OFFSET_FILL_ENABLE)
  916. __GLfloat zOffset;
  917. __GLfloat dxLeftLittle;
  /*
  ** x86 path.  Across the asm blocks below edx holds gc and edi holds
  ** shader.modeFlags; both are reloaded after every stretch of C code
  ** (presumably because the compiler may clobber them there).
  */
  918. #if _X86_ && ENABLE_ASM
  /* Scratch slot the depth values are stored through by fistp */
  919. LARGE_INTEGER temp;
  // Convert the integer shader.dxLeftLittle to float in the local
  // dxLeftLittle, and skip the depth setup when __GL_SHADE_DEPTH_ITER is
  // clear.
  920. _asm{
  921. mov edx, gc
  922. fild DWORD PTR [OFFSET(SHADER.dxLeftLittle)][edx]
  923. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  924. test edi, __GL_SHADE_DEPTH_ITER
  925. fstp dxLeftLittle
  926. je noZ
  927. }
  // zOffset starts as __glZero; it is replaced by __glPolygonOffsetZ(gc)
  // only when polygon offset fill is enabled.
  928. _asm{
  929. mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx]
  930. mov ecx, __glZero
  931. test ebx, __GL_POLYGON_OFFSET_FILL_ENABLE
  932. mov zOffset, ecx
  933. je noPolyOffset
  934. }
  935. zOffset = __glPolygonOffsetZ(gc);
  // Reload edx/edi after the C call
  936. _asm{
  937. mov edx, gc
  938. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  939. }
  940. noPolyOffset:
  // frag.z  = (a->window.z + fdx*dzdxf + fdy*dzdyf + zOffset) * zScale
  // zLittle = (dzdyf + dxLeftLittle*dzdxf) * zScale
  // The two computations are interleaved on the FPU stack; the trailing
  // comments track the stack contents.  Each result is stored through temp
  // and only its low DWORD is copied to the shader (presumably a 64-bit
  // store so that values above 0x7fffffff survive - confirm).
  941. _asm{
  942. mov eax, a
  943. fld fdx
  944. fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
  945. fld fdy
  946. fmul DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
  947. // zy zx
  948. fxch ST(1)
  949. // zx zy
  950. fadd DWORD PTR [OFFSET(__GLvertex.window.z)][eax]
  951. fld dxLeftLittle
  952. fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
  953. // zl zy zx
  954. fxch ST(1) // zy zl zx
  955. fadd zOffset
  956. fxch ST(1) // zl zy zx
  957. fadd DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
  958. // zl zy zx (+1)
  959. fxch ST(1) // zy zl zx
  960. faddp ST(2), ST // zl z
  961. fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
  962. // ZL z (+1)
  963. fxch ST(1) // z ZL
  964. fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
  965. // Z ZL
  966. fxch ST(1) // ZL Z
  967. fistp temp
  968. mov eax, DWORD PTR temp
  969. mov DWORD PTR [OFFSET(SHADER.zLittle)][edx], eax
  970. fistp temp
  971. mov eax, DWORD PTR temp
  972. mov DWORD PTR [OFFSET(SHADER.frag.z)][edx], eax
  973. }
  /* Debug cross-check: recompute the depth values in C and print any mismatch */
  974. #if FORCE_NPX_DEBUG
  975. {
  976. ULONG fragZ = FTOL((a->window.z + fdx*sh.dzdxf +
  977. (fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale);
  978. __GLfloat zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale;
  979. LONG shZLittle = FTOL(zLittle);
  980. if (sh.frag.z != fragZ)
  981. DbgPrint("fragZ %x %x\n", fragZ, sh.frag.z);
  982. if (sh.zLittle != shZLittle)
  983. DbgPrint("sh.zLittle %x %x\n", shZLittle, sh.zLittle);
  984. }
  985. _asm {
  986. mov edx, gc
  987. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  988. }
  989. #endif // FORCE_NPX_DEBUG
  990. noZ:
  // Without __GL_SHADE_SMOOTH there is no color setup.  RGB mode jumps to
  // the asm at rgbShade; otherwise execution falls into the color-index C
  // code below.
  991. _asm{
  992. test edi, __GL_SHADE_SMOOTH
  993. je done
  994. test edi, __GL_SHADE_RGB
  995. jne rgbShade
  996. }
  997. // ciShade:
  /* Color-index shading: only the r channel is set up, via FLT_TO_FIX */
  998. {
  999. CASTFIX(sh.rLittle) =
  1000. FLT_TO_FIX(gc->polygon.shader.drdy +
  1001. dxLeftLittle * gc->polygon.shader.drdx);
  1002. GENACCEL(gc).spanValue.r =
  1003. FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy);
  1004. }
  // Reload edx/edi after the C code and skip the RGB setup
  1005. _asm{
  1006. mov edx, gc
  1007. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  1008. jmp done
  1009. }
  1010. rgbShade:
  // First half (through the stores to r/g/bLittle):
  //   {r,g,b}Little = (d?dy + dxLeftLittle * d?dx) * __glVal65536
  // Second half (through the stores to spanValue):
  //   spanValue.{r,g,b} =
  //       (a->color->? + fdx * d?dx + fdy * d?dy) * __glVal65536 + __glVal128
  // The three channels are interleaved on the FPU stack; the trailing
  // comments track the stack contents.
  1011. _asm
  1012. {
  1013. mov eax, a
  1014. fld dxLeftLittle
  1015. fmul DWORD PTR [OFFSET(SHADER.drdx)][edx]
  1016. fld dxLeftLittle
  1017. fmul DWORD PTR [OFFSET(SHADER.dgdx)][edx] // g r
  1018. fxch ST(1) // r g
  1019. fadd DWORD PTR [OFFSET(SHADER.drdy)][edx] // R g
  1020. fld dxLeftLittle
  1021. fmul DWORD PTR [OFFSET(SHADER.dbdx)][edx] // b R g
  1022. fxch ST(2) // g R b
  1023. fadd DWORD PTR [OFFSET(SHADER.dgdy)][edx] // G R b
  1024. fxch ST(2) // b R G
  1025. fadd DWORD PTR [OFFSET(SHADER.dbdy)][edx] // B R G
  1026. fxch ST(1) // R B G
  1027. fmul __glVal65536 // sR B G
  1028. fxch ST(2) // G B sR
  1029. fmul __glVal65536 // sG B sR
  1030. fxch ST(1) // B sG sR
  1031. fmul __glVal65536 // sB sG sR
  1032. fxch ST(2) // sR sG sB
  1033. fistp DWORD PTR [OFFSET(SHADER.rLittle)][edx]
  1034. fistp DWORD PTR [OFFSET(SHADER.gLittle)][edx]
  1035. fistp DWORD PTR [OFFSET(SHADER.bLittle)][edx]
  1036. fld DWORD PTR [OFFSET(SHADER.drdx)][edx]
  1037. mov eax, [OFFSET(__GLvertex.color)][eax]
  1038. fmul fdx
  1039. fld DWORD PTR [OFFSET(SHADER.drdy)][edx]
  1040. fmul fdy // r r
  1041. fxch ST(1) // r r
  1042. fadd DWORD PTR [OFFSET(__GLcolor.r)][eax]
  1043. fld DWORD PTR [OFFSET(SHADER.dgdx)][edx]
  1044. fmul fdx
  1045. fld DWORD PTR [OFFSET(SHADER.dgdy)][edx]
  1046. fmul fdy // g g r r
  1047. fxch ST(1) // g g r r
  1048. fadd DWORD PTR [OFFSET(__GLcolor.g)][eax]
  1049. fld DWORD PTR [OFFSET(SHADER.dbdx)][edx]
  1050. fmul fdx
  1051. fld DWORD PTR [OFFSET(SHADER.dbdy)][edx]
  1052. fmul fdy // b b g g r r
  1053. fxch ST(1) // b b g g r r
  1054. fadd DWORD PTR [OFFSET(__GLcolor.b)][eax]
  1055. fxch ST(4) // r b g g b r
  1056. faddp ST(5), ST // b g g b r
  1057. fxch ST(2) // g g b b r
  1058. faddp ST(1), ST // g b b r
  1059. fxch ST(2) // b b g r
  1060. faddp ST(1), ST // b g r
  1061. fxch ST(2) // r g b
  1062. fmul __glVal65536 // R g b
  1063. fxch ST(1) // g R b
  1064. fmul __glVal65536 // G R b
  1065. fxch ST(2) // b R G
  1066. fmul __glVal65536 // B R G
  1067. fxch ST(1) // R B G
  1068. fadd __glVal128 // R B G
  1069. fxch ST(2) // G B R
  1070. fadd __glVal128 // G B R
  1071. fxch ST(1) // B G R
  1072. fadd __glVal128 // B G R
  1073. fxch ST(2) // R G B
  1074. fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.r)][edx]
  1075. fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.g)][edx]
  1076. fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.b)][edx]
  1077. }
  /*
  ** Debug cross-check of the RGB setup.  Note that the expected span values
  ** include the +0x0080 bias that the asm adds through __glVal128.
  */
  1078. #if FORCE_NPX_DEBUG
  1079. {
  1080. LONG rLittle = FLT_TO_FIX(gc->polygon.shader.drdy +
  1081. (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.drdx);
  1082. LONG gLittle = FLT_TO_FIX(gc->polygon.shader.dgdy +
  1083. (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dgdx);
  1084. LONG bLittle = FLT_TO_FIX(gc->polygon.shader.dbdy +
  1085. (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dbdx);
  1086. LONG spanR = FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy)+0x0080;
  1087. LONG spanG = FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy)+0x0080;
  1088. LONG spanB = FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy)+0x0080;
  1089. if (CASTFIX(sh.rLittle) != rLittle)
  1090. DbgPrint("rLittle: %x %x\n", rLittle, sh.rLittle);
  1091. if (CASTFIX(sh.gLittle) != gLittle)
  1092. DbgPrint("gLittle: %x %x\n", gLittle, sh.gLittle);
  1093. if (CASTFIX(sh.bLittle) != bLittle)
  1094. DbgPrint("bLittle: %x %x\n", bLittle, sh.bLittle);
  1095. if (spanR != GENACCEL(gc).spanValue.r)
  1096. DbgPrint("spanR: %x %x\n", spanR, GENACCEL(gc).spanValue.r);
  1097. if (spanG != GENACCEL(gc).spanValue.g)
  1098. DbgPrint("spanG: %x %x\n", spanG, GENACCEL(gc).spanValue.g);
  1099. if (spanB != GENACCEL(gc).spanValue.b)
  1100. DbgPrint("spanB: %x %x\n", spanB, GENACCEL(gc).spanValue.b);
  1101. }
  1102. _asm {
  1103. mov edx, gc
  1104. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  1105. }
  1106. #endif // FORCE_NPX_DEBUG
  1107. done:
  // Derive the Big steps from the Little steps.  When dxLeftBig is greater
  // than dxLeftLittle the per-pixel deltas (SPANDELTA.r/g/b, and
  // shader.dzdx for depth) are added; otherwise (littleGreater) they are
  // subtracted.  eax, ecx, esi and ebx are used as scratch.
  // NOTE(review): the smooth branches also store zBig even when
  // __GL_SHADE_DEPTH_ITER is clear; with depth iteration on, the
  // bigNoSmooth/smallNoSmooth code stores the same value again.
  1108. _asm {
  1109. mov eax, [OFFSET(SHADER.dxLeftBig)][edx]
  1110. mov ecx, [OFFSET(SHADER.dxLeftLittle)][edx]
  1111. cmp eax, ecx
  1112. jle littleGreater
  1113. test edi, __GL_SHADE_SMOOTH
  1114. je bigNoSmooth
  1115. mov eax, [OFFSET(SHADER.rLittle)][edx]
  1116. mov ecx, [OFFSET(SPANDELTA.r)][edx]
  1117. mov esi, [OFFSET(SHADER.gLittle)][edx]
  1118. mov ebx, [OFFSET(SPANDELTA.g)][edx]
  1119. add eax, ecx
  1120. add esi, ebx
  1121. mov [OFFSET(SHADER.rBig)][edx], eax
  1122. mov [OFFSET(SHADER.gBig)][edx], esi
  1123. mov eax, [OFFSET(SHADER.bLittle)][edx]
  1124. mov ecx, [OFFSET(SPANDELTA.b)][edx]
  1125. mov esi, [OFFSET(SHADER.zLittle)][edx]
  1126. mov ebx, [OFFSET(SHADER.dzdx)][edx]
  1127. add eax, ecx
  1128. add esi, ebx
  1129. mov [OFFSET(SHADER.bBig)][edx], eax
  1130. mov [OFFSET(SHADER.zBig)][edx], esi
  1131. bigNoSmooth:
  1132. test edi, __GL_SHADE_DEPTH_ITER
  1133. je done2
  1134. mov eax, [OFFSET(SHADER.zLittle)][edx]
  1135. mov ecx, [OFFSET(SHADER.dzdx)][edx]
  1136. add eax, ecx
  1137. mov [OFFSET(SHADER.zBig)][edx], eax
  1138. jmp done2
  1139. littleGreater:
  1140. test edi, __GL_SHADE_SMOOTH
  1141. je smallNoSmooth
  1142. mov eax, [OFFSET(SHADER.rLittle)][edx]
  1143. mov ecx, [OFFSET(SPANDELTA.r)][edx]
  1144. mov esi, [OFFSET(SHADER.gLittle)][edx]
  1145. mov ebx, [OFFSET(SPANDELTA.g)][edx]
  1146. sub eax, ecx
  1147. sub esi, ebx
  1148. mov [OFFSET(SHADER.rBig)][edx], eax
  1149. mov [OFFSET(SHADER.gBig)][edx], esi
  1150. mov eax, [OFFSET(SHADER.bLittle)][edx]
  1151. mov ecx, [OFFSET(SPANDELTA.b)][edx]
  1152. mov esi, [OFFSET(SHADER.zLittle)][edx]
  1153. mov ebx, [OFFSET(SHADER.dzdx)][edx]
  1154. sub eax, ecx
  1155. sub esi, ebx
  1156. mov [OFFSET(SHADER.bBig)][edx], eax
  1157. mov [OFFSET(SHADER.zBig)][edx], esi
  1158. smallNoSmooth:
  1159. test edi, __GL_SHADE_DEPTH_ITER
  1160. je done2
  1161. mov eax, [OFFSET(SHADER.zLittle)][edx]
  1162. mov ecx, [OFFSET(SHADER.dzdx)][edx]
  1163. sub eax, ecx
  1164. mov [OFFSET(SHADER.zBig)][edx], eax
  1165. done2:
  1166. }
  /*
  ** Portable C path, used when the asm path above is compiled out.  The
  ** `_X86_` after #else only labels which #if this belongs to.
  ** NOTE(review): unlike the asm RGB path, the span values computed here do
  ** not get the extra __glVal128 / +0x0080 bias - confirm whether that
  ** difference is intended.
  */
  1167. #else _X86_
  1168. __GLfloat zLittle;
  1169. dxLeftLittle = (__GLfloat)sh.dxLeftLittle;
  /* Color steps and initial span colors (r, g, b in RGB mode; r only otherwise) */
  1170. if (sh.modeFlags & __GL_SHADE_SMOOTH) {
  1171. if (sh.modeFlags & __GL_SHADE_RGB) {
  1172. CASTFIX(sh.rLittle) =
  1173. FLT_TO_FIX(gc->polygon.shader.drdy +
  1174. dxLeftLittle * gc->polygon.shader.drdx);
  1175. CASTFIX(sh.gLittle) =
  1176. FLT_TO_FIX(gc->polygon.shader.dgdy +
  1177. dxLeftLittle * gc->polygon.shader.dgdx);
  1178. CASTFIX(sh.bLittle) =
  1179. FLT_TO_FIX(gc->polygon.shader.dbdy +
  1180. dxLeftLittle * gc->polygon.shader.dbdx);
  1181. GENACCEL(gc).spanValue.r =
  1182. FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy);
  1183. GENACCEL(gc).spanValue.g =
  1184. FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy);
  1185. GENACCEL(gc).spanValue.b =
  1186. FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy);
  1187. } else {
  1188. CASTFIX(sh.rLittle) =
  1189. FLT_TO_FIX(gc->polygon.shader.drdy +
  1190. dxLeftLittle * gc->polygon.shader.drdx);
  1191. GENACCEL(gc).spanValue.r =
  1192. FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy);
  1193. }
  1194. }
  /* Initial depth and little depth step, with the optional polygon offset */
  1195. if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
  1196. zOffset = bPolygonOffset ? __glPolygonOffsetZ(gc) : 0.0f;
  1197. sh.frag.z = FTOL((a->window.z + fdx*sh.dzdxf +
  1198. (fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale);
  1199. zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale;
  1200. sh.zLittle = FTOL(zLittle);
  1201. }
  /*
  ** Big steps: Little plus the per-pixel delta when the big x step is the
  ** larger one, Little minus the delta otherwise.
  */
  1202. if (sh.dxLeftBig > sh.dxLeftLittle) {
  1203. if (sh.modeFlags & __GL_SHADE_SMOOTH) {
  1204. CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) + GENACCEL(gc).spanDelta.r;
  1205. CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) + GENACCEL(gc).spanDelta.g;
  1206. CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) + GENACCEL(gc).spanDelta.b;
  1207. }
  1208. if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
  1209. sh.zBig = sh.zLittle + sh.dzdx;
  1210. }
  1211. } else {
  1212. if (sh.modeFlags & __GL_SHADE_SMOOTH) {
  1213. CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) - GENACCEL(gc).spanDelta.r;
  1214. CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) - GENACCEL(gc).spanDelta.g;
  1215. CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) - GENACCEL(gc).spanDelta.b;
  1216. }
  1217. if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
  1218. sh.zBig = sh.zLittle - sh.dzdx;
  1219. }
  1220. }
  1221. #endif
  1222. }
  1223. void __fastGenSetInitialParametersTexRGBA(
  1224. __GLcontext *gc,
  1225. const __GLvertex *a,
  1226. __GLfloat fdx,
  1227. __GLfloat fdy)
  1228. {
  1229. #define sh gc->polygon.shader
  1230. __GLfloat zOffset;
  1231. __GLfloat dxLeftLittle;
  1232. #if _X86_ && ENABLE_ASM
  1233. LARGE_INTEGER temp;
  1234. _asm{
  1235. mov edx, gc
  1236. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  1237. fild DWORD PTR [OFFSET(SHADER.dxLeftLittle)][edx]
  1238. test edi, __GL_SHADE_TEXTURE
  1239. mov eax, [OFFSET(__GLcontext.state.texture.env)][edx]
  1240. je notTexture
  1241. mov ebx, [OFFSET(__GLtextureEnvState.mode)][eax]
  1242. cmp ebx, GL_REPLACE
  1243. je fastReplace
  1244. cmp ebx, GL_DECAL
  1245. jne notTexture
  1246. fastReplace:
  1247. fstp dxLeftLittle
  1248. jmp colorDone
  1249. notTexture:
  1250. test edi, __GL_SHADE_SMOOTH
  1251. fstp dxLeftLittle
  1252. je colorDone
  1253. }
  1254. _asm
  1255. {
  1256. mov eax, a
  1257. fld dxLeftLittle
  1258. fmul DWORD PTR [OFFSET(SHADER.drdx)][edx]
  1259. fld dxLeftLittle
  1260. fmul DWORD PTR [OFFSET(SHADER.dgdx)][edx] // g r
  1261. fxch ST(1) // r g
  1262. fadd DWORD PTR [OFFSET(SHADER.drdy)][edx] // R g
  1263. fld dxLeftLittle
  1264. fmul DWORD PTR [OFFSET(SHADER.dbdx)][edx] // b R g
  1265. fxch ST(2) // g R b
  1266. fadd DWORD PTR [OFFSET(SHADER.dgdy)][edx] // G R b
  1267. fxch ST(2) // b R G
  1268. fadd DWORD PTR [OFFSET(SHADER.dbdy)][edx] // B R G
  1269. fxch ST(1) // R B G
  1270. fmul __glVal65536 // sR B G
  1271. fxch ST(2) // G B sR
  1272. fmul __glVal65536 // sG B sR
  1273. fxch ST(1) // B sG sR
  1274. fmul __glVal65536 // sB sG sR
  1275. fxch ST(2) // sR sG sB
  1276. fistp DWORD PTR [OFFSET(SHADER.rLittle)][edx]
  1277. mov eax, [OFFSET(__GLvertex.color)][eax]
  1278. fistp DWORD PTR [OFFSET(SHADER.gLittle)][edx]
  1279. fistp DWORD PTR [OFFSET(SHADER.bLittle)][edx]
  1280. fld DWORD PTR [OFFSET(SHADER.drdx)][edx]
  1281. fmul fdx
  1282. fld DWORD PTR [OFFSET(SHADER.drdy)][edx]
  1283. fmul fdy // r r
  1284. fxch ST(1) // r r
  1285. fadd DWORD PTR [OFFSET(__GLcolor.r)][eax]
  1286. fld DWORD PTR [OFFSET(SHADER.dgdx)][edx]
  1287. fmul fdx
  1288. fld DWORD PTR [OFFSET(SHADER.dgdy)][edx]
  1289. fmul fdy // g g r r
  1290. fxch ST(1) // g g r r
  1291. fadd DWORD PTR [OFFSET(__GLcolor.g)][eax]
  1292. fld DWORD PTR [OFFSET(SHADER.dbdx)][edx]
  1293. fmul fdx
  1294. fld DWORD PTR [OFFSET(SHADER.dbdy)][edx]
  1295. fmul fdy // b b g g r r
  1296. fxch ST(1) // b b g g r r
  1297. fadd DWORD PTR [OFFSET(__GLcolor.b)][eax]
  1298. fxch ST(4) // r b g g b r
  1299. faddp ST(5), ST // b g g b r
  1300. fxch ST(2) // g g b b r
  1301. faddp ST(1), ST // g b b r
  1302. fxch ST(2) // b b g r
  1303. faddp ST(1), ST // b g r
  1304. fxch ST(2) // r g b
  1305. fmul __glVal65536 // R g b
  1306. fxch ST(1) // g R b
  1307. fmul __glVal65536 // G R b
  1308. fxch ST(2) // b R G
  1309. fmul __glVal65536 // B R G
  1310. fxch ST(1) // R B G
  1311. fadd __glVal128 // R B G
  1312. fxch ST(2) // G B R
  1313. fadd __glVal128 // G B R
  1314. fxch ST(1) // B G R
  1315. fadd __glVal128 // B G R
  1316. fxch ST(2) // R G B
  1317. fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.r)][edx]
  1318. fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.g)][edx]
  1319. mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx]
  1320. fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.b)][edx]
  1321. }
  1322. _asm{
  1323. test ebx, __GL_BLEND_ENABLE
  1324. je noBlend
  1325. }
  1326. _asm{
  1327. mov eax, a
  1328. fld DWORD PTR [OFFSET(SHADER.dadx)][edx]
  1329. mov eax, DWORD PTR [OFFSET(__GLvertex.color)][eax]
  1330. fmul fdx
  1331. fld DWORD PTR [OFFSET(SHADER.dady)][edx]
  1332. fmul fdy // a a
  1333. fxch ST(1)
  1334. fadd DWORD PTR [OFFSET(__GLcolor.a)][eax] // a a
  1335. fld dxLeftLittle
  1336. fmul DWORD PTR [OFFSET(SHADER.dadx)][edx] // al a a
  1337. fxch ST(1) // a al a
  1338. faddp ST(2), ST // al a
  1339. fadd DWORD PTR [OFFSET(SHADER.dady)][edx] // al a (+1)
  1340. fxch ST(1) // a al
  1341. fmul DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx]
  1342. // A al
  1343. fxch ST(1) // al A
  1344. fmul DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx]
  1345. // AL A (+1)
  1346. fxch ST(1) // A AL
  1347. fadd __glVal128 // A AL (+1)
  1348. fxch ST(1) // AL A
  1349. fistp DWORD PTR [OFFSET(SHADER.aLittle)][edx]
  1350. fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.a)][edx]
  1351. }
  1352. #if FORCE_NPX_DEBUG
  1353. {
  1354. LONG aLittle = FTOL((gc->polygon.shader.dady +
  1355. (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dadx) *
  1356. GENACCEL(gc).aAccelScale);
  1357. LONG spanA = FTOL((a->color->a + fdx * sh.dadx + fdy * sh.dady) *
  1358. GENACCEL(gc).aAccelScale)+0x0080;
  1359. if (aLittle != CASTFIX(sh.aLittle))
  1360. DbgPrint("sh.aLittle %x %x\n", aLittle, CASTFIX(sh.aLittle));
  1361. if (spanA != GENACCEL(gc).spanValue.a)
  1362. DbgPrint("spanValue.a %x %x\n", spanA, GENACCEL(gc).spanValue.a);
  1363. }
  1364. _asm {
  1365. mov edx, gc
  1366. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  1367. }
  1368. #endif // FORCE_NPX_DEBUG
  1369. noBlend:
  1370. #if FORCE_NPX_DEBUG
  1371. {
  1372. LONG rLittle = FLT_TO_FIX(gc->polygon.shader.drdy +
  1373. (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.drdx);
  1374. LONG gLittle = FLT_TO_FIX(gc->polygon.shader.dgdy +
  1375. (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dgdx);
  1376. LONG bLittle = FLT_TO_FIX(gc->polygon.shader.dbdy +
  1377. (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dbdx);
  1378. LONG spanR = FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy)+0x0080;
  1379. LONG spanG = FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy)+0x0080;
  1380. LONG spanB = FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy)+0x0080;
  1381. if (CASTFIX(sh.rLittle) != rLittle)
  1382. DbgPrint("rLittle: %x %x\n", rLittle, sh.rLittle);
  1383. if (CASTFIX(sh.gLittle) != gLittle)
  1384. DbgPrint("gLittle: %x %x\n", gLittle, sh.gLittle);
  1385. if (CASTFIX(sh.bLittle) != bLittle)
  1386. DbgPrint("bLittle: %x %x\n", bLittle, sh.bLittle);
  1387. if (spanR != GENACCEL(gc).spanValue.r)
  1388. DbgPrint("spanR: %x %x\n", spanR, GENACCEL(gc).spanValue.r);
  1389. if (spanG != GENACCEL(gc).spanValue.g)
  1390. DbgPrint("spanG: %x %x\n", spanG, GENACCEL(gc).spanValue.g);
  1391. if (spanB != GENACCEL(gc).spanValue.b)
  1392. DbgPrint("spanB: %x %x\n", spanB, GENACCEL(gc).spanValue.b);
  1393. }
  1394. _asm {
  1395. mov edx, gc
  1396. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  1397. }
  1398. #endif // FORCE_NPX_DEBUG
  1399. colorDone:
  1400. _asm{
  1401. test edi, __GL_SHADE_TEXTURE
  1402. je doneTexture
  1403. mov ebx, [OFFSET(__GLcontext.state.hints.perspectiveCorrection)][edx]
  1404. cmp ebx, GL_NICEST
  1405. je nicestTex
  1406. }
  1407. _asm{
  1408. mov eax, a
  1409. fld dxLeftLittle
  1410. fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx]
  1411. fld dxLeftLittle
  1412. fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx]
  1413. // dt ds
  1414. fld fdx
  1415. fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx]
  1416. fld fdy
  1417. fmul DWORD PTR [OFFSET(SHADER.dsdy)][edx]
  1418. fxch ST(1) // s s dt ds
  1419. fadd DWORD PTR [OFFSET(__GLvertex.texture.x)][eax]
  1420. fxch ST(3) // ds s dt s
  1421. fadd DWORD PTR [OFFSET(SHADER.dsdy)][edx]
  1422. fld fdx
  1423. fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx]
  1424. fld fdy
  1425. fmul DWORD PTR [OFFSET(SHADER.dtdy)][edx]
  1426. fxch ST(1) // t t ds s dt s
  1427. fadd DWORD PTR [OFFSET(__GLvertex.texture.y)][eax]
  1428. fxch ST(4) // dt t ds s t s
  1429. fadd DWORD PTR [OFFSET(SHADER.dtdy)][edx]
  1430. fxch ST(5) // s t ds s t dt
  1431. faddp ST(3), ST // t ds s t dt
  1432. faddp ST(3), ST // ds s t dt
  1433. fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
  1434. // DS s t dt
  1435. fxch ST(3) // dt s t DS
  1436. fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx]
  1437. // DT s t DS
  1438. fxch ST(1) // s DT t DS
  1439. fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
  1440. // S DT t DS
  1441. fxch ST(2) // t DT S DS
  1442. fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx]
  1443. // T DT S DS
  1444. fxch ST(3) // DS DT S T
  1445. fistp DWORD PTR [OFFSET(SHADER.sLittle)][edx]
  1446. fistp DWORD PTR [OFFSET(SHADER.tLittle)][edx]
  1447. fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.s)][edx]
  1448. fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.t)][edx]
  1449. #if !FORCE_NPX_DEBUG
  1450. jmp doneTexture
  1451. #endif
  1452. }
  1453. #if FORCE_NPX_DEBUG
  1454. {
  1455. LONG sLittle = FTOL((gc->polygon.shader.dsdy + (__GLfloat)sh.dxLeftLittle *
  1456. gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale);
  1457. LONG tLittle = FTOL((gc->polygon.shader.dtdy + (__GLfloat)sh.dxLeftLittle *
  1458. gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale);
  1459. LONG spanS = FTOL((a->texture.x +
  1460. (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale);
  1461. LONG spanT = FTOL((a->texture.y +
  1462. (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale);
  1463. if (sLittle != CASTFIX(sh.sLittle))
  1464. DbgPrint("sLittle %x %x\n", sLittle, CASTFIX(sh.sLittle));
  1465. if (tLittle != CASTFIX(sh.tLittle))
  1466. DbgPrint("tLittle %x %x\n", tLittle, CASTFIX(sh.tLittle));
  1467. if (GENACCEL(gc).spanValue.s != spanS)
  1468. DbgPrint("spanValue.s %x %x\n", spanS, GENACCEL(gc).spanValue.s);
  1469. if (GENACCEL(gc).spanValue.t != spanT)
  1470. DbgPrint("spanValue.t %x %x\n", spanT, GENACCEL(gc).spanValue.t);
  1471. }
  1472. _asm {
  1473. mov edx, gc
  1474. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  1475. jmp doneTexture;
  1476. }
  1477. #endif // FORCE_NPX_DEBUG
  1478. nicestTex:
  1479. _asm{
  1480. mov eax, a
  1481. fld dxLeftLittle
  1482. fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx]
  1483. fld dxLeftLittle
  1484. fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx]
  1485. // dt ds
  1486. fld DWORD PTR fdx
  1487. fmul DWORD PTR [OFFSET(SHADER.dqwdx)][edx]
  1488. // qwx dt ds
  1489. fxch ST(2)
  1490. // ds dt qwx
  1491. fadd DWORD PTR [OFFSET(SHADER.dsdy)][edx]
  1492. fxch ST(1) // dt ds qwx
  1493. fadd DWORD PTR [OFFSET(SHADER.dtdy)][edx]
  1494. fxch ST(2) // qwx ds dt
  1495. fld DWORD PTR fdy
  1496. fmul DWORD PTR [OFFSET(SHADER.dqwdy)][edx]
  1497. // qwy qwx ds dt
  1498. fxch ST(2) // ds qwx qwy dt
  1499. fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
  1500. fxch ST(3) // dt qwx qwy ds
  1501. fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx]
  1502. fxch ST(2) // qwy qwx dt ds
  1503. fld DWORD PTR [OFFSET(__GLvertex.texture.w)][eax]
  1504. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax]
  1505. // qww qwy qwx dt ds
  1506. fxch ST(4) // ds qwy qwx dt qww
  1507. fistp DWORD PTR [OFFSET(SHADER.sLittle)][edx]
  1508. // qwy qwx dt qww
  1509. faddp ST(1), ST // qw dt qww
  1510. fxch ST(1) // dt qw qww
  1511. fistp DWORD PTR [OFFSET(SHADER.tLittle)][edx]
  1512. // qw qww
  1513. fld DWORD PTR [OFFSET(SHADER.dqwdx)][edx]
  1514. fmul dxLeftLittle // lt qw qww
  1515. fxch ST(1) // qw lt qww
  1516. faddp ST(2), ST // lt qw
  1517. fld DWORD PTR [OFFSET(__GLvertex.texture.x)][eax]
  1518. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax]
  1519. // s lt qw
  1520. fxch ST(1) // lt s qw
  1521. fadd DWORD PTR [OFFSET(SHADER.dqwdy)][edx]
  1522. fxch ST(1) // s lt qw
  1523. fld fdx
  1524. fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx]
  1525. fld fdy
  1526. fmul DWORD PTR [OFFSET(SHADER.dsdy)][edx]
  1527. fxch ST(1) // s s s lt qw
  1528. faddp ST(2), ST // s s lt qw
  1529. fld DWORD PTR [OFFSET(__GLvertex.texture.y)][eax]
  1530. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax]
  1531. fld fdx
  1532. fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx]
  1533. // t t s s lt qw
  1534. fxch ST(2) // s t t s lt qw
  1535. faddp ST(3), ST // t t s lt qw
  1536. fld fdy
  1537. fmul DWORD PTR [OFFSET(SHADER.dtdy)][edx]
  1538. fxch ST(1) // t t t s lt qw
  1539. faddp ST(2), ST // t t s lt qw
  1540. fxch ST(2) // s t t lt qw
  1541. fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
  1542. // S t t lt qw
  1543. fxch ST(4) // qw t t lt S
  1544. fstp DWORD PTR [OFFSET(SHADER.frag.qw)][edx]
  1545. faddp ST(1), ST // t lt S
  1546. fxch ST(1) // lt t S
  1547. fstp DWORD PTR [OFFSET(SHADER.qwLittle)][edx]
  1548. // t S
  1549. fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx] // (+1)
  1550. // T S
  1551. fxch ST(1) // S T
  1552. fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.s)][edx]
  1553. fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.t)][edx]
  1554. }
  1555. #if FORCE_NPX_DEBUG
  1556. {
  1557. LONG sLittle = FTOL((gc->polygon.shader.dsdy + dxLeftLittle *
  1558. gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale);
  1559. LONG tLittle = FTOL((gc->polygon.shader.dtdy + dxLeftLittle *
  1560. gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale);
  1561. __GLfloat qw = (a->texture.w * a->window.w) + (fdx * sh.dqwdx) +
  1562. (fdy * sh.dqwdy);
  1563. __GLfloat qwLittle = sh.dqwdy + dxLeftLittle * sh.dqwdx;
  1564. LONG spanS = FTOL(((a->texture.x * a->window.w) +
  1565. (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale);
  1566. LONG spanT = FTOL(((a->texture.y * a->window.w) +
  1567. (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale);
  1568. if (sLittle != CASTFIX(sh.sLittle))
  1569. DbgPrint("sLittle %x %x\n", sLittle, CASTFIX(sh.sLittle));
  1570. if (tLittle != CASTFIX(sh.tLittle))
  1571. DbgPrint("tLittle %x %x\n", tLittle, CASTFIX(sh.tLittle));
  1572. if (qw != sh.frag.qw)
  1573. DbgPrint("qw %f %f\n", qw, sh.frag.qw);
  1574. if (qwLittle != sh.qwLittle)
  1575. DbgPrint("qw %f %f\n", qwLittle, sh.qwLittle);
  1576. if (GENACCEL(gc).spanValue.s != spanS)
  1577. DbgPrint("spanValue.s %x %x\n", spanS, GENACCEL(gc).spanValue.s);
  1578. if (GENACCEL(gc).spanValue.t != spanT)
  1579. DbgPrint("spanValue.t %x %x\n", spanT, GENACCEL(gc).spanValue.t);
  1580. }
  1581. _asm {
  1582. mov edx, gc
  1583. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  1584. }
  1585. #endif // FORCE_NPX_DEBUG
  1586. doneTexture:
  1587. _asm{
  1588. test edi, __GL_SHADE_DEPTH_ITER
  1589. je noZ
  1590. }
  1591. _asm{
  1592. mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx]
  1593. mov ecx, __glZero
  1594. test ebx, __GL_POLYGON_OFFSET_FILL_ENABLE
  1595. mov zOffset, ecx
  1596. je noPolyOffset
  1597. }
  1598. zOffset = __glPolygonOffsetZ(gc);
  1599. _asm{
  1600. mov edx, gc
  1601. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  1602. }
  1603. noPolyOffset:
  1604. _asm{
  1605. mov eax, a
  1606. fld fdx
  1607. fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
  1608. fld fdy
  1609. fmul DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
  1610. // zy zx
  1611. fxch ST(1)
  1612. // zx zy
  1613. fadd DWORD PTR [OFFSET(__GLvertex.window.z)][eax]
  1614. fld dxLeftLittle
  1615. fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
  1616. // zl zy zx
  1617. fxch ST(1) // zy zl zx
  1618. fadd zOffset
  1619. fxch ST(1) // zl zy zx
  1620. fadd DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
  1621. // zl zy zx (+1)
  1622. fxch ST(1) // zy zl zx
  1623. faddp ST(2), ST // zl z
  1624. fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
  1625. // ZL z (+1)
  1626. fxch ST(1) // z ZL
  1627. fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
  1628. // Z ZL
  1629. fxch ST(1) // ZL Z
  1630. fistp temp
  1631. mov eax, DWORD PTR temp
  1632. mov DWORD PTR [OFFSET(SHADER.zLittle)][edx], eax
  1633. fistp temp
  1634. mov eax, DWORD PTR temp
  1635. mov DWORD PTR [OFFSET(SHADER.frag.z)][edx], eax
  1636. }
  1637. #if FORCE_NPX_DEBUG
  1638. {
  1639. ULONG fragZ = FTOL((a->window.z + fdx*sh.dzdxf +
  1640. (fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale);
  1641. __GLfloat zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale;
  1642. LONG shZLittle = FTOL(zLittle);
  1643. if (sh.frag.z != fragZ)
  1644. DbgPrint("fragZ %x %x\n", fragZ, sh.frag.z);
  1645. if (sh.zLittle != shZLittle)
  1646. DbgPrint("sh.zLittle %x %x\n", shZLittle, sh.zLittle);
  1647. }
  1648. _asm {
  1649. mov edx, gc
  1650. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  1651. }
  1652. #endif // FORCE_NPX_DEBUG
  1653. noZ:
  1654. _asm {
  1655. mov eax, [OFFSET(SHADER.dxLeftBig)][edx]
  1656. mov ecx, [OFFSET(SHADER.dxLeftLittle)][edx]
  1657. cmp eax, ecx
  1658. jle littleGreater
  1659. test edi, __GL_SHADE_SMOOTH
  1660. je bigNoSmooth
  1661. mov eax, [OFFSET(SHADER.rLittle)][edx]
  1662. mov ecx, [OFFSET(SPANDELTA.r)][edx]
  1663. mov esi, [OFFSET(SHADER.gLittle)][edx]
  1664. mov ebx, [OFFSET(SPANDELTA.g)][edx]
  1665. add eax, ecx
  1666. add esi, ebx
  1667. mov [OFFSET(SHADER.rBig)][edx], eax
  1668. mov [OFFSET(SHADER.gBig)][edx], esi
  1669. mov eax, [OFFSET(SHADER.bLittle)][edx]
  1670. mov ecx, [OFFSET(SPANDELTA.b)][edx]
  1671. mov esi, [OFFSET(SHADER.aLittle)][edx]
  1672. mov ebx, [OFFSET(SPANDELTA.a)][edx]
  1673. add eax, ecx
  1674. add esi, ebx
  1675. mov [OFFSET(SHADER.bBig)][edx], eax
  1676. mov [OFFSET(SHADER.aBig)][edx], esi
  1677. bigNoSmooth:
  1678. test edi, __GL_SHADE_TEXTURE
  1679. je bigNoTexture
  1680. fld DWORD PTR [OFFSET(SHADER.qwLittle)][edx]
  1681. mov eax, [OFFSET(SHADER.sLittle)][edx]
  1682. fadd DWORD PTR [OFFSET(SHADER.dqwdx)][edx]
  1683. mov ecx, [OFFSET(SPANDELTA.s)][edx]
  1684. mov esi, [OFFSET(SHADER.tLittle)][edx]
  1685. mov ebx, [OFFSET(SPANDELTA.t)][edx]
  1686. add eax, ecx
  1687. add esi, ebx
  1688. mov [OFFSET(SHADER.sBig)][edx], eax
  1689. mov [OFFSET(SHADER.tBig)][edx], esi
  1690. fstp DWORD PTR [OFFSET(SHADER.qwBig)][edx]
  1691. bigNoTexture:
  1692. test edi, __GL_SHADE_DEPTH_ITER
  1693. je done
  1694. mov eax, [OFFSET(SHADER.zLittle)][edx]
  1695. mov ecx, [OFFSET(SHADER.dzdx)][edx]
  1696. add eax, ecx
  1697. mov [OFFSET(SHADER.zBig)][edx], eax
  1698. jmp done
  1699. littleGreater:
  1700. test edi, __GL_SHADE_SMOOTH
  1701. je smallNoSmooth
  1702. mov eax, [OFFSET(SHADER.rLittle)][edx]
  1703. mov ecx, [OFFSET(SPANDELTA.r)][edx]
  1704. mov esi, [OFFSET(SHADER.gLittle)][edx]
  1705. mov ebx, [OFFSET(SPANDELTA.g)][edx]
  1706. sub eax, ecx
  1707. sub esi, ebx
  1708. mov [OFFSET(SHADER.rBig)][edx], eax
  1709. mov [OFFSET(SHADER.gBig)][edx], esi
  1710. mov eax, [OFFSET(SHADER.bLittle)][edx]
  1711. mov ecx, [OFFSET(SPANDELTA.b)][edx]
  1712. mov esi, [OFFSET(SHADER.aLittle)][edx]
  1713. mov ebx, [OFFSET(SPANDELTA.a)][edx]
  1714. sub eax, ecx
  1715. sub esi, ebx
  1716. mov [OFFSET(SHADER.bBig)][edx], eax
  1717. mov [OFFSET(SHADER.aBig)][edx], esi
  1718. smallNoSmooth:
  1719. test edi, __GL_SHADE_TEXTURE
  1720. je smallNoTexture
  1721. fld DWORD PTR [OFFSET(SHADER.qwLittle)][edx]
  1722. mov eax, [OFFSET(SHADER.sLittle)][edx]
  1723. fsub DWORD PTR [OFFSET(SHADER.dqwdx)][edx]
  1724. mov ecx, [OFFSET(SPANDELTA.s)][edx]
  1725. mov esi, [OFFSET(SHADER.tLittle)][edx]
  1726. mov ebx, [OFFSET(SPANDELTA.t)][edx]
  1727. sub eax, ecx
  1728. sub esi, ebx
  1729. mov [OFFSET(SHADER.sBig)][edx], eax
  1730. mov [OFFSET(SHADER.tBig)][edx], esi
  1731. fstp DWORD PTR [OFFSET(SHADER.qwBig)][edx]
  1732. smallNoTexture:
  1733. test edi, __GL_SHADE_DEPTH_ITER
  1734. je done
  1735. mov eax, [OFFSET(SHADER.zLittle)][edx]
  1736. mov ecx, [OFFSET(SHADER.dzdx)][edx]
  1737. sub eax, ecx
  1738. mov [OFFSET(SHADER.zBig)][edx], eax
  1739. done:
  1740. }
  1741. #else
  1742. __GLfloat zLittle;
  1743. __GLfloat tmp1, tmp2;
  1744. dxLeftLittle = (float)sh.dxLeftLittle;
  1745. // Don't bother with the color deltas if we're decaling or replacing
  1746. // with textures.
  1747. if ((gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) &&
  1748. ((gc->state.texture.env[0].mode == GL_REPLACE) ||
  1749. (gc->state.texture.env[0].mode == GL_DECAL))) {
  1750. ;
  1751. } else if (sh.modeFlags & __GL_SHADE_SMOOTH) {
  1752. CASTFIX(sh.rLittle) =
  1753. FLT_TO_FIX(gc->polygon.shader.drdy +
  1754. dxLeftLittle * gc->polygon.shader.drdx);
  1755. CASTFIX(sh.gLittle) =
  1756. FLT_TO_FIX(gc->polygon.shader.dgdy +
  1757. dxLeftLittle * gc->polygon.shader.dgdx);
  1758. CASTFIX(sh.bLittle) =
  1759. FLT_TO_FIX(gc->polygon.shader.dbdy +
  1760. dxLeftLittle * gc->polygon.shader.dbdx);
  1761. GENACCEL(gc).spanValue.r =
  1762. FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy)+0x0080;
  1763. GENACCEL(gc).spanValue.g =
  1764. FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy)+0x0080;
  1765. GENACCEL(gc).spanValue.b =
  1766. FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy)+0x0080;
  1767. if (gc->state.enables.general & __GL_BLEND_ENABLE) {
  1768. CASTFIX(sh.aLittle) =
  1769. FTOL((gc->polygon.shader.dady +
  1770. dxLeftLittle * gc->polygon.shader.dadx) *
  1771. GENACCEL(gc).aAccelScale);
  1772. GENACCEL(gc).spanValue.a =
  1773. FTOL((a->color->a + fdx * sh.dadx + fdy * sh.dady) *
  1774. GENACCEL(gc).aAccelScale)+0x0080;
  1775. }
  1776. }
  1777. if (sh.modeFlags & __GL_SHADE_TEXTURE) {
  1778. if (gc->state.hints.perspectiveCorrection != GL_NICEST) {
  1779. tmp1 = (gc->polygon.shader.dsdy + dxLeftLittle *
  1780. gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale;
  1781. tmp2 = (gc->polygon.shader.dtdy + dxLeftLittle *
  1782. gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale;
  1783. CASTFIX(sh.sLittle) = FTOL(tmp1);
  1784. CASTFIX(sh.tLittle) = FTOL(tmp2);
  1785. tmp1 = (a->texture.x +
  1786. (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale;
  1787. tmp2 = (a->texture.y +
  1788. (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale;
  1789. GENACCEL(gc).spanValue.s = FTOL(tmp1);
  1790. GENACCEL(gc).spanValue.t = FTOL(tmp2);
  1791. } else {
  1792. tmp1 = (gc->polygon.shader.dsdy + dxLeftLittle *
  1793. gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale;
  1794. tmp2 = (gc->polygon.shader.dtdy + dxLeftLittle *
  1795. gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale;
  1796. CASTFIX(sh.sLittle) = FTOL(tmp1);
  1797. CASTFIX(sh.tLittle) = FTOL(tmp2);
  1798. sh.frag.qw = (a->texture.w * a->window.w) + (fdx * sh.dqwdx) +
  1799. (fdy * sh.dqwdy);
  1800. sh.qwLittle = sh.dqwdy + dxLeftLittle * sh.dqwdx;
  1801. tmp1 = ((a->texture.x * a->window.w) +
  1802. (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale;
  1803. tmp2 = ((a->texture.y * a->window.w) +
  1804. (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale;
  1805. GENACCEL(gc).spanValue.s = FTOL(tmp1);
  1806. GENACCEL(gc).spanValue.t = FTOL(tmp2);
  1807. }
  1808. }
  1809. if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
  1810. zOffset = bPolygonOffset ? __glPolygonOffsetZ(gc) : 0.0f;
  1811. sh.frag.z = FTOL((a->window.z + fdx*sh.dzdxf +
  1812. (fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale);
  1813. zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale;
  1814. sh.zLittle = FTOL(zLittle);
  1815. }
  1816. if (sh.dxLeftBig > sh.dxLeftLittle) {
  1817. if (sh.modeFlags & __GL_SHADE_SMOOTH) {
  1818. CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) + GENACCEL(gc).spanDelta.r;
  1819. CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) + GENACCEL(gc).spanDelta.g;
  1820. CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) + GENACCEL(gc).spanDelta.b;
  1821. if (gc->state.enables.general & __GL_BLEND_ENABLE)
  1822. CASTFIX(sh.aBig) = CASTFIX(sh.aLittle) + GENACCEL(gc).spanDelta.a;
  1823. }
  1824. if (sh.modeFlags & __GL_SHADE_TEXTURE) {
  1825. CASTFIX(sh.sBig) = CASTFIX(sh.sLittle) + GENACCEL(gc).spanDelta.s;
  1826. CASTFIX(sh.tBig) = CASTFIX(sh.tLittle) + GENACCEL(gc).spanDelta.t;
  1827. sh.qwBig = sh.qwLittle + sh.dqwdx;
  1828. }
  1829. if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
  1830. sh.zBig = sh.zLittle + sh.dzdx;
  1831. }
  1832. } else {
  1833. if (sh.modeFlags & __GL_SHADE_SMOOTH) {
  1834. CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) - GENACCEL(gc).spanDelta.r;
  1835. CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) - GENACCEL(gc).spanDelta.g;
  1836. CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) - GENACCEL(gc).spanDelta.b;
  1837. if (gc->state.enables.general & __GL_BLEND_ENABLE)
  1838. CASTFIX(sh.aBig) = CASTFIX(sh.aLittle) - GENACCEL(gc).spanDelta.a;
  1839. }
  1840. if (sh.modeFlags & __GL_SHADE_TEXTURE) {
  1841. CASTFIX(sh.sBig) = CASTFIX(sh.sLittle) - GENACCEL(gc).spanDelta.s;
  1842. CASTFIX(sh.tBig) = CASTFIX(sh.tLittle) - GENACCEL(gc).spanDelta.t;
  1843. sh.qwBig = sh.qwLittle - sh.dqwdx;
  1844. }
  1845. if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
  1846. sh.zBig = sh.zLittle - sh.dzdx;
  1847. }
  1848. }
  1849. #endif
  1850. }
  1851. /**************************************************************************\
  1852. \**************************************************************************/
  /*
  ** __fastGenCalcDeltas
  **
  ** Computes the color and depth gradients of the triangle (a, b, c),
  ** stores them in gc->polygon.shader and in the GENACCEL span state, and
  ** selects the span function (__fastSpanFuncPtr) to use.
  **
  ** Read from gc->polygon.shader: area, dxAC, dyAC, dxBC, dyBC, modeFlags.
  ** These are not computed here (presumably set up by the caller -- confirm).
  **
  ** With oneOverArea = 1 / area:
  **   t1 = dyAC * oneOverArea      t2 = dyBC * oneOverArea
  **   t3 = dxAC * oneOverArea      t4 = dxBC * oneOverArea
  ** and, for a parameter p with pAC = a.p - c.p and pBC = b.p - c.p:
  **   dpdx = pAC * t2 - pBC * t1
  **   dpdy = pBC * t3 - pAC * t4
  **
  ** Written, depending on modeFlags:
  **   RGB, smooth:     shader.d{r,g,b}dx, shader.d{r,g,b}dy, spanDelta.{r,g,b};
  **                    the flat span function is chosen only when all three
  **                    span deltas are zero and GEN_FASTZBUFFER is not set.
  **   RGB, flat:       spanValue.{r,g,b} from the provoking vertex color;
  **                    flat span function.
  **   non-RGB, smooth: shader.drdx, shader.drdy, spanDelta.r (r only); flat
  **                    span function when spanDelta.r is zero.
  **   non-RGB, flat:   spanValue.r from the provoking vertex; flat span
  **                    function.
  **   depth iterated:  shader.dzdxf, shader.dzdyf, and shader.dzdx ==
  **                    spanDelta.z (dzdxf scaled by GENACCEL zScale, as
  **                    an integer).
  **
  ** The x86 build uses inline x87 assembly; the #else branch is the C
  ** version of the same computation.  In both, the 1/area divide is started
  ** first and its result is picked up only on the path that needs it.
  **
  ** NOTE(review): daAC and daBC are declared below but never used; no alpha
  ** deltas are computed by this function.
  */
  1853. void FASTCALL __fastGenCalcDeltas(
  1854. __GLcontext *gc,
  1855. __GLvertex *a,
  1856. __GLvertex *b,
  1857. __GLvertex *c)
  1858. {
  1859. __GLfloat oneOverArea, t1, t2, t3, t4;
  1860. #if _X86_ && ENABLE_ASM
  1861. LARGE_INTEGER temp;
  // Start the 1/area divide and leave the result on the FPU stack.
  // Register use in the asm below: edx = gc, edi = shader.modeFlags.
  1862. _asm{
  1863. mov edx, gc
  1864. fld __glOne
  1865. fdiv DWORD PTR [OFFSET(SHADER.area)][edx]
  1866. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  1867. test edi, __GL_SHADE_RGB
  1868. je notRGB
  1869. test edi, __GL_SHADE_SMOOTH
  1870. je notSmoothRGB
  1871. }
  // Smooth RGB: store oneOverArea, derive t1..t4, then compute the r, g, b
  // gradients.  eax/ebx/ecx first hold a/b/c and are then replaced by the
  // vertices' color pointers.  The trailing comments on each instruction
  // show the FPU stack contents, top of stack first.
  1872. _asm{
  1873. mov eax, a
  1874. mov ebx, b
  1875. mov ecx, c
  1876. fstp oneOverArea // finish divide
  1877. fld DWORD PTR [OFFSET(SHADER.dyAC)][edx]
  1878. mov eax, [OFFSET(__GLvertex.color)][eax]
  1879. fmul oneOverArea
  1880. fld DWORD PTR [OFFSET(SHADER.dyBC)][edx]
  1881. mov ebx, [OFFSET(__GLvertex.color)][ebx]
  1882. fmul oneOverArea // dyBC dyAC
  1883. fld DWORD PTR [OFFSET(SHADER.dxAC)][edx]
  1884. mov ecx, [OFFSET(__GLvertex.color)][ecx]
  1885. fmul oneOverArea // dxAC dyBC dyAC
  1886. fxch ST(1) // dyBC dxAC dyAC
  1887. fld DWORD PTR [OFFSET(SHADER.dxBC)][edx]
  1888. fmul oneOverArea // dxBC dyBC dxAC dyAC
  1889. fxch ST(3) // dyAC dyBC dxAC dxBC
  1890. fstp t1
  1891. fstp t2
  1892. fstp t3
  1893. fstp t4
  1894. // Now, calculate deltas:
  1895. // Red
  1896. fld DWORD PTR [OFFSET(__GLcolor.r)][eax]
  1897. fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx]
  1898. fld DWORD PTR [OFFSET(__GLcolor.r)][ebx]
  1899. fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx]
  1900. // drBC drAC
  1901. fld ST(1) // drAC drBC drAC
  1902. fmul t2 // drACt2 drBC drAC
  1903. fld ST(1) // drBC drACt2 drBC drAC
  1904. fmul t1 // drBCt1 drACt2 drBC drAC
  1905. fxch ST(2) // drBC drACt2 drBCt1 drAC
  1906. fmul t3 // drBCt3 drACt2 drBCt1 drAC
  1907. fxch ST(3) // drAC drACt2 drBCt1 drBCt3
  1908. fmul t4 // drACt4 drACt2 drBCt1 drBCt3
  1909. fxch ST(2) // drBCt1 drACt2 drACt4 drBCt3
  1910. fsubp ST(1), ST // drACBC drACt4 drBCt3
  1911. fld DWORD PTR [OFFSET(__GLcolor.g)][ebx]
  1912. fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx]
  1913. // dgBC drACBC drACt4 drBCt3
  1914. fxch ST(2) // drACt4 drACBC dgBC drBCt3
  1915. fsubp ST(3), ST // drACBC dgBC drBCAC
  1916. fst DWORD PTR [OFFSET(SHADER.drdx)][edx]
  1917. fmul __glVal65536
  1918. // DRACBC dgBC drBCAC
  1919. fxch ST(2) // drBCAC dgBC DRACBC
  1920. fstp DWORD PTR [OFFSET(SHADER.drdy)][edx]
  1921. // dgBC DRACBC
  1922. fld DWORD PTR [OFFSET(__GLcolor.g)][eax]
  1923. fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx]
  1924. // dgAC dgBC DRACBC
  1925. fxch ST(2) // DRACBC dgBC dgAC
  1926. fistp DWORD PTR [OFFSET(SPANDELTA.r)][edx]
  1927. // Green
  1928. // dgBC dgAC
  1929. fld ST(1) // dgAC dgBC dgAC
  1930. fmul t2 // dgACt2 dgBC dgAC
  1931. fld ST(1) // dgBC dgACt2 dgBC dgAC
  1932. fmul t1 // dgBCt1 dgACt2 dgBC dgAC
  1933. fxch ST(2) // dgBC dgACt2 dgBCt1 dgAC
  1934. fmul t3 // dgBCt3 dgACt2 dgBCt1 dgAC
  1935. fxch ST(3) // dgAC dgACt2 dgBCt1 dgBCt3
  1936. fmul t4 // dgACt4 dgACt2 dgBCt1 dgBCt3
  1937. fxch ST(2) // dgBCt1 dgACt2 dgACt4 dgBCt3
  1938. fsubp ST(1), ST // dgACBC dgACt4 dgBCt3
  1939. fld DWORD PTR [OFFSET(__GLcolor.b)][ebx]
  1940. fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx]
  1941. // dbBC dgACBC dgACt4 dgBCt3
  1942. fxch ST(2) // dgACt4 dgACBC dbBC dgBCt3
  1943. fsubp ST(3), ST // dgACBC dbBC dgBCAC
  1944. fst DWORD PTR [OFFSET(SHADER.dgdx)][edx]
  1945. fmul __glVal65536
  1946. // DGACBC dbBC dgBCAC
  1947. fxch ST(2) // dgBCAC dbBC DGACBC
  1948. fstp DWORD PTR [OFFSET(SHADER.dgdy)][edx]
  1949. // dbBC DGACBC
  1950. fld DWORD PTR [OFFSET(__GLcolor.b)][eax]
  1951. fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx]
  1952. // dbAC dbBC DGACBC
  1953. fxch ST(2) // DGACBC dbBC dbAC
  1954. fistp DWORD PTR [OFFSET(SPANDELTA.g)][edx]
  1955. // Blue
  1956. // dbBC dbAC
  1957. fld ST(1) // dbAC dbBC dbAC
  1958. fmul t2 // dbACt2 dbBC dbAC
  1959. fld ST(1) // dbBC dbACt2 dbBC dbAC
  1960. fmul t1 // dbBCt1 dbACt2 dbBC dbAC
  1961. fxch ST(2) // dbBC dbACt2 dbBCt1 dbAC
  1962. fmul t3 // dbBCt3 dbACt2 dbBCt1 dbAC
  1963. fxch ST(3) // dbAC dbACt2 dbBCt1 dbBCt3
  1964. fmul t4 // dbACt4 dbACt2 dbBCt1 dbBCt3
  1965. fxch ST(2) // dbBCt1 dbACt2 dbACt4 dbBCt3
  1966. fsubp ST(1), ST // dbACBC dbACt4 dbBCt3
  1967. fxch ST(1) // dbACt4 dbACBC dbBCt3
  1968. fsubp ST(2), ST // dbACBC dbBCAC (+1)
  1969. fst DWORD PTR [OFFSET(SHADER.dbdx)][edx]
  1970. fmul __glVal65536
  1971. // DBACBC dbBCAC
  1972. fxch ST(1) // dbBCAC DBACBC
  1973. fstp DWORD PTR [OFFSET(SHADER.dbdy)][edx]
  1974. fistp DWORD PTR [OFFSET(SPANDELTA.b)][edx]
  // Select the smooth span function by default; switch to the flat one
  // only when spanDelta.r, .g and .b are all zero and GEN_FASTZBUFFER is
  // not set in the accel flags.
  1975. mov ebx, [OFFSET(GENGCACCEL.__fastSmoothSpanFuncPtr)][edx]
  1976. mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx
  1977. mov eax, [OFFSET(SPANDELTA.r)][edx]
  1978. mov ebx, [OFFSET(SPANDELTA.g)][edx]
  1979. mov ecx, [OFFSET(SPANDELTA.b)][edx]
  1980. or eax, ebx
  1981. or eax, ecx
  1982. jne notZeroDelta
  1983. mov eax, [OFFSET(GENGCACCEL.flags)][edx]
  1984. test eax, GEN_FASTZBUFFER
  1985. jne notZeroDelta
  1986. mov ebx, [OFFSET(GENGCACCEL.__fastFlatSpanFuncPtr)][edx]
  1987. mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx
  1988. notZeroDelta:
  1989. #if !FORCE_NPX_DEBUG
  1990. jmp colorDone
  1991. #endif
  1992. }
  // Debug-only cross-check: recompute t1..t4, the RGB gradients and the
  // span deltas in C and print any value that differs from what the asm
  // above produced.
  1993. #if FORCE_NPX_DEBUG
  1994. {
  1995. __GLfloat drAC, dgAC, dbAC, daAC;
  1996. __GLfloat drBC, dgBC, dbBC, daBC;
  1997. __GLcolor *ac, *bc, *cc;
  1998. __GLfloat ft1 = gc->polygon.shader.dyAC * oneOverArea;
  1999. __GLfloat ft2 = gc->polygon.shader.dyBC * oneOverArea;
  2000. __GLfloat ft3 = gc->polygon.shader.dxAC * oneOverArea;
  2001. __GLfloat ft4 = gc->polygon.shader.dxBC * oneOverArea;
  2002. __GLfloat drdx;
  2003. __GLfloat drdy;
  2004. __GLfloat dgdx;
  2005. __GLfloat dgdy;
  2006. __GLfloat dbdx;
  2007. __GLfloat dbdy;
  2008. LONG spanR, spanG, spanB;
  2009. ac = a->color;
  2010. bc = b->color;
  2011. cc = c->color;
  2012. drAC = ac->r - cc->r;
  2013. drBC = bc->r - cc->r;
  2014. dgAC = ac->g - cc->g;
  2015. dgBC = bc->g - cc->g;
  2016. dbAC = ac->b - cc->b;
  2017. dbBC = bc->b - cc->b;
  2018. drdx = drAC * t2 - drBC * t1;
  2019. drdy = drBC * t3 - drAC * t4;
  2020. dgdx = dgAC * t2 - dgBC * t1;
  2021. dgdy = dgBC * t3 - dgAC * t4;
  2022. dbdx = dbAC * t2 - dbBC * t1;
  2023. dbdy = dbBC * t3 - dbAC * t4;
  2024. spanR = FLT_TO_FIX(drdx);
  2025. spanG = FLT_TO_FIX(dgdx);
  2026. spanB = FLT_TO_FIX(dbdx);
  2027. if (ft1 != t1)
  2028. DbgPrint("t1 %f %f\n", t1, ft1);
  2029. if (ft2 != t2)
  2030. DbgPrint("t2 %f %f\n", t2, ft2);
  2031. if (ft3 != t3)
  2032. DbgPrint("t3 %f %f\n", t3, ft3);
  2033. if (ft4 != t4)
  2034. DbgPrint("t4 %f %f\n", t4, ft4);
  2035. if (drdx != gc->polygon.shader.drdx)
  2036. DbgPrint("drdx %f %f\n", drdx, gc->polygon.shader.drdx);
  2037. if (drdy != gc->polygon.shader.drdy)
  2038. DbgPrint("drdy %f %f\n", drdy, gc->polygon.shader.drdy);
  2039. if (dgdx != gc->polygon.shader.dgdx)
  2040. DbgPrint("dgdx %f %f\n", dgdx, gc->polygon.shader.dgdx);
  2041. if (dgdy != gc->polygon.shader.dgdy)
  2042. DbgPrint("dgdy %f %f\n", dgdy, gc->polygon.shader.dgdy);
  2043. if (dbdx != gc->polygon.shader.dbdx)
  2044. DbgPrint("dbdx %f %f\n", dbdx, gc->polygon.shader.dbdx);
  2045. if (dbdy != gc->polygon.shader.dbdy)
  2046. DbgPrint("dbdy %f %f\n", dbdy, gc->polygon.shader.dbdy);
  2047. if (spanR != GENACCEL(gc).spanDelta.r)
  2048. DbgPrint("spanDelta.r %x %x\n", spanR, GENACCEL(gc).spanDelta.r);
  2049. if (spanG!= GENACCEL(gc).spanDelta.g)
  2050. DbgPrint("spanDelta.g %x %x\n", spanG, GENACCEL(gc).spanDelta.g);
  2051. if (spanB != GENACCEL(gc).spanDelta.b)
  2052. DbgPrint("spanDelta.b %x %x\n", spanB, GENACCEL(gc).spanDelta.b);
  2053. }
  // Reload edx (gc) and edi (modeFlags) for the asm at colorDone;
  // presumably the C code above does not preserve them.
  2054. _asm{
  2055. mov edx, gc
  2056. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  2057. jmp colorDone
  2058. }
  2059. #endif // FORCE_NPX_DEBUG
  // Flat-shaded RGB: spanValue.{r,g,b} = provoking vertex color scaled by
  // __glVal65536 and stored as integers; always uses the flat span function.
  // The pending 1/area result stays on the FPU stack underneath these values.
  2060. notSmoothRGB:
  2061. _asm{
  2062. mov eax, [OFFSET(__GLcontext.vertex.provoking)][edx]
  2063. fld __glVal65536
  2064. mov eax, [OFFSET(__GLvertex.color)][eax]
  2065. fmul DWORD PTR [OFFSET(__GLcolor.r)][eax]
  2066. fld __glVal65536
  2067. fmul DWORD PTR [OFFSET(__GLcolor.g)][eax]
  2068. fld __glVal65536
  2069. fmul DWORD PTR [OFFSET(__GLcolor.b)][eax]
  2070. // B G R
  2071. fxch ST(2) // R G B
  2072. fistp DWORD PTR [OFFSET(SPANVALUE.r)][edx] // G B
  2073. fistp DWORD PTR [OFFSET(SPANVALUE.g)][edx]
  2074. fistp DWORD PTR [OFFSET(SPANVALUE.b)][edx]
  2075. mov ebx, [OFFSET(GENGCACCEL.__fastFlatSpanFuncPtr)][edx]
  2076. mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx
  2077. jmp colorDone
  2078. }
  // __GL_SHADE_RGB not set: handled in C, and only the r component is
  // processed.  In the smooth case __GL_FLOAT_SIMPLE_END_DIVIDE is used to
  // obtain oneOverArea from the divide started in asm above (presumably it
  // stores the pending FPU result -- confirm against the macro definition).
  2079. notRGB:
  2080. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH)
  2081. {
  2082. __GLfloat drAC;
  2083. __GLfloat drBC;
  2084. __GLcolor *ac, *bc, *cc;
  2085. ac = a->color;
  2086. bc = b->color;
  2087. cc = c->color;
  2088. drAC = ac->r - cc->r;
  2089. drBC = bc->r - cc->r;
  2090. __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
  2091. t1 = gc->polygon.shader.dyAC * oneOverArea;
  2092. t2 = gc->polygon.shader.dyBC * oneOverArea;
  2093. t3 = gc->polygon.shader.dxAC * oneOverArea;
  2094. t4 = gc->polygon.shader.dxBC * oneOverArea;
  2095. gc->polygon.shader.drdx = drAC * t2 - drBC * t1;
  2096. gc->polygon.shader.drdy = drBC * t3 - drAC * t4;
  2097. GENACCEL(gc).spanDelta.r =
  2098. FLT_TO_FIX(gc->polygon.shader.drdx);
  2099. if (GENACCEL(gc).spanDelta.r == 0)
  2100. {
  2101. GENACCEL(gc).__fastSpanFuncPtr =
  2102. GENACCEL(gc).__fastFlatSpanFuncPtr;
  2103. }
  2104. else
  2105. {
  2106. GENACCEL(gc).__fastSpanFuncPtr =
  2107. GENACCEL(gc).__fastSmoothSpanFuncPtr;
  2108. }
  2109. }
  2110. else
  2111. {
  2112. GENACCEL(gc).spanValue.r =
  2113. FLT_TO_FIX(gc->vertex.provoking->color->r);
  2114. GENACCEL(gc).__fastSpanFuncPtr =
  2115. GENACCEL(gc).__fastFlatSpanFuncPtr;
  2116. }
  // Reload edx (gc) and edi (modeFlags), which the asm below reads.
  2117. _asm{
  2118. mov edx, gc
  2119. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  2120. }
  // Color setup finished.  Skip to noZ when depth is not iterated.  When it
  // is and shading is smooth, t1..t4 already exist (jump to areaOK);
  // otherwise fall through to compute them now.
  2121. colorDone:
  2122. _asm{
  2123. test edi, __GL_SHADE_DEPTH_ITER
  2124. je noZ
  2125. test edi, __GL_SHADE_SMOOTH
  2126. jne areaOK
  2127. }
  // Not smooth: the 1/area result is still on the FPU stack.  Store it and
  // derive t1..t4 exactly as in the smooth RGB path.
  2128. _asm{
  2129. fstp oneOverArea // finish divide
  2130. fld DWORD PTR [OFFSET(SHADER.dyAC)][edx]
  2131. fmul oneOverArea
  2132. fld DWORD PTR [OFFSET(SHADER.dyBC)][edx]
  2133. fmul oneOverArea // dyBC dyAC
  2134. fld DWORD PTR [OFFSET(SHADER.dxAC)][edx]
  2135. fmul oneOverArea // dxAC dyBC dyAC
  2136. fxch ST(1) // dyBC dxAC dyAC
  2137. fld DWORD PTR [OFFSET(SHADER.dxBC)][edx]
  2138. fmul oneOverArea // dxBC dyBC dxAC dyAC
  2139. fxch ST(3) // dyAC dyBC dxAC dxBC
  2140. fstp t1
  2141. fstp t2
  2142. fstp t3
  2143. fstp t4
  2144. }
  // Debug-only cross-check of t1..t4 against a C recomputation.
  2145. #if FORCE_NPX_DEBUG
  2146. {
  2147. __GLfloat ft1 = gc->polygon.shader.dyAC * oneOverArea;
  2148. __GLfloat ft2 = gc->polygon.shader.dyBC * oneOverArea;
  2149. __GLfloat ft3 = gc->polygon.shader.dxAC * oneOverArea;
  2150. __GLfloat ft4 = gc->polygon.shader.dxBC * oneOverArea;
  2151. if (ft1 != t1)
  2152. DbgPrint("zt1 %f %f\n", t1, ft1);
  2153. if (ft2 != t2)
  2154. DbgPrint("zt2 %f %f\n", t2, ft2);
  2155. if (ft3 != t3)
  2156. DbgPrint("zt3 %f %f\n", t3, ft3);
  2157. if (ft4 != t4)
  2158. DbgPrint("zt4 %f %f\n", t4, ft4);
  2159. }
  2160. _asm{
  2161. mov edx, gc
  2162. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  2163. }
  2164. #endif // FORCE_NPX_DEBUG
  // Depth gradients from window.z of a, b, c:
  //   dzdxf = dzAC * t2 - dzBC * t1,  dzdyf = dzBC * t3 - dzAC * t4.
  // dzdxf * zScale is converted to an integer through temp; the low DWORD
  // of temp is written to both shader.dzdx and spanDelta.z.
  2165. areaOK:
  2166. _asm{
  2167. mov ecx, c
  2168. mov eax, a
  2169. mov ebx, b
  2170. fld DWORD PTR [OFFSET(__GLvertex.window.z)][eax]
  2171. fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx]
  2172. fld DWORD PTR [OFFSET(__GLvertex.window.z)][ebx]
  2173. fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx]
  2174. // dzBC dzAC
  2175. fld ST(1) // dzAC dzBC dzAC
  2176. fmul t2 // ACt2 dzBC dzAC
  2177. fld ST(1) // dzBC ACt2 dzBC dzAC
  2178. fmul t1 // BCt1 ACt2 dzBC dzAC
  2179. fxch ST(3) // dzAC ACt2 dzBC BCt1
  2180. fmul t4 // ACt4 ACt2 dzBC BCt1
  2181. fxch ST(2) // dzBC ACt2 ACt4 BCt1
  2182. fmul t3 // BCt3 ACt2 ACt4 BCt1
  2183. fsubrp ST(2),ST // ACt2 BCAC BCt1
  2184. fsubrp ST(2),ST // BCAC ACBC
  2185. fxch ST(1) // ACBC BCAC
  2186. // dzdx dzdy
  2187. fld ST(0) // dzdx dzdx dzdy
  2188. fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
  2189. // dzdxS dzdx dzdy
  2190. fxch ST(2) // dzdy dzdx dzdxS
  2191. fstp DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
  2192. fstp DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
  2193. fistp temp
  2194. mov ebx, DWORD PTR temp
  2195. mov DWORD PTR [OFFSET(SHADER.dzdx)][edx], ebx
  2196. mov DWORD PTR [OFFSET(SPANDELTA.z)][edx], ebx
  2197. #if !FORCE_NPX_DEBUG
  2198. jmp deltaDone
  2199. #endif
  2200. }
  // Debug-only cross-check of the depth gradients against a C recomputation.
  2201. #if FORCE_NPX_DEBUG
  2202. {
  2203. __GLfloat dzdxf;
  2204. __GLfloat dzdyf;
  2205. __GLfloat dzAC, dzBC;
  2206. ULONG spanDeltaZ;
  2207. dzAC = a->window.z - c->window.z;
  2208. dzBC = b->window.z - c->window.z;
  2209. dzdxf = dzAC * t2 - dzBC * t1;
  2210. dzdyf = dzBC * t3 - dzAC * t4;
  2211. spanDeltaZ = FTOL(dzdxf * GENACCEL(gc).zScale);
  2212. if (dzdxf != gc->polygon.shader.dzdxf)
  2213. DbgPrint("dzdxf %f %f\n", dzdxf, gc->polygon.shader.dzdxf);
  2214. if (dzdyf != gc->polygon.shader.dzdyf)
  2215. DbgPrint("dzdyf %f %f\n", dzdyf, gc->polygon.shader.dzdyf);
  2216. if (spanDeltaZ != GENACCEL(gc).spanDelta.z)
  2217. DbgPrint("spanDeltaZ %x %x\n", spanDeltaZ, GENACCEL(gc).spanDelta.z);
  2218. goto deltaDone;
  2219. }
  2220. #endif // FORCE_NPX_DEBUG
  // No depth iteration.  If shading was not smooth, nothing has consumed
  // the 1/area result yet, so pop it to leave the FPU stack balanced.
  2221. noZ:
  2222. _asm{
  2223. test edi, __GL_SHADE_SMOOTH
  2224. jne deltaDone
  2225. fstp ST(0)
  2226. }
  2227. deltaDone:
  2228. return;
  // Portable C version of the same computation.  The divide is begun with
  // __GL_FLOAT_BEGIN_DIVIDE and ended with __GL_FLOAT_SIMPLE_END_DIVIDE on
  // whichever path first needs oneOverArea.
  2229. #else // _X86_
  2230. /* Pre-compute one over polygon area */
  2231. __GL_FLOAT_BEGIN_DIVIDE(__glOne, gc->polygon.shader.area, &oneOverArea);
  2232. /*
  2233. ** t1-4 are delta values for unit changes in x or y for each
  2234. ** parameter.
  2235. */
  2236. if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB)
  2237. {
  2238. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH)
  2239. {
  2240. __GLfloat drAC, dgAC, dbAC, daAC;
  2241. __GLfloat drBC, dgBC, dbBC, daBC;
  2242. __GLcolor *ac, *bc, *cc;
  2243. ac = a->color;
  2244. bc = b->color;
  2245. cc = c->color;
  2246. drAC = ac->r - cc->r;
  2247. drBC = bc->r - cc->r;
  2248. dgAC = ac->g - cc->g;
  2249. dgBC = bc->g - cc->g;
  2250. dbAC = ac->b - cc->b;
  2251. dbBC = bc->b - cc->b;
  2252. __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
  2253. t1 = gc->polygon.shader.dyAC * oneOverArea;
  2254. t2 = gc->polygon.shader.dyBC * oneOverArea;
  2255. t3 = gc->polygon.shader.dxAC * oneOverArea;
  2256. t4 = gc->polygon.shader.dxBC * oneOverArea;
  2257. gc->polygon.shader.drdx = drAC * t2 - drBC * t1;
  2258. gc->polygon.shader.drdy = drBC * t3 - drAC * t4;
  2259. gc->polygon.shader.dgdx = dgAC * t2 - dgBC * t1;
  2260. gc->polygon.shader.dgdy = dgBC * t3 - dgAC * t4;
  2261. gc->polygon.shader.dbdx = dbAC * t2 - dbBC * t1;
  2262. gc->polygon.shader.dbdy = dbBC * t3 - dbAC * t4;
  2263. GENACCEL(gc).spanDelta.r = FLT_TO_FIX(gc->polygon.shader.drdx);
  2264. GENACCEL(gc).spanDelta.g = FLT_TO_FIX(gc->polygon.shader.dgdx);
  2265. GENACCEL(gc).spanDelta.b = FLT_TO_FIX(gc->polygon.shader.dbdx);
  // Use the flat span function only when no color changes along a span
  // and GEN_FASTZBUFFER is not set.
  2266. if ( ((GENACCEL(gc).spanDelta.r | GENACCEL(gc).spanDelta.g |
  2267. GENACCEL(gc).spanDelta.b) == 0)
  2268. && ((GENACCEL(gc).flags & GEN_FASTZBUFFER) == 0))
  2269. {
  2270. GENACCEL(gc).__fastSpanFuncPtr =
  2271. GENACCEL(gc).__fastFlatSpanFuncPtr;
  2272. }
  2273. else
  2274. {
  2275. GENACCEL(gc).__fastSpanFuncPtr =
  2276. GENACCEL(gc).__fastSmoothSpanFuncPtr;
  2277. }
  2278. }
  2279. else
  2280. {
  2281. __GLcolor *flatColor = gc->vertex.provoking->color;
  2282. GENACCEL(gc).spanValue.r = FLT_TO_FIX(flatColor->r);
  2283. GENACCEL(gc).spanValue.g = FLT_TO_FIX(flatColor->g);
  2284. GENACCEL(gc).spanValue.b = FLT_TO_FIX(flatColor->b);
  2285. GENACCEL(gc).__fastSpanFuncPtr =
  2286. GENACCEL(gc).__fastFlatSpanFuncPtr;
  2287. }
  2288. }
  2289. else
  2290. {
  // __GL_SHADE_RGB not set: only the r component is processed.
  2291. if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH)
  2292. {
  2293. __GLfloat drAC;
  2294. __GLfloat drBC;
  2295. __GLcolor *ac, *bc, *cc;
  2296. ac = a->color;
  2297. bc = b->color;
  2298. cc = c->color;
  2299. drAC = ac->r - cc->r;
  2300. drBC = bc->r - cc->r;
  2301. __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
  2302. t1 = gc->polygon.shader.dyAC * oneOverArea;
  2303. t2 = gc->polygon.shader.dyBC * oneOverArea;
  2304. t3 = gc->polygon.shader.dxAC * oneOverArea;
  2305. t4 = gc->polygon.shader.dxBC * oneOverArea;
  2306. gc->polygon.shader.drdx = drAC * t2 - drBC * t1;
  2307. gc->polygon.shader.drdy = drBC * t3 - drAC * t4;
  2308. GENACCEL(gc).spanDelta.r =
  2309. FLT_TO_FIX(gc->polygon.shader.drdx);
  2310. if (GENACCEL(gc).spanDelta.r == 0)
  2311. {
  2312. GENACCEL(gc).__fastSpanFuncPtr =
  2313. GENACCEL(gc).__fastFlatSpanFuncPtr;
  2314. }
  2315. else
  2316. {
  2317. GENACCEL(gc).__fastSpanFuncPtr =
  2318. GENACCEL(gc).__fastSmoothSpanFuncPtr;
  2319. }
  2320. }
  2321. else
  2322. {
  2323. GENACCEL(gc).spanValue.r =
  2324. FLT_TO_FIX(gc->vertex.provoking->color->r);
  2325. GENACCEL(gc).__fastSpanFuncPtr =
  2326. GENACCEL(gc).__fastFlatSpanFuncPtr;
  2327. }
  2328. }
  2329. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER)
  2330. {
  2331. __GLfloat dzAC, dzBC;
  // t1..t4 were only computed above on the smooth paths; compute them
  // here for flat shading.
  2332. if ((gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) == 0)
  2333. {
  2334. __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
  2335. t1 = gc->polygon.shader.dyAC * oneOverArea;
  2336. t2 = gc->polygon.shader.dyBC * oneOverArea;
  2337. t3 = gc->polygon.shader.dxAC * oneOverArea;
  2338. t4 = gc->polygon.shader.dxBC * oneOverArea;
  2339. }
  2340. dzAC = a->window.z - c->window.z;
  2341. dzBC = b->window.z - c->window.z;
  2342. gc->polygon.shader.dzdxf = dzAC * t2 - dzBC * t1;
  2343. gc->polygon.shader.dzdyf = dzBC * t3 - dzAC * t4;
  2344. GENACCEL(gc).spanDelta.z = gc->polygon.shader.dzdx =
  2345. FTOL(gc->polygon.shader.dzdxf * GENACCEL(gc).zScale);
  2346. }
  2347. else if ((gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) == 0)
  2348. {
  2349. // In this case the divide hasn't been terminated yet so
  2350. // we need to complete it even though we don't use the result
  2351. __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
  2352. }
  2353. #endif // _X86_
  2354. }
  2355. void FASTCALL __fastGenCalcDeltasTexRGBA(
  2356. __GLcontext *gc,
  2357. __GLvertex *a,
  2358. __GLvertex *b,
  2359. __GLvertex *c)
  2360. {
  2361. __GLfloat oneOverArea, t1, t2, t3, t4;
  2362. GLboolean oneOverAreaDone;
  2363. #if _X86_ && ENABLE_ASM
  2364. LARGE_INTEGER temp;
  2365. _asm{
  2366. mov edx, gc
  2367. xor eax, eax
  2368. mov oneOverAreaDone, al
  2369. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  2370. fld __glOne
  2371. fdiv DWORD PTR [OFFSET(SHADER.area)][edx]
  2372. mov ebx, [OFFSET(GENGCACCEL.__fastTexSpanFuncPtr)][edx]
  2373. test edi, __GL_SHADE_TEXTURE
  2374. mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx
  2375. mov eax, [OFFSET(__GLcontext.state.texture.env)][edx]
  2376. je notReplace
  2377. mov ebx, [OFFSET(__GLtextureEnvState.mode)][eax]
  2378. cmp ebx, GL_REPLACE
  2379. je fastReplace
  2380. cmp ebx, GL_DECAL
  2381. jne notReplace
  2382. }
  2383. fastReplace:
  2384. _asm{
  2385. mov eax, [OFFSET(GENGCACCEL.constantR)][edx]
  2386. mov ebx, [OFFSET(GENGCACCEL.constantG)][edx]
  2387. mov [OFFSET(SPANVALUE.r)][edx], eax
  2388. mov [OFFSET(SPANVALUE.g)][edx], ebx
  2389. mov eax, [OFFSET(GENGCACCEL.constantB)][edx]
  2390. mov ebx, [OFFSET(GENGCACCEL.constantA)][edx]
  2391. mov [OFFSET(SPANVALUE.b)][edx], eax
  2392. mov [OFFSET(SPANVALUE.a)][edx], ebx
  2393. jmp colorDone
  2394. }
  2395. notReplace:
  2396. _asm{
  2397. test edi, __GL_SHADE_SMOOTH
  2398. je doFlat
  2399. mov al, 1
  2400. mov oneOverAreaDone, al
  2401. }
  2402. // smooth:
  2403. _asm{
  2404. mov eax, a
  2405. mov ebx, b
  2406. mov ecx, c
  2407. fstp oneOverArea // finish divide
  2408. fld DWORD PTR [OFFSET(SHADER.dyAC)][edx]
  2409. mov eax, [OFFSET(__GLvertex.color)][eax]
  2410. fmul oneOverArea
  2411. fld DWORD PTR [OFFSET(SHADER.dyBC)][edx]
  2412. mov ebx, [OFFSET(__GLvertex.color)][ebx]
  2413. fmul oneOverArea // dyBC dyAC
  2414. fld DWORD PTR [OFFSET(SHADER.dxAC)][edx]
  2415. mov ecx, [OFFSET(__GLvertex.color)][ecx]
  2416. fmul oneOverArea // dxAC dyBC dyAC
  2417. fxch ST(1) // dyBC dxAC dyAC
  2418. fld DWORD PTR [OFFSET(SHADER.dxBC)][edx]
  2419. fmul oneOverArea // dxBC dyBC dxAC dyAC
  2420. fxch ST(3) // dyAC dyBC dxAC dxBC
  2421. fstp t1
  2422. fstp t2
  2423. fstp t3
  2424. fstp t4
  2425. // Now, calculate deltas:
  2426. // Red
  2427. fld DWORD PTR [OFFSET(__GLcolor.r)][eax]
  2428. fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx]
  2429. fld DWORD PTR [OFFSET(__GLcolor.r)][ebx]
  2430. fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx]
  2431. // drBC drAC
  2432. fld ST(1) // drAC drBC drAC
  2433. fmul t2 // drACt2 drBC drAC
  2434. fld ST(1) // drBC drACt2 drBC drAC
  2435. fmul t1 // drBCt1 drACt2 drBC drAC
  2436. fxch ST(2) // drBC drACt2 drBCt1 drAC
  2437. fmul t3 // drBCt3 drACt2 drBCt1 drAC
  2438. fxch ST(3) // drAC drACt2 drBCt1 drBCt3
  2439. fmul t4 // drACt4 drACt2 drBCt1 drBCt3
  2440. fxch ST(2) // drBCt1 drACt2 drACt4 drBCt3
  2441. fsubp ST(1), ST // drACBC drACt4 drBCt3
  2442. fld DWORD PTR [OFFSET(__GLcolor.g)][ebx]
  2443. fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx]
  2444. // dgBC drACBC drACt4 drBCt3
  2445. fxch ST(2) // drACt4 drACBC dgBC drBCt3
  2446. fsubp ST(3), ST // drACBC dgBC drBCAC
  2447. fst DWORD PTR [OFFSET(SHADER.drdx)][edx]
  2448. fmul __glVal65536
  2449. // DRACBC dgBC drBCAC
  2450. fxch ST(2) // drBCAC dgBC DRACBC
  2451. fstp DWORD PTR [OFFSET(SHADER.drdy)][edx]
  2452. // dgBC DRACBC
  2453. fld DWORD PTR [OFFSET(__GLcolor.g)][eax]
  2454. fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx]
  2455. // dgAC dgBC DRACBC
  2456. fxch ST(2) // DRACBC dgBC dgAC
  2457. fistp DWORD PTR [OFFSET(SPANDELTA.r)][edx]
  2458. // Green
  2459. // dgBC dgAC
  2460. fld ST(1) // dgAC dgBC dgAC
  2461. fmul t2 // dgACt2 dgBC dgAC
  2462. fld ST(1) // dgBC dgACt2 dgBC dgAC
  2463. fmul t1 // dgBCt1 dgACt2 dgBC dgAC
  2464. fxch ST(2) // dgBC dgACt2 dgBCt1 dgAC
  2465. fmul t3 // dgBCt3 dgACt2 dgBCt1 dgAC
  2466. fxch ST(3) // dgAC dgACt2 dgBCt1 dgBCt3
  2467. fmul t4 // dgACt4 dgACt2 dgBCt1 dgBCt3
  2468. fxch ST(2) // dgBCt1 dgACt2 dgACt4 dgBCt3
  2469. fsubp ST(1), ST // dgACBC dgACt4 dgBCt3
  2470. fld DWORD PTR [OFFSET(__GLcolor.b)][ebx]
  2471. fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx]
  2472. // dbBC dgACBC dgACt4 dgBCt3
  2473. fxch ST(2) // dgACt4 dgACBC dbBC dgBCt3
  2474. fsubp ST(3), ST // dgACBC dbBC dgBCAC
  2475. fst DWORD PTR [OFFSET(SHADER.dgdx)][edx]
  2476. fmul __glVal65536
  2477. // DGACBC dbBC dgBCAC
  2478. fxch ST(2) // dgBCAC dbBC DGACBC
  2479. fstp DWORD PTR [OFFSET(SHADER.dgdy)][edx]
  2480. // dbBC DGACBC
  2481. fld DWORD PTR [OFFSET(__GLcolor.b)][eax]
  2482. fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx]
  2483. // dbAC dbBC DGACBC
  2484. fxch ST(2) // DGACBC dbBC dbAC
  2485. fistp DWORD PTR [OFFSET(SPANDELTA.g)][edx]
  2486. // Blue
  2487. // dbBC dbAC
  2488. fld ST(1) // dbAC dbBC dbAC
  2489. fmul t2 // dbACt2 dbBC dbAC
  2490. fld ST(1) // dbBC dbACt2 dbBC dbAC
  2491. fmul t1 // dbBCt1 dbACt2 dbBC dbAC
  2492. fxch ST(2) // dbBC dbACt2 dbBCt1 dbAC
  2493. fmul t3 // dbBCt3 dbACt2 dbBCt1 dbAC
  2494. fxch ST(3) // dbAC dbACt2 dbBCt1 dbBCt3
  2495. fmul t4 // dbACt4 dbACt2 dbBCt1 dbBCt3
  2496. fxch ST(2) // dbBCt1 dbACt2 dbACt4 dbBCt3
  2497. fsubp ST(1), ST // dbACBC dbACt4 dbBCt3
  2498. fxch ST(1) // dbACt4 dbACBC dbBCt3
  2499. fsubp ST(2), ST // dbACBC dbBCAC (+1)
  2500. fst DWORD PTR [OFFSET(SHADER.dbdx)][edx]
  2501. fmul __glVal65536
  2502. // DBACBC dbBCAC
  2503. fxch ST(1) // dbBCAC DBACBC
  2504. fstp DWORD PTR [OFFSET(SHADER.dbdy)][edx]
  2505. test [OFFSET(__GLcontext.state.enables.general)][edx], __GL_BLEND_ENABLE
  2506. fistp DWORD PTR [OFFSET(SPANDELTA.b)][edx]
  2507. je colorDone
  2508. fld DWORD PTR [OFFSET(__GLcolor.a)][eax]
  2509. fsub DWORD PTR [OFFSET(__GLcolor.a)][ecx]
  2510. // daAC
  2511. fld DWORD PTR [OFFSET(__GLcolor.a)][ebx]
  2512. fsub DWORD PTR [OFFSET(__GLcolor.a)][ecx]
  2513. // daBC daAC
  2514. fld ST(1) // daAC daBC daAC
  2515. fmul t2 // daACt2 daBC daAC
  2516. fld ST(1) // daBC daACt2 daBC daAC
  2517. fmul t1 // daBCt1 daACt2 daBC daAC
  2518. fxch ST(3) // daAC daACt2 daBC daBCt1
  2519. fmul t4 // daACt4 daACt2 daBC daBCt1
  2520. fxch ST(2) // daBC daACt2 daACt4 daBCt1
  2521. fmul t3 // daBCt3 daACt2 daACt4 daBCt1
  2522. fxch ST(3) // daBCt1 daACt2 daACt4 daBCt3
  2523. fsubp ST(1), ST // daACBC daACt4 daBCt3
  2524. fxch ST(1) // daACt4 daACBC daBCt3
  2525. fsubp ST(2), ST // daACBC daBCAC (+1)
  2526. fst DWORD PTR [OFFSET(SHADER.dadx)][edx]
  2527. fmul DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx]
  2528. fxch ST(1)
  2529. fstp DWORD PTR [OFFSET(SHADER.dady)][edx]
  2530. fistp DWORD PTR [OFFSET(SPANDELTA.a)][edx] // (+1)
  2531. #if !FORCE_NPX_DEBUG
  2532. jmp colorDone
  2533. #endif
  2534. }
  2535. #if FORCE_NPX_DEBUG
  2536. {
  2537. __GLfloat drAC, dgAC, dbAC, daAC;
  2538. __GLfloat drBC, dgBC, dbBC, daBC;
  2539. __GLcolor *ac, *bc, *cc;
  2540. __GLfloat ft1 = gc->polygon.shader.dyAC * oneOverArea;
  2541. __GLfloat ft2 = gc->polygon.shader.dyBC * oneOverArea;
  2542. __GLfloat ft3 = gc->polygon.shader.dxAC * oneOverArea;
  2543. __GLfloat ft4 = gc->polygon.shader.dxBC * oneOverArea;
  2544. __GLfloat drdx;
  2545. __GLfloat drdy;
  2546. __GLfloat dgdx;
  2547. __GLfloat dgdy;
  2548. __GLfloat dbdx;
  2549. __GLfloat dbdy;
  2550. LONG spanR, spanG, spanB;
  2551. ac = a->color;
  2552. bc = b->color;
  2553. cc = c->color;
  2554. drAC = ac->r - cc->r;
  2555. drBC = bc->r - cc->r;
  2556. dgAC = ac->g - cc->g;
  2557. dgBC = bc->g - cc->g;
  2558. dbAC = ac->b - cc->b;
  2559. dbBC = bc->b - cc->b;
  2560. drdx = drAC * t2 - drBC * t1;
  2561. drdy = drBC * t3 - drAC * t4;
  2562. dgdx = dgAC * t2 - dgBC * t1;
  2563. dgdy = dgBC * t3 - dgAC * t4;
  2564. dbdx = dbAC * t2 - dbBC * t1;
  2565. dbdy = dbBC * t3 - dbAC * t4;
  2566. spanR = FLT_TO_FIX(drdx);
  2567. spanG = FLT_TO_FIX(dgdx);
  2568. spanB = FLT_TO_FIX(dbdx);
  2569. if (ft1 != t1)
  2570. DbgPrint("t1 %f %f\n", t1, ft1);
  2571. if (ft2 != t2)
  2572. DbgPrint("t2 %f %f\n", t2, ft2);
  2573. if (ft3 != t3)
  2574. DbgPrint("t3 %f %f\n", t3, ft3);
  2575. if (ft4 != t4)
  2576. DbgPrint("t4 %f %f\n", t4, ft4);
  2577. if (drdx != gc->polygon.shader.drdx)
  2578. DbgPrint("drdx %f %f\n", drdx, gc->polygon.shader.drdx);
  2579. if (drdy != gc->polygon.shader.drdy)
  2580. DbgPrint("drdy %f %f\n", drdy, gc->polygon.shader.drdy);
  2581. if (dgdx != gc->polygon.shader.dgdx)
  2582. DbgPrint("dgdx %f %f\n", dgdx, gc->polygon.shader.dgdx);
  2583. if (dgdy != gc->polygon.shader.dgdy)
  2584. DbgPrint("dgdy %f %f\n", dgdy, gc->polygon.shader.dgdy);
  2585. if (dbdx != gc->polygon.shader.dbdx)
  2586. DbgPrint("dbdx %f %f\n", dbdx, gc->polygon.shader.dbdx);
  2587. if (dbdy != gc->polygon.shader.dbdy)
  2588. DbgPrint("dbdy %f %f\n", dbdy, gc->polygon.shader.dbdy);
  2589. if (spanR != GENACCEL(gc).spanDelta.r)
  2590. DbgPrint("spanDelta.r %x %x\n", spanR, GENACCEL(gc).spanDelta.r);
  2591. if (spanG!= GENACCEL(gc).spanDelta.g)
  2592. DbgPrint("spanDelta.g %x %x\n", spanG, GENACCEL(gc).spanDelta.g);
  2593. if (spanB != GENACCEL(gc).spanDelta.b)
  2594. DbgPrint("spanDelta.b %x %x\n", spanB, GENACCEL(gc).spanDelta.b);
  2595. if (gc->state.enables.general & __GL_BLEND_ENABLE) {
  2596. __GLfloat dadx;
  2597. __GLfloat dady;
  2598. LONG a;
  2599. daAC = ac->a - cc->a;
  2600. daBC = bc->a - cc->a;
  2601. dadx = daAC * t2 - daBC * t1;
  2602. dady = daBC * t3 - daAC * t4;
  2603. a = FTOL(gc->polygon.shader.dadx * GENACCEL(gc).aAccelScale);
  2604. if (dadx != gc->polygon.shader.dadx)
  2605. DbgPrint("dadx %f %f\n", dadx, gc->polygon.shader.dadx);
  2606. if (dady != gc->polygon.shader.dady)
  2607. DbgPrint("dady %f %f\n", dady, gc->polygon.shader.dady);
  2608. if (a != GENACCEL(gc).spanDelta.a)
  2609. DbgPrint("spanDelta.a %x %x\n", a, GENACCEL(gc).spanDelta.a);
  2610. }
  2611. }
  2612. _asm {
  2613. mov edx, gc
  2614. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  2615. jmp colorDone
  2616. }
  2617. #endif // FORCE_NPX_DEBUG
  2618. doFlat:
  2619. _asm{
  2620. mov eax, [OFFSET(__GLcontext.vertex.provoking)][edx]
  2621. fld __glVal65536
  2622. mov eax, [OFFSET(__GLvertex.color)][eax]
  2623. fmul DWORD PTR [OFFSET(__GLcolor.r)][eax]
  2624. fld __glVal65536
  2625. fmul DWORD PTR [OFFSET(__GLcolor.g)][eax]
  2626. mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx]
  2627. fld __glVal65536
  2628. test ebx, __GL_BLEND_ENABLE
  2629. fmul DWORD PTR [OFFSET(__GLcolor.b)][eax]
  2630. je noFlatBlend
  2631. fld DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx]
  2632. fmul DWORD PTR [OFFSET(__GLcolor.a)][eax]
  2633. // A B G R
  2634. fxch ST(3) // R B G A
  2635. fistp DWORD PTR [OFFSET(SPANVALUE.r)][edx]
  2636. fistp DWORD PTR [OFFSET(SPANVALUE.b)][edx]
  2637. fistp DWORD PTR [OFFSET(SPANVALUE.g)][edx]
  2638. fistp DWORD PTR [OFFSET(SPANVALUE.a)][edx]
  2639. jmp short flatDone
  2640. noFlatBlend:
  2641. // B G R
  2642. fxch ST(2) // R G B
  2643. fistp DWORD PTR [OFFSET(SPANVALUE.r)][edx] // G B
  2644. fistp DWORD PTR [OFFSET(SPANVALUE.g)][edx]
  2645. fistp DWORD PTR [OFFSET(SPANVALUE.b)][edx]
  2646. flatDone:
  2647. }
  2648. colorDone:
  2649. _asm{
  2650. test edi, __GL_SHADE_TEXTURE
  2651. mov eax, [OFFSET(GENGCACCEL.texImage)][edx]
  2652. je texDone
  2653. test eax, eax
  2654. je texDone
  2655. }
  2656. _asm{
  2657. mov al, oneOverAreaDone
  2658. mov ebx, [OFFSET(__GLcontext.state.hints.perspectiveCorrection)][edx]
  2659. test al, al
  2660. jne areaDoneAlready
  2661. }
  2662. _asm{
  2663. fstp oneOverArea // finish divide
  2664. fld DWORD PTR [OFFSET(SHADER.dyAC)][edx]
  2665. fmul oneOverArea
  2666. fld DWORD PTR [OFFSET(SHADER.dyBC)][edx]
  2667. fmul oneOverArea // dyBC dyAC
  2668. fld DWORD PTR [OFFSET(SHADER.dxAC)][edx]
  2669. fmul oneOverArea // dxAC dyBC dyAC
  2670. fxch ST(1) // dyBC dxAC dyAC
  2671. fld DWORD PTR [OFFSET(SHADER.dxBC)][edx]
  2672. fmul oneOverArea // dxBC dyBC dxAC dyAC
  2673. fxch ST(3) // dyAC dyBC dxAC dxBC
  2674. fstp t1
  2675. inc eax
  2676. fstp t2
  2677. mov oneOverAreaDone, al
  2678. fstp t3
  2679. fstp t4
  2680. }
  2681. areaDoneAlready:
  2682. _asm{
  2683. cmp ebx, GL_NICEST
  2684. je doNicest
  2685. }
  2686. _asm{
  2687. mov eax, a
  2688. mov ecx, c
  2689. mov ebx, b
  2690. fld DWORD PTR [OFFSET(__GLvertex.texture.x)][eax]
  2691. fsub DWORD PTR [OFFSET(__GLvertex.texture.x)][ecx]
  2692. // dsAC
  2693. fld DWORD PTR [OFFSET(__GLvertex.texture.x)][ebx]
  2694. fsub DWORD PTR [OFFSET(__GLvertex.texture.x)][ecx]
  2695. // dsBC dsAC
  2696. fld ST(1) // dsAC dsBC dsAC
  2697. fmul t2
  2698. fxch ST(2) // dsAC dsBC dsACt2
  2699. fmul t4 // dsACt4 dsBC dsACt2
  2700. fld ST(1) // dsBC dsACt4 dsBC dsACt2
  2701. fmul t1 // dsBCt1 dsACt4 dsBC dsACt2
  2702. fxch ST(2) // dsBC dsACt4 dsBCt1 dsACt2
  2703. fmul t3 // dsBCt3 dsACt4 dsBCt1 dsACt2
  2704. fxch ST(2) // dsBCt1 dsACt4 dsBCt3 dsACt2
  2705. fsubp ST(3), ST // dsACt4 dsBCt3 dsACBC
  2706. fld DWORD PTR [OFFSET(__GLvertex.texture.y)][ebx]
  2707. fsub DWORD PTR [OFFSET(__GLvertex.texture.y)][ecx]
  2708. // dtBC dsACt4 dsBCt3 dsACBC
  2709. fxch ST(1) // dsACt4 dtBC dsBCt3 dsACBC
  2710. fsubp ST(2), ST // dtBC dsBCAC dsACBC
  2711. fxch ST(2) // dsACBC dsBCAC dtBC
  2712. fst DWORD PTR [OFFSET(SHADER.dsdx)][edx]
  2713. // dsdx dsBCAC dtBC
  2714. fld DWORD PTR [OFFSET(__GLvertex.texture.y)][eax]
  2715. fsub DWORD PTR [OFFSET(__GLvertex.texture.y)][ecx]
  2716. // dtAC dsdx dsBCAC dtBC
  2717. fxch ST(2) // dsBCAC dsdx dtAC dtBC
  2718. fstp DWORD PTR [OFFSET(SHADER.dsdy)][edx]
  2719. // dsdx dtAC dtBC
  2720. fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
  2721. // deltaS dtAC dtBC
  2722. fxch ST(2) // dtBC dtAC deltaS
  2723. fld ST(1) // dtAC dtBC dtAC deltaS
  2724. fmul t2 // dtACt2 dtBC dtAC deltaS
  2725. fxch ST(2) // dtAC dtBC dtACt2 deltaS
  2726. fmul t4 // dtACt4 dtBC dtACt2 deltaS
  2727. fld ST(1) // dtBC dtACt4 dtBC dtACt2 deltaS
  2728. fmul t1 // dtBCt1 dtACt4 dtBC dtACt2 deltaS
  2729. fxch ST(2) // dtBC dtACt4 dtBCt1 dtACt2 deltaS
  2730. fmul t3 // dtBCt3 dtACt4 dtBCt1 dtACt2 deltaS
  2731. fxch ST(2) // dtBCt1 dtACt4 dtBCt3 dtACt2 deltaS
  2732. fsubp ST(3), ST // dtACt4 dtBCt3 dtACBC deltaS
  2733. fxch ST(3) // deltaS dtBCt3 dtACBC dtACt4
  2734. fistp DWORD PTR [OFFSET(SPANDELTA.s)][edx]
  2735. // dtBCt3 dtACBC dtACt4
  2736. fsubrp ST(2), ST // dtACBC dtBCAC
  2737. fst DWORD PTR [OFFSET(SHADER.dtdx)][edx]
  2738. fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx]
  2739. fxch ST(1) // dtBCAC deltaT
  2740. fstp DWORD PTR [OFFSET(SHADER.dtdy)][edx]
  2741. mov eax, [OFFSET(SPANDELTA.s)][edx]
  2742. fistp DWORD PTR [OFFSET(SPANDELTA.t)][edx]
  2743. shl eax, TEX_SUBDIV_LOG2
  2744. mov ebx, [OFFSET(SPANDELTA.t)][edx]
  2745. shl ebx, TEX_SUBDIV_LOG2
  2746. mov [OFFSET(GENGCACCEL.sStepX)][edx], eax
  2747. mov [OFFSET(GENGCACCEL.tStepX)][edx], ebx
  2748. #if !FORCE_NPX_DEBUG
  2749. jmp texDone
  2750. #endif
  2751. }
  2752. #if FORCE_NPX_DEBUG
  2753. {
  2754. __GLfloat awinv, bwinv, cwinv, scwinv, tcwinv, qwcwinv;
  2755. __GLfloat dsAC, dsBC, dtAC, dtBC, dqwAC, dqwBC;
  2756. __GLfloat dsdx, dsdy;
  2757. __GLfloat dtdx, dtdy;
  2758. LONG spanDeltaS, spanDeltaT;
  2759. dsAC = a->texture.x - c->texture.x;
  2760. dsBC = b->texture.x - c->texture.x;
  2761. dsdx = dsAC * t2 - dsBC * t1;
  2762. dsdy = dsBC * t3 - dsAC * t4;
  2763. dtAC = a->texture.y - c->texture.y;
  2764. dtBC = b->texture.y - c->texture.y;
  2765. dtdx = dtAC * t2 - dtBC * t1;
  2766. dtdy = dtBC * t3 - dtAC * t4;
  2767. spanDeltaS = FTOL(dsdx * GENACCEL(gc).texXScale);
  2768. spanDeltaT = FTOL(dtdx * GENACCEL(gc).texYScale);
  2769. if (gc->polygon.shader.dsdx != dsdx)
  2770. DbgPrint("dsdx %f %f\n", dsdx, gc->polygon.shader.dsdx);
  2771. if (gc->polygon.shader.dsdy != dsdy)
  2772. DbgPrint("dsdy %f %f\n", dsdy, gc->polygon.shader.dsdy);
  2773. if (gc->polygon.shader.dtdx != dtdx)
  2774. DbgPrint("dtdx %f %f\n", dtdx, gc->polygon.shader.dtdx);
  2775. if (gc->polygon.shader.dtdy != dtdy)
  2776. DbgPrint("dtdy %f %f\n", dtdy, gc->polygon.shader.dtdy);
  2777. if (spanDeltaS != GENACCEL(gc).spanDelta.s)
  2778. DbgPrint("spanDelta.s %x %x\n", spanDeltaS, GENACCEL(gc).spanDelta.s);
  2779. if (spanDeltaT != GENACCEL(gc).spanDelta.t)
  2780. DbgPrint("spanDelta.t %x %x\n", spanDeltaT, GENACCEL(gc).spanDelta.t);
  2781. }
  2782. _asm {
  2783. mov edx, gc
  2784. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  2785. jmp texDone
  2786. }
  2787. #endif // FORCE_NPX_DEBUG
  2788. doNicest:
  2789. // LATER - remove store/read of dsdx, dtdx
  2790. _asm{
  2791. mov ecx, c
  2792. mov ebx, b
  2793. mov eax, a
  2794. fld DWORD PTR [OFFSET(__GLvertex.texture.x)][ecx] // sc
  2795. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ecx]
  2796. fld DWORD PTR [OFFSET(__GLvertex.texture.x)][eax]
  2797. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax] // dsA sc
  2798. fld DWORD PTR [OFFSET(__GLvertex.texture.x)][ebx]
  2799. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ebx]
  2800. // dsB dsA sc
  2801. fxch ST(2) // sc dsA dsB
  2802. fsub ST(1), ST // sc dsAC dsB
  2803. fld DWORD PTR [OFFSET(__GLvertex.texture.y)][ecx] // tcwinv
  2804. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ecx]
  2805. // tc sc dsAC dsB
  2806. fxch ST(1) // sc tc dsAC dsB
  2807. fsubp ST(3), ST // tc dsAC dsBC
  2808. fxch ST(2) // dsBC dsAC tc
  2809. fld ST(1) // dsAC dsBC dsAC tc
  2810. fmul t2
  2811. fxch ST(2) // dsAC dsBC dsACt2 tc
  2812. fmul t4 // dsACt4 dsBC dsACt2 tc
  2813. fld ST(1) // dsBC dsACt4 dsBC dsACt2 tc
  2814. fmul t1 // dsBCt1 dsACt4 dsBC dsACt2 tc
  2815. fxch ST(2) // dsBC dsACt4 dsBCt1 dsACt2 tc
  2816. fmul t3 // dsBCt3 dsACt4 dsBCt1 dsACt2 tc
  2817. fxch ST(2) // dsBCt1 dsACt4 dsBCt3 dsACt2 tc
  2818. fsubp ST(3), ST // dsACt4 dsBCt3 dsACBC tc
  2819. fld DWORD PTR [OFFSET(__GLvertex.texture.y)][eax]
  2820. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax]
  2821. // dtA dsACt4 dsBCt3 dsACBC tc
  2822. fxch ST(1) // dsACt4 dtA dsBCt3 dsACBC tc
  2823. fsubp ST(2), ST // dtA dsBCAC dsACBC tc
  2824. fxch ST(2) // dsACBC dsBCAC dtA tc
  2825. fstp DWORD PTR [OFFSET(SHADER.dsdx)][edx]
  2826. // dsBCAC dtA tc
  2827. fld DWORD PTR [OFFSET(__GLvertex.texture.y)][ebx]
  2828. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ebx]
  2829. // dtB dsBCAC dtA tc
  2830. fxch ST(1) // dsBCAC dtB dtA tc
  2831. fstp DWORD PTR [OFFSET(SHADER.dsdy)][edx]
  2832. // dtB dtA tc
  2833. fxch ST(2) // tc dtA dtB
  2834. fsub ST(1), ST // tc dtAC dtB
  2835. fsubp ST(2), ST // dtAC dtBC
  2836. fld DWORD PTR [OFFSET(__GLvertex.texture.w)][ecx]
  2837. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ecx]
  2838. // qw dtAC dtBC
  2839. fxch ST(2) // dtBC dtAC qw
  2840. fld ST(1) // dtAC dtBC dtAC qw
  2841. fmul t2 // dtACt2 dtBC dtAC qw
  2842. fxch ST(2) // dtAC dtBC dtACt2 qw
  2843. fmul t4 // dtACt4 dtBC dtACt2 qw
  2844. fld ST(1) // dtBC dtACt4 dtBC dtACt2 qw
  2845. fmul t1 // dtBCt1 dtACt4 dtBC dtACt2 qw
  2846. fxch ST(2) // dtBC dtACt4 dtBCt1 dtACt2 qw
  2847. fmul t3 // dtBCt3 dtACt4 dtBCt1 dtACt2 qw
  2848. fxch ST(2) // dtBCt1 dtACt4 dtBCt3 dtACt2 qw
  2849. fsubp ST(3), ST // dtACt4 dtBCt3 dtACBC qw
  2850. fld DWORD PTR [OFFSET(__GLvertex.texture.w)][eax]
  2851. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax]
  2852. // dqA dtACt4 dtBCt3 dtACBC qw
  2853. fxch ST(1) // dtACt4 dqA dtBCt3 dtACBC qw
  2854. fsubp ST(2), ST // dqA dtBCAC dtACBC qw
  2855. fxch ST(2) // dtACBC dtBCAC dqA qw
  2856. fstp DWORD PTR [OFFSET(SHADER.dtdx)][edx]
  2857. // dtBCAC dqA qw
  2858. fld DWORD PTR [OFFSET(__GLvertex.texture.w)][ebx]
  2859. fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ebx]
  2860. // dqB dtBCAC dqA qw
  2861. fxch ST(3) // qw dsBCAC dqA dqB
  2862. fsub ST(2), ST // qw dsBCAC dqAC dqB
  2863. fxch ST(1) // dsBCAC qw dqAC dqB
  2864. fstp DWORD PTR [OFFSET(SHADER.dtdy)][edx]
  2865. // qw dqAC dqB
  2866. fsubp ST(2), ST // dqAC dqBC
  2867. fxch ST(1) // dqBC dqAC
  2868. fld ST(1) // dqAC dqBC dqAC
  2869. fmul t2 // dqACt2 dqBC dqAC
  2870. fxch ST(2) // dqAC dqBC dqACt2
  2871. fmul t4 // dqACt4 dqBC dqACt2
  2872. fld ST(1) // dqBC dqACt4 dqBC dqACt2
  2873. fmul t1 // dqBCt1 dqACt4 dqBC dqACt2
  2874. fxch ST(2) // dqBC dqACt4 dqBCt1 dqACt2
  2875. fmul t3 // dqBCt3 dqACt4 dqBCt1 dqACt2
  2876. fxch ST(2) // dqBCt1 dqACt4 dqBCt3 dqACt2
  2877. fsubp ST(3), ST // dqACt4 dqBCt3 dqACBC
  2878. fxch ST(2) // dqACBC dqBCt3 dqACt4
  2879. fld DWORD PTR [OFFSET(SHADER.dsdx)][edx]
  2880. fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
  2881. // deltaS dqACBC dqBCt3 dqACt4
  2882. fxch ST(3) // dqACt4 dqACBC dqBCt3 deltaS
  2883. fsubp ST(2), ST // dqACBC dqBCAC deltaS
  2884. fld DWORD PTR [OFFSET(SHADER.dtdx)][edx]
  2885. fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx]
  2886. // deltaT dqACBC dqBCAC deltaS
  2887. fld __glTexSubDiv
  2888. fmul ST, ST(2)
  2889. // qwStep deltaT dqACBC dqBCAC deltaS
  2890. fxch ST(4) // deltaS deltaT dqACBC dqBCAC qwStep
  2891. fistp DWORD PTR [OFFSET(SPANDELTA.s)][edx]
  2892. fistp DWORD PTR [OFFSET(SPANDELTA.t)][edx]
  2893. // dqACBC dqBCAC qwStep
  2894. fstp DWORD PTR [OFFSET(SHADER.dqwdx)][edx]
  2895. fstp DWORD PTR [OFFSET(SHADER.dqwdy)][edx]
  2896. mov eax, [OFFSET(SPANDELTA.s)][edx]
  2897. fstp DWORD PTR [OFFSET(GENGCACCEL.qwStepX)][edx]
  2898. shl eax, TEX_SUBDIV_LOG2
  2899. mov ebx, [OFFSET(SPANDELTA.t)][edx]
  2900. shl ebx, TEX_SUBDIV_LOG2
  2901. mov [OFFSET(GENGCACCEL.sStepX)][edx], eax
  2902. mov [OFFSET(GENGCACCEL.tStepX)][edx], ebx
  2903. }
  2904. #if FORCE_NPX_DEBUG
  2905. {
  2906. __GLfloat awinv, bwinv, cwinv, scwinv, tcwinv, qwcwinv;
  2907. __GLfloat dsAC, dsBC, dtAC, dtBC, dqwAC, dqwBC;
  2908. __GLfloat dsdx, dsdy;
  2909. __GLfloat dtdx, dtdy;
  2910. __GLfloat dqwdx, dqwdy;
  2911. __GLfloat qwStepX;
  2912. LONG spanDeltaS, spanDeltaT;
  2913. awinv = a->window.w;
  2914. bwinv = b->window.w;
  2915. cwinv = c->window.w;
  2916. scwinv = c->texture.x * cwinv;
  2917. tcwinv = c->texture.y * cwinv;
  2918. qwcwinv = c->texture.w * cwinv;
  2919. dsAC = a->texture.x * awinv - scwinv;
  2920. dsBC = b->texture.x * bwinv - scwinv;
  2921. dsdx = dsAC * t2 - dsBC * t1;
  2922. dsdy = dsBC * t3 - dsAC * t4;
  2923. dtAC = a->texture.y * awinv - tcwinv;
  2924. dtBC = b->texture.y * bwinv - tcwinv;
  2925. dtdx = dtAC * t2 - dtBC * t1;
  2926. dtdy = dtBC * t3 - dtAC * t4;
  2927. dqwAC = a->texture.w * awinv - qwcwinv;
  2928. dqwBC = b->texture.w * bwinv - qwcwinv;
  2929. dqwdx = dqwAC * t2 - dqwBC * t1;
  2930. dqwdy = dqwBC * t3 - dqwAC * t4;
  2931. spanDeltaS = FTOL(dsdx * GENACCEL(gc).texXScale);
  2932. spanDeltaT = FTOL(dtdx * GENACCEL(gc).texYScale);
  2933. qwStepX = (gc->polygon.shader.dqwdx * (__GLfloat)TEX_SUBDIV);
  2934. if (gc->polygon.shader.dsdx != dsdx)
  2935. DbgPrint("dsdx %f %f\n", dsdx, gc->polygon.shader.dsdx);
  2936. if (gc->polygon.shader.dsdy != dsdy)
  2937. DbgPrint("dsdy %f %f\n", dsdy, gc->polygon.shader.dsdy);
  2938. if (gc->polygon.shader.dtdx != dtdx)
  2939. DbgPrint("dtdx %f %f\n", dtdx, gc->polygon.shader.dtdx);
  2940. if (gc->polygon.shader.dtdy != dtdy)
  2941. DbgPrint("dtdy %f %f\n", dtdy, gc->polygon.shader.dtdy);
  2942. if (gc->polygon.shader.dqwdx != dqwdx)
  2943. DbgPrint("dqdx %f %f\n", dqwdx, gc->polygon.shader.dqwdx);
  2944. if (gc->polygon.shader.dqwdy != dqwdy)
  2945. DbgPrint("dqdy %f %f\n", dqwdy, gc->polygon.shader.dqwdy);
  2946. if (spanDeltaS != GENACCEL(gc).spanDelta.s)
  2947. DbgPrint("spanDelta.s %x %x\n", spanDeltaS, GENACCEL(gc).spanDelta.s);
  2948. if (spanDeltaT != GENACCEL(gc).spanDelta.t)
  2949. DbgPrint("spanDelta.t %x %x\n", spanDeltaT, GENACCEL(gc).spanDelta.t);
  2950. if (qwStepX != GENACCEL(gc).qwStepX)
  2951. DbgPrint("qwStepX %f %f\n", qwStepX, GENACCEL(gc).qwStepX);
  2952. }
  2953. _asm {
  2954. mov edx, gc
  2955. mov edi, [OFFSET(SHADER.modeFlags)][edx]
  2956. }
  2957. #endif // FORCE_NPX_DEBUG
  2958. texDone:
  2959. _asm{
  2960. test edi, __GL_SHADE_DEPTH_ITER
  2961. je noZ
  2962. mov al, oneOverAreaDone
  2963. test al, al
  2964. jne areaDoneAlready2
  2965. }
  2966. _asm{
  2967. fstp oneOverArea // finish divide
  2968. fld DWORD PTR [OFFSET(SHADER.dyAC)][edx]
  2969. fmul oneOverArea
  2970. fld DWORD PTR [OFFSET(SHADER.dyBC)][edx]
  2971. fmul oneOverArea // dyBC dyAC
  2972. fld DWORD PTR [OFFSET(SHADER.dxAC)][edx]
  2973. fmul oneOverArea // dxAC dyBC dyAC
  2974. fxch ST(1) // dyBC dxAC dyAC
  2975. fld DWORD PTR [OFFSET(SHADER.dxBC)][edx]
  2976. fmul oneOverArea // dxBC dyBC dxAC dyAC
  2977. fxch ST(3) // dyAC dyBC dxAC dxBC
  2978. fstp t1
  2979. inc eax
  2980. fstp t2
  2981. mov oneOverAreaDone, al
  2982. fstp t3
  2983. fstp t4
  2984. }
  2985. areaDoneAlready2:
  2986. _asm{
  2987. mov ecx, c
  2988. mov eax, a
  2989. mov ebx, b
  2990. fld DWORD PTR [OFFSET(__GLvertex.window.z)][eax]
  2991. fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx]
  2992. fld DWORD PTR [OFFSET(__GLvertex.window.z)][ebx]
  2993. fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx]
  2994. // dzBC dzAC
  2995. fld ST(1) // dzAC dzBC dzAC
  2996. fmul t2 // ACt2 dzBC dzAC
  2997. fld ST(1) // dzBC ACt2 dzBC dzAC
  2998. fmul t1 // BCt1 ACt2 dzBC dzAC
  2999. fxch ST(3) // dzAC ACt2 dzBC BCt1
  3000. fmul t4 // ACt4 ACt2 dzBC BCt1
  3001. fxch ST(2) // dzBC ACt2 ACt4 BCt1
  3002. fmul t3 // BCt3 ACt2 ACt4 BCt1
  3003. fsubrp ST(2),ST // ACt2 BCAC BCt1
  3004. fsubrp ST(2),ST // BCAC ACBC
  3005. fxch ST(1) // ACBC BCAC
  3006. // dzdx dzdy
  3007. fld ST(0) // dzdx dzdx dzdy
  3008. fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
  3009. // dzdxS dzdx dzdy
  3010. fxch ST(2) // dzdy dzdx dzdxS
  3011. fstp DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
  3012. fstp DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
  3013. fistp temp
  3014. mov ebx, DWORD PTR temp
  3015. mov DWORD PTR [OFFSET(SHADER.dzdx)][edx], ebx
  3016. mov DWORD PTR [OFFSET(SPANDELTA.z)][edx], ebx
  3017. #if !FORCE_NPX_DEBUG
  3018. jmp deltaDone
  3019. #endif
  3020. }
  3021. #if FORCE_NPX_DEBUG
  3022. {
  3023. __GLfloat dzAC, dzBC;
  3024. __GLfloat dzdxf;
  3025. __GLfloat dzdyf;
  3026. ULONG spanDeltaZ;
  3027. dzAC = a->window.z - c->window.z;
  3028. dzBC = b->window.z - c->window.z;
  3029. dzdxf = dzAC * t2 - dzBC * t1;
  3030. dzdyf = dzBC * t3 - dzAC * t4;
  3031. spanDeltaZ = FTOL(dzdxf * GENACCEL(gc).zScale);
  3032. if (dzdxf != gc->polygon.shader.dzdxf)
  3033. DbgPrint("dzdxf %f %f\n", dzdxf, gc->polygon.shader.dzdxf);
  3034. if (dzdyf != gc->polygon.shader.dzdyf)
  3035. DbgPrint("dzdyf %f %f\n", dzdyf, gc->polygon.shader.dzdyf);
  3036. if (spanDeltaZ != GENACCEL(gc).spanDelta.z)
  3037. DbgPrint("spanDeltaZ %x %x\n", spanDeltaZ, GENACCEL(gc).spanDelta.z);
  3038. }
  3039. #endif // FORCE_NPX_DEBUG
  3040. noZ:
  3041. _asm{
  3042. mov al, oneOverAreaDone
  3043. test al, al
  3044. jne deltaDone
  3045. fstp ST(0)
  3046. }
  3047. deltaDone:
  3048. return;
  3049. #else
  3050. /* Pre-compute one over polygon area */
  3051. __GL_FLOAT_BEGIN_DIVIDE(__glOne, gc->polygon.shader.area, &oneOverArea);
  3052. oneOverAreaDone = GL_FALSE;
  3053. /*
  3054. ** Compute delta values for unit changes in x or y for each
  3055. ** parameter.
  3056. */
  3057. GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastTexSpanFuncPtr;
  3058. if ((gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) &&
  3059. ((gc->state.texture.env[0].mode == GL_REPLACE) ||
  3060. (gc->state.texture.env[0].mode == GL_DECAL))) {
  3061. GENACCEL(gc).spanValue.r = GENACCEL(gc).constantR;
  3062. GENACCEL(gc).spanValue.g = GENACCEL(gc).constantG;
  3063. GENACCEL(gc).spanValue.b = GENACCEL(gc).constantB;
  3064. GENACCEL(gc).spanValue.a = GENACCEL(gc).constantA;
  3065. } else if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
  3066. __GLfloat drAC, dgAC, dbAC, daAC;
  3067. __GLfloat drBC, dgBC, dbBC, daBC;
  3068. __GLcolor *ac, *bc, *cc;
  3069. oneOverAreaDone = GL_TRUE;
  3070. ac = a->color;
  3071. bc = b->color;
  3072. cc = c->color;
  3073. drAC = ac->r - cc->r;
  3074. drBC = bc->r - cc->r;
  3075. dgAC = ac->g - cc->g;
  3076. dgBC = bc->g - cc->g;
  3077. dbAC = ac->b - cc->b;
  3078. dbBC = bc->b - cc->b;
  3079. __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
  3080. t1 = gc->polygon.shader.dyAC * oneOverArea;
  3081. t2 = gc->polygon.shader.dyBC * oneOverArea;
  3082. t3 = gc->polygon.shader.dxAC * oneOverArea;
  3083. t4 = gc->polygon.shader.dxBC * oneOverArea;
  3084. gc->polygon.shader.drdx = drAC * t2 - drBC * t1;
  3085. gc->polygon.shader.drdy = drBC * t3 - drAC * t4;
  3086. gc->polygon.shader.dgdx = dgAC * t2 - dgBC * t1;
  3087. gc->polygon.shader.dgdy = dgBC * t3 - dgAC * t4;
  3088. gc->polygon.shader.dbdx = dbAC * t2 - dbBC * t1;
  3089. gc->polygon.shader.dbdy = dbBC * t3 - dbAC * t4;
  3090. GENACCEL(gc).spanDelta.r = FLT_TO_FIX(gc->polygon.shader.drdx);
  3091. GENACCEL(gc).spanDelta.g = FLT_TO_FIX(gc->polygon.shader.dgdx);
  3092. GENACCEL(gc).spanDelta.b = FLT_TO_FIX(gc->polygon.shader.dbdx);
  3093. if (gc->state.enables.general & __GL_BLEND_ENABLE) {
  3094. daAC = ac->a - cc->a;
  3095. daBC = bc->a - cc->a;
  3096. gc->polygon.shader.dadx = daAC * t2 - daBC * t1;
  3097. gc->polygon.shader.dady = daBC * t3 - daAC * t4;
  3098. GENACCEL(gc).spanDelta.a =
  3099. FTOL(gc->polygon.shader.dadx * GENACCEL(gc).aAccelScale);
  3100. }
  3101. #ifdef GENERIC_CAN_BLEND
  3102. //!! Note: this is not enabled in the assembly code above
  3103. if ( ((GENACCEL(gc).spanDelta.r | GENACCEL(gc).spanDelta.g | GENACCEL(gc).spanDelta.b) == 0)
  3104. && ((GENACCEL(gc).flags & GEN_FASTZBUFFER) == 0)
  3105. ) {
  3106. GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr;
  3107. } else {
  3108. GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastSmoothSpanFuncPtr;
  3109. }
  3110. #endif
  3111. } else {
  3112. __GLcolor *flatColor = gc->vertex.provoking->color;
  3113. GENACCEL(gc).spanValue.r = FLT_TO_FIX(flatColor->r);
  3114. GENACCEL(gc).spanValue.g = FLT_TO_FIX(flatColor->g);
  3115. GENACCEL(gc).spanValue.b = FLT_TO_FIX(flatColor->b);
  3116. if (gc->state.enables.general & __GL_BLEND_ENABLE)
  3117. GENACCEL(gc).spanValue.a = FTOL(flatColor->a * GENACCEL(gc).aAccelScale);
  3118. #ifdef GENERIC_CAN_BLEND
  3119. //!! Note: this is not enabled in the assembly code above
  3120. GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr;
  3121. #endif
  3122. }
  3123. if ((gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) && (GENACCEL(gc).texImage)) {
  3124. __GLfloat awinv, bwinv, cwinv, scwinv, tcwinv, qwcwinv;
  3125. __GLfloat dsAC, dsBC, dtAC, dtBC, dqwAC, dqwBC;
  3126. #ifdef GENERIC_CAN_BLEND
  3127. GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastTexSpanFuncPtr;
  3128. #endif
  3129. if (!oneOverAreaDone)
  3130. {
  3131. oneOverAreaDone = GL_TRUE;
  3132. __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
  3133. t1 = gc->polygon.shader.dyAC * oneOverArea;
  3134. t2 = gc->polygon.shader.dyBC * oneOverArea;
  3135. t3 = gc->polygon.shader.dxAC * oneOverArea;
  3136. t4 = gc->polygon.shader.dxBC * oneOverArea;
  3137. }
  3138. if (gc->state.hints.perspectiveCorrection != GL_NICEST) {
  3139. dsAC = a->texture.x - c->texture.x;
  3140. dsBC = b->texture.x - c->texture.x;
  3141. gc->polygon.shader.dsdx = dsAC * t2 - dsBC * t1;
  3142. gc->polygon.shader.dsdy = dsBC * t3 - dsAC * t4;
  3143. dtAC = a->texture.y - c->texture.y;
  3144. dtBC = b->texture.y - c->texture.y;
  3145. gc->polygon.shader.dtdx = dtAC * t2 - dtBC * t1;
  3146. gc->polygon.shader.dtdy = dtBC * t3 - dtAC * t4;
  3147. GENACCEL(gc).spanDelta.s =
  3148. FTOL(gc->polygon.shader.dsdx * GENACCEL(gc).texXScale);
  3149. GENACCEL(gc).spanDelta.t =
  3150. FTOL(gc->polygon.shader.dtdx * GENACCEL(gc).texYScale);
  3151. GENACCEL(gc).sStepX = (GENACCEL(gc).spanDelta.s * TEX_SUBDIV);
  3152. GENACCEL(gc).tStepX = (GENACCEL(gc).spanDelta.t * TEX_SUBDIV);
  3153. } else {
  3154. awinv = a->window.w;
  3155. bwinv = b->window.w;
  3156. cwinv = c->window.w;
  3157. scwinv = c->texture.x * cwinv;
  3158. tcwinv = c->texture.y * cwinv;
  3159. qwcwinv = c->texture.w * cwinv;
  3160. dsAC = a->texture.x * awinv - scwinv;
  3161. dsBC = b->texture.x * bwinv - scwinv;
  3162. gc->polygon.shader.dsdx = dsAC * t2 - dsBC * t1;
  3163. gc->polygon.shader.dsdy = dsBC * t3 - dsAC * t4;
  3164. dtAC = a->texture.y * awinv - tcwinv;
  3165. dtBC = b->texture.y * bwinv - tcwinv;
  3166. gc->polygon.shader.dtdx = dtAC * t2 - dtBC * t1;
  3167. gc->polygon.shader.dtdy = dtBC * t3 - dtAC * t4;
  3168. dqwAC = a->texture.w * awinv - qwcwinv;
  3169. dqwBC = b->texture.w * bwinv - qwcwinv;
  3170. gc->polygon.shader.dqwdx = dqwAC * t2 - dqwBC * t1;
  3171. gc->polygon.shader.dqwdy = dqwBC * t3 - dqwAC * t4;
  3172. GENACCEL(gc).spanDelta.s = FTOL(gc->polygon.shader.dsdx * GENACCEL(gc).texXScale);
  3173. GENACCEL(gc).spanDelta.t = FTOL(gc->polygon.shader.dtdx * GENACCEL(gc).texYScale);
  3174. GENACCEL(gc).qwStepX = (gc->polygon.shader.dqwdx * (__GLfloat)TEX_SUBDIV);
  3175. GENACCEL(gc).sStepX = (GENACCEL(gc).spanDelta.s * TEX_SUBDIV);
  3176. GENACCEL(gc).tStepX = (GENACCEL(gc).spanDelta.t * TEX_SUBDIV);
  3177. }
  3178. }
  3179. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
  3180. __GLfloat dzAC, dzBC;
  3181. if (!oneOverAreaDone) {
  3182. oneOverAreaDone = GL_TRUE;
  3183. __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
  3184. t1 = gc->polygon.shader.dyAC * oneOverArea;
  3185. t2 = gc->polygon.shader.dyBC * oneOverArea;
  3186. t3 = gc->polygon.shader.dxAC * oneOverArea;
  3187. t4 = gc->polygon.shader.dxBC * oneOverArea;
  3188. }
  3189. dzAC = a->window.z - c->window.z;
  3190. dzBC = b->window.z - c->window.z;
  3191. gc->polygon.shader.dzdxf = dzAC * t2 - dzBC * t1;
  3192. gc->polygon.shader.dzdyf = dzBC * t3 - dzAC * t4;
  3193. GENACCEL(gc).spanDelta.z = gc->polygon.shader.dzdx =
  3194. FTOL(gc->polygon.shader.dzdxf * GENACCEL(gc).zScale);
  3195. }
  3196. if (!oneOverAreaDone)
  3197. {
  3198. // In this case the divide hasn't been terminated yet so
  3199. // we need to complete it even though we don't use the result
  3200. __GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
  3201. }
  3202. #endif // _X86_
  3203. }
  3204. /**************************************************************************\
  3205. \**************************************************************************/
  3206. void FASTCALL __fastGenFillTriangle(
  3207. __GLcontext *gc,
  3208. __GLvertex *a,
  3209. __GLvertex *b,
  3210. __GLvertex *c,
  3211. GLboolean ccw)
  3212. {
  3213. GLint aIY, bIY, cIY;
  3214. __GLfloat dxdyAC, dxdyBC, dxdyBA;
  3215. __GLfloat dx, dy;
  3216. __GLfloat invDyAB, invDyBC, invDyAC;
  3217. #if DBG && CHECK_FPU
  3218. {
  3219. USHORT cw;
  3220. __asm {
  3221. _asm fnstcw cw
  3222. _asm mov ax, cw
  3223. _asm and ah, (~0x3f)
  3224. _asm mov cw,ax
  3225. _asm fldcw cw
  3226. }
  3227. }
  3228. #endif
  3229. //
  3230. // Snap each y coordinate to its pixel center
  3231. //
  3232. aIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(a->window.y)+
  3233. __GL_VERTEX_FRAC_HALF);
  3234. cIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(c->window.y)+
  3235. __GL_VERTEX_FRAC_HALF);
  3236. if (aIY == cIY) {
  3237. return;
  3238. }
  3239. bIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(b->window.y)+
  3240. __GL_VERTEX_FRAC_HALF);
  3241. if (cIY - aIY > __GL_MAX_INV_TABLE)
  3242. goto bigTriangle;
  3243. gc->polygon.shader.cfb = gc->drawBuffer;
  3244. CASTFIX(invDyAB) = CASTFIX(invTable[CASTFIX(b->window.y) - CASTFIX(a->window.y)]) | 0x80000000;
  3245. CASTFIX(invDyBC) = CASTFIX(invTable[CASTFIX(c->window.y) - CASTFIX(b->window.y)]) | 0x80000000;
  3246. CASTFIX(invDyAC) = CASTFIX(invTable[CASTFIX(c->window.y) - CASTFIX(a->window.y)]) | 0x80000000;
  3247. //
  3248. // Calculate delta values for unit changes in x or y
  3249. //
  3250. GENACCEL(gc).__fastCalcDeltaPtr(gc, a, b, c);
  3251. //
  3252. // calculate the destination address
  3253. //
  3254. GENACCEL(gc).pPix =
  3255. (BYTE *)gc->polygon.shader.cfb->buf.base
  3256. + ( gc->polygon.shader.cfb->buf.outerWidth
  3257. * (
  3258. aIY
  3259. - gc->constants.viewportYAdjust
  3260. + gc->polygon.shader.cfb->buf.yOrigin
  3261. )
  3262. )
  3263. + ( GENACCEL(gc).xMultiplier
  3264. * (
  3265. - gc->constants.viewportXAdjust
  3266. + gc->polygon.shader.cfb->buf.xOrigin
  3267. )
  3268. );
  3269. // Calculate destination Z
  3270. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
  3271. {
  3272. if ( gc->modes.depthBits == 32 )
  3273. {
  3274. gc->polygon.shader.zbuf =
  3275. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
  3276. 0, aIY);
  3277. }
  3278. else
  3279. {
  3280. gc->polygon.shader.zbuf = (__GLzValue *)
  3281. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
  3282. 0, aIY);
  3283. }
  3284. }
  3285. /*
  3286. ** This algorithm always fills from bottom to top, left to right.
  3287. ** Because of this, ccw triangles are inherently faster because
  3288. ** the parameter values need not be recomputed.
  3289. */
  3290. if (ccw)
  3291. {
  3292. dy = (aIY + __glHalf) - a->window.y;
  3293. dxdyAC = gc->polygon.shader.dxAC * invDyAC;
  3294. GenSnapXLeft(gc, a->window.x + dy*dxdyAC, dxdyAC);
  3295. dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
  3296. GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
  3297. if (aIY != bIY)
  3298. {
  3299. dxdyBA = (a->window.x - b->window.x) * invDyAB;
  3300. GenSnapXRight(gc, a->window.x + dy*dxdyBA, dxdyBA);
  3301. if (bIY == cIY)
  3302. gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI;
  3303. GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
  3304. if (bIY != cIY)
  3305. {
  3306. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
  3307. {
  3308. if ( gc->modes.depthBits == 32 )
  3309. {
  3310. gc->polygon.shader.zbuf = (__GLzValue *)
  3311. ((GLubyte *)gc->polygon.shader.zbuf-
  3312. (gc->polygon.shader.ixLeft << 2));
  3313. }
  3314. else
  3315. {
  3316. gc->polygon.shader.zbuf = (__GLzValue *)
  3317. ((GLubyte *)gc->polygon.shader.zbuf-
  3318. (gc->polygon.shader.ixLeft << 1));
  3319. }
  3320. }
  3321. }
  3322. }
  3323. if (bIY != cIY)
  3324. {
  3325. dy = (bIY + __glHalf) - b->window.y;
  3326. dxdyBC = (b->window.x - c->window.x) * invDyBC;
  3327. GenSnapXRight(gc, b->window.x + dy*dxdyBC, dxdyBC);
  3328. gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI;
  3329. GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
  3330. }
  3331. }
  3332. else
  3333. {
  3334. dy = (aIY + __glHalf) - a->window.y;
  3335. dxdyAC = gc->polygon.shader.dxAC * invDyAC;
  3336. GenSnapXRight(gc, a->window.x + dy*dxdyAC, dxdyAC);
  3337. if (aIY != bIY)
  3338. {
  3339. dxdyBA = (a->window.x - b->window.x) * invDyAB;
  3340. GenSnapXLeft(gc, a->window.x + dy*dxdyBA, dxdyBA);
  3341. dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
  3342. GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
  3343. if (bIY == cIY)
  3344. gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI;
  3345. GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
  3346. if (bIY != cIY)
  3347. {
  3348. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
  3349. {
  3350. if ( gc->modes.depthBits == 32 )
  3351. {
  3352. gc->polygon.shader.zbuf = (__GLzValue *)
  3353. ((GLubyte *)gc->polygon.shader.zbuf-
  3354. (gc->polygon.shader.ixLeft << 2));
  3355. }
  3356. else
  3357. {
  3358. gc->polygon.shader.zbuf = (__GLzValue *)
  3359. ((GLubyte *)gc->polygon.shader.zbuf-
  3360. (gc->polygon.shader.ixLeft << 1));
  3361. }
  3362. }
  3363. }
  3364. }
  3365. if (bIY != cIY)
  3366. {
  3367. dy = (bIY + __glHalf) - b->window.y;
  3368. dxdyBC = gc->polygon.shader.dxBC * invDyBC;
  3369. GenSnapXLeft(gc, b->window.x + dy*dxdyBC, dxdyBC);
  3370. dx = (gc->polygon.shader.ixLeft + __glHalf) - b->window.x;
  3371. GENACCEL(gc).__fastSetInitParamPtr(gc, b, dx, dy);
  3372. gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI;
  3373. GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
  3374. }
  3375. }
  3376. gc->polygon.shader.modeFlags &= ~(__GL_SHADE_LAST_SUBTRI);
  3377. return;
  3378. bigTriangle:
  3379. __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxAC,
  3380. gc->polygon.shader.dyAC,
  3381. &dxdyAC);
  3382. gc->polygon.shader.cfb = gc->drawBuffer;
  3383. //
  3384. // Calculate delta values for unit changes in x or y
  3385. //
  3386. GENACCEL(gc).__fastCalcDeltaPtr(gc, a, b, c);
  3387. //
  3388. // calculate the destination address
  3389. //
  3390. GENACCEL(gc).pPix =
  3391. (BYTE *)gc->polygon.shader.cfb->buf.base
  3392. + ( gc->polygon.shader.cfb->buf.outerWidth
  3393. * (
  3394. aIY
  3395. - gc->constants.viewportYAdjust
  3396. + gc->polygon.shader.cfb->buf.yOrigin
  3397. )
  3398. )
  3399. + ( GENACCEL(gc).xMultiplier
  3400. * (
  3401. - gc->constants.viewportXAdjust
  3402. + gc->polygon.shader.cfb->buf.xOrigin
  3403. )
  3404. );
  3405. // Calculate destination Z
  3406. if ((gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) &&
  3407. aIY != bIY)
  3408. {
  3409. if ( gc->modes.depthBits == 32 )
  3410. {
  3411. gc->polygon.shader.zbuf =
  3412. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
  3413. 0, aIY);
  3414. }
  3415. else
  3416. {
  3417. gc->polygon.shader.zbuf = (__GLzValue *)
  3418. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
  3419. 0, aIY);
  3420. }
  3421. }
  3422. /*
  3423. ** This algorithm always fills from bottom to top, left to right.
  3424. ** Because of this, ccw triangles are inherently faster because
  3425. ** the parameter values need not be recomputed.
  3426. */
  3427. if (ccw)
  3428. {
  3429. dy = (aIY + __glHalf) - a->window.y;
  3430. __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC);
  3431. GenSnapXLeft(gc, a->window.x + dy*dxdyAC, dxdyAC);
  3432. dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
  3433. GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
  3434. if (aIY != bIY)
  3435. {
  3436. dxdyBA = (a->window.x - b->window.x) /
  3437. (a->window.y - b->window.y);
  3438. GenSnapXRight(gc, a->window.x + dy*dxdyBA, dxdyBA);
  3439. if (bIY != cIY)
  3440. {
  3441. GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
  3442. __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
  3443. gc->polygon.shader.dyBC,
  3444. &dxdyBC);
  3445. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
  3446. {
  3447. if ( gc->modes.depthBits == 32 )
  3448. {
  3449. gc->polygon.shader.zbuf = (__GLzValue *)
  3450. ((GLubyte *)gc->polygon.shader.zbuf-
  3451. (gc->polygon.shader.ixLeft << 2));
  3452. }
  3453. else
  3454. {
  3455. gc->polygon.shader.zbuf = (__GLzValue *)
  3456. ((GLubyte *)gc->polygon.shader.zbuf-
  3457. (gc->polygon.shader.ixLeft << 1));
  3458. }
  3459. }
  3460. }
  3461. else
  3462. {
  3463. GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
  3464. }
  3465. }
  3466. else if (bIY != cIY)
  3467. {
  3468. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
  3469. {
  3470. if ( gc->modes.depthBits == 32 )
  3471. {
  3472. gc->polygon.shader.zbuf =
  3473. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
  3474. 0, bIY);
  3475. }
  3476. else
  3477. {
  3478. gc->polygon.shader.zbuf = (__GLzValue *)
  3479. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
  3480. 0, bIY);
  3481. }
  3482. }
  3483. __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
  3484. gc->polygon.shader.dyBC,
  3485. &dxdyBC);
  3486. }
  3487. if (bIY != cIY)
  3488. {
  3489. dy = (bIY + __glHalf) - b->window.y;
  3490. __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC);
  3491. GenSnapXRight(gc, b->window.x + dy*dxdyBC, dxdyBC);
  3492. GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
  3493. }
  3494. }
  3495. else
  3496. {
  3497. dy = (aIY + __glHalf) - a->window.y;
  3498. __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC);
  3499. GenSnapXRight(gc, a->window.x + dy*dxdyAC, dxdyAC);
  3500. if (aIY != bIY)
  3501. {
  3502. dxdyBA = (a->window.x - b->window.x) /
  3503. (a->window.y - b->window.y);
  3504. GenSnapXLeft(gc, a->window.x + dy*dxdyBA, dxdyBA);
  3505. dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
  3506. GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
  3507. if (bIY != cIY)
  3508. {
  3509. __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
  3510. gc->polygon.shader.dyBC,
  3511. &dxdyBC);
  3512. GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
  3513. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
  3514. {
  3515. if ( gc->modes.depthBits == 32 )
  3516. {
  3517. gc->polygon.shader.zbuf = (__GLzValue *)
  3518. ((GLubyte *)gc->polygon.shader.zbuf-
  3519. (gc->polygon.shader.ixLeft << 2));
  3520. }
  3521. else
  3522. {
  3523. gc->polygon.shader.zbuf = (__GLzValue *)
  3524. ((GLubyte *)gc->polygon.shader.zbuf-
  3525. (gc->polygon.shader.ixLeft << 1));
  3526. }
  3527. }
  3528. }
  3529. else
  3530. {
  3531. GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
  3532. }
  3533. }
  3534. else if (bIY != cIY)
  3535. {
  3536. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
  3537. {
  3538. if ( gc->modes.depthBits == 32 )
  3539. {
  3540. gc->polygon.shader.zbuf =
  3541. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
  3542. 0, bIY);
  3543. }
  3544. else
  3545. {
  3546. gc->polygon.shader.zbuf = (__GLzValue *)
  3547. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
  3548. 0, bIY);
  3549. }
  3550. }
  3551. __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
  3552. gc->polygon.shader.dyBC,
  3553. &dxdyBC);
  3554. }
  3555. if (bIY != cIY)
  3556. {
  3557. dy = (bIY + __glHalf) - b->window.y;
  3558. __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC);
  3559. GenSnapXLeft(gc, b->window.x + dy*dxdyBC, dxdyBC);
  3560. dx = (gc->polygon.shader.ixLeft + __glHalf) - b->window.x;
  3561. GENACCEL(gc).__fastSetInitParamPtr(gc, b, dx, dy);
  3562. GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
  3563. }
  3564. }
  3565. CHOP_ROUND_OFF();
  3566. }
  3567. /**************************************************************************\
  3568. * __fastGenMcdFillTriangle
  3569. *
  3570. * Just like __fastGenFillTriangle, except that the floating point macros
  3571. * __GL_FLOAT_BEGIN_DIVIDE and __GL_FLOAT_SIMPLE_END_DIVIDE are not allowed
  3572. * to straddle a function call to the driver (i.e., __fastFillSubTrianglePtr
  3573. * calls the display driver span functions if direct frame buffer access is
  3574. * not available).
        *
        * The triangle is filled as up to two sub-triangles split at vertex b's
        * snapped scan line: rows (aIY, bIY) and rows (bIY, cIY).  Each non-empty
        * part is handed to GENACCEL(gc).__fastFillSubTrianglePtr.
        *
        * Parameters:
        *   gc      - rendering context; this routine writes
        *             gc->polygon.shader.cfb (set to gc->drawBuffer),
        *             GENACCEL(gc).pPix and, when __GL_SHADE_DEPTH_TEST is set
        *             in gc->polygon.shader.modeFlags, gc->polygon.shader.zbuf.
        *   a, b, c - triangle vertices; only window.x / window.y are read here.
        *             NOTE(review): the code treats a as the first (bottom)
        *             vertex and c as the last; presumably the caller passes
        *             them sorted by window.y - confirm against callers.
        *   ccw     - non-zero: edge A-C is snapped as the left edge and edges
        *             A-B / B-C as the right edge.  Zero: A-C is the right edge
        *             and A-B / B-C the left edge, so the initial parameters
        *             are set up again from b for the second sub-triangle.
        *
        * Returns nothing.
  3575. \**************************************************************************/
  3576. void FASTCALL __fastGenMcdFillTriangle(
  3577. __GLcontext *gc,
  3578. __GLvertex *a,
  3579. __GLvertex *b,
  3580. __GLvertex *c,
  3581. GLboolean ccw)
  3582. {
  3583. GLint aIY, bIY, cIY;
  3584. __GLfloat dxdyAC, dxdyBC, dxdyBA;
  3585. __GLfloat dx, dy;
        // NOTE(review): presumably switches the FPU rounding mode for the
        // float-to-fixed conversions below; it is paired with the
        // CHOP_ROUND_OFF() at the end of the function. Confirm against the
        // macro definition.
  3586. CHOP_ROUND_ON();
  3587. //
  3588. // Calculate delta values for unit changes in x or y
  3589. //
  3590. GENACCEL(gc).__fastCalcDeltaPtr(gc, a, b, c);
        // Begin dxAC / dyAC. dxdyAC is only read after the matching
        // __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC) in the ccw / non-ccw branch
        // below. The __fastCalcDeltaPtr call above is made first so that no
        // function-pointer call sits between the begin/end pair.
  3591. __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxAC,
  3592. gc->polygon.shader.dyAC,
  3593. &dxdyAC);
  3594. //
  3595. // can this be moved up even farther?
  3596. //
  3597. gc->polygon.shader.cfb = gc->drawBuffer;
  3598. //
  3599. // Snap each y coordinate to its pixel center
  3600. //
        // (window.y is converted to fixed point, half a unit is added, and the
        // result is converted to an integer scan-line index.)
  3601. aIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(a->window.y)+
  3602. __GL_VERTEX_FRAC_HALF);
  3603. bIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(b->window.y)+
  3604. __GL_VERTEX_FRAC_HALF);
  3605. cIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(c->window.y)+
  3606. __GL_VERTEX_FRAC_HALF);
  3607. //
  3608. // calculate the destination address
  3609. //
        // pPix = base + outerWidth * (aIY - viewportYAdjust + yOrigin)
        //             + xMultiplier * (xOrigin - viewportXAdjust)
        // i.e. scan line aIY with only the viewport/origin x offset applied;
        // no per-span x position is added here.
  3610. GENACCEL(gc).pPix =
  3611. (BYTE *)gc->polygon.shader.cfb->buf.base
  3612. + ( gc->polygon.shader.cfb->buf.outerWidth
  3613. * (
  3614. aIY
  3615. - gc->constants.viewportYAdjust
  3616. + gc->polygon.shader.cfb->buf.yOrigin
  3617. )
  3618. )
  3619. + ( GENACCEL(gc).xMultiplier
  3620. * (
  3621. - gc->constants.viewportXAdjust
  3622. + gc->polygon.shader.cfb->buf.xOrigin
  3623. )
  3624. );
  3625. // Calculate destination Z
        // Only done here when the lower sub-triangle (aIY..bIY) is non-empty;
        // when aIY == bIY the depth address is computed for row bIY further
        // down instead. depthBits == 32 uses __GLzValue entries, any other
        // depth uses __GLz16Value entries.
  3626. if ((gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) &&
  3627. aIY != bIY)
  3628. {
  3629. if ( gc->modes.depthBits == 32 )
  3630. {
  3631. gc->polygon.shader.zbuf =
  3632. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
  3633. 0, aIY);
  3634. }
  3635. else
  3636. {
  3637. gc->polygon.shader.zbuf = (__GLzValue *)
  3638. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
  3639. 0, aIY);
  3640. }
  3641. }
  3642. /*
  3643. ** This algorithm always fills from bottom to top, left to right.
  3644. ** Because of this, ccw triangles are inherently faster because
  3645. ** the parameter values need not be recomputed.
  3646. */
  3647. if (ccw)
  3648. {
        // ccw: A-C is the left edge for the whole triangle, so the initial
        // parameters are set once from vertex a; only the right edge changes
        // (A-B for the lower part, B-C for the upper part).
  3649. dy = (aIY + __glHalf) - a->window.y;
  3650. __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC);
  3651. GenSnapXLeft(gc, a->window.x + dy*dxdyAC, dxdyAC);
  3652. dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
  3653. GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
  3654. if (aIY != bIY)
  3655. {
        // aIY != bIY implies a->window.y != b->window.y, so the divisor
        // below is non-zero.
  3656. dxdyBA = (a->window.x - b->window.x) /
  3657. (a->window.y - b->window.y);
  3658. GenSnapXRight(gc, a->window.x + dy*dxdyBA, dxdyBA);
  3659. if (bIY != cIY)
  3660. {
        // Fill the lower part first, then begin the B-C divide, so the
        // fill call is not between BEGIN_DIVIDE and END_DIVIDE.
  3661. GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
  3662. __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
  3663. gc->polygon.shader.dyBC,
  3664. &dxdyBC);
  3665. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
  3666. {
        // Step zbuf back by ixLeft entries (4 bytes each for 32-bit depth,
        // 2 bytes each otherwise).
        // NOTE(review): presumably the sub-triangle fill leaves zbuf offset
        // by ixLeft and this rebases it before the upper part - confirm
        // against the fill routine.
  3667. if ( gc->modes.depthBits == 32 )
  3668. {
  3669. gc->polygon.shader.zbuf = (__GLzValue *)
  3670. ((GLubyte *)gc->polygon.shader.zbuf-
  3671. (gc->polygon.shader.ixLeft << 2));
  3672. }
  3673. else
  3674. {
  3675. gc->polygon.shader.zbuf = (__GLzValue *)
  3676. ((GLubyte *)gc->polygon.shader.zbuf-
  3677. (gc->polygon.shader.ixLeft << 1));
  3678. }
  3679. }
  3680. }
  3681. else
  3682. {
        // No upper part (bIY == cIY): just fill the lower part.
  3683. GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
  3684. }
  3685. }
  3686. else if (bIY != cIY)
  3687. {
        // Lower part is empty (aIY == bIY): the depth address was not set
        // above, so compute it for row bIY and begin the B-C divide.
  3688. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
  3689. {
  3690. if ( gc->modes.depthBits == 32 )
  3691. {
  3692. gc->polygon.shader.zbuf =
  3693. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
  3694. 0, bIY);
  3695. }
  3696. else
  3697. {
  3698. gc->polygon.shader.zbuf = (__GLzValue *)
  3699. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
  3700. 0, bIY);
  3701. }
  3702. }
  3703. __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
  3704. gc->polygon.shader.dyBC,
  3705. &dxdyBC);
  3706. }
  3707. if (bIY != cIY)
  3708. {
        // Upper part: both paths that reach here with bIY != cIY have already
        // begun the B-C divide; finish it, snap the new right edge from b,
        // and fill rows bIY..cIY.
  3709. dy = (bIY + __glHalf) - b->window.y;
  3710. __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC);
  3711. GenSnapXRight(gc, b->window.x + dy*dxdyBC, dxdyBC);
  3712. GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
  3713. }
  3714. }
  3715. else
  3716. {
        // Not ccw: A-C is the right edge; the left edge is A-B for the lower
        // part and B-C for the upper part, so the initial parameters are set
        // from a for the lower part and again from b for the upper part.
  3717. dy = (aIY + __glHalf) - a->window.y;
  3718. __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC);
  3719. GenSnapXRight(gc, a->window.x + dy*dxdyAC, dxdyAC);
  3720. if (aIY != bIY)
  3721. {
        // aIY != bIY implies a->window.y != b->window.y, so the divisor
        // below is non-zero.
  3722. dxdyBA = (a->window.x - b->window.x) /
  3723. (a->window.y - b->window.y);
  3724. GenSnapXLeft(gc, a->window.x + dy*dxdyBA, dxdyBA);
  3725. dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
  3726. GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
  3727. if (bIY != cIY)
  3728. {
        // Fill first, then begin the B-C divide, so the fill call is not
        // between BEGIN_DIVIDE and END_DIVIDE.
  3729. GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
  3730. __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
  3731. gc->polygon.shader.dyBC,
  3732. &dxdyBC);
  3733. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
  3734. {
        // Step zbuf back by ixLeft entries (4 bytes each for 32-bit depth,
        // 2 bytes each otherwise).
        // NOTE(review): presumably undoes an ixLeft offset left by the
        // sub-triangle fill - confirm against the fill routine.
  3735. if ( gc->modes.depthBits == 32 )
  3736. {
  3737. gc->polygon.shader.zbuf = (__GLzValue *)
  3738. ((GLubyte *)gc->polygon.shader.zbuf-
  3739. (gc->polygon.shader.ixLeft << 2));
  3740. }
  3741. else
  3742. {
  3743. gc->polygon.shader.zbuf = (__GLzValue *)
  3744. ((GLubyte *)gc->polygon.shader.zbuf-
  3745. (gc->polygon.shader.ixLeft << 1));
  3746. }
  3747. }
  3748. }
  3749. else
  3750. {
        // No upper part (bIY == cIY): just fill the lower part.
  3751. GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
  3752. }
  3753. }
  3754. else if (bIY != cIY)
  3755. {
        // Lower part is empty (aIY == bIY): compute the depth address for
        // row bIY and begin the B-C divide.
  3756. if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
  3757. {
  3758. if ( gc->modes.depthBits == 32 )
  3759. {
  3760. gc->polygon.shader.zbuf =
  3761. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
  3762. 0, bIY);
  3763. }
  3764. else
  3765. {
  3766. gc->polygon.shader.zbuf = (__GLzValue *)
  3767. __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
  3768. 0, bIY);
  3769. }
  3770. }
  3771. __GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
  3772. gc->polygon.shader.dyBC,
  3773. &dxdyBC);
  3774. }
  3775. if (bIY != cIY)
  3776. {
        // Upper part: finish the B-C divide, snap the new left edge from b,
        // recompute the initial parameters at that left edge, then fill
        // rows bIY..cIY.
  3777. dy = (bIY + __glHalf) - b->window.y;
  3778. __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC);
  3779. GenSnapXLeft(gc, b->window.x + dy*dxdyBC, dxdyBC);
  3780. dx = (gc->polygon.shader.ixLeft + __glHalf) - b->window.x;
  3781. GENACCEL(gc).__fastSetInitParamPtr(gc, b, dx, dy);
  3782. GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
  3783. }
  3784. }
        // Pairs with the CHOP_ROUND_ON() at the top of the function.
  3785. CHOP_ROUND_OFF();
  3786. }