Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

901 lines
28 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. // $Id$
  3. #include "raytrace.h"
  4. #include <filesystem_tools.h>
  5. #include <cmdlib.h>
  6. #include <stdio.h>
  7. static bool SameSign(float a, float b)
  8. {
  9. int32 aa=*((int *) &a);
  10. int32 bb=*((int *) &b);
  11. return ((aa^bb)&0x80000000)==0;
  12. }
  13. int FourRays::CalculateDirectionSignMask(void) const
  14. {
  15. // this code treats the floats as integers since all it cares about is the sign bit and
  16. // floating point compares suck.
  17. int ret;
  18. int ormask;
  19. int andmask;
  20. int32 const *treat_as_int=((int32 const *) (&direction));
  21. ormask=andmask=*(treat_as_int++);
  22. ormask|=*treat_as_int;
  23. andmask&=*(treat_as_int++);
  24. ormask|=*(treat_as_int);
  25. andmask&=*(treat_as_int++);
  26. ormask|=*(treat_as_int);
  27. andmask&=*(treat_as_int++);
  28. if (ormask>=0)
  29. ret=0;
  30. else
  31. {
  32. if (andmask<0)
  33. ret=1;
  34. else return -1;
  35. }
  36. ormask=andmask=*(treat_as_int++);
  37. ormask|=*treat_as_int;
  38. andmask&=*(treat_as_int++);
  39. ormask|=*(treat_as_int);
  40. andmask&=*(treat_as_int++);
  41. ormask|=*(treat_as_int);
  42. andmask&=*(treat_as_int++);
  43. if (ormask<0)
  44. {
  45. if (andmask<0)
  46. ret|=2;
  47. else return -1;
  48. }
  49. ormask=andmask=*(treat_as_int++);
  50. ormask|=*treat_as_int;
  51. andmask&=*(treat_as_int++);
  52. ormask|=*(treat_as_int);
  53. andmask&=*(treat_as_int++);
  54. ormask|=*(treat_as_int);
  55. andmask&=*(treat_as_int++);
  56. if (ormask<0)
  57. {
  58. if (andmask<0)
  59. ret|=4;
  60. else return -1;
  61. }
  62. return ret;
  63. }
  64. void RayTracingEnvironment::MakeRoomForTriangles( int ntris )
  65. {
  66. //OptimizedTriangleList.EnsureCapacity( ntris );
  67. if (! (Flags & RTE_FLAGS_DONT_STORE_TRIANGLE_COLORS))
  68. TriangleColors.EnsureCapacity( ntris );
  69. }
  70. void RayTracingEnvironment::AddTriangle(int32 id, const Vector &v1,
  71. const Vector &v2, const Vector &v3,
  72. const Vector &color)
  73. {
  74. AddTriangle( id, v1, v2, v3, color, 0, 0 );
  75. }
  76. void RayTracingEnvironment::AddTriangle(int32 id, const Vector &v1,
  77. const Vector &v2, const Vector &v3,
  78. const Vector &color, uint16 flags, int32 materialIndex)
  79. {
  80. CacheOptimizedTriangle tmptri;
  81. tmptri.m_Data.m_GeometryData.m_nTriangleID = id;
  82. tmptri.Vertex( 0 ) = v1;
  83. tmptri.Vertex( 1 ) = v2;
  84. tmptri.Vertex( 2 ) = v3;
  85. tmptri.m_Data.m_GeometryData.m_nFlags = flags;
  86. OptimizedTriangleList.AddToTail(tmptri);
  87. if (! ( Flags & RTE_FLAGS_DONT_STORE_TRIANGLE_COLORS) )
  88. TriangleColors.AddToTail(color);
  89. if ( !( Flags & RTE_FLAGS_DONT_STORE_TRIANGLE_MATERIALS) )
  90. TriangleMaterials.AddToTail(materialIndex);
  91. // printf("add triange from (%f %f %f),(%f %f %f),(%f %f %f) id %d\n",
  92. // XYZ(v1),XYZ(v2),XYZ(v3),id);
  93. }
  94. void RayTracingEnvironment::AddQuad(
  95. int32 id, const Vector &v1, const Vector &v2, const Vector &v3,
  96. const Vector &v4, // specify vertices in cw or ccw order
  97. const Vector &color)
  98. {
  99. AddTriangle(id,v1,v2,v3,color);
  100. AddTriangle(id+1,v1,v3,v4,color);
  101. }
  102. void RayTracingEnvironment::AddAxisAlignedRectangularSolid(int id,Vector minc, Vector maxc,
  103. const Vector &color)
  104. {
  105. // "far" face
  106. AddQuad(id,
  107. Vector(minc.x,maxc.y,maxc.z),
  108. Vector(maxc.x,maxc.y,maxc.z),Vector(maxc.x,minc.y,maxc.z),
  109. Vector(minc.x,minc.y,maxc.z),color);
  110. // "near" face
  111. AddQuad(id,
  112. Vector(minc.x,maxc.y,minc.z),
  113. Vector(maxc.x,maxc.y,minc.z),Vector(maxc.x,minc.y,minc.z),
  114. Vector(minc.x,minc.y,minc.z),color);
  115. // "left" face
  116. AddQuad(id,
  117. Vector(minc.x,maxc.y,maxc.z),
  118. Vector(minc.x,maxc.y,minc.z),
  119. Vector(minc.x,minc.y,minc.z),
  120. Vector(minc.x,minc.y,maxc.z),color);
  121. // "right" face
  122. AddQuad(id,
  123. Vector(maxc.x,maxc.y,maxc.z),
  124. Vector(maxc.x,maxc.y,minc.z),
  125. Vector(maxc.x,minc.y,minc.z),
  126. Vector(maxc.x,minc.y,maxc.z),color);
  127. // "top" face
  128. AddQuad(id,
  129. Vector(minc.x,maxc.y,maxc.z),
  130. Vector(maxc.x,maxc.y,maxc.z),
  131. Vector(maxc.x,maxc.y,minc.z),
  132. Vector(minc.x,maxc.y,minc.z),color);
  133. // "bot" face
  134. AddQuad(id,
  135. Vector(minc.x,minc.y,maxc.z),
  136. Vector(maxc.x,minc.y,maxc.z),
  137. Vector(maxc.x,minc.y,minc.z),
  138. Vector(minc.x,minc.y,minc.z),color);
  139. }
  140. static Vector GetEdgeEquation(Vector p1, Vector p2, int c1, int c2, Vector InsidePoint)
  141. {
  142. float nx=p1[c2]-p2[c2];
  143. float ny=p2[c1]-p1[c1];
  144. float d=-(nx*p1[c1]+ny*p1[c2]);
  145. // assert(fabs(nx*p1[c1]+ny*p1[c2]+d)<0.01);
  146. // assert(fabs(nx*p2[c1]+ny*p2[c2]+d)<0.01);
  147. // use the convention that negative is "outside"
  148. float trial_dist=InsidePoint[c1]*nx+InsidePoint[c2]*ny+d;
  149. if (trial_dist<0)
  150. {
  151. nx = -nx;
  152. ny = -ny;
  153. d = -d;
  154. trial_dist = -trial_dist;
  155. }
  156. nx /= trial_dist; // scale so that it will be =1.0 at the oppositve vertex
  157. ny /= trial_dist;
  158. d /= trial_dist;
  159. return Vector(nx,ny,d);
  160. }
  161. void CacheOptimizedTriangle::ChangeIntoIntersectionFormat(void)
  162. {
  163. // lose the vertices and use edge equations instead
  164. // grab the whole original triangle to we don't overwrite it
  165. TriGeometryData_t srcTri = m_Data.m_GeometryData;
  166. m_Data.m_IntersectData.m_nFlags = srcTri.m_nFlags;
  167. m_Data.m_IntersectData.m_nTriangleID = srcTri.m_nTriangleID;
  168. Vector p1 = srcTri.Vertex( 0 );
  169. Vector p2 = srcTri.Vertex( 1 );
  170. Vector p3 = srcTri.Vertex( 2 );
  171. Vector e1 = p2 - p1;
  172. Vector e2 = p3 - p1;
  173. Vector N = e1.Cross( e2 );
  174. N.NormalizeInPlace();
  175. // now, determine which axis to drop
  176. int drop_axis = 0;
  177. for(int c=1 ; c<3 ; c++)
  178. if ( fabs(N[c]) > fabs( N[drop_axis] ) )
  179. drop_axis = c;
  180. m_Data.m_IntersectData.m_flD = N.Dot( p1 );
  181. m_Data.m_IntersectData.m_flNx = N.x;
  182. m_Data.m_IntersectData.m_flNy = N.y;
  183. m_Data.m_IntersectData.m_flNz = N.z;
  184. // decide which axes to keep
  185. int nCoordSelect0 = ( drop_axis + 1 ) % 3;
  186. int nCoordSelect1 = ( drop_axis + 2 ) % 3;
  187. m_Data.m_IntersectData.m_nCoordSelect0 = nCoordSelect0;
  188. m_Data.m_IntersectData.m_nCoordSelect1 = nCoordSelect1;
  189. Vector edge1 = GetEdgeEquation( p1, p2, nCoordSelect0, nCoordSelect1, p3 );
  190. m_Data.m_IntersectData.m_ProjectedEdgeEquations[0] = edge1.x;
  191. m_Data.m_IntersectData.m_ProjectedEdgeEquations[1] = edge1.y;
  192. m_Data.m_IntersectData.m_ProjectedEdgeEquations[2] = edge1.z;
  193. Vector edge2 = GetEdgeEquation( p2, p3, nCoordSelect0, nCoordSelect1, p1 );
  194. m_Data.m_IntersectData.m_ProjectedEdgeEquations[3] = edge2.x;
  195. m_Data.m_IntersectData.m_ProjectedEdgeEquations[4] = edge2.y;
  196. m_Data.m_IntersectData.m_ProjectedEdgeEquations[5] = edge2.z;
  197. }
  198. int n_intersection_calculations=0;
  199. int CacheOptimizedTriangle::ClassifyAgainstAxisSplit(int split_plane, float split_value)
  200. {
  201. // classify a triangle against an axis-aligned plane
  202. float minc=Vertex(0)[split_plane];
  203. float maxc=minc;
  204. for(int v=1;v<3;v++)
  205. {
  206. minc=min(minc,Vertex(v)[split_plane]);
  207. maxc=max(maxc,Vertex(v)[split_plane]);
  208. }
  209. if (minc>=split_value)
  210. return PLANECHECK_POSITIVE;
  211. if (maxc<=split_value)
  212. return PLANECHECK_NEGATIVE;
  213. if (minc==maxc)
  214. return PLANECHECK_POSITIVE;
  215. return PLANECHECK_STRADDLING;
  216. }
  217. #define MAILBOX_HASH_SIZE 256
  218. #define MAX_TREE_DEPTH 21
  219. #define MAX_NODE_STACK_LEN (40*MAX_TREE_DEPTH)
  220. struct NodeToVisit {
  221. CacheOptimizedKDNode const *node;
  222. fltx4 TMin;
  223. fltx4 TMax;
  224. };
  225. static fltx4 FourEpsilons={1.0e-10,1.0e-10,1.0e-10,1.0e-10};
  226. static fltx4 FourZeros={1.0e-10,1.0e-10,1.0e-10,1.0e-10};
  227. static fltx4 FourNegativeEpsilons={-1.0e-10,-1.0e-10,-1.0e-10,-1.0e-10};
  228. static float BoxSurfaceArea(Vector const &boxmin, Vector const &boxmax)
  229. {
  230. Vector boxdim=boxmax-boxmin;
  231. return 2.0*((boxdim[0]*boxdim[2])+(boxdim[0]*boxdim[1])+(boxdim[1]*boxdim[2]));
  232. }
  233. void RayTracingEnvironment::Trace4Rays(const FourRays &rays, fltx4 TMin, fltx4 TMax,
  234. RayTracingResult *rslt_out,
  235. int32 skip_id, ITransparentTriangleCallback *pCallback)
  236. {
  237. int msk=rays.CalculateDirectionSignMask();
  238. if (msk!=-1)
  239. Trace4Rays(rays,TMin,TMax,msk,rslt_out,skip_id, pCallback);
  240. else
  241. {
  242. // sucky case - can't trace 4 rays at once. in the worst case, need to trace all 4
  243. // separately, but usually we will still get 2x, Since our tracer only does 4 at a
  244. // time, we will have to cover up the undesired rays with the desired ray
  245. //!! speed!! there is room for some sse-ization here
  246. FourRays tmprays;
  247. tmprays.origin=rays.origin;
  248. uint8 need_trace[4]={1,1,1,1};
  249. for(int try_trace=0;try_trace<4;try_trace++)
  250. {
  251. if (need_trace[try_trace])
  252. {
  253. need_trace[try_trace]=2; // going to trace it
  254. // replicate the ray being traced into all 4 rays
  255. tmprays.direction.x=ReplicateX4(rays.direction.X(try_trace));
  256. tmprays.direction.y=ReplicateX4(rays.direction.Y(try_trace));
  257. tmprays.direction.z=ReplicateX4(rays.direction.Z(try_trace));
  258. // now, see if any of the other remaining rays can be handled at the same time.
  259. for(int try2=try_trace+1;try2<4;try2++)
  260. if (need_trace[try2])
  261. {
  262. if (
  263. SameSign(rays.direction.X(try2),
  264. rays.direction.X(try_trace)) &&
  265. SameSign(rays.direction.Y(try2),
  266. rays.direction.Y(try_trace)) &&
  267. SameSign(rays.direction.Z(try2),
  268. rays.direction.Z(try_trace)))
  269. {
  270. need_trace[try2]=2;
  271. tmprays.direction.X(try2) = rays.direction.X(try2);
  272. tmprays.direction.Y(try2) = rays.direction.Y(try2);
  273. tmprays.direction.Z(try2) = rays.direction.Z(try2);
  274. }
  275. }
  276. // ok, now trace between 1 and 3 rays, and output the results
  277. RayTracingResult tmpresults;
  278. msk=tmprays.CalculateDirectionSignMask();
  279. assert(msk!=-1);
  280. Trace4Rays(tmprays,TMin,TMax,msk,&tmpresults,skip_id, pCallback);
  281. // now, move results to proper place
  282. for(int i=0;i<4;i++)
  283. if (need_trace[i]==2)
  284. {
  285. need_trace[i]=0;
  286. rslt_out->HitIds[i]=tmpresults.HitIds[i];
  287. SubFloat(rslt_out->HitDistance, i) = SubFloat(tmpresults.HitDistance, i);
  288. rslt_out->surface_normal.X(i) = tmpresults.surface_normal.X(i);
  289. rslt_out->surface_normal.Y(i) = tmpresults.surface_normal.Y(i);
  290. rslt_out->surface_normal.Z(i) = tmpresults.surface_normal.Z(i);
  291. }
  292. }
  293. }
  294. }
  295. }
  296. void RayTracingEnvironment::Trace4Rays(const FourRays &rays, fltx4 TMin, fltx4 TMax,
  297. int DirectionSignMask, RayTracingResult *rslt_out,
  298. int32 skip_id, ITransparentTriangleCallback *pCallback)
  299. {
  300. rays.Check();
  301. memset(rslt_out->HitIds,0xff,sizeof(rslt_out->HitIds));
  302. rslt_out->HitDistance=ReplicateX4(1.0e23);
  303. rslt_out->surface_normal.DuplicateVector(Vector(0.,0.,0.));
  304. FourVectors OneOverRayDir=rays.direction;
  305. OneOverRayDir.MakeReciprocalSaturate();
  306. // now, clip rays against bounding box
  307. for(int c=0;c<3;c++)
  308. {
  309. fltx4 isect_min_t=
  310. MulSIMD(SubSIMD(ReplicateX4(m_MinBound[c]),rays.origin[c]),OneOverRayDir[c]);
  311. fltx4 isect_max_t=
  312. MulSIMD(SubSIMD(ReplicateX4(m_MaxBound[c]),rays.origin[c]),OneOverRayDir[c]);
  313. TMin=MaxSIMD(TMin,MinSIMD(isect_min_t,isect_max_t));
  314. TMax=MinSIMD(TMax,MaxSIMD(isect_min_t,isect_max_t));
  315. }
  316. fltx4 active=CmpLeSIMD(TMin,TMax); // mask of which rays are active
  317. if (! IsAnyNegative(active) )
  318. return; // missed bounding box
  319. int32 mailboxids[MAILBOX_HASH_SIZE]; // used to avoid redundant triangle tests
  320. memset(mailboxids,0xff,sizeof(mailboxids)); // !!speed!! keep around?
  321. int front_idx[3],back_idx[3]; // based on ray direction, whether to
  322. // visit left or right node first
  323. if (DirectionSignMask & 1)
  324. {
  325. back_idx[0]=0;
  326. front_idx[0]=1;
  327. }
  328. else
  329. {
  330. back_idx[0]=1;
  331. front_idx[0]=0;
  332. }
  333. if (DirectionSignMask & 2)
  334. {
  335. back_idx[1]=0;
  336. front_idx[1]=1;
  337. }
  338. else
  339. {
  340. back_idx[1]=1;
  341. front_idx[1]=0;
  342. }
  343. if (DirectionSignMask & 4)
  344. {
  345. back_idx[2]=0;
  346. front_idx[2]=1;
  347. }
  348. else
  349. {
  350. back_idx[2]=1;
  351. front_idx[2]=0;
  352. }
  353. NodeToVisit NodeQueue[MAX_NODE_STACK_LEN];
  354. CacheOptimizedKDNode const *CurNode=&(OptimizedKDTree[0]);
  355. NodeToVisit *stack_ptr=&NodeQueue[MAX_NODE_STACK_LEN];
  356. while(1)
  357. {
  358. while (CurNode->NodeType() != KDNODE_STATE_LEAF) // traverse until next leaf
  359. {
  360. int split_plane_number=CurNode->NodeType();
  361. CacheOptimizedKDNode const *FrontChild=&(OptimizedKDTree[CurNode->LeftChild()]);
  362. fltx4 dist_to_sep_plane= // dist=(split-org)/dir
  363. MulSIMD(
  364. SubSIMD(ReplicateX4(CurNode->SplittingPlaneValue),
  365. rays.origin[split_plane_number]),OneOverRayDir[split_plane_number]);
  366. active=CmpLeSIMD(TMin,TMax); // mask of which rays are active
  367. // now, decide how to traverse children. can either do front,back, or do front and push
  368. // back.
  369. fltx4 hits_front=AndSIMD(active,CmpGeSIMD(dist_to_sep_plane,TMin));
  370. if (! IsAnyNegative(hits_front))
  371. {
  372. // missed the front. only traverse back
  373. //printf("only visit back %d\n",CurNode->LeftChild()+back_idx[split_plane_number]);
  374. CurNode=FrontChild+back_idx[split_plane_number];
  375. TMin=MaxSIMD(TMin, dist_to_sep_plane);
  376. }
  377. else
  378. {
  379. fltx4 hits_back=AndSIMD(active,CmpLeSIMD(dist_to_sep_plane,TMax));
  380. if (! IsAnyNegative(hits_back) )
  381. {
  382. // missed the back - only need to traverse front node
  383. //printf("only visit front %d\n",CurNode->LeftChild()+front_idx[split_plane_number]);
  384. CurNode=FrontChild+front_idx[split_plane_number];
  385. TMax=MinSIMD(TMax, dist_to_sep_plane);
  386. }
  387. else
  388. {
  389. // at least some rays hit both nodes.
  390. // must push far, traverse near
  391. //printf("visit %d,%d\n",CurNode->LeftChild()+front_idx[split_plane_number],
  392. // CurNode->LeftChild()+back_idx[split_plane_number]);
  393. assert(stack_ptr>NodeQueue);
  394. --stack_ptr;
  395. stack_ptr->node=FrontChild+back_idx[split_plane_number];
  396. stack_ptr->TMin=MaxSIMD(TMin,dist_to_sep_plane);
  397. stack_ptr->TMax=TMax;
  398. CurNode=FrontChild+front_idx[split_plane_number];
  399. TMax=MinSIMD(TMax,dist_to_sep_plane);
  400. }
  401. }
  402. }
  403. // hit a leaf! must do intersection check
  404. int ntris=CurNode->NumberOfTrianglesInLeaf();
  405. if (ntris)
  406. {
  407. int32 const *tlist=&(TriangleIndexList[CurNode->TriangleIndexStart()]);
  408. do
  409. {
  410. int tnum=*(tlist++);
  411. //printf("try tri %d\n",tnum);
  412. // check mailbox
  413. int mbox_slot=tnum & (MAILBOX_HASH_SIZE-1);
  414. TriIntersectData_t const *tri = &( OptimizedTriangleList[tnum].m_Data.m_IntersectData );
  415. if ( ( mailboxids[mbox_slot] != tnum ) && ( tri->m_nTriangleID != skip_id ) )
  416. {
  417. n_intersection_calculations++;
  418. mailboxids[mbox_slot] = tnum;
  419. // compute plane intersection
  420. FourVectors N;
  421. N.x = ReplicateX4( tri->m_flNx );
  422. N.y = ReplicateX4( tri->m_flNy );
  423. N.z = ReplicateX4( tri->m_flNz );
  424. fltx4 DDotN = rays.direction * N;
  425. // mask off zero or near zero (ray parallel to surface)
  426. fltx4 did_hit = OrSIMD( CmpGtSIMD( DDotN,FourEpsilons ),
  427. CmpLtSIMD( DDotN, FourNegativeEpsilons ) );
  428. fltx4 numerator=SubSIMD( ReplicateX4( tri->m_flD ), rays.origin * N );
  429. fltx4 isect_t=DivSIMD( numerator,DDotN );
  430. // now, we have the distance to the plane. lets update our mask
  431. did_hit = AndSIMD( did_hit, CmpGtSIMD( isect_t, FourZeros ) );
  432. //did_hit=AndSIMD(did_hit,CmpLtSIMD(isect_t,TMax));
  433. did_hit = AndSIMD( did_hit, CmpLtSIMD( isect_t, rslt_out->HitDistance ) );
  434. if ( ! IsAnyNegative( did_hit ) )
  435. continue;
  436. // now, check 3 edges
  437. fltx4 hitc1 = AddSIMD( rays.origin[tri->m_nCoordSelect0],
  438. MulSIMD( isect_t, rays.direction[ tri->m_nCoordSelect0] ) );
  439. fltx4 hitc2 = AddSIMD( rays.origin[tri->m_nCoordSelect1],
  440. MulSIMD( isect_t, rays.direction[tri->m_nCoordSelect1] ) );
  441. // do barycentric coordinate check
  442. fltx4 B0 = MulSIMD( ReplicateX4( tri->m_ProjectedEdgeEquations[0] ), hitc1 );
  443. B0 = AddSIMD(
  444. B0,
  445. MulSIMD( ReplicateX4( tri->m_ProjectedEdgeEquations[1] ), hitc2 ) );
  446. B0 = AddSIMD(
  447. B0, ReplicateX4( tri->m_ProjectedEdgeEquations[2] ) );
  448. did_hit = AndSIMD( did_hit, CmpGeSIMD( B0, FourZeros ) );
  449. fltx4 B1 = MulSIMD( ReplicateX4( tri->m_ProjectedEdgeEquations[3] ), hitc1 );
  450. B1 = AddSIMD(
  451. B1,
  452. MulSIMD( ReplicateX4( tri->m_ProjectedEdgeEquations[4]), hitc2 ) );
  453. B1 = AddSIMD(
  454. B1, ReplicateX4( tri->m_ProjectedEdgeEquations[5] ) );
  455. did_hit = AndSIMD( did_hit, CmpGeSIMD( B1, FourZeros ) );
  456. fltx4 B2 = AddSIMD( B1, B0 );
  457. did_hit = AndSIMD( did_hit, CmpLeSIMD( B2, Four_Ones ) );
  458. if ( ! IsAnyNegative( did_hit ) )
  459. continue;
  460. // if the triangle is transparent
  461. if ( tri->m_nFlags & FCACHETRI_TRANSPARENT )
  462. {
  463. if ( pCallback )
  464. {
  465. // assuming a triangle indexed as v0, v1, v2
  466. // the projected edge equations are set up such that the vert opposite the first
  467. // equation is v2, and the vert opposite the second equation is v0
  468. // Therefore we pass them back in 1, 2, 0 order
  469. // Also B2 is currently B1 + B0 and needs to be 1 - (B1+B0) in order to be a real
  470. // barycentric coordinate. Compute that now and pass it to the callback
  471. fltx4 b2 = SubSIMD( Four_Ones, B2 );
  472. if ( pCallback->VisitTriangle_ShouldContinue( *tri, rays, &did_hit, &B1, &b2, &B0, tnum ) )
  473. {
  474. did_hit = Four_Zeros;
  475. }
  476. }
  477. }
  478. // now, set the hit_id and closest_hit fields for any enabled rays
  479. fltx4 replicated_n = ReplicateIX4(tnum);
  480. StoreAlignedSIMD((float *) rslt_out->HitIds,
  481. OrSIMD(AndSIMD(replicated_n,did_hit),
  482. AndNotSIMD(did_hit,LoadAlignedSIMD(
  483. (float *) rslt_out->HitIds))));
  484. rslt_out->HitDistance=OrSIMD(AndSIMD(isect_t,did_hit),
  485. AndNotSIMD(did_hit,rslt_out->HitDistance));
  486. rslt_out->surface_normal.x=OrSIMD(
  487. AndSIMD(N.x,did_hit),
  488. AndNotSIMD(did_hit,rslt_out->surface_normal.x));
  489. rslt_out->surface_normal.y=OrSIMD(
  490. AndSIMD(N.y,did_hit),
  491. AndNotSIMD(did_hit,rslt_out->surface_normal.y));
  492. rslt_out->surface_normal.z=OrSIMD(
  493. AndSIMD(N.z,did_hit),
  494. AndNotSIMD(did_hit,rslt_out->surface_normal.z));
  495. }
  496. } while (--ntris);
  497. // now, check if all rays have terminated
  498. fltx4 raydone=CmpLeSIMD(TMax,rslt_out->HitDistance);
  499. if (! IsAnyNegative(raydone))
  500. {
  501. return;
  502. }
  503. }
  504. if (stack_ptr==&NodeQueue[MAX_NODE_STACK_LEN])
  505. {
  506. return;
  507. }
  508. // pop stack!
  509. CurNode=stack_ptr->node;
  510. TMin=stack_ptr->TMin;
  511. TMax=stack_ptr->TMax;
  512. stack_ptr++;
  513. }
  514. }
  515. int RayTracingEnvironment::MakeLeafNode(int first_tri, int last_tri)
  516. {
  517. CacheOptimizedKDNode ret;
  518. ret.Children=KDNODE_STATE_LEAF+(TriangleIndexList.Count()<<2);
  519. ret.SetNumberOfTrianglesInLeafNode(1+(last_tri-first_tri));
  520. for(int tnum=first_tri;tnum<=last_tri;tnum++)
  521. TriangleIndexList.AddToTail(tnum);
  522. OptimizedKDTree.AddToTail(ret);
  523. return OptimizedKDTree.Count()-1;
  524. }
  525. void RayTracingEnvironment::CalculateTriangleListBounds(int32 const *tris,int ntris,
  526. Vector &minout, Vector &maxout)
  527. {
  528. minout = Vector( 1.0e23, 1.0e23, 1.0e23);
  529. maxout = Vector( -1.0e23, -1.0e23, -1.0e23);
  530. for(int i=0; i<ntris; i++)
  531. {
  532. CacheOptimizedTriangle const &tri=OptimizedTriangleList[tris[i]];
  533. for(int v=0; v<3; v++)
  534. for(int c=0; c<3; c++)
  535. {
  536. minout[c]=min(minout[c],tri.Vertex(v)[c]);
  537. maxout[c]=max(maxout[c],tri.Vertex(v)[c]);
  538. }
  539. }
  540. }
  541. // Both the "quick" and regular kd tree building algorithms here use the "surface area heuristic":
  542. // the relative probability of hitting the "left" subvolume (Vl) from a split is equal to that
  543. // subvolume's surface area divided by its parent's surface area (Vp) : P(Vl | V)=SA(Vl)/SA(Vp).
  544. // The same holds for the right subvolume, Vp. Nl is the number of triangles in the left volume,
  545. // and Nr in the right volume. if Ct is the cost of traversing one tree node, and Ci is the cost of
  546. // intersection with the primitive, than the cost of splitting is estimated as:
  547. //
  548. // Ct+Ci*((SA(Vl)/SA(V))*Nl+(SA(Vr)/SA(V)*Nr)).
  549. // and the cost of not splitting is
  550. // Ci*N
  551. //
  552. // This both provides a metric to minimize when computing how and where to split, and also a
  553. // termination criterion.
  554. //
  555. // the "quick" method just splits down the middle, while the slow method splits at the best
  556. // discontinuity of the cost formula. The quick method splits along the longest axis ; the
  557. // regular algorithm tries all 3 to find which one results in the minimum cost
  558. //
  559. // both methods use the additional optimization of "growing" empty nodes - if the split results in
  560. // one side being devoid of triangles, the empty side is "grown" as much as possible.
  561. //
  562. #define COST_OF_TRAVERSAL 75 // approximate #operations
  563. #define COST_OF_INTERSECTION 167 // approximate #operations
  564. float RayTracingEnvironment::CalculateCostsOfSplit(
  565. int split_plane,int32 const *tri_list,int ntris,
  566. Vector MinBound,Vector MaxBound, float &split_value,
  567. int &nleft, int &nright, int &nboth)
  568. {
  569. // determine the costs of splitting on a given axis, and label triangles with respect to
  570. // that axis by storing the value in coordselect0. It will also return the number of
  571. // tris in the left, right, and nboth groups, in order to facilitate memory
  572. nleft=nboth=nright=0;
  573. // now, label each triangle. Since we have not converted the triangles into
  574. // intersection fromat yet, we can use the CoordSelect0 field of each as a temp.
  575. nleft=0;
  576. nright=0;
  577. nboth=0;
  578. float min_coord=1.0e23,max_coord=-1.0e23;
  579. for(int t=0;t<ntris;t++)
  580. {
  581. CacheOptimizedTriangle &tri=OptimizedTriangleList[tri_list[t]];
  582. // determine max and min coordinate values for later optimization
  583. for(int v=0;v<3;v++)
  584. {
  585. min_coord = min( min_coord, tri.Vertex(v)[split_plane] );
  586. max_coord = max( max_coord, tri.Vertex(v)[split_plane] );
  587. }
  588. switch(tri.ClassifyAgainstAxisSplit(split_plane,split_value))
  589. {
  590. case PLANECHECK_NEGATIVE:
  591. nleft++;
  592. tri.m_Data.m_GeometryData.m_nTmpData0 = PLANECHECK_NEGATIVE;
  593. break;
  594. case PLANECHECK_POSITIVE:
  595. nright++;
  596. tri.m_Data.m_GeometryData.m_nTmpData0 = PLANECHECK_POSITIVE;
  597. break;
  598. case PLANECHECK_STRADDLING:
  599. nboth++;
  600. tri.m_Data.m_GeometryData.m_nTmpData0 = PLANECHECK_STRADDLING;
  601. break;
  602. }
  603. }
  604. // now, if the split resulted in one half being empty, "grow" the empty half
  605. if (nleft && (nboth==0) && (nright==0))
  606. split_value=max_coord;
  607. if (nright && (nboth==0) && (nleft==0))
  608. split_value=min_coord;
  609. // now, perform surface area/cost check to determine whether this split was worth it
  610. Vector LeftMins=MinBound;
  611. Vector LeftMaxes=MaxBound;
  612. Vector RightMins=MinBound;
  613. Vector RightMaxes=MaxBound;
  614. LeftMaxes[split_plane]=split_value;
  615. RightMins[split_plane]=split_value;
  616. float SA_L=BoxSurfaceArea(LeftMins,LeftMaxes);
  617. float SA_R=BoxSurfaceArea(RightMins,RightMaxes);
  618. float ISA=1.0/BoxSurfaceArea(MinBound,MaxBound);
  619. float cost_of_split=COST_OF_TRAVERSAL+COST_OF_INTERSECTION*(nboth+
  620. (SA_L*ISA*(nleft))+(SA_R*ISA*(nright)));
  621. return cost_of_split;
  622. }
  623. #define NEVER_SPLIT 0
  624. void RayTracingEnvironment::RefineNode(int node_number,int32 const *tri_list,int ntris,
  625. Vector MinBound,Vector MaxBound, int depth)
  626. {
  627. if (ntris<3) // never split empty lists
  628. {
  629. // no point in continuing
  630. OptimizedKDTree[node_number].Children=KDNODE_STATE_LEAF+(TriangleIndexList.Count()<<2);
  631. OptimizedKDTree[node_number].SetNumberOfTrianglesInLeafNode(ntris);
  632. #ifdef DEBUG_RAYTRACE
  633. OptimizedKDTree[node_number].vecMins = MinBound;
  634. OptimizedKDTree[node_number].vecMaxs = MaxBound;
  635. #endif
  636. for(int t=0;t<ntris;t++)
  637. TriangleIndexList.AddToTail(tri_list[t]);
  638. return;
  639. }
  640. float best_cost=1.0e23;
  641. int best_nleft=0,best_nright=0,best_nboth=0;
  642. float best_splitvalue=0;
  643. int split_plane=0;
  644. int tri_skip=1+(ntris/10); // don't try all trinagles as split
  645. // points when there are a lot of them
  646. for(int axis=0;axis<3;axis++)
  647. {
  648. for(int ts=-1;ts<ntris;ts+=tri_skip)
  649. {
  650. for(int tv=0;tv<3;tv++)
  651. {
  652. int trial_nleft,trial_nright,trial_nboth;
  653. float trial_splitvalue;
  654. if (ts==-1)
  655. trial_splitvalue=0.5*(MinBound[axis]+MaxBound[axis]);
  656. else
  657. {
  658. // else, split at the triangle vertex if possible
  659. CacheOptimizedTriangle &tri=OptimizedTriangleList[tri_list[ts]];
  660. trial_splitvalue = tri.Vertex(tv)[axis];
  661. if ((trial_splitvalue>MaxBound[axis]) || (trial_splitvalue<MinBound[axis]))
  662. continue; // don't try this vertex - not inside
  663. }
  664. // printf("ts=%d tv=%d tp=%f\n",ts,tv,trial_splitvalue);
  665. float trial_cost=
  666. CalculateCostsOfSplit(axis,tri_list,ntris,MinBound,MaxBound,trial_splitvalue,
  667. trial_nleft,trial_nright, trial_nboth);
  668. // printf("try %d cost=%f nl=%d nr=%d nb=%d sp=%f\n",axis,trial_cost,trial_nleft,trial_nright, trial_nboth,
  669. // trial_splitvalue);
  670. if (trial_cost<best_cost)
  671. {
  672. split_plane=axis;
  673. best_cost=trial_cost;
  674. best_nleft=trial_nleft;
  675. best_nright=trial_nright;
  676. best_nboth=trial_nboth;
  677. best_splitvalue=trial_splitvalue;
  678. // save away the axis classification of each triangle
  679. for(int t=0 ; t < ntris; t++)
  680. {
  681. CacheOptimizedTriangle &tri=OptimizedTriangleList[tri_list[t]];
  682. tri.m_Data.m_GeometryData.m_nTmpData1 = tri.m_Data.m_GeometryData.m_nTmpData0;
  683. }
  684. }
  685. if (ts==-1)
  686. break;
  687. }
  688. }
  689. }
  690. float cost_of_no_split=COST_OF_INTERSECTION*ntris;
  691. if ( (cost_of_no_split<=best_cost) || NEVER_SPLIT || (depth>MAX_TREE_DEPTH))
  692. {
  693. // no benefit to splitting. just make this a leaf node
  694. OptimizedKDTree[node_number].Children=KDNODE_STATE_LEAF+(TriangleIndexList.Count()<<2);
  695. OptimizedKDTree[node_number].SetNumberOfTrianglesInLeafNode(ntris);
  696. #ifdef DEBUG_RAYTRACE
  697. OptimizedKDTree[node_number].vecMins = MinBound;
  698. OptimizedKDTree[node_number].vecMaxs = MaxBound;
  699. #endif
  700. for(int t=0;t<ntris;t++)
  701. TriangleIndexList.AddToTail(tri_list[t]);
  702. }
  703. else
  704. {
  705. // printf("best split was %d at %f (mid=%f,n=%d, sk=%d)\n",split_plane,best_splitvalue,
  706. // 0.5*(MinBound[split_plane]+MaxBound[split_plane]),ntris,tri_skip);
  707. // its worth splitting!
  708. // we will achieve the splitting without sorting by using a selection algorithm.
  709. int32 *new_triangle_list;
  710. new_triangle_list=new int32[ntris];
  711. // now, perform surface area/cost check to determine whether this split was worth it
  712. Vector LeftMins=MinBound;
  713. Vector LeftMaxes=MaxBound;
  714. Vector RightMins=MinBound;
  715. Vector RightMaxes=MaxBound;
  716. LeftMaxes[split_plane]=best_splitvalue;
  717. RightMins[split_plane]=best_splitvalue;
  718. int n_left_output=0;
  719. int n_both_output=0;
  720. int n_right_output=0;
  721. for(int t=0;t<ntris;t++)
  722. {
  723. CacheOptimizedTriangle &tri=OptimizedTriangleList[tri_list[t]];
  724. switch( tri.m_Data.m_GeometryData.m_nTmpData1 )
  725. {
  726. case PLANECHECK_NEGATIVE:
  727. // printf("%d goes left\n",t);
  728. new_triangle_list[n_left_output++]=tri_list[t];
  729. break;
  730. case PLANECHECK_POSITIVE:
  731. n_right_output++;
  732. // printf("%d goes right\n",t);
  733. new_triangle_list[ntris-n_right_output]=tri_list[t];
  734. break;
  735. case PLANECHECK_STRADDLING:
  736. // printf("%d goes both\n",t);
  737. new_triangle_list[best_nleft+n_both_output]=tri_list[t];
  738. n_both_output++;
  739. break;
  740. }
  741. }
  742. int left_child=OptimizedKDTree.Count();
  743. int right_child=left_child+1;
  744. // printf("node %d split on axis %d at %f, nl=%d nr=%d nb=%d lc=%d rc=%d\n",node_number,
  745. // split_plane,best_splitvalue,best_nleft,best_nright,best_nboth,
  746. // left_child,right_child);
  747. OptimizedKDTree[node_number].Children=split_plane+(left_child<<2);
  748. OptimizedKDTree[node_number].SplittingPlaneValue=best_splitvalue;
  749. #ifdef DEBUG_RAYTRACE
  750. OptimizedKDTree[node_number].vecMins = MinBound;
  751. OptimizedKDTree[node_number].vecMaxs = MaxBound;
  752. #endif
  753. CacheOptimizedKDNode newnode;
  754. OptimizedKDTree.AddToTail(newnode);
  755. OptimizedKDTree.AddToTail(newnode);
  756. // now, recurse!
  757. if ( (ntris<20) && ((best_nleft==0) || (best_nright==0)) )
  758. depth+=100;
  759. RefineNode(left_child,new_triangle_list,best_nleft+best_nboth,LeftMins,LeftMaxes,depth+1);
  760. RefineNode(right_child,new_triangle_list+best_nleft,best_nright+best_nboth,
  761. RightMins,RightMaxes,depth+1);
  762. delete[] new_triangle_list;
  763. }
  764. }
  765. void RayTracingEnvironment::SetupAccelerationStructure(void)
  766. {
  767. CacheOptimizedKDNode root;
  768. OptimizedKDTree.AddToTail(root);
  769. int32 *root_triangle_list=new int32[OptimizedTriangleList.Count()];
  770. for(int t=0;t<OptimizedTriangleList.Count();t++)
  771. root_triangle_list[t]=t;
  772. CalculateTriangleListBounds(root_triangle_list,OptimizedTriangleList.Count(),m_MinBound,
  773. m_MaxBound);
  774. RefineNode(0,root_triangle_list,OptimizedTriangleList.Count(),m_MinBound,m_MaxBound,0);
  775. delete[] root_triangle_list;
  776. // now, convert all triangles to "intersection format"
  777. for(int i=0;i<OptimizedTriangleList.Count();i++)
  778. OptimizedTriangleList[i].ChangeIntoIntersectionFormat();
  779. }
  780. void RayTracingEnvironment::AddInfinitePointLight(Vector position, Vector intensity)
  781. {
  782. LightDesc_t mylight(position,intensity);
  783. LightList.AddToTail(mylight);
  784. }