Team Fortress 2 Source Code, as of 22/4/2020
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

951 lines
29 KiB

  1. // This file renders vertex buffers, converts raw meshes
  2. // to GL meshes, and manages threads that do the raw-mesh
  3. // building (found in cave_mesher.c)
  4. #include "stb_voxel_render.h"
  5. #define STB_GLEXT_DECLARE "glext_list.h"
  6. #include "stb_gl.h"
  7. #include "stb_image.h"
  8. #include "stb_glprog.h"
  9. #include "caveview.h"
  10. #include "cave_parse.h"
  11. #include "stb.h"
  12. #include "sdl.h"
  13. #include "sdl_thread.h"
  14. #include <math.h>
  15. #include <assert.h>
  16. //#define STBVOX_CONFIG_TEX1_EDGE_CLAMP
  17. // currently no dynamic way to set mesh cache size or view distance
  18. //#define SHORTVIEW
  19. stbvox_mesh_maker g_mesh_maker;
  20. GLuint main_prog;
  21. GLint uniform_locations[64];
  22. //#define MAX_QUADS_PER_DRAW (65536 / 4) // assuming 16-bit indices, 4 verts per quad
  23. //#define FIXED_INDEX_BUFFER_SIZE (MAX_QUADS_PER_DRAW * 6 * 2) // 16*1024 * 12 == ~192KB
  24. // while uploading texture data, this holds our each texture
  25. #define TEX_SIZE 64
  26. uint32 texture[TEX_SIZE][TEX_SIZE];
  27. GLuint voxel_tex[2];
  28. // chunk state
  29. enum
  30. {
  31. STATE_invalid,
  32. STATE_needed,
  33. STATE_requested,
  34. STATE_abandoned,
  35. STATE_valid,
  36. };
  37. // mesh is 32x32x255 ... this is hardcoded in that
  38. // a mesh covers 2x2 minecraft chunks, no #defines for it
  39. typedef struct
  40. {
  41. int state;
  42. int chunk_x, chunk_y;
  43. int num_quads;
  44. float priority;
  45. int vbuf_size, fbuf_size;
  46. float transform[3][3];
  47. float bounds[2][3];
  48. GLuint vbuf;// vbuf_tex;
  49. GLuint fbuf, fbuf_tex;
  50. } chunk_mesh;
  51. void scale_texture(unsigned char *src, int x, int y, int w, int h)
  52. {
  53. int i,j,k;
  54. assert(w == 256 && h == 256);
  55. for (j=0; j < TEX_SIZE; ++j) {
  56. for (i=0; i < TEX_SIZE; ++i) {
  57. uint32 val=0;
  58. for (k=0; k < 4; ++k) {
  59. val >>= 8;
  60. val += src[ 4*(x+(i>>2)) + 4*w*(y+(j>>2)) + k]<<24;
  61. }
  62. texture[j][i] = val;
  63. }
  64. }
  65. }
  66. void build_base_texture(int n)
  67. {
  68. int x,y;
  69. uint32 color = stb_rand() | 0x808080;
  70. for (y=0; y<TEX_SIZE; ++y)
  71. for (x=0; x<TEX_SIZE; ++x) {
  72. texture[y][x] = (color + (stb_rand()&0x1f1f1f))|0xff000000;
  73. }
  74. }
  75. void build_overlay_texture(int n)
  76. {
  77. int x,y;
  78. uint32 color = stb_rand();
  79. if (color & 16)
  80. color = 0xff000000;
  81. else
  82. color = 0xffffffff;
  83. for (y=0; y<TEX_SIZE; ++y)
  84. for (x=0; x<TEX_SIZE; ++x) {
  85. texture[y][x] = 0;
  86. }
  87. for (y=0; y < TEX_SIZE/8; ++y) {
  88. for (x=0; x < TEX_SIZE; ++x) {
  89. texture[y][x] = color;
  90. texture[TEX_SIZE-1-y][x] = color;
  91. texture[x][y] = color;
  92. texture[x][TEX_SIZE-1-y] = color;
  93. }
  94. }
  95. }
  96. // view radius of about 1024 = 2048 columns / 32 columns-per-mesh = 2^11 / 2^5 = 64x64
  97. // so we need bigger than 64x64 so we can precache, which means we have to be
  98. // non-power-of-two, or we have to be pretty huge
  99. #define CACHED_MESH_NUM_X 128
  100. #define CACHED_MESH_NUM_Y 128
  101. chunk_mesh cached_chunk_mesh[CACHED_MESH_NUM_Y][CACHED_MESH_NUM_X];
  102. void free_chunk(int slot_x, int slot_y)
  103. {
  104. chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
  105. if (cm->state == STATE_valid) {
  106. glDeleteTextures(1, &cm->fbuf_tex);
  107. glDeleteBuffersARB(1, &cm->vbuf);
  108. glDeleteBuffersARB(1, &cm->fbuf);
  109. cached_chunk_mesh[slot_y][slot_x].state = STATE_invalid;
  110. }
  111. }
  112. void upload_mesh(chunk_mesh *cm, uint8 *build_buffer, uint8 *face_buffer)
  113. {
  114. glGenBuffersARB(1, &cm->vbuf);
  115. glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf);
  116. glBufferDataARB(GL_ARRAY_BUFFER_ARB, cm->num_quads*4*sizeof(uint32), build_buffer, GL_STATIC_DRAW_ARB);
  117. glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
  118. glGenBuffersARB(1, &cm->fbuf);
  119. glBindBufferARB(GL_TEXTURE_BUFFER_ARB, cm->fbuf);
  120. glBufferDataARB(GL_TEXTURE_BUFFER_ARB, cm->num_quads*sizeof(uint32), face_buffer , GL_STATIC_DRAW_ARB);
  121. glBindBufferARB(GL_TEXTURE_BUFFER_ARB, 0);
  122. glGenTextures(1, &cm->fbuf_tex);
  123. glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex);
  124. glTexBufferARB(GL_TEXTURE_BUFFER_ARB, GL_RGBA8UI, cm->fbuf);
  125. glBindTexture(GL_TEXTURE_BUFFER_ARB, 0);
  126. }
  127. static void upload_mesh_data(raw_mesh *rm)
  128. {
  129. int cx = rm->cx;
  130. int cy = rm->cy;
  131. int slot_x = (cx >> 1) & (CACHED_MESH_NUM_X-1);
  132. int slot_y = (cy >> 1) & (CACHED_MESH_NUM_Y-1);
  133. chunk_mesh *cm;
  134. free_chunk(slot_x, slot_y);
  135. cm = &cached_chunk_mesh[slot_y][slot_x];
  136. cm->num_quads = rm->num_quads;
  137. upload_mesh(cm, rm->build_buffer, rm->face_buffer);
  138. cm->vbuf_size = rm->num_quads*4*sizeof(uint32);
  139. cm->fbuf_size = rm->num_quads*sizeof(uint32);
  140. cm->priority = 100000;
  141. cm->chunk_x = cx;
  142. cm->chunk_y = cy;
  143. memcpy(cm->bounds, rm->bounds, sizeof(cm->bounds));
  144. memcpy(cm->transform, rm->transform, sizeof(cm->transform));
  145. // write barrier here
  146. cm->state = STATE_valid;
  147. }
  148. GLint uniform_loc[16];
  149. float table3[128][3];
  150. float table4[64][4];
  151. GLint tablei[2];
  152. float step=0;
  153. #ifdef SHORTVIEW
  154. int view_dist_in_chunks = 50;
  155. #else
  156. int view_dist_in_chunks = 80;
  157. #endif
  158. void setup_uniforms(float pos[3])
  159. {
  160. int i,j;
  161. step += 1.0f/60.0f;
  162. for (i=0; i < STBVOX_UNIFORM_count; ++i) {
  163. stbvox_uniform_info raw, *ui=&raw;
  164. stbvox_get_uniform_info(&raw, i);
  165. uniform_loc[i] = -1;
  166. if (i == STBVOX_UNIFORM_texscale || i == STBVOX_UNIFORM_texgen || i == STBVOX_UNIFORM_color_table)
  167. continue;
  168. if (ui) {
  169. void *data = ui->default_value;
  170. uniform_loc[i] = stbgl_find_uniform(main_prog, ui->name);
  171. switch (i) {
  172. case STBVOX_UNIFORM_face_data:
  173. tablei[0] = 2;
  174. data = tablei;
  175. break;
  176. case STBVOX_UNIFORM_tex_array:
  177. glActiveTextureARB(GL_TEXTURE0_ARB);
  178. glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]);
  179. glActiveTextureARB(GL_TEXTURE1_ARB);
  180. glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
  181. glActiveTextureARB(GL_TEXTURE0_ARB);
  182. tablei[0] = 0;
  183. tablei[1] = 1;
  184. data = tablei;
  185. break;
  186. case STBVOX_UNIFORM_color_table:
  187. data = ui->default_value;
  188. ((float *)data)[63*4+3] = 2.0f; // emissive
  189. break;
  190. case STBVOX_UNIFORM_camera_pos:
  191. data = table3[0];
  192. table3[0][0] = pos[0];
  193. table3[0][1] = pos[1];
  194. table3[0][2] = pos[2];
  195. table3[0][3] = stb_max(0,(float)sin(step*2)*0.125f);
  196. break;
  197. case STBVOX_UNIFORM_ambient: {
  198. float bright = 1.0;
  199. //float bright = 0.75;
  200. float amb[3][3];
  201. // ambient direction is sky-colored upwards
  202. // "ambient" lighting is from above
  203. table4[0][0] = 0.3f;
  204. table4[0][1] = -0.5f;
  205. table4[0][2] = 0.9f;
  206. amb[1][0] = 0.3f; amb[1][1] = 0.3f; amb[1][2] = 0.3f; // dark-grey
  207. amb[2][0] = 1.0; amb[2][1] = 1.0; amb[2][2] = 1.0; // white
  208. // convert so (table[1]*dot+table[2]) gives
  209. // above interpolation
  210. // lerp((dot+1)/2, amb[1], amb[2])
  211. // amb[1] + (amb[2] - amb[1]) * (dot+1)/2
  212. // amb[1] + (amb[2] - amb[1]) * dot/2 + (amb[2]-amb[1])/2
  213. for (j=0; j < 3; ++j) {
  214. table4[1][j] = (amb[2][j] - amb[1][j])/2 * bright;
  215. table4[2][j] = (amb[1][j] + amb[2][j])/2 * bright;
  216. }
  217. // fog color
  218. table4[3][0] = 0.6f, table4[3][1] = 0.7f, table4[3][2] = 0.9f;
  219. table4[3][3] = 1.0f / (view_dist_in_chunks * 16);
  220. table4[3][3] *= table4[3][3];
  221. data = table4;
  222. break;
  223. }
  224. }
  225. switch (ui->type) {
  226. case STBVOX_UNIFORM_TYPE_sampler: stbglUniform1iv(uniform_loc[i], ui->array_length, data); break;
  227. case STBVOX_UNIFORM_TYPE_vec2: stbglUniform2fv(uniform_loc[i], ui->array_length, data); break;
  228. case STBVOX_UNIFORM_TYPE_vec3: stbglUniform3fv(uniform_loc[i], ui->array_length, data); break;
  229. case STBVOX_UNIFORM_TYPE_vec4: stbglUniform4fv(uniform_loc[i], ui->array_length, data); break;
  230. }
  231. }
  232. }
  233. }
  234. GLuint unitex[64], unibuf[64];
  235. void make_texture_buffer_for_uniform(int uniform, int slot)
  236. {
  237. GLenum type;
  238. stbvox_uniform_info raw, *ui=&raw;
  239. GLint uloc;
  240. stbvox_get_uniform_info(ui, uniform);
  241. uloc = stbgl_find_uniform(main_prog, ui->name);
  242. if (uniform == STBVOX_UNIFORM_color_table)
  243. ((float *)ui->default_value)[63*4+3] = 2.0f; // emissive
  244. glGenBuffersARB(1, &unibuf[uniform]);
  245. glBindBufferARB(GL_ARRAY_BUFFER_ARB, unibuf[uniform]);
  246. glBufferDataARB(GL_ARRAY_BUFFER_ARB, ui->array_length * ui->bytes_per_element, ui->default_value, GL_STATIC_DRAW_ARB);
  247. glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
  248. glGenTextures(1, &unitex[uniform]);
  249. glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]);
  250. switch (ui->type) {
  251. case STBVOX_UNIFORM_TYPE_vec2: type = GL_RG32F; break;
  252. case STBVOX_UNIFORM_TYPE_vec3: type = GL_RGB32F; break;
  253. case STBVOX_UNIFORM_TYPE_vec4: type = GL_RGBA32F; break;
  254. default: assert(0);
  255. }
  256. glTexBufferARB(GL_TEXTURE_BUFFER_ARB, type, unibuf[uniform]);
  257. glBindTexture(GL_TEXTURE_BUFFER_ARB, 0);
  258. glActiveTextureARB(GL_TEXTURE0 + slot);
  259. glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]);
  260. glActiveTextureARB(GL_TEXTURE0);
  261. stbglUseProgram(main_prog);
  262. stbglUniform1i(uloc, slot);
  263. }
  264. #define MAX_MESH_WORKERS 8
  265. #define MAX_CHUNK_LOAD_WORKERS 2
  266. int num_mesh_workers;
  267. int num_chunk_load_workers;
  268. typedef struct
  269. {
  270. int state;
  271. int request_cx;
  272. int request_cy;
  273. int padding[13];
  274. SDL_sem * request_received;
  275. SDL_sem * chunk_server_done_processing;
  276. int chunk_action;
  277. int chunk_request_x;
  278. int chunk_request_y;
  279. fast_chunk *chunks[4][4];
  280. int padding2[16];
  281. raw_mesh rm;
  282. int padding3[16];
  283. uint8 *build_buffer;
  284. uint8 *face_buffer ;
  285. } mesh_worker;
  286. enum
  287. {
  288. WSTATE_idle,
  289. WSTATE_requested,
  290. WSTATE_running,
  291. WSTATE_mesh_ready,
  292. };
  293. mesh_worker mesh_data[MAX_MESH_WORKERS];
  294. int num_meshes_started; // stats
  295. int request_chunk(int chunk_x, int chunk_y);
  296. void update_meshes_from_render_thread(void);
  297. unsigned char tex2_data[64][4];
  298. void init_tex2_gradient(void)
  299. {
  300. int i;
  301. for (i=0; i < 16; ++i) {
  302. tex2_data[i+ 0][0] = 64 + 12*i;
  303. tex2_data[i+ 0][1] = 32;
  304. tex2_data[i+ 0][2] = 64;
  305. tex2_data[i+16][0] = 255;
  306. tex2_data[i+16][1] = 32 + 8*i;
  307. tex2_data[i+16][2] = 64;
  308. tex2_data[i+32][0] = 255;
  309. tex2_data[i+32][1] = 160;
  310. tex2_data[i+32][2] = 64 + 12*i;
  311. tex2_data[i+48][0] = 255;
  312. tex2_data[i+48][1] = 160 + 6*i;
  313. tex2_data[i+48][2] = 255;
  314. }
  315. }
  316. void set_tex2_alpha(float fa)
  317. {
  318. int i;
  319. int a = (int) stb_lerp(fa, 0, 255);
  320. if (a < 0) a = 0; else if (a > 255) a = 255;
  321. glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
  322. for (i=0; i < 64; ++i) {
  323. tex2_data[i][3] = a;
  324. glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, 1,1,1, GL_RGBA, GL_UNSIGNED_BYTE, tex2_data[i]);
  325. }
  326. }
  327. void render_init(void)
  328. {
  329. int i;
  330. char *binds[] = { "attr_vertex", "attr_face", NULL };
  331. char *vertex;
  332. char *fragment;
  333. int w=0,h=0;
  334. unsigned char *texdata = stbi_load("terrain.png", &w, &h, NULL, 4);
  335. stbvox_init_mesh_maker(&g_mesh_maker);
  336. for (i=0; i < num_mesh_workers; ++i) {
  337. stbvox_init_mesh_maker(&mesh_data[i].rm.mm);
  338. }
  339. vertex = stbvox_get_vertex_shader();
  340. fragment = stbvox_get_fragment_shader();
  341. {
  342. char error_buffer[1024];
  343. char *main_vertex[] = { vertex, NULL };
  344. char *main_fragment[] = { fragment, NULL };
  345. main_prog = stbgl_create_program(main_vertex, main_fragment, binds, error_buffer, sizeof(error_buffer));
  346. if (main_prog == 0) {
  347. ods("Compile error for main shader: %s\n", error_buffer);
  348. assert(0);
  349. exit(1);
  350. }
  351. }
  352. //init_index_buffer();
  353. make_texture_buffer_for_uniform(STBVOX_UNIFORM_texscale , 3);
  354. make_texture_buffer_for_uniform(STBVOX_UNIFORM_texgen , 4);
  355. make_texture_buffer_for_uniform(STBVOX_UNIFORM_color_table , 5);
  356. glGenTextures(2, voxel_tex);
  357. glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]);
  358. glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA,
  359. TEX_SIZE,TEX_SIZE,256,
  360. 0,GL_RGBA,GL_UNSIGNED_BYTE,NULL);
  361. for (i=0; i < 256; ++i) {
  362. if (texdata)
  363. scale_texture(texdata, (i&15)*w/16, (h/16)*(i>>4), w,h);
  364. else
  365. build_base_texture(i);
  366. glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]);
  367. }
  368. glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
  369. glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
  370. glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAX_ANISOTROPY_EXT, 16);
  371. #ifdef STBVOX_CONFIG_TEX1_EDGE_CLAMP
  372. glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
  373. glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
  374. #endif
  375. glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT);
  376. glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
  377. glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA,
  378. 1,1,64,
  379. 0,GL_RGBA,GL_UNSIGNED_BYTE,NULL);
  380. init_tex2_gradient();
  381. set_tex2_alpha(0.0);
  382. #if 0
  383. for (i=0; i < 128; ++i) {
  384. //build_overlay_texture(i);
  385. glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]);
  386. }
  387. #endif
  388. glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
  389. glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
  390. glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT);
  391. }
  392. void world_init(void)
  393. {
  394. int a,b,x,y;
  395. Uint64 start_time, end_time;
  396. #ifdef NDEBUG
  397. int range = 32;
  398. #else
  399. int range = 12;
  400. #endif
  401. start_time = SDL_GetPerformanceCounter();
  402. // iterate in 8x8 clusters of qchunks at a time to get better converted-chunk-cache reuse
  403. // than a purely row-by-row ordering is (single-threaded this is a bigger win than
  404. // any of the above optimizations were, since it halves zlib/mc-conversion costs)
  405. for (x=-range; x <= range; x += 16)
  406. for (y=-range; y <= range; y += 16)
  407. for (b=y; b < y+16 && b <= range; b += 2)
  408. for (a=x; a < x+16 && a <= range; a += 2)
  409. while (!request_chunk(a, b)) { // if request fails, all threads are busy
  410. update_meshes_from_render_thread();
  411. SDL_Delay(1);
  412. }
  413. // wait until all the workers are done,
  414. // (this is only needed if we want to time
  415. // when the build finishes, or when we want to reset the
  416. // cache size; otherwise we could just go ahead and
  417. // start rendering whatever we've got)
  418. for(;;) {
  419. int i;
  420. update_meshes_from_render_thread();
  421. for (i=0; i < num_mesh_workers; ++i)
  422. if (mesh_data[i].state != WSTATE_idle)
  423. break;
  424. if (i == num_mesh_workers)
  425. break;
  426. SDL_Delay(3);
  427. }
  428. end_time = SDL_GetPerformanceCounter();
  429. ods("Build time: %7.2fs\n", (end_time - start_time) / (float) SDL_GetPerformanceFrequency());
  430. // don't waste lots of storage on chunk caches once it's finished starting-up;
  431. // this was only needed to be this large because we worked in large blocks
  432. // to maximize sharing
  433. reset_cache_size(32);
  434. }
  435. extern SDL_mutex * chunk_cache_mutex;
  436. int mesh_worker_handler(void *data)
  437. {
  438. mesh_worker *mw = data;
  439. mw->face_buffer = malloc(FACE_BUFFER_SIZE);
  440. mw->build_buffer = malloc(BUILD_BUFFER_SIZE);
  441. // this loop only works because the compiler can't
  442. // tell that the SDL_calls don't access mw->state;
  443. // really we should barrier that stuff
  444. for(;;) {
  445. int i,j;
  446. int cx,cy;
  447. // wait for a chunk request
  448. SDL_SemWait(mw->request_received);
  449. // analyze the chunk request
  450. assert(mw->state == WSTATE_requested);
  451. cx = mw->request_cx;
  452. cy = mw->request_cy;
  453. // this is inaccurate as it can block while another thread has the cache locked
  454. mw->state = WSTATE_running;
  455. // get the chunks we need (this takes a lock and caches them)
  456. for (j=0; j < 4; ++j)
  457. for (i=0; i < 4; ++i)
  458. mw->chunks[j][i] = get_converted_fastchunk(cx-1 + i, cy-1 + j);
  459. // build the mesh based on the chunks
  460. mw->rm.build_buffer = mw->build_buffer;
  461. mw->rm.face_buffer = mw->face_buffer;
  462. build_chunk(cx, cy, mw->chunks, &mw->rm);
  463. mw->state = WSTATE_mesh_ready;
  464. // don't need to notify of this, because it gets polled
  465. // when done, free the chunks
  466. // for efficiency we just take the mutex once around the whole thing,
  467. // though this spreads the mutex logic over two files
  468. SDL_LockMutex(chunk_cache_mutex);
  469. for (j=0; j < 4; ++j)
  470. for (i=0; i < 4; ++i) {
  471. deref_fastchunk(mw->chunks[j][i]);
  472. mw->chunks[j][i] = NULL;
  473. }
  474. SDL_UnlockMutex(chunk_cache_mutex);
  475. }
  476. return 0;
  477. }
  478. int request_chunk(int chunk_x, int chunk_y)
  479. {
  480. int i;
  481. for (i=0; i < num_mesh_workers; ++i) {
  482. mesh_worker *mw = &mesh_data[i];
  483. if (mw->state == WSTATE_idle) {
  484. mw->request_cx = chunk_x;
  485. mw->request_cy = chunk_y;
  486. mw->state = WSTATE_requested;
  487. SDL_SemPost(mw->request_received);
  488. ++num_meshes_started;
  489. return 1;
  490. }
  491. }
  492. return 0;
  493. }
  494. void prepare_threads(void)
  495. {
  496. int i;
  497. int num_proc = SDL_GetCPUCount();
  498. if (num_proc > 6)
  499. num_mesh_workers = num_proc/2;
  500. else if (num_proc > 4)
  501. num_mesh_workers = 4;
  502. else
  503. num_mesh_workers = num_proc-1;
  504. // @TODO
  505. // Thread usage is probably pretty terrible; need to make a
  506. // separate queue of needed chunks, instead of just generating
  507. // one request per thread per frame, and a separate queue of
  508. // results. (E.g. If it takes 1.5 frames to build mesh, thread
  509. // is idle for 0.5 frames.) To fake this for now, I've just
  510. // doubled the number of threads to let those serve as a 'queue',
  511. // but that's dumb.
  512. num_mesh_workers *= 2; // try to get better thread usage
  513. if (num_mesh_workers > MAX_MESH_WORKERS)
  514. num_mesh_workers = MAX_MESH_WORKERS;
  515. for (i=0; i < num_mesh_workers; ++i) {
  516. mesh_worker *data = &mesh_data[i];
  517. data->request_received = SDL_CreateSemaphore(0);
  518. data->chunk_server_done_processing = SDL_CreateSemaphore(0);
  519. SDL_CreateThread(mesh_worker_handler, "mesh worker", data);
  520. }
  521. }
  522. // "better" buffer uploading
  523. #if 0
  524. if (glBufferStorage) {
  525. glDeleteBuffersARB(1, &vb->vbuf);
  526. glGenBuffersARB(1, &vb->vbuf);
  527. glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf);
  528. glBufferStorage(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, 0);
  529. glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
  530. } else {
  531. glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf);
  532. glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, GL_STATIC_DRAW_ARB);
  533. glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
  534. }
  535. #endif
  536. typedef struct
  537. {
  538. float x,y,z,w;
  539. } plane;
  540. static plane frustum[6];
  541. static void matd_mul(double out[4][4], double src1[4][4], double src2[4][4])
  542. {
  543. int i,j,k;
  544. for (j=0; j < 4; ++j) {
  545. for (i=0; i < 4; ++i) {
  546. double t=0;
  547. for (k=0; k < 4; ++k)
  548. t += src1[k][i] * src2[j][k];
  549. out[i][j] = t;
  550. }
  551. }
  552. }
  553. // https://fgiesen.wordpress.com/2012/08/31/frustum-planes-from-the-projection-matrix/
  554. static void compute_frustum(void)
  555. {
  556. int i;
  557. GLdouble mv[4][4],proj[4][4], mvproj[4][4];
  558. glGetDoublev(GL_MODELVIEW_MATRIX , mv[0]);
  559. glGetDoublev(GL_PROJECTION_MATRIX, proj[0]);
  560. matd_mul(mvproj, proj, mv);
  561. for (i=0; i < 4; ++i) {
  562. (&frustum[0].x)[i] = (float) (mvproj[3][i] + mvproj[0][i]);
  563. (&frustum[1].x)[i] = (float) (mvproj[3][i] - mvproj[0][i]);
  564. (&frustum[2].x)[i] = (float) (mvproj[3][i] + mvproj[1][i]);
  565. (&frustum[3].x)[i] = (float) (mvproj[3][i] - mvproj[1][i]);
  566. (&frustum[4].x)[i] = (float) (mvproj[3][i] + mvproj[2][i]);
  567. (&frustum[5].x)[i] = (float) (mvproj[3][i] - mvproj[2][i]);
  568. }
  569. }
  570. static int test_plane(plane *p, float x0, float y0, float z0, float x1, float y1, float z1)
  571. {
  572. // return false if the box is entirely behind the plane
  573. float d=0;
  574. assert(x0 <= x1 && y0 <= y1 && z0 <= z1);
  575. if (p->x > 0) d += x1*p->x; else d += x0*p->x;
  576. if (p->y > 0) d += y1*p->y; else d += y0*p->y;
  577. if (p->z > 0) d += z1*p->z; else d += z0*p->z;
  578. return d + p->w >= 0;
  579. }
  580. static int is_box_in_frustum(float *bmin, float *bmax)
  581. {
  582. int i;
  583. for (i=0; i < 6; ++i)
  584. if (!test_plane(&frustum[i], bmin[0], bmin[1], bmin[2], bmax[0], bmax[1], bmax[2]))
  585. return 0;
  586. return 1;
  587. }
  588. float compute_priority(int cx, int cy, float x, float y)
  589. {
  590. float distx, disty, dist2;
  591. distx = (cx*16+8) - x;
  592. disty = (cy*16+8) - y;
  593. dist2 = distx*distx + disty*disty;
  594. return view_dist_in_chunks*view_dist_in_chunks * 16 * 16 - dist2;
  595. }
  596. int chunk_locations, chunks_considered, chunks_in_frustum;
  597. int quads_considered, quads_rendered;
  598. int chunk_storage_rendered, chunk_storage_considered, chunk_storage_total;
  599. int update_frustum = 1;
  600. #ifdef SHORTVIEW
  601. int max_chunk_storage = 450 << 20;
  602. int min_chunk_storage = 350 << 20;
  603. #else
  604. int max_chunk_storage = 900 << 20;
  605. int min_chunk_storage = 800 << 20;
  606. #endif
  607. float min_priority = -500; // this really wants to be in unit space, not squared space
  608. int num_meshes_uploaded;
  609. void update_meshes_from_render_thread(void)
  610. {
  611. int i;
  612. for (i=0; i < num_mesh_workers; ++i) {
  613. mesh_worker *mw = &mesh_data[i];
  614. if (mw->state == WSTATE_mesh_ready) {
  615. upload_mesh_data(&mw->rm);
  616. ++num_meshes_uploaded;
  617. mw->state = WSTATE_idle;
  618. }
  619. }
  620. }
  621. extern float tex2_alpha;
  622. extern int global_hack;
  623. int num_threads_active;
  624. float chunk_server_activity;
  625. void render_caves(float campos[3])
  626. {
  627. float x = campos[0], y = campos[1];
  628. int qchunk_x, qchunk_y;
  629. int cam_x, cam_y;
  630. int i,j, rad;
  631. compute_frustum();
  632. chunk_locations = chunks_considered = chunks_in_frustum = 0;
  633. quads_considered = quads_rendered = 0;
  634. chunk_storage_total = chunk_storage_considered = chunk_storage_rendered = 0;
  635. cam_x = (int) floor(x+0.5);
  636. cam_y = (int) floor(y+0.5);
  637. qchunk_x = (((int) floor(x)+16) >> 5) << 1;
  638. qchunk_y = (((int) floor(y)+16) >> 5) << 1;
  639. glEnable(GL_ALPHA_TEST);
  640. glAlphaFunc(GL_GREATER, 0.5);
  641. stbglUseProgram(main_prog);
  642. setup_uniforms(campos); // set uniforms to default values inefficiently
  643. glActiveTextureARB(GL_TEXTURE2_ARB);
  644. stbglEnableVertexAttribArray(0);
  645. {
  646. float lighting[2][3] = { { campos[0],campos[1],campos[2] }, { 0.75,0.75,0.65f } };
  647. float bright = 8;
  648. lighting[1][0] *= bright;
  649. lighting[1][1] *= bright;
  650. lighting[1][2] *= bright;
  651. stbglUniform3fv(stbgl_find_uniform(main_prog, "light_source"), 2, lighting[0]);
  652. }
  653. if (global_hack)
  654. set_tex2_alpha(tex2_alpha);
  655. num_meshes_uploaded = 0;
  656. update_meshes_from_render_thread();
  657. // traverse all in-range chunks and analyze them
  658. for (j=-view_dist_in_chunks; j <= view_dist_in_chunks; j += 2) {
  659. for (i=-view_dist_in_chunks; i <= view_dist_in_chunks; i += 2) {
  660. float priority;
  661. int cx = qchunk_x + i;
  662. int cy = qchunk_y + j;
  663. priority = compute_priority(cx, cy, x, y);
  664. if (priority >= min_priority) {
  665. int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1);
  666. int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1);
  667. chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
  668. ++chunk_locations;
  669. if (cm->state == STATE_valid && priority >= 0) {
  670. // check if chunk pos actually matches
  671. if (cm->chunk_x != cx || cm->chunk_y != cy) {
  672. // we have a stale chunk we need to recreate
  673. free_chunk(slot_x, slot_y); // it probably will have already gotten freed, but just in case
  674. }
  675. }
  676. if (cm->state == STATE_invalid) {
  677. cm->chunk_x = cx;
  678. cm->chunk_y = cy;
  679. cm->state = STATE_needed;
  680. }
  681. cm->priority = priority;
  682. }
  683. }
  684. }
  685. // draw front-to-back
  686. for (rad = 0; rad <= view_dist_in_chunks; rad += 2) {
  687. for (j=-rad; j <= rad; j += 2) {
  688. // if j is +- rad, then iterate i through all values
  689. // if j isn't +-rad, then i should be only -rad & rad
  690. int step = 2;
  691. if (abs(j) != rad)
  692. step = 2*rad;
  693. for (i=-rad; i <= rad; i += step) {
  694. int cx = qchunk_x + i;
  695. int cy = qchunk_y + j;
  696. int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1);
  697. int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1);
  698. chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
  699. if (cm->state == STATE_valid && cm->priority >= 0) {
  700. ++chunks_considered;
  701. quads_considered += cm->num_quads;
  702. if (is_box_in_frustum(cm->bounds[0], cm->bounds[1])) {
  703. ++chunks_in_frustum;
  704. // @TODO if in range
  705. stbglUniform3fv(uniform_loc[STBVOX_UNIFORM_transform], 3, cm->transform[0]);
  706. glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf);
  707. glVertexAttribIPointer(0, 1, GL_UNSIGNED_INT, 4, (void*) 0);
  708. glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex);
  709. glDrawArrays(GL_QUADS, 0, cm->num_quads*4);
  710. quads_rendered += cm->num_quads;
  711. chunk_storage_rendered += cm->vbuf_size + cm->fbuf_size;
  712. }
  713. chunk_storage_considered += cm->vbuf_size + cm->fbuf_size;
  714. }
  715. }
  716. }
  717. }
  718. stbglDisableVertexAttribArray(0);
  719. glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
  720. glActiveTextureARB(GL_TEXTURE0_ARB);
  721. stbglUseProgram(0);
  722. num_meshes_started = 0;
  723. {
  724. #define MAX_QUEUE 8
  725. float highest_priority[MAX_QUEUE];
  726. int highest_i[MAX_QUEUE], highest_j[MAX_QUEUE];
  727. float lowest_priority = view_dist_in_chunks * view_dist_in_chunks * 16 * 16.0f;
  728. int lowest_i = -1, lowest_j = -1;
  729. for (i=0; i < MAX_QUEUE; ++i) {
  730. highest_priority[i] = min_priority;
  731. highest_i[i] = -1;
  732. highest_j[i] = -1;
  733. }
  734. for (j=0; j < CACHED_MESH_NUM_Y; ++j) {
  735. for (i=0; i < CACHED_MESH_NUM_X; ++i) {
  736. chunk_mesh *cm = &cached_chunk_mesh[j][i];
  737. if (cm->state == STATE_valid) {
  738. cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y);
  739. chunk_storage_total += cm->vbuf_size + cm->fbuf_size;
  740. if (cm->priority < lowest_priority) {
  741. lowest_priority = cm->priority;
  742. lowest_i = i;
  743. lowest_j = j;
  744. }
  745. }
  746. if (cm->state == STATE_needed) {
  747. cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y);
  748. if (cm->priority < min_priority)
  749. cm->state = STATE_invalid;
  750. else if (cm->priority > highest_priority[0]) {
  751. int k;
  752. highest_priority[0] = cm->priority;
  753. highest_i[0] = i;
  754. highest_j[0] = j;
  755. // bubble this up to right place
  756. for (k=0; k < MAX_QUEUE-1; ++k) {
  757. if (highest_priority[k] > highest_priority[k+1]) {
  758. highest_priority[k] = highest_priority[k+1];
  759. highest_priority[k+1] = cm->priority;
  760. highest_i[k] = highest_i[k+1];
  761. highest_i[k+1] = i;
  762. highest_j[k] = highest_j[k+1];
  763. highest_j[k+1] = j;
  764. } else {
  765. break;
  766. }
  767. }
  768. }
  769. }
  770. }
  771. }
  772. // I couldn't find any straightforward logic that avoids
  773. // the hysteresis problem of continually creating & freeing
  774. // a block on the margin, so I just don't free a block until
  775. // it's out of range, but this doesn't actually correctly
  776. // handle when the cache is too small for the given range
  777. if (chunk_storage_total >= min_chunk_storage && lowest_i >= 0) {
  778. if (cached_chunk_mesh[lowest_j][lowest_i].priority < -1200) // -1000? 0?
  779. free_chunk(lowest_i, lowest_j);
  780. }
  781. if (chunk_storage_total < max_chunk_storage && highest_i[0] >= 0) {
  782. for (j=MAX_QUEUE-1; j >= 0; --j) {
  783. if (highest_j[0] >= 0) {
  784. chunk_mesh *cm = &cached_chunk_mesh[highest_j[j]][highest_i[j]];
  785. if (request_chunk(cm->chunk_x, cm->chunk_y)) {
  786. cm->state = STATE_requested;
  787. } else {
  788. // if we couldn't queue this one, skip the remainder
  789. break;
  790. }
  791. }
  792. }
  793. }
  794. }
  795. update_meshes_from_render_thread();
  796. num_threads_active = 0;
  797. for (i=0; i < num_mesh_workers; ++i) {
  798. num_threads_active += (mesh_data[i].state == WSTATE_running);
  799. }
  800. }