Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

316 lines
10 KiB

  1. /* memtest.c, Robert Nix, December, 1993
  2. * nix@vliw.enet.dec.com
  3. * based on:
  4. * cbash.c
  5. * kirk johnson @ MIT
  6. * february 1993
  7. *
  8. * RCS $Id: cbash.c,v 1.2 1993/08/12 15:30:17 tuna Exp $
  9. *
  10. * Usage: memtest <machname> <iterations> <max-mem>
  11. * machname - a short identifier for the machine being tested.
  12. * iterations - target number of iterations to run for stable timing.
  13. * max-mem - maximum working set size to test.
  14. *
  15. * Iterations and max-mem can be specified with a "k" or "m" suffix
  16. * for kilo or mega iterations/mem.
  17. *
  18. * Example: Test of a Gateway 60 Mhz Pentium system
  19. * Command Line: memtest gp560 8m 4m
  20. * Output:
  21. *
  22. --------------------------------------------------------------------------------
  23. * 4k 8k 16k 32k 64k 128k 256k 512k 1m 2m 4m
  24. * L gp560 4 68 68 86 86 86 93 104 111 111 111 122
  25. * L gp560 8 68 68 107 107 107 114 139 165 154 154 154
  26. * L gp560 16 89 68 143 143 143 161 204 232 240 243 243
  27. * L gp560 32 68 68 172 168 168 207 290 347 365 365 365
  28. * L gp560 64 68 72 168 168 168 207 290 350 368 368 368
  29. * L gp560 128 72 75 168 168 168 211 293 358 379 418 379
  30. * L gp560 256 75 79 168 168 168 207 293 379 397 401 401
  31. * L gp560 512 86 86 172 168 168 215 297 418 440 443 494
  32. * L gp560 1k 100 104 175 172 168 218 304 501 522 529 529
  33. * L gp560 2k 136 139 179 172 172 222 322 665 687 755 701
  34. * L gp560 4k 132 243 232 225 222 286 401 991 1016 1094 1048
  35. * L gp560 8k 132 136 243 232 225 290 350 923 973 1034 1109
  36. * L gp560 16k 132 136 132 243 232 225 333 937 908 994 1041
  37. * L gp560 32k 136 132 136 136 243 232 304 833 919 930 1012
  38. *
  39. --------------------------------------------------------------------------------
  40. * Explanation of output.
  41. *
  42. * There are three kinds of tests.
  43. *
  44. * L - Load latency test.
  45. * Measures the average repetition rate, in ns, of a latency-oriented load
  46. * loop. The two main variables are:
  47. *
  48. * (1) working set, or the amount of memory touched by the loop. This
  49. * varies across the columns in the output above, from a low of 4k
  50. * bytes to a high of max-mem, or 16m bytes.
  51. *
  52. * (2) stride, or the number of bytes separating successive loads.
  53. * This is the number in the 3rd column of each of the "L" rows
  54. * in the output above, and varies from 4 bytes to 32k bytes.
  55. *
  56. * Interpreting the results. This is easiest on a 3d chart in Excel.
  57. * Two strides are always particularly interesting:
  58. *
  59. * - The cache line or block size stride (32 bytes above).
  60. * Big changes in latencies across the columns show the sizes
  61. * and basic performance of the load side of the cache hierarchy.
  62. *
  63. * If you don't know the cache line size: (a) look across the first row
  64. * for the first column that takes a big jump up in latency (the jump
  65. * from 68ns to 86ns between the 8k column and 16k column above), then (b)
  66. * scan down the rows of that column for the first relatively stable value
  67. * (172ns in the 32 byte stride row above). The row containing
  68. * that stable value is probably the cache line size.
  69. *
  70. * Look across the cache line size row. Access time jumps at 16K --
  71. * so the L1 cache is 8K -- and then jumps again at 512K -- so the L2
  72. * cache is 256K. The slope between 64K and 512K could be caused
  73. * by a thrash in the L2 cache; page coloring could remove this thrash.
  74. *
  75. * - The page size stride (4k above).
  76. * Big changes in latencies across the columns expose the TB size and the cost
  77. * of a TB refill.
  78. *
  79. * Scan the 4k line. It takes a big jump in latency at the 512K working
  80. * set (and actually starts to thrash at the 256K working set). This test says the TB
  81. * can map somewhere in the neighborhood of 64 4K pages. The TB fill time
  82. * looks to be somewhere around 650-700 ns (subtract large working set entries
  83. * in the 32-byte stride line from corresponding entries in the 4k stride line).
  84. *
  85. * The output always contains a little noise:
  86. *
  87. * - Boost the "iterations" command line parameter to remove timing jitter.
  88. *
  89. * - All entries contain some loop overhead. It's fair to normalize results by subtracting
  90. * out the difference between the reported times and the known latency to the fastest level
  91. * of the memory hierarchy.
  92. *
  93. * - The entries in the lower-left hand corner of the table (large
  94. * strides in small memory) are dominated by loop overhead; ignore them.
  95. *
  96. * - Implement a good page coloring algorithm to remove jitter caused by cache
  97. * thrashing. Look at the cache-line sized stride to see the frequency of thrashing.
  98. *
  99. */
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
  106. #define DEF_MAXMEM 16777216
  107. #define MINMEM 4096
  108. #define ITYPE signed int
  109. signed long max_mem;
  110. char *mach_name;
  111. #if defined(_WIN64)
  112. typedef unsigned __int64 ULONG_PTR;
  113. #else
  114. typedef unsigned long ULONG_PTR;
  115. #endif
  116. #define MAXSTRIDE 32768
  117. #define MINSTRIDE 4
  118. char *version_string = "1.0 (20 Dec 1993)";
  119. extern ITYPE arg_to_int(char *);
  120. extern double bash(char *, long, long, long);
  121. extern int bash_loop(char *, long, long, long);
  122. extern void allocate_memory(char *, long);
  123. extern void usage(char *);
  124. int __cdecl main(
  125. int argc,
  126. char *argv[]
  127. )
  128. {
  129. ITYPE nbytes;
  130. ITYPE stride;
  131. ITYPE iters;
  132. char *region;
  133. if ((argc > 1) && (strcmp(argv[1], "-v") == 0)) {
  134. fprintf(stderr, "This is memtest version %s.\n", version_string);
  135. exit(1);
  136. }
  137. if (argc < 3)
  138. usage(argv[0]);
  139. mach_name = argv[1];
  140. iters = arg_to_int(argv[2]);
  141. if (argc < 4) {
  142. max_mem = DEF_MAXMEM;
  143. } else {
  144. max_mem = arg_to_int(argv[3]);
  145. }
  146. region = (char *) malloc(max_mem+(128*1024));
  147. region = (char *) ((((ULONG_PTR) region) + (128*1024-1)) & ~((128*1024)-1));
  148. if (region == NULL) {
  149. perror("malloc failed");
  150. exit(1);
  151. }
  152. printf(" %8s", "");
  153. printf("%8s", "");
  154. for (nbytes = MINMEM; nbytes <= max_mem; nbytes += nbytes) {
  155. if (nbytes >= (1024 * 1024))
  156. printf("%4dm", nbytes / (1024 * 1024));
  157. else if (nbytes >= 1024)
  158. printf("%4dk", nbytes / 1024);
  159. else
  160. printf("%5d", nbytes);
  161. }
  162. printf("\n");
  163. for (stride = MINSTRIDE; stride <= MAXSTRIDE; stride += stride) {
  164. printf("L %-8s", mach_name);
  165. if (stride >= (1024 * 1024))
  166. printf("%7dm", stride / (1024 * 1024));
  167. else if (stride >= 1024)
  168. printf("%7dk", stride / 1024);
  169. else
  170. printf("%8d", stride);
  171. for (nbytes = MINMEM; nbytes <= max_mem; nbytes += nbytes) {
  172. double ns_ref = bash(region, nbytes, stride, iters);
  173. printf("%5.0f", ns_ref);
  174. fflush(stdout);
  175. }
  176. printf("\n");
  177. }
  178. exit(0);
  179. return 0;
  180. }
  181. ITYPE
  182. arg_to_int(char *arg)
  183. {
  184. ITYPE rslt = 0;
  185. ITYPE mult = 1;
  186. switch (arg[strlen(arg) - 1]) {
  187. case 'k':
  188. case 'K':
  189. mult = 1024;
  190. break;
  191. case 'm':
  192. case 'M':
  193. mult = 1024 * 1024;
  194. break;
  195. default:
  196. mult = 1;
  197. break;
  198. }
  199. if (!((arg[0] >= '0') && arg[0] <= '9')) {
  200. fprintf(stderr, "Argument %s not a number\n", arg);
  201. usage("memtest");
  202. exit(1);
  203. }
  204. if (sscanf(arg, "%ld", &rslt) != 1) {
  205. fprintf(stderr, "Argument %s not a number\n", arg);
  206. usage("memtest");
  207. exit(1);
  208. }
  209. rslt *= mult;
  210. return rslt;
  211. }
  212. double
  213. bash(
  214. char *region,
  215. long nbytes, /* size of region to bash (bytes) */
  216. long stride, /* stride through region (bytes) */
  217. long iters /* target # of loop iterations */
  218. )
  219. {
  220. signed long count;
  221. signed long reps;
  222. clock_t start, stop;
  223. double utime, stime;
  224. count = ((nbytes - sizeof(int)) / stride) + 1;
  225. if (! (((count - 1) * stride + (long)sizeof(int)) <= nbytes)) {
  226. fprintf(stderr, "trip count problem\n");
  227. exit(1);
  228. }
  229. reps = (iters + count - 1) / count;
  230. if (reps <= 0)
  231. reps = 1;
  232. iters = reps * count;
  233. /* make sure the memory is allocated */
  234. memset(region, 0, nbytes);
  235. memset(region, 1, nbytes);
  236. allocate_memory(region, nbytes);
  237. memset(region, 0, nbytes);
  238. /* warm up the cache */
  239. (void) bash_loop(region, count, stride, 1L);
  240. /* run the bash loop */
  241. start = clock();
  242. (void) bash_loop(region, count, stride, reps);
  243. stop = clock();
  244. utime = (double) (stop - start) / CLOCKS_PER_SEC;
  245. stime = 0.0;
  246. return 1e9 * ((utime + stime) / iters);
  247. }
  248. /* Your virtual memory pagesize must be at least this big */
  249. #define MIN_PAGESIZE 256
  250. void
  251. allocate_memory(
  252. char *region, /* memory region to be bashed */
  253. long nbytes)
  254. { /* size of region (bytes) */
  255. long i;
  256. for (i = 0; i < nbytes; i += MIN_PAGESIZE)
  257. *((int *) (region + i)) = 0;
  258. }
  259. int
  260. bash_loop(
  261. char *region, /* memory region to be bashed */
  262. long count, /* number of locations to bash */
  263. long stride, /* stride between locations (bytes) */
  264. long reps /* number of passes through region */
  265. )
  266. {
  267. long i;
  268. int rslt;
  269. char *tmp;
  270. rslt = 0;
  271. for (; reps > 0; reps--) {
  272. tmp = region;
  273. for (i = count; i > 0; i--) {
  274. rslt ^= *((int *) tmp);
  275. tmp += stride;
  276. }
  277. }
  278. return rslt;
  279. }
  280. void
  281. usage(char *progname)
  282. {
  283. fprintf(stderr, "usage: %s <machname> <iters> [<maxmem>]\n", progname);
  284. fprintf(stderr, " <machname> machine name\n");
  285. fprintf(stderr, " <iters> target # of accesses\n");
  286. fprintf(stderr, " <maxmem> maximum amount of mem to touch (def 16 Mb)\n");
  287. exit(1);
  288. }