Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

503 lines
16 KiB

  1. #ifndef _SCB_H
  2. #define _SCB_H
  3. // Redirect Asserts in inline code to seem to fire from this file
  4. #define szAssertFilename __FILE__
  5. // includes
  6. #include <stddef.h>
  7. // tune these constants for optimal performance
  8. // maximum amount of fast memory (cache) to use for sorting
  9. #define cbSortMemFast ( 16 * ( 4088 + 1 ) )
  10. // maximum amount of normal memory to use for sorting
  11. #define cbSortMemNorm ( 1024 * 1024L )
  12. // maximum size for memory resident Temp Table
  13. #define cbResidentTTMax ( 64 * 1024L )
  14. // minimum count of sort pairs effectively sorted by Quicksort
  15. // NOTE: must be greater than 2!
  16. #define cspairQSortMin ( 32 )
  17. // maximum partition stack depth for Quicksort
  18. #define cpartQSortMax ( 16 )
  19. // maximum count of runs to merge at once (fan-in)
  20. #define crunFanInMax ( 16 )
  21. // I/O cluster size (in pages)
  22. #define cpgClusterSize ( 2 )
  23. // define to use predictive preread instead of prereading all runs
  24. //#define PRED_PREREAD
  25. // Sort Page structure
  26. //
  27. // This is a custom page layout for use in the temporary database by sorting
  28. // only. Sufficient structure still remains so that other page reading code
  29. // can recognize that they do not know this format and can continue on their
  30. // merry way.
  31. #pragma pack(1)
  32. typedef struct _spage
  33. {
  34. ULONG ulChecksum; // page checksum
  35. #ifdef PRED_PREREAD
  36. USHORT ibLastSREC; // offset to last unbroken SREC
  37. #endif // PRED_PREREAD
  38. BYTE rgbData[ // free data space =
  39. cbPage // page size
  40. - sizeof( ULONG ) // - ulChecksum
  41. #ifdef PRED_PREREAD
  42. - sizeof( USHORT ) // - ibLastSREC
  43. #endif // PRED_PREREAD
  44. - sizeof( PGTYP ) // - pgtyp
  45. - sizeof( THREEBYTES ) // - pgnoThisPage
  46. ];
  47. PGTYP pgtyp; // page type (== pgtypSort)
  48. THREEBYTES pgnoThisPage; // this page's page number
  49. } SPAGE;
  50. #pragma pack()
  51. // returns start of free data area in a sort page
  52. STATIC INLINE BYTE *PbDataStartPspage( SPAGE *pspage )
  53. {
  54. return (BYTE *)( &pspage->rgbData);
  55. }
  56. // returns end of free data area in a sort page + 1
  57. STATIC INLINE BYTE *PbDataEndPspage( SPAGE *pspage )
  58. {
  59. return (BYTE *)( &pspage->pgtyp );
  60. }
  61. // free data space per SPAGE
  62. #define cbFreeSPAGE ( offsetof( SPAGE, pgtyp ) - offsetof( SPAGE, rgbData ) )
  63. // maximum count of SPAGEs' data that can be stored in normal sort memory
  64. #define cspageSortMax ( cbSortMemNorm / cbFreeSPAGE )
  65. // amount of normal memory actually used for sorting
  66. // (designed to make original runs fill pages exactly)
  67. #define cbSortMemNormUsed ( cspageSortMax * cbFreeSPAGE )
  68. // Sort Pair in fast sort memory
  69. //
  70. // (key prefix, index) pairs are sorted so that most of the data that needs
  71. // to be examined during a sort will be loaded into cache memory, allowing
  72. // the sort to run very fast. If two key prefixes are equal, we must go out
  73. // to slower memory to compare the remainder of the keys (if any) to determine
  74. // the proper sort order. This makes it important for the prefixes to be as
  75. // discriminatory as possible for each record.
  76. //
  77. // CONSIDER: adding a flag to indicate that the entire key is present
  78. //
  79. // Indexes are compressed pointers that describe the record's position in
  80. // the slow memory sort buffer. Each record's position can only be known to
  81. // a granularity designated by the size of the normal sort memory. For
  82. // example, if you specify 128KB of normal memory, the granularity is 2
  83. // because the index can only take on 65536 values:
  84. // ceil( ( 128 * 1024 ) / 65536 ) = 2.
  85. // size of key prefix (in bytes)
  86. #define cbKeyPrefix ( 14 )
  87. #pragma pack(1)
  88. // NOTE: sizeof(SPAIR) must be a power of 2 >= 8
  89. typedef struct _spair
  90. {
  91. USHORT irec; // record index
  92. BYTE rgbKey[cbKeyPrefix]; // key prefix
  93. } SPAIR;
  94. #pragma pack()
  95. // addressing granularity of record indexes (fit indexes into USHORT)
  96. // (run disk usage is optimal for cbIndexGran == 1)
  97. #define cbIndexGran ( ( cbSortMemNormUsed + 0xFFFFL ) / 0x10000L )
  98. // maximum index of records that can be stored in normal memory
  99. #define irecSortMax ( cbSortMemNormUsed / cbIndexGran )
  100. // maximum count of SPAIRs' data that can be stored in fast sort memory
  101. // NOTE: we are reserving one for temporary sort key storage (at cspairSortMax)
  102. #define cspairSortMax ( cbSortMemFast / sizeof( SPAIR ) - 1 )
  103. // amount of fast memory actually used for sorting (counting reserve SPAIR)
  104. #define cbSortMemFastUsed ( ( cspairSortMax + 1 ) * sizeof( SPAIR ) )
  105. // count of "Sort Record indexes" required to store count bytes of data
  106. // (This is fast if numbers are chosen to make cbIndexGran a power of 2
  107. // (especially 1) due to compiler optimizations)
  108. STATIC INLINE LONG CirecToStoreCb( LONG cb )
  109. {
  110. return ( cb + cbIndexGran - 1 ) / cbIndexGran;
  111. }
  112. // generalized Sort Record type (encompasses all types)
  113. // NOTE: using void blocks illegal declarations, pointer math, etc
  114. typedef VOID SREC;
  115. // Unique run identifier (first page of run = run id)
  116. typedef PGNO RUN;
  117. #define runNull ( (RUN) pgnoNull )
  118. #define crunAll ( 0x7FFFFFFFL )
  119. // Run Information structure
  120. typedef struct _runinfo
  121. {
  122. RUN run; // this run
  123. CPG cpg; // count of pages in run
  124. LONG cb; // count of bytes of data in run
  125. LONG crec; // count of records in each run
  126. CPG cpgUsed; // count of pages actually used
  127. } RUNINFO;
  128. // Run Link structure (used in RUNLIST)
  129. typedef struct _runlink
  130. {
  131. struct _runlink *prunlinkNext; // next run
  132. RUNINFO runinfo; // runinfo for this run
  133. } RUNLINK;
  134. #define prunlinkNil ( (RUNLINK *) 0 )
  135. // RUNLINK allocation operators
  136. #define PrunlinkRUNLINKAlloc() ( (RUNLINK *) LAlloc( 1, sizeof( RUNLINK ) ) )
  137. #ifdef DEBUG /* Debug check for illegal use of freed runlink */
  138. #define RUNLINKReleasePrcb(prunlink) { LFree( prunlink ); prunlink = prunlinkNil; }
  139. #else
  140. #define RUNLINKReleasePrcb(prunlink) { LFree( prunlink ); }
  141. #endif
  142. // Run List structure
  143. typedef struct _runlist
  144. {
  145. RUNLINK *prunlinkHead; // head of runlist
  146. LONG crun; // count of runs in list
  147. } RUNLIST;
  148. // Merge Tree Node
  149. //
  150. // These nodes are used in the replacement-selection sort tree that merges
  151. // the incoming runs into one large run. Due to the way the tree is set up,
  152. // each node acts as both an internal (loser) node and as an external (input)
  153. // node, with the exception of node 0, which keeps the last winner instead
  154. // of a loser.
  155. typedef struct _mtnode
  156. {
  157. // external node
  158. struct _rcb *prcb; // input run
  159. struct _mtnode *pmtnodeExtUp; // pointer to father node
  160. // internal node
  161. SREC *psrec; // current record
  162. struct _mtnode *pmtnodeSrc; // record's source node
  163. struct _mtnode *pmtnodeIntUp; // pointer to father node
  164. } MTNODE;
  165. // Special values for psrec for replacement-selection sort. psrecNegInf is a
  166. // sentinel value less than any possible key and is used for merge tree
  167. // initialization. psrecInf is a sentinel value greater than any possible key
  168. // and is used to indicate the end of the input stream.
  169. #define psrecNegInf ( (SREC *) -1L )
  170. #define psrecInf ( (SREC *) NULL )
  171. // Optimized Tree Merge Node
  172. //
  173. // These nodes are used to build the merge plan for the depth first merge of
  174. // an optimized tree merge. This tree is built so that we perform the merges
  175. // from the smaller side of the tree to the larger side of the tree, all in
  176. // the interest of increasing our cache locality during the merge process.
  177. typedef struct _otnode
  178. {
  179. RUNLIST runlist; // list of runs for this node
  180. struct _otnode *rgpotnode[crunFanInMax]; // subtrees for this node
  181. struct _otnode *potnodeAllocNext; // next node (allocation)
  182. struct _otnode *potnodeLevelNext; // next node (level)
  183. } OTNODE;
  184. #define potnodeNil ( (OTNODE *) 0 )
  185. // Special value for potnode for the optimized tree merge tree build routine.
  186. // potnodeLevel0 means that the current level is comprised of original runs,
  187. // not of other merge nodes.
  188. #define potnodeLevel0 ( (OTNODE *) -1L )
  189. // OTNODE allocation operators
  190. #define PotnodeOTNODEAlloc() ( (OTNODE *) LAlloc( 1, sizeof( OTNODE ) ) )
  191. #ifdef DEBUG /* Debug check for illegal use of freed otnode */
  192. #define OTNODEReleasePotnode(potnode) { LFree( potnode ); potnode = potnodeNil; }
  193. #else
  194. #define OTNODEReleasePotnode(potnode) { LFree( potnode ); }
  195. #endif
  196. // Sort Control Block (SCB)
  197. typedef struct _scb
  198. {
  199. FCB fcb; // FCB MUST BE FIRST FIELD IN STRUCTURE
  200. JET_GRBIT grbit; // sort grbit
  201. INT fFlags; // sort flags
  202. LONG cRecords; // count of records in sort
  203. // memory-resident sorting
  204. SPAIR *rgspair; // sort pair buffer
  205. LONG ispairMac; // next available sort pair
  206. BYTE *rgbRec; // record buffer
  207. LONG cbCommit; // amount of committed buffer space
  208. LONG irecMac; // next available record index
  209. LONG crecBuf; // count of records in buffer
  210. LONG cbData; // total record data size (actual)
  211. // disk-resident sorting
  212. LONG crun; // count of original runs generated
  213. RUNLIST runlist; // list of runs to be merged
  214. // sort/merge run output
  215. PGNO pgnoNext; // next page in output run
  216. struct _bf *pbfOut; // current output buffer
  217. BYTE *pbOutMac; // next available byte in page
  218. BYTE *pbOutMax; // end of available page
  219. // merge (replacement-selection sort)
  220. LONG crunMerge; // count of runs being read/merged
  221. MTNODE rgmtnode[crunFanInMax]; // merge tree
  222. // merge duplicate removal
  223. BOOL fUnique; // remove duplicates during merge
  224. struct _bf *pbfLast; // last used read ahead buffer
  225. struct _bf *pbfAssyLast; // last used assembly buffer
  226. #ifdef PCACHE_OPTIMIZATION
  227. /* pad to multiple of 32 bytes
  228. /**/
  229. BYTE rgbFiller[12];
  230. #endif
  231. } SCB;
  232. // SCB allocation operators
  233. #define PscbMEMAlloc() (SCB *)PbMEMAlloc( iresSCB )
  234. #ifdef DEBUG /* Debug check for illegal use of freed scb */
  235. #define MEMReleasePscb(pscb) { MEMRelease( iresSCB, (BYTE *) ( pscb ) ); pscb = pscbNil; }
  236. #else
  237. #define MEMReleasePscb(pscb) { MEMRelease( iresSCB, (BYTE *) ( pscb ) ); }
  238. #endif
  239. // SCB fFlags
  240. #define fSCBInsert (1<<0)
  241. #define fSCBIndex (1<<1)
  242. #define fSCBUnique (1<<2)
  243. // SCB fFlags operators
  244. STATIC INLINE VOID SCBSetInsert( SCB *pscb ) { pscb->fFlags |= fSCBInsert; }
  245. STATIC INLINE VOID SCBResetInsert( SCB *pscb ) { pscb->fFlags &= ~fSCBInsert; }
  246. STATIC INLINE BOOL FSCBInsert( SCB *pscb ) { return pscb->fFlags & fSCBInsert; }
  247. STATIC INLINE VOID SCBSetIndex( SCB *pscb ) { pscb->fFlags |= fSCBIndex; }
  248. STATIC INLINE VOID SCBResetIndex( SCB *pscb ) { pscb->fFlags &= ~fSCBIndex; }
  249. STATIC INLINE BOOL FSCBIndex( SCB *pscb ) { return pscb->fFlags & fSCBIndex; }
  250. STATIC INLINE VOID SCBSetUnique( SCB *pscb ) { pscb->fFlags |= fSCBUnique; }
  251. STATIC INLINE VOID SCBResetUnique( SCB *pscb ) { pscb->fFlags &= ~fSCBUnique; }
  252. STATIC INLINE BOOL FSCBUnique( SCB *pscb ) { return pscb->fFlags & fSCBUnique; }
  253. // Sort Record in normal sort memory
  254. //
  255. // There are two types of Sort Records. One type, SRECD, is used for general
  256. // sort records and can have an abitrary record data field. The second type,
  257. // SRECI, is used when we know we are sorting Key/SRID records during index
  258. // creation. SRECI is more compact and therefore allows more records to fit
  259. // in each run in this special (and common) case.
  260. #pragma pack(1)
  261. typedef struct _srecd
  262. {
  263. USHORT cbRec; // record size
  264. BYTE cbKey; // key size
  265. BYTE rgbKey[]; // key
  266. // BYTE rgbData[]; // data (just for illustration)
  267. } UNALIGNED SRECD;
  268. typedef struct _sreci
  269. {
  270. BYTE cbKey; // key size
  271. BYTE rgbKey[]; // key
  272. // SRID srid; // srid (just for illistration)
  273. } UNALIGNED SRECI;
  274. #pragma pack()
  275. // minimum amount of record that must be read in order to retrieve its size
  276. #define cbSRECReadMin ( offsetof( SRECD, cbKey ) )
  277. // the following functions abstract different operations on a sort record pointer
  278. // to perform the appropriate operations, depending on the flags set in the SCB
  279. // returns size of an existing sort record
  280. STATIC INLINE LONG CbSRECSizePscbPsrec( SCB *pscb, SREC *psrec )
  281. {
  282. if ( FSCBIndex( pscb ) )
  283. return sizeof( SRECI ) + ( (SRECI *) psrec )->cbKey + sizeof( SRID );
  284. return ( (SRECD * ) psrec )->cbRec;
  285. }
  286. // calculates size of a potential sort record
  287. STATIC INLINE LONG CbSRECSizePscbCbCb( SCB *pscb, LONG cbKey, LONG cbData )
  288. {
  289. if ( FSCBIndex( pscb ) )
  290. return sizeof( SRECI ) + cbKey + sizeof( SRID );
  291. return sizeof( SRECD ) + cbKey + cbData;
  292. }
  293. // sets size of sort record
  294. STATIC INLINE VOID SRECSizePscbPsrecCb( SCB *pscb, SREC *psrec, LONG cb )
  295. {
  296. if ( !FSCBIndex( pscb ) )
  297. ( (SRECD * ) psrec )->cbRec = (USHORT) cb;
  298. }
  299. // returns size of sort record key
  300. STATIC INLINE LONG CbSRECKeyPscbPsrec( SCB *pscb, SREC *psrec )
  301. {
  302. if ( FSCBIndex( pscb ) )
  303. return ( (SRECI *) psrec )->cbKey;
  304. return ( (SRECD * ) psrec )->cbKey;
  305. }
  306. // sets size of sort record key
  307. STATIC INLINE VOID SRECKeySizePscbPsrecCb( SCB *pscb, SREC *psrec, LONG cb )
  308. {
  309. if ( FSCBIndex( pscb ) )
  310. ( (SRECI *) psrec )->cbKey = (BYTE) cb;
  311. else
  312. ( (SRECD *) psrec )->cbKey = (BYTE) cb;
  313. }
  314. // returns sort record key buffer pointer
  315. STATIC INLINE BYTE *PbSRECKeyPscbPsrec( SCB *pscb, SREC *psrec )
  316. {
  317. if ( FSCBIndex( pscb ) )
  318. return ( (SRECI *) psrec )->rgbKey;
  319. return ( (SRECD *) psrec )->rgbKey;
  320. }
  321. // returns sort record key as a Pascal string
  322. STATIC INLINE BYTE *StSRECKeyPscbPsrec( SCB *pscb, SREC *psrec )
  323. {
  324. if ( FSCBIndex( pscb ) )
  325. return &( (SRECI *) psrec )->cbKey;
  326. return &( (SRECD *) psrec )->cbKey;
  327. }
  328. // returns size of sort record data
  329. STATIC INLINE LONG CbSRECDataPscbPsrec( SCB *pscb, SREC *psrec )
  330. {
  331. if ( FSCBIndex( pscb ) )
  332. return sizeof( SRID );
  333. return ( (SRECD *) psrec )->cbRec - ( (SRECD *) psrec )->cbKey - sizeof( SRECD );
  334. }
  335. // returns sort record data buffer pointer
  336. STATIC INLINE BYTE *PbSRECDataPscbPsrec( SCB *pscb, SREC *psrec )
  337. {
  338. if ( FSCBIndex( pscb ) )
  339. return ( (SRECI *) psrec )->rgbKey + ( (SRECI *) psrec )->cbKey;
  340. return ( (SRECD * ) psrec )->rgbKey + ( (SRECD * ) psrec )->cbKey;
  341. }
  342. // returns pointer to a sort record given a base address and a Sort Record Index
  343. STATIC INLINE SREC *PsrecFromPbIrec( BYTE *pb, LONG irec )
  344. {
  345. return (SREC *) ( pb + irec * cbIndexGran );
  346. }
  347. // Run Control Block
  348. //
  349. // This control block is used for multiple instance use of the run input
  350. // functions ErrSORTIRunOpen, ErrSORTIRunNext, and ErrSORTIRunClose.
  351. typedef struct _rcb
  352. {
  353. SCB *pscb; // associated SCB
  354. RUNINFO runinfo; // run information
  355. struct _bf *rgpbf[cpgClusterSize]; // pinned read ahead buffers
  356. LONG ipbf; // current buffer
  357. BYTE *pbInMac; // next byte in page data
  358. BYTE *pbInMax; // end of page data
  359. LONG cbRemaining; // remaining bytes of data in run
  360. #ifdef PRED_PREREAD
  361. SREC *psrecPred; // SREC used for predictive preread
  362. #endif // PRED_PREREAD
  363. struct _bf *pbfAssy; // record assembly buffer
  364. } RCB;
  365. #define prcbNil ( (RCB *) 0 )
  366. // RCB allocation operators
  367. #define PrcbRCBAlloc() ( (RCB *) LAlloc( 1, sizeof( RCB ) ) )
  368. #ifdef DEBUG /* Debug check for illegal use of freed rcb */
  369. #define RCBReleasePrcb(prcb) { LFree( prcb ); prcb = prcbNil; }
  370. #else
  371. #define RCBReleasePrcb(prcb) { LFree( prcb ); }
  372. #endif
  373. //#define UtilPerfDumpStats( a ) ( 0 )
  374. // End Assert redirection
  375. #undef szAssertFilename
  376. #endif // _SCB_H