Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1564 lines
44 KiB

  1. //+---------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1991 - 2000.
  5. //
  6. // File: NORM.CXX
  7. //
  8. // Contents: Normalizer
  9. //
  10. // Classes: CNormalizer
  11. //
  12. // History: 28-May-91 t-WadeR added CNormalizer
  13. // 31-Jan-92 BartoszM Created from lang.cxx
  14. // 07-Oct-93 DwightKr Added new methods to normalize
  15. // different data types
  16. //
  17. // Notes: The filtering pipeline is hidden in the Data Repository
  18. // object which serves as a sink for the filter.
  19. // The sink for the Data Repository is the Key Repository.
  20. // The language dependent part of the pipeline
  21. // is obtained from the Language List object and is called
  22. // Key Maker. It consists of:
  23. //
  24. // Word Breaker
  25. // Stemmer (optional)
  26. // Normalizer
  27. // Noise List
  28. //
  29. // Each object serves as a sink for its predecessor,
  30. // Key Repository is the final sink.
  31. //
  32. //----------------------------------------------------------------------------
  33. #include <pch.cxx>
  34. #pragma hdrstop
  35. #include <plang.hxx>
  36. #include <misc.hxx>
  37. #include <norm.hxx>
  38. //+---------------------------------------------------------------------------
  39. //
  40. // Function GetExpAndSign
  41. //
  42. // Synopsis: Finds the exponent and sign of a number
  43. //
  44. // Arguments: [d] -- the input number to examine
  45. // [fPositive] -- returns TRUE if positive, FALSE if negative
  46. //
  47. // Returns: The exponent
  48. //
  49. // History: 21-Nov-94 KyleP Created.
  50. //
  51. //----------------------------------------------------------------------------
  52. int GetExpAndSign( double d, BOOL & fPositive )
  53. {
  54. //
  55. // bit 63 = sign
  56. // bits 52 - 62 = exponent
  57. // bits 0 - 51 = mantissa
  58. //
  59. Win4Assert( sizeof(LARGE_INTEGER) == sizeof(double) );
  60. LARGE_INTEGER * pli = (LARGE_INTEGER *)&d;
  61. fPositive = (pli->HighPart & 0x80000000) == 0;
  62. int const bias = 0x3ff;
  63. return ( ( pli->HighPart & 0x7ff00000 ) >> 20 ) - bias;
  64. } //GetExpAndSign
  65. //+---------------------------------------------------------------------------
  66. //
  67. // Function NormDouble
  68. //
  69. // Synopsis: Normalizes doubles by taking log2 of the number
  70. //
  71. // Notes: This func converts doubles into one of 5 different categories
  72. //
  73. // x <= -1x2**33 is in bin 0
  74. // -1x2**33 < x <= -1x2**-32 are in bins 1 to 65
  75. // -1x2**-32 < x < 1x2**-32 is in bin 66
  76. // 1x2**-32 <= x < 1x2**33 are in bins 67 to 131
  77. // x >= 1x2**33 is in bin 132
  78. //
  79. // History: 21-Nov-94 KyleP Created.
  80. //
  81. //----------------------------------------------------------------------------
  82. static unsigned NormDouble(double dValue)
  83. {
  84. const int SignificantExponent = 32;
  85. const int SignificantRange = SignificantExponent * 2;
  86. const unsigned LowestBin = 0; // 0
  87. const unsigned LowerBin = LowestBin + 1; // 1
  88. const unsigned MiddleBin = LowerBin + SignificantRange + 1; // 66
  89. const unsigned UpperBin = MiddleBin + 1; // 67
  90. const unsigned HighestBin = UpperBin+ SignificantRange + 1; // 132
  91. BOOL fPositive;
  92. int exp = GetExpAndSign( dValue, fPositive );
  93. unsigned bin;
  94. if ( exp < -SignificantExponent )
  95. {
  96. //
  97. // All numbers close to zero in middle bin
  98. //
  99. bin = MiddleBin;
  100. }
  101. else if ( exp > SignificantExponent )
  102. {
  103. if ( fPositive )
  104. {
  105. //
  106. // Very large positive numbers in top bin
  107. //
  108. bin = HighestBin;
  109. }
  110. else
  111. {
  112. //
  113. // Very large negative numbers in bottom bin
  114. //
  115. bin = LowestBin;
  116. }
  117. }
  118. else
  119. {
  120. if ( fPositive )
  121. {
  122. //
  123. // medium size positive numbers
  124. //
  125. bin = UpperBin + exp + SignificantExponent;
  126. }
  127. else
  128. {
  129. //
  130. // medium size negative numbers
  131. //
  132. bin = LowerBin - exp + SignificantExponent;
  133. }
  134. }
  135. return bin;
  136. }
  #ifdef TEST_NORM
  //
  // Sanity test for NormDouble: walks values away from 1 and -1 in both
  // directions and prints any value whose bin breaks the expected ordering.
  //
  void TestNormDouble()
  {
      float fZero = 0.;
      float fOne = 1.;

      unsigned binZero = NormDouble( fZero );
      unsigned binOne = NormDouble( fOne );

      printf(" Value:Bin %f : 0x%4X (%d)\n", fZero, binZero, binZero );
      printf(" Value:Bin %f : 0x%4X (%d)\n", fOne, binOne, binOne );

      BOOL fSign;
      float fCur;
      unsigned binPrev;

      // From 1 down towards 0: bins must not increase and must stay
      // between the bin of 0 and the bin of 1.
      binPrev = binOne;
      for ( fCur = fOne; fCur > fZero; fCur = fCur/3 )
      {
          unsigned binCur = NormDouble( fCur );

          if (binCur > binPrev || binCur < binZero || binCur > binOne)
          {
              printf(" Value:Bin %f : 0x%4X (%d)\tExp %d\n", fCur, binCur, binCur, GetExpAndSign(fCur, fSign) );
          }

          binPrev = binCur;
      }

      // From 1 upwards: bins must not decrease.
      binPrev = binOne;
      for ( fCur = fOne; fCur < 1e+32; fCur = fCur * (float)1.5 )
      {
          unsigned binCur = NormDouble( fCur );

          if (binCur < binPrev)
              printf(" Value:Bin %f : 0x%4X (%d)\n", fCur, binCur, binCur );

          binPrev = binCur;
      }

      float fMinusOne = -1.;
      unsigned binMinusOne = NormDouble( fMinusOne );
      printf(" Value:Bin %f : 0x%4X (%d)\n", fMinusOne, binMinusOne, binMinusOne );

      // From -1 up towards 0: bins must not decrease and must stay
      // between the bin of -1 and the bin of 0.
      binPrev = binMinusOne;
      for ( fCur = fMinusOne; fCur < fZero; fCur = fCur/3 )
      {
          unsigned binCur = NormDouble( fCur );

          if (binCur < binPrev || binCur > binZero || binCur < binMinusOne)
              printf(" Value:Bin %f : 0x%4X (%d)\tExp %d\n", fCur, binCur, binCur, GetExpAndSign(fCur, fSign) );

          binPrev = binCur;
      }

      // From -1 downwards: bins must not increase.
      binPrev = binMinusOne;
      for ( fCur = fMinusOne; fCur > -1e+32; fCur = fCur * (float)1.5 )
      {
          unsigned binCur = NormDouble( fCur );

          if (binCur > binPrev)
              printf(" Value:Bin %f : 0x%4X (%d)\n", fCur, binCur, binCur );

          binPrev = binCur;
      }
  }
  #endif // TEST_NORM
  197. // ------------------------------------------------------------------------
  198. // | Upper Limit | Divisor (2^x) | # of Bins | (in hex) |
  199. // ------------------------------------------------------------------------
  200. // | 2^10 - 1 | 2^0 | 2^10 - 0 | 0400 - 0000 |
  201. // | 2^16 - 1 | 2^3 | 2^12 - 2^7 | 2000 - 0080 |
  202. // | 2^20 - 1 | 2^6 | 2^14 - 2^10 | 4000 - 0400 |
  203. // | 2^26 - 1 | 2^13 | 2^13 - 2^7 | 2000 - 0080 |
  204. // | 2^30 - 1 | 2^23 | 2^7 - 2^3 | 0080 - 0008 |
  205. // | 2^31 - 1 | 2^25 | 2^6 - 2^5 | 0040 - 0020 |
  206. // ------------------------------------------------------------------------
  207. // | Total | | | 84C0 - 0528 |
  208. // | | | | 7F98 |
  209. // ------------------------------------------------------------------------
  210. const long limit1 = 0x400;
  211. const long shift1 = 0;
  212. const long cbins1 = 0x400;
  213. const long limit2 = 0x10000; // 2^16
  214. const long shift2 = 3;
  215. const long cSkip1 = limit1 >> shift2;
  216. const long cbins2 = (limit2 >> shift2)-cSkip1;
  217. const long limit3 = 0x100000; // 2^20
  218. const long shift3 = 6;
  219. const long cSkip2 = limit2 >> shift3;
  220. const long cbins3 = (limit3 >> shift3) - cSkip2;
  221. const long limit4 = 0x4000000; // 2^26
  222. const long shift4 = 13;
  223. const long cSkip3 = limit3 >> shift4;
  224. const long cbins4 = (limit4 >> shift4) - cSkip3;
  225. const long limit5 = 0x40000000; // 2^30
  226. const long shift5 = 23;
  227. const long cSkip4 = limit4 >> shift5;
  228. const long cbins5 = (limit5 >> shift5) - cSkip4;
  229. const long limit6 = MINLONG; // 2^31
  230. const long shift6 = 25;
  231. const long cSkip5 = limit5 >> shift6;
  232. const long cbins6 = ((long) ((unsigned) limit6 >> shift6)) - cSkip5;
  233. static unsigned MapLong( LONG lValue )
  234. {
  235. Win4Assert( !(lValue & MINLONG) || ( MINLONG == lValue ) );
  236. #if CIDBG==1
  237. const long cTotal = cbins1 + cbins2 + cbins3 + cbins4 + cbins5 + cbins6;
  238. Win4Assert( cTotal <= MINSHORT );
  239. #endif // CIDBG == 1
  240. unsigned ulValue = (unsigned) lValue;
  241. unsigned binNum = (unsigned) lValue;;
  242. if ( ulValue < limit1 )
  243. {
  244. //
  245. // Nothing to do.
  246. //
  247. }
  248. else if ( ulValue < limit2 )
  249. {
  250. binNum = cbins1 - cSkip1 + (ulValue >> shift2);
  251. }
  252. else if ( ulValue < limit3 )
  253. {
  254. binNum = cbins1 + cbins2 - cSkip2 + (binNum >> shift3);
  255. }
  256. else if ( ulValue < limit4 )
  257. {
  258. binNum = cbins1 + cbins2 + cbins3 - cSkip3 + (binNum >> shift4);
  259. }
  260. else if ( ulValue < limit5 )
  261. {
  262. binNum = cbins1 + cbins2 + cbins3 + cbins4 - cSkip4 + (binNum >> shift5);
  263. }
  264. else
  265. {
  266. binNum = cbins1 + cbins2 + cbins3 + cbins4 + cbins5 - cSkip5 + (binNum >> shift6);
  267. }
  268. return binNum;
  269. }
  270. //+---------------------------------------------------------------------------
  271. //
  272. // Function: NormLong
  273. //
  274. // Synopsis: Normalizes the given "signed" long value to a value between
  275. // 0x0000 - 0xFFFF. The negative numbers occupy 0x0000-0x8000.
  276. // Positive numbers occupy 0x8000-0xFFFF
  277. //
  278. // Arguments: [lValue] - The value to be normalized.
  279. //
  280. // History: 10-03-95 srikants Created
  281. //
  282. // Notes:
  283. //
  284. //----------------------------------------------------------------------------
  285. static unsigned NormLong(LONG lValue)
  286. {
  287. if (lValue >= 0)
  288. {
  289. return MapLong(lValue) + MINSHORT;
  290. }
  291. else
  292. {
  293. return MINSHORT - MapLong(-lValue);
  294. }
  295. }
  296. //+---------------------------------------------------------------------------
  297. //
  298. // Function: NormULong
  299. //
  300. // Synopsis: Normalizes an "unsigned" long value to a value between
  301. // 0x0000-0xFFFF. Numbers from 0-2^31 - 1 are mapped in the
  302. // range 0x0000-0x7FFF. Numbers 2^31 to 2^32 - 1 are mapped
  303. // in the range 0x8000 - 0xFFFF
  304. //
  305. // Arguments: [lValue] - The value to be mapped.
  306. //
  307. // History: 10-03-95 srikants Created
  308. //
  309. // Notes:
  310. //
  311. //----------------------------------------------------------------------------
  312. static unsigned NormULong( ULONG lValue )
  313. {
  314. unsigned val = MapLong( lValue & ~MINLONG ); // turn off the high bit
  315. Win4Assert( !(val & MINSHORT) );
  316. if ( lValue & MINLONG )
  317. val |= MINSHORT;
  318. return val;
  319. }
  320. //+---------------------------------------------------------------------------
  321. //
  322. // Function: MapLargeInteger
  323. //
  324. // Synopsis: Maps a LargeInteger to a number between 0x0000-0x7FFF.
  325. //
  326. // Numbers with the "HighPart" = 0 are mapped in the range
  327. // 0x0000-0x3FFF. When the HighPart !=0, the values are
  328. // mapped to 0x4000 - 0x7FFF
  329. //
  330. // Arguments: [liValue] - The value to be mapped.
  331. //
  332. // History: 10-03-95 srikants Created
  333. //
  334. // Notes:
  335. //
  336. //----------------------------------------------------------------------------
  337. static unsigned MapLargeInteger( LARGE_INTEGER & liValue )
  338. {
  339. Win4Assert( !(liValue.HighPart & MINLONG) || ( MINLONG == liValue.HighPart ) );
  340. unsigned normVal;
  341. if ( 0 == liValue.HighPart )
  342. {
  343. normVal = NormULong( liValue.LowPart );
  344. normVal >>= 2;
  345. }
  346. else
  347. {
  348. normVal = MapLong( liValue.HighPart ); // 0x0000-0x7FFF
  349. normVal >>= 1;
  350. normVal |= 0x4000;
  351. }
  352. Win4Assert( normVal < 0x8000 );
  353. return normVal;
  354. }
  355. //+---------------------------------------------------------------------------
  356. //
  357. // Function: NormULargeInteger
  358. //
  359. // Synopsis: Normalizes an unsigned LargeInteger to a number between
  360. // 0x0000-0xFFFF.
  361. //
  362. // Numbers with the "HighPart" = 0 are mapped in the range
  363. // 0x0000-0x7FFF. When the HighPart !=0, the values are
  364. // mapped to 0x8000 - 0xFFFF.
  365. //
  366. // Arguments: [uliValue] - The value to be mapped.
  367. //
  368. // History: 02-09-96 Alanw Created
  369. //
  370. // Notes:
  371. //
  372. //----------------------------------------------------------------------------
  373. static unsigned NormULargeInteger( ULARGE_INTEGER & uliValue )
  374. {
  375. unsigned normVal;
  376. if ( 0 == uliValue.HighPart )
  377. {
  378. normVal = NormULong( uliValue.LowPart );
  379. normVal >>= 1;
  380. }
  381. else
  382. {
  383. normVal = NormULong( uliValue.HighPart ); // 0x0000-0x7FFF
  384. normVal |= 0x8000;
  385. }
  386. Win4Assert( normVal < 0x10000 );
  387. return normVal;
  388. }
  389. //+---------------------------------------------------------------------------
  390. //
  391. // Function: NormLargeInteger
  392. //
  393. // Synopsis: Normalizes a large integer to a value between 0x0000-0xFFFF.
  394. //
  395. // -ve Numbers are mapped in the range 0x0000-0x8000.
  396. // +ve numbers are mapped in the range 0x8000-0xFFFF.
  397. //
  398. // Arguments: [liValue] - The value to be normalized. Note that the
  399. // argument is NOT passed by reference. The value is changed
  400. // in this method and so should not be passed by reference.
  401. //
  402. // History: 10-03-95 srikants Created
  403. //
  404. // Notes:
  405. //
  406. //----------------------------------------------------------------------------
  407. static unsigned NormLargeInteger( LARGE_INTEGER liValue )
  408. {
  409. unsigned normVal;
  410. if ( liValue.QuadPart < 0 )
  411. {
  412. liValue.QuadPart = -liValue.QuadPart;
  413. normVal = MINSHORT - MapLargeInteger( liValue );
  414. }
  415. else
  416. {
  417. normVal = MINSHORT + MapLargeInteger( liValue );
  418. }
  419. Win4Assert( normVal < 0x10000 );
  420. return normVal;
  421. }
  #ifdef TEST_NORM
  //
  // Sanity test for NormLong: for each power of two 2^k, asserts that 2^k
  // lands exactly one bin above 2^k - 1 (and, mirrored, that -2^k lands
  // exactly one bin below -(2^k - 1)).
  //
  void TestNormLong()
  {
      long lPow2 = 0;
      unsigned nPow2 = NormLong( lPow2 );
      printf(" Value:Bin 0x%8X : 0x%4X \t(%10d : %10d)\n", lPow2, nPow2, lPow2, nPow2 );

      // Positive side: lBelow / nBelow track the value just under the next
      // power of two and its bin.
      long lBelow = 0;
      unsigned nBelow = NormLong(1);

      for ( lPow2 = 2; !(lPow2 & 0x80000000); lPow2 <<= 1 )
      {
          nPow2 = NormLong( lPow2 );
          Win4Assert( nPow2 == nBelow+1 );

          lBelow = lPow2 + lPow2-1;
          nBelow = NormLong( lBelow );
      }

      // Negative side: same walk, with bins decreasing as magnitude grows.
      nBelow = NormLong(-1);
      printf(" Value:Bin 0x%8X : 0x%4X \t(%10d : %10d)\n", -1, nBelow, -1, nBelow );

      for ( lPow2 = 2; !(lPow2 & 0x80000000); lPow2 <<= 1 )
      {
          nPow2 = NormLong( -lPow2 );
          Win4Assert( nPow2 == nBelow-1 );

          lBelow = -( lPow2 + lPow2-1 );
          nBelow = NormLong( lBelow );
      }
  }
  #endif // TEST_NORM
  460. //+---------------------------------------------------------------------------
  461. //
  462. // Member: CNormalizer::CNormalizer
  463. //
  464. // Synopsis: constructor for normalizer
  465. //
  466. // Effects: gets buffers from noiselist
  467. //
  468. // Arguments: [nl] -- Noise list object to pass data on to.
  469. //
  470. // History: 05-June-91 t-WadeR Created.
  471. //
  472. // Notes:
  473. //
  474. //----------------------------------------------------------------------------
  475. CNormalizer::CNormalizer( PNoiseList& nl )
  476. : _noiseList(nl)
  477. {
  478. SetWordBuffer();
  479. // check that input size + prefix fits in the output buffer
  480. Win4Assert( cwcMaxKey * sizeof( WCHAR ) + cbKeyPrefix <= *_pcbOutBuf );
  481. }
  482. //+---------------------------------------------------------------------------
  483. //
  484. // Member: CNormalizer::GetFlags
  485. //
  486. // Synopsis: Returns address of ranking and range flags
  487. //
  488. // Arguments: [ppRange] -- range flag
  489. // [ppRank] -- rank flag
  490. //
  491. // History: 11-Feb-92 BartoszM Created.
  492. //
  493. //----------------------------------------------------------------------------
  494. void CNormalizer::GetFlags ( BOOL** ppRange, CI_RANK** ppRank )
  495. {
  496. _noiseList.GetFlags ( ppRange, ppRank );
  497. }
  498. //+---------------------------------------------------------------------------
  499. //
  500. // Member: CNormalizer::ProcessAltWord, public
  501. //
  502. // Synopsis: Normalizes a UniCode string, passes it to NoiseList.
  503. //
  504. // Effects: Deposits a normalized version of [pwcInBuf] in [_pbOutBuf].
  505. //
  506. // Arguments: [pwcInBuf] -- input buffer
  507. // [cwc] -- count of chars in pwcInBuf
  508. //
  509. // History: 03-May-95 SitaramR Created.
  510. //
  511. //----------------------------------------------------------------------------
  512. void CNormalizer::ProcessAltWord( WCHAR const *pwcInBuf, ULONG cwc )
  513. {
  514. SetNextAltBuffer();
  515. unsigned hash = NormalizeWord( pwcInBuf, cwc );
  516. SetAltHash( hash );
  517. }
  518. //+---------------------------------------------------------------------------
  519. //
  520. // Member: CNormalizer::ProcessWord, public
  521. //
  522. // Synopsis: Normalizes a UniCode string, passes it to NoiseList.
  523. //
  524. // Effects: Deposits a normalized version of [pwcInBuf] in [_pbOutBuf].
  525. //
  526. // Arguments: [pwcInBuf] -- input buffer
  527. // [cwc] -- count of chars in pwcInBuf
  528. //
  529. // History: 05-June-91 t-WadeR Created.
  530. // 13-Oct-92 AmyA Added unicode support
  531. //
  532. //----------------------------------------------------------------------------
  533. void CNormalizer::ProcessWord( WCHAR const *pwcInBuf, ULONG cwc )
  534. {
  535. if ( UsingAltBuffers() )
  536. SetNextAltBuffer();
  537. unsigned hash = NormalizeWord( pwcInBuf, cwc );
  538. if ( UsingAltBuffers() )
  539. {
  540. SetAltHash( hash );
  541. ProcessAllWords();
  542. }
  543. else
  544. _noiseList.PutWord( hash );
  545. }
  546. //+---------------------------------------------------------------------------
  547. //
  548. // Member: CNormalizer::ProcessAllWords, private
  549. //
  550. // Synopsis: Removes duplicate alternate words and emits remainder.
  551. //
  552. // History: 17-Sep-1999 KyleP Created.
  553. //
  554. //----------------------------------------------------------------------------
  555. void CNormalizer::ProcessAllWords()
  556. {
  557. //
  558. // Check for duplicate keys. Since the number of alternate forms will always be
  559. // quite small it's ok to use a O(n^2) algorithm here.
  560. //
  561. unsigned iFinal = 0;
  562. for ( unsigned i = 0; i < _cAltKey; i++ )
  563. {
  564. //
  565. // Already marked duplicate?
  566. //
  567. if ( 0 == _aAltKey[i].Count() )
  568. continue;
  569. iFinal = i;
  570. for ( unsigned j = i+1; j < _cAltKey; j++ )
  571. {
  572. //
  573. // Remember, Pid is really the hash here.
  574. //
  575. if ( _aAltKey[i].Pid() == _aAltKey[j].Pid() &&
  576. _aAltKey[i].Count() == _aAltKey[j].Count() &&
  577. RtlEqualMemory( _aAltKey[i].GetBuf(), _aAltKey[j].GetBuf(), _aAltKey[j].Count() ) )
  578. {
  579. ciDebugOut(( DEB_TRACE, "Duplicate keys: %u and %u\n", i, j ));
  580. _aAltKey[j].SetCount( 0 );
  581. }
  582. }
  583. }
  584. //
  585. // Now transfer any remaining key(s).
  586. //
  587. SetWordBuffer();
  588. unsigned hash;
  589. for ( i = 0; i <= iFinal; i++ )
  590. {
  591. //
  592. // Ignore duplicates
  593. //
  594. if ( 0 == _aAltKey[i].Count() )
  595. continue;
  596. //
  597. // Copy to the transfer buffer.
  598. //
  599. *_pcbOutBuf = _aAltKey[i].Count();
  600. RtlCopyMemory( _pbOutBuf, _aAltKey[i].GetBuf(), *_pcbOutBuf );
  601. hash = _aAltKey[i].Pid();
  602. //
  603. // If this is not the final "PutWord" call, send the data along.
  604. //
  605. if ( i != iFinal )
  606. _noiseList.PutAltWord( hash );
  607. }
  608. //
  609. // Put the final word
  610. //
  611. _noiseList.PutWord( hash );
  612. } //ProcessAllWords
  613. //+---------------------------------------------------------------------------
  614. //
  615. // Member: CNormalizer::NormalizeWord
  616. //
  617. // Synopsis: Normalizes a UniCode string
  618. // Calculates the hash function for normalized string.
  619. //
  620. // Arguments: [pwcInBuf] -- input buffer
  621. // [cwc] -- count of chars in pwcInBuf
  622. //
  623. // Returns: unsigned hash value of string
  624. //
  625. // History: 03-May-95 SitaramR Created.
  626. //
  627. //----------------------------------------------------------------------------
  628. unsigned CNormalizer::NormalizeWord( WCHAR const *pwcInBuf, ULONG cwc )
  629. {
  630. return NormalizeWord( pwcInBuf, cwc, _pbOutBuf, _pcbOutBuf );
  631. }
  632. //+---------------------------------------------------------------------------
  633. //
  634. // Member: CNormalizer::NormalizeWord
  635. //
  636. // Synopsis: Normalizes a UniCode string
  637. // Calculates the hash function for normalized string. This
  638. // function is identical to the other NormalizeWord function,
  639. // except that it puts the outputs in the output parameters
  640. //
  641. // Arguments: [pwcInBuf] -- input buffer
  642. // [cwc] -- count of chars in pwcInBuf
  643. // [pbOutBuf] -- output buffer.
  644. // [pcbOutBuf] - pointer to output count of bytes.
  645. //
  646. // Returns: unsigned hash value of string
  647. //
  648. // History: 03-May-1995 SitaramR Created.
  649. // 03-Oct-2000 KitmanH Added output parameters
  650. //
  651. //----------------------------------------------------------------------------
  652. unsigned CNormalizer::NormalizeWord( WCHAR const *pwcInBuf,
  653. ULONG cwc,
  654. BYTE *pbOutBuf,
  655. unsigned *pcbOutBuf )
  656. {
  657. // count of bytes needs to take into account STRING_KEY
  658. *pcbOutBuf = cwc * sizeof(WCHAR) + cbKeyPrefix;
  659. // prefix with the string key identifier
  660. *pbOutBuf++ = STRING_KEY;
  661. unsigned hash = 0;
  662. Win4Assert ( cwc != 0 && cwc <= cwcMaxKey );
  663. for ( unsigned i = 0; i < cwc; i++ )
  664. {
  665. WCHAR c = *pwcInBuf++;
  666. // normalize the character to upcase.
  667. c = ( c < 'a' ) ? c : ( c <= 'z' ) ? ( c - ('a' - 'A') ) :
  668. RtlUpcaseUnicodeChar( c );
  669. //
  670. // Store. Do it one byte at a time because the normalized string
  671. // must be byte compared.
  672. //
  673. *pbOutBuf++ = (BYTE)(c >> 8);
  674. *pbOutBuf++ = (BYTE)c;
  675. // hash
  676. hash = ( hash << 2 ) + c;
  677. }
  678. return hash;
  679. }
  680. //+---------------------------------------------------------------------------
  681. //
  682. // Member: CNormalizer::NormalizeWStr - Public
  683. //
  684. // Synopsis: Normalizes a UniCode string
  685. //
  686. // Arguments: [pwcInBuf] -- input buffer
  687. // [cwcInBuf] -- count of chars in pwcInBuf
  688. // [pbOutBuf] -- output buffer.
  689. // [pcbOutBuf] - pointer to output count of bytes.
  690. //
  691. // History: 10-Feb-2000 KitmanH Created
  692. //
  693. //----------------------------------------------------------------------------
  694. void CNormalizer::NormalizeWStr( WCHAR const *pwcInBuf,
  695. ULONG cwcInBuf,
  696. BYTE *pbOutBuf,
  697. unsigned *pcbOutBuf )
  698. {
  699. NormalizeWord( pwcInBuf,
  700. cwcInBuf,
  701. pbOutBuf,
  702. pcbOutBuf );
  703. }
  704. //+---------------------------------------------------------------------------
  705. //
  706. // Member: CValueNormalizer::CValueNormalizer
  707. //
  708. // Synopsis: Constructor
  709. //
  710. // Arguments: [krep] -- key repository sink for keys
  711. //
  712. // History: 21-Sep-92 BartoszM Created.
  713. //
  714. //----------------------------------------------------------------------------
  715. CValueNormalizer::CValueNormalizer( PKeyRepository& krep )
  716. : _krep(krep)
  717. {
  718. _krep.GetBuffers( &_pcbOutBuf, &_pbOutBuf, &_pOcc );
  719. _cbMaxOutBuf = *_pcbOutBuf;
  720. *_pOcc = 0;
  721. }
  722. //+---------------------------------------------------------------------------
  723. //
  724. // Member: CValueNormalizer::PutValue, public
  725. //
  726. // Synopsis: Store a variant
  727. //
  728. // Arguments: [pid] -- property id
  729. // [occ] -- On input: starting occurrence.
  730. // On output: next starting occurrence.
  731. // [var] -- value
  732. //
  733. // History: 04-Nov-94 KyleP Created.
  734. //
  735. //----------------------------------------------------------------------------
  736. void CValueNormalizer::PutValue( PROPID pid,
  737. OCCURRENCE & occ,
  738. CStorageVariant const & var )
  739. {
  740. *_pOcc = occ;
  741. switch ( var.Type() )
  742. {
  743. case VT_EMPTY:
  744. case VT_NULL:
  745. break;
  746. case VT_UI1:
  747. PutValue( pid, var.GetUI1() );
  748. break;
  749. case VT_I1:
  750. PutValue( pid, var.GetI1() );
  751. break;
  752. case VT_UI2:
  753. PutValue( pid, (USHORT) var.GetUI2() );
  754. break;
  755. case VT_I2:
  756. PutValue( pid, var.GetI2() );
  757. break;
  758. case VT_I4:
  759. case VT_INT:
  760. PutValue( pid, var.GetI4() );
  761. break;
  762. case VT_R4:
  763. PutValue( pid, var.GetR4() );
  764. break;
  765. case VT_R8:
  766. PutValue( pid, var.GetR8() );
  767. break;
  768. case VT_UI4:
  769. case VT_UINT:
  770. PutValue( pid, var.GetUI4() );
  771. break;
  772. case VT_I8:
  773. PutValue( pid, var.GetI8() );
  774. break;
  775. case VT_UI8:
  776. PutValue( pid, var.GetUI8() );
  777. break;
  778. case VT_BOOL:
  779. PutValue( pid, (BYTE) (FALSE != var.GetBOOL()) );
  780. break;
  781. case VT_ERROR:
  782. PutValue( pid, var.GetERROR() );
  783. break;
  784. case VT_CY:
  785. PutValue( pid, var.GetCY() );
  786. break;
  787. case VT_DATE:
  788. PutDate( pid, var.GetDATE() );
  789. break;
  790. case VT_FILETIME:
  791. PutValue( pid, var.GetFILETIME() );
  792. break;
  793. case VT_CLSID:
  794. PutValue( pid, *var.GetCLSID() );
  795. break;
  796. // NTRAID#DB-NTBUG9-84589-2000/07/31-dlee Indexing Service data type normalization doesn't handle VT_DECIMAL, VT_VECTOR, or VT_ARRAY.
  797. default:
  798. ciDebugOut(( DEB_IWARN, "Unhandled type %d (%x) sent to normalization\n",
  799. var.Type(), var.Type() ));
  800. break;
  801. }
  802. occ = *_pOcc;
  803. }
  804. //+---------------------------------------------------------------------------
  805. //
  806. // Member: CValueNormalizer::PutValue private
  807. //
  808. // Synopsis: Store a unsigned 2 byte value without altering it
  809. //
  810. // Arguments: [pid] -- property id
  811. // [uValue] -- value
  812. // [bType] -- value type
  813. //
  814. // History: 07-Oct-93 DwightKr Created.
  815. //
  816. // Notes: This is the principal PutValue method that other PutValue()s
  817. // will call. Each of the OTHER PutValue()'s sole purpose is
  818. // to normalize their input data into a 2-byte unsigned value.
  819. // This version of PutValue() will store the value together
  820. // with its WID, PID, size, etc. in the CDataRepository object.
  821. //
  822. //----------------------------------------------------------------------------
  823. void CValueNormalizer::PutValue( PROPID pid, unsigned uValue, BYTE bType )
  824. {
  825. BYTE* pb = _pbOutBuf;
  826. // Store size of entry
  827. *_pcbOutBuf = sizeof(USHORT) + sizeof(PROPID) + 1;
  828. // Store key type
  829. *pb++ = bType;
  830. // store property id
  831. *pb++ = (BYTE)(pid >> 24);
  832. *pb++ = (BYTE)(pid >> 16);
  833. *pb++ = (BYTE)(pid >> 8);
  834. *pb++ = (BYTE) pid;
  835. // Store key
  836. Win4Assert( uValue < 0x10000 );
  837. *pb++ = BYTE (uValue >> 8);
  838. *pb++ = BYTE (uValue);
  839. #if CIDBG == 1
  840. for (unsigned i = 0; i < *_pcbOutBuf; i++ )
  841. {
  842. ciDebugOut (( DEB_USER1 | DEB_NOCOMPNAME, "%02x ", _pbOutBuf[i] ));
  843. }
  844. ciDebugOut (( DEB_USER1 | DEB_NOCOMPNAME, "\n" ));
  845. #endif
  846. _krep.PutPropId(pid);
  847. _krep.PutKey();
  848. (*_pOcc)++;
  849. }
  850. void CValueNormalizer::PutMinValue( PROPID pid, OCCURRENCE & occ, VARENUM Type )
  851. {
  852. *_pOcc = occ;
  853. PutValue( pid, 0, Type );
  854. occ = *_pOcc;
  855. }
  856. void CValueNormalizer::PutMaxValue( PROPID pid, OCCURRENCE & occ, VARENUM Type )
  857. {
  858. *_pOcc = occ;
  859. PutValue( pid, 0xFFFF, Type );
  860. occ = *_pOcc;
  861. }
  862. //+---------------------------------------------------------------------------
  863. //
  864. // Member: CValueNormalizer::PutValue public
  865. //
  866. // Synopsis: Store a 1 byte value without altering it
  867. //
  868. // Arguments: [pid] -- property id
  869. // [byte] -- value
  870. //
  871. // History: 25-Oct-93 DwightKr Created.
  872. //
  873. // Notes: One byte values are NOT normalized, they are stored as is.
  874. //
  875. //----------------------------------------------------------------------------
  876. void CValueNormalizer::PutValue( PROPID pid, BYTE byte )
  877. {
  878. PutValue(pid, (unsigned) byte, VT_UI1);
  879. }
  880. //+---------------------------------------------------------------------------
  881. //
  882. // Member: CValueNormalizer::PutValue public
  883. //
  884. // Synopsis: Store a 1 byte signed value, biased by 0x80 so that -128..127 maps to 0x00..0xFF
  885. //
  886. // Arguments: [pid] -- property id
  887. // [ch] -- value
  888. //
  889. // History: 25-Oct-1993 DwightKr Created.
  890. // 29-Sep-2000 KitmanH Normalize VT_I1 values
  891. //
  892. //----------------------------------------------------------------------------
  893. void CValueNormalizer::PutValue( PROPID pid, CHAR ch )
  894. {
  895. PutValue(pid, ( ((BYTE) ch) + 0x80 ) & 0xFF, VT_I1);
  896. }
  897. //+---------------------------------------------------------------------------
  898. //
  899. // Member: CValueNormalizer::PutValue
  900. //
  901. // Synopsis: Store the high byte of an unsigned 2 byte value
  902. //
  903. // Arguments: [pid] -- property id
  904. // [usValue] -- value
  905. //
  906. // History: 07-Oct-93 DwightKr Created.
  907. //
  908. //----------------------------------------------------------------------------
  909. void CValueNormalizer::PutValue( PROPID pid, USHORT usValue )
  910. {
  911. PutValue(pid, (usValue >> 8) & 0xFF, VT_UI2);
  912. }
  913. //+---------------------------------------------------------------------------
  914. //
  915. // Member: CValueNormalizer::PutValue public
  916. //
  917. // Synopsis: Store the high byte of a signed 2 byte value.
  918. //
  919. // Arguments: [pid] -- property id
  920. // [sValue] -- value
  921. //
  922. // Notes: Add the smallest BYTE to this so that we translate numbers
  923. // into the range above 0. i.e. -32768 maps into 0x00, and 32767
  924. // maps into 0xFF.
  925. //
  926. // History: 07-Oct-93 DwightKr Created.
  927. //
  928. //----------------------------------------------------------------------------
  929. void CValueNormalizer::PutValue( PROPID pid, SHORT sValue )
  930. {
  931. PutValue(pid, ((sValue >> 8) + 0x80) & 0xFF, VT_I2);
  932. }
  933. //+---------------------------------------------------------------------------
  934. //
  935. // Member: CValueNormalizer::PutValue public
  936. //
  937. // Synopsis: Store the base-2 log of the ULONG value.
  938. //
  939. // Arguments: [pid] -- property id
  940. // [ulValue] -- value
  941. //
  942. // Notes: This convert ULONGs into the range 0 - 31 by taking the Log2
  943. // of the number.
  944. //
  945. // History: 07-Oct-93 DwightKr Created.
  946. //
  947. //----------------------------------------------------------------------------
  948. void CValueNormalizer::PutValue( PROPID pid, ULONG ulValue )
  949. {
  950. PutValue(pid, NormULong ( ulValue ), VT_UI4);
  951. }
  952. //+---------------------------------------------------------------------------
  953. //
  954. // Member: CValueNormalizer::PutValue
  955. //
  956. // Synopsis: Store the base-2 log of the signed LONG value.
  957. //
  958. // Arguments: [pid] -- property id
  959. // [lValue] -- value
  960. //
  961. // Notes: This converts LONGs into numbers larger than 0. This
  962. // translates into 64 bins; 32 bins for #'s < 0 & 32 bins for
  963. // #'s >= 0.
  964. //
  965. // History: 07-Oct-93 DwightKr Created.
  966. //
  967. //----------------------------------------------------------------------------
  968. void CValueNormalizer::PutValue( PROPID pid, LONG lValue )
  969. {
  970. PutValue(pid, NormLong(lValue), VT_I4);
  971. }
  972. //+---------------------------------------------------------------------------
  973. //
  974. // Member: CValueNormalizer::PutValue
  975. //
  976. // Synopsis: Store the base-10 log of the FLOAT value.
  977. //
  978. // Arguments: [pid] -- property id
  979. // [rValue] -- value
  980. //
  981. // Notes: floats fit into a total of 41 bins.
  982. //
  983. // History: 07-Oct-93 DwightKr Created.
  984. //
  985. //----------------------------------------------------------------------------
  986. void CValueNormalizer::PutValue( PROPID pid, float rValue )
  987. {
  988. PutValue(pid, NormDouble(rValue), VT_R4);
  989. }
  990. //+---------------------------------------------------------------------------
  991. //
  992. // Member: CValueNormalizer::PutValue
  993. //
  994. // Synopsis: Store the base-10 log of the DOUBLE value.
  995. //
  996. // Arguments: [pid] -- property id
  997. // [dValue] -- value
  998. //
  999. // Notes: doubles fit into a total of 41 bins.
  1000. //
  1001. // History: 07-Oct-93 DwightKr Created.
  1002. //
  1003. //----------------------------------------------------------------------------
  1004. void CValueNormalizer::PutValue( PROPID pid, double dValue )
  1005. {
  1006. PutValue(pid, NormDouble(dValue), VT_R8);
  1007. }
  1008. //+---------------------------------------------------------------------------
  1009. //
  1010. // Member: CValueNormalizer::PutValue
  1011. //
  1012. // Synopsis: Store the exponent of a large integer
  1013. //
  1014. // Arguments: [pid] -- property id
  1015. // [li] -- value
  1016. //
  1017. // History: 21-Sep-92 BartoszM Created.
  1018. // 04-Feb-93 KyleP Use LARGE_INTEGER
  1019. // 25-Oct-92 DwightKr Copied here & removed extra code &
  1020. // accounted for negative numbers
  1021. //
  1022. //----------------------------------------------------------------------------
  1023. void CValueNormalizer::PutValue( PROPID pid, LARGE_INTEGER liValue )
  1024. {
  1025. unsigned uExponent = NormLargeInteger(liValue);
  1026. PutValue( pid, uExponent, VT_I8);
  1027. }
  1028. //+---------------------------------------------------------------------------
  1029. //
  1030. // Member: CValueNormalizer::PutValue
  1031. //
  1032. // Synopsis: Store a compressed large integer
  1033. //
  1034. // Arguments: [pid] -- property id
  1035. // [uli] -- value
  1036. //
  1037. // History: 09 Feb 96 AlanW Created.
  1038. //
  1039. //----------------------------------------------------------------------------
  1040. void CValueNormalizer::PutValue( PROPID pid, ULARGE_INTEGER uliValue )
  1041. {
  1042. unsigned uExponent = NormULargeInteger(uliValue);
  1043. PutValue( pid, uExponent, VT_UI8);
  1044. }
  1045. //+---------------------------------------------------------------------------
  1046. //
  1047. // Member: CValueNormalizer::PutValue
  1048. //
  1049. // Synopsis: Store the least byte of a GUID
  1050. //
  1051. // Arguments: [pid] -- property id
  1052. // [guid] -- value
  1053. //
  1054. // Notes: The GUID generators are guaranteed to modify the TOP DWORD
  1055. // of the 32-byte GUID each time a new GUID is generated.
  1056. // The lower bytes of the GUID is the network address of the
  1057. // card which generated the UUID.
  1058. //
  1059. // We would like to cluster together together objects of a single
  1060. // class (all MS-Word objects together for example). Since it
  1061. // is possible that someone could generate UUIDs for more than
  1062. // one application on a single machine, the lower portion of
  1063. // the UUID will perhaps remain constant between class IDs. The
  1064. // only part of the UUID which is guaranteed to be unique between
  1065. // multiple objects is the field which represents time. It is
  1066. // unlikely that two classes were generated the same second on
  1067. // two different machines.
  1068. //
  1069. // History: 25-Oct-93 DwightKr Created.
  1070. //
  1071. //----------------------------------------------------------------------------
  1072. void CValueNormalizer::PutValue( PROPID pid, GUID const & Guid )
  1073. {
  1074. PutValue(pid, Guid.Data1 & 0xFFFF, VT_CLSID);
  1075. }
  1076. long CastToLong( double d )
  1077. {
  1078. //
  1079. // bit 63 = sign
  1080. // bits 52 - 62 = exponent
  1081. // bits 0 - 51 = mantissa
  1082. //
  1083. LARGE_INTEGER * pli = (LARGE_INTEGER *)&d;
  1084. int exp = (pli->HighPart & 0x7ff00000) >> 20;
  1085. if ( exp == 0 )
  1086. {
  1087. //
  1088. // Special case: Zero, NaNs, etc.
  1089. //
  1090. return( 0 );
  1091. }
  1092. //
  1093. // Subtract off bias
  1094. //
  1095. exp -= 0x3ff;
  1096. if ( exp < 0 )
  1097. {
  1098. // Cast of very small number to unsigned long. Loss of precision
  1099. return( 0 );
  1100. }
  1101. else if ( exp > 30 )
  1102. {
  1103. // Cast of very large number to unsigned long. Overflow
  1104. if ( pli->HighPart & 0x80000000 )
  1105. return( LONG_MIN );
  1106. else
  1107. return( LONG_MAX );
  1108. }
  1109. else
  1110. {
  1111. //
  1112. // We need to get the top 32 bits of the mantissa
  1113. // into a dword.
  1114. //
  1115. unsigned long temp = pli->LowPart >> (32 - 12);
  1116. temp |= pli->HighPart << (32 - 20);
  1117. //
  1118. // Add the 'hidden' bit of the mantissa. (Since all doubles
  1119. // are normalized to 1.?????? the highest 1 bit isn't stored)
  1120. //
  1121. temp = temp >> 1;
  1122. temp |= 0x80000000;
  1123. //
  1124. // Thow away digits to the right of decimal
  1125. //
  1126. temp = temp >> (31 - exp);
  1127. //
  1128. // Adjust for sign
  1129. //
  1130. Win4Assert( (temp & 0x80000000) == 0 );
  1131. long temp2;
  1132. if ( pli->HighPart & 0x80000000 )
  1133. temp2 = temp * -1;
  1134. else
  1135. temp2 = temp;
  1136. return( temp2 );
  1137. }
  1138. } //CastToLong
  1139. //+---------------------------------------------------------------------------
  1140. //
  1141. // Member: CValueNormalizer::PutDate
  1142. //
  1143. // Synopsis: Dates are passed in as the number of days (and fractional days)
  1144. // since Jan. 1, 1900. We'll crunch this down to the number of
  1145. // weeks. Dates are passed in a doubles. We'll assume that
  1146. // negative numbers represent dates before Jan. 1, 1900.
  1147. //
  1148. // Arguments: [pid] -- property id
  1149. // [DATE] -- value (double)
  1150. //
  1151. // Notes: Since dates before Jan 1, 1900 are passed as negative numbers
  1152. // we'll need to normalize them to something >= 0.
  1153. //
  1154. // time period resolution # bins
  1155. // =========================== =============== ======
  1156. // year < 10Bil BC -- bin = 0 1
  1157. // 10Bil BC <= year <= 1 BC -- log10 (year) 11
  1158. // 1 BC < year <= 1900 -- year 1902
  1159. // 1901 AD <= year <= 2050 AD -- daily 54787
  1160. // 2051 AD <= year <= 10Bil AD -- log10 (year) 8
  1161. // year > 10Bil AD -- bin = 0xFFFF 1
  1162. //
  1163. //
  1164. // I choose the daily range from 1901 - 2050 since there is a lot
  1165. // of events in the 20th century (WW I, WW II, landing on the
  1166. // moon, my wife's birthday, etc.) that are interesting, and
  1167. // imporant. It is likely that dates outside of this range will
  1168. // be rounded to the nearest year (1492, 1776, 1812, 1867, etc).
  1169. //
  1170. // Also by breaking the log10(year) at 1 BC rather than some other
  1171. // date (such as 0000 AD, or 1 AD) we avoid values in the range
  1172. // 1 BC < year < 1 AD, calculating log10(year) resulting in
  1173. // large negative numbers. Everything in this range should be in
  1174. // bin #12. It also avoids taking log10(0).
  1175. //
  1176. //
  1177. // History: 25-Oct-93 DwightKr Created.
  1178. // 07-Dec-94 KyleP Remove use of floating point
  1179. //
  1180. //----------------------------------------------------------------------------
  1181. void CValueNormalizer::PutDate( PROPID pid, DATE const & Date )
  1182. {
  1183. const int MinDate = 42; // 2^42 --> ~4.4E12 days --> ~12E9 years --> 12 billion B.C.
  1184. const int MinByYear = 20; // 2^20 --> ~1.0E6 days --> ~2.9E3 years --> 970 B.C.
  1185. const int cMinByYear = (1 << MinByYear) / 365 + 1; // 2873
  1186. const int MaxDaily = (2051 - 1900) * 365; // 55115
  1187. const int MinByYearAD = 15; // 2^15 --> ~32768 days --> ...
  1188. const int MaxDate = 42; // 2^42 --> ~4.4E12 days --> ~12E9 years --> 12 billion A.D.
  1189. const unsigned FirstBC = 0;
  1190. const unsigned FirstLogBC = FirstBC + 1;
  1191. const unsigned LastLogBC = FirstLogBC + MinDate - MinByYear;
  1192. const unsigned FirstYearBC = LastLogBC + 1;
  1193. const unsigned LastYearBC = FirstYearBC + cMinByYear;
  1194. const unsigned FirstDaily = LastYearBC + 1;
  1195. const unsigned LastDaily = FirstDaily + MaxDaily;
  1196. const unsigned FirstLogAD = LastDaily + 1;
  1197. const unsigned LastLogAD = FirstLogAD + MaxDate - MinByYearAD;
  1198. const unsigned LastAD = 0xFFFF;
  1199. Win4Assert( LastLogAD < 0xFFFF );
  1200. unsigned bin;
  1201. BOOL fPositive;
  1202. int exp = GetExpAndSign( Date, fPositive );
  1203. if ( !fPositive )
  1204. {
  1205. //
  1206. // Very large negative dates go in first bin
  1207. //
  1208. if ( exp >= MinDate )
  1209. bin = FirstBC;
  1210. //
  1211. // Medium size negative dates get 1 bin / power of 2
  1212. //
  1213. else if ( exp >= MinByYear )
  1214. bin = FirstLogBC - exp + MinByYear;
  1215. //
  1216. // All other dates before 1900 get 1 bucket per 365 days.
  1217. //
  1218. else
  1219. {
  1220. long cYears = CastToLong( Date ) / 365;
  1221. Win4Assert( cYears >= -cMinByYear && cYears <= 0 );
  1222. bin = FirstYearBC + cYears + cMinByYear;
  1223. }
  1224. }
  1225. else
  1226. {
  1227. //
  1228. // Very large positive dates go in last bin
  1229. //
  1230. if ( exp >= MaxDate )
  1231. bin = LastAD;
  1232. else
  1233. {
  1234. long cDays = CastToLong( Date );
  1235. //
  1236. // Dates rather far in the future get 1 bucket / power of 2
  1237. //
  1238. if ( cDays >= MaxDaily )
  1239. bin = FirstLogAD + exp - MinByYearAD;
  1240. //
  1241. // Days close to today get 1 bucket per day
  1242. //
  1243. else
  1244. bin = FirstDaily + cDays;
  1245. }
  1246. }
  1247. PutValue(pid, bin, VT_DATE);
  1248. } //PutDate
  1249. //+---------------------------------------------------------------------------
  1250. //
  1251. // Member: CValueNormalizer::PutValue
  1252. //
  1253. // Synopsis: Store the hashed value of an 8-byte currency.
  1254. //
  1255. // Arguments: [pid] -- property id
  1256. // [cyValue] -- value
  1257. //
  1258. // Notes: Currency values are stored as a ULONG cents, and a LONG $.
  1259. // We'll ignore the cents portion and store the $ part using
  1260. // the standard LONG storage method.
  1261. //
  1262. // History: 26-Oct-93 DwightKr Created.
  1263. //
  1264. //----------------------------------------------------------------------------
  1265. void CValueNormalizer::PutValue( PROPID pid, CURRENCY const & cyValue)
  1266. {
  1267. PutValue(pid, NormLong(cyValue.Hi), VT_CY);
  1268. }
  1269. //+---------------------------------------------------------------------------
  1270. //
  1271. // Member: CValueNormalizer::PutValue
  1272. //
  1273. // Synopsis: Store the number of days since Jan 1, 1980;
  1274. //
  1275. // Arguments: [pid] -- property id
  1276. // [ulValue] -- value
  1277. //
  1278. // History: 07-Oct-93 DwightKr Created.
  1279. //
  1280. // Notes: This algorithym calculates the number of days since Jan 1,
  1281. // 1980; and stores it into a unsigned. FileTimes are divided
  1282. // into the following ranges:
  1283. //
  1284. // FileTime < 1980 => bin 0
  1285. // 1980 <= FileTime <= 1993 week granularity => bins 1 - 729
  1286. // 1994 <= FileTime <= 2160 day granularity => bins 730+
  1287. // FileTime > 2160 => bin 0xFFFF
  1288. //
  1289. //----------------------------------------------------------------------------
  1290. void CValueNormalizer::PutValue( PROPID pid, FILETIME const & ftValue )
  1291. {
  1292. //
  1293. // Determine the number of days since Jan 1, 1601 by dividing by
  1294. // the number of 100 nanosecond intervals in a day. The result
  1295. // will fit into a ULONG.
  1296. //
  1297. // Then map the result into one of the ranges: before 1980, between
  1298. // 1980 and 1994, between 1994 and 2160, and after 2160. To make
  1299. // the computation easier, we use precomputed values of the number
  1300. // of days from 1601 and the breakpoints of our range.
  1301. //
  1302. // 100s of nanosecs per day
  1303. const ULONGLONG uliTicsPerDay = 24 * 60 * 60 * (ULONGLONG)10000000;
  1304. const ULONG ulStart = 138426; // number of days from 1601 to 1980
  1305. const ULONG ulMiddle= 143542; // number of days from 1601 to 1/2/1994
  1306. const ULONG ulEnd = 204535; // number of days from 1601 to 2161
  1307. ULARGE_INTEGER liValue = {ftValue.dwLowDateTime, ftValue.dwHighDateTime};
  1308. ULONG ulDays = (ULONG) (liValue.QuadPart / uliTicsPerDay);
  1309. //
  1310. // We now have the number of days since Jan. 01, 1601 in ulDays.
  1311. // Map into buckets.
  1312. //
  1313. if (ulDays < ulStart) // Store in bin 0
  1314. {
  1315. PutValue(pid, 0, VT_FILETIME);
  1316. }
  1317. else if (ulDays <= ulMiddle) // Store week granularity
  1318. {
  1319. PutValue(pid, (ulDays + 1 - ulStart) / 7, VT_FILETIME);
  1320. }
  1321. else if (ulDays <= ulEnd) // Store day granularity
  1322. {
  1323. //
  1324. // Bins 0 - 730 are used by the two clauses above. It doesn't
  1325. // really matter if we reuse bin 730 for the start of the next
  1326. // range (this might happen because of the division we do).
  1327. //
  1328. PutValue(pid, (ulDays + 1 - ulMiddle) + ((ulMiddle - ulStart) / 7),
  1329. VT_FILETIME);
  1330. }
  1331. else // FileTime > 2160
  1332. {
  1333. PutValue(pid, 0xFFFF, VT_FILETIME);
  1334. }
  1335. }