Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

516 lines
14 KiB

  1. //+-------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. //
  5. // Copyright (C) Microsoft Corporation, 1997 - 1998
  6. //
  7. // File: marginals.cpp
  8. //
  9. //--------------------------------------------------------------------------
  10. //
  11. // marginals.cpp: Definitions for marginals tables
  12. //
  13. #include <basetsd.h>
  14. #include <math.h>
  15. #include "gmobj.h"
  16. #include "marginals.h"
  17. #include "algos.h"
  18. #include "parmio.h"
  19. #include "bndist.h"
  20. /*
  21. The marginalization story. Each MARGINALS structure maintains an array of node
  22. pointers representing the nodes whose discrete probabilities it covers. Since there
  23. was a total ordering over all nodes at clique time, any two node sets can be merged
  24. to determine which members are absent. Given, of course, that one table is a (possibly
  25. improper) subset of the other, which is always in a clique tree. There are three cases:
  26. * A node and its "parent" or "family" clique (the smallest clique containing it
  27. and all its parents); the clique must be at least as large as the node's family.
  28. * A sepset and its source (parent) clique; the sepset marginal must be a proper
  29. subset of the clique.
  30. * A sepset and its sink (child) clique; same as the other sepset case above.
  31. So we always know which of the two sets is the superset.
  32. There's the question of node ordering. When the edge between a node and its "family"
  33. clique is created, a reordering table is computed based upon the clique-time total ordering.
  34. This table gives the family indices in clique order. (Note that the node itself will
  35. always be the last member of its family.) Use of this table allows full marginalization
  36. of the family clique.
  37. (Hereafter, "CMARG" is the clique MARGINALS table; "NDPROB" is the table of probabilities
  38. for the node in question.)
  39. The CMARG has a complete set of dimensions and node pointers.
  40. Marginalization of a node given its parent clique works as follows.
  41. 1) Make a copy of CMARG's table of dimensions (Vimd()).
  42. 2) Create a one-dimensional MDVCPD based on the state space of the
  43. target node.
  44. 3) Walk the MARGINALS VPGNODEMBN array. Change the sign of each entry
  45. which IS NOT the target node. For example, if the array is:
  46. Node Pointer VIMD
  47. 0x4030ab30 3
  48. 0x4030ab52 2
  49. 0x4030ac10 4
  50. and the node pointer is 0x4030ab52 (entry #2), the resulting
  51. VIMD should be
  52. -3
  53. 2
  54. -4
  55. 4) Then set up an MDVSLICE for the new MDVCPD which uses the
  56. special "pseudo-dimension" VIMD created in the last step.
  57. 5) Create two iterators: one for the MARGINALS table in its entirety,
  58. the other for the temporary MDVCPD and MDVSLICE created in the last step.
  59. 6) Iterate over the two, adding elements from the MARGINALS into
  60. the MDVCPD.
  61. 7) Normalize if necessary.
  62. */
  63. //////////////////////////////////////////////////////////////////////
  64. //
  65. // Helper functions
  66. //
  67. //////////////////////////////////////////////////////////////////////
  68. // Reorder a single m-d vector subscript array. 'vimdReorder' is the
  69. // table in MARGINALS (topological) sequence of the original dimensions.
  70. inline
  71. void MARGINALS :: ReorderVimd (
  72. const VIMD & vimdReorder, // Reordering array
  73. const VIMD & vimdIn, // Original subscript vector
  74. VIMD & vimdOut ) // Result: must be properly sized already!
  75. {
  76. int cDim = vimdReorder.size();
  77. assert( vimdIn.size() == cDim && vimdOut.size() == cDim );
  78. for ( int iDim = 0; iDim < cDim; iDim++ )
  79. {
  80. int iDimReord = vimdReorder[iDim];
  81. assert( iDimReord >= 0 && iDimReord < cDim );
  82. vimdOut[iDim] = vimdIn[iDimReord];
  83. }
  84. }
  85. // Reorder an array containing a node's family based upon the reordering
  86. // table given.
  87. inline
  88. void MARGINALS :: ReorderVimdNodes (
  89. const VIMD & vimdReorder, // Reordering array
  90. GNODEMBND * pgndd, // Discrete node to provide reorder for
  91. VPGNODEMBN & vpgnd ) // Result
  92. {
  93. VPGNODEMBN vpgndUnord;
  94. pgndd->GetFamily( vpgndUnord );
  95. int cDim = vimdReorder.size();
  96. assert( cDim == vpgndUnord.size() );
  97. vpgnd.resize( cDim );
  98. for ( int iDim = 0; iDim < cDim; iDim++ )
  99. {
  100. int iDimReord = vimdReorder[iDim];
  101. assert( iDimReord >= 0 && iDimReord < cDim );
  102. vpgnd[iDim] = vpgndUnord[iDimReord];
  103. }
  104. }
  105. inline
  106. static
  107. int vimdProd ( const VIMD & vimd )
  108. {
  109. int iprod = 1;
  110. for ( int i = 0; i < vimd.size() ; )
  111. {
  112. iprod *= vimd[i++];
  113. }
  114. return iprod;
  115. }
  116. inline
  117. static
  118. bool bIsProb ( const REAL & r )
  119. {
  120. return r >= 0.0 && r <= 1.0;
  121. }
  122. // Centralized "throw serious error" point
  123. void MARGINALS :: ThrowMisuse ( SZC szcMsg )
  124. {
  125. THROW_ASSERT( EC_MDVECT_MISUSE, szcMsg );
  126. }
  127. // Return the table of pseudo-dimensions for marginalizing to a single node
  128. VSIMD MARGINALS :: VsimdFromNode ( GNODEMBND * pgndd )
  129. {
  130. // Build the pseudo-dimension descriptor
  131. VIMD vimdMarg = VimdDim();
  132. VSIMD vsimdMarg( vimdMarg.size() );
  133. bool bFound = false;
  134. for ( int idim = 0; idim < vimdMarg.size(); idim++ )
  135. {
  136. SIMD simd = vimdMarg[idim];
  137. if ( pgndd != _vpgnd[idim] )
  138. simd = -simd; // Negate the missing dimension
  139. else
  140. {
  141. assert( ! bFound ); // Better not be in the list twice!
  142. bFound = true;
  143. }
  144. vsimdMarg[idim] = simd;
  145. }
  146. if ( ! bFound )
  147. ThrowMisuse( "attempt to marginalize non-member node");
  148. return vsimdMarg;
  149. }
  150. // Marginalize down to a single node
  151. void MARGINALS :: Marginalize ( GNODEMBND * pgndd, MDVCPD & distd )
  152. {
  153. // Initialize and clear the UPD
  154. ResizeDistribution( pgndd, distd );
  155. distd.Clear();
  156. // Get the pseudo-dimension descriptor for this node
  157. VSIMD vsimdMarg = VsimdFromNode( pgndd );
  158. // Construct the slice which governs the missing dimensions
  159. MDVSLICE mdvs( vsimdMarg );
  160. Iterator itSelf( self );
  161. Iterator itSubset( distd, mdvs );
  162. while ( itSelf.BNext() )
  163. {
  164. itSubset.Next() += itSelf.Next();
  165. }
  166. distd.Normalize();
  167. }
  168. VSIMD MARGINALS :: VsimdSubset ( const VPGNODEMBN & vpgndSubset )
  169. {
  170. // Build the pseudo-dimension descriptor. This means to walk
  171. // a copy of self's dimension array, negating dimensions which
  172. // are not present in the result.
  173. VIMD vimdMarg = VimdDim();
  174. int idimSubset = 0;
  175. VSIMD vsimdMarg(vimdMarg.size());
  176. // Iterate over each node in the self set
  177. for ( int idimSelf = 0;
  178. idimSelf < vimdMarg.size();
  179. idimSelf++ )
  180. {
  181. SIMD simd = vimdMarg[idimSelf];
  182. if ( idimSubset < vpgndSubset.size()
  183. && _vpgnd[idimSelf] == vpgndSubset[idimSubset] )
  184. {
  185. // Found; leave dimension alone
  186. idimSubset++;
  187. }
  188. else
  189. {
  190. // Missing; mark as "pseudo-dimension"
  191. simd = - simd;
  192. }
  193. vsimdMarg[idimSelf] = simd;
  194. }
  195. if ( idimSubset != vpgndSubset.size() )
  196. ThrowMisuse( "attempt to marginalize non-member node");
  197. return vsimdMarg;
  198. }
  199. // Marginalize down to a subset of our node set. Note that the
  200. // the nodes must be in the same order (with gaps, of course, in the
  201. // subset).
  202. void MARGINALS :: Marginalize (
  203. const VPGNODEMBN & vpgndSubset, // Subset array of nodes
  204. MARGINALS & margSubset ) // Marginalized result structure
  205. {
  206. // Initialize the result mdv
  207. margSubset.Init( vpgndSubset );
  208. // Call the common code
  209. Marginalize( margSubset );
  210. }
  211. // Marginalize down to a subset of our node set using the other
  212. // marginal's built-in table of nodes
  213. void MARGINALS :: Marginalize ( MARGINALS & margSubset )
  214. {
  215. // Build the pseudo-dimension descriptor.
  216. VSIMD vsimdMarg = VsimdSubset( margSubset.Vpgnd() );
  217. // Construct the slice which governs the missing dimensions
  218. MDVSLICE mdvs( vsimdMarg );
  219. Iterator itSelf( self );
  220. Iterator itSubset( margSubset, mdvs );
  221. Marginalize( margSubset, itSelf, itSubset );
  222. }
  223. void MARGINALS :: Marginalize (
  224. MARGINALS & margSubset,
  225. Iterator & itSelf,
  226. Iterator & itSubset )
  227. {
  228. margSubset.Clear();
  229. itSelf.Reset();
  230. itSubset.Reset();
  231. while ( itSelf.BNext() )
  232. {
  233. itSubset.Next() += itSelf.Next();
  234. }
  235. }
  236. // For "absorption", update one sepset marginal from another
  237. void MARGINALS :: UpdateRatios ( const MARGINALS & marg )
  238. {
  239. int cElem = size();
  240. if ( cElem != marg.size() )
  241. ThrowMisuse( "updating ratios requires same sized marginals" );
  242. for ( int i = 0; i < cElem; i++ )
  243. {
  244. REAL & rThis = self[i];
  245. if ( rThis != 0.0 )
  246. rThis = marg[i] / rThis;
  247. }
  248. }
  249. // Given a reorder table, return true if it's moot (no reordering present)
  250. bool MARGINALS :: BOrdered ( const VIMD & vimdReorder )
  251. {
  252. for ( int i = 0; i < vimdReorder.size(); i++ )
  253. {
  254. if ( vimdReorder[i] != i )
  255. return false;
  256. }
  257. return true;
  258. }
  259. // Assuming that the fastest-changing (highest) dimension is the base
  260. // state space, set the probabilities of this table to uniform.
  261. void MARGINALS :: SetUniform ()
  262. {
  263. const VIMD & vimdDim = VimdDim();
  264. int cState = vimdDim[ vimdDim.size() - 1 ];
  265. REAL rUniform = 1.0 / cState;
  266. Clear( rUniform );
  267. }
  268. // Construct the complete table of conditional probabilities for a given node
  269. // given a reordering table. The reordering table is maintained as part of
  270. // the clique membership arc (GEDGEMBN_CLIQ) for a node if the clique is
  271. // the "family" clique (the smallest clique containing node and its parents).
  272. //
  273. // At exit, the node pointer table of self is complete and in standard order.
  274. //
  275. // The "family reorder" vector is in clique order and contains the index
  276. // of the node's parents which occurs in that position. Note that the
  277. // node itself is always last in either ordering. In its own p-table,
  278. // its states are the fastest varying subcript. In the clique, it must
  279. // fall last in any marginalization containing only itself and its parents
  280. // due to the topological sorting employed in ordering nodes for clique
  281. // membership.
  282. void MARGINALS :: CreateOrderedCPDFromNode (
  283. GNODEMBND * pgndd,
  284. const VIMD & vimdFamilyReorder )
  285. {
  286. int cFam = vimdFamilyReorder.size();
  287. // Access the distribution in the node
  288. BNDIST & bndist = pgndd->Bndist();
  289. const VIMD & vimdDist = bndist.VimdDim();
  290. assert( vimdDist.size() == cFam );
  291. // Create this m-d vector's dimension table by reordering the
  292. // array of dimensions of the node's distribution and
  293. // initializing accordingly.
  294. VIMD vimd( cFam );
  295. ReorderVimd( vimdFamilyReorder, vimdDist, vimd );
  296. ReorderVimdNodes( vimdFamilyReorder, pgndd, _vpgnd );
  297. assert( _vpgnd.size() == cFam );
  298. assert( ifind( _vpgnd, pgndd ) >= 0 );
  299. Init( vimd );
  300. assert( vimdProd( vimdDist ) == size() );
  301. if ( bndist.BDense() )
  302. {
  303. // Dense distribution
  304. // Create the reordering iterator
  305. Iterator itNode( bndist.Mdvcpd() );
  306. if ( ! BOrdered( vimdFamilyReorder ) )
  307. itNode.SetDimReorder( vimdFamilyReorder );
  308. Iterator itSelf( self );
  309. while ( itSelf.BNext() )
  310. {
  311. itSelf.Next() = itNode.Next();
  312. }
  313. }
  314. else
  315. {
  316. // Sparse distribution. Iterate over all elements
  317. // and plop them into their proper locations. Since
  318. // there may be missing elements, set everything to
  319. // uniform first, and normalize as we go.
  320. SetUniform();
  321. VIMD vimdState( cFam );
  322. int cPar = cFam - 1;
  323. int cState = VimdDim()[cPar];
  324. // Prepare a value to be used to replace any bogus (n/a) values in the nodes.
  325. REAL rUniform = 1.0 / cState;
  326. MPCPDD::const_iterator itdmEnd = bndist.Mpcpdd().end();
  327. for ( MPCPDD::const_iterator itdm = bndist.Mpcpdd().begin();
  328. itdm != itdmEnd;
  329. itdm++ )
  330. {
  331. const VIMD & vimdIndex = (*itdm).first;
  332. const VLREAL & vlr = (*itdm).second;
  333. // Construct a complete subscript vector; first, the parents
  334. for ( int iDim = 0; iDim < cPar; iDim++ )
  335. vimdState[iDim] = vimdIndex[iDim];
  336. // Then iterate over each element of the DPI state vector
  337. vimdState[cPar] = 0;
  338. ReorderVimd( vimdFamilyReorder, vimdState, vimd );
  339. for ( int iState = 0; iState < cState; iState++ )
  340. {
  341. vimd[cPar] = iState;
  342. const REAL & r = vlr[iState];
  343. self[vimd] = bIsProb( r )
  344. ? r
  345. : rUniform;
  346. }
  347. }
  348. }
  349. }
  350. // Multiply corresponding entries in this marginal by those in another
  351. void MARGINALS :: MultiplyBySubset ( const MARGINALS & marg )
  352. {
  353. //MSRDEVBUG: create a const version of MDVDENSE::Iterator
  354. MARGINALS & margSubset = const_cast<MARGINALS &> (marg);
  355. // Build the pseudo-dimension descriptor.
  356. VSIMD vsimdMarg = VsimdSubset( margSubset.Vpgnd() );
  357. // Construct the slice which governs the missing dimensions
  358. MDVSLICE mdvs( vsimdMarg );
  359. // Construct the iterators for self and subset with missing dimensions
  360. Iterator itSelf( self );
  361. Iterator itSubset( margSubset, mdvs );
  362. MultiplyBySubset( itSelf, itSubset );
  363. }
  364. // Multiply corresponding entries using precomputed iterators
  365. void MARGINALS :: MultiplyBySubset (
  366. Iterator & itSelf,
  367. Iterator & itSubset )
  368. {
  369. itSelf.Reset();
  370. itSubset.Reset();
  371. while ( itSelf.BNext() )
  372. {
  373. itSelf.Next() *= itSubset.Next();
  374. }
  375. }
  376. void MARGINALS :: Multiply ( REAL r )
  377. {
  378. for ( int i = 0; i < size(); )
  379. {
  380. self[i++] *= r;
  381. }
  382. }
  383. void MARGINALS :: Invert ()
  384. {
  385. for ( int i = 0; i < size(); i++ )
  386. {
  387. REAL & r = self[i];
  388. if ( r != 0.0 )
  389. r = 1.0 / r;
  390. }
  391. }
  392. void MARGINALS :: ClampNode ( GNODEMBND * pgndd, const CLAMP & clamp )
  393. {
  394. if (! clamp.BActive() )
  395. return ;
  396. // Get the clamped state
  397. IST ist = clamp.Ist();
  398. // Find which dimension is represented by this node
  399. int iDim = ifind( _vpgnd, pgndd );
  400. if ( iDim < 0
  401. || ist >= Vimd()[iDim] )
  402. ThrowMisuse("invalid clamp");
  403. // Iterate over the entire table, zapping states which are inconsistent
  404. // with the evidence.
  405. Iterator itSelf( self );
  406. for ( int i = 0; itSelf.BNext(); i++ )
  407. {
  408. int iIst = itSelf.Vitmd()[iDim];
  409. if ( iIst != ist )
  410. itSelf.Next() = 0.0;
  411. else
  412. itSelf.IndxUpd();
  413. }
  414. assert( i == size() );
  415. }
  416. void MARGINALS :: Dump()
  417. {
  418. cout << "\n\tMarginals members: "
  419. << (const VPGNODEMBN &)_vpgnd // MSRDEVBUG: cast unnecessary for VC++ 5.0
  420. << "\n\t";
  421. Iterator itSelf(self);
  422. cout << itSelf;
  423. }
  424. // Return true if each entry in this marginal is equal the corresponding entry
  425. // in a like-dimensioned other marginal within the stated tolerance
  426. bool MARGINALS :: BEquivalent ( const MARGINALS & marg, REAL rTolerance )
  427. {
  428. // Test dimensionality
  429. if ( VimdDim() != marg.VimdDim() )
  430. return false;
  431. const VLREAL & vrSelf = first;
  432. const VLREAL & vrOther = marg.first;
  433. REAL rTol = fabs(rTolerance);
  434. for ( int i = 0; i < vrSelf.size(); i++ )
  435. {
  436. const REAL & rSelf = vrSelf[i];
  437. const REAL & rOther = vrOther[i];
  438. REAL rdiff = fabs(rSelf) - fabs(rOther);
  439. if ( fabs(rdiff) > rTol )
  440. break;
  441. }
  442. return i == vrSelf.size() && i == vrOther.size();
  443. }