/****************************************************************************** Copyright (c) 1999 Microsoft Corporation Module Name: MergedHHK.cpp Abstract: This file contains the implementation of the classes used to parse and process HHK files. Revision History: Davide Massarenti (Dmassare) 12/18/99 created ******************************************************************************/ #include #include //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// #ifdef DEBUG #define DEBUG_REGKEY HC_REGISTRY_HELPSVC L"\\Debug" #define DEBUG_DUMPHHK L"DUMPHHK" static bool m_fInitialized = false; static bool m_fDumpHHK = false; static void Local_ReadDebugSettings() { __HCP_FUNC_ENTRY( "Local_ReadDebugSettings" ); HRESULT hr; MPC::RegKey rkBase; bool fFound; if(m_fInitialized) __MPC_FUNC_LEAVE; __MPC_EXIT_IF_METHOD_FAILS(hr, rkBase.SetRoot( HKEY_LOCAL_MACHINE )); __MPC_EXIT_IF_METHOD_FAILS(hr, rkBase.Attach ( DEBUG_REGKEY )); __MPC_EXIT_IF_METHOD_FAILS(hr, rkBase.Exists ( fFound )); if(fFound) { CComVariant vValue; __MPC_EXIT_IF_METHOD_FAILS(hr, rkBase.get_Value( vValue, fFound, DEBUG_DUMPHHK )); if(fFound && vValue.vt == VT_I4) { m_fDumpHHK = vValue.lVal ? true : false; } } m_fInitialized = true; __HCP_FUNC_CLEANUP; } static void Local_DumpStream( /*[in]*/ LPCWSTR szFile, /*[in]*/ IStream* streamIN, /*[in]*/ HRESULT hrIN ) { __HCP_FUNC_ENTRY( "Local_DumpStream" ); static int iSeq = 0; HRESULT hr; CComPtr streamOUT; Local_ReadDebugSettings(); if(m_fDumpHHK) { USES_CONVERSION; WCHAR rgBuf [MAX_PATH]; CHAR rgBuf2[ 64]; ULARGE_INTEGER liWritten; StringCchPrintfW( rgBuf, ARRAYSIZE(rgBuf), L"C:\\TMP\\dump_%d.hhk", iSeq++ ); StringCchPrintfA( rgBuf2, ARRAYSIZE(rgBuf2), "%s\n" , W2A( szFile ) ); __MPC_EXIT_IF_METHOD_FAILS(hr, MPC::CreateInstance( &streamOUT )); __MPC_EXIT_IF_METHOD_FAILS(hr, streamOUT->InitForWrite( rgBuf )); streamOUT->Write( rgBuf2, strlen(rgBuf2), &liWritten.LowPart ); if(SUCCEEDED(hrIN) && streamIN) { STATSTG statstg; LARGE_INTEGER li; ULARGE_INTEGER liRead; streamIN->Stat( &statstg, STATFLAG_NONAME ); streamIN->CopyTo( streamOUT, statstg.cbSize, &liRead, &liWritten ); li.LowPart = 0; li.HighPart = 0; streamIN->Seek( li, STREAM_SEEK_SET, NULL ); } } __HCP_FUNC_CLEANUP; } #endif //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // ENTITY TABLE SWIPED FROM IE static const struct { const char* szName; int ch; } rgEntities[] = { "AElig", '\306', // capital AE diphthong (ligature) "Aacute", '\301', // capital A, acute accent "Acirc", '\302', // capital A, circumflex accent "Agrave", '\300', // capital A, grave accent "Aring", '\305', // capital A, ring "Atilde", '\303', // capital A, tilde "Auml", '\304', // capital A, dieresis or umlaut mark "Ccedil", '\307', // capital C, cedilla "Dstrok", '\320', // capital Eth, Icelandic "ETH", '\320', // capital Eth, Icelandic "Eacute", '\311', // capital E, acute accent "Ecirc", '\312', // capital E, circumflex accent "Egrave", '\310', // capital E, grave accent "Euml", '\313', // capital E, dieresis or umlaut mark "Iacute", '\315', // capital I, acute accent "Icirc", '\316', // capital I, circumflex accent "Igrave", '\314', // capital I, grave accent "Iuml", '\317', // capital I, dieresis or umlaut mark "Ntilde", '\321', // capital N, tilde "Oacute", '\323', // capital O, acute accent "Ocirc", '\324', // capital O, circumflex accent "Ograve", '\322', // capital O, grave accent "Oslash", '\330', // capital O, slash "Otilde", '\325', // capital O, tilde "Ouml", '\326', // capital O, dieresis or umlaut mark "THORN", '\336', // capital THORN, Icelandic "Uacute", '\332', // capital U, acute accent "Ucirc", '\333', // capital U, circumflex accent "Ugrave", '\331', // capital U, grave accent "Uuml", '\334', // capital U, dieresis or umlaut mark "Yacute", '\335', // capital Y, acute accent "aacute", '\341', // small a, acute accent "acirc", '\342', // small a, circumflex accent "acute", '\264', // acute accent "aelig", '\346', // small ae diphthong (ligature) "agrave", '\340', // small a, grave accent "amp", '\046', // ampersand "aring", '\345', // small a, ring "atilde", '\343', // small a, tilde "auml", '\344', // small a, dieresis or umlaut mark "brkbar", '\246', // broken vertical bar "brvbar", '\246', // broken vertical bar "ccedil", '\347', // small c, cedilla "cedil", '\270', // cedilla "cent", '\242', // small c, cent "copy", '\251', // copyright symbol (proposed 2.0) "curren", '\244', // currency symbol "deg", '\260', // degree sign "die", '\250', // umlaut (dieresis) "divide", '\367', // divide sign "eacute", '\351', // small e, acute accent "ecirc", '\352', // small e, circumflex accent "egrave", '\350', // small e, grave accent "eth", '\360', // small eth, Icelandic "euml", '\353', // small e, dieresis or umlaut mark "frac12", '\275', // fraction 1/2 "frac14", '\274', // fraction 1/4 "frac34", '\276', // fraction 3/4*/ "gt", '\076', // greater than "hibar", '\257', // macron accent "iacute", '\355', // small i, acute accent "icirc", '\356', // small i, circumflex accent "iexcl", '\241', // inverted exclamation "igrave", '\354', // small i, grave accent "iquest", '\277', // inverted question mark "iuml", '\357', // small i, dieresis or umlaut mark "laquo", '\253', // left angle quote "lt", '\074', // less than "macr", '\257', // macron accent "micro", '\265', // micro sign "middot", '\267', // middle dot "nbsp", '\240', // non-breaking space (proposed 2.0) "not", '\254', // not sign "ntilde", '\361', // small n, tilde "oacute", '\363', // small o, acute accent "ocirc", '\364', // small o, circumflex accent "ograve", '\362', // small o, grave accent "ordf", '\252', // feminine ordinal "ordm", '\272', // masculine ordinal "oslash", '\370', // small o, slash "otilde", '\365', // small o, tilde "ouml", '\366', // small o, dieresis or umlaut mark "para", '\266', // paragraph sign "plusmn", '\261', // plus minus "pound", '\243', // pound sterling "quot", '"', // double quote "raquo", '\273', // right angle quote "reg", '\256', // registered trademark (proposed 2.0) "sect", '\247', // section sign "shy", '\255', // soft hyphen (proposed 2.0) "sup1", '\271', // superscript 1 "sup2", '\262', // superscript 2 "sup3", '\263', // superscript 3 "szlig", '\337', // small sharp s, German (sz ligature) "thorn", '\376', // small thorn, Icelandic "times", '\327', // times sign "trade", '\231', // trademark sign "uacute", '\372', // small u, acute accent "ucirc", '\373', // small u, circumflex accent "ugrave", '\371', // small u, grave accent "uml", '\250', // umlaut (dieresis) "uuml", '\374', // small u, dieresis or umlaut mark "yacute", '\375', // small y, acute accent "yen", '\245', // yen "yuml", '\377', // small y, dieresis or umlaut mark 0, 0 }; static BOOL ReplaceEscapes( PCSTR pszSrc, PSTR pszDst, DWORD cchDst) { if(StrChrA( pszSrc, '&' ) == NULL) { // If we get here, there are no escape sequences, so copy the string and return. if(pszDst != pszSrc) { StringCchCopyA( pszDst, cchDst, pszSrc ); } return FALSE; // nothing changed } PSTR pszDstBase = pszDst; while(*pszSrc && (pszDst - pszDstBase < cchDst)) { if(IsDBCSLeadByte(*pszSrc)) { if(pszSrc[1]) { *pszDst++ = *pszSrc++; *pszDst++ = *pszSrc++; } else { // leadbyte followed by 0; invalid! *pszDst++ = '?'; break; } } else if(*pszSrc == '&') { pszSrc++; if(*pszSrc == '#') { // SGML/HTML character entity (decimal) pszSrc++; for(int val = 0; *pszSrc && *pszSrc != ';'; pszSrc++) { if(*pszSrc >= '0' && *pszSrc <= '9') { val = val * 10 + *pszSrc - '0'; } else { while(*pszSrc && *pszSrc != ';') { pszSrc++; } break; } } if(val) { *pszDst++ = (char)val; } } else if(*pszSrc) { char szEntityName[256]; int count = 0; for(PSTR p = szEntityName; *pszSrc && *pszSrc != ';' && *pszSrc != ' ' && count < sizeof(szEntityName);) { *p++ = *pszSrc++; count++; } *p = 0; if(*pszSrc == ';') pszSrc++; for(int i = 0; rgEntities[i].szName; i++) { if(!strcmp(szEntityName, rgEntities[i].szName)) { if(rgEntities[i].ch) { *pszDst++ = (char)rgEntities[i].ch; } break; } } if(!rgEntities[i].szName) { // illegal entity name, put in a block character *pszDst++ = '?'; } } } else { // just your usual character... *pszDst++ = *pszSrc++; } } *pszDst = 0; return TRUE; } static void ReplaceCharactersWithEntity( /*[out]*/ MPC::string& strValue , /*[out]*/ MPC::string& strBuffer ) { LPCSTR szToEscape = strValue.c_str(); CHAR ch; strBuffer.erase(); while((ch = *szToEscape++)) { switch(ch) { case '&': strBuffer += "&" ; break; case '"': strBuffer += """; break; case '<': strBuffer += "<" ; break; case '>': strBuffer += ">" ; break; default: strBuffer += ch ; break; } } strValue = strBuffer; } //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// static const char txtBeginList [] = "UL>"; static const char txtEndList [] = "/UL>"; static const char txtBeginListItem [] = "LI"; static const char txtBeginObject [] = "OBJECT"; static const char txtEndObject [] = "/OBJECT"; static const char txtParam [] = "param name"; static const char txtValue [] = "value"; static const char txtParamKeyword [] = "Keyword"; static const char txtParamName [] = "Name"; static const char txtParamSeeAlso [] = "See Also"; static const char txtParamLocal [] = "Local"; static const char txtType [] = "type"; static const char txtSiteMapObject [] = "text/sitemap"; ///////////////////////////////////////////////////////////////////////////// static const char txtHeader[] = "\n" \ "\n" \ "\n" \ "\n" \ "\n" \ "\n" \ "\n" \ "\t\n" \ "\n" \ "
    \n"; static const char txtTail[] = "
\n" \ "\n"; ///////////////////////////////////////////////////////////////////////////// static const char txtIndent [] = "\t"; static const char txtNewSection_Open [] = "\t
  • \n"; static const char txtNewSection_Close [] = "\t\t\n"; static const char txtNewSubSection_Open [] = "\t
      \n"; static const char txtNewSubSection_Close[] = "\t
    \n"; static const char txtNewParam_Name [] = "\t\t\n"; static const char txtIndexFirstLevel [] = "hcp://system/errors/indexfirstlevel.htm"; ///////////////////////////////////////////////////////////////////////////// BOOL HHK::Reader::s_fDBCSSystem = (BOOL)::GetSystemMetrics( SM_DBCSENABLED ); LCID HHK::Reader::s_lcidSystem = ::GetUserDefaultLCID(); ///////////////////////////////////////////////////////////////////////////// static void ConvertToAnsi( MPC::string& strANSI, const MPC::wstring& strUNICODE ) { USES_CONVERSION; strANSI = W2A( strUNICODE.c_str() ); } //////////////////////////////////////////////////////////////////////////////// void HHK::Entry::MergeURLs( const HHK::Entry& entry ) { Entry::UrlIterConst itUrlNew; Entry::UrlIter itUrlOld; // // Just copy unique URLs. // for(itUrlNew = entry.m_lstUrl.begin(); itUrlNew != entry.m_lstUrl.end(); itUrlNew++) { bool fInsert = true; for(itUrlOld = m_lstUrl.begin(); itUrlOld != m_lstUrl.end(); itUrlOld++) { int res = Reader::StrColl( (*itUrlOld).c_str(), (*itUrlNew).c_str() ); if(res == 0) { // Same URL, skip it. fInsert = false; break; } if(res > 0) { // // Old > New, insert New before Old. // break; } } // // If fInsert is set, we need to insert "New" just before "Old". // // This work also in the case itUrlOld == end(). // if(fInsert) m_lstUrl.insert( itUrlOld, *itUrlNew ); } } HHK::Section::Section() { } HHK::Section::~Section() { MPC::CallDestructorForAll( m_lstSeeAlso ); } void HHK::Section::MergeURLs( const Entry& entry ) { Section::EntryIter itEntry; bool fInsert = true; for(itEntry = m_lstEntries.begin(); itEntry != m_lstEntries.end(); itEntry++) { Entry& entryOld = *itEntry; int res = Reader::StrColl( entryOld.m_strTitle.c_str(), entry.m_strTitle.c_str() ); if(res == 0) { // Same title, just merge the URLs. entryOld.MergeURLs( entry ); fInsert = false; break; } if(res > 0) { // // Old > New, insert New before Old. // break; } } // // Make a copy, insert it at the right position... // if(fInsert) m_lstEntries.insert( itEntry, entry ); } void HHK::Section::MergeSeeAlso( const Section& sec ) { Section::SectionIterConst itSec; Section::SectionIter itSecOld; Section* subsec; Section* subsecOld; int res; for(itSec = sec.m_lstSeeAlso.begin(); itSec != sec.m_lstSeeAlso.end(); itSec++) { bool fInsert = true; subsec = *itSec; for(itSecOld = m_lstSeeAlso.begin(); itSecOld != m_lstSeeAlso.end(); itSecOld++) { subsecOld = *itSecOld; res = Reader::StrColl( subsecOld->m_strTitle.c_str(), subsec->m_strTitle.c_str() ); if(res == 0) { // // Same title, merge the entries. // Section::EntryIterConst itEntry; for(itEntry = subsec->m_lstEntries.begin(); itEntry != subsec->m_lstEntries.end(); itEntry++) { subsecOld->MergeURLs( *itEntry ); } fInsert = false; break; } if(res > 0) { // // Old > New, insert New before Old. // break; } } if(fInsert) { if((subsecOld = new Section())) { // // Copy everything, except "see also" list. // *subsecOld = *subsec; subsecOld->m_lstSeeAlso.clear(); m_lstSeeAlso.insert( itSecOld, subsecOld ); } } } } void HHK::Section::CleanEntries( EntryList& lstEntries ) { Section::EntryIterConst itEntry; for(itEntry = lstEntries.begin(); itEntry != lstEntries.end(); ) { const Entry& entry = *itEntry; if(entry.m_strTitle.length() == 0 || entry.m_lstUrl.size() == 0 ) { lstEntries.erase( itEntry ); itEntry = lstEntries.begin(); } else { itEntry++; } } } ///////////////////////////////////////////////////////////////////////////// LPCSTR HHK::Reader::StrChr( LPCSTR szString, CHAR cSearch ) { if(s_fDBCSSystem) { CHAR c; while((c = *szString)) { while(::IsDBCSLeadByte( c )) { szString++; if( *szString++ == 0) return NULL; if((c = *szString ) == 0) return NULL; } if(c == cSearch) return szString; szString++; } return NULL; } return ::strchr( szString, cSearch ); } LPCSTR HHK::Reader::StriStr( LPCSTR szString, LPCSTR szSearch ) { if(!szString || !szSearch) return NULL; LPCSTR szCur = szString; CHAR ch = (int)tolower(*szSearch); int cb = strlen ( szSearch); for(;;) { while(tolower(*szCur) != ch && *szCur) { szCur = s_fDBCSSystem ? ::CharNextA( szCur ) : szCur + 1; } if(!*szCur) return NULL; if(::CompareStringA( s_lcidSystem, NORM_IGNORECASE, szCur, cb, szSearch, cb ) == 2) return szCur; szCur = s_fDBCSSystem ? ::CharNextA( szCur ) : szCur + 1; } } int HHK::Reader::StrColl( LPCSTR szLeft, LPCSTR szRight ) { DWORD dwLeftLen = strlen(szLeft) + 2; DWORD dwRightLen = strlen(szRight) + 2; LPSTR szLeftCopy = (LPSTR)_alloca( dwLeftLen ); LPSTR szRightCopy = (LPSTR)_alloca( dwRightLen ); ReplaceEscapes( szLeft , szLeftCopy , dwLeftLen ); ReplaceEscapes( szRight, szRightCopy , dwRightLen ); switch(::CompareStringA( s_lcidSystem, NORM_IGNORECASE, szLeftCopy, -1, szRightCopy, -1 )) { case CSTR_LESS_THAN : return -1; case CSTR_EQUAL : return 0; case CSTR_GREATER_THAN: return 1; } return _stricmp( szLeftCopy, szRightCopy ); } LPCSTR HHK::Reader::ComparePrefix( LPCSTR szString, LPCSTR szPrefix ) { int cb = strlen( szPrefix ); if(_strnicoll( szString, szPrefix, cb ) == 0) return &szString[cb]; return NULL; } ///////////////////////////////////////////////////////////////////////////// HHK::Reader::Reader() { // CComPtr m_stream; // CHAR m_rgBuf[HHK_BUF_SIZE]; m_szBuf_Pos = NULL; // LPSTR m_szBuf_Pos; m_szBuf_End = NULL; // LPSTR m_szBuf_End; // // MPC::string m_strLine; m_szLine_Pos = NULL; // LPCSTR m_szLine_Pos; m_szLine_End = NULL; // LPCSTR m_szLine_End; m_iLevel = 0; // int m_iLevel; m_fOpeningBraceSeen = false; // bool m_fOpeningBraceSeen; } HHK::Reader::~Reader() { } /* HRESULT HHK::Reader::Init( LPCWSTR szFile ) Initializes the Reader as either a stream coming from a CHM of a plain file. */ HRESULT HHK::Reader::Init( LPCWSTR szFile ) { __HCP_FUNC_ENTRY( "HHK::Reader::Init" ); HRESULT hr; CComBSTR bstrStorageName; CComBSTR bstrFilePath; if(MPC::MSITS::IsCHM( szFile, &bstrStorageName, &bstrFilePath )) { USES_CONVERSION; m_strStorage = "ms-its:"; m_strStorage += OLE2A( SAFEBSTR( bstrStorageName ) ); m_strStorage += "::/"; __MPC_EXIT_IF_METHOD_FAILS(hr, MPC::MSITS::OpenAsStream( bstrStorageName, bstrFilePath, &m_stream )); } else { CComPtr fsStream; __MPC_EXIT_IF_METHOD_FAILS(hr, MPC::CreateInstance( &fsStream )); __MPC_EXIT_IF_METHOD_FAILS(hr, fsStream->InitForRead( szFile )); __MPC_EXIT_IF_METHOD_FAILS(hr, fsStream.QueryInterface( &m_stream )); } hr = S_OK; __HCP_FUNC_CLEANUP; #ifdef DEBUG Local_DumpStream( szFile, m_stream, hr ); #endif __HCP_FUNC_EXIT(hr); } ///////////////////////////////////////////////////////////////////////////// /* bool HHK::Reader::ReadNextBuffer() Reads the Next Input Buffer into m_rgBuf then resets member variable m_szBuf_Pos to point at the beginning of the Buffer and m_szBuf_end to point at the end of the Buffer. Returns: true - If it could read a buffer false - When at End of File (EOF), or on read error condition. */ bool HHK::Reader::ReadNextBuffer() { bool fRes = false; if(m_stream) { HRESULT hr; ULONG cbRead; hr = m_stream->Read( m_rgBuf, sizeof(m_rgBuf)-1, &cbRead ); if(SUCCEEDED(hr) && cbRead) { m_szBuf_Pos = m_rgBuf; m_szBuf_End = &m_rgBuf[cbRead]; m_szBuf_End[0] = 0; // So it's a string... fRes = true; } } return fRes; } /* bool HHK::Reader::GetLine(): Reads the Next Text Line from reader Input Stream returns: true - if it could read information false - if at Enf of File (EOF). */ bool HHK::Reader::GetLine( MPC::wstring* pstrString ) { LPSTR szEnd; LPSTR szMatch1; LPSTR szMatch2; int cb; bool fRes = false; bool fSkip = true; m_strLine.erase(); for(;;) { // // Make sure the buffer has data, otherwise exit. // if(IsEndOfBuffer()) { if(ReadNextBuffer() == false) { // // End of file: return 'true' if we got any text. // if(m_strLine.length()) fRes = true; break; } } // // Skip initial end of lines... // if(fSkip) { if(m_szBuf_Pos[0] == '\r' || m_szBuf_Pos[0] == '\n' ) { m_szBuf_Pos++; continue; } fSkip = false; } szMatch1 = (LPSTR)StrChr( m_szBuf_Pos, '\r' ); szMatch2 = (LPSTR)StrChr( m_szBuf_Pos, '\n' ); if(szMatch1 == NULL || (szMatch2 && szMatch1 > szMatch2)) szMatch1 = szMatch2; // Pick the first to appear, between \r and \n. if(szMatch1 == NULL) { // // End of line not found, save all the buffer. // cb = m_szBuf_End - m_szBuf_Pos; if(cb) fRes = true; m_strLine.append( m_szBuf_Pos, cb ); m_szBuf_Pos = m_szBuf_End; } else { cb = szMatch1 - m_szBuf_Pos; if(cb) fRes = true; m_strLine.append( m_szBuf_Pos, cb ); m_szBuf_Pos = szMatch1; break; } } if(fRes) { m_szLine_Pos = m_strLine.begin(); m_szLine_End = m_strLine.end (); // // Remove trailing spaces. // while(m_szLine_End > m_szLine_Pos && m_szLine_End[-1] == ' ') { --m_szLine_End; } if(m_szLine_End != m_strLine.end()) { ; } } else { m_szLine_Pos = NULL; m_szLine_End = NULL; } if(pstrString) { USES_CONVERSION; *pstrString = A2W( m_strLine.c_str() ); } return fRes; } ///////////////////////////////////////////////////////////////////////////// /* bool HHK::Reader::FirstNonSpace( bool fWrap ) This function sets the current Reader position to the First non space character it finds. If fWrap is set, it can go over End Of Line (EOL) markers. Return Value: It reports back whether there is or not a non space character forward from the current Reader stream position. */ bool HHK::Reader::FirstNonSpace( bool fWrap ) { for(;;) { LPCSTR szMatch; while(IsEndOfLine()) { if(fWrap == false) return false; if(GetLine() == false) return false; } if(s_fDBCSSystem) { if(IsDBCSLeadByte( *m_szLine_Pos )) break; } if(m_szLine_Pos[0] != ' ' && m_szLine_Pos[0] != '\t' ) { break; } m_szLine_Pos++; } return true; } /* HHK::Reader::FindCharacter( CHAR ch, bool fSkip, bool fWrap ): Finds a character within a given Reader Stream. if fWrap is set it goes beyond End of Line characters If fSkip is set it instructs the routine to not only find the character, but also to skip it and return the first non space character. */ bool HHK::Reader::FindCharacter( CHAR ch, bool fSkip, bool fWrap ) { for(;;) { LPCSTR szMatch; while(IsEndOfLine()) { if(fWrap == false) return false; if(GetLine() == false) return false; } szMatch = StrChr( m_szLine_Pos, ch ); if(szMatch) { m_szLine_Pos = szMatch; if(fSkip) m_szLine_Pos++; break; } m_szLine_Pos = m_szLine_End; // Skip the whole line. } return fSkip ? FirstNonSpace( fWrap ) : true; } bool HHK::Reader::FindDblQuote ( bool fSkip, bool fWrap ) { return FindCharacter( '"', fSkip, fWrap ); } bool HHK::Reader::FindOpeningBrace( bool fSkip, bool fWrap ) { return FindCharacter( '<', fSkip, fWrap ); } bool HHK::Reader::FindClosingBrace( bool fSkip, bool fWrap ) { return FindCharacter( '>', fSkip, fWrap ); } ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// // // We need to extract from "". // ///////////////////////////////////////////////////////////////////////////// bool HHK::Reader::GetQuotedString( MPC::string& strString ) { LPCSTR szPos; strString.erase(); // // Skip past beginning quote. // if(FindDblQuote() == false) return false; for(;;) { szPos = m_szLine_Pos; // // Find ending quote of parameter value, but don't skip it. // if(FindDblQuote( false, false )) { strString.append( szPos, m_szLine_Pos - szPos ); break; } else { strString.append( szPos, m_szLine_End - szPos ); if(GetLine() == false) return false; } } // // Skip past ending quote. // return FindDblQuote(); } /* bool HHK::Reader::GetValue( MPC::string& strName, MPC::string& strValue ) We are after '" value="">' ... " portion of the line. returns: true - If syntax is correct and everything was as expected. false - Some unexpected syntactitc error occured. */ bool HHK::Reader::GetValue( MPC::string& strName, MPC::string& strValue ) { LPCSTR szPos; strValue.erase(); if(GetQuotedString( strName ) == false) return false; // // Find parameter value. // for(;;) { while(IsEndOfLine()) { if(GetLine() == false) return false; } szPos = StriStr( m_szLine_Pos, txtValue ); if(szPos) { m_szLine_Pos = szPos + MAXSTRLEN(txtValue); break; } } if(GetQuotedString( strValue ) == false) return false; return FindClosingBrace(); } ///////////////////////////////////////////////////////////////////////////// // // We are after ' from ' type="">' // ///////////////////////////////////////////////////////////////////////////// bool HHK::Reader::GetType( MPC::string& strType ) { LPCSTR szPos; strType.erase(); // // Find type text. // for(;;) { while(IsEndOfLine()) { if(GetLine() == false) return false; } szPos = StriStr( m_szLine_Pos, txtType ); if(szPos) { m_szLine_Pos = szPos + MAXSTRLEN(txtValue); break; } } if(GetQuotedString( strType ) == false) return false; return FindClosingBrace(); } ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// HHK::Section* HHK::Reader::Parse() { MPC::string strName; MPC::string strValue; MPC::string strType; LPCSTR szPos; Section* section = NULL; Section* sectionCurrent = NULL; bool fComplete = false; for(;;) { if(m_fOpeningBraceSeen) { m_fOpeningBraceSeen = false; } else { if(FindOpeningBrace() == false) break; } if((szPos = ComparePrefix( m_szLine_Pos, txtParam ))) { m_szLine_Pos = szPos; if(GetValue( strName, strValue ) == false) break; { if(sectionCurrent) { if(!StrColl( strName.c_str(), txtParamKeyword )) { sectionCurrent->m_strTitle = strValue; } else if(!StrColl( strName.c_str(), txtParamName )) { if(sectionCurrent->m_strTitle.length() == 0) // Title of the section. { sectionCurrent->m_strTitle = strValue; } else // Title of the entry. { Section::EntryIter it = sectionCurrent->m_lstEntries.insert( sectionCurrent->m_lstEntries.end() ); it->m_strTitle = strValue; } } else if(!StrColl( strName.c_str(), txtParamLocal )) // URL of the entry. { Section::EntryIter it; if(sectionCurrent->m_lstEntries.size()) { it = sectionCurrent->m_lstEntries.end(); it--; } else { // // No title for this entry, so let's create it without title... // it = sectionCurrent->m_lstEntries.insert( sectionCurrent->m_lstEntries.end() ); // // If it's the first entry, use the keyword as a title. // if(sectionCurrent->m_lstEntries.size()) { it->m_strTitle = sectionCurrent->m_strTitle; } } if(m_strStorage.length()) { MPC::string strFullUrl( m_strStorage ); LPCSTR szValue = strValue.c_str(); // // If the entry in the HHK is in the form: ::/, drop the last component of the storage base. // if(strValue.find( "::/" ) != strValue.npos) { LPCSTR szStart; LPCSTR szEnd; szStart = strFullUrl.c_str(); szEnd = strrchr( szStart, '\\' ); if(szEnd) { strFullUrl.resize( (szEnd - szStart) + 1 ); } // // Handle the case for "MS-ITS:::/" // szStart = strchr( szValue, ':' ); if(szStart && szStart[1] != ':') szValue = szStart+1; } else if(strValue.find( ":/" ) != strValue.npos) // If it's a full URL (with a protocol), just add the value. { strFullUrl = ""; } strFullUrl += szValue; it->m_lstUrl.push_back( strFullUrl ); } else { it->m_lstUrl.push_back( strValue ); } } else if(!StrColl( strName.c_str(), txtParamSeeAlso )) // See Also { if(sectionCurrent) { sectionCurrent->m_strSeeAlso = strValue; } } } } } else if((szPos = ComparePrefix( m_szLine_Pos, txtBeginList ))) { m_szLine_Pos = szPos; m_iLevel++; if(FirstNonSpace() == false) break; } else if((szPos = ComparePrefix( m_szLine_Pos, txtEndList ))) { m_szLine_Pos = szPos; m_iLevel--; if(FirstNonSpace() == false) break; } else if((szPos = ComparePrefix( m_szLine_Pos, txtBeginListItem ))) { if(section) { if(m_iLevel == 1) { // // Ok, the node is really closed. // // Since we have already read the opening brace for the NEXT node, set the flag. // m_fOpeningBraceSeen = true; return section; } } m_szLine_Pos = szPos; if(FindClosingBrace() == false) break; if(FindOpeningBrace() == false) break; if((szPos = ComparePrefix( m_szLine_Pos, txtBeginObject ))) { m_szLine_Pos = szPos; if(GetType( strType ) == false) break; //////////////////// New Node //////////////////// if(!StrColl( strType.c_str(), txtSiteMapObject )) { if(m_iLevel == 1) { section = new Section(); if(section == NULL) break; sectionCurrent = section; } else if(section) { sectionCurrent = new Section(); if(sectionCurrent == NULL) break; section->m_lstSeeAlso.push_back( sectionCurrent ); } fComplete = false; // Start of a section/subsection. } } } else if((szPos = ComparePrefix( m_szLine_Pos, txtEndObject ))) { m_szLine_Pos = szPos; //////////////////// End Node //////////////////// if(m_iLevel == 1) // Normal section { // // Ok, node complete, but it's possible to have a
      subnode, so wait before exiting. // } else if(m_iLevel == 2) // See Also section { sectionCurrent = section; } fComplete = true; // End of a subsection. } } if(section) { // // End of File, but a section has already been parsed, so return it. // if(fComplete) return section; delete section; } return NULL; } ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// HHK::Writer::Writer() { // CComPtr m_stream; // CHAR m_rgBuf[HHK_BUF_SIZE]; m_szBuf_Pos = m_rgBuf; // LPSTR m_szBuf_Pos; } HHK::Writer::~Writer() { Close(); } HRESULT HHK::Writer::Init( LPCWSTR szFile ) { __HCP_FUNC_ENTRY( "HHK::Writer::Init" ); HRESULT hr; __MPC_EXIT_IF_METHOD_FAILS(hr, MPC::CreateInstance( &m_stream )); __MPC_EXIT_IF_METHOD_FAILS(hr, m_stream->InitForWrite( szFile )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtHeader )); hr = S_OK; __HCP_FUNC_CLEANUP; __HCP_FUNC_EXIT(hr); } HRESULT HHK::Writer::Close() { __HCP_FUNC_ENTRY( "HHK::Writer::Close" ); HRESULT hr; if(m_stream) { __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtTail )); __MPC_EXIT_IF_METHOD_FAILS(hr, FlushBuffer()); __MPC_EXIT_IF_METHOD_FAILS(hr, m_stream->Close()); } hr = S_OK; __HCP_FUNC_CLEANUP; __HCP_FUNC_EXIT(hr); } ///////////////////////////////////////////////////////////////////////////// HRESULT HHK::Writer::FlushBuffer() { HRESULT hr; ULONG cbWrite = (m_szBuf_Pos - m_rgBuf); ULONG cbWrote; if(m_stream) { if(cbWrite) { hr = m_stream->Write( m_szBuf_Pos = m_rgBuf, cbWrite, &cbWrote ); } else { hr = S_OK; } } else { hr = E_FAIL; } return hr; } HRESULT HHK::Writer::OutputLine( LPCSTR szLine ) { HRESULT hr = S_OK; if(szLine) { size_t iLen = strlen( szLine ); while(iLen) { size_t iCopy = min( iLen, Available() ); ::CopyMemory( m_szBuf_Pos, szLine, iCopy ); m_szBuf_Pos += iCopy; szLine += iCopy; iLen -= iCopy; if(iLen) { if(FAILED(hr = FlushBuffer())) break; } } } return hr; } HRESULT HHK::Writer::OutputSection( Section* sec ) { __HCP_FUNC_ENTRY( "HHK::Writer::OutputSection" ); HRESULT hr; Section::SectionIterConst itSec; Section* subsec; Section::EntryIterConst itEntry; Entry::UrlIterConst itUrl; // BUG 135252 - Help Center Content: Broken link from Adapters topic on Help Index // This is a UA specific tweak. The condition is present ONLY on single entry // Keyword links coming from the DB, which means that their URI does not point // inside a .CHM. if(sec->m_lstEntries.size() != 0 && sec->m_lstSeeAlso.size() != 0 ) { Section Sec1; for(itEntry = sec->m_lstEntries.begin(); itEntry != sec->m_lstEntries.end(); itEntry++) { itUrl = itEntry->m_lstUrl.begin(); if(itUrl != itEntry->m_lstUrl.end()) { Section* SubSec1; __MPC_EXIT_IF_ALLOC_FAILS(hr, SubSec1, new Section); SubSec1->m_strTitle = itEntry->m_strTitle; SubSec1->m_lstEntries.push_back( *itEntry ); Sec1.m_lstSeeAlso.push_back( SubSec1 ); } } sec->MergeSeeAlso( Sec1 ); sec->m_lstEntries.clear(); } // END Fix BUG 135252 __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewSection_Open )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Name )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( sec->m_strTitle.c_str() )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Close )); sec->CleanEntries( sec->m_lstEntries ); for(itEntry = sec->m_lstEntries.begin(); itEntry != sec->m_lstEntries.end(); itEntry++) { const Entry& entry = *itEntry; if(entry.m_strTitle.length()) { __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Name )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( entry.m_strTitle.c_str() )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Close )); } for(itUrl = entry.m_lstUrl.begin(); itUrl != entry.m_lstUrl.end(); itUrl++) { __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Local )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( itUrl->c_str() )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Close )); } } if ( sec->m_lstEntries.size() == 0 ) { if(sec->m_strSeeAlso.length()) { if (sec->m_strSeeAlso == sec->m_strTitle) { // Bug 278906: If this is a first level index entry, with no associated // topic, then the See Also will be the same as the Title. Replace the // See Also by a pointer to an HTM that asks the user to click on a // lower level index entry. __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Local )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtIndexFirstLevel )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Close )); } else { __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_SeeAlso )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( sec->m_strSeeAlso.c_str() )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Close )); } } } __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewSection_Close )); //////////////////// if(sec->m_lstSeeAlso.size()) { __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewSubSection_Open )); for(itSec = sec->m_lstSeeAlso.begin(); itSec != sec->m_lstSeeAlso.end(); itSec++) { subsec = *itSec; subsec->CleanEntries( subsec->m_lstEntries ); if(subsec->m_strSeeAlso.length() == 0 && subsec->m_lstEntries.size() == 0 ) continue; __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtIndent )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewSection_Open )); if(subsec->m_strTitle.length()) { __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtIndent )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Name )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( subsec->m_strTitle.c_str() )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Close )); } if(subsec->m_strSeeAlso.length()) { __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtIndent )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_SeeAlso )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( subsec->m_strSeeAlso.c_str() )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Close )); } else { for(itEntry = subsec->m_lstEntries.begin(); itEntry != subsec->m_lstEntries.end(); itEntry++) { const Entry& entry = *itEntry; if(entry.m_strTitle.length() == 0 || entry.m_lstUrl.size() == 0 ) continue; if(entry.m_strTitle.length()) { __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtIndent )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Name )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( entry.m_strTitle.c_str() )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Close )); } for(itUrl = entry.m_lstUrl.begin(); itUrl != entry.m_lstUrl.end(); itUrl++) { __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtIndent )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Local )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( itUrl->c_str() )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewParam_Close )); } } } __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtIndent )); __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewSection_Close )); } __MPC_EXIT_IF_METHOD_FAILS(hr, OutputLine( txtNewSubSection_Close )); } hr = S_OK; __HCP_FUNC_CLEANUP; __HCP_FUNC_EXIT(hr); } ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// HHK::Merger::Entity::Entity() { m_Section = NULL; // Section* m_Section; } HHK::Merger::Entity::~Entity() { if(m_Section) { delete m_Section; } } void HHK::Merger::Entity::SetSection( HHK::Section* sec ) { if(m_Section) { delete m_Section; } m_Section = sec; } HHK::Section* HHK::Merger::Entity::GetSection() { return m_Section; } HHK::Section* HHK::Merger::Entity::Detach() { HHK::Section* sec = m_Section; m_Section = NULL; return sec; } ///////////////////////////////////////////////////////////////////////////// HHK::Merger::FileEntity::FileEntity( LPCWSTR szFile ) { m_strFile = szFile; // MPC::wstring m_strFile; // Reader m_Input; } HHK::Merger::FileEntity::~FileEntity() { SetSection( NULL ); } HRESULT HHK::Merger::FileEntity::Init() { return m_Input.Init( m_strFile.c_str() ); } bool HHK::Merger::FileEntity::MoveNext() { HHK::Section* sec = m_Input.Parse(); SetSection( sec ); return sec != NULL; } long HHK::Merger::FileEntity::Size() const { return 0; } ///////////////////////////////////////////////////////////////////////////// bool HHK::Merger::DbEntity::CompareMatches::operator()( /*[in]*/ const HHK::Merger::DbEntity::match* left , /*[in]*/ const HHK::Merger::DbEntity::match* right ) const { int res = Reader::StrColl( left->strKeyword.c_str(), right->strKeyword.c_str() ); if(res == 0) { res = Reader::StrColl( left->strTitle.c_str(), right->strTitle.c_str() ); } return (res < 0); } HHK::Merger::DbEntity::DbEntity( /*[in]*/ Taxonomy::Updater& updater, /*[in]*/ Taxonomy::WordSet& setCHM ) : m_updater( updater ) { // Section::SectionList m_lst; // Taxonomy::Updater& m_updater; m_setCHM = setCHM; // Taxonomy::WordSet m_setCHM; } HHK::Merger::DbEntity::~DbEntity() { MPC::CallDestructorForAll( m_lst ); } HRESULT HHK::Merger::DbEntity::Init() { __HCP_FUNC_ENTRY( "HHK::Merger::DbEntity::Init" ); HRESULT hr; MatchList lstMatches; MatchIter itMatches; KeywordMap mapKeywords; KeywordIterConst itKeywords; TopicMap mapTopics; SortMap mapSorted; SortIter itSorted; bool fFound; // // Load all the matches. // { Taxonomy::RS_Matches* rs; __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetMatches( &rs )); __MPC_EXIT_IF_METHOD_FAILS(hr, rs->Move( 0, JET_MoveFirst, &fFound )); while(fFound) { if(rs->m_fHHK) { itMatches = lstMatches.insert( lstMatches.end() ); itMatches->ID_keyword = rs->m_ID_keyword; itMatches->ID_topic = rs->m_ID_topic; mapTopics[rs->m_ID_topic] = &(*itMatches); } __MPC_EXIT_IF_METHOD_FAILS(hr, rs->Move( 0, JET_MoveNext, &fFound )); } } // // Load all the keywords. // { Taxonomy::RS_Keywords* rs; __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetKeywords( &rs )); __MPC_EXIT_IF_METHOD_FAILS(hr, rs->Move( 0, JET_MoveFirst, &fFound )); while(fFound) { MPC::string strKeyword; ConvertToAnsi( strKeyword, rs->m_strKeyword ); mapKeywords[rs->m_ID_keyword] = strKeyword; __MPC_EXIT_IF_METHOD_FAILS(hr, rs->Move( 0, JET_MoveNext, &fFound )); } } // // Lookup the keyword's strings. // { match* lastKeyword = NULL; for(itMatches = lstMatches.begin(); itMatches != lstMatches.end(); itMatches++) { if(lastKeyword && lastKeyword->ID_keyword == itMatches->ID_keyword) { itMatches->strKeyword = lastKeyword->strKeyword; } else { itKeywords = mapKeywords.find( itMatches->ID_keyword ); if(itKeywords == mapKeywords.end()) { ; // This should NOT happen... } else { itMatches->strKeyword = itKeywords->second; } lastKeyword = &(*itMatches); } } } // // Lookup topics. // { Taxonomy::RS_Topics* rs; __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.GetTopics( &rs )); __MPC_EXIT_IF_METHOD_FAILS(hr, rs->Move( 0, JET_MoveFirst, &fFound )); while(fFound) { match* elem; if(rs->m_fValid__URI && (elem = mapTopics[rs->m_ID_topic])) { bool fSkip = false; // // If the link points to a CHM and it's one of those already merged, skip the topic. // { CComBSTR bstrStorageName; if(MPC::MSITS::IsCHM( rs->m_strURI.c_str(), &bstrStorageName ) && bstrStorageName) { LPCWSTR szEnd = wcsrchr( bstrStorageName, '\\' ); if(szEnd && m_setCHM.count( MPC::wstring( szEnd+1 ) ) > 0) { fSkip = true; } } } if(fSkip == false) { __MPC_EXIT_IF_METHOD_FAILS(hr, m_updater.ExpandURL( rs->m_strURI )); ConvertToAnsi( elem->strTitle, rs->m_strTitle ); ConvertToAnsi( elem->strURI , rs->m_strURI ); } } __MPC_EXIT_IF_METHOD_FAILS(hr, rs->Move( 0, JET_MoveNext, &fFound )); } } // // Sort topics. // { for(itMatches = lstMatches.begin(); itMatches != lstMatches.end(); itMatches++) { match* elem = &(*itMatches); // // We keep a one-to-one association between ID_topic and match, in order to resolve the Title/URI of a keyword. // However, there can be multiple keywords pointing to the same topic. // So we need to copy title/URI from the element in the one-to-one association to all the others. // if(elem->strTitle.length() == 0 || elem->strURI .length() == 0 ) { match* elem2 = mapTopics[elem->ID_topic]; if(elem2 && elem2 != elem) { elem->strTitle = elem2->strTitle; elem->strURI = elem2->strURI ; } } if(elem->strKeyword.length() == 0) continue; if(elem->strTitle .length() == 0) continue; if(elem->strURI .length() == 0) continue; mapSorted[elem] = elem; } } // // Generate sections. // { HHK::Section* sec = NULL; HHK::Section* secsub = NULL; HHK::Section::EntryIter it; MPC::string strBuffer; for(itSorted = mapSorted.begin(); itSorted != mapSorted.end(); itSorted++) { match* elem = itSorted->first; // // Escape all the values, before generating the sections. // ReplaceCharactersWithEntity( elem->strKeyword, strBuffer ); ReplaceCharactersWithEntity( elem->strTitle , strBuffer ); ReplaceCharactersWithEntity( elem->strURI , strBuffer ); if(sec == NULL || sec->m_strTitle != elem->strKeyword) { if(sec) { m_lst.push_back( sec ); } __MPC_EXIT_IF_ALLOC_FAILS(hr, sec, new HHK::Section()); secsub = NULL; it = sec->m_lstEntries.insert( sec->m_lstEntries.end() ); sec->m_strTitle = elem->strKeyword; it->m_strTitle = elem->strTitle; it->m_lstUrl.push_back( elem->strURI ); } else { if(secsub == NULL) { secsub = sec; __MPC_EXIT_IF_ALLOC_FAILS(hr, sec, new HHK::Section()); sec->m_lstSeeAlso.push_back( secsub ); sec->m_strTitle = elem->strKeyword; sec->m_strSeeAlso = elem->strKeyword; secsub->m_strTitle = it->m_strTitle; } if(secsub->m_strTitle != elem->strTitle) { __MPC_EXIT_IF_ALLOC_FAILS(hr, secsub, new HHK::Section()); sec->m_lstSeeAlso.push_back( secsub ); secsub->m_strTitle = elem->strTitle; it = secsub->m_lstEntries.insert( secsub->m_lstEntries.end() ); it->m_strTitle = elem->strTitle; } it->m_lstUrl.push_back( elem->strURI ); } } if(sec) { m_lst.push_back( sec ); } } hr = S_OK; __HCP_FUNC_CLEANUP; __HCP_FUNC_EXIT(hr); } bool HHK::Merger::DbEntity::MoveNext() { Section::SectionIterConst it = m_lst.begin(); if(it != m_lst.end()) { SetSection( *it ); m_lst.erase( it ); return true; } else { SetSection( NULL ); return false; } } long HHK::Merger::DbEntity::Size() const { return m_lst.size(); } ///////////////////////////////////////////////////////////////////////////// class Compare { public: bool operator()( /*[in]*/ HHK::Section* const &left, /*[in]*/ HHK::Section* const &right ) const { return HHK::Reader::StrColl( left->m_strTitle.c_str(), right->m_strTitle.c_str() ) < 0; } }; HHK::Merger::SortingFileEntity::SortingFileEntity( LPCWSTR szFile ) : m_in( szFile ) { // Section::SectionList m_lst; // FileEntity m_in; } HHK::Merger::SortingFileEntity::~SortingFileEntity() { MPC::CallDestructorForAll( m_lst ); } HRESULT HHK::Merger::SortingFileEntity::Init() { HRESULT hr; if(SUCCEEDED(hr = m_in.Init())) { Compare Pr; SectionVec vec; SectionIter itLast; SectionIter it; Section* secLast = NULL; Section* sec; // // Parse the whole file. // while(m_in.MoveNext()) { vec.push_back( m_in.Detach() ); } // // Sort all the sections. // std::sort( vec.begin(), vec.end(), Pr ); // // Walk through the sections, looking for duplicate keywords to merge. // // Each section encountered is not added immediately, but kept in "secLast" for comparison with the next one. // for(it=vec.begin(); it!=vec.end();) { sec = *it; if(secLast) { if(Reader::StrColl( sec->m_strTitle.c_str(), secLast->m_strTitle.c_str() ) == 0) { Section::SectionList lst; // // Collate all the sections with the same keyword in a list and merge them. // lst.push_back( secLast ); while(it != vec.end() && Reader::StrColl( (*it)->m_strTitle.c_str(), secLast->m_strTitle.c_str() ) == 0) { lst.push_back( *it++ ); } sec = Merger::MergeSections( lst ); if(sec == NULL) { hr = E_OUTOFMEMORY; break; } // // Queue the merged section and loop. // m_lst.push_back( sec ); secLast = NULL; continue; } else { m_lst.push_back( secLast ); *itLast = NULL; } } itLast = it; secLast = sec; it++; } if(secLast) { m_lst.push_back( secLast ); *itLast = NULL; } MPC::CallDestructorForAll( vec ); } return hr; } bool HHK::Merger::SortingFileEntity::MoveNext() { Section::SectionIterConst it = m_lst.begin(); if(it != m_lst.end()) { SetSection( *it ); m_lst.erase( it ); return true; } else { SetSection( NULL ); return false; } } long HHK::Merger::SortingFileEntity::Size() const { return m_lst.size(); } ///////////////////////////////////////////////////////////////////////////// HHK::Merger::Merger() { // EntityList m_lst; // EntityList m_lstSelected; m_SectionTemp = NULL; // Section* m_SectionTemp; } HHK::Merger::~Merger() { MPC::CallDestructorForAll( m_lst ); if(m_SectionTemp) { delete m_SectionTemp; } } HRESULT HHK::Merger::AddFile( Entity* ent, bool fIgnoreMissing ) { HRESULT hr; if(ent == NULL) { return E_OUTOFMEMORY; } if(SUCCEEDED(hr = ent->Init())) { m_lst.push_back( ent ); } else { delete ent; if(fIgnoreMissing) { hr = S_OK; } } return hr; } HHK::Section* HHK::Merger::MergeSections( Section::SectionList& lst ) { HHK::Section* secMerged = new HHK::Section(); if(secMerged) { if(lst.size()) { HHK::Section* sec = *(lst.begin()); secMerged->m_strTitle = sec->m_strTitle; } // // Move than one section with the same title, need to merge... // for(HHK::Section::SectionIter it=lst.begin(); it!=lst.end(); it++) { HHK::Section* sec = *it; for(HHK::Section::EntryIterConst itEntry = sec->m_lstEntries.begin(); itEntry != sec->m_lstEntries.end(); itEntry++) { secMerged->MergeURLs( *itEntry ); } if(sec ->m_strSeeAlso.length() > 0 && secMerged->m_strSeeAlso.length() == 0 ) { secMerged->m_strSeeAlso = sec->m_strSeeAlso; } //////////////////// secMerged->MergeSeeAlso( *sec ); } } return secMerged; } bool HHK::Merger::MoveNext() { HHK::Section* secLowest = NULL; EntityIterConst it; EntityIterConst it2; if(m_SectionTemp) { delete m_SectionTemp; m_SectionTemp = NULL; } // // If this is the first round ever, select all the entities. // if(m_lstSelected.size() == 0) { m_lstSelected = m_lst; } // // First of all, advance all the entity selected in the previous round. // for(it=m_lstSelected.begin(); it!=m_lstSelected.end(); it++) { Entity* ent = *it; if(ent->MoveNext() == false) { // // End of File for this entity, remove it from the system. // delete ent; it2 = std::find( m_lst.begin(), m_lst.end(), ent ); if(it2 != m_lst.end()) { m_lst.erase( it2 ); } continue; } } m_lstSelected.clear(); // // No more entities, abort. // if(m_lst.size() == 0) return false; // // Select a section with the lowest title. // for(it=m_lst.begin(); it!=m_lst.end(); it++) { Entity* ent = *it; HHK::Section* sec = ent->GetSection(); if(secLowest == NULL || Reader::StrColl( sec->m_strTitle.c_str(), secLowest->m_strTitle.c_str() ) < 0) { secLowest = sec; } } // // Find all the sections with the lowest title. // for(it=m_lst.begin(); it!=m_lst.end(); it++) { Entity* ent = *it; HHK::Section* sec = ent->GetSection(); if(Reader::StrColl( sec->m_strTitle.c_str(), secLowest->m_strTitle.c_str() ) == 0) { m_lstSelected.push_back( ent ); } } if(m_lstSelected.size() > 1) { Section::SectionList lst; for(it=m_lstSelected.begin(); it!=m_lstSelected.end(); it++) { HHK::Section* sec = (*it)->GetSection(); lst.push_back( sec ); } m_SectionTemp = MergeSections( lst ); } return true; } HHK::Section* HHK::Merger::GetSection() { if(m_SectionTemp) { return m_SectionTemp; } if(m_lstSelected.size()) { return (*m_lstSelected.begin())->GetSection(); } return NULL; } long HHK::Merger::Size() const { long lTotal = 0; EntityIterConst it; for(it=m_lst.begin(); it!=m_lst.end(); it++) { lTotal += (*it)->Size(); } return lTotal; } HRESULT HHK::Merger::PrepareMergedHhk( Writer& writer , Taxonomy::Updater& updater , Taxonomy::WordSet& setCHM , MPC::WStringList& lst , LPCWSTR szOutputHHK ) { __HCP_FUNC_ENTRY( "HHK::Merger::PrepareMergedHhk" ); HRESULT hr; MPC::WStringIter it; // // Enumerate all the HHKs to merge. // for(it=lst.begin(); it!=lst.end(); it++) { __MPC_EXIT_IF_METHOD_FAILS(hr, AddFile( new SortingFileEntity( it->c_str() ) )); } //// Keyword are not merged in the HHK. //// __MPC_EXIT_IF_METHOD_FAILS(hr, AddFile( new DbEntity( updater, setCHM ) )); __MPC_EXIT_IF_METHOD_FAILS(hr, MPC::MakeDir( szOutputHHK )); __MPC_EXIT_IF_METHOD_FAILS(hr, writer.Init( szOutputHHK )); hr = S_OK; __HCP_FUNC_CLEANUP; __HCP_FUNC_EXIT(hr); } HRESULT HHK::Merger::PrepareSortingOfHhk( HHK::Writer& writer , LPCWSTR szInputHHK , LPCWSTR szOutputHHK ) { __HCP_FUNC_ENTRY( "HHK::Merger::PrepareSortingOfHhk" ); HRESULT hr; __MPC_EXIT_IF_METHOD_FAILS(hr, AddFile( new SortingFileEntity( szInputHHK ) )); __MPC_EXIT_IF_METHOD_FAILS(hr, writer.Init( szOutputHHK )); hr = S_OK; __HCP_FUNC_CLEANUP; __HCP_FUNC_EXIT(hr); }