// TODO: Allow trident to download frames (and process new html)
// nuke urlmon code (use trident always)
#include "private.h"
#include "shui.h"
#include "downld.h"
#include "subsmgrp.h"
#include <ocidl.h>
#include <initguid.h>
#include <mluisupp.h>
extern HICON g_webCrawlerIcon;
extern HICON g_channelIcon;
extern HICON g_desktopIcon;
void LoadDefaultIcons();
#undef TF_THISMODULE
#define TF_THISMODULE TF_WEBCRAWL
#define _ERROR_REPROCESSING -1
// DWORD field of the m_pPages string list
const DWORD DATA_RECURSEMASK = 0x000000FF; // Levels of recursion from this page
const DWORD DATA_DLSTARTED = 0x80000000; // Have we started downloading
const DWORD DATA_DLFINISHED = 0x40000000; // Have we finished this page
const DWORD DATA_DLERROR = 0x20000000; // An error during download
const DWORD DATA_CODEBASE = 0x10000000; // Is codebase
const DWORD DATA_LINK = 0x08000000; // Is link from page (not dependency)
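// For example (illustrative values only): a linked page found with three
// recursion levels remaining that has finished downloading would carry
//
//   DWORD dwData = DATA_DLSTARTED | DATA_DLFINISHED | DATA_LINK | 3;
//   DWORD dwLevelsLeft = dwData & DATA_RECURSEMASK; // == 3, levels still to recurse
//   BOOL fIsLink = (dwData & DATA_LINK) != 0; // == TRUE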
// DWORD field of m_pPendingLinks string list
const DWORD DATA_ROBOTSTXTMASK=0x00000FFF; // index into m_pRobotsTxt list
// used internally; not actually stored in string list field
const DWORD DATA_ROBOTSTXT = 0x01000000; // Is robots.txt
// m_pDependencyLinks uses m_pPages values
// DWORD field of m_pRobotsTxt is NULL or (CWCDwordStringList *)
// DWORD field of m_pRobotsTxt referenced string list
const DWORD DATA_ALLOW = 0x80000000;
const DWORD DATA_DISALLOW = 0x40000000;
const WCHAR c_wszRobotsMetaName[] = L"Robots\n";
const int c_iRobotsMetaNameLen = 7; // string len without nullterm
const WCHAR c_wszRobotsNoFollow[] = L"NoFollow";
const int c_iRobotsNoFollow = 8;
const WCHAR c_wszRobotsTxtURL[] = L"/robots.txt";
const DWORD MAX_ROBOTS_SIZE = 8192; // Max size of robots.txt file
// tokens for parsing of robots.txt
const CHAR c_szRobots_UserAgent[] = "User-Agent:";
const CHAR c_szRobots_OurUserAgent[] = "MSIECrawler";
const CHAR c_szRobots_Allow[] = "Allow:";
const CHAR c_szRobots_Disallow[] = "Disallow:";
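// Illustration (hypothetical file contents): a robots.txt of
//
//   User-Agent: MSIECrawler
//   Disallow: /cgi-bin/
//   Allow: /cgi-bin/public/
//
// is parsed by ParseRobotsTxt below into two per-host entries, the combined
// URL "http://host/cgi-bin/" tagged DATA_DISALLOW and
// "http://host/cgi-bin/public/" tagged DATA_ALLOW; ValidateWithRobotsTxt then
// prefix-matches each candidate URL against these entries, first hit wins.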
// This GUID comes from Trident and is a hack for getting PARAM values for APPLET tags.
DEFINE_GUID(CGID_JavaParambagCompatHack, 0x3050F405, 0x98B5, 0x11CF, 0xBB, 0x82, 0x00, 0xAA, 0x00, 0xBD, 0xCE, 0x0B);
// This GUID is helpfully not defined elsewhere.
DEFINE_GUID(CLSID_JavaVM, 0x08B0E5C0, 0x4FCB, 0x11CF, 0xAA, 0xA5, 0x00, 0x40, 0x1C, 0x60, 0x85, 0x01);
// Get host channel agent's subscription item, if any.
inline HRESULT CWebCrawler::GetChannelItem(ISubscriptionItem **ppChannelItem)
{
IServiceProvider *pSP;
HRESULT hr = E_NOINTERFACE;
if (ppChannelItem)
*ppChannelItem = NULL;
if (SUCCEEDED(m_pAgentEvents->QueryInterface(IID_IServiceProvider, (void **)&pSP)) && pSP)
{
ISubscriptionItem *pTempChannelItem = NULL;
pSP->QueryService(CLSID_ChannelAgent, IID_ISubscriptionItem, (void **)&pTempChannelItem);
pSP->Release();
if(pTempChannelItem)
hr = S_OK;
if(ppChannelItem)
*ppChannelItem = pTempChannelItem;
else
{
if(pTempChannelItem)
pTempChannelItem->Release();
}
}
return hr;
}
//////////////////////////////////////////////////////////////////////////
//
// Helper functions - copied over from urlmon\download\helpers.cxx - Is there
// an equivalent routine or better place for this, webcrawl.cpp?
//
//////////////////////////////////////////////////////////////////////////
// ---------------------------------------------------------------------------
// %%Function: GetVersionFromString
//
// converts version in text format (a,b,c,d) into two dwords (a,b), (c,d)
// The printed version number is of format a.b.d (but, we don't care)
// ---------------------------------------------------------------------------
HRESULT
GetVersionFromString(const char *szBuf, LPDWORD pdwFileVersionMS, LPDWORD pdwFileVersionLS)
{
const char *pch = szBuf;
char ch;
*pdwFileVersionMS = 0;
*pdwFileVersionLS = 0;
if (!pch) // default to zero if none provided
return S_OK;
if (StrCmpA(pch, "-1,-1,-1,-1") == 0) {
*pdwFileVersionMS = 0xffffffff;
*pdwFileVersionLS = 0xffffffff;
return S_OK; // must return here; the '-' characters would fail the digit parse below
}
USHORT n = 0;
USHORT a = 0;
USHORT b = 0;
USHORT c = 0;
USHORT d = 0;
enum HAVE { HAVE_NONE, HAVE_A, HAVE_B, HAVE_C, HAVE_D } have = HAVE_NONE;
for (ch = *pch++;;ch = *pch++) {
if ((ch == ',') || (ch == '\0')) {
switch (have) {
case HAVE_NONE:
a = n;
have = HAVE_A;
break;
case HAVE_A:
b = n;
have = HAVE_B;
break;
case HAVE_B:
c = n;
have = HAVE_C;
break;
case HAVE_C:
d = n;
have = HAVE_D;
break;
case HAVE_D:
return E_INVALIDARG; // invalid arg
}
if (ch == '\0') {
// all done convert a,b,c,d into two dwords of version
*pdwFileVersionMS = ((a << 16)|b);
*pdwFileVersionLS = ((c << 16)|d);
return S_OK;
}
n = 0; // reset
} else if ( (ch < '0') || (ch > '9'))
return E_INVALIDARG; // invalid arg
else
n = n*10 + (ch - '0');
} /* end forever */
// NEVERREACHED
}
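// Usage sketch (illustrative only, not in the original build):
//
//   DWORD dwMS, dwLS;
//   if (SUCCEEDED(GetVersionFromString("4,71,1712,5", &dwMS, &dwLS)))
//   {
//       // dwMS == 0x00040047 (4,71), dwLS == 0x06B00005 (1712,5)
//   }
//
// A NULL input yields 0,0 (S_OK); "-1,-1,-1,-1" yields 0xffffffff,0xffffffff.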
/////////////////////////////////////////////////////////////////////////////////////////
// CombineBaseAndRelativeURLs -
// Three URLs are combined by the following rules (this is used for finding the URL
// to load Applet CABs from.) Three inputs, the Base URL, the Code Base URL
// and the file name URL.
//
// If file name URL is absolute return it.
// Otherwise if CodeBase URL is absolute combine it with filename and return.
// Otherwise if Base URL is absolute, combine CodeBase and fileName URL, then
// combine with Base URL and return it.
////////////////////////////////////////////////////////////////////////////////////////
HRESULT CombineBaseAndRelativeURLs(LPCWSTR szBaseURL, LPCWSTR szRelative1, LPWSTR *szRelative2)
{
WCHAR wszTemp[INTERNET_MAX_URL_LENGTH];
DWORD dwLen = ARRAYSIZE(wszTemp);
ASSERT(szRelative2); // should never happen.
if (szRelative2 == NULL)
return E_FAIL;
if (IsValidURL(NULL, *szRelative2, 0) == S_OK)
return S_OK;
if (szRelative1 && (IsValidURL(NULL, szRelative1, 0) == S_OK))
{
if (SUCCEEDED(UrlCombineW((LPCWSTR)szRelative1, (LPCWSTR)*szRelative2, (LPWSTR)wszTemp, &dwLen, 0)))
{
BSTR bstrNew = SysAllocString(wszTemp);
if (bstrNew)
{
SAFEFREEBSTR(*szRelative2);
*szRelative2 = bstrNew;
return S_OK;
}
}
}
if (szBaseURL && (IsValidURL(NULL, szBaseURL, 0) == S_OK))
{
LPWSTR szNewRel = NULL;
WCHAR wszCombined[INTERNET_MAX_URL_LENGTH];
if (szRelative1)
{
// NOTE: lstr[cpy|cat]W are macroed to work on Win95.
DWORD dwLen2 = lstrlenW(*szRelative2);
StrCpyNW(wszTemp, szRelative1, ARRAYSIZE(wszTemp) - 1); //paranoia
DWORD dwTempLen = lstrlenW(wszTemp);
if ((dwLen2 > 0) && (((*szRelative2)[dwLen2-1] == (unsigned short)L'\\') ||
((*szRelative2)[dwLen2-1] == (unsigned short)L'/'))) // parenthesized so dwLen2 guards both comparisons
{
StrNCatW(wszTemp, *szRelative2, ARRAYSIZE(wszTemp) - dwTempLen);
}
else
{
StrNCatW(wszTemp, L"/", ARRAYSIZE(wszTemp) - dwTempLen);
StrNCatW(wszTemp, *szRelative2, ARRAYSIZE(wszTemp) - dwTempLen - 1);
}
szNewRel = wszTemp;
}
else
{
szNewRel = *szRelative2;
}
dwLen = INTERNET_MAX_URL_LENGTH;
if (SUCCEEDED(UrlCombineW((LPCWSTR)szBaseURL, (LPCWSTR)szNewRel, (LPWSTR)wszCombined, &dwLen, 0)))
{
BSTR bstrNew = SysAllocString(wszCombined);
if (bstrNew)
{
SAFEFREEBSTR(*szRelative2);
*szRelative2 = bstrNew;
return S_OK;
}
}
}
// In all likelihood one of the URLs is bad and nothing good can be done.
return E_FAIL;
}
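// Worked example (hypothetical inputs): with
//   szBaseURL    = L"http://host/applets/page.html" (absolute)
//   szRelative1  = L"classes" (CODEBASE, relative)
//   *szRelative2 = L"util.cab" (file name, relative)
// neither relative URL is absolute, so CODEBASE and file name are first
// joined as "classes/util.cab" and then combined with the base, leaving
// *szRelative2 == L"http://host/applets/classes/util.cab".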
//////////////////////////////////////////////////////////////////////////
//
// CWebCrawler implementation
//
//////////////////////////////////////////////////////////////////////////
//
// CWebCrawler Helpers
//
HRESULT CWebCrawler::CreateInstance(IUnknown *punkOuter, IUnknown **ppunk)
{
HRESULT hr;
ASSERT(NULL == punkOuter);
ASSERT(NULL != ppunk);
CWebCrawler *pwc = new CWebCrawler;
if (NULL != pwc)
{
hr = pwc->Initialize();
if (FAILED(hr))
{
pwc->Release();
}
}
else
{
hr = E_OUTOFMEMORY;
}
if (SUCCEEDED(hr))
{
*ppunk = (ISubscriptionAgentControl *)pwc;
}
return hr;
}
HRESULT CWebCrawler::Initialize()
{
m_hrCritDependencies = InitializeCriticalSectionAndSpinCount(&m_critDependencies, 0) ? S_OK : E_OUTOFMEMORY;
return m_hrCritDependencies;
}
CWebCrawler::CWebCrawler()
{
DBG("Creating CWebCrawler object");
}
CWebCrawler::~CWebCrawler()
{
_CleanUp();
if (SUCCEEDED(m_hrCritDependencies))
{
DeleteCriticalSection(&m_critDependencies);
}
DBG("Destroyed CWebCrawler object");
}
void CWebCrawler::CleanUp()
{
_CleanUp();
CDeliveryAgent::CleanUp();
}
void CWebCrawler::_CleanUp()
{
if (m_pCurDownload)
{
m_pCurDownload->DoneDownloading();
m_pCurDownload->Release();
m_pCurDownload = NULL;
}
CRunDeliveryAgent::SafeRelease(m_pRunAgent);
SAFEFREEBSTR(m_bstrHostName);
SAFEFREEBSTR(m_bstrBaseURL);
SAFELOCALFREE(m_pszLocalDest);
SAFELOCALFREE(m_pBuf);
EnterCriticalSection(&m_critDependencies);
SAFEDELETE(m_pDependencies);
LeaveCriticalSection(&m_critDependencies);
if (m_pDownloadNotify)
{
m_pDownloadNotify->LeaveMeAlone();
m_pDownloadNotify->Release();
m_pDownloadNotify=NULL;
}
SAFEDELETE(m_pPages);
SAFEDELETE(m_pPendingLinks);
SAFEDELETE(m_pDependencyLinks);
SAFERELEASE(m_pUrlIconHelper);
FreeRobotsTxt();
FreeCodeBaseList();
}
// Format of m_pRobotsTxt:
// Array of hostnames for which we have attempted to get Robots.txt
// DWORD for each hostname contains pointer to CWCDwordStringList of Robots.txt data,
// or 0 if we couldn't find robots.txt for that host name
// Robots.txt data stored in form: url, flag = allow or disallow
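// Example (hypothetical host): after fetching http://example.com/robots.txt,
// the list holds the string "http://example.com/robots.txt" whose DWORD data
// points to a CWCDwordStringList of directive-prefix URLs, each tagged
// DATA_ALLOW or DATA_DISALLOW; data of 0 means the fetch or parse failed, and
// ValidateWithRobotsTxt then allows everything on that host.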
void CWebCrawler::FreeRobotsTxt()
{
if (m_pRobotsTxt)
{
DWORD_PTR dwPtr;
int iLen = m_pRobotsTxt->NumStrings();
for (int i=0; i<iLen; i++)
{
dwPtr = m_pRobotsTxt->GetStringData(i);
if (dwPtr)
{
delete ((CWCStringList *)dwPtr);
m_pRobotsTxt->SetStringData(i, 0);
}
}
delete m_pRobotsTxt;
m_pRobotsTxt = NULL;
}
}
void CWebCrawler::FreeCodeBaseList()
{
if (m_pCodeBaseList) {
CCodeBaseHold *pcbh;
int iLen = m_pCodeBaseList->NumStrings();
for (int i=0; i<iLen; i++)
{
pcbh = (CCodeBaseHold *)m_pCodeBaseList->GetStringData(i);
if (pcbh != NULL)
{
SAFEFREEBSTR(pcbh->szDistUnit);
SAFEDELETE(pcbh);
m_pCodeBaseList->SetStringData(i, 0);
}
}
SAFEDELETE(m_pCodeBaseList);
}
}
HRESULT CWebCrawler::StartOperation()
{
ISubscriptionItem *pItem = m_pSubscriptionItem;
DWORD dwTemp;
ASSERT(pItem);
DBG("CWebCrawler in StartOperation");
if (m_pCurDownload || GetBusy())
{
DBG_WARN("Webcrawl busy, returning failure");
return E_FAIL;
}
SAFEFREEBSTR(m_bstrBaseURL);
if (FAILED(
ReadBSTR(pItem, c_szPropURL, &m_bstrBaseURL)) ||
!m_bstrBaseURL ||
!CUrlDownload::IsValidURL(m_bstrBaseURL))
{
DBG_WARN("Couldn't get valid URL, aborting");
SetEndStatus(E_INVALIDARG);
SendUpdateNone();
return E_INVALIDARG;
}
if (SHRestricted2W(REST_NoSubscriptionContent, NULL, 0))
SetAgentFlag(FLAG_CHANGESONLY);
if (IsAgentFlagSet(FLAG_CHANGESONLY))
{
m_dwRecurseLevels = 0;
m_dwRecurseFlags = WEBCRAWL_DONT_MAKE_STICKY;
DBG("Webcrawler is in 'changes only' mode.");
}
else
{
/*
BSTR bstrLocalDest=NULL;
SAFELOCALFREE(m_pszLocalDest);
ReadBSTR(c_szPropCrawlLocalDest, &bstrLocalDest);
if (bstrLocalDest && bstrLocalDest[0])
{
int iLen = SysStringByteLen(bstrLocalDest)+1;
m_pszLocalDest = (LPTSTR) MemAlloc(LMEM_FIXED, iLen);
if (m_pszLocalDest)
{
MyOleStrToStrN(m_pszLocalDest, iLen, bstrLocalDest);
}
}
SAFEFREEBSTR(bstrLocalDest);
*/
m_dwRecurseLevels=0;
ReadDWORD(pItem, c_szPropCrawlLevels, &m_dwRecurseLevels);
if (!IsAgentFlagSet(DELIVERY_AGENT_FLAG_NO_RESTRICTIONS))
{
// Note: MaxWebcrawlLevels is stored as N+1 because 0
// disables the restriction
dwTemp = SHRestricted2W(REST_MaxWebcrawlLevels, NULL, 0);
if (dwTemp && m_dwRecurseLevels >= dwTemp)
m_dwRecurseLevels = dwTemp - 1;
}
m_dwRecurseFlags=0;
ReadDWORD(pItem, c_szPropCrawlFlags, &m_dwRecurseFlags);
// Read max size in cache in KB
m_dwMaxSize=0;
ReadDWORD(pItem, c_szPropCrawlMaxSize, &m_dwMaxSize);
if (!IsAgentFlagSet(DELIVERY_AGENT_FLAG_NO_RESTRICTIONS))
{
dwTemp = SHRestricted2W(REST_MaxSubscriptionSize, NULL, 0);
if (dwTemp && (!m_dwMaxSize || m_dwMaxSize > dwTemp))
m_dwMaxSize = dwTemp;
}
if (IsRecurseFlagSet(WEBCRAWL_DONT_MAKE_STICKY))
dwTemp = 0;
// Read old group ID
ReadLONGLONG(pItem, c_szPropCrawlGroupID, &m_llOldCacheGroupID);
// Read new ID if present
m_llCacheGroupID = 0;
ReadLONGLONG(pItem, c_szPropCrawlNewGroupID, &m_llCacheGroupID);
if (m_llCacheGroupID)
{
DBG("Adding to existing cache group");
}
} // !ChangesOnly
// finish initializing new operation
m_iDownloadErrors = 0;
m_dwCurSize = 0;
m_lMaxNumUrls = (m_dwRecurseLevels) ? -1 : 1;
SAFEFREEBSTR(m_bstrHostName);
m_pPages = NULL;
m_pDependencies = NULL;
// After calling this, we'll reenter either in "StartDownload" (connection successful)
// or in "AbortUpdate" with GetEndStatus() == INET_E_AGENT_CONNECTION_FAILED
return CDeliveryAgent::StartOperation();
}
HRESULT CWebCrawler::AgentPause(DWORD dwFlags)
{
DBG("CWebCrawler::AgentPause");
// Abort our current url
if (m_pRunAgent)
{
m_pRunAgent->AgentPause(dwFlags);
}
if (m_pCurDownload)
{
m_pCurDownload->AbortDownload();
m_pCurDownload->DestroyBrowser();
}
return CDeliveryAgent::AgentPause(dwFlags);
}
HRESULT CWebCrawler::AgentResume(DWORD dwFlags)
{
DBG("CWebCrawler::AgentResume");
if (m_pRunAgent)
{
m_pRunAgent->AgentResume(dwFlags);
}
else
{
// If we just increased our cache size, reprocess same url
if (SUBSCRIPTION_AGENT_RESUME_INCREASED_CACHE & dwFlags)
{
DBG("CWebCrawler reprocessing same url after cache size increase");
OnDownloadComplete(0, _ERROR_REPROCESSING);
}
else
{
// If we're not still downloading, restart our same url
if (0 == m_iNumPagesDownloading)
{
if (FAILED(ActuallyStartDownload(m_pCurDownloadStringList, m_iCurDownloadStringIndex, TRUE)))
{
ASSERT_MSG(0, "CWebCrawler::AgentResume"); // this should never happen
SetEndStatus(E_FAIL);
CleanUp();
}
}
}
}
return CDeliveryAgent::AgentResume(dwFlags);
}
// Forcibly abort current operation
HRESULT CWebCrawler::AgentAbort(DWORD dwFlags)
{
DBG("CWebCrawler::AgentAbort");
if (m_pCurDownload)
{
m_pCurDownload->DoneDownloading();
}
if (m_pRunAgent)
{
m_pRunAgent->AgentAbort(dwFlags);
}
return CDeliveryAgent::AgentAbort(dwFlags);
}
//---------------------------------------------------------------
//
HRESULT CWebCrawler::StartDownload()
{
ASSERT(!m_pCurDownload);
m_iPagesStarted = 0;
m_iRobotsStarted = 0;
m_iDependencyStarted = 0;
m_iDependenciesProcessed = 0;
m_iTotalStarted = 0;
m_iCodeBaseStarted = 0;
m_iNumPagesDownloading = 0;
// Create new cache group
if (IsAgentFlagSet(FLAG_CHANGESONLY))
{
m_llCacheGroupID = 0;
}
else
{
if (!m_llCacheGroupID)
{
m_llCacheGroupID = CreateUrlCacheGroup(
(IsRecurseFlagSet(WEBCRAWL_DONT_MAKE_STICKY) ? 0 : CACHEGROUP_FLAG_NONPURGEABLE), 0);
ASSERT_MSG(m_llCacheGroupID != 0, "Create cache group failed");
}
}
// Create string lists
m_pPages = new CWCDwordStringList;
if (m_pPages)
m_pPages->Init(m_dwRecurseLevels ? -1 : 512);
else
SetEndStatus(E_FAIL);
if (m_dwRecurseLevels && !IsRecurseFlagSet(WEBCRAWL_IGNORE_ROBOTSTXT))
{
m_pRobotsTxt = new CWCDwordStringList;
if (m_pRobotsTxt)
m_pRobotsTxt->Init(512);
else
SetEndStatus(E_FAIL);
}
// FEATURE : Shouldn't allocate this memory in changes only mode
m_pCodeBaseList = new CWCDwordStringList;
if (m_pCodeBaseList)
m_pCodeBaseList->Init(512);
else
SetEndStatus(E_FAIL);
// Avoid duplicate processing of dependencies
if (!IsAgentFlagSet(FLAG_CHANGESONLY))
{
m_pDependencies = new CWCDwordStringList;
if (m_pDependencies)
m_pDependencies->Init();
else
SetEndStatus(E_FAIL);
}
if (GetEndStatus() == E_FAIL)
return E_FAIL;
m_pCurDownload = new CUrlDownload(this, 0);
if (!m_pCurDownload)
return E_OUTOFMEMORY;
// Add first URL to string list, then start it
if ((CWCStringList::STRLST_ADDED == m_pPages->AddString(m_bstrBaseURL, m_dwRecurseLevels)) &&
m_pPages->NumStrings() == 1)
{
return StartNextDownload();
}
SetEndStatus(E_FAIL);
return E_FAIL;
}
// Attempts to begin the next download
HRESULT CWebCrawler::StartNextDownload()
{
if (!m_pPages || m_iNumPagesDownloading)
return E_FAIL;
CWCStringList *pslUrls = NULL;
int iIndex = 0;
// See if we have any more URLs to download.
// Check dependency links first
if (m_pDependencyLinks)
{
ProcessDependencyLinks(&pslUrls, &iIndex);
#ifdef DEBUG
if (pslUrls) DBG("Downloading dependency link (frame):");
#endif
}
if (!pslUrls)
{
// Check robots.txt
if (m_pRobotsTxt && (m_iRobotsStarted < m_pRobotsTxt->NumStrings()))
{
pslUrls = m_pRobotsTxt;
iIndex = m_iRobotsStarted ++;
}
else if (m_pPendingLinks) // add pending links to pages list
{
// Pending links to process and we've retrieved all robots.txt
// Process pending links (validate & add to download list)
ProcessPendingLinks();
}
if (!pslUrls && (m_iPagesStarted < m_pPages->NumStrings()))
{
DWORD_PTR dwTmp;
ASSERT(!m_pDependencyLinks);// should be downloaded already
ASSERT(!m_pPendingLinks); // should be validated already
// Skip any pages we've started
while (m_iPagesStarted < m_pPages->NumStrings())
{
dwTmp = m_pPages->GetStringData(m_iPagesStarted);
if (IsFlagSet(dwTmp, DATA_DLSTARTED))
m_iPagesStarted++;
else
break;
}
if (m_iPagesStarted < m_pPages->NumStrings())
{
pslUrls = m_pPages;
iIndex = m_iPagesStarted ++;
}
}
if (!pslUrls && (m_iCodeBaseStarted < m_pCodeBaseList->NumStrings()))
{
// Nothing else to pull; do code bases last.
while (m_iCodeBaseStarted < m_pCodeBaseList->NumStrings())
{
CCodeBaseHold *pcbh = (CCodeBaseHold *)
m_pCodeBaseList->GetStringData(m_iCodeBaseStarted);
if (IsFlagSet(pcbh->dwFlags, DATA_DLSTARTED))
m_iCodeBaseStarted++;
else
break;
}
while (m_iCodeBaseStarted < m_pCodeBaseList->NumStrings())
{
// We have some codebases to download.
// We return if the download is async and simply
// start the next one if it finishes synchronously
iIndex = m_iCodeBaseStarted;
m_iCodeBaseStarted++; // increment so that next download is not repeated
// Init the cur download info for resume if paused
m_iCurDownloadStringIndex = iIndex;
m_pCurDownloadStringList = m_pCodeBaseList;
if(ActuallyDownloadCodeBase(m_pCodeBaseList, iIndex, FALSE) == E_PENDING)
return S_OK; // We break out of the while and try next download in OnAgentEnd()
}
}
}
if (pslUrls)
{
m_iCurDownloadStringIndex = iIndex;
m_pCurDownloadStringList = pslUrls;
return ActuallyStartDownload(pslUrls, iIndex);
}
DBG("WebCrawler: StartNextDownload failing, nothing more to download.");
return E_FAIL;
}
HRESULT CWebCrawler::ActuallyStartDownload(CWCStringList *pslUrls, int iIndex, BOOL fReStart /* = FALSE */)
{
// We have urls to download. Do it.
DWORD_PTR dwData;
LPCWSTR pwszURL;
DWORD dwBrowseFlags;
BDUMethod method;
BDUOptions options;
if(pslUrls == m_pCodeBaseList)
{
ASSERT(fReStart); // Should happen only with resume
HRESULT hr = ActuallyDownloadCodeBase(m_pCodeBaseList, iIndex, fReStart);
if(E_PENDING == hr)
return S_OK;
return E_FAIL; // hackhack - since we don't handle synchronous downloads well - we hang if
// resumed download is synchronous
}
if (pslUrls != m_pRobotsTxt)
{
dwData = pslUrls->GetStringData(iIndex);
#ifdef DEBUG
if (fReStart)
{
if (!(dwData & DATA_DLSTARTED)) DBG_WARN("WebCrawler: Trying to restart one we haven't started yet!");
}
else
{
if ((dwData & DATA_DLSTARTED)) DBG_WARN("WebCrawler: Trying to download one we've already started?");
}
#endif
pslUrls->SetStringData(iIndex, DATA_DLSTARTED | dwData);
}
else
dwData = DATA_ROBOTSTXT;
pwszURL = pslUrls->GetString(iIndex);
ASSERT(iIndex < pslUrls->NumStrings());
#ifdef DEBUG
int iMax = m_lMaxNumUrls;
if (iMax<0)
iMax = m_pPages->NumStrings() + ((m_pRobotsTxt) ? m_pRobotsTxt->NumStrings() : 0);
TraceMsgA(TF_THISMODULE, "WebCrawler GET_URL (%d of %c%d) Recurse %d : %ws",
m_iTotalStarted+1, ((m_lMaxNumUrls>0) ? ' ' : '?'), iMax,
pslUrls->GetStringData(iIndex) & DATA_RECURSEMASK, pwszURL);
#endif
dwBrowseFlags = DLCTL_DOWNLOADONLY |
DLCTL_NO_FRAMEDOWNLOAD | DLCTL_NO_SCRIPTS | DLCTL_NO_JAVA |
DLCTL_NO_RUNACTIVEXCTLS;
if (IsRecurseFlagSet(WEBCRAWL_GET_IMAGES)) dwBrowseFlags |= DLCTL_DLIMAGES;
if (IsRecurseFlagSet(WEBCRAWL_GET_VIDEOS)) dwBrowseFlags |= DLCTL_VIDEOS;
if (IsRecurseFlagSet(WEBCRAWL_GET_BGSOUNDS)) dwBrowseFlags |= DLCTL_BGSOUNDS;
if (!IsRecurseFlagSet(WEBCRAWL_GET_CONTROLS)) dwBrowseFlags |= DLCTL_NO_DLACTIVEXCTLS;
if (IsRecurseFlagSet(WEBCRAWL_PRIV_OFFLINE_MODE))
{
dwBrowseFlags |= DLCTL_FORCEOFFLINE;
dwBrowseFlags &= ~(DLCTL_DLIMAGES | DLCTL_VIDEOS | DLCTL_BGSOUNDS);
DBG("GET is OFFLINE");
}
m_pCurDownload->SetDLCTL(dwBrowseFlags);
#ifdef DEBUG
if (fReStart)
{
ASSERT(m_iCurDownloadStringIndex == iIndex);
ASSERT(m_pCurDownloadStringList == pslUrls);
}
#endif
if (!fReStart)
{
// Get the info for change detection, unless we already know it's changed
if (!IsAgentFlagSet(FLAG_CRAWLCHANGED) && !(dwData & DATA_ROBOTSTXT))
{
TCHAR szUrl[INTERNET_MAX_URL_LENGTH];
m_varChange.vt = VT_EMPTY;
if (IsAgentFlagSet(FLAG_CHANGESONLY))
{
// "Changes Only" mode, we have persisted a change detection code
ASSERT(m_iTotalStarted == 0);
LPCWSTR pPropChange = c_szPropChangeCode;
m_pSubscriptionItem->ReadProperties(1, &pPropChange, &m_varChange);
}
BOOL fMustGET = TRUE;
MyOleStrToStrN(szUrl, INTERNET_MAX_URL_LENGTH, pwszURL);
PreCheckUrlForChange(szUrl, &m_varChange, &fMustGET);
if (IsAgentFlagSet(FLAG_CHANGESONLY) && !fMustGET)
SetAgentFlag(FLAG_HEADONLY);
}
m_iTotalStarted ++;
}
if (IsPaused())
{
DBG("WebCrawler paused, not starting another download");
if (m_pCurDownload)
m_pCurDownload->DestroyBrowser(); // free browser until resumed
return E_PENDING;
}
m_iNumPagesDownloading ++;
// Send our update progress with the url we're about to download
SendUpdateProgress(pwszURL, m_iTotalStarted, m_lMaxNumUrls, (m_dwCurSize >> 10));
if (IsAgentFlagSet(FLAG_HEADONLY))
{
ASSERT(m_iTotalStarted == 1);
method = BDU2_HEADONLY; // Only get HEAD info with Urlmon
}
else if (IsAgentFlagSet(FLAG_CHANGESONLY) // Only want HTML, or
|| m_pszLocalDest // We're going to move this one file, or
|| (dwData & DATA_ROBOTSTXT)) // This is a robots.txt, so
{
method = BDU2_URLMON; // Get with Urlmon
}
else if (m_iTotalStarted == 1) // First file, we need status code, so
{
ISubscriptionItem *pCDFItem;
method = BDU2_SNIFF; // Get with Urlmon then MSHTML (if HTML)
// Find out if we're hosted by channel agent
if (SUCCEEDED(GetChannelItem(&pCDFItem)))
{
// If we're hosted by channel agent, use its original hostname
BSTR bstrBaseUrl;
if (SUCCEEDED(ReadBSTR(pCDFItem, c_szPropURL, &bstrBaseUrl)))
{
GetHostName(bstrBaseUrl, &m_bstrHostName);
SysFreeString(bstrBaseUrl);
}
#ifdef DEBUG
if (m_bstrHostName)
TraceMsg(TF_THISMODULE, "Got host name from channel agent: %ws", m_bstrHostName);
#endif
pCDFItem->Release();
DBG("Using 'smart' mode for first url in webcrawl; spawned from channel crawl");
method = BDU2_SMART; // Use 'smart' mode for first url if channel crawl
SetAgentFlag(FLAG_HOSTED);
}
}
else
method = BDU2_SMART; // Get with Urlmon or MSHTML as appropriate
if (dwData & DATA_ROBOTSTXT)
options = BDU2_NEEDSTREAM; // Need IStream to parse robots.txt
else
options = BDU2_NONE;
options |= BDU2_DOWNLOADNOTIFY_REQUIRED; // Always get download notify callbacks
if (IsRecurseFlagSet(WEBCRAWL_ONLY_LINKS_TO_HTML) && (dwData & DATA_LINK))
{
// Don't follow any links unless they are to html pages.
options |= BDU2_FAIL_IF_NOT_HTML;
}
if (FAILED(m_pCurDownload->BeginDownloadURL2(pwszURL,
method, options, m_pszLocalDest,
m_dwMaxSize ? (m_dwMaxSize<<10)-m_dwCurSize : 0)))
{
DBG("BeginDownloadURL2 failed (ignoring & waiting for OnDownloadComplete call)");
}
return S_OK;
}
HRESULT CWebCrawler::ActuallyDownloadCodeBase(CWCStringList *pslUrls, int iIndex, BOOL fReStart)
{
CCodeBaseHold *pcbh;
LPCWSTR pwszURL;
HRESULT hr = S_OK;
if (pslUrls != m_pCodeBaseList)
{
ASSERT(0);
DBG_WARN("WebCrawler: Wrong URLs being processed as CodeBase.");
hr = E_FAIL;
goto Exit;
}
pcbh = (CCodeBaseHold *)pslUrls->GetStringData(iIndex);
#ifdef DEBUG
if (fReStart)
{
if (!(pcbh->dwFlags & DATA_DLSTARTED)) DBG_WARN("WebCrawler: Trying to restart CodeBase D/L we haven't started yet!");
}
else
{
if ((pcbh->dwFlags & DATA_DLSTARTED)) DBG_WARN("WebCrawler: Trying to download CodeBase D/L we've already started?");
}
#endif
pcbh->dwFlags |= DATA_DLSTARTED;
pwszURL = pslUrls->GetString(iIndex);
ASSERT(iIndex < pslUrls->NumStrings());
if (!fReStart)
m_iTotalStarted ++;
if (IsPaused())
{
DBG("WebCrawler paused, not starting another download");
if (m_pCurDownload)
m_pCurDownload->DestroyBrowser(); // free browser until resumed
return S_FALSE;
}
m_iNumPagesDownloading ++;
// Send our update progress with the CODEBASE we're about to download
SendUpdateProgress(pwszURL, m_iTotalStarted, m_lMaxNumUrls);
if (m_pRunAgent)
{
ASSERT(0);
DBG_WARN("WebCrawler: Attempting to download next CODEBASE when not done last one.");
hr = E_FAIL;
goto Exit;
}
else
{
// create subscription item for CDL agent.
ISubscriptionItem *pItem = NULL;
if (m_dwMaxSize && ((m_dwCurSize>>10) >= m_dwMaxSize))
{
// We've exceeded our maximum download KB limit and can't continue.
DBG_WARN("WebCrawler: Exceeded Maximum KB download limit with CodeBase download.");
SetEndStatus(hr = INET_E_AGENT_MAX_SIZE_EXCEEDED);
goto Exit;
}
if (!m_pSubscriptionItem ||
FAILED(hr = DoCloneSubscriptionItem(m_pSubscriptionItem, NULL, &pItem)))
{
goto Exit;
}
ASSERT(pItem != NULL);
WriteOLESTR(pItem, c_szPropURL, pwszURL);
WriteOLESTR(pItem, L"DistUnit", pcbh->szDistUnit);
WriteDWORD(pItem, L"VersionMS", pcbh->dwVersionMS);
WriteDWORD(pItem, L"VersionLS", pcbh->dwVersionLS);
if (m_dwMaxSize)
WriteDWORD(pItem, c_szPropCrawlMaxSize, m_dwMaxSize - (m_dwCurSize>>10)); // KB limit for us to pull.
m_pRunAgent = new CRunDeliveryAgent();
if (m_pRunAgent)
hr = m_pRunAgent->Init((CRunDeliveryAgentSink *)this, pItem, CLSID_CDLAgent);
pItem->Release();
if (m_pRunAgent && SUCCEEDED(hr))
{
hr = m_pRunAgent->StartAgent();
//if (hr == E_PENDING)
//{
//hr = S_OK;
//}
}
else
{
hr = E_OUTOFMEMORY;
}
}
Exit:
return hr;
}
HRESULT CWebCrawler::ProcessDependencyLinks(CWCStringList **ppslUrls, int *piStarted)
{
ASSERT(ppslUrls && !*ppslUrls && piStarted);
int iIndex;
DWORD_PTR dwData;
if (!m_pDependencyLinks)
return S_FALSE;
// See if we have any more dependency links to download
while (m_iDependencyStarted < m_pDependencyLinks->NumStrings())
{
if (!m_pPages->FindString(m_pDependencyLinks->GetString(m_iDependencyStarted),
m_pDependencyLinks->GetStringLen(m_iDependencyStarted), &iIndex))
{
ASSERT(0); // find string failed?!? We added it above!
return E_FAIL;
}
ASSERT(iIndex>=0 && iIndex<m_pPages->NumStrings());
m_iDependencyStarted ++;
// See if we've downloaded this yet.
dwData = m_pPages->GetStringData(iIndex);
if (!(dwData & DATA_DLSTARTED))
{
// Nope. Start download.
*ppslUrls = m_pPages;
*piStarted = iIndex;
return S_OK;
}
// We have already downloaded this page. Go to next dependency link.
}
// Done processing. Clear for next page.
SAFEDELETE(m_pDependencyLinks);
return S_FALSE;
}
HRESULT CWebCrawler::ProcessPendingLinks()
{
int iNumLinks, iAddCode, i, iAddIndex, iRobotsIndex;
LPCWSTR pwszUrl;
BOOL fAllow;
if (!m_pPendingLinks)
return S_FALSE;
ASSERT(m_lMaxNumUrls<0);
ASSERT(0 == (m_dwPendingRecurseLevel & ~DATA_RECURSEMASK));
iNumLinks = m_pPendingLinks->NumStrings();
TraceMsg(TF_THISMODULE, "Processing %d pending links from %ws",
iNumLinks, m_pPages->GetString(m_iPagesStarted-1));
// Add the links to our global page list
for (i=0; i<iNumLinks; i++)
{
// Validate with robots.txt if appropriate
pwszUrl = m_pPendingLinks->GetString(i);
iRobotsIndex = (int)(m_pPendingLinks->GetStringData(i) & DATA_ROBOTSTXTMASK);
ValidateWithRobotsTxt(pwszUrl, iRobotsIndex, &fAllow);
if (fAllow)
{
/*
As long as we retrieve pages in decreasing-recursion order (top to bottom), we don't
have to worry about bumping pages to a higher recurse level (except for frames).
*/
iAddCode = m_pPages->AddString(pwszUrl,
DATA_LINK | m_dwPendingRecurseLevel,
&iAddIndex);
if (iAddCode == CWCStringList::STRLST_FAIL)
break;
}
}
SAFEDELETE(m_pPendingLinks);
return S_OK;
}
// Combine with our base url to get full url
// We use this for frames, but also for <Link> tags, since the processing is identical
HRESULT CWebCrawler::CheckFrame(IUnknown *punkItem, BSTR *pbstrItem, DWORD_PTR dwBaseUrl, DWORD *pdwStringData)
{
WCHAR wszCombined[INTERNET_MAX_URL_LENGTH];
DWORD dwLen = ARRAYSIZE(wszCombined);
ASSERT(pbstrItem && *pbstrItem && punkItem && dwBaseUrl);
if (!pbstrItem || !*pbstrItem || !punkItem || !dwBaseUrl)
return E_FAIL; // bogus
if (SUCCEEDED(UrlCombineW((LPCWSTR)dwBaseUrl, *pbstrItem, wszCombined, &dwLen, 0)))
{
BSTR bstrNew = SysAllocString(wszCombined);
if (bstrNew)
{
SysFreeString(*pbstrItem);
*pbstrItem = bstrNew;
return S_OK;
}
}
TraceMsg(TF_WARNING, "CWebCrawler::CheckFrame failing. Not getting frame or <link> url=%ws.", *pbstrItem);
return E_FAIL; // Couldn't combine url; don't add
}
// See if we should follow this link. Clears pbstrItem if not.
// Accepts either pLink or pArea
HRESULT CWebCrawler::CheckLink(IUnknown *punkItem, BSTR *pbstrItem, DWORD_PTR dwThis, DWORD *pdwStringData)
{
HRESULT hrRet = S_OK;
CWebCrawler *pThis = (CWebCrawler *)dwThis;
ASSERT(pbstrItem && *pbstrItem && punkItem && dwThis);
if (!pbstrItem || !*pbstrItem || !punkItem || !dwThis)
return E_FAIL; // bogus
// First see if it's 'valid'
// We only add the link if it's HTTP (or https)
// (we don't want to get mailto: links, for example)
if (CUrlDownload::IsValidURL(*pbstrItem))
{
// Strip off any anchor
CUrlDownload::StripAnchor(*pbstrItem);
}
else
{
// Skip this link
SysFreeString(*pbstrItem);
*pbstrItem = NULL;
return S_FALSE;
}
if (pThis->IsRecurseFlagSet(WEBCRAWL_ONLY_LINKS_TO_HTML))
{
// See if we can tell that this is not an HTML link
if (CUrlDownload::IsNonHtmlUrl(*pbstrItem))
{
// Skip this link
SysFreeString(*pbstrItem);
*pbstrItem = NULL;
return S_FALSE;
}
}
if (!(pThis->IsRecurseFlagSet(WEBCRAWL_LINKS_ELSEWHERE)))
{
BSTR bstrHost=NULL;
IHTMLAnchorElement *pLink=NULL;
IHTMLAreaElement *pArea=NULL;
// Check to see if the host names match
punkItem->QueryInterface(IID_IHTMLAnchorElement, (void **)&pLink);
if (pLink)
{
pLink->get_hostname(&bstrHost);
pLink->Release();
}
else
{
punkItem->QueryInterface(IID_IHTMLAreaElement, (void **)&pArea);
if (pArea)
{
pArea->get_hostname(&bstrHost);
pArea->Release();
}
else
{
DBG_WARN("CWebCrawler::CheckLink Unable to get Area or Anchor interface!");
return E_FAIL; // Bad element
}
}
if (!bstrHost || !*bstrHost)
{
DBG_WARN("CWebCrawler::CheckLink : (pLink|pArea)->get_hostname() failed");
hrRet = S_OK; // always accept if get_hostname fails
}
else
{
if (pThis->m_bstrHostName && MyAsciiCmpW(bstrHost, pThis->m_bstrHostName))
{
// Skip url; different host name.
SAFEFREEBSTR(*pbstrItem);
hrRet = S_FALSE;
}
}
SAFEFREEBSTR(bstrHost);
}
if (*pbstrItem && pdwStringData)
{
pThis->GetRobotsTxtIndex(*pbstrItem, TRUE, pdwStringData);
*pdwStringData &= DATA_ROBOTSTXTMASK;
}
else if (pdwStringData)
*pdwStringData = 0;
return hrRet;
}
// S_OK : Already retrieved this robots.txt info
// S_FALSE : Haven't yet retrieved this robots.txt info
// E_* : Bad
HRESULT CWebCrawler::GetRobotsTxtIndex(LPCWSTR pwszUrl, BOOL fAddToList, DWORD *pdwRobotsTxtIndex)
{
HRESULT hr=S_OK;
int iIndex=-1;
if (m_pRobotsTxt)
{
// See which robots.txt file we should use to validate this link
// If not yet available, add it to the list to be downloaded
DWORD dwBufLen = lstrlenW(pwszUrl) + ARRAYSIZE(c_wszRobotsTxtURL); //This gets us a terminating NULL
LPWSTR pwszRobots = (LPWSTR)MemAlloc(LMEM_FIXED, dwBufLen * sizeof(WCHAR));
int iAddCode;
if (pwszRobots)
{
// PERF: do the internetcombine in startnextdownload
if (SUCCEEDED(UrlCombineW(pwszUrl, c_wszRobotsTxtURL, pwszRobots, &dwBufLen, 0))
&& !memcmp(pwszRobots, L"http", 4 * sizeof(WCHAR)))
{
if (fAddToList)
{
iAddCode = m_pRobotsTxt->AddString(pwszRobots, 0, &iIndex);
}
else
{
if (m_pRobotsTxt->FindString(pwszRobots, -1, &iIndex))
{
iAddCode = CWCStringList::STRLST_DUPLICATE;
}
else
{
iIndex=-1;
iAddCode = CWCStringList::STRLST_FAIL;
}
}
if (CWCStringList::STRLST_FAIL == iAddCode)
hr = E_FAIL; // bad news
else if (CWCStringList::STRLST_ADDED == iAddCode)
hr = S_FALSE; // haven't gotten it yet
else
hr = S_OK; // already got it
}
MemFree(pwszRobots);
}
else
hr = E_OUTOFMEMORY;
}
else
{
hr = E_FAIL; // too many robots.txt files???
}
*pdwRobotsTxtIndex = iIndex;
return hr;
}
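// Usage sketch (illustrative): CheckLink passes fAddToList=TRUE so that hosts
// we haven't seen get their robots.txt queued for download, while
// ValidateWithRobotsTxt passes fAddToList=FALSE to probe only:
//
//   DWORD dwIndex;
//   if (S_OK == GetRobotsTxtIndex(pwszUrl, FALSE, &dwIndex))
//       // robots.txt already fetched and parsed; dwIndex selects the per-host list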
// iRobotsIndex : Index into robots.txt, -1 if unavailable
HRESULT CWebCrawler::ValidateWithRobotsTxt(LPCWSTR pwszUrl, int iRobotsIndex, BOOL *pfAllow)
{
int iNumDirectives, i;
CWCStringList *pslThisRobotsTxt=NULL;
*pfAllow = TRUE;
if (!m_pRobotsTxt)
return S_OK;
if (iRobotsIndex == -1)
{
DWORD dwIndex;
if (S_OK != GetRobotsTxtIndex(pwszUrl, FALSE, &dwIndex))
return E_FAIL;
iRobotsIndex = (int)dwIndex;
}
if ((iRobotsIndex >= 0) && iRobotsIndex<m_pRobotsTxt->NumStrings())
{
pslThisRobotsTxt = (CWCStringList *)(m_pRobotsTxt->GetStringData(iRobotsIndex));
if (pslThisRobotsTxt)
{
iNumDirectives = pslThisRobotsTxt->NumStrings();
for (i=0; i<iNumDirectives; i++)
{
// See if this url starts with the same thing as the directive
if (!MyAsciiCmpNIW(pwszUrl, pslThisRobotsTxt->GetString(i), pslThisRobotsTxt->GetStringLen(i)))
{
// hit! see if this is "allow" or "disallow"
if (!(pslThisRobotsTxt->GetStringData(i) & DATA_ALLOW))
{
TraceMsg(TF_THISMODULE, "ValidateWithRobotsTxt disallowing: (%ws) (%ws)",
pslThisRobotsTxt->GetString(i), pwszUrl);
*pfAllow = FALSE;
m_iSkippedByRobotsTxt ++;
}
break;
}
}
}
return S_OK;
}
return E_FAIL;
}
typedef struct
{
LPCWSTR pwszThisUrl;
CWCStringList *pslGlobal;
BOOL fDiskFull;
DWORD dwSize;
GROUPID llGroupID;
}
ENUMDEPENDENCIES;
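// Sketch of intended use (assumed from CheckImageOrLink below): the caller
// fills one of these and passes its address as the DWORD_PTR cookie when
// enumerating a page's images and links,
//
//   ENUMDEPENDENCIES ed = { pwszThisUrl, m_pDependencies, FALSE, 0, m_llCacheGroupID };
//
// and on return dwSize holds the bytes made sticky while fDiskFull reports
// that the cache filled up mid-enumeration.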
// Doesn't process it if we already have it in the global dependency list
HRESULT CWebCrawler::CheckImageOrLink(IUnknown *punkItem, BSTR *pbstrItem, DWORD_PTR dwEnumDep, DWORD *pdwStringData)
{
if (!dwEnumDep)
return E_FAIL;
ENUMDEPENDENCIES *pEnumDep = (ENUMDEPENDENCIES *) dwEnumDep;
WCHAR wszCombinedUrl[INTERNET_MAX_URL_LENGTH];
DWORD dwLen = ARRAYSIZE(wszCombinedUrl);
HRESULT hr;
if (pEnumDep->fDiskFull)
return E_ABORT; // Abort enumeration
if (SUCCEEDED(UrlCombineW(pEnumDep->pwszThisUrl, *pbstrItem, wszCombinedUrl, &dwLen, 0)))
{
TCHAR szCombinedUrl[INTERNET_MAX_URL_LENGTH];
BYTE chBuf[MY_MAX_CACHE_ENTRY_INFO];
if (pEnumDep->pslGlobal != NULL)
{
int iCode = pEnumDep->pslGlobal->AddString(*pbstrItem, 0);
if (CWCStringList::STRLST_ADDED != iCode)
{
// The string already existed (or Add failed). Don't process this.
return S_OK;
}
}
// Process this url.
MyOleStrToStrN(szCombinedUrl, INTERNET_MAX_URL_LENGTH, wszCombinedUrl);
hr = GetUrlInfoAndMakeSticky(NULL, szCombinedUrl,
(LPINTERNET_CACHE_ENTRY_INFO)chBuf, sizeof(chBuf),
pEnumDep->llGroupID);
if (E_OUTOFMEMORY == hr)
{
pEnumDep->fDiskFull = TRUE;
return E_ABORT; // Skip rest of enumeration
}
if (SUCCEEDED(hr))
pEnumDep->dwSize += ((LPINTERNET_CACHE_ENTRY_INFO)chBuf)->dwSizeLow;
}
return S_OK;
}
HRESULT CWebCrawler::MatchNames(BSTR bstrName, BOOL fPassword)
{
static const WCHAR c_szPassword1[] = L"password";
static const WCHAR c_szUsername1[] = L"user";
static const WCHAR c_szUsername2[] = L"username";
HRESULT hr = E_FAIL;
LPCTSTR pszKey = c_szRegKeyPasswords;
// See if the name matches our preset options.
// Should these be localized? I don't think so or subscribing to
// US sites will fail in international versions of the browser.
if (fPassword)
{
if (StrCmpIW(bstrName, c_szPassword1) == 0)
{
hr = S_OK;
}
}
else
{
if ((StrCmpIW(bstrName, c_szUsername1) == 0) ||
(StrCmpIW(bstrName, c_szUsername2) == 0))
{
hr = S_OK;
}
else
{
pszKey = c_szRegKeyUsernames;
}
}
// Try the registry for custom form names if the presets didn't match.
if (FAILED(hr))
{
LONG lRes;
HKEY hKey;
DWORD cValues;
DWORD i;
lRes = RegOpenKeyEx(HKEY_CURRENT_USER, pszKey, 0, KEY_READ, &hKey);
if (ERROR_SUCCESS == lRes)
{
lRes = RegQueryInfoKey(hKey, NULL, NULL, NULL, NULL, NULL, NULL, &cValues, NULL, NULL, NULL, NULL);
if (ERROR_SUCCESS == lRes)
{
for (i = 0; i < cValues; i++)
{
TCHAR szValueName[MAX_PATH];
DWORD cchValueName = ARRAYSIZE(szValueName);
lRes = SHEnumValue(hKey, i, szValueName, &cchValueName, NULL, NULL, NULL);
if (ERROR_SUCCESS == lRes)
{
WCHAR wszValueName[MAX_PATH];
MyStrToOleStrN(wszValueName, ARRAYSIZE(wszValueName), szValueName);
if (StrCmpIW(bstrName, wszValueName) == 0)
{
hr = S_OK;
break;
}
}
}
}
lRes = RegCloseKey(hKey);
ASSERT(ERROR_SUCCESS == lRes);
}
}
return hr;
}
HRESULT CWebCrawler::FindAndSubmitForm(void)
{
// FindAndSubmitForm - If there is a user name and password in
// the start item, this will attempt to fill in and submit
// a form. It should only be called on the top level page of a
// webcrawl. We still need to check the host name in case we were
// spawned from a channel crawl.
//
// return values: S_OK successfully found and submitted a form -> restart webcrawl
// S_FALSE no username, no form, or unrecognized form ->continue webcrawl
// E_FAIL submit failed -> abort webcrawl
//
HRESULT hrReturn = S_FALSE;
HRESULT hr = S_OK;
BSTR bstrUsername = NULL;
BSTR bstrPassword = NULL;
BSTR bstrInputType= NULL;
static const WCHAR c_szInputTextType[]=L"text";
// If our host name doesn't match the root host name, don't return auth
// information.
if (m_bstrHostName)
{
LPWSTR pwszUrl, bstrHostName=NULL;
m_pCurDownload->GetRealURL(&pwszUrl); // may re-enter Trident
if (pwszUrl)
{
GetHostName(pwszUrl, &bstrHostName);
LocalFree(pwszUrl);
}
if (bstrHostName)
{
if (MyAsciiCmpW(bstrHostName, m_bstrHostName))
{
hr = E_FAIL;
}
SysFreeString(bstrHostName);
}
}
if (SUCCEEDED(hr))
hr = ReadBSTR(m_pSubscriptionItem, c_szPropCrawlUsername, &bstrUsername);
if (SUCCEEDED(hr) && bstrUsername && bstrUsername[0])
{
// NOTE: We don't allow NULL passwords.
hr = ReadPassword(m_pSubscriptionItem, &bstrPassword);
if (SUCCEEDED(hr) && bstrPassword && bstrPassword[0])
{
IHTMLDocument2 *pDoc = NULL;
hr = m_pCurDownload->GetDocument(&pDoc);
if (SUCCEEDED(hr) && pDoc)
{
IHTMLElementCollection *pFormsCollection = NULL;
hr = pDoc->get_forms(&pFormsCollection);
if (SUCCEEDED(hr) && pFormsCollection)
{
long length;
hr = pFormsCollection->get_length(&length);
TraceMsg(TF_THISMODULE, "**** FOUND USER NAME, PASSWORD, & %d FORMS ****", (int)length);
if (SUCCEEDED(hr) && length > 0)
{
// We only check the first form for a user name and password.
// Why do we pass an index to IHTMLElementCollection when
// the interface prototype says it takes a name?
IDispatch *pDispForm = NULL;
VARIANT vIndex, vEmpty;
VariantInit(&vIndex);
VariantInit(&vEmpty);
vIndex.vt = VT_I4;
vIndex.lVal = 0;
hr = pFormsCollection->item(vIndex, vEmpty, &pDispForm);
if (SUCCEEDED(hr) && pDispForm)
{
IHTMLFormElement *pForm = NULL;
hr = pDispForm->QueryInterface(IID_IHTMLFormElement, (void **)&pForm);
if (SUCCEEDED(hr) && pForm)
{
// Enum form elements looking for the input types we care about.
// Would it be faster to use tags()?
hr = pForm->get_length(&length);
if (SUCCEEDED(hr) && length >= 2)
{
// TraceMsg(TF_THISMODULE, "**** FORM ELEMENTS (%d) ****", (int)length);
BOOL fUsernameSet = FALSE;
BOOL fPasswordSet = FALSE;
IDispatch *pDispItem = NULL;
long i;
for (i = 0; i < length; i++)
{
vIndex.lVal = i; // re-use vIndex above
hr = pForm->item(vIndex, vEmpty, &pDispItem);
if (SUCCEEDED(hr) && pDispItem)
{
IHTMLInputTextElement *pInput = NULL;
// QI was the easiest way to tell them apart...
// InputText is derived from InputPassword
hr = pDispItem->QueryInterface(IID_IHTMLInputTextElement, (void **)&pInput);
SAFERELEASE(pDispItem);
if (SUCCEEDED(hr) && pInput)
{
hr = pInput->get_type(&bstrInputType);
ASSERT(SUCCEEDED(hr) && bstrInputType);
BSTR bstrName = NULL;
if (StrCmpIW(bstrInputType, c_szInputTextType) == 0)
{
// We found an INPUT element with attribute TYPE="text".
// Set it if the NAME attribute matches.
// Only setting the first matching input.
// Do we care about max length or does put_value handle it?
// TraceMsg(TF_THISMODULE, "**** FORM ELEMENT INPUT (%d) ****", (int)i);
if (!fUsernameSet)
{
hr = pInput->get_name(&bstrName);
ASSERT(SUCCEEDED(hr) && bstrName);
if (SUCCEEDED(hr) && bstrName && SUCCEEDED(MatchNames(bstrName, FALSE)))
{
hr = pInput->put_value(bstrUsername);
if (SUCCEEDED(hr))
fUsernameSet = TRUE;
}
}
}
else
{
// We found an INPUT element with attribute TYPE="password"
// Set it if the name attribute matches.
// Only setting the first matching input.
// Do we care about max length or does put_value handle it?
// TraceMsg(TF_THISMODULE, "**** FORM ELEMENT PASSWORD (%d) ****", (int)i);
if (!fPasswordSet)
{
hr = pInput->get_name(&bstrName);
ASSERT(SUCCEEDED(hr) && bstrName);
if (SUCCEEDED(hr) && bstrName && SUCCEEDED(MatchNames(bstrName, TRUE)))
{
hr = pInput->put_value(bstrPassword);
if (SUCCEEDED(hr))
fPasswordSet = TRUE;
}
}
}
SAFEFREEBSTR(bstrName);
SAFERELEASE(pInput);
}
}
}
// Submit the form if everything was set.
if (fUsernameSet && fPasswordSet)
{
ASSERT(!m_pCurDownload->GetFormSubmitted());
m_pCurDownload->SetFormSubmitted(TRUE);
hr = pForm->submit();
if (SUCCEEDED(hr))
{
m_iNumPagesDownloading ++;
TraceMsg(TF_THISMODULE, "**** FORM SUBMIT WORKED ****");
hrReturn = S_OK;
}
else
{
TraceMsg(TF_THISMODULE, "**** FORM SUBMIT FAILED ****");
hrReturn = E_FAIL;
}
}
}
SAFERELEASE(pForm);
}
SAFERELEASE(pDispForm);
}
// only length
}
SAFERELEASE(pFormsCollection);
}
SAFERELEASE(pDoc);
}
// free bstr below because we check for empty bstrs
}
SAFEFREEBSTR(bstrPassword);
}
SAFEFREEBSTR(bstrUsername);
return hrReturn;
}
// Make page and dependencies sticky and get total size
HRESULT CWebCrawler::MakePageStickyAndGetSize(LPCWSTR pwszURL, DWORD *pdwSize, BOOL *pfDiskFull)
{
ASSERT(m_pDependencies || IsRecurseFlagSet(WEBCRAWL_DONT_MAKE_STICKY));
HRESULT hr;
TCHAR szThisUrl[INTERNET_MAX_URL_LENGTH]; // use ansi internally
BYTE chBuf[MY_MAX_CACHE_ENTRY_INFO];
LPINTERNET_CACHE_ENTRY_INFO lpInfo = (LPINTERNET_CACHE_ENTRY_INFO) chBuf;
DWORD dwBufSize = sizeof(chBuf);
*pdwSize = 0;
// First we make our base url sticky and check it for changes
MyOleStrToStrN(szThisUrl, INTERNET_MAX_URL_LENGTH, pwszURL);
hr = GetUrlInfoAndMakeSticky(NULL, szThisUrl, lpInfo, dwBufSize, m_llCacheGroupID);
if (E_OUTOFMEMORY != hr)
{
if (SUCCEEDED(hr))
*pdwSize += lpInfo->dwSizeLow;
if (!IsAgentFlagSet(FLAG_CRAWLCHANGED) && SUCCEEDED(hr))
{
hr = PostCheckUrlForChange(&m_varChange, lpInfo, lpInfo->LastModifiedTime);
// If we FAILED, we mark it as changed.
if (hr == S_OK || FAILED(hr))
{
SetAgentFlag(FLAG_CRAWLCHANGED);
DBG("URL has changed; will flag webcrawl as changed");
}
// "Changes Only" mode, persist change detection code
if (IsAgentFlagSet(FLAG_CHANGESONLY))
{
ASSERT(m_iTotalStarted == 1);
WriteVariant(m_pSubscriptionItem, c_szPropChangeCode, &m_varChange);
return S_OK; // We know there are no dependencies
}
hr = S_OK;
}
}
else
{
*pfDiskFull = TRUE;
}
// Now we make all the new dependencies we downloaded for this page sticky
if (!*pfDiskFull && m_pDependencies)
{
EnterCriticalSection(&m_critDependencies);
for (; m_iDependenciesProcessed < m_pDependencies->NumStrings(); m_iDependenciesProcessed ++)
{
MyOleStrToStrN(szThisUrl, INTERNET_MAX_URL_LENGTH, m_pDependencies->GetString(m_iDependenciesProcessed));
hr = GetUrlInfoAndMakeSticky(NULL, szThisUrl, lpInfo, dwBufSize, m_llCacheGroupID);
if (E_OUTOFMEMORY == hr)
{
*pfDiskFull = TRUE;
break;
}
if (SUCCEEDED(hr))
*pdwSize += lpInfo->dwSizeLow;
}
LeaveCriticalSection(&m_critDependencies);
}
if (*pfDiskFull)
{
DBG_WARN("Webcrawler: UrlCache full trying to make sticky");
return E_OUTOFMEMORY;
}
return S_OK;
}
// Returns the next token (NUL-terminated in place), or NULL if no token found
LPSTR GetToken(LPSTR pszBuf, /*inout*/int *piBufPtr, /*out*/int *piLen)
{
static const CHAR szWhitespace[] = " \t\n\r";
int iPtr = *piBufPtr;
int iLen;
while (1)
{
// skip leading whitespace
iPtr += StrSpnA(pszBuf+iPtr, szWhitespace);
if (!pszBuf[iPtr])
return NULL;
if (pszBuf[iPtr] == '#')
{
// comment; skip line
while (pszBuf[iPtr] && pszBuf[iPtr]!='\r' && pszBuf[iPtr]!='\n') iPtr++;
if (!pszBuf[iPtr])
return NULL;
continue;
}
// skip to next whitespace
iLen = StrCSpnA(pszBuf+iPtr, szWhitespace);
if (iLen == 0)
return NULL; // shouldn't happen
*piBufPtr = iLen + iPtr;
if (piLen)
*piLen = iLen;
if (pszBuf[iLen+iPtr])
{
pszBuf[iLen+iPtr] = '\0';
++ *piBufPtr;
}
break;
}
// TraceMsgA(TF_THISMODULE, "GetToken returning \"%s\"", (LPSTR)(pszBuf+iPtr));
return pszBuf + iPtr;
}
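// Example (illustrative): tokenizing the buffer
//
//   "# a comment\r\nUser-Agent: *\r\nDisallow: /private/\r\n"
//
// returns "User-Agent:", "*", "Disallow:", "/private/" on successive calls;
// comment lines are skipped and each token is NUL-terminated in place.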
// === Support functions for OnDownloadComplete
// ParseRobotsTxt gets the stream from CUrlDownload, parses it, and fills in parsed
// info to *ppslRet
HRESULT CWebCrawler::ParseRobotsTxt(LPCWSTR pwszRobotsTxtURL, CWCStringList **ppslRet)
{
// Given a robots.txt file (from CUrlDownload), it
// parses the file and fills in a string list with appropriate
// info.
*ppslRet = NULL;
CHAR szRobotsTxt[MAX_ROBOTS_SIZE];
HRESULT hr=S_OK;
LPSTR pszToken;
IStream *pstm=NULL;
DWORD_PTR dwData;
hr = m_pCurDownload->GetStream(&pstm);
if (SUCCEEDED(hr))
{
STATSTG st;
DWORD dwSize;
DBG("CWebCrawler parsing robots.txt file");
pstm->Stat(&st, STATFLAG_NONAME);
dwSize = st.cbSize.LowPart;
if (st.cbSize.HighPart || dwSize >= MAX_ROBOTS_SIZE)
{
szRobotsTxt[0] = 0;
DBG("CWebCrawler: Robots.Txt too big; ignoring");
hr = E_FAIL;
}
else
{
hr = pstm->Read(szRobotsTxt, dwSize, NULL);
szRobotsTxt[dwSize] = 0;
}
pstm->Release();
pstm=NULL;
if (((BYTE)szRobotsTxt[0] == 0xff) && ((BYTE)szRobotsTxt[1] == 0xfe)) // UTF-16 LE BOM; cast so the compare works with signed char
{
DBG_WARN("Unicode robots.txt! Ignoring ...");
hr = E_FAIL;
}
}
if (FAILED(hr))
return hr;
int iPtr = 0;
WCHAR wchBuf2[256];
WCHAR wchBuf[INTERNET_MAX_URL_LENGTH];
DWORD dwBufSize;
// Find the first "user-agent" which matches
while ((pszToken = GetToken(szRobotsTxt, &iPtr, NULL)) != NULL)
{
if (lstrcmpiA(pszToken, c_szRobots_UserAgent))
continue;
pszToken = GetToken(szRobotsTxt, &iPtr, NULL);
if (!pszToken)
break;
if ((*pszToken == '*') ||
(!lstrcmpiA(pszToken, c_szRobots_OurUserAgent)))
{
TraceMsgA(TF_THISMODULE, "Using user agent segment: \"%s\"", pszToken);
break;
}
}
if (!pszToken)
return E_FAIL;
CWCStringList *psl = new CWCDwordStringList;
if (psl)
{
psl->Init(2048);
// Look for Allow: or Disallow: sections
while ((pszToken = GetToken(szRobotsTxt, &iPtr, NULL)) != NULL)
{
if (!lstrcmpiA(pszToken, c_szRobots_UserAgent))
break; // end of our 'user-agent' section
dwData = 0;
if (!lstrcmpiA(pszToken, c_szRobots_Allow)) dwData = DATA_ALLOW;
if (!lstrcmpiA(pszToken, c_szRobots_Disallow)) dwData = DATA_DISALLOW;
if (!dwData)
continue; // look for next token
pszToken = GetToken(szRobotsTxt, &iPtr, NULL);
if (!pszToken)
break;
// Ensure that they don't have blank entries; we'll abort if so
if (!lstrcmpiA(pszToken, c_szRobots_UserAgent) ||
!lstrcmpiA(pszToken, c_szRobots_Allow) ||
!lstrcmpiA(pszToken, c_szRobots_Disallow))
{
break;
}
// Combine this url with the base for this site.
dwBufSize = ARRAYSIZE(wchBuf);
if (SHAnsiToUnicode(pszToken, wchBuf2, ARRAYSIZE(wchBuf2)) &&
SUCCEEDED(UrlCombineW(pwszRobotsTxtURL, wchBuf2, wchBuf, &dwBufSize, 0)))
{
TraceMsgA(TF_THISMODULE, "Robots.txt will %s urls with %s (%ws)",
((dwData==DATA_ALLOW) ? c_szRobots_Allow : c_szRobots_Disallow),
pszToken, wchBuf);
// if this is a duplicate url we effectively ignore this directive
// thanks to CWCStringList removing duplicates for us
psl->AddString(wchBuf, dwData);
}
}
}
if (psl && (psl->NumStrings() > 0))
{
*ppslRet = psl;
return S_OK;
}
if (psl)
delete psl;
return E_FAIL;
}
HRESULT CWebCrawler::GetRealUrl(int iPageIndex, LPWSTR *ppwszThisUrl)
{
m_pCurDownload->GetRealURL(ppwszThisUrl);
if (*ppwszThisUrl)
{
return S_OK;
}
DBG_WARN("m_pCurDownload->GetRealURL failed!!!");
// Get url from string list
LPCWSTR pwszUrl=NULL;
pwszUrl = m_pPages->GetString(iPageIndex);
if (pwszUrl)
{
*ppwszThisUrl = StrDupW(pwszUrl);
}
return (*ppwszThisUrl) ? S_OK : E_OUTOFMEMORY;
}
// Allocates BSTR for host name.
HRESULT CWebCrawler::GetHostName(LPCWSTR pwszThisUrl, BSTR *pbstrHostName)
{
if (pwszThisUrl)
{
URL_COMPONENTSA comp;
LPSTR pszUrl;
int iLen;
// InternetCrackUrlW(pszUrl, 0, 0, &comp) // this is even slower than converting it ourselves...
// convert to ansi
iLen = lstrlenW(pwszThisUrl) + 1;
pszUrl = (LPSTR)MemAlloc(LMEM_FIXED, iLen);
if (pszUrl)
{
SHUnicodeToAnsi(pwszThisUrl, pszUrl, iLen);
// crack out the host name
ZeroMemory(&comp, sizeof(comp));
comp.dwStructSize = sizeof(comp);
comp.dwHostNameLength = 1; // indicate that we want the host name
if (InternetCrackUrlA(pszUrl, 0, 0, &comp))
{
*pbstrHostName = SysAllocStringLen(NULL, comp.dwHostNameLength);
if (*pbstrHostName)
{
comp.lpszHostName[comp.dwHostNameLength] = 0; // avoid debug rip
SHAnsiToUnicode(comp.lpszHostName, *pbstrHostName, comp.dwHostNameLength + 1);
ASSERT((*pbstrHostName)[comp.dwHostNameLength] == 0);
}
}
MemFree((HLOCAL)pszUrl);
}
}
return S_OK;
}
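// Example (hypothetical URL): L"http://www.example.com/foo/bar.html" yields
// *pbstrHostName == L"www.example.com". Note the function returns S_OK even
// if cracking fails, in which case *pbstrHostName is left untouched.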
// Gets partly validated (CUrlDownload::IsValidUrl and hostname validation)
// string lists and leaves in m_pPendingLinks
// Remaining validation is robots.txt if any
HRESULT CWebCrawler::GetLinksFromPage()
{
// Get links from this page that we want to follow.
CWCStringList *pslLinks=NULL, slMeta;
IHTMLDocument2 *pDoc;
BOOL fFollowLinks = TRUE;
int i;
slMeta.Init(2048);
m_pCurDownload->GetDocument(&pDoc);
if (pDoc)
{
// See if there is a META tag telling us not to follow
CHelperOM::GetCollection(pDoc, &slMeta, CHelperOM::CTYPE_META, NULL, 0);
for (i=0; i<slMeta.NumStrings(); i++)
{
if (!StrCmpNIW(slMeta.GetString(i), c_wszRobotsMetaName, c_iRobotsMetaNameLen))
{
LPCWSTR pwszContent = slMeta.GetString(i) + c_iRobotsMetaNameLen;
TraceMsg(TF_THISMODULE, "Found 'robots' meta tag; content=%ws", pwszContent);
while (pwszContent && *pwszContent)
{
if (!StrCmpNIW(pwszContent, c_wszRobotsNoFollow, c_iRobotsNoFollow))
{
DBG("Not following links from this page.");
fFollowLinks = FALSE;
break;
}
pwszContent = StrChrW(pwszContent+1, L',');
if (pwszContent && *pwszContent)
pwszContent ++;
}
break;
}
}
if (fFollowLinks)
{
if (m_pPendingLinks)
pslLinks = m_pPendingLinks;
else
{
pslLinks = new CWCDwordStringList;
if (pslLinks)
pslLinks->Init();
else
return E_OUTOFMEMORY;
}
CHelperOM::GetCollection(pDoc, pslLinks, CHelperOM::CTYPE_LINKS, &CheckLink, (DWORD_PTR)this);
CHelperOM::GetCollection(pDoc, pslLinks, CHelperOM::CTYPE_MAPS, &CheckLink, (DWORD_PTR)this);
}
pDoc->Release();
pDoc=NULL;
}
m_pPendingLinks = pslLinks;
return S_OK;
}
// Gets 'dependency links' such as frames from a page
HRESULT CWebCrawler::GetDependencyLinksFromPage(LPCWSTR pwszThisUrl, DWORD dwRecurse)
{
CWCStringList *psl=NULL;
IHTMLDocument2 *pDoc;
int i, iAdd, iIndex, iOldMax;
DWORD_PTR dwData;
if (m_pDependencyLinks)
psl = m_pDependencyLinks;
else
{
m_iDependencyStarted = 0;
psl = new CWCStringList;
if (psl)
psl->Init(2048);
else
return E_OUTOFMEMORY;
}
iOldMax = psl->NumStrings();
m_pCurDownload->GetDocument(&pDoc);
if (pDoc)
{
// Add Frames ("Frame" and "IFrame" tags) if present
CHelperOM::GetCollection(pDoc, psl, CHelperOM::CTYPE_FRAMES, CheckFrame, (DWORD_PTR)pwszThisUrl);
}
SAFERELEASE(pDoc);
m_pDependencyLinks = psl;
// Add the new urls to the main page list
for (i = iOldMax; i<psl->NumStrings(); i++)
{
iAdd = m_pPages->AddString(m_pDependencyLinks->GetString(i),
dwRecurse,
&iIndex);
if (m_lMaxNumUrls > 0 && iAdd==CWCStringList::STRLST_ADDED)
m_lMaxNumUrls ++;
if (iAdd == CWCStringList::STRLST_FAIL)
return E_OUTOFMEMORY;
if (iAdd == CWCStringList::STRLST_DUPLICATE)
{
// bump up recursion level of old page if necessary
// See if we've downloaded this yet.
dwData = m_pPages->GetStringData(iIndex);
if (!(dwData & DATA_DLSTARTED))
{
// Haven't downloaded it yet.
// Update the recurse levels if necessary.
if ((dwData & DATA_RECURSEMASK) < dwRecurse)
{
dwData = (dwData & ~DATA_RECURSEMASK) | dwRecurse;
}
// Turn off the "link" bit
dwData &= ~DATA_LINK;
m_pPages->SetStringData(iIndex, dwData);
}
#ifdef DEBUG
// Shouldn't happen; this frame already dl'd with lower recurse level
else
ASSERT((dwData & DATA_RECURSEMASK) >= dwRecurse);
#endif
}
}
return S_OK;
}
//-------------------------------------
// OnDownloadComplete
//
// Called when a url is finished downloading, it processes the url
// and kicks off the next download
//
HRESULT CWebCrawler::OnDownloadComplete(UINT iID, int iError)
{
int iPageIndex = m_iCurDownloadStringIndex;
BOOL fOperationComplete = FALSE;
BOOL fDiskFull = FALSE;
BSTR bstrCDFURL = NULL; // CDF URL if there is one
LPWSTR pwszThisUrl=NULL;
HRESULT hr;
TraceMsg(TF_THISMODULE, "WebCrawler: OnDownloadComplete(%d)", iError);
ASSERT(m_pPages);
ASSERT(iPageIndex < m_pCurDownloadStringList->NumStrings());
if (_ERROR_REPROCESSING != iError)
{
m_iNumPagesDownloading --;
ASSERT(m_iNumPagesDownloading == 0);
}
if (m_pCurDownloadStringList == m_pRobotsTxt)
{
CWCStringList *pslNew=NULL;
// Process robots.txt file
if (SUCCEEDED(ParseRobotsTxt(m_pRobotsTxt->GetString(iPageIndex), &pslNew)))
{
m_pRobotsTxt->SetStringData(iPageIndex, (DWORD_PTR)(pslNew));
}
}
else
{
// Process normal file
ASSERT(m_pCurDownloadStringList == m_pPages);
DWORD dwData, dwRecurseLevelsFromThisPage;
dwData = (DWORD)m_pPages->GetStringData(iPageIndex);
dwRecurseLevelsFromThisPage = dwData & DATA_RECURSEMASK;
dwData |= DATA_DLFINISHED;
if (iError > 0)
dwData |= DATA_DLERROR;
// mark as downloaded
m_pCurDownloadStringList->SetStringData(iPageIndex, dwData);
// Is this the first page?
if (m_iTotalStarted == 1)
{
// Check the HTTP response code
DWORD dwResponseCode;
hr = m_pCurDownload->GetResponseCode(&dwResponseCode);
if (SUCCEEDED(hr))
{
hr = CheckResponseCode(dwResponseCode);
if (FAILED(hr))
fOperationComplete = TRUE;
}
else
DBG("CWebCrawler failed to GetResponseCode");
// Get the Charset
BSTR bstrCharSet=NULL;
IHTMLDocument2 *pDoc=NULL;
// -> Bharats --------
// Find a <LINK> tag and, if it points to a CDF, store the CDF URL away by copying it.
// Later we UrlCombine this CDF URL with the base URL.
if (SUCCEEDED(m_pCurDownload->GetDocument(&pDoc)) && pDoc &&
SUCCEEDED(pDoc->get_charset(&bstrCharSet)) && bstrCharSet)
{
WriteOLESTR(m_pSubscriptionItem, c_szPropCharSet, bstrCharSet);
TraceMsg(TF_THISMODULE, "Charset = \"%ws\"", bstrCharSet);
SysFreeString(bstrCharSet);
}
else
WriteEMPTY(m_pSubscriptionItem, c_szPropCharSet);
if(pDoc)
{
if(FAILED(GetChannelItem(NULL))) // A Doc exists and this download is not from a channel itself
{
IHTMLLinkElement *pLink = NULL;
hr = SearchForElementInHead(pDoc, OLESTR("REL"), OLESTR("OFFLINE"),
IID_IHTMLLinkElement, (IUnknown **)&pLink);
if(S_OK == hr)
{
hr = pLink->get_href(&bstrCDFURL);
pLink->Release();
}
}
pDoc->Release();
pDoc = NULL;
}
}
if ((iError != _ERROR_REPROCESSING) && (iError != BDU2_ERROR_NONE))
{
if (iError != BDU2_ERROR_NOT_HTML)
m_iDownloadErrors ++;
if (iError == BDU2_ERROR_MAXSIZE)
{
SetEndStatus(INET_E_AGENT_MAX_SIZE_EXCEEDED);
fOperationComplete = TRUE;
}
}
else
{
// Don't process this url if we already have set fOperationComplete
if (!fOperationComplete)
{
// Did we get *just* the HEAD info?
if (IsAgentFlagSet(FLAG_HEADONLY))
{
SYSTEMTIME stLastModified;
FILETIME ftLastModified;
if (SUCCEEDED(m_pCurDownload->GetLastModified(&stLastModified)) &&
SystemTimeToFileTime(&stLastModified, &ftLastModified))
{
DBG("Retrieved 'HEAD' info; change detection based on Last Modified");
hr = PostCheckUrlForChange(&m_varChange, NULL, ftLastModified);
// If we FAILED, we mark it as changed.
if (hr == S_OK || FAILED(hr))
{
SetAgentFlag(FLAG_CRAWLCHANGED);
DBG("URL has changed; will flag webcrawl as changed");
}
// "Changes Only" mode, persist change detection code
ASSERT(IsAgentFlagSet(FLAG_CHANGESONLY));
ASSERT(m_iTotalStarted == 1);
WriteVariant(m_pSubscriptionItem, c_szPropChangeCode, &m_varChange);
}
}
else
{
// Get real URL in case we were redirected
if (FAILED(GetRealUrl(iPageIndex, &pwszThisUrl)))
{
fOperationComplete = TRUE; // bad
}
else
{
ASSERT(pwszThisUrl);
// Get host name from first page if necessary
if ((iPageIndex==0) &&
(m_dwRecurseLevels>0) &&
!IsRecurseFlagSet(WEBCRAWL_LINKS_ELSEWHERE) &&
!m_bstrHostName)
{
GetHostName(pwszThisUrl, &m_bstrHostName);
#ifdef DEBUG
if (m_bstrHostName)
TraceMsg(TF_THISMODULE, "Just got first host name: %ws", m_bstrHostName);
else
DBG_WARN("Get first host name failed!!!");
#endif
}
DWORD dwCurSize = 0, dwRepeat = 0;
HRESULT hr1;
do
{
hr1 = S_OK;
// Make page and dependencies sticky and get their total size
fDiskFull = FALSE;
MakePageStickyAndGetSize(pwszThisUrl, &dwCurSize, &fDiskFull);
if (fDiskFull && (dwRepeat < 2))
{
// If we couldn't make stuff sticky, ask host to make cache bigger
hr1 = m_pAgentEvents->ReportError(&m_SubscriptionCookie,
INET_E_AGENT_EXCEEDING_CACHE_SIZE, NULL);
if (hr1 == E_PENDING)
{
// Host is going to ask the user to increase the cache size.
// Host should either abort or resume us later.
SetAgentFlag(FLAG_WAITING_FOR_INCREASED_CACHE);
goto done;
}
else if (hr1 == INET_S_AGENT_INCREASED_CACHE_SIZE)
{
// Host just increased the cache size. Try it again.
}
else
{
// Not gonna do it. Abort.
}
}
}
while ((hr1 == INET_S_AGENT_INCREASED_CACHE_SIZE) && (++dwRepeat <= 2));
m_dwCurSize += dwCurSize;
// Is there form based authentication that we need to handle
// on the top page of this subscription?
if (!fDiskFull && (0 == iPageIndex) && !m_pCurDownload->GetFormSubmitted())
{
hr = FindAndSubmitForm();
if (S_OK == hr)
{
// Successfully submitted form. Bail and wait for the next OnDownloadComplete() call.
// FEATURE: Should we make the form URL and dependencies sticky?
return S_OK;
}
else if (FAILED(hr))
{
// We failed trying to submit the form. Bail.
// FEATURE: Should we set a better error string?
SetEndStatus(E_FAIL);
CleanUp();
return S_OK;
}
// else no form - fall through
}
TraceMsg(TF_THISMODULE, "WebCrawler up to %d kb", (int)(m_dwCurSize>>10));
if ((m_lMaxNumUrls < 0) &&
!dwRecurseLevelsFromThisPage &&
!(dwData & DATA_CODEBASE))
{
m_lMaxNumUrls = m_pPages->NumStrings() + ((m_pRobotsTxt) ? m_pRobotsTxt->NumStrings() : 0);
}
} // SUCCEEDED(GetRealUrl)
} // !FLAG_HEADONLY
} // !fOperationComplete
// If we're in "Changes Only" mode, we're done.
if (IsAgentFlagSet(FLAG_CHANGESONLY))
fOperationComplete = TRUE;
// Check to see if we're past our max size
if (!fOperationComplete && (fDiskFull || (m_dwMaxSize && (m_dwCurSize >= (m_dwMaxSize<<10)))))
{
#ifdef DEBUG
if (fDiskFull)
DBG_WARN("Disk/cache full; aborting.");
else
TraceMsg(TF_WARNING, "Past maximum size; aborting. (%d kb of %d kb)", (int)(m_dwCurSize>>10), (int)m_dwMaxSize);
#endif
// abort operation
fOperationComplete = TRUE;
if (fDiskFull)
{
SetEndStatus(INET_E_AGENT_CACHE_SIZE_EXCEEDED);
}
else
{
SetEndStatus(INET_E_AGENT_MAX_SIZE_EXCEEDED);
}
}
if (!fOperationComplete)
{
// Get any links from page
// Get "dependency links" from page - frames, etc.
// we do this even if a CDF file is specified: the user typically has
// no idea the CDF file exists, so skipping these links would only
// produce confusing results
GetDependencyLinksFromPage(pwszThisUrl, dwRecurseLevelsFromThisPage);
if (dwRecurseLevelsFromThisPage)
{
// Get links from this page that we want to follow.
GetLinksFromPage();
if (m_pPendingLinks)
TraceMsg(TF_THISMODULE,
"Total of %d unique valid links found", m_pPendingLinks->NumStrings());
m_dwPendingRecurseLevel = dwRecurseLevelsFromThisPage - 1;
}
}
} // !iError
} // !robots.txt
if(!fOperationComplete)
StartCDFDownload(bstrCDFURL, pwszThisUrl);
if(!m_fCDFDownloadInProgress)
{
// Don't start code downloads or any of the remaining URLs until
// the CDF download is done
// See if we have any more URLs to download.
if (!fOperationComplete && FAILED(StartNextDownload()))
fOperationComplete = TRUE; // No, we're done!
}
CheckOperationComplete(fOperationComplete);
done:
if (pwszThisUrl)
MemFree(pwszThisUrl);
SAFEFREEBSTR(bstrCDFURL);
return S_OK;
}
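// StartCDFDownload :: If the page named a CDF via a <LINK REL=OFFLINE> in its
// HEAD, clone our subscription item, point it at the combined CDF URL, pass
// our cache group id down as the new group id, and run the channel agent on
// it through CRunDeliveryAgent. Sets m_fCDFDownloadInProgress when the agent
// goes asynchronous (E_PENDING).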
HRESULT CWebCrawler::StartCDFDownload(WCHAR *pwszCDFURL, WCHAR *pwszBaseUrl)
{
HRESULT hr = E_FAIL;
m_fCDFDownloadInProgress = FALSE;
if(pwszCDFURL)
{
// We have a CDF File - begin download of it
if (m_pRunAgent)
{
ASSERT(0);
DBG_WARN("WebCrawler: Attempting to download next CDF when nother CDF exists.");
hr = E_FAIL;
goto Exit;
}
else
{
// create subscription item for CDL agent.
ISubscriptionItem *pItem = NULL;
if (m_dwMaxSize && ((m_dwCurSize>>10) >= m_dwMaxSize))
{
// We've exceeded our maximum download KB limit and can't continue.
DBG_WARN("WebCrawler: Exceeded Maximum KB download limit with CodeBase download.");
SetEndStatus(hr = INET_E_AGENT_MAX_SIZE_EXCEEDED);
goto Exit;
}
if (!m_pSubscriptionItem ||
FAILED(hr = DoCloneSubscriptionItem(m_pSubscriptionItem, NULL, &pItem)))
{
goto Exit;
}
ASSERT(pItem != NULL);
ASSERT(pwszCDFURL != NULL);
WCHAR wszCombined[INTERNET_MAX_URL_LENGTH];
DWORD dwBufSize = ARRAYSIZE(wszCombined);
if (SUCCEEDED(UrlCombineW(pwszBaseUrl, pwszCDFURL, wszCombined, &dwBufSize, 0)))
{
WriteOLESTR(pItem, c_szPropURL, wszCombined);
WriteEMPTY(pItem, c_szPropCrawlGroupID); // clear the old cache group id - don't want
// children to know of it
// The crawler already has a cache group id that we simply use as the new ID
WriteLONGLONG(pItem, c_szPropCrawlNewGroupID, m_llCacheGroupID);
WriteDWORD(pItem, c_szPropChannelFlags, CHANNEL_AGENT_PRECACHE_ALL);
// Finally - since we know that this is for offline use, we just set the flags to precache all
m_pRunAgent = new CRunDeliveryAgent();
if (m_pRunAgent)
hr = m_pRunAgent->Init((CRunDeliveryAgentSink *)this, pItem, CLSID_ChannelAgent);
pItem->Release();
if (m_pRunAgent && SUCCEEDED(hr))
{
hr = m_pRunAgent->StartAgent();
if (hr == E_PENDING)
{
hr = S_OK;
m_fCDFDownloadInProgress = TRUE;
}
}
else
{
hr = E_OUTOFMEMORY;
}
}
}
}
Exit:
if((S_OK != hr) && m_pRunAgent)
{
CRunDeliveryAgent::SafeRelease(m_pRunAgent);
}
return hr;
}
// CRunDeliveryAgentSink call back method to signal the end of a codebase download.
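// Called both when the CDF download finishes (m_fCDFDownloadInProgress) and
// when a codebase CAB from m_pCodeBaseList finishes; for the latter we make
// the cached CAB sticky, add its size to m_dwCurSize and enforce MaxSizeKB.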
HRESULT CWebCrawler::OnAgentEnd(const SUBSCRIPTIONCOOKIE *pSubscriptionCookie,
long lSizeDownloaded, HRESULT hrResult, LPCWSTR wszResult,
BOOL fSynchronous)
{
ASSERT(m_pRunAgent != NULL);
BOOL fOperationComplete = FALSE;
CRunDeliveryAgent::SafeRelease(m_pRunAgent);
if(m_fCDFDownloadInProgress)
{
m_fCDFDownloadInProgress = FALSE;
}
else
{
int iPageIndex = m_iCurDownloadStringIndex;
BOOL fDiskFull = FALSE;
CCodeBaseHold *pcbh = NULL;
BOOL fError;
LPCWSTR pwszThisURL=NULL;
TraceMsg(TF_THISMODULE, "WebCrawler: OnAgentEnd of CRunDeliveryAgentSink");
ASSERT(m_pCodeBaseList);
ASSERT(iPageIndex < m_pCurDownloadStringList->NumStrings());
ASSERT(m_pCurDownloadStringList == m_pCodeBaseList);
m_iNumPagesDownloading --;
ASSERT(m_iNumPagesDownloading == 0);
pcbh = (CCodeBaseHold *)m_pCodeBaseList->GetStringData(iPageIndex);
pwszThisURL = m_pCodeBaseList->GetString(iPageIndex);
ASSERT(pwszThisURL);
pcbh->dwFlags |= DATA_DLFINISHED;
fError = FAILED(hrResult);
if (fSynchronous)
{
fError = TRUE;
ASSERT(FAILED(hrResult)); // we can't succeed synchronously...
}
//NOTE: The CDL agent will abort if it finds that the file exceeds MaxSizeKB. In that case the file is
// not counted, and there may be other, smaller CABs that can still be downloaded, so we proceed.
if (fError)
{
pcbh->dwFlags |= DATA_DLERROR;
m_iDownloadErrors ++;
SetEndStatus(hrResult);
}
else
{
BYTE chBuf[MY_MAX_CACHE_ENTRY_INFO];
LPINTERNET_CACHE_ENTRY_INFO lpInfo = (LPINTERNET_CACHE_ENTRY_INFO) chBuf;
TCHAR szUrl[INTERNET_MAX_URL_LENGTH];
MyOleStrToStrN(szUrl, INTERNET_MAX_URL_LENGTH, pwszThisURL);
if (FAILED(GetUrlInfoAndMakeSticky(NULL, szUrl,
lpInfo, sizeof(chBuf), m_llCacheGroupID)))
{
//REVIEW: Do something here? Unlikely to occur in practice.
fOperationComplete = TRUE;
ASSERT(0);
}
else
{
m_dwCurSize += lpInfo->dwSizeLow;
}
TraceMsg(TF_THISMODULE, "WebCrawler up to %d kb", (int)(m_dwCurSize>>10));
if (m_dwMaxSize && ((m_dwCurSize>>10)>m_dwMaxSize))
{
// abort operation
fOperationComplete = TRUE;
if (fDiskFull)
SetEndStatus(INET_E_AGENT_CACHE_SIZE_EXCEEDED);
else
SetEndStatus(INET_E_AGENT_MAX_SIZE_EXCEEDED);
}
} // !fError
}
// See if we have any more URLs to download.
if (!fOperationComplete && FAILED(StartNextDownload()))
fOperationComplete = TRUE; // No, we're done!
if(!fSynchronous)
CheckOperationComplete(fOperationComplete);
return S_OK;
}
//////////////////////////////////////////////////////////////////////////
//
// CheckOperationComplete :: If the parameter is TRUE, all downloads are
//                           complete; the appropriate STATUS_CODE is set
//                           and cleanup is initiated.
//
//////////////////////////////////////////////////////////////////////////
void CWebCrawler::CheckOperationComplete(BOOL fOperationComplete)
{
if (fOperationComplete)
{
DBG("WebCrawler complete. Shutting down.");
if (INET_S_AGENT_BASIC_SUCCESS == GetEndStatus())
{
// Set end status appropriately
if (m_iDownloadErrors)
{
if (m_iPagesStarted<=1)
{
DBG("Webcrawl failed - first URL failed.");
SetEndStatus(E_INVALIDARG);
}
else
{
DBG("Webcrawl succeeded - some URLs failed.");
SetEndStatus(INET_S_AGENT_PART_FAIL);
}
}
else
{
DBG("Webcrawl succeeded");
if (!IsAgentFlagSet(FLAG_CRAWLCHANGED))
{
SetEndStatus(S_FALSE);
DBG("No changes were detected");
}
else
{
DBG("Webcrawl succeeded");
SetEndStatus(S_OK);
}
}
}
if (m_llOldCacheGroupID)
{
DBG("Nuking old cache group.");
if (!DeleteUrlCacheGroup(m_llOldCacheGroupID, 0, 0))
{
DBG_WARN("Failed to delete old cache group!");
}
}
WriteLONGLONG(m_pSubscriptionItem, c_szPropCrawlGroupID, m_llCacheGroupID);
m_lSizeDownloadedKB = ((m_dwCurSize+511)>>10);
WriteDWORD(m_pSubscriptionItem, c_szPropCrawlActualSize, m_lSizeDownloadedKB);
if (m_lMaxNumUrls >= 0)
{
WriteDWORD(m_pSubscriptionItem, c_szPropActualProgressMax, m_lMaxNumUrls);
}
// Send a robots.txt warning to the user if we ended up not downloading stuff
// because of the server's robots.txt file
if (m_iSkippedByRobotsTxt != 0)
{
HRESULT hr = S_OK; // Make it an "information" message
WCHAR wszMessage[200];
if (m_iPagesStarted==1)
{
hr = INET_E_AGENT_WARNING; // Unless we're missing almost everything
}
if (MLLoadStringW(IDS_CRAWL_ROBOTS_TXT_WARNING, wszMessage, ARRAYSIZE(wszMessage)))
{
m_pAgentEvents->ReportError(&m_SubscriptionCookie, hr, wszMessage);
}
}
// Will call "UpdateEnd"
CleanUp();
}
}
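// ModifyUpdateEnd :: map our end status to the status string resource shown
// for the subscription, then let the base delivery agent finish up.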
HRESULT CWebCrawler::ModifyUpdateEnd(ISubscriptionItem *pEndItem, UINT *puiRes)
{
// Customize our end status string
switch (GetEndStatus())
{
case INET_E_AGENT_MAX_SIZE_EXCEEDED :
*puiRes = IDS_AGNT_STATUS_SIZELIMIT; break;
case INET_E_AGENT_CACHE_SIZE_EXCEEDED :
*puiRes = IDS_AGNT_STATUS_CACHELIMIT; break;
case E_FAIL : *puiRes = IDS_CRAWL_STATUS_NOT_OK; break;
case S_OK :
if (!IsAgentFlagSet(FLAG_CHANGESONLY))
*puiRes = IDS_CRAWL_STATUS_OK;
else
*puiRes = IDS_URL_STATUS_OK;
break;
case S_FALSE :
if (!IsAgentFlagSet(FLAG_CHANGESONLY))
*puiRes = IDS_CRAWL_STATUS_UNCHANGED;
else
*puiRes = IDS_URL_STATUS_UNCHANGED;
break;
case INET_S_AGENT_PART_FAIL : *puiRes = IDS_CRAWL_STATUS_MOSTLYOK; break;
}
return CDeliveryAgent::ModifyUpdateEnd(pEndItem, puiRes);
}
HRESULT CWebCrawler::DownloadStart(LPCWSTR pchUrl, DWORD dwDownloadId, DWORD dwType, DWORD dwReserved)
{
HRESULT hr = S_OK, hr2;
// free threaded
EnterCriticalSection(&m_critDependencies);
if (NULL == pchUrl)
{
DBG_WARN("CWebCrawler::DownloadStart pchUrl=NULL");
}
else
{
// Check to see if this is already in our dependencies list and abort if so
if (CWCStringList::STRLST_ADDED != m_pDependencies->AddString(pchUrl, 0))
{
hr = E_ABORT; // Don't download this thing.
TraceMsg(TF_THISMODULE, "Aborting mshtml url (already added): %ws", pchUrl);
}
if (SUCCEEDED(hr))
{
// Check to see if this fails the robots.txt rules and abort if so.
// Note: this only works if we happen to have fetched this robots.txt already.
// Ideally we'd abort here, fetch it, then fetch just this dependency. Yuck.
// We also shouldn't do the check if this is the first page downloaded.
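// Illustrative only: a "Disallow:" rule in the server's robots.txt that
// covers this url (under the user-agent section that applies to us) makes
// ValidateWithRobotsTxt report fAllow==FALSE, and we return E_ABORT.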
DWORD dwIndex;
hr2 = GetRobotsTxtIndex(pchUrl, FALSE, &dwIndex);
if (SUCCEEDED(hr2))
{
BOOL fAllow;
if (SUCCEEDED(ValidateWithRobotsTxt(pchUrl, dwIndex, &fAllow)))
{
if (!fAllow)
hr = E_ABORT; // ooh, failed the test.
}
}
}
}
LeaveCriticalSection(&m_critDependencies);
return hr;
}
HRESULT CWebCrawler::DownloadComplete(DWORD dwDownloadId, HRESULT hrNotify, DWORD dwReserved)
{
// free threaded
// Do nothing. We may wish to post message to make sticky here. We may wish to
// mark as downloaded in string list here.
// EnterCriticalSection(&m_critDependencies);
// LeaveCriticalSection(&m_critDependencies);
return S_OK;
}
/* 41927 (IE5 4491)
HRESULT CWebCrawler::OnGetReferer(LPCWSTR *ppwszReferer)
{
if (m_iPagesStarted <= 1)
{
*ppwszReferer = NULL;
return S_FALSE;
}
if (m_pCurDownloadStringList == m_pRobotsTxt)
{
// Referer is last page from main list to be downloaded
*ppwszReferer = m_pPages->GetString(m_iPagesStarted-1);
return S_OK;
}
if (m_pCurDownloadStringList == m_pPages)
{
// Referer is stored in string list data
*ppwszReferer = m_pPages->GetString(
((m_pPages->GetStringData(m_iCurDownloadStringIndex) & DATA_REFERERMASK) >> DATA_REFERERSHIFT));
return S_OK;
}
// We don't return a referer for code bases
ASSERT(m_pCurDownloadStringList == m_pCodeBaseList);
return S_FALSE;
}
*/
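// OnAuthenticate :: supply stored credentials for this download, but only if
// the target's host matches the host of the first page. If we're hosted by
// the channel agent, its username/password properties are used instead of
// our own.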
HRESULT CWebCrawler::OnAuthenticate(HWND *phwnd, LPWSTR *ppszUsername, LPWSTR *ppszPassword)
{
HRESULT hr, hrRet=E_FAIL;
ASSERT(phwnd && ppszUsername && ppszPassword);
ASSERT((HWND)-1 == *phwnd && NULL == *ppszUsername && NULL == *ppszPassword);
// If our host name doesn't match the root host name, don't return auth
// information.
LPWSTR pwszUrl, bstrHostName=NULL;
m_pCurDownload->GetRealURL(&pwszUrl); // may re-enter Trident
if (pwszUrl)
{
GetHostName(pwszUrl, &bstrHostName);
LocalFree(pwszUrl);
}
if (bstrHostName)
{
if (!m_bstrHostName || !MyAsciiCmpW(bstrHostName, m_bstrHostName))
{
// Host names match. Return auth information.
// If we're hosted by channel agent, use its auth information
ISubscriptionItem *pChannel=NULL;
ISubscriptionItem *pItem=m_pSubscriptionItem;
if (SUCCEEDED(GetChannelItem(&pChannel)))
{
pItem = pChannel;
}
hr = ReadOLESTR(pItem, c_szPropCrawlUsername, ppszUsername);
if (SUCCEEDED(hr))
{
BSTR bstrPassword = NULL;
hr = ReadPassword(pItem, &bstrPassword);
if (SUCCEEDED(hr))
{
int len = (lstrlenW(bstrPassword) + 1) * sizeof(WCHAR);
*ppszPassword = (LPWSTR) CoTaskMemAlloc(len);
if (*ppszPassword)
{
CopyMemory(*ppszPassword, bstrPassword, len);
}
SAFEFREEBSTR(bstrPassword);
if (*ppszPassword)
{
hrRet = S_OK;
}
}
}
if (FAILED(hrRet))
{
SAFEFREEOLESTR(*ppszUsername);
SAFEFREEOLESTR(*ppszPassword);
}
SAFERELEASE(pChannel);
}
SysFreeString(bstrHostName);
}
return hrRet;
}
HRESULT CWebCrawler::OnClientPull(UINT iID, LPCWSTR pwszOldURL, LPCWSTR pwszNewURL)
{
// CUrlDownload is informing us it's about to do a client pull.
// Let's send out a progress report for the new url
SendUpdateProgress(pwszNewURL, m_iTotalStarted, m_lMaxNumUrls);
// Now we need to process the current url: make it and dependencies sticky
DWORD dwCurSize=0;
BOOL fDiskFull=FALSE;
MakePageStickyAndGetSize(pwszOldURL, &dwCurSize, &fDiskFull);
m_dwCurSize += dwCurSize;
TraceMsg(TF_THISMODULE, "WebCrawler processed page prior to client pull - now up to %d kb", (int)(m_dwCurSize>>10));
// Tell CUrlDownload to go ahead and download the new url
return S_OK;
}
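// OnOleCommandTargetExec :: Trident hands us APPLET PARAM values through the
// CGID_JavaParambagCompatHack command group. We read the CODEBASE/CABBASE/
// CABINETS/ARCHIVE values and the USESLIBRARY* triplet out of the property
// bag and queue the resulting CAB urls for download.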
HRESULT CWebCrawler::OnOleCommandTargetExec(const GUID *pguidCmdGroup, DWORD nCmdID,
DWORD nCmdexecopt, VARIANTARG *pvarargIn,
VARIANTARG *pvarargOut)
{
HRESULT hr = OLECMDERR_E_NOTSUPPORTED;
IPropertyBag2 *pPropBag = NULL;
int i;
//REVIEW: CLSID for this not yet defined.
if ( pguidCmdGroup
&& (*pguidCmdGroup == CGID_JavaParambagCompatHack)
&& (nCmdID == 0)
&& (nCmdexecopt == MSOCMDEXECOPT_DONTPROMPTUSER))
{
if (!IsRecurseFlagSet(WEBCRAWL_GET_CONTROLS))
{
goto Exit;
}
uCLSSPEC ucs;
QUERYCONTEXT qc = { 0 };
ucs.tyspec = TYSPEC_CLSID;
ucs.tagged_union.clsid = CLSID_JavaVM;
// Check to see if Java VM is installed. Don't try to get applets if not.
if (!SUCCEEDED(FaultInIEFeature(NULL, &ucs, &qc, FIEF_FLAG_PEEK)))
{
goto Exit;
}
ULONG enIndex;
const DWORD enMax = 7, enMin = 0;
PROPBAG2 pb[enMax];
VARIANT vaProps[enMax];
HRESULT hrResult[enMax];
enum { enCodeBase = 0, enCabBase, enCabinets, enArchive, enUsesLib, enLibrary, enUsesVer };
LPWSTR pwszThisURL = NULL;
int chLen;
//REVIEW: This will need to be reviewed later when matching trident code is available
// and details worked out.
if ((pvarargIn->vt != VT_UNKNOWN) ||
(FAILED(pvarargIn->punkVal->QueryInterface(IID_IPropertyBag2, (void **)&pPropBag))))
{
goto Exit;
}
if (FAILED(GetRealUrl(m_iCurDownloadStringIndex, &pwszThisURL)))
{
pwszThisURL = StrDupW(L"");
}
// PROPBAG2 structure for data retrieval
for (i=enMin; i<enMax; i++)
{
pb[i].dwType = PROPBAG2_TYPE_DATA;
pb[i].vt = VT_BSTR;
pb[i].cfType = NULL; // CLIPFORMAT
pb[i].dwHint = 0; // ????
pb[i].pstrName = NULL;
pb[i].clsid = CLSID_NULL; // ????
vaProps[i].vt = VT_EMPTY;
vaProps[i].bstrVal = NULL;
hrResult[i] = E_FAIL;
}
if (((pb[enCodeBase].pstrName = SysAllocString(L"CODEBASE")) != NULL) &&
((pb[enCabBase].pstrName = SysAllocString(L"CABBASE")) != NULL) &&
((pb[enCabinets].pstrName = SysAllocString(L"CABINETS")) != NULL) &&
((pb[enArchive].pstrName = SysAllocString(L"ARCHIVE")) != NULL) &&
((pb[enUsesLib].pstrName = SysAllocString(L"USESLIBRARY")) != NULL) &&
((pb[enLibrary].pstrName = SysAllocString(L"USESLIBRARYCODEBASE")) != NULL) &&
((pb[enUsesVer].pstrName = SysAllocString(L"USESLIBRARYVERSION")) != NULL))
{
//Read returns E_FAIL even if it read some of the properties.
//Since we check the individual hrResult entries below, this isn't a big deal.
hr = pPropBag->Read(enMax, &pb[0], NULL, &vaProps[0], &hrResult[0]);
{
BSTR bstrCodeBase = NULL;
// check for CODEBASE
if (SUCCEEDED(hrResult[enCodeBase]) && (vaProps[enCodeBase].vt == VT_BSTR))
{
bstrCodeBase = vaProps[enCodeBase].bstrVal;
}
// add a trailing slash if not already present
chLen = lstrlenW(bstrCodeBase);
if (chLen && bstrCodeBase[chLen-1] != '/')
{
LPWSTR szNewCodeBase = 0;
int nLen = chLen + 2;
szNewCodeBase = (LPWSTR) LocalAlloc(0,sizeof(WCHAR)*nLen);
if (szNewCodeBase)
{
StrCpyNW(szNewCodeBase, bstrCodeBase, nLen);
StrCatBuffW(szNewCodeBase, L"/", nLen);
SAFEFREEBSTR(bstrCodeBase);
bstrCodeBase = vaProps[enCodeBase].bstrVal = SysAllocString(szNewCodeBase);
LocalFree(szNewCodeBase);
}
}
// check for CABBASE
if (SUCCEEDED(hrResult[enCabBase]) && (vaProps[enCabBase].vt == VT_BSTR))
{
BSTR szCabBase = vaProps[enCabBase].bstrVal;
// Add CABBASE URL to list of CABs to pull.
if (SUCCEEDED(CombineBaseAndRelativeURLs(pwszThisURL, bstrCodeBase, &szCabBase)))
{
m_pPages->AddString(szCabBase, 0);
}
}
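// CABINETS/ARCHIVE may name several files separated by '+' or ','
// (illustrative: CABINETS="main.cab+extra.cab"); each name is combined
// with CODEBASE below and queued as a codebase download.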
// check for CABINETS
for (enIndex = enCabinets; enIndex<(enArchive+1); enIndex++)
{
if (SUCCEEDED(hrResult[enIndex]) && (vaProps[enIndex].vt == VT_BSTR))
{
BSTR szCur = vaProps[enIndex].bstrVal, szPrev = NULL;
while (szCur)
{
WCHAR wcCur = *szCur;
if ((wcCur == L'+') || (wcCur == L',') || (wcCur == L'\0'))
{
BSTR szLast = szPrev, szCabBase = NULL;
BOOL bLastFile = FALSE;
if (!szPrev)
{
szLast = vaProps[enIndex].bstrVal;
}
szPrev = szCur; szPrev++;
if (*szCur == L'\0')
{
bLastFile = TRUE;
}
*szCur = (unsigned short)L'\0';
// szLast points to current CabBase.
szCabBase = SysAllocString(szLast);
if (SUCCEEDED(CombineBaseAndRelativeURLs(pwszThisURL, bstrCodeBase, &szCabBase)))
{
int iAdd=m_pPages->AddString(szCabBase, DATA_CODEBASE);
if (m_lMaxNumUrls > 0 && iAdd==CWCStringList::STRLST_ADDED)
m_lMaxNumUrls ++;
}
SAFEFREEBSTR(szCabBase);
if (bLastFile)
{
szCur = NULL;
break;
}
}
szCur++;
} // while (szCur)
} // cabinets
}
// check for USESLIBRARY* parameters.
CCodeBaseHold *pcbh = NULL;
if (SUCCEEDED(hrResult[enUsesLib]) && (vaProps[enUsesLib].vt == VT_BSTR) &&
SUCCEEDED(hrResult[enLibrary]) && (vaProps[enLibrary].vt == VT_BSTR))
{
BSTR szThisLibCAB = NULL;
pcbh = new CCodeBaseHold();
if (pcbh)
{
pcbh->szDistUnit = SysAllocString(vaProps[enUsesLib].bstrVal);
pcbh->dwVersionMS = pcbh->dwVersionLS = -1;
pcbh->dwFlags = 0;
szThisLibCAB = SysAllocString(vaProps[enLibrary].bstrVal);
if (FAILED(CombineBaseAndRelativeURLs(pwszThisURL, bstrCodeBase, &szThisLibCAB)) ||
m_pCodeBaseList->AddString(szThisLibCAB, (DWORD_PTR)pcbh) != CWCStringList::STRLST_ADDED)
{
SAFEFREEBSTR(pcbh->szDistUnit);
SAFEDELETE(pcbh);
}
SAFEFREEBSTR(szThisLibCAB);
}
}
// Check for USESLIBRARYVERSION (optional)
if (pcbh && SUCCEEDED(hrResult[enUsesVer]) && (vaProps[enUsesVer].vt == VT_BSTR))
{
int iLen = SysStringByteLen(vaProps[enUsesVer].bstrVal)+1;
CHAR *szVerStr = (LPSTR)MemAlloc(LMEM_FIXED, iLen);
if (szVerStr)
{
SHUnicodeToAnsi(vaProps[enUsesVer].bstrVal, szVerStr, iLen);
if (FAILED(GetVersionFromString(szVerStr,
&pcbh->dwVersionMS, &pcbh->dwVersionLS)))
{
hr = HRESULT_FROM_WIN32(GetLastError());
SAFEFREEBSTR(pcbh->szDistUnit);
SAFEDELETE(pcbh);
}
MemFree(szVerStr); // freed exactly once, on success or failure
}
}
}
} // Read properties
for (i=enMin; i<enMax; i++)
{
SAFEFREEBSTR(pb[i].pstrName);
}
if (pwszThisURL)
LocalFree(pwszThisURL);
hr = S_OK;
}
Exit:
SAFERELEASE(pPropBag);
return hr;
}
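// GetDownloadNotify :: hand CUrlDownload a fresh IDownloadNotify sink,
// retiring any previous sink first so its late callbacks are ignored.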
HRESULT CWebCrawler::GetDownloadNotify(IDownloadNotify **ppOut)
{
HRESULT hr=S_OK;
if (m_pDownloadNotify)
{
m_pDownloadNotify->LeaveMeAlone();
m_pDownloadNotify->Release();
m_pDownloadNotify=NULL;
}
CDownloadNotify *pdn = new CDownloadNotify(this);
if (pdn)
{
hr = pdn->Initialize();
if (SUCCEEDED(hr))
{
m_pDownloadNotify = pdn;
*ppOut = m_pDownloadNotify;
m_pDownloadNotify->AddRef();
}
else
{
pdn->Release();
}
}
else
{
hr = E_OUTOFMEMORY;
*ppOut = NULL;
}
return hr;
}
//---------------------------------------------------------------
// CWebCrawler::CDownloadNotify class
//---------------------------------------------------------------
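// Trident invokes this sink on its own download threads, possibly after the
// crawler itself is done. The sink holds a ref on its parent; LeaveMeAlone()
// severs that back-pointer under m_critParent so a late DownloadStart or
// DownloadComplete finds a NULL parent instead of a destroyed object.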
CWebCrawler::CDownloadNotify::CDownloadNotify(CWebCrawler *pParent)
{
ASSERT(pParent);
m_cRef = 1;
m_pParent = pParent;
pParent->AddRef();
}
HRESULT CWebCrawler::CDownloadNotify::Initialize()
{
m_hrCritParent = InitializeCriticalSectionAndSpinCount(&m_critParent, 0) ? S_OK : E_OUTOFMEMORY;
return m_hrCritParent;
}
CWebCrawler::CDownloadNotify::~CDownloadNotify()
{
DBG("Destroying CWebCrawler::CDownloadNotify");
ASSERT(!m_pParent);
SAFERELEASE(m_pParent);
if (SUCCEEDED(m_hrCritParent))
{
DeleteCriticalSection(&m_critParent);
}
}
void CWebCrawler::CDownloadNotify::LeaveMeAlone()
{
if (m_pParent)
{
EnterCriticalSection(&m_critParent);
SAFERELEASE(m_pParent);
LeaveCriticalSection(&m_critParent);
}
}
// IUnknown members
HRESULT CWebCrawler::CDownloadNotify::QueryInterface(REFIID riid, void **ppv)
{
if ((IID_IUnknown == riid) ||
(IID_IDownloadNotify == riid))
{
*ppv = (IDownloadNotify *)this;
}
else
{
*ppv = NULL;
return E_NOINTERFACE;
}
((LPUNKNOWN)*ppv)->AddRef();
return S_OK;
}
ULONG CWebCrawler::CDownloadNotify::AddRef(void)
{
return InterlockedIncrement(&m_cRef);
}
ULONG CWebCrawler::CDownloadNotify::Release(void)
{
ASSERT( 0 != m_cRef );
ULONG cRef = InterlockedDecrement(&m_cRef);
if ( 0 == cRef )
{
delete this;
}
return cRef;
}
// IDownloadNotify
HRESULT CWebCrawler::CDownloadNotify::DownloadStart(LPCWSTR pchUrl, DWORD dwDownloadId, DWORD dwType, DWORD dwReserved)
{
HRESULT hr = E_ABORT; // abort it if we have nobody listening
TraceMsg(TF_THISMODULE, "DownloadStart id=%d url=%ws", dwDownloadId, pchUrl ? pchUrl : L"(null)");
EnterCriticalSection(&m_critParent);
if (m_pParent)
hr = m_pParent->DownloadStart(pchUrl, dwDownloadId, dwType, dwReserved);
LeaveCriticalSection(&m_critParent);
return hr;
}
HRESULT CWebCrawler::CDownloadNotify::DownloadComplete(DWORD dwDownloadId, HRESULT hrNotify, DWORD dwReserved)
{
HRESULT hr = S_OK;
// TraceMsg(TF_THISMODULE, "DownloadComplete id=%d hr=%x", dwDownloadId, hrNotify);
EnterCriticalSection(&m_critParent);
if (m_pParent)
hr = m_pParent->DownloadComplete(dwDownloadId, hrNotify, dwReserved);
LeaveCriticalSection(&m_critParent);
return hr;
}
//////////////////////////////////////////////////////////////////////////
//
// Other functions
//
//////////////////////////////////////////////////////////////////////////
// Make a single absolute or relative url sticky and get size
HRESULT GetUrlInfoAndMakeSticky(
LPCTSTR pszBaseUrl,
LPCTSTR pszThisUrl,
LPINTERNET_CACHE_ENTRY_INFO lpCacheEntryInfo,
DWORD dwBufSize,
GROUPID llCacheGroupID)
{
DWORD dwSize;
TCHAR szCombined[INTERNET_MAX_URL_LENGTH];
ASSERT(lpCacheEntryInfo);
// Combine urls if necessary
if (pszBaseUrl)
{
dwSize = ARRAYSIZE(szCombined);
if (SUCCEEDED(UrlCombine(pszBaseUrl, pszThisUrl,
szCombined, &dwSize, 0)))
{
pszThisUrl = szCombined;
}
else
DBG_WARN("UrlCombine failed!");
}
// Add the size of this URL
lpCacheEntryInfo->dwStructSize = dwBufSize;
if (!GetUrlCacheEntryInfo(pszThisUrl, lpCacheEntryInfo, &dwBufSize))
{
#ifdef DEBUG
if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
DBG_WARN("Failed GetUrlCacheEntryInfo, insufficient buffer");
else
TraceMsgA(llCacheGroupID ? TF_WARNING : TF_THISMODULE,
"Failed GetUrlCacheEntryInfo (not in cache) URL=%ws", pszThisUrl);
#endif
return E_FAIL;
}
// Add to new group
if (llCacheGroupID != 0)
{
if (!SetUrlCacheEntryGroup(pszThisUrl, INTERNET_CACHE_GROUP_ADD,
llCacheGroupID, NULL, 0, NULL))
{
switch (GetLastError())
{
case ERROR_FILE_NOT_FOUND: // Huh? Must not have been able to add the index entry?
case ERROR_DISK_FULL:
return E_OUTOFMEMORY;
case ERROR_NOT_ENOUGH_QUOTA:
return S_OK; // We do our own quota handling.
default:
TraceMsgA(TF_WARNING | TF_THISMODULE, "GetUrlInfoAndMakeSticky: Got unexpected error from SetUrlCacheEntryGroup() - GLE = 0x%08x", GetLastError());
return E_FAIL;
}
}
}
return S_OK;
}
// GenerateCode will generate a DWORD code from a file.
#define ELEMENT_PER_READ 256
#define ELEMENT_SIZE sizeof(DWORD)
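// Reads the cache file ELEMENT_PER_READ DWORDs at a time and folds each
// DWORD into a rotate-and-add checksum. The code only needs to detect
// content changes; it makes no attempt to be collision-resistant.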
HRESULT GenerateCode(LPCTSTR lpszLocalFileName, DWORD *pdwRet)
{
DWORD dwCode=0;
DWORD dwData[ELEMENT_PER_READ], i, dwRead;
HRESULT hr = S_OK;
HANDLE hFile;
hFile = CreateFile(lpszLocalFileName, GENERIC_READ,
FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING,
0, NULL);
if (INVALID_HANDLE_VALUE != hFile)
{
do
{
dwRead = 0;
if (ReadFile(hFile, dwData, ELEMENT_PER_READ * ELEMENT_SIZE, &dwRead, NULL))
{
for (i=0; i<dwRead / ELEMENT_SIZE; i++)
{
// rotate right one bit, then add the next DWORD
dwCode = ((dwCode << 31) | (dwCode >> 1)) + dwData[i];
// dwCode += dwData[i];
}
}
}
while (ELEMENT_PER_READ * ELEMENT_SIZE == dwRead);
CloseHandle(hFile);
}
else
{
hr = E_FAIL;
TraceMsg(TF_THISMODULE|TF_WARNING,"GenerateCode: Unable to open cache file, Error=%x", GetLastError());
}
*pdwRet = dwCode;
return hr;
}
// S_OK : We retrieved a good last modified or content code to use
// S_FALSE : We fell back to using the one passed into pvarChange
// E_FAIL : We failed miserably.
// E_INVALIDARG : Get a clue
// *pfGetContent : TRUE if we need a GET for PostCheckUrlForChange to work right
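// Typical sequence (sketch): call PreCheckUrlForChange with the persisted
// VARIANT before downloading; if *pfGetContent comes back TRUE, do a full
// GET so a cache entry exists; then call PostCheckUrlForChange and persist
// the VARIANT it returns for the next update.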
HRESULT PreCheckUrlForChange(LPCTSTR lpURL, VARIANT *pvarChange, BOOL *pfGetContent)
{
BYTE chBuf[MY_MAX_CACHE_ENTRY_INFO];
LPINTERNET_CACHE_ENTRY_INFO lpInfo = (LPINTERNET_CACHE_ENTRY_INFO) chBuf;
if (pvarChange->vt != VT_EMPTY && pvarChange->vt != VT_I4 && pvarChange->vt != VT_CY)
return E_INVALIDARG;
if (SUCCEEDED(GetUrlInfoAndMakeSticky(NULL, lpURL, lpInfo, sizeof(chBuf), 0)))
{
FILETIME ftOldLastModified = *((FILETIME *) &pvarChange->cyVal);
if (lpInfo->LastModifiedTime.dwHighDateTime || lpInfo->LastModifiedTime.dwLowDateTime)
{
// We have a last modified time. Use it or the persisted one.
if (pfGetContent)
*pfGetContent = FALSE;
if ((pvarChange->vt != VT_CY)
|| (lpInfo->LastModifiedTime.dwHighDateTime > ftOldLastModified.dwHighDateTime)
|| ((lpInfo->LastModifiedTime.dwHighDateTime == ftOldLastModified.dwHighDateTime)
&& (lpInfo->LastModifiedTime.dwLowDateTime > ftOldLastModified.dwLowDateTime)))
{
// Cache Last Modified is newer than saved Last Modified. Use cache's.
pvarChange->vt = VT_CY;
pvarChange->cyVal = *((CY *)&(lpInfo->LastModifiedTime));
return S_OK;
}
ASSERT(pvarChange->vt == VT_CY);
// Persisted Last Modified time is most recent. Use it.
return S_OK;
}
DWORD dwCode;
if (SUCCEEDED(GenerateCode(lpInfo->lpszLocalFileName, &dwCode)))
{
pvarChange->vt = VT_I4;
pvarChange->lVal = (LONG) dwCode;
if (pfGetContent)
*pfGetContent = TRUE;
return S_OK;
}
// Failed GenerateCode. Weird. Fall through.
}
if (pvarChange->vt != VT_EMPTY)
{
if (pfGetContent)
*pfGetContent = (pvarChange->vt == VT_I4);
return S_FALSE;
}
// We don't have old change detection, we don't have cache content, better GET
if (pfGetContent)
*pfGetContent = TRUE;
return E_FAIL; // Couldn't get anything. pvarChange->vt==VT_EMPTY
}
// S_FALSE : no change
// S_OK : changed
// E_ : failure of some sort
// pvarChange from PreCheckUrlForChange. We return a new one.
// lpInfo : must be valid if *pfGetContent was TRUE
// ftNewLastModified : must be filled in if *pfGetContent was FALSE
HRESULT PostCheckUrlForChange(VARIANT *pvarChange,
LPINTERNET_CACHE_ENTRY_INFO lpInfo,
FILETIME ftNewLastModified)
{
HRESULT hr = S_FALSE;
VARIANT varChangeNew;
DWORD dwNewCode = 0;
if (!pvarChange || (pvarChange->vt != VT_I4 && pvarChange->vt != VT_CY && pvarChange->vt != VT_EMPTY))
return E_INVALIDARG;
varChangeNew.vt = VT_EMPTY;
if (ftNewLastModified.dwHighDateTime || ftNewLastModified.dwLowDateTime)
{
varChangeNew.vt = VT_CY;
varChangeNew.cyVal = *((CY *) &ftNewLastModified);
}
else
{
if (lpInfo &&
SUCCEEDED(GenerateCode(lpInfo->lpszLocalFileName, &dwNewCode)))
{
varChangeNew.vt = VT_I4;
varChangeNew.lVal = dwNewCode;
}
}
if (pvarChange->vt == VT_CY)
{
// We have an old last modified time. Use that to determine change.
FILETIME ftOldLastModified = *((FILETIME *) &(pvarChange->cyVal));
if ((!ftNewLastModified.dwHighDateTime && !ftNewLastModified.dwLowDateTime)
|| (ftNewLastModified.dwHighDateTime > ftOldLastModified.dwHighDateTime)
|| ((ftNewLastModified.dwHighDateTime == ftOldLastModified.dwHighDateTime)
&& (ftNewLastModified.dwLowDateTime > ftOldLastModified.dwLowDateTime)))
{
// NewLastModified > OldLastModified (or we don't have a NewLastModified)
DBG("PostCheckUrlForChange change detected via Last Modified");
hr = S_OK; // We have changed
}
}
else if (pvarChange->vt == VT_I4)
{
// We have an old code. Use that to determine change.
DWORD dwOldCode = (DWORD) (pvarChange->lVal);
if ((dwOldCode != dwNewCode) ||
!dwNewCode)
{
DBG("PostCheckUrlForChange change detected via content code");
hr = S_OK; // We have changed
}
}
else
hr = E_FAIL; // No old code.
*pvarChange = varChangeNew;
return hr;
}
//////////////////////////////////////////////////////////////////////////
//
// CHelperOM implementation
//
//////////////////////////////////////////////////////////////////////////
CHelperOM::CHelperOM(IHTMLDocument2 *pDoc)
{
ASSERT(pDoc);
m_pDoc = pDoc;
if (pDoc)
pDoc->AddRef();
}
CHelperOM::~CHelperOM()
{
SAFERELEASE(m_pDoc);
}
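// GetTagCollection :: resolve document.all.tags(wszTagName) into an
// IHTMLElementCollection; the caller must Release() *ppCollection.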
HRESULT CHelperOM::GetTagCollection(
IHTMLDocument2 *pDoc,
LPCWSTR wszTagName,
IHTMLElementCollection **ppCollection)
{
IHTMLElementCollection *pAll=NULL;
IDispatch *pDisp=NULL;
VARIANT TagName;
HRESULT hr;
// We have to get "all", then sub-collection
hr = pDoc->get_all(&pAll);
if (pAll)
{
TagName.vt = VT_BSTR;
TagName.bstrVal = SysAllocString(wszTagName);
if (NULL == TagName.bstrVal)
hr = E_OUTOFMEMORY;
else
{
hr = pAll->tags(TagName, &pDisp);
SysFreeString(TagName.bstrVal);
}
pAll->Release();
}
if (pDisp)
{
hr = pDisp->QueryInterface(IID_IHTMLElementCollection,
(void **)ppCollection);
pDisp->Release();
}
if (FAILED(hr)) DBG("GetSubCollection failed");
return hr;
}
// Collections we get:
//
// IHTMLWindow2->get_document
// IHTMLDocument2 ->get_links
// IHTMLElementCollection->item
// ->get_hostname
// ->get_href
// ->get_all
// ->tags("map")
// IHTMLElementCollection ->item
// ->get_areas
// IHTMLElementCollection ->item
// IHTMLAreaElement ->get_href
// ->get_all
// ->tags("meta")
// IHTMLElementCollection ->item
// ->get_all
// ->tags("frame")
// IHTMLElementCollection ->item
// ->get_all
// ->tags("iframe")
// IHTMLElementCollection ->item
// We recurse EnumCollection to get the maps (since
// it's a collection of collections)
// Hideous hack: the IHTMLElementCollection may actually be an IHTMLAreasCollection.
// One interface used to derive from the other, and they still have identical
// methods, so we typecast - just in case that changes. Hopefully Areas will be
// derived from Element again.
HRESULT CHelperOM::EnumCollection(
IHTMLElementCollection *pCollection,
CWCStringList *pStringList,
CollectionType Type,
PFN_CB pfnCB,
DWORD_PTR dwCBData)
{
IHTMLAnchorElement *pLink;
IHTMLMapElement *pMap;
IHTMLAreaElement *pArea;
IHTMLMetaElement *pMeta;
IHTMLElement *pEle;
IDispatch *pDispItem = NULL;
HRESULT hr;
BSTR bstrItem=NULL;
long l, lCount;
VARIANT vIndex, vEmpty, vData;
BSTR bstrTmp1, bstrTmp2;
DWORD dwStringData;
VariantInit(&vEmpty);
VariantInit(&vIndex);
VariantInit(&vData);
if (Type==CTYPE_MAP)
hr = ((IHTMLAreasCollection *)pCollection)->get_length(&lCount);
else
hr = pCollection->get_length(&lCount);
if (FAILED(hr))
lCount = 0;
#ifdef DEBUG
LPSTR lpDSTR[]={"Links","Maps","Areas (links) In Map", "Meta", "Frames"};
TraceMsgA(TF_THISMODULE, "CWebCrawler::GetCollection, %d %s found", lCount, lpDSTR[(int)Type]);
#endif
for (l=0; l<lCount; l++)
{
vIndex.vt = VT_I4;
vIndex.lVal = l;
dwStringData = 0;
if (Type==CTYPE_MAP)
hr = ((IHTMLAreasCollection *)pCollection)->item(vIndex, vEmpty, &pDispItem);
else
hr = pCollection->item(vIndex, vEmpty, &pDispItem);
if (SUCCEEDED(hr))
{
ASSERT(vData.vt == VT_EMPTY);
ASSERT(!bstrItem);
if (pDispItem)
{
// Get the URL from the IDispatch
switch(Type)
{
case CTYPE_LINKS: // get href from <a>
hr = pDispItem->QueryInterface(IID_IHTMLAnchorElement, (void **)&pLink);
if (SUCCEEDED(hr) && pLink)
{
hr = pLink->get_href(&bstrItem);
pLink->Release();
}
break;
case CTYPE_MAPS: // enumeration areas for this map
hr = pDispItem->QueryInterface(IID_IHTMLMapElement, (void **)&pMap);
if (SUCCEEDED(hr) && pMap)
{
IHTMLAreasCollection *pNewCollection=NULL;
// This gives us another collection. Enumerate it
// for the strings.
hr = pMap->get_areas(&pNewCollection);
if (pNewCollection)
{
hr = EnumCollection((IHTMLElementCollection *)pNewCollection, pStringList, CTYPE_MAP, pfnCB, dwCBData);
pNewCollection->Release();
}
pMap->Release();
}
break;
case CTYPE_MAP: // get href for this area
hr = pDispItem->QueryInterface(IID_IHTMLAreaElement, (void **)&pArea);
if (SUCCEEDED(hr) && pArea)
{
hr = pArea->get_href(&bstrItem);
pArea->Release();
}
break;
case CTYPE_META: // get meta name and content as single string
hr = pDispItem->QueryInterface(IID_IHTMLMetaElement, (void **)&pMeta);
if (SUCCEEDED(hr) && pMeta)
{
bstrTmp1 = bstrTmp2 = NULL;
pMeta->get_name(&bstrTmp1);
pMeta->get_content(&bstrTmp2);
if (bstrTmp1 && bstrTmp2 && *bstrTmp1 && *bstrTmp2)
{
// combine as "name\ncontent" in one string
int nLen = lstrlenW(bstrTmp1) + lstrlenW(bstrTmp2) + 2;
bstrItem = SysAllocStringLen(NULL, nLen);
if (bstrItem)
{
StrCpyNW(bstrItem, bstrTmp1, nLen);
StrCatBuffW(bstrItem, L"\n", nLen);
StrCatBuffW(bstrItem, bstrTmp2, nLen);
}
}
SysFreeString(bstrTmp1);
SysFreeString(bstrTmp2);
pMeta->Release();
}
break;
case CTYPE_FRAMES: // get "src" attribute
hr = pDispItem->QueryInterface(IID_IHTMLElement, (void **)&pEle);
if (SUCCEEDED(hr) && pEle)
{
bstrTmp1 = SysAllocString(L"SRC");
if (bstrTmp1)
{
hr = pEle->getAttribute(bstrTmp1, VARIANT_FALSE, &vData);
if (SUCCEEDED(hr) && vData.vt == VT_BSTR)
{
bstrItem = vData.bstrVal;
vData.vt = VT_EMPTY;
}
else
VariantClear(&vData);
SysFreeString(bstrTmp1);
}
else
{
hr = E_FAIL;
}
pEle->Release();
}
break;
default:
ASSERT(0);
// bug in calling code
}
if (SUCCEEDED(hr) && bstrItem)
{
// Verify we want to add this item to string list & get data
if (pfnCB)
hr = pfnCB(pDispItem, &bstrItem, dwCBData, &dwStringData);
if (SUCCEEDED(hr) && bstrItem && pStringList)
pStringList->AddString(bstrItem, dwStringData);
}
SAFERELEASE(pDispItem);
SAFEFREEBSTR(bstrItem);
}
}
if (E_ABORT == hr)
{
DBG_WARN("Aborting enumeration in CHelperOM::EnumCollection at callback's request.");
break;
}
}
return hr;
}
// Gets all urls from a collection, recursing through frames
HRESULT CHelperOM::GetCollection(
IHTMLDocument2 *pDoc,
CWCStringList *pStringList,
CollectionType Type,
PFN_CB pfnCB,
DWORD_PTR dwCBData)
{
HRESULT hr;
// Get the collection from the document
ASSERT(pDoc);
ASSERT(pStringList || pfnCB);
hr = _GetCollection(pDoc, pStringList, Type, pfnCB, dwCBData);
return hr;
}
// get all urls from a collection
HRESULT CHelperOM::_GetCollection(
IHTMLDocument2 *pDoc,
CWCStringList *pStringList,
CollectionType Type,
PFN_CB pfnCB,
DWORD_PTR dwCBData)
{
HRESULT hr;
IHTMLElementCollection *pCollection=NULL;
// From IHTMLDocument2 we get IHTMLElementCollection, then enumerate for the urls
// Get appropriate collection from document
switch (Type)
{
case CTYPE_LINKS:
hr = pDoc->get_links(&pCollection);
break;
case CTYPE_MAPS:
hr = GetTagCollection(pDoc, L"map", &pCollection);
break;
case CTYPE_META:
hr = GetTagCollection(pDoc, L"meta", &pCollection);
break;
case CTYPE_FRAMES:
hr = GetTagCollection(pDoc, L"frame", &pCollection);
break;
default:
hr = E_FAIL;
}
if (!pCollection) hr=E_NOINTERFACE;
#ifdef DEBUG
if (FAILED(hr)) DBG_WARN("CWebCrawler::_GetCollection: get_collection failed");
#endif
if (SUCCEEDED(hr))
{
hr = EnumCollection(pCollection, pStringList, Type, pfnCB, dwCBData);
// If we're getting frames, we need to enum "iframe" tags separately
if (SUCCEEDED(hr) && (Type == CTYPE_FRAMES))
{
SAFERELEASE(pCollection);
hr = GetTagCollection(pDoc, L"iframe", &pCollection);
if (SUCCEEDED(hr) && pCollection)
{
hr = EnumCollection(pCollection, pStringList, Type, pfnCB, dwCBData);
}
}
}
if (pCollection)
pCollection->Release();
return hr;
}
extern HRESULT LoadWithCookie(LPCTSTR, POOEBuf, DWORD *, SUBSCRIPTIONCOOKIE *);
// IExtractIcon members
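// GetIconLocation :: desktop component subscriptions use the special
// ":desktop:" icon location; everything else lazily creates a
// CLSID_InternetShortcut object for the subscribed URL, caches its
// IExtractIcon in m_pUrlIconHelper and delegates to it.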
STDMETHODIMP CWebCrawler::GetIconLocation(UINT uFlags, LPTSTR szIconFile, UINT cchMax, int * piIndex, UINT * pwFlags)
{
IUniformResourceLocator* pUrl = NULL;
IExtractIcon* pUrlIcon = NULL;
HRESULT hr = S_OK;
BOOL bCalledCoInit = FALSE;
if (!szIconFile || !piIndex || !pwFlags)
return E_INVALIDARG;
//zero out return values in case one of the COM calls fails...
*szIconFile = 0;
*piIndex = -1;
if (!m_pBuf) {
m_pBuf = (POOEBuf)MemAlloc(LPTR, sizeof(OOEBuf));
if (!m_pBuf)
return E_OUTOFMEMORY;
DWORD dwSize;
hr = LoadWithCookie(NULL, m_pBuf, &dwSize, &m_SubscriptionCookie);
RETURN_ON_FAILURE(hr);
}
if (m_pBuf->bDesktop)
{
StrCpyN(szIconFile, TEXT(":desktop:"), cchMax);
}
else
{
if (m_pUrlIconHelper)
{
hr = m_pUrlIconHelper->GetIconLocation (uFlags, szIconFile, cchMax, piIndex, pwFlags);
}
else
{
hr = CoCreateInstance (CLSID_InternetShortcut, NULL, CLSCTX_INPROC_SERVER, IID_IUniformResourceLocator, (void**)&pUrl);
if ((CO_E_NOTINITIALIZED == hr || REGDB_E_IIDNOTREG == hr) &&
SUCCEEDED (CoInitialize(NULL)))
{
bCalledCoInit = TRUE;
hr = CoCreateInstance (CLSID_InternetShortcut, NULL, CLSCTX_INPROC_SERVER, IID_IUniformResourceLocator, (void**)&pUrl);
}
if (SUCCEEDED (hr))
{
hr = pUrl->SetURL (m_pBuf->m_URL, 1);
if (SUCCEEDED (hr))
{
hr = pUrl->QueryInterface (IID_IExtractIcon, (void**)&pUrlIcon);
if (SUCCEEDED (hr))
{
hr = pUrlIcon->GetIconLocation (uFlags, szIconFile, cchMax, piIndex, pwFlags);
//pUrlIcon->Release(); //released in destructor
ASSERT (m_pUrlIconHelper == NULL);
m_pUrlIconHelper = pUrlIcon;
}
}
pUrl->Release();
}
//balance CoInitialize with CoUninitialize
//(we still have a pointer to the CLSID_InternetShortcut object, m_pUrlIconHelper,
//but since that code is in shdocvw there's no danger of it getting unloaded and
//invalidating our pointer, sez cdturner.)
if (bCalledCoInit)
CoUninitialize();
}
}
return hr;
}
STDMETHODIMP CWebCrawler::Extract(LPCTSTR szIconFile, UINT nIconIndex, HICON * phiconLarge, HICON * phiconSmall, UINT nIconSize)
{
HRESULT hr = S_OK;
if (!phiconLarge || !phiconSmall)
return E_INVALIDARG;
//zero out return values in case one of the COM calls fails...
*phiconLarge = NULL;
*phiconSmall = NULL;
if ((NULL != m_pBuf) && (m_pBuf->bDesktop))
{
LoadDefaultIcons();
*phiconLarge = *phiconSmall = g_desktopIcon;
}
else
{
if (!m_pUrlIconHelper)
return E_FAIL;
hr = m_pUrlIconHelper->Extract (szIconFile, nIconIndex, phiconLarge, phiconSmall, nIconSize);
}
return hr;
}