|
|
#include <urlmon.h>
#include <mshtmdid.h>
#include <mshtml.h>
#include <shlobj.h>
#include "htmparse.h"
// Constructs the parser holding a single reference, with no connection point,
// no attached document and no wait event.
CHTMLParser::CHTMLParser()
{
    m_cRef              = 1;
    m_hrConnected       = CONNECT_E_CANNOTCONNECT;  // the sink is not advised yet
    m_dwCookie          = 0;
    m_pCP               = NULL;
    m_pMSHTML           = NULL;
    // m_pTrident is tested by TermForMSHTML() and compared against in
    // AttachToDocument()/Detach(), so it must start out in a defined state.
    m_pTrident          = NULL;
    m_hEventTridentDone = 0;
}
// Drops the reference held on the currently attached document, if there is one.
CHTMLParser::~CHTMLParser()
{
    if (NULL == m_pMSHTML)
        return;

    m_pMSHTML->Release();
}
/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
/////// IUnknown implementation
///////
///////
// Hands out the interfaces this host implements: IUnknown, IPropertyNotifySink,
// IOleClientSite and IDispatch.
//
// Returns NOERROR with an AddRef'd pointer in *ppv on success, E_POINTER when
// ppv is NULL, and E_NOINTERFACE (with *ppv == NULL) for any other IID.
STDMETHODIMP CHTMLParser::QueryInterface(REFIID riid, LPVOID* ppv)
{
    if (!ppv)
        return E_POINTER;

    *ppv = NULL;

    if (IID_IUnknown == riid || IID_IPropertyNotifySink == riid)
    {
        *ppv = (LPUNKNOWN)(IPropertyNotifySink*)this;
    }
    else if (IID_IOleClientSite == riid)
    {
        *ppv = (IOleClientSite*)this;
    }
    else if (IID_IDispatch == riid)
    {
        *ppv = (IDispatch*)this;
    }
    else
    {
        // An unsupported interface is reported as E_NOINTERFACE; E_NOTIMPL is
        // not a valid QueryInterface result.
        return E_NOINTERFACE;
    }

    AddRef();
    return NOERROR;
}
// Adds one reference to the object and returns the updated count.
STDMETHODIMP_(ULONG) CHTMLParser::AddRef()
{
    m_cRef += 1;
    return m_cRef;
}
// Removes one reference and destroys the object when the count reaches zero.
// Returns the updated reference count.
STDMETHODIMP_(ULONG) CHTMLParser::Release()
{
    // Capture the new count in a local: once 'delete this' has run, m_cRef
    // lives in freed memory and must not be read.
    const ULONG cRef = --m_cRef;

    if (0 == cRef)
        delete this;

    return cRef;
}
/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
/////// IPropertyNotifySink implementation
///////
///////
// Fired on change of the value of a 'bindable' property.
//
// Only DISPID_READYSTATE is of interest: when the attached document reports
// READYSTATE_COMPLETE the m_hEventTridentDone event is signaled.
// Always returns NOERROR.
STDMETHODIMP CHTMLParser::OnChanged(DISPID dispID)
{
    if (DISPID_READYSTATE != dispID)
        return NOERROR;

    // A notification may arrive while no document is attached; the assert flags
    // that in debug builds, the check keeps release builds from dereferencing NULL.
    assert(m_pMSHTML);
    if (!m_pMSHTML)
        return NOERROR;

    EXCEPINFO  excepInfo  = {0};
    UINT       uArgErr    = 0;
    VARIANT    varResult  = {0};
    DISPPARAMS dispparams = {NULL, NULL, 0, 0};

    // check the value of the readystate property
    if (SUCCEEDED(m_pMSHTML->Invoke(DISPID_READYSTATE, IID_NULL, LOCALE_SYSTEM_DEFAULT,
                                    DISPATCH_PROPERTYGET, &dispparams, &varResult,
                                    &excepInfo, &uArgErr)))
    {
        assert(VT_I4 == V_VT(&varResult));

        // Only read the I4 member when the variant really holds one, and only
        // signal an event that currently exists.
        if (VT_I4 == V_VT(&varResult) &&
            READYSTATE_COMPLETE == (READYSTATE)V_I4(&varResult) &&
            m_hEventTridentDone)
        {
            SetEvent(m_hEventTridentDone);
        }

        VariantClear(&varResult);
    }

    return NOERROR;
}
// IPropertyNotifySink::OnRequestEdit - no property is inspected; success is
// reported for every DISPID.
STDMETHODIMP CHTMLParser::OnRequestEdit(DISPID dispID)
{
    return S_OK;
}
/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
/////// IOleClientSite implementation
///////
///////
// IOleClientSite::SaveObject - not supported by this host.
STDMETHODIMP CHTMLParser::SaveObject()
{
    return E_NOTIMPL;
}

// IOleClientSite::GetMoniker - not supported by this host.
// *ppmk is set to NULL so the caller never sees an undefined interface pointer.
STDMETHODIMP CHTMLParser::GetMoniker(DWORD dwAssign, DWORD dwWhichMoniker, IMoniker** ppmk)
{
    if (ppmk)
        *ppmk = NULL;

    return E_NOTIMPL;
}
// IOleClientSite::GetContainer - this host exposes no container.
// *ppContainer is set to NULL so the caller never sees an undefined interface pointer.
STDMETHODIMP CHTMLParser::GetContainer(IOleContainer** ppContainer)
{
    if (ppContainer)
        *ppContainer = NULL;

    return E_NOTIMPL;
}
// IOleClientSite::ShowObject - not supported by this host.
STDMETHODIMP CHTMLParser::ShowObject()
{
    return E_NOTIMPL;
}
// IOleClientSite::OnShowWindow - not supported by this host; fShow is ignored.
STDMETHODIMP CHTMLParser::OnShowWindow(BOOL fShow)
{
    return E_NOTIMPL;
}
// IOleClientSite::RequestNewObjectLayout - not supported by this host.
STDMETHODIMP CHTMLParser::RequestNewObjectLayout()
{
    return E_NOTIMPL;
}
/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
/////// IDispatch implementation
///////
///////
// IDispatch::GetTypeInfoCount - no type information is provided.
// *pctinfo is set to 0 so the out-parameter is defined even though the call fails.
STDMETHODIMP CHTMLParser::GetTypeInfoCount(UINT* pctinfo)
{
    if (pctinfo)
        *pctinfo = 0;

    return E_NOTIMPL;
}

// IDispatch::GetTypeInfo - no type information is provided.
// *ppTInfo is set to NULL so the caller never sees an undefined interface pointer.
STDMETHODIMP CHTMLParser::GetTypeInfo(UINT iTInfo, LCID lcid, ITypeInfo** ppTInfo)
{
    if (ppTInfo)
        *ppTInfo = NULL;

    return E_NOTIMPL;
}
// IDispatch::GetIDsOfNames - name lookup is not supported; all arguments are ignored.
STDMETHODIMP CHTMLParser::GetIDsOfNames(REFIID riid,
                                        LPOLESTR* rgszNames,
                                        UINT cNames,
                                        LCID lcid,
                                        DISPID* rgDispId)
{
    return E_NOTIMPL;
}
// MSHTML queries for the IDispatch interface of the host through the IOleClientSite
// interface that MSHTML is passed through its implementation of IOleObject::SetClientSite().
//
// Only the DISPID_AMBIENT_DLCONTROL ambient is answered. Returns E_POINTER when
// pvarResult is NULL and DISP_E_MEMBERNOTFOUND for every other DISPID.
STDMETHODIMP CHTMLParser::Invoke(DISPID dispIdMember,
                                 REFIID riid,
                                 LCID lcid,
                                 WORD wFlags,
                                 DISPPARAMS* pDispParams,
                                 VARIANT* pvarResult,
                                 EXCEPINFO* pExcepInfo,
                                 UINT* puArgErr)
{
    if (NULL == pvarResult)
        return E_POINTER;

    if (DISPID_AMBIENT_DLCONTROL != dispIdMember)
        return DISP_E_MEMBERNOTFOUND;

    // Answer this ambient to say that the page should only be downloaded:
    // no scripts, no Java applets, and no ActiveX controls.
    V_VT(pvarResult) = VT_I4;
    V_I4(pvarResult) = DLCTL_DOWNLOADONLY
                     | DLCTL_NO_SCRIPTS
                     | DLCTL_NO_JAVA
                     | DLCTL_NO_DLACTIVEXCTLS
                     | DLCTL_NO_RUNACTIVEXCTLS;

    return NOERROR;
}
// A more traditional form of persistence.
// MSHTML performs this asynchronously as well.
//
// Asks the attached document (m_pMSHTML) for IPersistFile and loads bstrURL
// through it. Returns the QueryInterface failure, or else the result of Load().
HRESULT CHTMLParser::LoadURLFromFile(BSTR bstrURL)
{
    LPPERSISTFILE pPersistFile;

    // MSHTML supports file persistence for ordinary files.
    HRESULT hrLoad = m_pMSHTML->QueryInterface(IID_IPersistFile, (LPVOID*)&pPersistFile);
    if (FAILED(hrLoad))
        return hrLoad;

    hrLoad = pPersistFile->Load(bstrURL, 0);
    pPersistFile->Release();

    return hrLoad;
}
// Creates the internal Trident (MSHTML) document object in m_pTrident, installs
// this object as its client site, raises the DLCONTROL ambient change, and
// advises this object's IPropertyNotifySink on the document so that ready-state
// changes are observed.
//
// Returns the first failing HRESULT of the create/query steps; the result of
// Advise() is stored in m_hrConnected rather than returned.
HRESULT CHTMLParser::InitForMSHTML()
{
    // Create an instance of a dynamic HTML document
    HRESULT hrInit = CoCreateInstance(CLSID_HTMLDocument,
                                      NULL,
                                      CLSCTX_INPROC_SERVER,
                                      IID_IHTMLDocument2,
                                      (LPVOID*)&m_pTrident);
    if (FAILED(hrInit))
        return hrInit;

    // Install ourselves as the client site (the call's result is not examined)
    LPOLEOBJECT pObject = NULL;
    hrInit = m_pTrident->QueryInterface(IID_IOleObject, (LPVOID*)&pObject);
    if (FAILED(hrInit))
        return hrInit;

    pObject->SetClientSite((IOleClientSite*)this);
    pObject->Release();

    // Announce the download-control ambient (the call's result is not examined)
    LPOLECONTROL pControl = NULL;
    hrInit = m_pTrident->QueryInterface(IID_IOleControl, (LPVOID*)&pControl);
    if (FAILED(hrInit))
        return hrInit;

    pControl->OnAmbientPropertyChange(DISPID_AMBIENT_DLCONTROL);
    pControl->Release();

    // Hook up sink to catch ready state property change
    LPCONNECTIONPOINTCONTAINER pContainer = NULL;
    hrInit = m_pTrident->QueryInterface(IID_IConnectionPointContainer, (LPVOID*)&pContainer);
    if (SUCCEEDED(hrInit))
    {
        hrInit = pContainer->FindConnectionPoint(IID_IPropertyNotifySink, &m_pCP);
        if (SUCCEEDED(hrInit))
        {
            m_hrConnected = m_pCP->Advise((LPUNKNOWN)(IPropertyNotifySink*)this, &m_dwCookie);
        }
    }

    if (pContainer)
        pContainer->Release();

    return hrInit;
}
// Clean up connection point
//
// Disconnects the property-notify sink, releases the connection point and the
// internal Trident document, and resets the related members so that calling
// this again (or calling it without a prior InitForMSHTML) is harmless.
// Returns the result of Unadvise(), or NOERROR when nothing was connected.
HRESULT CHTMLParser::TermForMSHTML()
{
    HRESULT hr = NOERROR;

    if (m_pCP)
    {
        // Disconnect from property change notifications
        if (SUCCEEDED(m_hrConnected))
            hr = m_pCP->Unadvise(m_dwCookie);

        // Release the connection point
        m_pCP->Release();
        m_pCP = NULL;
    }

    // Back to the "not connected" state so a repeated call does not Unadvise twice
    m_hrConnected = CONNECT_E_CANNOTCONNECT;
    m_dwCookie    = 0;

    if (m_pTrident)
    {
        m_pTrident->Release();
        m_pTrident = NULL;      // do not keep a dangling pointer around
    }

    return hr;
}
// Attaches to the internal Trident document, loads bstrURL into it, and pumps
// window messages until OnChanged() signals m_hEventTridentDone, the 5 minute
// timeout expires, or the wait fails.
//
// Returns E_UNEXPECTED when the internal document has not been created, the
// load error when loading fails, and E_FAIL when the load is pending but no
// event could be created to wait on. A timeout does not alter the load result.
HRESULT CHTMLParser::AttachToMSHTML(BSTR bstrURL)
{
    // The internal document is created by InitForMSHTML()
    if (!m_pTrident)
        return E_UNEXPECTED;

    // Release any previous instance of the HTML document pointer we might be holding on to
    if (m_pMSHTML)
    {
        m_pMSHTML->Release();
        m_pMSHTML = NULL;
    }

    m_pMSHTML = m_pTrident;
    m_pMSHTML->AddRef();

    m_hEventTridentDone = CreateEvent(NULL, TRUE, FALSE, NULL);

    HRESULT hr = LoadURLFromFile(bstrURL);
    if (SUCCEEDED(hr) || (E_PENDING == hr))
    {
        if (m_hEventTridentDone)
        {
            HANDLE hEventList[1];
            hEventList[0] = m_hEventTridentDone;

            BOOL fWaiting = TRUE;
            while (fWaiting)
            {
                // We will wait on window messages and also the event.
                DWORD dwRetCode = MsgWaitForMultipleObjects(1,
                                                            &hEventList[0],
                                                            FALSE,
                                                            300000,     // 5 minutes
                                                            QS_ALLINPUT);

                if (WAIT_OBJECT_0 + 1 == dwRetCode)
                {
                    // Process all messages in the queue, since
                    // MsgWaitForMultipleObjects will not do this for us
                    MSG msg;
                    while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
                    {
                        if (WM_QUIT == msg.message)
                        {
                            // Hand the quit request back to the outer message
                            // loop instead of swallowing it, and stop waiting.
                            PostQuitMessage((int)msg.wParam);
                            fWaiting = FALSE;
                            break;
                        }

                        TranslateMessage(&msg);
                        DispatchMessage(&msg);
                    }
                }
                else
                {
                    // Event signaled (WAIT_OBJECT_0), WAIT_TIMEOUT, or an
                    // unexpected result such as WAIT_FAILED: stop waiting
                    // rather than spinning forever.
                    fWaiting = FALSE;
                }
            }
        }
        else
        {
            // If we were pending, and we could not wait, we got a problem...
            if (E_PENDING == hr)
                hr = E_FAIL;
        }
    }

    // Close the event on every path, including a failed load
    if (m_hEventTridentDone)
    {
        CloseHandle(m_hEventTridentDone);
        m_hEventTridentDone = 0;
    }

    return (hr);
}
// Attaches to the document currently shown by lpWebBrowser.
//
// Any previously attached document is released first. Returns E_INVALIDARG for
// a NULL browser, the get_Document()/QueryInterface failure, or E_FAIL when the
// browser has no document or does not yield an IHTMLDocument2.
HRESULT CHTMLParser::AttachToDocument(IWebBrowser2 *lpWebBrowser)
{
    // Release any previous instance of the HTML document pointer we might be
    // holding on to. m_pMSHTML always owns a reference: AttachToMSHTML() AddRefs
    // the internal Trident object and QueryInterface() below AddRefs a browser
    // document, so the release must not be skipped for the internal object.
    if (m_pMSHTML)
    {
        m_pMSHTML->Release();
        m_pMSHTML = NULL;
    }

    // Make sure we have a webbrowser to grab onto
    assert(lpWebBrowser);
    if (!lpWebBrowser)
        return E_INVALIDARG;

    // Get the document pointer from this webbrowser.
    LPDISPATCH pDisp = NULL;
    HRESULT hr = lpWebBrowser->get_Document(&pDisp);
    if (FAILED(hr))
        return hr;

    if (!pDisp)
        return E_FAIL;

    hr = pDisp->QueryInterface(IID_IHTMLDocument2, (LPVOID*)&m_pMSHTML);
    if (FAILED(hr))
    {
        m_pMSHTML = NULL;
    }
    else if (!m_pMSHTML)
    {
        // Paranoia, but trident/shdocvw might say OK, but really not give us a document
        hr = E_FAIL;
    }

    pDisp->Release();
    return (hr);
}
// Detaches from the currently attached document, if any. Always returns S_OK.
HRESULT CHTMLParser::Detach()
{
    if (m_pMSHTML)
    {
        // m_pMSHTML always owns a reference, even when it points at the internal
        // Trident object: AttachToMSHTML() AddRefs it and AttachToDocument()
        // obtains it through QueryInterface(). Skipping the release for the
        // internal object would leak that reference.
        m_pMSHTML->Release();
        m_pMSHTML = NULL;
    }

    return S_OK;
}
// Appends "<name><cszEquals><url-encoded value><cszAmpersand>" to lpszQuery.
//
// Takes ownership of both BSTRs and frees them on every path. Nothing is
// appended when bstrName or lpszQuery is NULL. The caller must supply a buffer
// large enough for the appended text; no size is passed in to check against.
// Returns S_OK, or E_OUTOFMEMORY when the scratch buffer for the encoded value
// cannot be allocated (the pair is then appended with an empty value).
HRESULT CHTMLParser::ConcatURLValue(BSTR bstrValue, BSTR bstrName, WCHAR* lpszQuery)
{
    HRESULT hr = S_OK;

    if (bstrName && lpszQuery)
    {
        // Append the Name
        lstrcat(lpszQuery, bstrName);
        lstrcat(lpszQuery, cszEquals);

        if (bstrValue)
        {
            // we need to be three times as big since 1 char decoded == 3 char encoded
            size_t cch   = (lstrlen(bstrValue) + 1) * 3;
            WCHAR* szVal = (WCHAR*)malloc(BYTES_REQUIRED_BY_CCH(cch));
            if (szVal)
            {
                lstrcpy(szVal, bstrValue);
                URLEncode(szVal, cch);
                lstrcat(lpszQuery, szVal);
                free(szVal);
            }
            else
            {
                hr = E_OUTOFMEMORY;
            }
        }

        lstrcat(lpszQuery, cszAmpersand);
    }

    // SysFreeString accepts NULL, so both strings are freed unconditionally;
    // this also covers a value that arrives without a name.
    SysFreeString(bstrValue);
    SysFreeString(bstrName);

    return hr;
}
HRESULT CHTMLParser::CreateQueryString ( IHTMLFormElement *pForm, LPWSTR lpszQuery ) {
VARIANT vIndex;
HRESULT hr = E_FAIL; long lFormLength = 0; VARIANT var2 = { 0 }; LPDISPATCH pDisp = NULL; IHTMLButtonElement* pButton = NULL; IHTMLInputButtonElement* pInputButton = NULL; IHTMLInputFileElement* pInputFile = NULL; IHTMLInputHiddenElement* pInputHidden = NULL; IHTMLInputTextElement* pInputText = NULL; IHTMLSelectElement* pSelect = NULL; IHTMLTextAreaElement* pTextArea = NULL; IHTMLOptionButtonElement* pOptionButton = NULL; BSTR bstrName = NULL; BSTR bstrValue = NULL; vIndex.vt = VT_UINT; if (SUCCEEDED(pForm->get_length(&lFormLength))) { for (int i = 0; i < lFormLength; i++) { vIndex.lVal = i; if (SUCCEEDED(hr = pForm->item( vIndex, var2, &pDisp ))) { if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLInputHiddenElement, (LPVOID*)&pInputHidden ))) { //We could take out the repetative calls to get_name/get_value but that would require
//us to make a sketchy cast
if (SUCCEEDED(pInputHidden->get_name(&bstrName)) && SUCCEEDED(pInputHidden->get_value(&bstrValue))) { ConcatURLValue(bstrValue, bstrName, lpszQuery); } // Release the interface pointer
pInputHidden->Release(); continue; }
if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLInputTextElement, (LPVOID*)&pInputText ))) { if (SUCCEEDED(pInputText->get_name(&bstrName)) && SUCCEEDED(pInputText->get_value(&bstrValue)) ) { ConcatURLValue(bstrValue, bstrName, lpszQuery); } // Release the interface pointer
pInputText->Release(); continue; }
if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLSelectElement, (LPVOID*)&pSelect ))) { if (SUCCEEDED(pSelect->get_name(&bstrName)) && SUCCEEDED(pSelect->get_value(&bstrValue)) ) { ConcatURLValue(bstrValue, bstrName, lpszQuery); } // Release the interface pointer
pSelect->Release(); continue; }
if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLTextAreaElement, (LPVOID*)&pTextArea ))) { if (SUCCEEDED(pTextArea->get_name(&bstrName)) && SUCCEEDED(pTextArea->get_value(&bstrValue)) ) { ConcatURLValue(bstrValue, bstrName, lpszQuery); } // Release the interface pointer
pTextArea->Release(); }
// First check to see if this is an OptionButton.
if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLOptionButtonElement, (LPVOID*)&pOptionButton ))) { BSTR bstr = NULL; // See if it is a Radio or a CheckBox
if (SUCCEEDED(pOptionButton->get_type(&bstr))) { LPWSTR lpszType = bstr; if ((lstrcmpi(lpszType, L"radio") == 0) || (lstrcmpi(lpszType, L"checkbox") == 0)) { short bChecked; // See if the button is checked. If it is, then it needs to be
// added to the query string
if (SUCCEEDED(pOptionButton->get_checked(&bChecked))) { if(bChecked) { if ( SUCCEEDED(pOptionButton->get_name(&bstrName)) && SUCCEEDED(pOptionButton->get_value(&bstrValue)) ) { ConcatURLValue(bstrValue, bstrName, lpszQuery); }
} } } SysFreeString(bstr); } // Release the interface
pOptionButton->Release(); continue; } // For the rest we need to form Name=Value pairs
if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLButtonElement, (LPVOID*)&pButton ))) { if (SUCCEEDED(pButton->get_name(&bstrName)) && SUCCEEDED(pButton->get_value(&bstrValue)) ) { ConcatURLValue(bstrValue, bstrName, lpszQuery); } // Release the interface pointer
pButton->Release(); continue; } if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLInputFileElement, (LPVOID*)&pInputFile ))) { if (SUCCEEDED(pInputFile->get_name(&bstrName)) && SUCCEEDED(pInputFile->get_value(&bstrValue)) ) { ConcatURLValue(bstrValue, bstrName, lpszQuery); } // Release the interface pointer
pInputFile->Release(); continue; } pDisp->Release(); } } } // Null out the last Ampersand, since we don't know when we added the last pair, so we got
// a trailing ampersand
lpszQuery[lstrlen(lpszQuery)-1] = L'\0'; return S_OK; }
// Builds "<form action><cszQuestion><query string>" in szUrl for the form
// element behind pDisp.
//
// szUrl must be large enough for the result; no size is passed in to check
// against. Returns E_FAIL when an argument is NULL, pDisp is not a form, or the
// form has no action; otherwise the result of CreateQueryString().
HRESULT CHTMLParser::get_QueryStringForForm(IDispatch* pDisp, WCHAR* szUrl)
{
    HRESULT           hr         = E_FAIL;  // don't assume success
    WCHAR             szQuery[MAX_PATH*7] = L"\0";
    IHTMLFormElement* pForm      = NULL;
    BSTR              bstrAction = NULL;

    if (!pDisp || !szUrl)
        return (E_FAIL);

    if (SUCCEEDED(pDisp->QueryInterface(IID_IHTMLFormElement, (void**)&pForm)) && pForm)
    {
        // Get the Action for the Next Form
        if (SUCCEEDED(pForm->get_action(&bstrAction)) && bstrAction)
        {
            lstrcpy(szUrl, bstrAction);
            lstrcat(szUrl, cszQuestion);
            SysFreeString(bstrAction);

            // Get the Query String, and report its outcome to the caller
            hr = CreateQueryString(pForm, szQuery);
            if (SUCCEEDED(hr))
            {
                lstrcat(szUrl, szQuery);
            }
        }

        // Balance the reference obtained from QueryInterface
        pForm->Release();
    }

    return hr;
}
// URL-encodes pszUrl in place.
//
// A space becomes '+'; the characters # & % = < + become their %XX escapes;
// every other character is copied unchanged. cchUrlMax is the capacity of
// pszUrl in characters. The string is left untouched when the worst-case
// encoded length (three times the input length) does not fit below cchUrlMax,
// or when the scratch buffer cannot be allocated.
void CHTMLParser::URLEncode(WCHAR* pszUrl, size_t cchUrlMax)
{
    assert(pszUrl);
    if (!pszUrl)
        return;

    const int cchUrl = lstrlen(pszUrl);
    if ((size_t)cchUrl * 3 >= cchUrlMax)
        return;

    WCHAR* pszEncoded = (WCHAR*)malloc(BYTES_REQUIRED_BY_CCH(cchUrl * 3 + 1));
    if (!pszEncoded)
        return;

    WCHAR* pchEncoded = pszEncoded;
    for (const WCHAR* pchUrl = pszUrl; L'\0' != *pchUrl; pchUrl++)
    {
        const WCHAR* pszEscape = NULL;

        switch (*pchUrl)
        {
            case L' ': pszEscape = L"+";   break;   // SPACE
            case L'#': pszEscape = L"%23"; break;
            case L'&': pszEscape = L"%26"; break;
            case L'%': pszEscape = L"%25"; break;
            case L'=': pszEscape = L"%3D"; break;
            case L'<': pszEscape = L"%3C"; break;
            case L'+': pszEscape = L"%2B"; break;
            default:                       break;
        }

        if (pszEscape)
        {
            // Copy the escape character by character. lstrcpyn must not be used
            // with the escape's length: its count includes the terminating
            // null, so it would drop the last character and embed a null in
            // the middle of the output.
            while (L'\0' != *pszEscape)
                *pchEncoded++ = *pszEscape++;
        }
        else
        {
            *pchEncoded++ = *pchUrl;
        }
    }
    *pchEncoded = L'\0';

    // At most cchUrl * 3 characters were written, which the check at the top
    // guarantees fits in pszUrl together with the terminator.
    lstrcpy(pszUrl, pszEncoded);
    free(pszEncoded);
}
|