Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

446 lines
14 KiB

  1. VERSION 1.0 CLASS
  2. BEGIN
  3. MultiUse = -1 'True
  4. Persistable = 0 'NotPersistable
  5. DataBindingBehavior = 0 'vbNone
  6. DataSourceBehavior = 0 'vbNone
  7. MTSTransactionMode = 0 'NotAnMTSObject
  8. END
  9. Attribute VB_Name = "XPQuery"
  10. Attribute VB_GlobalNameSpace = False
  11. Attribute VB_Creatable = True
  12. Attribute VB_PredeclaredId = False
  13. Attribute VB_Exposed = False
  14. Option Explicit
  15. Dim m_oDocQTpl As DOMDocument
  16. Dim m_oDocStw As DOMDocument
  17. Dim m_oHsc_query_template_Node As IXMLDOMNode, _
  18. m_o_Query_Term_Node As IXMLDOMNode, _
  19. m_oHsc_qtn_as As IXMLDOMNode
  20. Dim m_o_epilogue As IXMLDOMNode
  21. Dim m_odictStopSigns As Scripting.Dictionary
  22. Const STOPSIGN_AT_END_OF_WORD As Long = 3
  23. Const STOPSIGN_ANYWHERE As Long = 0
  24. Private Sub Class_Initialize()
  25. Set m_oDocQTpl = New DOMDocument
  26. End Sub
  27. Function Init( _
  28. ByRef strQTpl As String, _
  29. ByRef oDomtaxo As DOMDocument _
  30. ) As Boolean
  31. Init = False
  32. oDomtaxo.setProperty "SelectionLanguage", "XSLPattern"
  33. Dim oDomNode As IXMLDOMNode
  34. LoadStopSigns oDomtaxo
  35. Set oDomNode = oDomtaxo.selectSingleNode("METADATA/STOPWORD_ENTRIES")
  36. Set m_oDocStw = New DOMDocument
  37. m_oDocStw.loadXML oDomNode.xml
  38. Init = True
  39. Common_Exit:
  40. Exit Function
  41. End Function
  42. Sub LoadStopSigns(ByRef oDomtaxo As DOMDocument)
  43. Dim oDomNode As IXMLDOMNode, oNodeList As IXMLDOMNodeList
  44. Set m_odictStopSigns = New Scripting.Dictionary
  45. Dim l As Long
  46. Set oNodeList = oDomtaxo.selectNodes("/METADATA/STOPSIGN_ENTRIES/*")
  47. For Each oDomNode In oNodeList
  48. If (oDomNode.Attributes.getNamedItem("CONTEXT").Text = "ENDOFWORD") Then
  49. l = STOPSIGN_AT_END_OF_WORD
  50. Else
  51. l = STOPSIGN_ANYWHERE
  52. End If
  53. m_odictStopSigns.Add oDomNode.Attributes.getNamedItem("STOPSIGN").Text, l
  54. Next
  55. End Sub
  56. Function GetXPathQuery( _
  57. strQTpl As String, _
  58. ByVal strQuery As String, _
  59. Optional ByRef out_dictStopSigns As Scripting.Dictionary, _
  60. Optional ByRef out_dictStopWords As Scripting.Dictionary, _
  61. Optional ByRef out_strCanonicalQuery, _
  62. Optional ByRef out_strXpathAutoStringifyQuery As String, _
  63. Optional ByRef out_strAutoStringifyQuery As String _
  64. ) As String
  65. GetXPathQuery = ""
  66. m_oDocQTpl.loadXML strQTpl
  67. If (m_oDocQTpl.parseError.errorCode <> 0) Then
  68. Err.Raise vbObjectError + 9999, "GetXPathQuery", "Could not load Query Template"
  69. End If
  70. Set m_oHsc_query_template_Node = m_oDocQTpl.selectSingleNode("/hsc-query-template/template")
  71. Set m_o_epilogue = m_oHsc_query_template_Node.selectSingleNode("epilogue")
  72. Set m_o_Query_Term_Node = m_oHsc_query_template_Node.selectSingleNode("query-term")
  73. m_oHsc_query_template_Node.removeChild m_o_Query_Term_Node
  74. Dim oQTNode As IXMLDOMNode, _
  75. oDomText As IXMLDOMText
  76. strQuery = LCase$(Trim$(strQuery))
  77. Dim cQuote As String
  78. ' If (InStr(strQuery, "'") > 0) Then
  79. ' cQuote = "'"
  80. ' Else
  81. ' cQuote = """"
  82. ' End If
  83. cQuote = """"
  84. ' First let's see whether this is an unterminated Query String
  85. If (IsUnTerminatedQuotedQuery(strQuery, cQuote)) Then
  86. strQuery = strQuery + cQuote
  87. End If
  88. GetRidOfStopSignsAndMultipleWhiteSpaces strQuery, out_dictStopSigns
  89. If (IsStringifiableQuery(strQuery)) Then
  90. Set m_oHsc_qtn_as = m_oHsc_query_template_Node.cloneNode(deep:=True)
  91. Set oQTNode = m_o_Query_Term_Node.cloneNode(deep:=True)
  92. Set oDomText = m_oDocQTpl.createTextNode("""" + strQuery + """")
  93. oQTNode.selectSingleNode("in-argument").appendChild oDomText
  94. m_oHsc_qtn_as.insertBefore oQTNode, m_oHsc_qtn_as.selectSingleNode("epilogue")
  95. out_strXpathAutoStringifyQuery = m_oHsc_qtn_as.Text
  96. Set m_oHsc_qtn_as = Nothing
  97. out_strAutoStringifyQuery = oDomText.Text
  98. Else
  99. out_strXpathAutoStringifyQuery = ""
  100. out_strAutoStringifyQuery = ""
  101. End If
  102. If (Not out_dictStopWords Is Nothing) Then out_dictStopWords.RemoveAll
  103. out_strCanonicalQuery = ""
  104. Dim iQTcount As Integer: iQTcount = 0 'Counts the Tokens
  105. Dim oTokens As Tokenizer: Set oTokens = New Tokenizer
  106. oTokens.Init strQuery
  107. Dim strToken As String, bIsString As Boolean
  108. Do While (Not oTokens.eof)
  109. DoEvents
  110. strToken = oTokens.NextWordOrString(bIsString)
  111. If (IsStopWord(strToken)) Then
  112. If (Not out_dictStopWords.Exists(strToken)) Then
  113. out_dictStopWords.Add strToken, strToken
  114. End If
  115. Else
  116. iQTcount = iQTcount + 1
  117. If (bIsString) Then
  118. out_strCanonicalQuery = out_strCanonicalQuery + """" + strToken + """" + " "
  119. Else
  120. out_strCanonicalQuery = out_strCanonicalQuery + strToken + " "
  121. End If
  122. Set oQTNode = m_o_Query_Term_Node.cloneNode(True)
  123. Set oDomText = m_oDocQTpl.createTextNode("""" + strToken + """")
  124. oQTNode.selectSingleNode("in-argument").appendChild oDomText
  125. If (iQTcount > 1) Then
  126. Set oDomText = m_oDocQTpl.createTextNode("and")
  127. m_oHsc_query_template_Node.insertBefore oDomText, m_o_epilogue
  128. End If
  129. m_oHsc_query_template_Node.insertBefore oQTNode, m_o_epilogue
  130. End If
  131. ' GetXPathQuery = GetXPathQuery + " " + Replace(txtQTpl, "%qa%", oTokens.NextWord)
  132. Loop
  133. If (iQTcount > 0) Then
  134. GetXPathQuery = m_oHsc_query_template_Node.Text
  135. Else
  136. GetXPathQuery = ""
  137. End If
  138. End Function
  139. Private Function IsUnTerminatedQuotedQuery(strQuery As String, cQuote As String) As Boolean
  140. IsUnTerminatedQuotedQuery = ((Left$(strQuery, 1) = cQuote) And (Right$(strQuery, 1) <> cQuote))
  141. End Function
  142. Private Function IsStringifiableQuery(ByVal strQuery As String, Optional cQuote As String = """") As Boolean
  143. IsStringifiableQuery = False
  144. If (Left$(strQuery, 1) = cQuote) Then GoTo Common_Exit
  145. Dim oTokens As Tokenizer: Set oTokens = New Tokenizer
  146. oTokens.Init strQuery
  147. Dim strToken As String
  148. Dim lNumTerms As Long: lNumTerms = 0
  149. Do While (Not oTokens.eof)
  150. DoEvents
  151. strToken = oTokens.NextWord
  152. lNumTerms = lNumTerms + 1
  153. If (bIsOpNot(strToken) Or _
  154. bIsOpOr(strToken) Or _
  155. bIsOpAnd(strToken) Or _
  156. "(" = Left$(strToken, 1) Or ")" = Left$(strToken, 1)) Then
  157. GoTo Common_Exit
  158. End If
  159. Loop
  160. If (lNumTerms = 1) Then GoTo Common_Exit
  161. IsStringifiableQuery = True
  162. Common_Exit:
  163. Exit Function
  164. End Function
  165. Private Function bIsOpNot(strToken As String) As Boolean
  166. bIsOpNot = False
  167. If (strToken = "not") Then bIsOpNot = True: GoTo Common_Exit
  168. If (strToken = "!") Then bIsOpNot = True: GoTo Common_Exit
  169. Common_Exit:
  170. Exit Function
  171. End Function
  172. Private Function bIsOpOr(strToken As String) As Boolean
  173. bIsOpOr = False
  174. If (strToken = "or") Then bIsOpOr = True: GoTo Common_Exit
  175. If (strToken = "||") Then bIsOpOr = True: GoTo Common_Exit
  176. Common_Exit:
  177. Exit Function
  178. End Function
  179. Private Function bIsOpAnd(strToken As String) As Boolean
  180. bIsOpAnd = False
  181. If (strToken = "and") Then bIsOpAnd = True: GoTo Common_Exit
  182. If (strToken = "+") Then bIsOpAnd = True: GoTo Common_Exit
  183. If (strToken = "&") Then bIsOpAnd = True: GoTo Common_Exit
  184. Common_Exit:
  185. Exit Function
  186. End Function
  187. Private Function IsStopWord(strWord As String) As Boolean
  188. IsStopWord = False
  189. If (m_oDocStw Is Nothing) Then GoTo Common_Exit
  190. Dim cQuote As String
  191. If (InStr(strWord, "'") > 0) Then
  192. cQuote = """"
  193. Else
  194. cQuote = "'"
  195. End If
  196. IsStopWord = (Not m_oDocStw.selectSingleNode("//STOPWORD[@STOPWORD = " + cQuote + LCase$(strWord) + cQuote + "]") Is Nothing)
  197. Common_Exit:
  198. Exit Function
  199. End Function
  200. Sub GetRidOfStopSignsAndMultipleWhiteSpaces( _
  201. ByRef strQuery As String, _
  202. ByRef out_dictStopSigns As Scripting.Dictionary _
  203. ) ' As String
  204. If (Not out_dictStopSigns Is Nothing) Then
  205. out_dictStopSigns.RemoveAll
  206. Else
  207. Set out_dictStopSigns = New Scripting.Dictionary
  208. End If
  209. Dim i As Long, cSS As String
  210. For i = Len(strQuery) To 1 Step -1
  211. If (bIsStopSign(strQuery, i)) Then
  212. cSS = Mid$(strQuery, i, 1)
  213. strQuery = Left$(strQuery, i - 1) + " " + Mid$(strQuery, i + 1)
  214. If (Not out_dictStopSigns.Exists(cSS)) Then
  215. out_dictStopSigns.Add cSS, cSS
  216. End If
  217. End If
  218. Next i
  219. ' GetRidOfStopSignsAndMultipleWhiteSpaces = strQuery
  220. End Sub
  221. Function bIsStopSign(strWList As String, i As Long) As Boolean
  222. ' Dim strErrMsg As String: strErrMsg = "": If (g_bOnErrorHandling) Then On Error GoTo Error_Handler
  223. bIsStopSign = False
  224. If (i > Len(strWList)) Then GoTo Common_Exit
  225. Dim strStopSign As String: strStopSign = Mid$(strWList, i, 1)
  226. If (m_odictStopSigns.Exists(strStopSign)) Then
  227. Select Case m_odictStopSigns.Item(strStopSign)
  228. Case STOPSIGN_AT_END_OF_WORD
  229. ' We check whether the stop sign follows a space or any other character.
  230. If (i = 1) Then
  231. bIsStopSign = False
  232. Else
  233. Dim strPreviousSign As String: strPreviousSign = Mid$(strWList, i - 1, 1)
  234. Select Case strPreviousSign
  235. Case " ", vbTab
  236. bIsStopSign = False
  237. Case Else
  238. If (i = Len(strWList)) Then
  239. ' If it is the last character in string, then it is a stop sign.
  240. bIsStopSign = True
  241. Else
  242. ' In order to definitely establish that his stopword is at the
  243. ' end of a word, we need to look at the next character
  244. Dim strNextSign As String: strNextSign = Mid$(strWList, i + 1, 1)
  245. Select Case strNextSign
  246. Case " ", vbTab, """"
  247. bIsStopSign = True
  248. Case Else
  249. bIsStopSign = False
  250. End Select ' strNextSign
  251. End If
  252. End Select ' strPreviousSign
  253. End If ' ( i = 1 )
  254. Case Else
  255. ' This is a non-context sensitive Stopsign, so simple existence in the
  256. ' Stop Sign Dictionary means that it is indeed a stop sign
  257. bIsStopSign = True
  258. End Select
  259. Else
  260. ' It does not exist on the StopSign Map, so let's get out of here.
  261. bIsStopSign = False
  262. End If ' (m_odictStopSigns.Exists(strStopSign))
  263. Common_Exit:
  264. Exit Function
  265. Error_Handler:
  266. ' g_XErr.SetInfo "bIsStopSign", strErrMsg
  267. ' Err.Raise Err.Number
  268. End Function
  269. 'Function StopSign2WhiteSpace( _
  270. ' ByRef strQuery, _
  271. ' ByVal i As Long _
  272. ' ) As Boolean
  273. '
  274. '
  275. ' Const STOP_SIGN_AS_END_OF_WORD As Long = 3
  276. ' Const STOP_SIGN_ANYWHERE As Long = 0
  277. '
  278. ' Dim cSS As String: cSS = Mid$(strQuery, i, 1)
  279. ' Dim bIsStopsign As Boolean: bIsStopsign = False
  280. ' If (m_StopSignMap.Exists(cSS)) Then
  281. '
  282. ' Select Case m_StopSignMap.Item(cSS)
  283. ' Case STOPSIGN_AT_END_OF_WORD
  284. '
  285. ' If (i = 1) Then
  286. ' If we are at the beginning of a word, by definition we are NOT at the end of a word,
  287. ' bIsStopsign = False
  288. ' Else
  289. ' Dim strPreviousSign As String: strPreviousSign = Mid$(strWord, i - 1, 1)
  290. '
  291. ' Select Case strPreviousSign
  292. ' Case " ", vbTab
  293. ' bIsStopsign = False
  294. ' Case Else
  295. ' If (i = Len(strWord)) Then
  296. ' If it is the last character in string, then it is a stop sign.
  297. ' bIsStopsign = True
  298. ' Else
  299. ' In order to definitely establish that his stopword is at the
  300. ' end of a word, we need to look at the next character
  301. ' Dim strNextSign As String: strNextSign = Mid$(strWord, i + 1, 1)
  302. ' Select Case strNextSign
  303. ' Case " ", vbTab
  304. ' bIsStopsign = True
  305. ' Case Else
  306. ' bIsStopsign = False
  307. ' End Select ' strNextSign
  308. ' End If
  309. '
  310. ' End Select ' strPreviousSign
  311. ' End If ' ( i = 1 )
  312. '
  313. ' Case Else
  314. ' This is a non-context sensitive Stopsign, so simple existence in the
  315. ' Stop Sign Dictionary means that it is indeed a stop sign
  316. ' bIsStopsign = True
  317. ' End Select
  318. '
  319. ' Else
  320. ' It does not exist on the StopSign Map, so let's get out of here.
  321. ' bIsStopsign = False
  322. '
  323. ' End If ' (m_odictStopSigns.Exists(strStopSign))
  324. '
  325. 'Common_Exit:
  326. ' If (bIsStopsign) Then
  327. ' strQuery = Left$(strQuery, i - 1) + " " + Mid$(strQuery, i + 1)
  328. ' End If
  329. '
  330. ' Exit Function
  331. '
  332. 'Error_Handler:
  333. '
  334. 'End Function
  335. ' This function HAS to be called by bContainsStopSigns, as it is impossible for it to detect
  336. ' Runs of alternate context dependente and context independent Stopsigns.
  337. 'inline static void GetRidOfStopSignsAndMultipleWhiteSpaces( WCHAR *& szStr )
  338. '{
  339. ' WCHAR *psz = szStr ;
  340. ' for ( ; *psz ; ++ psz ) ;
  341. ' for (; psz >= szStr; -- psz )
  342. ' {
  343. ' StopSign2WhiteSpace( szStr, psz ) ;
  344. ' }
  345. '
  346. ' int iWspCount = 0;
  347. ' WCHAR *psz2 = psz ;
  348. ' for ( ; *psz ; ++ psz )
  349. ' {
  350. ' if ( iswspace( *psz ) )
  351. ' {
  352. ' ++ iWspCount ;
  353. ' if ( iWspCount > 1 )
  354. ' {
  355. ' continue ;
  356. ' }
  357. ' }
  358. ' Else
  359. ' {
  360. ' iWspCount = 0 ;
  361. ' }
  362. ' *psz2 = *psz ;
  363. ' ++ psz2 ;
  364. ' }
  365. ' *psz2 = 0 ;
  366. '}
  367. ' This function HAS to be called by bContainsStopSigns, as it is impossible for it to detect
  368. ' Runs of alternate context dependente and context independent Stopsigns.