Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

423 lines
17 KiB

  1. %{
  2. //--------------------------------------------------------------------
  3. // Microsoft Monarch
  4. //
  5. // Copyright (c) Microsoft Corporation, 1997 - 1999.
  6. //
  7. // @doc OPTIONAL EXTRACTION CODES
  8. //
  9. // @module ms-sql.l |
  10. // LEX tokenizer script
  11. //
  12. // @devnotes none
  13. //
  14. // @rev 0 | 04-Feb-97 | v-charca | Created
  15. //
  16. /**
  17. ** NOTE : when adding a new token (XXX) modify the following:
  18. ** 1.) Add %token _XXX to sql.y
  19. ** 2.) Add lexeme pattern to sql.l stating whether the token returns a TOKEN
  20. ** or a VALUE. If the token returns a value a node will need to be created to
  21. ** contain the value information. Therefore the VALUE macro will also need to
  22. ** specify a valid VARIANT type for the value.
  23. **/
  24. #include "msidxtr.h"
      // YYTRACE: in DEBUG builds, hands the current lexeme (yytext, yyleng) and the
      // token number to LexerTrace; in other builds it expands to nothing.
      // The trailing semicolon is part of the DEBUG expansion, so uses of YYTRACE
      // inside the macros below are written without one.
  25. #ifdef DEBUG
  26. # define YYTRACE(tknNum) LexerTrace(yytext, yyleng, tknNum);
  27. #else
  28. # define YYTRACE(tknNum)
  29. #endif
      // TOKEN: trace, then return the token number. No value node is created.
  30. #define TOKEN(tknNum) YYTRACE(tknNum) return(tknNum);
      // VALUE: trace, build a value node for the lexeme through
      // CreateTknValue(yylval, tknNum), then return the token number.
      // Expands to a braced block, so a trailing semicolon at the call site is optional.
  31. #define VALUE(tknNum) \
  32. { \
  33. YYTRACE(tknNum) \
  34. CreateTknValue(yylval, tknNum); \
  35. return tknNum; \
  36. }
      // STRING_VALUE: like VALUE, but also forwards wch and fQuote to CreateTknValue.
  37. #define STRING_VALUE(tknNum, wch, fQuote) \
  38. { \
  39. YYTRACE(tknNum) \
  40. CreateTknValue(yylval, tknNum, wch); \
  41. return tknNum; \
  42. }
      // ID_VALUE: builds the value node using tknNum and wch, but always returns _ID
      // to the caller regardless of tknNum.
  43. #define ID_VALUE(tknNum, wch) \
  44. { \
  45. YYTRACE(tknNum) \
  46. CreateTknValue(yylval, tknNum, wch); \
  47. return _ID; \
  48. }
  49. /*
  50. ** Make Lex read from a block of data
  51. ** b  (buffer) is the character buffer,
  52. ** r  (result) is a variable to store the number of chars read
  53. ** ms is the size of the buffer
  54. ** The macro assigns r the return value of yybufferinput(b, ms).
  55. */
  55. #undef YY_INPUT
  56. #define YY_INPUT(b, r, ms) (r = yybufferinput(b, ms))
  57. //--------------------------------------------------------------------------------------------
  58. // @func Makes a new copy of UNICODE string. Filters out double quotes
  59. // @side Allocates enough bytes to hold string
  60. // @rdesc Pointer to new UNICODE string
  61. LPWSTR PwszDupFilter(
  62. LPWSTR pwszOrig,
  63. WCHAR wch )
  64. {
  65. LPWSTR pwszCopy = (LPWSTR)CoTaskMemAlloc( (wcslen(pwszOrig)+2)*sizeof(WCHAR) );
  66. if ( 0 != pwszCopy )
  67. {
  68. LPWSTR pwsz = pwszCopy;
  69. while ( 0 != *pwszOrig )
  70. {
  71. if ( *(pwszOrig+1) && *(pwszOrig+1) == *pwszOrig && wch == *pwszOrig )
  72. pwszOrig++;
  73. else
  74. *pwsz++ = *pwszOrig++;
  75. }
  76. *pwsz = L'\0';
  77. }
  78. return pwszCopy;
  79. }
  80. //--------------------------------------------------------------------------------------------
  81. // YYLEXER::CreateTknValue
  82. // Creates a QUERYTREE node structure which is passed to the YACC value stack.
  83. // This routines uses the TokenInfo map to determine which opids to create for
  84. // the given string.
  85. //
  86. //
  87. void YYLEXER::CreateTknValue(
  88. YYSTYPE *ppct,
  89. short tknNum,
  90. YY_CHAR wch,
  91. BOOL fQuote )
  92. {
  93. // Note that values containing variants can only be CONSTANTS or ID's
  94. // SHOULD BE DONE BY valType
  95. switch ( tknNum )
  96. {
  97. case _ID:
  98. case _TEMPVIEW:
  99. {
  100. // Assume table_name for now. Might have to correct this when I
  101. // see the context in the parser.
  102. if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_WSTR)) )
  103. throw(E_OUTOFMEMORY);
  104. (*ppct)->op = DBOP_table_name;
  105. (*ppct)->wKind = DBVALUEKIND_WSTR;
  106. (*ppct)->value.pwszValue = CoTaskStrDup(yytext_ptr);
  107. if( 0 == (*ppct)->value.pwszValue )
  108. {
  109. DeleteDBQT( *ppct );
  110. *ppct = NULL;
  111. throw(E_OUTOFMEMORY);
  112. }
  113. _wcsupr((*ppct)->value.pwszValue);
  114. break;
  115. }
  116. case _DELIMITED_ID:
  117. {
  118. if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_WSTR)) )
  119. throw(E_OUTOFMEMORY);
  120. (*ppct)->op = DBOP_table_name;
  121. (*ppct)->wKind = DBVALUEKIND_WSTR;
  122. // Strip quotes on delimited identifier
  123. yytext_ptr[wcslen(yytext_ptr)-1] = L'\0';
  124. (*ppct)->value.pwszValue = PwszDupFilter(yytext_ptr+1, wch);
  125. if( 0 == (*ppct)->value.pwszValue )
  126. {
  127. DeleteDBQT( *ppct );
  128. *ppct = NULL;
  129. throw(E_OUTOFMEMORY);
  130. }
  131. break;
  132. }
  133. case _URL:
  134. case _STRING:
  135. case _PREFIX_STRING:
  136. {
  137. // NOTE: This is really a PROPVARIANT node, but there is no DBVALUEKIND for PROPVARIANT.
  138. if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_VARIANT, DBOP_scalar_constant)) )
  139. throw(E_OUTOFMEMORY);
  140. LPWSTR pwsz = yytext_ptr;
  141. LPWSTR pwszCopy = PwszDupFilter(pwsz, wch);
  142. if ( 0 == pwszCopy )
  143. {
  144. DeleteDBQT( *ppct );
  145. *ppct = NULL;
  146. throw(E_OUTOFMEMORY);
  147. }
  148. LPWSTR pwszTemp = pwszCopy;
  149. // Strip quotes on literals or
  150. if ( fQuote && (*pwszCopy == L'\"' || *pwszCopy == L'\'') )
  151. {
  152. pwszCopy++;
  153. Assert(pwszCopy[wcslen(pwszCopy)-1] == L'\"' || pwszCopy[wcslen(pwszCopy)-1] == L'\'');
  154. pwszCopy[wcslen(pwszCopy)-1] = L'\0';
  155. }
  156. ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal = SysAllocString( pwszCopy );
  157. CoTaskMemFree( pwszTemp ); // throw away temporary before testing for out of memory
  158. ((PROPVARIANT*)(*ppct)->value.pvValue)->vt = VT_BSTR;
  159. if( 0 == ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal )
  160. {
  161. DeleteDBQT( *ppct );
  162. *ppct = 0;
  163. throw(E_OUTOFMEMORY);
  164. }
  165. }
  166. break;
  167. case _INTNUM:
  168. if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_VARIANT, DBOP_scalar_constant)) )
  169. throw(E_OUTOFMEMORY);
  170. ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal = SysAllocString( yytext_ptr );
  171. ((PROPVARIANT*)(*ppct)->value.pvValue)->vt = VT_BSTR;
  172. if ( 0 == ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal )
  173. {
  174. DeleteDBQT( *ppct );
  175. *ppct = 0;
  176. throw(E_OUTOFMEMORY);
  177. }
  178. (*ppct)->hrError = PropVariantChangeTypeI64( (PROPVARIANT*)(*ppct)->value.pvValue );
  179. if ( FAILED((*ppct)->hrError) )
  180. {
  181. HRESULT hr = (*ppct)->hrError;
  182. DeleteDBQT( *ppct );
  183. *ppct = 0;
  184. throw(hr);
  185. }
  186. break;
  187. case _REALNUM:
  188. if ( 0 == (*ppct = PctAllocNode(DBVALUEKIND_VARIANT, DBOP_scalar_constant)) )
  189. throw(E_OUTOFMEMORY);
  190. ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal = SysAllocString( yytext_ptr );
  191. ((PROPVARIANT*)(*ppct)->value.pvValue)->vt = VT_BSTR;
  192. if ( 0 == ((PROPVARIANT*)(*ppct)->value.pvValue)->bstrVal )
  193. {
  194. DeleteDBQT( *ppct );
  195. *ppct = NULL;
  196. throw(E_OUTOFMEMORY);
  197. }
  198. (*ppct)->hrError = VariantChangeTypeEx( (*ppct)->value.pvarValue, // convert in place
  199. (*ppct)->value.pvarValue,
  200. LOCALE_SYSTEM_DEFAULT,
  201. 0,
  202. VT_R8 );
  203. if ( FAILED((*ppct)->hrError) )
  204. {
  205. HRESULT hr = (*ppct)->hrError;
  206. DeleteDBQT( *ppct );
  207. *ppct = 0;
  208. throw(hr);
  209. }
  210. break;
  211. default:
  212. Assert( !"Unkown token value" );
  213. }
  214. }
  215. %}
       /* Start conditions.  All are declared with %x (exclusive): while one of   */
       /* them is active, only rules carrying that <state> prefix are candidates. */
  216. %x contains
  217. %x cntntsrch
  218. %x scope0
  219. %x scope1
  220. %x scope2
  221. %x view
       /* Named patterns used by the rules below.                                  */
       /* white         one or more blank, tab, newline, form feed or CR           */
       /* id            a letter followed by letters, digits or underscores        */
       /* simpleterm    any run of characters other than white space and           */
       /*               ' ( ) [ ] & | ~ ! ,  -- with '' allowed as an embedded     */
       /*               quote.  The outer * means it can also match nothing.       */
       /* br_id         body of a double-quoted item: no newline, and a double     */
       /*               quote only in its doubled form                             */
       /* integer       optionally signed decimal digits, or optionally signed     */
       /*               0x followed by hex digits                                  */
       /* real          digits with optional fraction and exponent, or a           */
       /*               leading-dot fraction with optional exponent.               */
       /*               NOTE(review): the leading-dot form accepts only '-' as a   */
       /*               sign while the first form accepts '-' or '+' -- confirm.   */
       /* quoted_string single-quoted text, '' allowed inside, no newline          */
       /* string        single-quoted text with no embedded quote; not referenced  */
       /*               by any rule in this part of the file                       */
       /* comment       two dashes up to (not including) the end of the line       */
  222. white [ \t\n\f\r]+
  223. id [a-zA-Z][a-zA-Z0-9_]*
  224. simpleterm ([^ \n\t\f\r\'\(\)\[\]\&\|\~\!\,]+|\'\')*
  225. br_id ([^\"\n]*|\"\")*
  226. integer [-+]?[0-9]+|[-+]?0x[a-fA-F0-9]+
  227. real [-+]?([0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?|-?\.[0-9]+([eE][-+]?[0-9]+)?
  228. quoted_string \'([^'\n]*|\'\')*\'
  229. string \'[^'\n]*\'
  230. comment --[^\n]*
  231. %%
  232. %{
  233. /***
  234. *** Reserved words in every context
       ***
       *** These rules carry no <state> prefix.  The keyword rules are listed
       *** before {id}, and {integer} before {real}, so that when two patterns
       *** match the same text the earlier rule is the one chosen.
       ***
       *** State changes made here:
       ***    CONTAINS                      -> contains
       ***    SCOPE, ..SCOPE, ...SCOPE      -> scope0
       ***    .. and ...                    -> view
       ***
       *** A delimited identifier ("...") is handed to the parser as _ID.
       *** White space and -- comments produce no token.  Any other single
       *** character is returned as its own character code.
       ***
       *** NOTE(review): keyword patterns are written in upper case only;
       *** whether lower-case input matches depends on how the scanner is
       *** generated or fed -- confirm.
       *** NOTE(review): the backslash before S in the ...SCOPE rule looks
       *** like a typo -- confirm it is meant to be a plain S.
  235. ***/
  236. %}
  237. ALL { TOKEN(_ALL); }
  238. AND { TOKEN(_AND); }
  239. ANY { TOKEN(_ANY); }
  240. ARRAY { TOKEN(_ARRAY); }
  241. AS { TOKEN(_AS); }
  242. ASC { TOKEN(_ASC); }
  243. CAST { TOKEN(_CAST); }
  244. CREATE { TOKEN(_CREATE); }
  245. CONTAINS { BEGIN contains;TOKEN(_CONTAINS); }
  246. DESC { TOKEN(_DESC); }
  247. DROP { TOKEN(_DROP); }
  248. FALSE { TOKEN(_FALSE); }
  249. FREETEXT { TOKEN(_FREETEXT); }
  250. FROM { TOKEN(_FROM); }
  251. IS { TOKEN(_IS); }
  252. IS{white}NOT { TOKEN(_IS_NOT); }
  253. LIKE { TOKEN(_LIKE); }
  254. MATCHES { TOKEN(_MATCHES); }
  255. NOT { TOKEN(_NOT); }
  256. NOT{white}LIKE { TOKEN(_NOT_LIKE); }
  257. NULL { TOKEN(_NULL); }
  258. OR { TOKEN(_OR); }
  259. ORDER{white}BY { TOKEN(_ORDER_BY); }
  260. PASSTHROUGH { TOKEN(_PASSTHROUGH); }
  261. PROPERTYNAME { TOKEN(_PROPERTYNAME); }
  262. PROPID { TOKEN(_PROPID); }
  263. RANKMETHOD { TOKEN(_RANKMETHOD); }
  264. SCOPE { BEGIN scope0; TOKEN(_SCOPE); }
  265. SELECT { TOKEN(_SELECT); }
  266. SET { TOKEN(_SET); }
  267. SOME { TOKEN(_SOME); }
  268. TABLE { TOKEN(_TABLE); }
  269. TRUE { TOKEN(_TRUE); }
  270. TYPE { TOKEN(_TYPE); }
  271. UNION { TOKEN(_UNION); }
  272. UNKNOWN { TOKEN(_UNKNOWN); }
  273. VIEW { TOKEN(_VIEW); }
  274. WHERE { TOKEN(_WHERE); }
  275. {white} { /* empty lex rule */ }
  276. {id} { VALUE(_ID); }
  277. \#{id} { VALUE(_TEMPVIEW); }
  278. \#\#{id} { VALUE(_TEMPVIEW); }
  279. \"{br_id}\" { ID_VALUE(_DELIMITED_ID, L'"'); }
  280. {quoted_string} { STRING_VALUE(_STRING, L'\'', TRUE);}
  281. {integer} { VALUE(_INTNUM); }
  282. {real} { VALUE(_REALNUM); }
  283. {comment} { /* empty lex rule */ }
  284. \>\= { TOKEN(_GE); }
  285. \<\= { TOKEN(_LE); }
  286. \<\> { TOKEN(_NE); }
  287. \!\= { TOKEN(_NE); }
  288. \. { TOKEN(_DOT); }
  289. \.\. { BEGIN view; TOKEN(_DOTDOT); }
  290. \.\.\. { BEGIN view; TOKEN(_DOTDOTDOT); }
  291. \.\.SCOPE { BEGIN scope0; TOKEN(_DOTDOT_SCOPE);}
  292. \.\.\.\SCOPE { BEGIN scope0; TOKEN(_DOTDOTDOT_SCOPE);}
  293. . { YYTRACE(yytext[0]); return yytext[0]; }
  294. %{
  295. /***
  296. *** A <contains predicate> has been started. The only things we should see are:
  297. *** ( - matched by .
  298. *** <column reference> - matched by {id} or "{br_id}"
  299. *** , - matched by .
  300. *** ' - matched by \'. Also switch to content search state (cntntsrch).
       ***
       *** White space is skipped.  The single quote is the only rule here that
       *** changes state; none of these rules returns to the initial state.
  301. ***/
  302. %}
  303. <contains>\' { BEGIN cntntsrch;YYTRACE(yytext[0]); return yytext[0];}
  304. <contains>{id} { VALUE(_ID); }
  305. <contains>\"{br_id}\" { ID_VALUE(_DELIMITED_ID, L'"'); }
  306. <contains>{white} { /* empty lex rule */ }
  307. <contains>. { YYTRACE(yytext[0]); return yytext[0];}
  308. %{
  309. /***
  310. *** The only things we should see are:
  311. *** <global view name> - matched by {id}
  312. *** _TEMPVIEW - matched by \#{id} or \#\#{id}
       ***
       *** This state is entered after .. or ... and every rule here switches
       *** back to the initial state before returning its token.
       *** NOTE(review): there is no rule for white space or for any other
       *** character in this state -- confirm what the generated scanner does
       *** with such input.
  313. ***/
  314. %}
  315. <view>{id} { BEGIN INITIAL; VALUE(_ID); }
  316. <view>\#{id} { BEGIN INITIAL; VALUE(_TEMPVIEW); }
  317. <view>\#\#{id} { BEGIN INITIAL; VALUE(_TEMPVIEW); }
  318. %{
  319. /***
  320. *** A <content search condition> has been started. There are several keywords we can see here.
  321. *** We are also looking for a quoted string, a prefix string, or a simple term. We are taken
  322. *** back to the initial state by a single quote (').
       ***
       *** AND, NOT and OR are recognized only with white space on both sides;
       *** the whole match is consumed and a single blank is pushed back with
       *** unput().  NEAR requires leading white space only.
       *** Double-quoted text is returned as _STRING, or as _PREFIX_STRING when
       *** it ends in * before the closing quote; both ask for the surrounding
       *** quotes to be stripped (fQuote TRUE).  A {simpleterm} is returned as
       *** _STRING with fQuote FALSE.  In all three, '' is the doubled character
       *** passed to be collapsed.
  323. ***/
  324. %}
  325. <cntntsrch>{white}AND{white} { unput(L' '); TOKEN(_AND); }
  326. <cntntsrch>COERCE { TOKEN(_COERCE); }
  327. <cntntsrch>ISABOUT { TOKEN(_ISABOUT); }
  328. <cntntsrch>{white}NEAR { TOKEN(_NEAR); }
  329. <cntntsrch>{white}NOT{white} { unput(L' '); TOKEN(_NOT); }
  330. <cntntsrch>{white}OR{white} { unput(L' '); TOKEN(_OR); }
  331. <cntntsrch>FORMSOF { TOKEN(_FORMSOF); }
  332. <cntntsrch>WEIGHT { TOKEN(_WEIGHT); }
  333. <cntntsrch>\"{br_id}\*\" { STRING_VALUE(_PREFIX_STRING, L'\'', TRUE);}
  334. <cntntsrch>\"{br_id}\" { STRING_VALUE(_STRING, L'\'', TRUE);}
  335. <cntntsrch>\' { BEGIN INITIAL; YYTRACE(yytext[0]); return yytext[0];}
  336. <cntntsrch>{white} { /* empty lex rule */ }
  337. <cntntsrch>{simpleterm} { STRING_VALUE(_STRING, L'\'', FALSE)}
  338. <cntntsrch>. { YYTRACE(yytext[0]); return yytext[0];}
  339. %{
  340. /***
  341. *** A <from clause> has been started. We've already seen the keyword SCOPE, so this
  342. *** is not a FROM <view name>. We're just looking for a ( now to put us into the
  343. *** next state (scope1).
  344. *** ( - matched by \(. Also switch to scope1 state.
       ***
       *** White space is skipped.  Any other single character is returned as
       *** itself and also switches to scope1, so the first non-blank character
       *** after SCOPE always leaves this state.
  345. ***/
  346. %}
  347. <scope0>\( { BEGIN scope1; YYTRACE(yytext[0]); return yytext[0];}
  348. <scope0>{white} { /* empty lex rule */ }
  349. <scope0>. { BEGIN scope1; YYTRACE(yytext[0]); return yytext[0];}
  350. %{
  351. /***
  352. *** We're in the middle of a <from clause>. We've seen FROM SCOPE(, so now we need to recognize
  353. *** the various scope definitions that we might see here. The two important things to recognize
  354. *** are:
  355. *** ( - matched by \(. Also switch to scope2 state to match parens.
  356. *** ) - matched by \). Also switch to the initial (finished <from clause>).
       ***
       *** A double-quoted item is returned as _URL with its quotes stripped and
       *** "" collapsed.  The multi-word keywords allow any amount of white
       *** space between their words.  White space is skipped and any other
       *** single character is returned as itself without a state change.
  357. ***/
  358. %}
  359. <scope1>\"{br_id}\" { STRING_VALUE(_URL, L'"', TRUE); }
  360. <scope1>ALL { TOKEN(_ALL); }
  361. <scope1>DEEP{white}TRAVERSAL { TOKEN(_DEEP_TRAVERSAL); }
  362. <scope1>EXCLUDE{white}SEARCH{white}TRAVERSAL { TOKEN(_EXCLUDE_SEARCH_TRAVERSAL);}
  363. <scope1>OF { TOKEN(_OF); }
  364. <scope1>SHALLOW{white}TRAVERSAL { TOKEN(_SHALLOW_TRAVERSAL); }
  365. <scope1>{white} { /* empty lex rule */ }
  366. <scope1>\( { BEGIN scope2; YYTRACE(yytext[0]); return yytext[0];}
  367. <scope1>\) { BEGIN INITIAL; YYTRACE(yytext[0]); return yytext[0];}
  368. <scope1>. { YYTRACE(yytext[0]); return yytext[0];}
  369. %{
  370. /***
  371. *** We're still in the middle of a <from clause>. So far we've seen:
  372. *** FROM SCOPE( ... (
  373. *** We need to find a ')' to finish out the element we're working on:
  374. *** ) - matched by \). Also switch back to scope1 state.
       ***
       *** Inside the inner parentheses only double-quoted items (returned as
       *** _URL) get a value; white space is skipped and every other character
       *** is returned as itself.  There is no rule for ( here, so a further
       *** opening parenthesis does not change state.
  375. ***/
  376. %}
  377. <scope2>{white} { /* empty lex rule */ }
  378. <scope2>\"{br_id}\" { STRING_VALUE(_URL, L'"', TRUE); }
  379. <scope2>\) { BEGIN scope1; YYTRACE(yytext[0]); return yytext[0];}
  380. <scope2>. { YYTRACE(yytext[0]); return yytext[0];}
  381. %%