Team Fortress 2 Source Code as of 22/4/2020
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1722 lines
48 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose:
  4. //
  5. // $NoKeywords: $
  6. //
  7. //=============================================================================//
  8. /*
  9. *
  10. * Copyright (c) 1998-9
  11. * Dr John Maddock
  12. *
  13. * Permission to use, copy, modify, distribute and sell this software
  14. * and its documentation for any purpose is hereby granted without fee,
  15. * provided that the above copyright notice appear in all copies and
  16. * that both that copyright notice and this permission notice appear
  17. * in supporting documentation. Dr John Maddock makes no representations
  18. * about the suitability of this software for any purpose.
  19. * It is provided "as is" without express or implied warranty.
  20. *
  21. */
  22. /*
  23. * FILE regex.h
  24. * VERSION 2.12
  25. */
  26. /* start with C compatibility API */
  27. #ifndef __REGEX_H
  28. #define __REGEX_H
  29. #include <cregex>
  30. #ifdef __cplusplus
  31. // what follows is all C++ don't include in C builds!!
  32. #include <new.h>
  33. #if !defined(JM_NO_TYPEINFO)
  34. #include <typeinfo>
  35. #endif
  36. #include <string.h>
  37. #include <jm/jstack.h>
  38. #include <jm/re_raw.h>
  39. #include <jm/re_nls.h>
  40. #include <jm/regfac.h>
  41. #include <jm/re_cls.h>
  42. #include <jm/re_coll.h>
  43. #include <jm/re_kmp.h>
  44. JM_NAMESPACE(__JM)
  45. //
  46. // define error handling classes
  47. #if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_EXCEPTION_H)
  48. // standard classes are available:
  49. class JM_IX_DECL bad_expression : public __JM_STD::exception
  50. {
  51. #ifdef RE_LOCALE_CPP
  52. __JM_STD::string code;
  53. public:
  54. bad_expression(const __JM_STD::string& s) : code(s) {}
  55. #else
  56. unsigned int code;
  57. public:
  58. bad_expression(unsigned int err) : code(err) {}
  59. #endif
  60. bad_expression(const bad_expression& e) : __JM_STD::exception(e), code(e.code) {}
  61. bad_expression& operator=(const bad_expression& e)
  62. {
  63. #ifdef _MSC_VER
  64. static_cast<__JM_STD::exception*>(this)->operator=(e);
  65. #else
  66. __JM_STD::exception::operator=(e);
  67. #endif
  68. code = e.code;
  69. return *this;
  70. }
  71. virtual const char* what()const throw();
  72. };
  73. #elif !defined(JM_NO_EXCEPTIONS)
  74. // no standard classes, do it ourselves:
  75. class JM_IX_DECL bad_expression
  76. {
  77. #ifdef RE_LOCALE_CPP
  78. __JM_STD::string code;
  79. public:
  80. bad_expression(const __JM_STD::string& s) : code(s) {}
  81. #else
  82. unsigned int code;
  83. public:
  84. bad_expression(unsigned int err) : code(err) {}
  85. #endif
  86. bad_expression(const bad_expression& e) : code(e.code) {}
  87. bad_expression& operator=(const bad_expression& e) { code = e.code; return *this; }
  88. virtual const char* what()const throw();
  89. };
  90. #endif
  91. //
  92. // define default traits classes for char and wchar_t types:
  93. //
  // Forward declarations of compiled-expression node types that are
  // defined further down in this header.
  struct re_syntax_base;
  struct re_set_long;
  // Unified classification of the characters that have a special meaning
  // in a regular expression. Values are consecutive, starting at zero;
  // syntax_max is one past the last real entry.
  enum char_syntax_type
  {
     syntax_char          = 0,   // ordinary character
     syntax_open_bracket  = 1,   // (
     syntax_close_bracket = 2,   // )
     syntax_dollar        = 3,   // $
     syntax_caret         = 4,   // ^
     syntax_dot           = 5,   // .
     syntax_star          = 6,   // *
     syntax_plus          = 7,   // +
     syntax_question      = 8,   // ?
     syntax_open_set      = 9,   // [
     syntax_close_set     = 10,  // ]
     syntax_or            = 11,  // |
     syntax_slash         = 12,  // backslash
     syntax_hash          = 13,  // #
     syntax_dash          = 14,  // -
     syntax_open_brace    = 15,  // {
     syntax_close_brace   = 16,  // }
     syntax_digit         = 17,  // 0-9
     syntax_b             = 18,  // for \b
     syntax_B             = 19,  // for \B
     syntax_left_word     = 20,  // for \<
     syntax_right_word    = 21,  // for \>
     syntax_w             = 22,  // for \w
     syntax_W             = 23,  // for \W
     syntax_start_buffer  = 24,  // for \`
     syntax_end_buffer    = 25,  // for \'
     syntax_newline       = 26,  // for newline alt
     syntax_comma         = 27,  // for {x,y}
     syntax_a             = 28,  // for \a
     syntax_f             = 29,  // for \f
     syntax_n             = 30,  // for \n
     syntax_r             = 31,  // for \r
     syntax_t             = 32,  // for \t
     syntax_v             = 33,  // for \v
     syntax_x             = 34,  // for \xdd
     syntax_c             = 35,  // for \cx
     syntax_colon         = 36,  // for [:...:]
     syntax_equal         = 37,  // for [=...=]

     // perl ops:
     syntax_e             = 38,  // for \e
     syntax_l             = 39,  // for \l
     syntax_L             = 40,  // for \L
     syntax_u             = 41,  // for \u
     syntax_U             = 42,  // for \U
     syntax_s             = 43,  // for \s
     syntax_S             = 44,  // for \S
     syntax_d             = 45,  // for \d
     syntax_D             = 46,  // for \D
     syntax_E             = 47,  // for \Q\E
     syntax_Q             = 48,  // for \Q\E
     syntax_X             = 49,  // for \X
     syntax_C             = 50,  // for \C
     syntax_Z             = 51,  // for \Z
     syntax_G             = 52,  // for \G

     syntax_max           = 53   // number of entries above
  };
  154. template <class charT>
  155. class char_regex_traits
  156. {
  157. public:
  158. typedef charT char_type;
  159. //
  160. // uchar_type is the same size as char_type
  161. // but must be unsigned:
  162. typedef charT uchar_type;
  163. //
  164. // size_type is normally the same as charT
  165. // but could be unsigned int to improve performance
  166. // of narrow character types, NB must be unsigned:
  167. typedef jm_uintfast32_t size_type;
  168. // length:
  169. // returns the length of a null terminated string
  170. // can be left unimplimented for non-character types.
  171. static size_t length(const char_type* );
  172. // syntax_type
  173. // returns the syntax type of a given charT
  174. // translates customised syntax to a unified enum.
  175. static unsigned int syntax_type(size_type c);
  176. // translate:
  177. //
  178. static charT RE_CALL translate(charT c, bool icase
  179. #ifdef RE_LOCALE_CPP
  180. , const __JM_STD::locale&
  181. #endif
  182. );
  183. // transform:
  184. //
  185. // converts a string into a sort key for locale dependant
  186. // character ranges.
  187. static void RE_CALL transform(re_str<charT>& out, const re_str<charT>& in
  188. #ifdef RE_LOCALE_CPP
  189. , const __JM_STD::locale&
  190. #endif
  191. );
  192. // transform_primary:
  193. //
  194. // converts a string into a primary sort key for locale dependant
  195. // equivalence classes.
  196. static void RE_CALL transform_primary(re_str<charT>& out, const re_str<charT>& in
  197. #ifdef RE_LOCALE_CPP
  198. , const __JM_STD::locale&
  199. #endif
  200. );
  201. // is_separator
  202. // returns true if c is a newline character
  203. static bool RE_CALL is_separator(charT c);
  204. // is_combining
  205. // returns true if the character is a unicode
  206. // combining character
  207. static bool RE_CALL is_combining(charT c);
  208. // is_class
  209. // returns true if the character is a member
  210. // of the specified character class
  211. static bool RE_CALL is_class(charT c, jm_uintfast32_t f
  212. #ifdef RE_LOCALE_CPP
  213. , const __JM_STD::locale&
  214. #endif
  215. );
  216. // toi
  217. // converts c to integer
  218. static int RE_CALL toi(charT c
  219. #ifdef RE_LOCALE_CPP
  220. , const __JM_STD::locale&
  221. #endif
  222. );
  223. // toi
  224. // converts multi-character value to int
  225. // updating first as required
  226. static int RE_CALL toi(const charT*& first, const charT* last, int radix
  227. #ifdef RE_LOCALE_CPP
  228. , const __JM_STD::locale&
  229. #endif
  230. );
  231. // lookup_classname
  232. // parses a class declaration of the form [:class:]
  233. // On entry first points to the first character of the class name.
  234. //
  235. static jm_uintfast32_t RE_CALL lookup_classname(const charT* first, const charT* last
  236. #ifdef RE_LOCALE_CPP
  237. , const __JM_STD::locale&
  238. #endif
  239. );
  240. // lookup_collatename
  241. // parses a collating element declaration of the form [.collating_name.]
  242. // On entry first points to the first character of the collating element name.
  243. //
  244. static bool RE_CALL lookup_collatename(re_str<charT>& s, const charT* first, const charT* last
  245. #ifdef RE_LOCALE_CPP
  246. , const __JM_STD::locale&
  247. #endif
  248. );
  249. };
  250. JM_TEMPLATE_SPECIALISE
  251. class char_regex_traits<char>
  252. {
  253. public:
  254. typedef char char_type;
  255. typedef unsigned char uchar_type;
  256. typedef unsigned int size_type;
  257. static size_t RE_CALL length(const char_type* p)
  258. {
  259. return strlen(p);
  260. }
  261. static unsigned int RE_CALL syntax_type(size_type c
  262. #ifdef RE_LOCALE_CPP
  263. , const __JM_STD::locale& l
  264. #endif
  265. )
  266. {
  267. #ifdef RE_LOCALE_CPP
  268. return JM_USE_FACET(l, regfacet<char>).syntax_type((char)c);
  269. #else
  270. return re_syntax_map[c];
  271. #endif
  272. }
  273. static char RE_CALL translate(char c, bool icase
  274. #ifdef RE_LOCALE_CPP
  275. , const __JM_STD::locale& l
  276. #endif
  277. )
  278. {
  279. #ifdef RE_LOCALE_CPP
  280. return icase ? JM_USE_FACET(l, __JM_STD::ctype<char>).tolower((char_type)c) : c;
  281. #else
  282. return icase ? re_lower_case_map[(size_type)(uchar_type)c] : c;
  283. #endif
  284. }
  285. static void RE_CALL transform(re_str<char>& out, const re_str<char>& in
  286. #ifdef RE_LOCALE_CPP
  287. , const __JM_STD::locale& l
  288. #endif
  289. )
  290. {
  291. #ifndef RE_LOCALE_CPP
  292. re_transform(out, in);
  293. #else
  294. out = JM_USE_FACET(l, __JM_STD::collate<char>).transform(in.c_str(), in.c_str() + in.size()).c_str();
  295. #endif
  296. }
  297. static void RE_CALL transform_primary(re_str<char>& out, const re_str<char>& in
  298. #ifdef RE_LOCALE_CPP
  299. , const __JM_STD::locale& l
  300. #endif
  301. )
  302. {
  303. transform(out, in MAYBE_PASS_LOCALE(l));
  304. #ifdef RE_LOCALE_W32
  305. re_trunc_primary(out);
  306. #else
  307. unsigned n = in.size() + out.size() / 4;
  308. if(n < out.size())
  309. out[n] = 0;
  310. #endif
  311. }
  312. static bool RE_CALL is_separator(char c)
  313. {
  314. return JM_MAKE_BOOL((c == '\n') || (c == '\r'));
  315. }
  316. static bool RE_CALL is_combining(char)
  317. {
  318. return false;
  319. }
  320. static bool RE_CALL is_class(char c, jm_uintfast32_t f
  321. #ifdef RE_LOCALE_CPP
  322. , const __JM_STD::locale& l
  323. #endif
  324. )
  325. {
  326. #ifdef RE_LOCALE_CPP
  327. if(JM_USE_FACET(l, __JM_STD::ctype<char>).is((__JM_STD::ctype<char>::mask)(f & char_class_all_base), c))
  328. return true;
  329. if((f & char_class_underscore) && (c == '_'))
  330. return true;
  331. if((f & char_class_blank) && ((c == ' ') || (c == '\t')))
  332. return true;
  333. return false;
  334. #else
  335. return JM_MAKE_BOOL(re_class_map[(size_type)(uchar_type)c] & f);
  336. #endif
  337. }
  338. static int RE_CALL toi(char c
  339. #ifdef RE_LOCALE_CPP
  340. , const __JM_STD::locale& l
  341. #endif
  342. )
  343. {
  344. return re_toi(c MAYBE_PASS_LOCALE(l));
  345. }
  346. static int RE_CALL toi(const char*& first, const char* last, int radix
  347. #ifdef RE_LOCALE_CPP
  348. , const __JM_STD::locale& l
  349. #endif
  350. )
  351. {
  352. return re_toi(first, last, radix MAYBE_PASS_LOCALE(l));
  353. }
  354. static jm_uintfast32_t RE_CALL lookup_classname(const char* first, const char* last
  355. #ifdef RE_LOCALE_CPP
  356. , const __JM_STD::locale& l
  357. #endif
  358. )
  359. {
  360. #ifdef RE_LOCALE_CPP
  361. return JM_USE_FACET(l, regfacet<char>).lookup_classname(first, last);
  362. #else
  363. return re_lookup_class(first, last);
  364. #endif
  365. }
  366. static bool RE_CALL lookup_collatename(re_str<char>& s, const char* first, const char* last
  367. #ifdef RE_LOCALE_CPP
  368. , const __JM_STD::locale& l
  369. #endif
  370. )
  371. {
  372. #ifdef RE_LOCALE_CPP
  373. re_str<char> n(first, last);
  374. return JM_USE_FACET(l, regfacet<char>).lookup_collatename(s, n);
  375. #else
  376. return re_lookup_collate(s, first, last);
  377. #endif
  378. }
  379. };
  380. #ifndef JM_NO_WCSTRING
  381. JM_TEMPLATE_SPECIALISE
  382. class char_regex_traits<wchar_t>
  383. {
  384. public:
  385. typedef wchar_t char_type;
  386. typedef unsigned short uchar_type;
  387. typedef unsigned int size_type;
  388. static size_t RE_CALL length(const char_type* p)
  389. {
  390. return wcslen(p);
  391. }
  392. static unsigned int RE_CALL syntax_type(size_type c
  393. #ifdef RE_LOCALE_CPP
  394. , const __JM_STD::locale& l
  395. #endif
  396. )
  397. {
  398. #ifdef RE_LOCALE_CPP
  399. return JM_USE_FACET(l, regfacet<wchar_t>).syntax_type((wchar_t)c);
  400. #else
  401. return re_get_syntax_type(c);
  402. #endif
  403. }
  404. static wchar_t RE_CALL translate(wchar_t c, bool icase
  405. #ifdef RE_LOCALE_CPP
  406. , const __JM_STD::locale& l
  407. #endif
  408. )
  409. {
  410. #ifdef RE_LOCALE_CPP
  411. return icase ? JM_USE_FACET(l, __JM_STD::ctype<wchar_t>).tolower((char_type)c) : c;
  412. #else
  413. return icase ? ((c < 256) ? re_lower_case_map_w[(uchar_type)c] : re_wtolower(c)) : c;
  414. #endif
  415. }
  416. static void RE_CALL transform(re_str<wchar_t>& out, const re_str<wchar_t>& in
  417. #ifdef RE_LOCALE_CPP
  418. , const __JM_STD::locale& l
  419. #endif
  420. )
  421. {
  422. #ifndef RE_LOCALE_CPP
  423. re_transform(out, in);
  424. #else
  425. out = JM_USE_FACET(l, __JM_STD::collate<wchar_t>).transform(in.c_str(), in.c_str() + in.size()).c_str();
  426. #endif
  427. }
  428. static void RE_CALL transform_primary(re_str<wchar_t>& out, const re_str<wchar_t>& in
  429. #ifdef RE_LOCALE_CPP
  430. , const __JM_STD::locale& l
  431. #endif
  432. )
  433. {
  434. transform(out, in MAYBE_PASS_LOCALE(l));
  435. #ifdef RE_LOCALE_W32
  436. re_trunc_primary(out);
  437. #else
  438. unsigned n = in.size() + out.size() / 4;
  439. if(n < out.size())
  440. out[n] = 0;
  441. #endif
  442. }
  443. static bool RE_CALL is_separator(wchar_t c)
  444. {
  445. return JM_MAKE_BOOL((c == L'\n') || (c == L'\r') || (c == (wchar_t)0x2028) || (c == (wchar_t)0x2029));
  446. }
  447. static bool RE_CALL is_combining(wchar_t c)
  448. {
  449. return re_is_combining(c);
  450. }
  451. static bool RE_CALL is_class(wchar_t c, jm_uintfast32_t f
  452. #ifdef RE_LOCALE_CPP
  453. , const __JM_STD::locale& l
  454. #endif
  455. )
  456. {
  457. #ifdef RE_LOCALE_CPP
  458. if(JM_USE_FACET(l, __JM_STD::ctype<wchar_t>).is((__JM_STD::ctype<wchar_t>::mask)(f & char_class_all_base), c))
  459. return true;
  460. if((f & char_class_underscore) && (c == '_'))
  461. return true;
  462. if((f & char_class_blank) && ((c == ' ') || (c == '\t')))
  463. return true;
  464. if((f & char_class_unicode) && (c > (size_type)(uchar_type)255))
  465. return true;
  466. return false;
  467. #else
  468. return JM_MAKE_BOOL(((uchar_type)c < 256) ? (re_unicode_classes[(size_type)(uchar_type)c] & f) : re_iswclass(c, f));
  469. #endif
  470. }
  471. static int RE_CALL toi(wchar_t c
  472. #ifdef RE_LOCALE_CPP
  473. , const __JM_STD::locale& l
  474. #endif
  475. )
  476. {
  477. return re_toi(c MAYBE_PASS_LOCALE(l));
  478. }
  479. static int RE_CALL toi(const wchar_t*& first, const wchar_t* last, int radix
  480. #ifdef RE_LOCALE_CPP
  481. , const __JM_STD::locale& l
  482. #endif
  483. )
  484. {
  485. return re_toi(first, last, radix MAYBE_PASS_LOCALE(l));
  486. }
  487. static jm_uintfast32_t RE_CALL lookup_classname(const wchar_t* first, const wchar_t* last
  488. #ifdef RE_LOCALE_CPP
  489. , const __JM_STD::locale& l
  490. #endif
  491. )
  492. {
  493. #ifdef RE_LOCALE_CPP
  494. return JM_USE_FACET(l, regfacet<wchar_t>).lookup_classname(first, last);
  495. #else
  496. return re_lookup_class(first, last);
  497. #endif
  498. }
  499. static bool RE_CALL lookup_collatename(re_str<wchar_t>& s, const wchar_t* first, const wchar_t* last
  500. #ifdef RE_LOCALE_CPP
  501. , const __JM_STD::locale& l
  502. #endif
  503. )
  504. {
  505. #ifdef RE_LOCALE_CPP
  506. re_str<wchar_t> n(first, last);
  507. return JM_USE_FACET(l, regfacet<wchar_t>).lookup_collatename(s, n);
  508. #else
  509. return re_lookup_collate(s, first, last);
  510. #endif
  511. }
  512. };
  513. #endif
  514. //
  515. // class char_regex_traits_i
  516. // provides case insensitive traits classes:
  517. template <class charT>
  518. class char_regex_traits_i : public char_regex_traits<charT> {};
  519. JM_TEMPLATE_SPECIALISE
  520. class char_regex_traits_i<char> : public char_regex_traits<char>
  521. {
  522. public:
  523. typedef char char_type;
  524. typedef unsigned char uchar_type;
  525. typedef unsigned int size_type;
  526. typedef char_regex_traits<char> base_type;
  527. static char RE_CALL translate(char c, bool
  528. #ifdef RE_LOCALE_CPP
  529. , const __JM_STD::locale& l
  530. #endif
  531. )
  532. {
  533. #ifdef RE_LOCALE_CPP
  534. return JM_USE_FACET(l, __JM_STD::ctype<char>).tolower((char_type)c);
  535. #else
  536. return re_lower_case_map[(size_type)(uchar_type)c];
  537. #endif
  538. }
  539. };
  540. #ifndef JM_NO_WCSTRING
  541. JM_TEMPLATE_SPECIALISE
  542. class char_regex_traits_i<wchar_t> : public char_regex_traits<wchar_t>
  543. {
  544. public:
  545. typedef wchar_t char_type;
  546. typedef unsigned short uchar_type;
  547. typedef unsigned int size_type;
  548. typedef char_regex_traits<wchar_t> base_type;
  549. static wchar_t RE_CALL translate(wchar_t c, bool
  550. #ifdef RE_LOCALE_CPP
  551. , const __JM_STD::locale& l
  552. #endif
  553. )
  554. {
  555. #ifdef RE_LOCALE_CPP
  556. return JM_USE_FACET(l, __JM_STD::ctype<wchar_t>).tolower((char_type)c);
  557. #else
  558. return (c < 256) ? re_lower_case_map_w[(uchar_type)c] : re_wtolower(c);
  559. #endif
  560. }
  561. static jm_uintfast32_t RE_CALL lookup_classname(const wchar_t* first, const wchar_t* last
  562. #ifdef RE_LOCALE_CPP
  563. , const __JM_STD::locale& l
  564. #endif
  565. )
  566. {
  567. jm_uintfast32_t result = char_regex_traits<wchar_t>::lookup_classname(first, last MAYBE_PASS_LOCALE(l));
  568. if((result & char_class_upper) == char_class_upper)
  569. result |= char_class_alpha;
  570. return result;
  571. }
  572. };
  573. #endif
  // Bit flags combined into the masks passed alongside the character maps
  // in this header; mask_any and mask_all both select the take and skip
  // bits together.
  enum mask_type
  {
     mask_take = 1,
     mask_skip = 2,
     mask_any  = mask_take | mask_skip,
     mask_all  = mask_any
  };
  581. struct __narrow_type{};
  582. struct __wide_type{};
  583. template <class charT>
  584. class is_byte;
  585. JM_TEMPLATE_SPECIALISE
  586. class is_byte<char>
  587. {
  588. public:
  589. typedef __narrow_type width_type;
  590. };
  591. JM_TEMPLATE_SPECIALISE
  592. class is_byte<unsigned char>
  593. {
  594. public:
  595. typedef __narrow_type width_type;
  596. };
  597. JM_TEMPLATE_SPECIALISE
  598. class is_byte<signed char>
  599. {
  600. public:
  601. typedef __narrow_type width_type;
  602. };
  603. template <class charT>
  604. class is_byte
  605. {
  606. public:
  607. typedef __wide_type width_type;
  608. };
  609. //
  610. // compiled structures
  611. //
  612. // the following defs describe the format of the compiled string
  613. //
  614. //
  // enum syntax_element_type
  // Identifies the kind of each record in the compiled expression.
  // Enumerators are numbered consecutively from zero.
  enum syntax_element_type
  {
     syntax_element_startmark = 0,
     syntax_element_endmark,
     syntax_element_literal,
     syntax_element_start_line,
     syntax_element_end_line,
     syntax_element_wild,
     syntax_element_match,
     syntax_element_word_boundary,
     syntax_element_within_word,
     syntax_element_word_start,
     syntax_element_word_end,
     syntax_element_buffer_start,
     syntax_element_buffer_end,
     syntax_element_backref,
     syntax_element_long_set,
     syntax_element_set,
     syntax_element_jump,
     syntax_element_alt,
     syntax_element_rep,
     syntax_element_combining,
     syntax_element_soft_buffer_end,
     syntax_element_restart_continue
  };
  642. union offset_type
  643. {
  644. re_syntax_base* p;
  645. unsigned i;
  646. };
  647. //
  648. // struct re_syntax_base
  649. // base class for all syntax types:
  650. struct re_syntax_base
  651. {
  652. syntax_element_type type;
  653. offset_type next;
  654. unsigned int can_be_null;
  655. };
  656. //
  657. // struct re_brace
  658. // marks start or end of (...)
  659. struct re_brace : public re_syntax_base
  660. {
  661. unsigned int index;
  662. };
  663. //
  664. // struct re_literal
  665. // marks a literal string and
  666. // is followed by an array of charT[length]:
  667. struct re_literal : public re_syntax_base
  668. {
  669. unsigned int length;
  670. };
  671. //
  672. // struct re_long_set
  673. // provides data for sets [...] containing
  674. // wide characters
  675. struct re_set_long : public re_syntax_base
  676. {
  677. unsigned int csingles, cranges, cequivalents;
  678. jm_uintfast32_t cclasses;
  679. bool isnot;
  680. };
  681. //
  682. // struct re_set
  683. // provides a map of bools for sets containing
  684. // narrow, single byte characters.
  685. struct re_set : public re_syntax_base
  686. {
  687. unsigned char __map[256];
  688. };
  689. //
  690. // struct re_jump
  691. // provides alternative next destination
  692. struct re_jump : public re_syntax_base
  693. {
  694. offset_type alt;
  695. unsigned char __map[256];
  696. };
  697. //
  698. // struct re_repeat
  699. // provides repeat expressions
  700. struct re_repeat : public re_jump
  701. {
  702. unsigned min, max;
  703. int id;
  704. bool leading;
  705. };
  706. //
  707. // enum re_jump_size_type
  708. // provides compiled size of re_jump
  709. // allowing for trailing alignment
  710. // provide this so we know how many
  711. // bytes to insert
  712. enum re_jump_size_type
  713. {
  714. re_jump_size = (sizeof(re_jump) + sizeof(padding) - 1) & ~(sizeof(padding) - 1),
  715. re_repeater_size = (sizeof(re_repeat) + sizeof(padding) - 1) & ~(sizeof(padding) - 1)
  716. };
  717. //
  718. // class basic_regex
  719. // handles error codes and flags
  720. class JM_IX_DECL regbase
  721. {
  722. protected:
  723. #ifdef RE_LOCALE_CPP
  724. __JM_STD::locale locale_inst;
  725. #endif
  726. jm_uintfast32_t _flags;
  727. unsigned int code;
  728. public:
  729. enum flag_type
  730. {
  731. escape_in_lists = 1, // '\' special inside [...]
  732. char_classes = escape_in_lists << 1, // [[:CLASS:]] allowed
  733. intervals = char_classes << 1, // {x,y} allowed
  734. limited_ops = intervals << 1, // all of + ? and | are normal characters
  735. newline_alt = limited_ops << 1, // \n is the same as |
  736. bk_plus_qm = newline_alt << 1, // uses \+ and \?
  737. bk_braces = bk_plus_qm << 1, // uses \{ and \}
  738. bk_parens = bk_braces << 1, // uses \( and \)
  739. bk_refs = bk_parens << 1, // \d allowed
  740. bk_vbar = bk_refs << 1, // uses \|
  741. use_except = bk_vbar << 1, // exception on error
  742. failbit = use_except << 1, // error flag
  743. literal = failbit << 1, // all characters are literals
  744. icase = literal << 1, // characters are matched regardless of case
  745. nocollate = icase << 1, // don't use locale specific collation
  746. basic = char_classes | intervals | limited_ops | bk_braces | bk_parens | bk_refs,
  747. extended = char_classes | intervals | bk_refs,
  748. normal = escape_in_lists | char_classes | intervals | bk_refs | nocollate
  749. };
  750. enum restart_info
  751. {
  752. restart_any = 0,
  753. restart_word = 1,
  754. restart_line = 2,
  755. restart_buf = 3,
  756. restart_continue = 4,
  757. restart_lit = 5,
  758. restart_fixed_lit = 6
  759. };
  760. unsigned int RE_CALL error_code()const
  761. {
  762. return code;
  763. }
  764. void RE_CALL fail(unsigned int err);
  765. jm_uintfast32_t RE_CALL flags()const
  766. {
  767. return _flags;
  768. }
  769. #ifdef RE_LOCALE_CPP
  770. __JM_STD::string RE_CALL errmsg()const
  771. {
  772. return re_get_error_str(code, locale_inst);
  773. }
  774. #else
  775. const char* RE_CALL errmsg()const
  776. {
  777. return re_get_error_str(code);
  778. }
  779. #endif
  780. regbase();
  781. regbase(const regbase& b);
  782. #ifdef RE_LOCALE_CPP
  783. __JM_STD::locale RE_CALL imbue(const __JM_STD::locale& l);
  784. const __JM_STD::locale& RE_CALL locale()const
  785. {
  786. return locale_inst;
  787. }
  788. #endif
  789. };
  //
  // Forward declarations of the match result class and of the private
  // match data class; both are referenced by reg_expression below.
  template <class iterator, class Allocator JM_DEF_ALLOC_PARAM(iterator) >
  class reg_match;

  template <class iterator, class Allocator>
  class __priv_match_data;
  796. //
  797. // class reg_expression
  798. // represents the compiled
  799. // regular expression:
  800. //
  801. #if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES)
  802. //
  803. // Ugly, ugly hack:
  804. // templates don't merge if they contain switch statements, so declare these
  805. // templates in an unnamed namespace (i.e. with internal linkage); each translation
  806. // unit then gets its own local copy. It works seamlessly but bloats the app.
  807. namespace{
  808. #endif
  809. template <class charT, class traits JM_TRICKY_DEFAULT_PARAM(char_regex_traits<charT>), class Allocator JM_DEF_ALLOC_PARAM(charT) >
  810. class reg_expression : public regbase
  811. {
  812. public:
  813. // typedefs:
  814. typedef Allocator alloc_type;
  815. typedef typename REBIND_TYPE(charT, alloc_type)::size_type size_type;
  816. typedef charT value_type;
  817. typedef charT char_type;
  818. typedef traits traits_type;
  819. typedef typename traits_type::size_type traits_size_type;
  820. typedef typename traits_type::uchar_type traits_uchar_type;
  821. private:
  822. #if defined(RE_LOCALE_C) || defined(RE_LOCALE_W32)
  823. re_initialiser<charT> locale_initialiser;
  824. #endif
  825. raw_storage<Allocator> data;
  826. unsigned _restart_type;
  827. unsigned marks;
  828. int repeats;
  829. unsigned char* startmap;
  830. charT* _expression;
  831. unsigned int _leading_len;
  832. const charT* _leading_string;
  833. unsigned int _leading_string_len;
  834. kmp_info<charT>* pkmp;
  835. void RE_CALL compile_maps();
  836. void RE_CALL compile_map(re_syntax_base* node, unsigned char* __map, unsigned int* pnull, unsigned char mask, re_syntax_base* terminal = NULL)const;
  837. bool RE_CALL probe_start(re_syntax_base* node, charT c, re_syntax_base* terminal)const;
  838. bool RE_CALL probe_start_null(re_syntax_base* node, re_syntax_base* terminal)const;
  839. void RE_CALL fixup_apply(re_syntax_base* b, unsigned cbraces);
  840. void RE_CALL move_offsets(re_syntax_base* j, unsigned size);
  841. re_syntax_base* RE_CALL compile_set(const charT*& first, const charT* last);
  842. re_syntax_base* RE_CALL compile_set_aux(jstack<re_str<charT>, Allocator>& singles, jstack<re_str<charT>, Allocator>& ranges, jstack<jm_uintfast32_t, Allocator>& classes, jstack<re_str<charT>, Allocator>& equivalents, bool isnot, const __narrow_type&);
  843. re_syntax_base* RE_CALL compile_set_aux(jstack<re_str<charT>, Allocator>& singles, jstack<re_str<charT>, Allocator>& ranges, jstack<jm_uintfast32_t, Allocator>& classes, jstack<re_str<charT>, Allocator>& equivalents, bool isnot, const __wide_type&);
  844. re_syntax_base* RE_CALL compile_set_simple(re_syntax_base* dat, unsigned long cls, bool isnot = false);
  845. unsigned int RE_CALL parse_inner_set(const charT*& first, const charT* last);
  846. re_syntax_base* RE_CALL add_simple(re_syntax_base* dat, syntax_element_type type, unsigned int size = sizeof(re_syntax_base));
  847. re_syntax_base* RE_CALL add_literal(re_syntax_base* dat, charT c);
  848. charT RE_CALL parse_escape(const charT*& first, const charT* last);
  849. void RE_CALL parse_range(const charT*& first, const charT* last, unsigned& min, unsigned& max);
  850. bool RE_CALL skip_space(const charT*& first, const charT* last);
  851. unsigned int RE_CALL probe_restart(re_syntax_base* dat);
  852. unsigned int RE_CALL fixup_leading_rep(re_syntax_base* dat, re_syntax_base* end);
  853. public:
  854. unsigned int RE_CALL set_expression(const charT* p, const charT* end, jm_uintfast32_t f = regbase::normal);
  855. unsigned int RE_CALL set_expression(const charT* p, jm_uintfast32_t f = regbase::normal) { return set_expression(p, p + traits_type::length(p), f); }
  856. reg_expression(const Allocator& a = Allocator());
  857. reg_expression(const charT* p, jm_uintfast32_t f = regbase::normal, const Allocator& a = Allocator());
  858. reg_expression(const charT* p1, const charT* p2, jm_uintfast32_t f = regbase::normal, const Allocator& a = Allocator());
  859. reg_expression(const charT* p, size_type len, jm_uintfast32_t f, const Allocator& a = Allocator());
  860. reg_expression(const reg_expression&);
  861. ~reg_expression();
  862. reg_expression& RE_CALL operator=(const reg_expression&);
  863. #ifndef JM_NO_MEMBER_TEMPLATES
  864. template <class ST, class SA>
  865. unsigned int RE_CALL set_expression(const __JM_STD::basic_string<charT, ST, SA>& p, jm_uintfast32_t f = regbase::normal)
  866. { return set_expression(p.data(), p.data() + p.size(), f); }
  867. template <class ST, class SA>
  868. reg_expression(const __JM_STD::basic_string<charT, ST, SA>& p, jm_uintfast32_t f = regbase::normal, const Allocator& a = Allocator())
  869. : data(a), pkmp(0) { set_expression(p, f); }
  870. #elif !defined(JM_NO_STRING_DEF_ARGS)
  871. unsigned int RE_CALL set_expression(const __JM_STD::basic_string<charT>& p, jm_uintfast32_t f = regbase::normal)
  872. { return set_expression(p.data(), p.data() + p.size(), f); }
  873. reg_expression(const __JM_STD::basic_string<charT>& p, jm_uintfast32_t f = regbase::normal, const Allocator& a = Allocator())
  874. : data(a), pkmp(0) { set_expression(p, f); }
  875. #endif
  876. bool RE_CALL operator==(const reg_expression&);
  877. bool RE_CALL operator<(const reg_expression&);
  878. alloc_type RE_CALL allocator()const;
  879. const charT* RE_CALL expression()const { return _expression; }
  880. unsigned RE_CALL mark_count()const { return marks; }
  881. #if !defined(JM_NO_TEMPLATE_FRIEND) && (!defined(JM_NO_TEMPLATE_SWITCH_MERGE) || defined(JM_NO_NAMESPACES))
  882. #if 0
  883. template <class Predicate, class I, class charT, class traits, class A, class A2>
  884. friend unsigned int reg_grep2(Predicate foo, I first, I last, const reg_expression<charT, traits, A>& e, unsigned flags, A2 a);
  885. template <class I, class A, class charT, class traits, class A2>
  886. friend bool query_match(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e, unsigned flags);
  887. template <class I, class A, class charT, class traits, class A2>
  888. friend bool query_match_aux(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e,
  889. unsigned flags, __priv_match_data<I, A>& pd, I* restart);
  890. template <class I, class A, class charT, class traits, class A2>
  891. friend bool reg_search(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e, unsigned flags);
  892. private:
  893. #endif
  894. #endif
  895. int RE_CALL repeat_count() const { return repeats; }
  896. unsigned int RE_CALL restart_type()const { return _restart_type; }
  897. const re_syntax_base* RE_CALL first()const { return (const re_syntax_base*)data.data(); }
  898. const unsigned char* RE_CALL get_map()const { return startmap; }
  899. unsigned int RE_CALL leading_length()const { return _leading_len; }
  900. const kmp_info<charT>* get_kmp()const { return pkmp; }
  901. static bool RE_CALL can_start(charT c, const unsigned char* __map, unsigned char mask, const __wide_type&);
  902. static bool RE_CALL can_start(charT c, const unsigned char* __map, unsigned char mask, const __narrow_type&);
  903. };
  904. #if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES)
  905. } // namespace
  906. #endif
  907. //
  908. // class reg_match and reg_match_base
  909. // handles what matched where
  910. template <class iterator>
  911. struct sub_match
  912. {
  913. iterator first;
  914. iterator second;
  915. bool matched;
  916. #ifndef JM_NO_MEMBER_TEMPLATES
  917. template <class charT, class traits, class Allocator>
  918. operator __JM_STD::basic_string<charT, traits, Allocator> ()const;
  919. #elif !defined(JM_NO_STRING_DEF_ARGS)
  920. operator __JM_STD::basic_string<char> ()const;
  921. operator __JM_STD::basic_string<wchar_t> ()const;
  922. #endif
  923. operator int()const;
  924. operator unsigned int()const;
  925. operator short()const
  926. {
  927. return (short)(int)(*this);
  928. }
  929. operator unsigned short()const
  930. {
  931. return (unsigned short)(unsigned int)(*this);
  932. }
  933. sub_match() { matched = false; }
  934. sub_match(iterator i) : first(i), second(i), matched(false) {}
  935. };
  936. #ifndef JM_NO_MEMBER_TEMPLATES
  937. template <class iterator>
  938. template <class charT, class traits, class Allocator>
  939. sub_match<iterator>::operator __JM_STD::basic_string<charT, traits, Allocator> ()const
  940. {
  941. #if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_TYPEINFO)
  942. if(typeid(charT) != typeid(*first))
  943. throw __JM_STD::bad_cast();
  944. #endif
  945. __JM_STD::basic_string<charT, traits, Allocator> result;
  946. iterator i = first;
  947. while(i != second)
  948. {
  949. result.append(1, *i);
  950. ++i;
  951. }
  952. return result;
  953. }
  954. #elif !defined(JM_NO_STRING_DEF_ARGS)
  955. template <class iterator>
  956. sub_match<iterator>::operator __JM_STD::basic_string<char> ()const
  957. {
  958. #if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_TYPEINFO)
  959. if(typeid(char) != typeid(*first))
  960. throw __JM_STD::bad_cast();
  961. #endif
  962. __JM_STD::basic_string<char> result;
  963. iterator i = first;
  964. while(i != second)
  965. {
  966. result.append(1, *i);
  967. ++i;
  968. }
  969. return result;
  970. }
  971. template <class iterator>
  972. sub_match<iterator>::operator __JM_STD::basic_string<wchar_t> ()const
  973. {
  974. #if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_TYPEINFO)
  975. if(typeid(wchar_t) != typeid(*first))
  976. throw __JM_STD::bad_cast();
  977. #endif
  978. __JM_STD::basic_string<wchar_t> result;
  979. iterator i = first;
  980. while(i != second)
  981. {
  982. result.append(1, *i);
  983. ++i;
  984. }
  985. return result;
  986. }
  987. #endif
  988. template <class iterator>
  989. sub_match<iterator>::operator int()const
  990. {
  991. iterator i = first;
  992. int neg = 1;
  993. if((i != second) && (*i == '-'))
  994. {
  995. neg = -1;
  996. ++i;
  997. }
  998. neg *= (int)re_toi(i, second, 10 MAYBE_PASS_LOCALE(__JM_STD::locale()));
  999. #if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_TYPEINFO)
  1000. if(i != second)
  1001. {
  1002. throw __JM_STD::bad_cast();
  1003. }
  1004. #endif
  1005. return neg;
  1006. }
  1007. template <class iterator>
  1008. sub_match<iterator>::operator unsigned int()const
  1009. {
  1010. iterator i = first;
  1011. unsigned int result = (int)re_toi(i, second, 10 MAYBE_PASS_LOCALE(__JM_STD::locale()));
  1012. #if !defined(JM_NO_EXCEPTIONS) && !defined(JM_NO_TYPEINFO)
  1013. if(i != second)
  1014. {
  1015. throw __JM_STD::bad_cast();
  1016. }
  1017. #endif
  1018. return result;
  1019. }
  1020. template <class iterator, class Allocator JM_DEF_ALLOC_PARAM(iterator) >
  1021. class reg_match_base
  1022. {
  1023. public:
  1024. typedef Allocator alloc_type;
  1025. typedef typename REBIND_TYPE(iterator, Allocator)::size_type size_type;
  1026. typedef JM_MAYBE_TYPENAME REBIND_TYPE(char, Allocator) c_alloc;
  1027. typedef iterator value_type;
  1028. protected:
  1029. struct reference : public c_alloc
  1030. {
  1031. unsigned int cmatches;
  1032. unsigned count;
  1033. sub_match<iterator> head, tail, null;
  1034. unsigned int lines;
  1035. iterator line_pos;
  1036. reference(const Allocator& a) : c_alloc(a) { }
  1037. };
  1038. reference* ref;
  1039. void RE_CALL cow();
  1040. // protected contructor for derived class...
  1041. reg_match_base(bool){}
  1042. void RE_CALL free();
  1043. public:
  1044. reg_match_base(const Allocator& a = Allocator());
  1045. reg_match_base(const reg_match_base& m)
  1046. {
  1047. ref = m.ref;
  1048. ++(ref->count);
  1049. }
  1050. reg_match_base& RE_CALL operator=(const reg_match_base& m);
  1051. ~reg_match_base()
  1052. {
  1053. free();
  1054. }
  1055. size_type RE_CALL size()const
  1056. {
  1057. return ref->cmatches;
  1058. }
  1059. const sub_match<iterator>& RE_CALL operator[](int n) const
  1060. {
  1061. if((n >= 0) && ((unsigned int)n < ref->cmatches))
  1062. return *(sub_match<iterator>*)((char*)ref + sizeof(reference) + sizeof(sub_match<iterator>)*n);
  1063. return (n == -1) ? ref->head : (n == -2) ? ref->tail : ref->null;
  1064. }
  1065. Allocator RE_CALL allocator()const;
  1066. size_t RE_CALL length()const
  1067. {
  1068. jm_assert(ref->cmatches);
  1069. size_t n = 0;
  1070. JM_DISTANCE(((sub_match<iterator>*)(ref+1))->first, ((sub_match<iterator>*)(ref+1))->second, n);
  1071. return n;
  1072. }
  1073. unsigned int RE_CALL line()const
  1074. {
  1075. return ref->lines;
  1076. }
  1077. iterator RE_CALL line_start()const
  1078. {
  1079. return ref->line_pos;
  1080. }
  1081. void swap(reg_match_base& that)
  1082. {
  1083. reference* t = that.ref;
  1084. that.ref = ref;
  1085. ref = t;
  1086. }
  1087. friend class reg_match<iterator, Allocator>;
  1088. #if !defined(JM_NO_TEMPLATE_FRIEND) && (!defined(JM_NO_TEMPLATE_SWITCH_MERGE) || defined(JM_NO_NAMESPACES))
  1089. private:
  1090. template <class Predicate, class I, class charT, class traits, class A, class A2>
  1091. friend unsigned int reg_grep2(Predicate foo, I first, I last, const reg_expression<charT, traits, A>& e, unsigned flags, A2 a);
  1092. template <class I, class A, class charT, class traits, class A2>
  1093. friend bool query_match(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e, unsigned flags);
  1094. template <class I, class A, class charT, class traits, class A2>
  1095. friend bool query_match_aux(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e,
  1096. unsigned flags, __priv_match_data<I, A>& pd, I* restart);
  1097. template <class I, class A, class charT, class traits, class A2>
  1098. friend bool reg_search(I first, I last, reg_match<I, A>& m, const reg_expression<charT, traits, A2>& e, unsigned flags);
  1099. #endif
  1100. void RE_CALL set_size(size_type n);
  1101. void RE_CALL set_size(size_type n, iterator i, iterator j);
  1102. void RE_CALL maybe_assign(const reg_match_base& m);
  1103. void RE_CALL init_fail(iterator i, iterator j);
  1104. void RE_CALL set_first(iterator i)
  1105. {
  1106. cow();
  1107. ((sub_match<iterator>*)(ref+1))->first = i;
  1108. ref->head.second = i;
  1109. ref->head.matched = (ref->head.first == ref->head.second) ? false : true;
  1110. }
  1111. void RE_CALL set_first(iterator i, size_t pos)
  1112. {
  1113. cow();
  1114. ((sub_match<iterator>*)((char*)ref + sizeof(reference) + sizeof(sub_match<iterator>) * pos))->first = i;
  1115. if(pos == 0)
  1116. {
  1117. ref->head.second = i;
  1118. ref->head.matched = (ref->head.first == ref->head.second) ? false : true;
  1119. }
  1120. }
  1121. void RE_CALL set_second(iterator i)
  1122. {
  1123. cow();
  1124. ((sub_match<iterator>*)(ref+1))->second = i;
  1125. ((sub_match<iterator>*)(ref+1))->matched = true;
  1126. ref->tail.first = i;
  1127. ref->tail.matched = (ref->tail.first == ref->tail.second) ? false : true;
  1128. }
  1129. void RE_CALL set_second(iterator i, size_t pos)
  1130. {
  1131. cow();
  1132. ((sub_match<iterator>*)((char*)ref + sizeof(reference) + sizeof(sub_match<iterator>) * pos))->second = i;
  1133. ((sub_match<iterator>*)((char*)ref + sizeof(reference) + sizeof(sub_match<iterator>) * pos))->matched = true;
  1134. if(pos == 0)
  1135. {
  1136. ref->tail.first = i;
  1137. ref->tail.matched = (ref->tail.first == ref->tail.second) ? false : true;
  1138. }
  1139. }
  1140. void RE_CALL set_line(unsigned int i, iterator pos)
  1141. {
  1142. ref->lines = i;
  1143. ref->line_pos = pos;
  1144. }
  1145. };
  1146. template <class iterator, class Allocator>
  1147. reg_match_base<iterator, Allocator>::reg_match_base(const Allocator& a)
  1148. {
  1149. ref = (reference*)c_alloc(a).allocate(sizeof(sub_match<iterator>) + sizeof(reference));
  1150. #ifndef JM_NO_EXCEPTIONS
  1151. try
  1152. {
  1153. #endif
  1154. new (ref) reference(a);
  1155. ref->cmatches = 1;
  1156. ref->count = 1;
  1157. // construct the sub_match<iterator>:
  1158. #ifndef JM_NO_EXCEPTIONS
  1159. try
  1160. {
  1161. #endif
  1162. new ((sub_match<iterator>*)(ref+1)) sub_match<iterator>();
  1163. #ifndef JM_NO_EXCEPTIONS
  1164. }
  1165. catch(...)
  1166. {
  1167. jm_destroy(ref);
  1168. throw;
  1169. }
  1170. #endif
  1171. #ifndef JM_NO_EXCEPTIONS
  1172. }
  1173. catch(...)
  1174. {
  1175. c_alloc(a).deallocate((char*)(void*)ref, sizeof(sub_match<iterator>) + sizeof(reference));
  1176. throw;
  1177. }
  1178. #endif
  1179. }
  1180. template <class iterator, class Allocator>
  1181. Allocator RE_CALL reg_match_base<iterator, Allocator>::allocator()const
  1182. {
  1183. return *((c_alloc*)ref);
  1184. }
  1185. template <class iterator, class Allocator>
  1186. inline reg_match_base<iterator, Allocator>& RE_CALL reg_match_base<iterator, Allocator>::operator=(const reg_match_base<iterator, Allocator>& m)
  1187. {
  1188. if(ref != m.ref)
  1189. {
  1190. free();
  1191. ref = m.ref;
  1192. ++(ref->count);
  1193. }
  1194. return *this;
  1195. }
  1196. template <class iterator, class Allocator>
  1197. void RE_CALL reg_match_base<iterator, Allocator>::free()
  1198. {
  1199. if(--(ref->count) == 0)
  1200. {
  1201. c_alloc a(*ref);
  1202. sub_match<iterator>* p1, *p2;
  1203. p1 = (sub_match<iterator>*)(ref+1);
  1204. p2 = p1 + ref->cmatches;
  1205. while(p1 != p2)
  1206. {
  1207. jm_destroy(p1);
  1208. ++p1;
  1209. }
  1210. jm_destroy(ref);
  1211. a.deallocate((char*)(void*)ref, sizeof(sub_match<iterator>) * ref->cmatches + sizeof(reference));
  1212. }
  1213. }
  1214. template <class iterator, class Allocator>
  1215. void RE_CALL reg_match_base<iterator, Allocator>::set_size(size_type n)
  1216. {
  1217. if(ref->cmatches != n)
  1218. {
  1219. reference* newref = (reference*)ref->allocate(sizeof(sub_match<iterator>) * n + sizeof(reference));
  1220. #ifndef JM_NO_EXCEPTIONS
  1221. try
  1222. {
  1223. #endif
  1224. new (newref) reference(*ref);
  1225. newref->count = 1;
  1226. newref->cmatches = n;
  1227. sub_match<iterator>* p1, *p2;
  1228. p1 = (sub_match<iterator>*)(newref+1);
  1229. p2 = p1 + newref->cmatches;
  1230. #ifndef JM_NO_EXCEPTIONS
  1231. try
  1232. {
  1233. #endif
  1234. while(p1 != p2)
  1235. {
  1236. new (p1) sub_match<iterator>();
  1237. ++p1;
  1238. }
  1239. free();
  1240. #ifndef JM_NO_EXCEPTIONS
  1241. }
  1242. catch(...)
  1243. {
  1244. p2 = (sub_match<iterator>*)(newref+1);
  1245. while(p2 != p1)
  1246. {
  1247. jm_destroy(p2);
  1248. ++p2;
  1249. }
  1250. jm_destroy(ref);
  1251. throw;
  1252. }
  1253. #endif
  1254. ref = newref;
  1255. #ifndef JM_NO_EXCEPTIONS
  1256. }
  1257. catch(...)
  1258. {
  1259. ref->deallocate((char*)(void*)newref, sizeof(sub_match<iterator>) * n + sizeof(reference));
  1260. throw;
  1261. }
  1262. #endif
  1263. }
  1264. }
  1265. template <class iterator, class Allocator>
  1266. void RE_CALL reg_match_base<iterator, Allocator>::set_size(size_type n, iterator i, iterator j)
  1267. {
  1268. if(ref->cmatches != n)
  1269. {
  1270. reference* newref = (reference*)ref->allocate(sizeof(sub_match<iterator>) * n + sizeof(reference));;
  1271. #ifndef JM_NO_EXCEPTIONS
  1272. try{
  1273. #endif
  1274. new (newref) reference(*ref);
  1275. newref->count = 1;
  1276. newref->cmatches = n;
  1277. sub_match<iterator>* p1, *p2;
  1278. p1 = (sub_match<iterator>*)(newref+1);
  1279. p2 = p1 + newref->cmatches;
  1280. #ifndef JM_NO_EXCEPTIONS
  1281. try
  1282. {
  1283. #endif
  1284. while(p1 != p2)
  1285. {
  1286. new (p1) sub_match<iterator>(j);
  1287. ++p1;
  1288. }
  1289. free();
  1290. #ifndef JM_NO_EXCEPTIONS
  1291. }
  1292. catch(...)
  1293. {
  1294. p2 = (sub_match<iterator>*)(newref+1);
  1295. while(p2 != p1)
  1296. {
  1297. jm_destroy(p2);
  1298. ++p2;
  1299. }
  1300. jm_destroy(ref);
  1301. throw;
  1302. }
  1303. #endif
  1304. ref = newref;
  1305. #ifndef JM_NO_EXCEPTIONS
  1306. }
  1307. catch(...)
  1308. {
  1309. ref->deallocate((char*)(void*)newref, sizeof(sub_match<iterator>) * n + sizeof(reference));
  1310. throw;
  1311. }
  1312. #endif
  1313. }
  1314. else
  1315. {
  1316. cow();
  1317. // set iterators to be i, matched to false:
  1318. sub_match<iterator>* p1, *p2;
  1319. p1 = (sub_match<iterator>*)(ref+1);
  1320. p2 = p1 + ref->cmatches;
  1321. while(p1 != p2)
  1322. {
  1323. p1->first = j;
  1324. p1->second = j;
  1325. p1->matched = false;
  1326. ++p1;
  1327. }
  1328. }
  1329. ref->head.first = i;
  1330. ref->tail.second = j;
  1331. ref->head.matched = ref->tail.matched = true;
  1332. ref->null.first = ref->null.second = j;
  1333. ref->null.matched = false;
  1334. }
  1335. template <class iterator, class Allocator>
  1336. inline void RE_CALL reg_match_base<iterator, Allocator>::init_fail(iterator i, iterator j)
  1337. {
  1338. set_size(ref->cmatches, i, j);
  1339. }
  1340. template <class iterator, class Allocator>
  1341. void RE_CALL reg_match_base<iterator, Allocator>::maybe_assign(const reg_match_base<iterator, Allocator>& m)
  1342. {
  1343. sub_match<iterator>* p1, *p2;
  1344. p1 = (sub_match<iterator>*)(ref+1);
  1345. p2 = (sub_match<iterator>*)(m.ref+1);
  1346. unsigned int len1, len2;
  1347. unsigned int i;
  1348. for(i = 0; i < ref->cmatches; ++i)
  1349. {
  1350. len1 = len2 = 0;
  1351. JM_DISTANCE(p1->first, p1->second, len1);
  1352. JM_DISTANCE(p2->first, p2->second, len2);
  1353. if((len1 != len2) || ((p1->matched == false) && (p2->matched == true)))
  1354. break;
  1355. if((p1->matched == true) && (p2->matched == false))
  1356. return;
  1357. ++p1;
  1358. ++p2;
  1359. }
  1360. if(i == ref->cmatches)
  1361. return;
  1362. if((len2 > len1) || ((p1->matched == false) && (p2->matched == true)) )
  1363. *this = m;
  1364. }
  1365. template <class iterator, class Allocator>
  1366. void RE_CALL reg_match_base<iterator, Allocator>::cow()
  1367. {
  1368. if(ref->count > 1)
  1369. {
  1370. reference* newref = (reference*)ref->allocate(sizeof(sub_match<iterator>) * ref->cmatches + sizeof(reference));
  1371. #ifndef JM_NO_EXCEPTIONS
  1372. try{
  1373. #endif
  1374. new (newref) reference(*ref);
  1375. newref->count = 1;
  1376. sub_match<iterator>* p1, *p2, *p3;
  1377. p1 = (sub_match<iterator>*)(newref+1);
  1378. p2 = p1 + newref->cmatches;
  1379. p3 = (sub_match<iterator>*)(ref+1);
  1380. #ifndef JM_NO_EXCEPTIONS
  1381. try{
  1382. #endif
  1383. while(p1 != p2)
  1384. {
  1385. new (p1) sub_match<iterator>(*p3);
  1386. ++p1;
  1387. ++p3;
  1388. }
  1389. #ifndef JM_NO_EXCEPTIONS
  1390. }
  1391. catch(...)
  1392. {
  1393. p2 = (sub_match<iterator>*)(newref+1);
  1394. while(p2 != p1)
  1395. {
  1396. jm_destroy(p2);
  1397. ++p2;
  1398. }
  1399. jm_destroy(ref);
  1400. throw;
  1401. }
  1402. #endif
  1403. --(ref->count);
  1404. ref = newref;
  1405. #ifndef JM_NO_EXCEPTIONS
  1406. }
  1407. catch(...)
  1408. {
  1409. ref->deallocate((char*)(void*)newref, sizeof(sub_match<iterator>) * ref->cmatches + sizeof(reference));
  1410. throw;
  1411. }
  1412. #endif
  1413. }
  1414. }
  1415. //
  1416. // class reg_match
  1417. // encapsulates reg_match_base, does a deep copy rather than
  1418. // reference counting to ensure thread safety when copying
  1419. // other reg_match instances
  1420. template <class iterator, class Allocator>
  1421. class reg_match : public reg_match_base<iterator, Allocator>
  1422. {
  1423. public:
  1424. reg_match(const Allocator& a = Allocator())
  1425. : reg_match_base<iterator, Allocator>(a){}
  1426. reg_match(const reg_match_base<iterator, Allocator>& m)
  1427. : reg_match_base<iterator, Allocator>(m){}
  1428. reg_match& operator=(const reg_match_base<iterator, Allocator>& m)
  1429. {
  1430. // shallow copy
  1431. reg_match_base<iterator, Allocator>::operator=(m);
  1432. return *this;
  1433. }
  1434. reg_match(const reg_match& m);
  1435. reg_match& operator=(const reg_match& m);
  1436. };
  1437. template <class iterator, class Allocator>
  1438. reg_match<iterator, Allocator>::reg_match(const reg_match<iterator, Allocator>& m)
  1439. : reg_match_base<iterator, Allocator>(false)
  1440. {
  1441. reg_match_base<iterator, Allocator>::ref = (typename reg_match_base<iterator, Allocator>::reference *)m.ref->allocate(sizeof(sub_match<iterator>) * m.ref->cmatches + sizeof(typename reg_match_base<iterator, Allocator>::reference));
  1442. #ifndef JM_NO_EXCEPTIONS
  1443. try{
  1444. #endif
  1445. new (reg_match_base<iterator, Allocator>::ref) typename reg_match_base<iterator, Allocator>::reference(*m.ref);
  1446. reg_match_base<iterator, Allocator>::ref->count = 1;
  1447. sub_match<iterator>* p1, *p2, *p3;
  1448. p1 = (sub_match<iterator>*)(reg_match_base<iterator, Allocator>::ref+1);
  1449. p2 = p1 + reg_match_base<iterator, Allocator>::ref->cmatches;
  1450. p3 = (sub_match<iterator>*)(m.ref+1);
  1451. #ifndef JM_NO_EXCEPTIONS
  1452. try{
  1453. #endif
  1454. while(p1 != p2)
  1455. {
  1456. new (p1) sub_match<iterator>(*p3);
  1457. ++p1;
  1458. ++p3;
  1459. }
  1460. #ifndef JM_NO_EXCEPTIONS
  1461. }
  1462. catch(...)
  1463. {
  1464. p2 = (sub_match<iterator>*)(reg_match_base<iterator, Allocator>::ref+1);
  1465. while(p2 != p1)
  1466. {
  1467. jm_destroy(p2);
  1468. ++p2;
  1469. }
  1470. jm_destroy(ref);
  1471. throw;
  1472. }
  1473. }
  1474. catch(...)
  1475. {
  1476. m.ref->deallocate((char*)(void*)reg_match_base<iterator, Allocator>::ref, sizeof(sub_match<iterator>) * m.ref->cmatches + sizeof(typename reg_match_base<iterator, Allocator>::reference));
  1477. throw;
  1478. }
  1479. #endif
  1480. }
  1481. template <class iterator, class Allocator>
  1482. reg_match<iterator, Allocator>& reg_match<iterator, Allocator>::operator=(const reg_match<iterator, Allocator>& m)
  1483. {
  1484. reg_match<iterator, Allocator> t(m);
  1485. this->swap(t);
  1486. return *this;
  1487. }
  1488. template <class iterator, class charT, class traits_type, class Allocator>
  1489. iterator RE_CALL re_is_set_member(iterator next,
  1490. iterator last,
  1491. re_set_long* set,
  1492. const reg_expression<charT, traits_type, Allocator>& e);
  1493. JM_END_NAMESPACE // namespace regex
  1494. #include <jm/regcomp.h>
  1495. JM_NAMESPACE(__JM)
  1496. typedef reg_expression<char, char_regex_traits<char>, JM_DEF_ALLOC(char)> regex;
  1497. #ifndef JM_NO_WCSTRING
  1498. typedef reg_expression<wchar_t, char_regex_traits<wchar_t>, JM_DEF_ALLOC(wchar_t)> wregex;
  1499. #endif
  1500. typedef reg_match<const char*, regex::alloc_type> cmatch;
  1501. #ifndef JM_NO_WCSTRING
  1502. typedef reg_match<const wchar_t*, wregex::alloc_type> wcmatch;
  1503. #endif
  1504. JM_END_NAMESPACE // namespace regex
  1505. #include <jm/regmatch.h>
  1506. #include <jm/regfmt.h>
  1507. #if !defined(JM_NO_NAMESPACES) && !defined(JM_NO_USING)
  1508. #ifndef JM_NO_EXCEPTIONS
  1509. using __JM::bad_expression;
  1510. #endif
  1511. using __JM::char_regex_traits;
  1512. using __JM::char_regex_traits_i;
  1513. using __JM::regbase;
  1514. using __JM::reg_expression;
  1515. using __JM::reg_match;
  1516. using __JM::reg_match_base;
  1517. using __JM::sub_match;
  1518. using __JM::regex;
  1519. using __JM::cmatch;
  1520. #ifndef JM_NO_WCSTRING
  1521. using __JM::wregex;
  1522. using __JM::wcmatch;
  1523. #endif
  1524. using __JM::query_match;
  1525. using __JM::reg_search;
  1526. using __JM::reg_grep;
  1527. using __JM::reg_format;
  1528. using __JM::reg_merge;
  1529. using __JM::jm_def_alloc;
  1530. #endif
  1531. #endif // __cplusplus
  1532. #endif // include