|
|
//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
// $NoKeywords: $
//
//=============================================================================//
/*
* * Copyright (c) 1998-9 * Dr John Maddock * * Permission to use, copy, modify, distribute and sell this software * and its documentation for any purpose is hereby granted without fee, * provided that the above copyright notice appear in all copies and * that both that copyright notice and this permission notice appear * in supporting documentation. Dr John Maddock makes no representations * about the suitability of this software for any purpose. * It is provided "as is" without express or implied warranty. * */
/*
* FILE regmatch.h * VERSION 2.12 * regular expression matching algorithms */
#ifndef __REGMATCH_H
#define __REGMATCH_H
JM_NAMESPACE(__JM)
//
// re_is_set_member:
// Tests the input starting at "next" against the long-form set record "set"
// belonging to expression "e".
//
// The set header is followed directly in memory by a run of null-terminated
// strings, which are consumed in this order: set->csingles single elements
// (each may be several characters long), set->cranges pairs of range bounds,
// then set->cequivalents equivalence keys.
//
// Returns "next" unchanged when the set rejects the input, otherwise an
// iterator past what was consumed; set->isnot inverts the outcome.
// NB *next is read unconditionally once the singles have been tried, so
// "next" must be dereferenceable.
//
template <class iterator, class charT, class traits_type, class Allocator>
iterator RE_CALL re_is_set_member(iterator next, iterator last, re_set_long* set, const reg_expression<charT, traits_type, Allocator>& e)
{
   // the string data starts immediately after the header:
   const charT* p = (const charT*)(set+1);
   iterator cursor;
   unsigned int idx;
   bool icase = e.flags() & regbase::icase;
   const bool negated = set->isnot ? true : false;

   //
   // singles: compare the input against each stored element in turn,
   // an element can be a multi-character collating element...
   for(idx = 0; idx < set->csingles; ++idx)
   {
      cursor = next;
      for(; *p && (cursor != last); ++p, ++cursor)
      {
         if(traits_type::translate(*cursor, icase MAYBE_PASS_LOCALE(e.locale())) != *p)
            break;
      }
      if(*p == 0)
      {
         // ran into the terminator, so the whole element matched:
         if(negated)
            return next;
         if(cursor != next)
            return cursor;
         return ++next;
      }
      // no match: step over the rest of this element and its terminator
      while(*p)
         ++p;
      ++p;
   }

   charT col = traits_type::translate(*next, icase MAYBE_PASS_LOCALE(e.locale()));

   if(set->cranges || set->cequivalents)
   {
      re_str<charT> raw(col);
      re_str<charT> key;

      //
      // ranges: only a single character can match
      if(set->cranges)
      {
         if(e.flags() & regbase::nocollate)
            key = raw;
         else
            traits_type::transform(key, raw MAYBE_PASS_LOCALE(e.locale()));

         for(idx = 0; idx < set->cranges; ++idx)
         {
            const bool not_below = (key <= p);
            // step over the first bound:
            while(*p)
               ++p;
            ++p;
            if(not_below && (key >= p))
               return negated ? next : ++next;
            // step over the second bound:
            while(*p)
               ++p;
            ++p;
         }
      }

      //
      // equivalence classes: only a single character can match
      if(set->cequivalents)
      {
         traits_type::transform_primary(key, raw MAYBE_PASS_LOCALE(e.locale()));
         for(idx = 0; idx < set->cequivalents; ++idx)
         {
            if(key == p)
               return negated ? next : ++next;
            // step over this key:
            while(*p)
               ++p;
            ++p;
         }
      }
   }

   // finally the character classes; the result is inverted for a negated set:
   const bool in_class = (traits_type::is_class(col, set->cclasses MAYBE_PASS_LOCALE(e.locale())) == true);
   if(in_class != negated)
      ++next;
   return next;
}
//
// class __priv_match_data:
// Working storage handed to query_match_aux by reference: a scratch copy of
// the match being built, the stacks used to record fallback positions, and
// two parallel arrays (a counter and a loop start position per repeat)
// sized by set_accumulator_size().
//
// The arrays are raw allocations released by free() from the destructor,
// so the class is deliberately not copyable: a member-wise copy would leave
// two objects releasing the same storage.
//
template <class iterator, class Allocator>
class __priv_match_data
{
public:
   typedef JM_MAYBE_TYPENAME REBIND_TYPE(int, Allocator) i_alloc;
   typedef JM_MAYBE_TYPENAME REBIND_TYPE(iterator, Allocator) it_alloc;

   // scratch copy of the match under construction:
   reg_match_base<iterator, Allocator> temp_match;
   // failure stacks:
   jstack<reg_match_base<iterator, Allocator>, Allocator> matches;
   jstack<iterator, Allocator> prev_pos;
   jstack<const re_syntax_base*, Allocator> prev_record;
   jstack<int, Allocator> prev_acc;
   // per-repeat state, caccumulators entries in each array:
   int* accumulators;
   unsigned int caccumulators;
   iterator* loop_starts;

   __priv_match_data(const reg_match_base<iterator, Allocator>&);
   ~__priv_match_data()
   {
      free();
   }
   void free();
   void set_accumulator_size(unsigned int size);
   int* get_accumulators()
   {
      return accumulators;
   }
   iterator* get_loop_starts()
   {
      return loop_starts;
   }

private:
   // not implemented: copying would double-free accumulators/loop_starts
   __priv_match_data(const __priv_match_data&);
   __priv_match_data& operator=(const __priv_match_data&);
};
//
// Builds the working storage for a matching run from match object m:
// temp_match starts as a copy of m, three of the stacks are constructed
// with (64, m.allocator()) - presumably 64 is the stack block size, confirm
// against jstack - and no repeat storage is allocated yet.
//
template <class iterator, class Allocator>
__priv_match_data<iterator, Allocator>::__priv_match_data(const reg_match_base<iterator, Allocator>& m)
   : temp_match(m),
     matches(64, m.allocator()),
     prev_pos(64, m.allocator()),
     prev_record(64, m.allocator()),
     accumulators(0),
     caccumulators(0),
     loop_starts(0)
{
}
//
// __priv_match_data::set_accumulator_size:
// Makes sure there is room for at least "size" repeat counters and loop
// start positions. Storage only ever grows: a request that fits the current
// capacity does nothing. When the arrays are re-allocated their previous
// contents are discarded, and the new loop starts are default constructed.
//
// Each buffer is published to the object only once it is usable, so if an
// allocation throws the destructor releases exactly what was obtained.
// (Should iterator's default constructor throw, the raw loop-start block is
// leaked rather than handed to free() half constructed.)
//
template <class iterator, class Allocator>
void __priv_match_data<iterator, Allocator>::set_accumulator_size(unsigned int size)
{
   if(size <= caccumulators)
      return;

   // drop the old arrays, this leaves the object empty:
   free();

   i_alloc acc_alloc(temp_match.allocator());
   it_alloc loop_alloc(temp_match.allocator());

   accumulators = acc_alloc.allocate(size);
   // free() needs the element count to hand back to deallocate:
   caccumulators = size;

   iterator* fresh_loops = loop_alloc.allocate(size);
   for(unsigned i = 0; i < size; ++i)
      new (fresh_loops + i) iterator();
   loop_starts = fresh_loops;
}

//
// __priv_match_data::free:
// Destroys the loop start objects and returns both arrays to the allocator,
// then resets the object to its empty state. Safe to call repeatedly, and
// on an object whose arrays were only partly set up.
//
template <class iterator, class Allocator>
void __priv_match_data<iterator, Allocator>::free()
{
   if(loop_starts)
   {
      for(unsigned i = 0; i < caccumulators; ++i)
         jm_destroy(loop_starts + i);
      it_alloc loop_alloc(temp_match.allocator());
      loop_alloc.deallocate(loop_starts, caccumulators);
      loop_starts = 0;
   }
   if(accumulators)
   {
      i_alloc acc_alloc(temp_match.allocator());
      acc_alloc.deallocate(accumulators, caccumulators);
      accumulators = 0;
   }
   // nothing is owned any more, so a later call must be a no-op:
   caccumulators = 0;
}
//
// proc query_match
// returns true if the specified regular expression matches
// at position first. Fills in what matched in m.
//
template <class iterator, class Allocator, class charT, class traits, class Allocator2> bool query_match(iterator first, iterator last, reg_match<iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default) { // prepare m for failure:
if((flags & match_init) == 0) { m.set_size(e.mark_count(), first, last); } __priv_match_data<iterator, Allocator> pd(m); iterator restart; return query_match_aux(first, last, m, e, flags, pd, &restart); }
//
// query_match convenience interfaces:
#ifndef JM_NO_PARTIAL_FUNC_SPEC
//
// this isn't really a partial specialisation, but template function
// overloading - if the compiler doesn't support partial specialisation
// then it really won't support this either:
// character pointer form: the text is the traits::length(str) characters
// starting at str.
template <class charT, class Allocator, class traits, class Allocator2>
inline bool query_match(const charT* str, reg_match<const charT*, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default)
{
   const charT* const str_end = str + traits::length(str);
   return query_match(str, str_end, m, e, flags);
}
#ifndef JM_NO_STRING_H
template <class ST, class SA, class Allocator, class charT, class traits, class Allocator2> inline bool query_match(const __JM_STD::basic_string<charT, ST, SA>& s, reg_match<typename __JM_STD::basic_string<charT, ST, SA>::const_iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default) { return query_match(s.begin(), s.end(), m, e, flags); } #endif
#else // partial specialisation
inline bool query_match(const char* str, cmatch& m, const regex& e, unsigned flags = match_default) { return query_match(str, str + regex::traits_type::length(str), m, e, flags); } #ifndef JM_NO_WCSTRING
inline bool query_match(const wchar_t* str, wcmatch& m, const wregex& e, unsigned flags = match_default) { return query_match(str, str + wregex::traits_type::length(str), m, e, flags); } #endif
#ifndef JM_NO_STRING_H
inline bool query_match(const __JM_STD::string& s, reg_match<__JM_STD::string::const_iterator, regex::alloc_type>& m, const regex& e, unsigned flags = match_default) { return query_match(s.begin(), s.end(), m, e, flags); } #if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING)
inline bool query_match(const __JM_STD::basic_string<wchar_t>& s, reg_match<__JM_STD::basic_string<wchar_t>::const_iterator, wregex::alloc_type>& m, const wregex& e, unsigned flags = match_default) { return query_match(s.begin(), s.end(), m, e, flags); } #endif
#endif
#endif
#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES)
//
// Ugly ugly hack,
// templates don't merge if they contain switch statements so declare these
// templates in unnamed namespace (ie with internal linkage), each translation
// unit then gets its own local copy, it works seamlessly but bloats the app.
namespace{ #endif
template <class iterator, class Allocator, class charT, class traits, class Allocator2> bool query_match_aux(iterator first, iterator last, reg_match<iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags, __priv_match_data<iterator, Allocator>& pd, iterator* restart) { if(e.flags() & regbase::failbit) return false;
typedef typename traits::size_type traits_size_type; typedef typename traits::uchar_type traits_uchar_type; typedef typename is_byte<charT>::width_type width_type; #ifdef RE_LOCALE_CPP
const __JM_STD::locale& locale_inst = e.locale(); #endif
// declare some local aliases to reduce pointer loads
// good optimising compilers should make this unnecessary!!
jstack<reg_match_base<iterator, Allocator>, Allocator>& matches = pd.matches; jstack<iterator, Allocator>& prev_pos = pd.prev_pos; jstack<const re_syntax_base*, Allocator>& prev_record = pd.prev_record; jstack<int, Allocator>& prev_acc = pd.prev_acc; reg_match_base<iterator, Allocator>& temp_match = pd.temp_match; temp_match.set_first(first);
//temp_match.set_size(e.mark_count(), first, last);
register const re_syntax_base* ptr = e.first(); bool match_found = false; bool need_push_match = (e.mark_count() > 1); int cur_acc = -1; // no active accumulator
pd.set_accumulator_size(e.repeat_count()); int* accumulators = pd.get_accumulators(); iterator* start_loop = pd.get_loop_starts(); int k; // for loops
bool icase = e.flags() & regbase::icase; *restart = first; iterator base = first;
// prepare m for failure:
/*
if((flags & match_init) == 0) { m.init_fail(first, last); } */
retry:
while(first != last) { jm_assert(ptr); switch(ptr->type) { case syntax_element_match: match_jump: { // match found, save then fallback in case we missed a
// longer one.
if((flags & match_not_null) && (first == temp_match[0].first)) goto failure; temp_match.set_second(first); m.maybe_assign(temp_match); match_found = true; if((flags & match_any) || ((first == last) && (need_push_match == false))) { // either we don't care what we match or we've matched
// the whole string and can't match anything longer.
while(matches.empty() == false) matches.pop(); while(prev_pos.empty() == false) prev_pos.pop(); while(prev_record.empty() == false) prev_record.pop(); while(prev_acc.empty() == false) prev_acc.pop(); return true; } } goto failure; case syntax_element_startmark: temp_match.set_first(first, ((re_brace*)ptr)->index); ptr = ptr->next.p; break; case syntax_element_endmark: temp_match.set_second(first, ((re_brace*)ptr)->index); ptr = ptr->next.p; break; case syntax_element_literal: { unsigned int len = ((re_literal*)ptr)->length; charT* what = (charT*)(((re_literal*)ptr) + 1); //
// compare string with what we stored in
// our records:
for(unsigned int i = 0; i < len; ++i, ++first) { if((first == last) || (traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)) != what[i])) goto failure; } ptr = ptr->next.p; break; } case syntax_element_start_line: outer_line_check: if(first == temp_match[0].first) { // we're at the start of the buffer
if(flags & match_prev_avail) { inner_line_check: // check the previous value even though its before
// the start of our "buffer".
iterator t(first); --t; if(traits::is_separator(*t) && !((*t == '\r') && (*first == '\n')) ) { ptr = ptr->next.p; continue; } goto failure; } if((flags & match_not_bol) == 0) { ptr = ptr->next.p; continue; } goto failure; } // we're in the middle of the string
goto inner_line_check; case syntax_element_end_line: // we're not yet at the end so *first is always valid:
if(traits::is_separator(*first)) { if((first != base) || (flags & match_prev_avail)) { // check that we're not in the middle of \r\n sequence
iterator t(first); --t; if((*t == '\r') && (*first == '\n')) { goto failure; } } ptr = ptr->next.p; continue; } goto failure; case syntax_element_wild: // anything except possibly NULL or \n:
if(traits::is_separator(*first)) { if(flags & match_not_dot_newline) goto failure; ptr = ptr->next.p; ++first; continue; } if(*first == charT(0)) { if(flags & match_not_dot_null) goto failure; ptr = ptr->next.p; ++first; continue; } ptr = ptr->next.p; ++first; break; case syntax_element_word_boundary: { // prev and this character must be opposites:
bool b = traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) { if(flags & match_not_bow) b ^= true; else b ^= false; } else { --first; b ^= traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); ++first; } if(b) { ptr = ptr->next.p; continue; } goto failure; } case syntax_element_within_word: // both prev and this character must be char_class_word:
if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) { bool b; if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) b = false; else { --first; b = traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)); ++first; } if(b) { ptr = ptr->next.p; continue; } } goto failure; case syntax_element_word_start: if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) { // start of buffer:
if(flags & match_not_bow) goto failure; if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) { ptr = ptr->next.p; continue; } goto failure; } // otherwise inside buffer:
if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) { iterator t(first); --t; if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false) { ptr = ptr->next.p; continue; } } goto failure; // if we fall through to here then we've failed
case syntax_element_word_end: if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0)) goto failure; // start of buffer can't be end of word
// otherwise inside buffer:
if(traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false) { iterator t(first); --t; if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst))) { ptr = ptr->next.p; continue; } } goto failure; // if we fall through to here then we've failed
case syntax_element_buffer_start: if((first != temp_match[0].first) || (flags & match_not_bob)) goto failure; // OK match:
ptr = ptr->next.p; break; case syntax_element_buffer_end: if((first != last) || (flags & match_not_eob)) goto failure; // OK match:
ptr = ptr->next.p; break; case syntax_element_backref: { // compare with what we previously matched:
iterator i = temp_match[((re_brace*)ptr)->index].first; iterator j = temp_match[((re_brace*)ptr)->index].second; while(i != j) { if((first == last) || (traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)) != traits::translate(*i, icase MAYBE_PASS_LOCALE(locale_inst)))) goto failure; ++i; ++first; } ptr = ptr->next.p; break; } case syntax_element_long_set: { // let the traits class do the work:
iterator t = re_is_set_member(first, last, (re_set_long*)ptr, e); if(t != first) { ptr = ptr->next.p; first = t; continue; } goto failure; } case syntax_element_set: // lookup character in table:
if(((re_set*)ptr)->__map[(traits_uchar_type)traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))]) { ptr = ptr->next.p; ++first; continue; } goto failure; case syntax_element_jump: ptr = ((re_jump*)ptr)->alt.p; continue; case syntax_element_alt: { // alt_jump:
if(reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_jump*)ptr)->__map, (unsigned char)mask_take, width_type())) { // we can take the first alternative,
// see if we need to push next alternative:
if(reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_jump*)ptr)->__map, mask_skip, width_type())) { if(need_push_match) matches.push(temp_match); for(k = 0; k <= cur_acc; ++k) prev_pos.push(start_loop[k]); prev_pos.push(first); prev_record.push(ptr); for(k = 0; k <= cur_acc; ++k) prev_acc.push(accumulators[k]); prev_acc.push(cur_acc); } ptr = ptr->next.p; continue; } if(reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_jump*)ptr)->__map, mask_skip, width_type())) { ptr = ((re_jump*)ptr)->alt.p; continue; } goto failure; // neither option is possible
} case syntax_element_rep: { // repeater_jump:
// if we're moving to a higher id (nested repeats etc)
// zero out our accumualtors:
if(cur_acc < ((re_repeat*)ptr)->id) { cur_acc = ((re_repeat*)ptr)->id; accumulators[cur_acc] = 0; start_loop[cur_acc] = iterator(); }
cur_acc = ((re_repeat*)ptr)->id;
if(((re_repeat*)ptr)->leading) *restart = first;
//charT c = traits::translate(*first MAYBE_PASS_LOCALE(locale_inst));
// first of all test for special case where this is last element,
// if that is the case then repeat as many times as possible:
if(((re_repeat*)ptr)->alt.p->type == syntax_element_match) { // see if we can take the repeat:
if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) && reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type())) { // push terminating match as fallback:
if((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) { if((prev_record.empty() == false) && (prev_record.peek() == ((re_repeat*)ptr)->alt.p)) { // we already have the required fallback
// don't add any more, just update this one:
if(need_push_match) matches.peek() = temp_match; prev_pos.peek() = first; } else { if(need_push_match) matches.push(temp_match); prev_pos.push(first); prev_record.push(((re_repeat*)ptr)->alt.p); } } // move to next item in list:
if(first != start_loop[cur_acc]) { ++accumulators[cur_acc]; ptr = ptr->next.p; start_loop[cur_acc] = first; continue; } goto failure; } // see if we can skip the repeat:
if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) && reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_skip, width_type())) { ptr = ((re_repeat*)ptr)->alt.p; continue; } // otherwise fail:
goto failure; }
// see if we can skip the repeat:
if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) && reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_skip, width_type())) { // see if we can push failure info:
if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) && reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type())) { // check to see if the last loop matched a NULL string
// if so then we really don't want to loop again:
if(((unsigned int)accumulators[cur_acc] == ((re_repeat*)ptr)->min) || (first != start_loop[cur_acc])) { if(need_push_match) matches.push(temp_match); prev_pos.push(first); prev_record.push(ptr); for(k = 0; k <= cur_acc; ++k) prev_acc.push(accumulators[k]); //prev_acc.push(cur_acc);
} } ptr = ((re_repeat*)ptr)->alt.p; continue; }
// otherwise see if we can take the repeat:
if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) && reg_expression<charT, traits, Allocator2>::can_start(*first, ((re_repeat*)ptr)->__map, mask_take, width_type()) && (first != start_loop[cur_acc])) { // move to next item in list:
++accumulators[cur_acc]; ptr = ptr->next.p; start_loop[cur_acc] = first; continue; }
// if we get here then neither option is allowed so fail:
goto failure;
} case syntax_element_combining: if(traits::is_combining(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)))) goto failure; ++first; while((first != last) && traits::is_combining(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))))++first; ptr = ptr->next.p; continue; case syntax_element_soft_buffer_end: { if(flags & match_not_eob) goto failure; iterator p(first); while((p != last) && traits::is_separator(traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst))))++p; if(p != last) goto failure; ptr = ptr->next.p; continue; } case syntax_element_restart_continue: if(first != temp_match[-1].first) goto failure; ptr = ptr->next.p; continue; default: jm_assert(0); // should never get to here!!
return false; } }
//
// if we get to here then we've run out of characters to match against,
// we could however still have non-character regex items left
if(ptr->can_be_null == 0) goto failure; while(true) { jm_assert(ptr); switch(ptr->type) { case syntax_element_match: goto match_jump; case syntax_element_startmark: temp_match.set_first(first, ((re_brace*)ptr)->index); ptr = ptr->next.p; break; case syntax_element_endmark: temp_match.set_second(first, ((re_brace*)ptr)->index); ptr = ptr->next.p; break; case syntax_element_start_line: goto outer_line_check; case syntax_element_end_line: // we're at the end so *first is never valid:
if((flags & match_not_eol) == 0) { ptr = ptr->next.p; continue; } goto failure; case syntax_element_word_boundary: case syntax_element_word_end: if(((flags & match_not_eow) == 0) && (first != temp_match[0].first)) { iterator t(first); --t; if(traits::is_class(*t, char_class_word MAYBE_PASS_LOCALE(locale_inst))) { ptr = ptr->next.p; continue; } } goto failure; case syntax_element_buffer_end: case syntax_element_soft_buffer_end: if(flags & match_not_eob) goto failure; // OK match:
ptr = ptr->next.p; break; case syntax_element_jump: ptr = ((re_jump*)ptr)->alt.p; continue; case syntax_element_alt: if(ptr->can_be_null & mask_take) { // we can test the first alternative,
// see if we need to push next alternative:
if(ptr->can_be_null & mask_skip) { if(need_push_match) matches.push(temp_match); for(k = 0; k <= cur_acc; ++k) prev_pos.push(start_loop[k]); prev_pos.push(first); prev_record.push(ptr); for(k = 0; k <= cur_acc; ++k) prev_acc.push(accumulators[k]); prev_acc.push(cur_acc); } ptr = ptr->next.p; continue; } if(ptr->can_be_null & mask_skip) { ptr = ((re_jump*)ptr)->alt.p; continue; } goto failure; // neither option is possible
case syntax_element_rep: // if we're moving to a higher id (nested repeats etc)
// zero out our accumualtors:
if(cur_acc < ((re_repeat*)ptr)->id) { cur_acc = ((re_repeat*)ptr)->id; accumulators[cur_acc] = 0; start_loop[cur_acc] = first; }
cur_acc = ((re_repeat*)ptr)->id;
// see if we can skip the repeat:
if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min) && (ptr->can_be_null & mask_skip)) { // don't push failure info, there's no point:
ptr = ((re_repeat*)ptr)->alt.p; continue; }
// otherwise see if we can take the repeat:
if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max) && ((ptr->can_be_null & (mask_take | mask_skip)) == (mask_take | mask_skip))) { // move to next item in list:
++accumulators[cur_acc]; ptr = ptr->next.p; start_loop[cur_acc] = first; continue; }
// if we get here then neither option is allowed so fail:
goto failure; case syntax_element_restart_continue: if(first != temp_match[-1].first) goto failure; ptr = ptr->next.p; continue; default: goto failure; } }
failure:
if(prev_record.empty() == false) { ptr = prev_record.peek(); switch(ptr->type) { case syntax_element_alt: // get next alternative:
ptr = ((re_jump*)ptr)->alt.p; if(need_push_match) matches.pop(temp_match); prev_acc.pop(cur_acc); for(k = cur_acc; k >= 0; --k) prev_acc.pop(accumulators[k]); prev_pos.pop(first); for(k = cur_acc; k >= 0; --k) prev_pos.pop(start_loop[k]); prev_record.pop(); goto retry; case syntax_element_rep: // we're doing least number of repeats first,
// increment count and repeat again:
if(need_push_match) matches.pop(temp_match); prev_pos.pop(first); cur_acc = ((re_repeat*)ptr)->id; for(k = cur_acc; k >= 0; --k) prev_acc.pop(accumulators[k]); prev_record.pop(); if((unsigned int)++accumulators[cur_acc] > ((re_repeat*)ptr)->max) goto failure; // repetions exhausted.
ptr = ptr->next.p; start_loop[cur_acc] = first; goto retry; case syntax_element_match: if(need_push_match) matches.pop(temp_match); prev_pos.pop(first); prev_record.pop(); goto retry; default: jm_assert(0); // mustn't get here!!
} }
if(match_found) return true;
// if we get to here then everything has failed
// and no match was found:
return false; } #if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES)
} // namespace
#endif
//
// __skip_and_inc:
// Moves first forward until it reaches last, keeping the line bookkeeping
// up to date: each '\n' stepped over adds one to clines and sets last_line
// to the position just after it.
//
template <class iterator>
void __skip_and_inc(unsigned int& clines, iterator& last_line, iterator& first, const iterator last)
{
   for(; first != last; )
   {
      const bool ends_line = (*first == '\n');
      ++first;
      if(ends_line)
      {
         last_line = first;
         ++clines;
      }
   }
}
//
// __skip_and_dec:
// Moves first back by len positions, taking one off clines for every '\n'
// it lands on. If any were crossed, last_line is recalculated by scanning
// back from first: it ends up just after the preceding '\n', or at base if
// there is none. If no '\n' was crossed last_line is left alone.
//
template <class iterator>
void __skip_and_dec(unsigned int& clines, iterator& last_line, iterator& first, iterator base, unsigned int len)
{
   bool crossed_newline = false;
   unsigned int remaining = len;
   while(remaining != 0)
   {
      --remaining;
      --first;
      if(*first == '\n')
      {
         --clines;
         crossed_newline = true;
      }
   }

   if(!crossed_newline)
      return;   // still on the same line

   last_line = first;
   if(last_line == base)
      return;   // backed up all the way to the start

   // look for the line break in front of first:
   --last_line;
   for(; last_line != base; --last_line)
   {
      if(*last_line == '\n')
         break;
   }
   // the line begins after the break, not on it:
   if(*last_line == '\n')
      ++last_line;
}
//
// __inc_one:
// Moves first forward by exactly one position; if the character stepped
// over was '\n', clines goes up by one and last_line is set to the new
// position.
//
template <class iterator>
inline void __inc_one(unsigned int& clines, iterator& last_line, iterator& first)
{
   const bool ends_line = (*first == '\n');
   ++first;
   if(ends_line)
   {
      last_line = first;
      ++clines;
   }
}
template <class iterator, class Allocator> struct grep_search_predicate { reg_match<iterator, Allocator>* pm; grep_search_predicate(reg_match<iterator, Allocator>* p) : pm(p) {} bool operator()(const reg_match<iterator, Allocator>& m) { *pm = static_cast<const reg_match_base<iterator, Allocator>&>(m); return false; } };
#if !defined(JM_NO_TEMPLATE_RETURNS) && !defined(JM_NO_PARTIAL_FUNC_SPEC)
// grep_out_type, search predicate form: reg_grep2 constructs its working
// match from the value returned here. For a grep_search_predicate that is
// the caller's own match object (the one o.pm points to); the allocator
// argument is ignored.
template <class iterator, class Allocator>
inline const reg_match_base<iterator, Allocator>& grep_out_type(const grep_search_predicate<iterator, Allocator>& o, const Allocator&)
{
   const reg_match<iterator, Allocator>* target = o.pm;
   return *target;
}
#endif
// grep_out_type, general form: for any other predicate type reg_grep2
// constructs its working match from the allocator alone, so the predicate
// is ignored and the allocator is handed straight back (by reference).
template <class T, class Allocator>
inline const Allocator& grep_out_type(const T& /* predicate, unused */, const Allocator& a)
{
   const Allocator& chosen = a;
   return chosen;
}
#if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES)
//
// Ugly ugly hack,
// templates don't merge if they contain switch statements so declare these
// templates in unnamed namespace (ie with internal linkage), each translation
// unit then gets its own local copy, it works seamlessly but bloats the app.
namespace{ #endif
//
// reg_grep2:
// find all non-overlapping matches within the sequence first last:
//
template <class Predicate, class I, class charT, class traits, class A, class A2> unsigned int reg_grep2(Predicate foo, I first, I last, const reg_expression<charT, traits, A>& e, unsigned flags, A2 a) { if(e.flags() & regbase::failbit) return 0;
typedef typename traits::size_type traits_size_type; typedef typename traits::uchar_type traits_uchar_type; typedef typename is_byte<charT>::width_type width_type;
reg_match<I, A2> m(grep_out_type(foo, a)); I restart; m.set_size(e.mark_count(), first, last); m.set_line(1, first);
#ifdef RE_LOCALE_CPP
const __JM_STD::locale& locale_inst = e.locale(); #endif
unsigned int clines = 1; unsigned int cmatches = 0; I last_line = first; I next_base; I base = first; bool need_init;
flags |= match_init;
__priv_match_data<I, A2> pd(m);
const unsigned char* __map = e.get_map(); unsigned int type;
if(first == last) { // special case, only test if can_be_null,
// don't dereference any pointers!!
if(e.first()->can_be_null) if(query_match_aux(first, last, m, e, flags, pd, &restart)) { foo(m); ++cmatches; } return cmatches; }
// try one time whatever:
if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type() ) ) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) { ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched
// trying to match again with match_not_null set if this
// is a null match...
need_init = true; if(first == m[0].second) { next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) { ++cmatches; if(foo(m) == false) return cmatches; } else { need_init = false; for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } if(need_init) { __skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); } } else { for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } else __inc_one(clines, last_line, first); flags |= match_prev_avail | match_not_bob;
// depending on what the first record is we may be able to
// optimise the search:
type = (flags & match_continuous) ? regbase::restart_continue : e.restart_type();
if(type == regbase::restart_buf) return cmatches;
switch(type) { case regbase::restart_lit: case regbase::restart_fixed_lit: { const kmp_info<charT>* info = e.get_kmp(); int len = info->len; const charT* x = info->pstr; int j = 0; bool icase = e.flags() & regbase::icase; while (first != last) { while((j > -1) && (x[j] != traits::translate(*first, icase MAYBE_PASS_LOCALE(locale_inst)))) j = info->kmp_next[j]; __inc_one(clines, last_line, first); ++j; if(j >= len) { if(type == regbase::restart_fixed_lit) { __skip_and_dec(clines, last_line, first, base, j); restart = first; restart += len; m.set_first(first); m.set_second(restart); m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; __skip_and_inc(clines, last_line, first, restart); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); j = 0; } else { restart = first; __skip_and_dec(clines, last_line, first, base, j); if(query_match_aux(first, last, m, e, flags, pd, &restart)) {
m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched
__skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); j = 0; } else { for(int k = 0; (restart != first) && (k < j); ++k, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); j = 0; //we could do better than this...
} } } } break; } case regbase::restart_any: { while(first != last) { if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) {
m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched
// trying to match again with match_not_null set if this
// is a null match...
need_init = true; if(first == m[0].second) { next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; } else { need_init = false; for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } if(need_init) { __skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); } continue; } else { for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } else __inc_one(clines, last_line, first); } } break; case regbase::restart_word: { // do search optimised for word starts:
while(first != last) { --first; if(*first == '\n') --clines; // skip the word characters:
while((first != last) && traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst))) ++first; // now skip the white space:
while((first != last) && (traits::is_class(*first, char_class_word MAYBE_PASS_LOCALE(locale_inst)) == false)) __inc_one(clines, last_line, first); if(first == last) break;
if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched
// trying to match again with match_not_null set if this
// is a null match...
need_init = true; if(first == m[0].second) { next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; } else { need_init = false; for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } if(need_init) { __skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); } } else { for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } else __inc_one(clines, last_line, first); } } break; case regbase::restart_line: { // do search optimised for line starts:
while(first != last) { // find first charcter after a line break:
--first; if(*first == '\n') --clines; while((first != last) && (*first != '\n')) ++first; if(first == last) break; ++first; if(first == last) break;
++clines; last_line = first;
if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched
// trying to match again with match_not_null set if this
// is a null match...
need_init = true; if(first == m[0].second) { next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; } else { need_init = false; for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } if(need_init) { __skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); } } else { for(unsigned int i = 0; (restart != first) && (i < e.leading_length()); ++i, --restart); if(restart != last) ++restart; __skip_and_inc(clines, last_line, first, restart); } } else __inc_one(clines, last_line, first); } } break; case regbase::restart_continue: { while(first != last) { if( reg_expression<charT, traits, A>::can_start(*first, __map, (unsigned char)mask_any, width_type()) ) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; // update to end of what matched
// trying to match again with match_not_null set if this
// is a null match...
if(first == m[0].second) { next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; } else return cmatches; // can't continue from null match
} __skip_and_inc(clines, last_line, first, m[0].second); next_base = m[0].second; pd.temp_match.init_fail(next_base, last); m.init_fail(next_base, last); continue; } } return cmatches; } } break; }
// finally check trailing null string:
if(e.first()->can_be_null) { if(query_match_aux(first, last, m, e, flags, pd, &restart)) { m.set_line(clines, last_line); ++cmatches; if(foo(m) == false) return cmatches; } }
return cmatches; } #if defined(JM_NO_TEMPLATE_SWITCH_MERGE) && !defined(JM_NO_NAMESPACES)
} // namespace
#endif
//
// reg_search (iterator range):
// Searches [first, last) for the expression e by delegating to reg_grep2
// with a grep_search_predicate bound to m.
//   first, last - the character sequence to search
//   m           - match object passed (by address) to the predicate; it is
//                 sized here unless the caller already set match_init
//   e           - the expression to search for
//   flags       - match flags; match_init is always added before delegating
// Returns false immediately if e.flags() has regbase::failbit set, otherwise
// true when reg_grep2 reports a non-zero count.
//
template <class iterator, class Allocator, class charT, class traits, class Allocator2>
bool reg_search(iterator first, iterator last, reg_match<iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default)
{
   // These aliases are not referenced below.
   // NOTE(review): possibly kept for the benefit of older compilers -
   // confirm before removing.
   typedef typename traits::size_type traits_size_type;
   typedef typename traits::uchar_type traits_uchar_type;

   // nothing to do for an expression flagged with failbit:
   if(e.flags() & regbase::failbit)
      return false;

   // prepare m for failure, unless the caller has already initialised it:
   const bool caller_initialised = (flags & match_init) != 0;
   if(!caller_initialised)
      m.set_size(e.mark_count(), first, last);

   // reg_grep2 yields a count; any non-zero count means success:
   const unsigned int found = reg_grep2(grep_search_predicate<iterator, Allocator>(&m), first, last, e, flags | match_init, m.allocator());
   return found != 0;
}
//
// reg_search convenience interfaces:
#ifndef JM_NO_PARTIAL_FUNC_SPEC
//
// this isn't really a partial specialisation, but template function
// overloading - if the compiler doesn't support partial specialisation
// then it really won't support this either:
// Searches the character array str, whose length is obtained from
// traits::length(str), by forwarding to the iterator-range reg_search.
// Returns whatever that overload returns.
template <class charT, class Allocator, class traits, class Allocator2>
inline bool reg_search(const charT* str, reg_match<const charT*, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default)
{
   const charT* const str_end = str + traits::length(str);
   return reg_search(str, str_end, m, e, flags);
}
#ifndef JM_NO_STRING_H
// Searches the whole of the string s (s.begin() to s.end()) by forwarding
// to the iterator-range reg_search; m is expressed in terms of the string's
// const_iterator.
template <class ST, class SA, class Allocator, class charT, class traits, class Allocator2>
inline bool reg_search(const __JM_STD::basic_string<charT, ST, SA>& s, reg_match<typename __JM_STD::basic_string<charT, ST, SA>::const_iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default)
{
   typedef typename __JM_STD::basic_string<charT, ST, SA>::const_iterator string_iterator;
   const string_iterator from = s.begin();
   const string_iterator to = s.end();
   return reg_search(from, to, m, e, flags);
}
#endif
#else // partial specialisation
inline bool reg_search(const char* str, cmatch& m, const regex& e, unsigned flags = match_default) { return reg_search(str, str + regex::traits_type::length(str), m, e, flags); } #ifndef JM_NO_WCSTRING
inline bool reg_search(const wchar_t* str, wcmatch& m, const wregex& e, unsigned flags = match_default) { return reg_search(str, str + wregex::traits_type::length(str), m, e, flags); } #endif
#ifndef JM_NO_STRING_H
inline bool reg_search(const __JM_STD::string& s, reg_match<__JM_STD::string::const_iterator, regex::alloc_type>& m, const regex& e, unsigned flags = match_default) { return reg_search(s.begin(), s.end(), m, e, flags); } #if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING)
inline bool reg_search(const __JM_STD::basic_string<wchar_t>& s, reg_match<__JM_STD::basic_string<wchar_t>::const_iterator, wregex::alloc_type>& m, const wregex& e, unsigned flags = match_default) { return reg_search(s.begin(), s.end(), m, e, flags); } #endif
#endif
#endif
//
// reg_grep:
// find all non-overlapping matches within the sequence first last:
//
// Iterator-range form of reg_grep: forwards everything to reg_grep2, using
// the expression's own allocator (e.allocator()), and returns the unsigned
// count that reg_grep2 produces.
//   foo         - predicate handed on to reg_grep2
//   first, last - the sequence to scan
//   e           - the expression to look for
//   flags       - match flags, passed through unchanged
template <class Predicate, class iterator, class charT, class traits, class Allocator>
inline unsigned int reg_grep(Predicate foo, iterator first, iterator last, const reg_expression<charT, traits, Allocator>& e, unsigned flags = match_default)
{
   const unsigned int match_count = reg_grep2(foo, first, last, e, flags, e.allocator());
   return match_count;
}
//
// reg_grep convenience interfaces:
#ifndef JM_NO_PARTIAL_FUNC_SPEC
//
// this isn't really a partial specialisation, but template function
// overloading - if the compiler doesn't support partial specialisation
// then it really won't support this either:
// Character array form of reg_grep: scans str up to str + traits::length(str)
// using the iterator-range reg_grep.
// NOTE: the iterator-range overload returns an unsigned count, but this
// overload is declared to return bool, so the result is only "was the count
// non-zero".
template <class Predicate, class charT, class Allocator, class traits>
inline bool reg_grep(Predicate foo, const charT* str, const reg_expression<charT, traits, Allocator>& e, unsigned flags = match_default)
{
   const charT* const str_end = str + traits::length(str);
   return reg_grep(foo, str, str_end, e, flags) != 0;
}
#ifndef JM_NO_STRING_H
// String form of reg_grep: scans s.begin() to s.end() using the
// iterator-range reg_grep.
// NOTE: the iterator-range overload returns an unsigned count, but this
// overload is declared to return bool, so the result is only "was the count
// non-zero".
template <class Predicate, class ST, class SA, class Allocator, class charT, class traits>
inline bool reg_grep(Predicate foo, const __JM_STD::basic_string<charT, ST, SA>& s, const reg_expression<charT, traits, Allocator>& e, unsigned flags = match_default)
{
   const unsigned int match_count = reg_grep(foo, s.begin(), s.end(), e, flags);
   return match_count != 0;
}
#endif
#else // partial specialisation
inline bool reg_grep(bool (*foo)(const cmatch&), const char* str, const regex& e, unsigned flags = match_default) { return reg_grep(foo, str, str + regex::traits_type::length(str), e, flags); } #ifndef JM_NO_WCSTRING
inline bool reg_grep(bool (*foo)(const wcmatch&), const wchar_t* str, const wregex& e, unsigned flags = match_default) { return reg_grep(foo, str, str + wregex::traits_type::length(str), e, flags); } #endif
#ifndef JM_NO_STRING_H
inline bool reg_grep(bool (*foo)(const reg_match<__JM_STD::string::const_iterator, regex::alloc_type>&), const __JM_STD::string& s, const regex& e, unsigned flags = match_default) { return reg_grep(foo, s.begin(), s.end(), e, flags); } #if !defined(JM_NO_STRING_DEF_ARGS) && !defined(JM_NO_WCSTRING)
inline bool reg_grep(bool (*foo)(const reg_match<__JM_STD::basic_string<wchar_t>::const_iterator, wregex::alloc_type>&), const __JM_STD::basic_string<wchar_t>& s, const wregex& e, unsigned flags = match_default) { return reg_grep(foo, s.begin(), s.end(), e, flags); } #endif
#endif
#endif
//
// finally for compatibility with version 1.x of the library
// we need a form of reg_grep that takes an output iterator
// as its first argument:
//
//
// struct grep_match:
// stores what matched during a reg_grep,
// the output iterator type passed to reg_grep must have an
// operator*() that returns a type with an
// operator=(const grep_match<iterator, Allocator>&);
//
// grep_match: the record written through the output iterator by
// reg_grep_old - a line number, an iterator to the start of that line and
// a copy of the match itself.
template <class iterator, class Allocator>
struct grep_match
{
   unsigned int line;                     // value of reg_match_base::line() for the match
   iterator line_start;                   // value of reg_match_base::line_start() for the match
   reg_match<iterator, Allocator> what;   // the match itself

   // Builds an empty record whose match object uses allocator a.
   // line and line_start are given defined values rather than being left
   // uninitialised.
   grep_match(Allocator a = Allocator()) : line(0), line_start(), what(a) {}

   // Builds a record for match m found on line l, which starts at p1.
   grep_match(unsigned int l, iterator p1, const reg_match<iterator, Allocator>& m)
      : line(l), line_start(p1), what(m) {}

   // Comparison operators that always report false.
   // NOTE(review): presumably present only so that grep_match can be stored
   // in containers that demand these operators - confirm.
   // They are const so that they can be applied to const records.
   bool operator == (const grep_match& ) const { return false; }
   bool operator < (const grep_match&) const { return false; }
};

// grep_adaptor: predicate that adapts an output iterator for use with
// reg_grep2. Each call copies the reported match into a grep_match and
// writes it through the output iterator.
//   O - output iterator type; *oi must be assignable from grep_match<I, A>
//   I - iterator type of the sequence being searched
//   A - allocator type
template <class O, class I, class A>
struct grep_adaptor
{
   O oi;                 // where the next record is written
   // Scratch record reused for every match. This must be a grep_match (not
   // a reg_match): the fields what / line / line_start assigned below are
   // grep_match's fields, and grep_match is the type the output iterator is
   // documented to accept.
   grep_match<I, A> m;

   // Initialisers are listed in declaration order (oi, then m).
   grep_adaptor(O i, A a) : oi(i), m(a) {}

   // Records the match w and advances the output iterator.
   // Always returns true.
   bool operator()(const reg_match_base<I, A>& w)
   {
      m.what = w;
      m.line = w.line();
      m.line_start = w.line_start();
      *oi = m;
      ++oi;
      return true;
   }
};
template <class Out, class iterator, class charT, class traits, class Allocator> inline unsigned int reg_grep_old(Out oi, iterator first, iterator last, const reg_expression<charT, traits, Allocator>& e, unsigned flags = match_default) { return reg_grep2(grep_adaptor<Out, iterator, Allocator>(oi, e.allocator()), first, last, e, flags, e.allocator()); }
JM_END_NAMESPACE // namespace regex
#endif // __REGMATCH_H
|