@@ -1569,10 +1569,12 @@ public:
15691569 _Node_end_rep& operator=(const _Node_end_rep&) = delete;
15701570};
15711571
1572- struct _Loop_vals_v2_t { // storage for loop administration
1572+ template <class _Diff>
1573+ struct _Loop_vals_v3_t { // storage for loop administration
15731574 size_t _Loop_frame_idx = 0; // index into the matcher's _Frames of the frame currently associated with this loop
15741575 int _Loop_idx = 0; // rep counter; compared against the rep node's _Min and _Max
15751576 unsigned int _Group_first = 0; // first capture group inside the repetition, used for capture group reset
1577+ _Diff _Loop_length{}; // simple loops only: distance matched by the first rep, used to step iterators back by one rep
15761578};
15771579
15781580class _Node_rep : public _Node_base { // node that marks the beginning of a repetition
@@ -1677,7 +1679,9 @@ enum class _Rx_unwind_ops {
16771679 _Disjunction_eval_alt_always,
16781680 _Do_nothing,
16791681 _Loop_simple_nongreedy,
1680- _Loop_simple_greedy,
1682+ _Loop_simple_greedy_firstrep,
1683+ _Loop_simple_greedy_intermediaterep,
1684+ _Loop_simple_greedy_lastrep,
16811685 _Loop_nongreedy,
16821686 _Loop_greedy,
16831687 _Loop_restore_vals,
@@ -1812,7 +1816,7 @@ public:
18121816private:
18131817 _Tgt_state_t<_It> _Tgt_state;
18141818 _Tgt_state_t<_It> _Res;
1815- vector<_Loop_vals_v2_t > _Loop_vals;
1819+ vector<_Loop_vals_v3_t<_Iter_diff_t<_It>> > _Loop_vals;
18161820 vector<_Rx_state_frame_t<_It>> _Frames;
18171821 size_t _Frames_count;
18181822
@@ -1824,7 +1828,7 @@ private:
18241828 void _Increase_complexity_count();
18251829
18261830 void _Prepare_rep(_Node_rep*);
1827- bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t *);
1831+ bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v3_t<_Iter_diff_t<_It>> *);
18281832 void _Reset_capture_groups(unsigned int _First);
18291833 _It _Do_class(_Node_base*, _It);
18301834 bool _Match_pat(_Node_base*);
@@ -2321,6 +2325,8 @@ template <class _BidIt, class _Alloc, class _Elem, class _RxTraits, class _It>
23212325bool _Regex_match1(_It _First, _It _Last, match_results<_BidIt, _Alloc>* _Matches,
23222326 const basic_regex<_Elem, _RxTraits>& _Re, regex_constants::match_flag_type _Flgs,
23232327 bool _Full) { // try to match regular expression to target text
2328+ static_assert(_Is_ranges_bidi_iter_v<_It>,
2329+ "regex_match requires bidirectional iterators or stronger. See N5014 [re.alg.match]/1.");
23242330 if (_Re._Empty()) {
23252331 return false;
23262332 }
@@ -2389,6 +2395,8 @@ _NODISCARD bool regex_match(const basic_string<_Elem, _StTraits, _StAlloc>& _Str
23892395template <class _BidIt, class _Alloc, class _Elem, class _RxTraits, class _It>
23902396bool _Regex_search2(_It _First, _It _Last, match_results<_BidIt, _Alloc>* _Matches,
23912397 const basic_regex<_Elem, _RxTraits>& _Re, regex_constants::match_flag_type _Flgs, _It _Org) {
2398+ static_assert(_Is_ranges_bidi_iter_v<_It>,
2399+ "regex_search requires bidirectional iterators or stronger. See N5014 [re.alg.search]/1.");
23922400 // search for regular expression match in target text
23932401 if (_Re._Empty()) {
23942402 return false;
@@ -2491,6 +2499,8 @@ _NODISCARD bool regex_search(const basic_string<_Elem, _StTraits, _StAlloc>& _St
24912499template <class _OutIt, class _BidIt, class _RxTraits, class _Elem, class _Traits, class _Alloc>
24922500_OutIt _Regex_replace1(_OutIt _Result, _BidIt _First, _BidIt _Last, const basic_regex<_Elem, _RxTraits>& _Re,
24932501 const basic_string<_Elem, _Traits, _Alloc>& _Fmt, regex_constants::match_flag_type _Flgs) {
2502+ static_assert(_Is_ranges_bidi_iter_v<_BidIt>,
2503+ "regex_replace requires bidirectional iterators or stronger. See N5014 [re.alg.replace].");
24942504 // search and replace
24952505 match_results<_BidIt> _Matches;
24962506 _BidIt _Pos = _First;
@@ -3422,7 +3432,7 @@ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Increase_complexity_coun
34223432
34233433template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
34243434void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Prepare_rep(_Node_rep* _Node) {
3425- _Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
3435+ const auto _Psav = &_Loop_vals[_Node->_Loop_number];
34263436
34273437 // Determine first capture group in repetition for later capture group reset, if not done so previously.
34283438 // No capture group reset is performed for POSIX regexes,
@@ -3436,7 +3446,7 @@ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Prepare_rep(_Node_rep* _
34363446
34373447template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
34383448bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Find_first_inner_capture_group(
3439- _Node_base* _Nx, _Loop_vals_v2_t * _Loop_state) {
3449+ _Node_base* _Nx, _Loop_vals_v3_t<_Iter_diff_t<_It>> * _Loop_state) {
34403450 if (0 < _Max_stack_count && --_Max_stack_count <= 0) {
34413451 _Xregex_error(regex_constants::error_stack);
34423452 }
@@ -3491,8 +3501,8 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Find_first_inner_capture
34913501
34923502 case _N_rep:
34933503 {
3494- _Node_rep* _Inner_rep = static_cast<_Node_rep*>(_Nx);
3495- _Loop_vals_v2_t* _Inner_loop_state = &_Loop_vals[_Inner_rep->_Loop_number];
3504+ const auto _Inner_rep = static_cast<_Node_rep*>(_Nx);
3505+ const auto _Inner_loop_state = &_Loop_vals[_Inner_rep->_Loop_number];
34963506 if (_Find_first_inner_capture_group(_Inner_rep->_Next, _Inner_loop_state)) {
34973507 _Loop_state->_Group_first = _Inner_loop_state->_Group_first;
34983508 _Found_group = true;
@@ -4078,15 +4088,16 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
40784088 auto& _Sav = _Loop_vals[_Node->_Loop_number];
40794089
40804090 if (_Node->_Simple_loop == 1) {
4081- _Sav._Loop_frame_idx = _Push_frame(_Rx_unwind_ops::_Do_nothing, nullptr );
4091+ _Sav._Loop_frame_idx = _Push_frame(_Rx_unwind_ops::_Do_nothing, _Node );
40824092 _Increase_complexity_count();
40834093 if (_Node->_Min > 0 || (_Greedy && !_Longest && _Node->_Max != 0)) { // try a rep first
40844094 _Sav._Loop_idx = 1;
40854095 // _Next is already assigned correctly for matching a rep
40864096
40874097 // set up stack unwinding for greedy matching if no rep is allowed
40884098 if (_Node->_Min == 0) {
4089- _Push_frame(_Rx_unwind_ops::_Loop_simple_greedy, _Node);
4099+ auto& _Frame = _Frames[_Sav._Loop_frame_idx];
4100+ _Frame._Code = _Rx_unwind_ops::_Loop_simple_greedy_firstrep;
40904101 }
40914102 } else { // try tail first
40924103 _Sav._Loop_idx = 0;
@@ -4136,37 +4147,79 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41364147 auto& _Sav = _Loop_vals[_Nr->_Loop_number];
41374148 bool _Greedy = (_Nr->_Flags & _Fl_greedy) != 0;
41384149 if (_Nr->_Simple_loop != 0) {
4139- if (_Sav._Loop_idx == 1
4140- && _Tgt_state._Cur == _Frames[_Sav._Loop_frame_idx]._Pos) { // initial match empty
4141- // loop is branchless, so it will only ever match empty strings
4142- // -> we only try tail for POSIX or if minimum number of reps is non-zero
4143- // _Next is already assigned correctly for matching tail
4144-
4145- if (!(_Sflags & regex_constants::_Any_posix) && _Nr->_Min == 0) {
4146- _Failed = true;
4150+ if (_Sav._Loop_idx == 1) {
4151+ auto& _Base_frame = _Frames[_Sav._Loop_frame_idx];
4152+ _Sav._Loop_length = _STD distance(_Base_frame._Pos, _Tgt_state._Cur);
4153+
4154+ if (_Sav._Loop_length == _Iter_diff_t<_It>{}) { // initial match empty
4155+ // loop is branchless, so it will only ever match empty strings
4156+ // -> we only try tail for POSIX or if minimum number of reps is non-zero
4157+ // _Next is already assigned correctly for matching tail
4158+ if (!(_Sflags & regex_constants::_Any_posix) && _Nr->_Min == 0) {
4159+ _Failed = true;
4160+ } else {
4161+ _Increase_complexity_count();
4162+ }
4163+ break;
41474164 }
4148- } else if (_Sav._Loop_idx < _Nr->_Min) { // at least one more rep to reach minimum
4165+
4166+ // allocate stack frame holding loop-specific unwinding opcode for second rep and beyond
4167+ auto _New_frame_code = _Base_frame._Code == _Rx_unwind_ops::_Loop_simple_greedy_firstrep
4168+ ? _Rx_unwind_ops::_Loop_simple_greedy_lastrep
4169+ : _Rx_unwind_ops::_Do_nothing;
4170+ auto _New_frame_idx = _Push_frame(_New_frame_code, _Nr);
4171+ _Frames[_New_frame_idx]._Loop_frame_idx_sav = _Sav._Loop_frame_idx;
4172+ _Sav._Loop_frame_idx = _New_frame_idx;
4173+ } else { // discard stack frames for capturing group changes generated by this rep
4174+ _Frames_count = _Sav._Loop_frame_idx + 1U;
4175+ }
4176+
4177+ if (_Sav._Loop_idx < _Nr->_Min) { // at least one more rep to reach minimum
41494178 _Next = _Nr->_Next;
41504179 ++_Sav._Loop_idx;
4151- } else if (_Greedy && !_Longest && _Sav._Loop_idx != _Nr->_Max) { // one more rep to try next
4152- // set up stack unwinding for greedy matching
4153- _Push_frame(_Rx_unwind_ops::_Loop_simple_greedy, _Nr);
4180+ } else if (_Greedy && !_Longest) { // greedy matching
4181+ auto& _Frame = _Frames[_Sav._Loop_frame_idx];
4182+ if (_Frame._Code == _Rx_unwind_ops::_Do_nothing) { // min reps reached
4183+ _Frame._Code = _Rx_unwind_ops::_Loop_simple_greedy_lastrep;
4184+ // set iterator in base frame to start of prior rep
4185+ // (so to start of rep before reaching min reps)
4186+ auto& _Before_unwind_pos = _Frames[_Frame._Loop_frame_idx_sav]._Pos;
4187+ _Before_unwind_pos = _Tgt_state._Cur;
4188+ _STD advance(_Before_unwind_pos, -_Sav._Loop_length);
4189+ } else {
4190+ _STL_INTERNAL_CHECK(_Frame._Code == _Rx_unwind_ops::_Loop_simple_greedy_lastrep);
4191+ }
4192+ _Frame._Pos = _Tgt_state._Cur;
41544193
4155- _Next = _Nr->_Next;
4156- if (_Sav._Loop_idx < INT_MAX) { // avoid overflowing _Loop_idx
4157- ++_Sav._Loop_idx;
4194+ if (_Sav._Loop_idx != _Nr->_Max) { // try one more rep
4195+ _Next = _Nr->_Next;
4196+ if (_Sav._Loop_idx < INT_MAX) { // avoid overflowing _Loop_idx
4197+ ++_Sav._Loop_idx;
4198+ }
4199+ } else { // try tail
4200+ _STD advance(_Frame._Pos, -_Sav._Loop_length);
4201+ if (_Frame._Pos != _Frames[_Frame._Loop_frame_idx_sav]._Pos) {
4202+ // capturing groups must be shifted when backtracking from tail
4203+ _Frame._Code = _Rx_unwind_ops::_Loop_simple_greedy_intermediaterep;
4204+ } else {
4205+ --_Frames_count;
4206+ }
4207+ // _Next is already assigned correctly for matching tail
41584208 }
4159- } else { // non-greedy matching or greedy matching with maximum reached
4209+ } else { // non-greedy matching
41604210 // set up stack unwinding for non-greedy matching if one more rep is allowed
41614211 if (_Sav._Loop_idx != _Nr->_Max) {
4162- _Push_frame(_Rx_unwind_ops::_Loop_simple_nongreedy, _Nr);
4212+ auto& _Frame = _Frames[_Sav._Loop_frame_idx];
4213+ _Frame._Pos = _Tgt_state._Cur;
4214+ _Frame._Code = _Rx_unwind_ops::_Loop_simple_nongreedy;
4215+ _Frame._Node = _Nr;
4216+ } else {
4217+ --_Frames_count;
41634218 }
41644219 // _Next is already assigned correctly for matching tail
41654220 }
41664221
4167- if (!_Failed) {
4168- _Increase_complexity_count();
4169- }
4222+ _Increase_complexity_count();
41704223 } else {
41714224 const bool _Progress = _Frames[_Sav._Loop_frame_idx]._Pos != _Tgt_state._Cur;
41724225 if (_Sav._Loop_idx < _Nr->_Min) { // try another required match
@@ -4327,8 +4380,8 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
43274380 }
43284381 break;
43294382
4330- case _Rx_unwind_ops::_Loop_simple_greedy :
4331- // try tail if matching one more rep failed
4383+ case _Rx_unwind_ops::_Loop_simple_greedy_firstrep :
4384+ // try tail after backtracking from first rep
43324385 if (_Failed) {
43334386 auto _Node = static_cast<_Node_rep*>(_Frame._Node);
43344387
@@ -4339,6 +4392,50 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
43394392 }
43404393 break;
43414394
4395+ case _Rx_unwind_ops::_Loop_simple_greedy_intermediaterep:
4396+ // shift capturing groups, set up unwinding prior rep and try tail
4397+ // when backtracking between the second and the last attempted rep
4398+ if (_Failed) {
4399+ auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4400+
4401+ // adjust capturing group begin and end iterators by rep length
4402+ auto& _Sav = _Loop_vals[_Node->_Loop_number];
4403+ for (auto _Capture_frame_idx = _Frame._Loop_frame_idx_sav + 1U; _Capture_frame_idx != _Frames_count;
4404+ ++_Capture_frame_idx) {
4405+ const auto& _Capture_frame = _Frames[_Capture_frame_idx];
4406+ _STL_INTERNAL_CHECK(_Capture_frame._Code == _Rx_unwind_ops::_Capture_restore_begin
4407+ || _Capture_frame._Code == _Rx_unwind_ops::_Capture_restore_matched_end
4408+ || _Capture_frame._Code == _Rx_unwind_ops::_Capture_restore_unmatched_end);
4409+ auto& _Grp = _Tgt_state._Grps[_Capture_frame._Capture_idx];
4410+ _STD advance(
4411+ _Capture_frame._Code == _Rx_unwind_ops::_Capture_restore_begin ? _Grp._Begin : _Grp._End,
4412+ -_Sav._Loop_length);
4413+ }
4414+ }
4415+ _FALLTHROUGH;
4416+
4417+ case _Rx_unwind_ops::_Loop_simple_greedy_lastrep:
4418+ // set up unwinding prior rep and try tail
4419+ // when backtracking from last attempted rep
4420+ if (_Failed) {
4421+ auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4422+
4423+ _Increase_complexity_count();
4424+ _Nx = _Node->_End_rep->_Next;
4425+ _Tgt_state._Cur = _Frame._Pos;
4426+ _Failed = false;
4427+
4428+ auto& _Sav = _Loop_vals[_Node->_Loop_number];
4429+ _STD advance(_Frame._Pos, -_Sav._Loop_length);
4430+
4431+ // set up unwinding if prior rep is not first or minimum rep
4432+ if (_Frames[_Frame._Loop_frame_idx_sav]._Pos != _Frame._Pos) {
4433+ _Frame._Code = _Rx_unwind_ops::_Loop_simple_greedy_intermediaterep;
4434+ ++_Frames_count;
4435+ }
4436+ }
4437+ break;
4438+
43424439 case _Rx_unwind_ops::_Loop_greedy:
43434440 // try tail if matching one more rep failed
43444441 if (_Failed) {
0 commit comments