@@ -144,6 +144,48 @@ class BacktrackStack {
144144 DISALLOW_COPY_AND_ASSIGN (BacktrackStack);
145145};
146146
147+ // Registers used during interpreter execution. These consist of output
148+ // registers in indices [0, output_register_count[ which will contain matcher
149+ // results as a {start,end} index tuple for each capture (where the whole match
150+ // counts as implicit capture 0); and internal registers in indices
151+ // [output_register_count, total_register_count[.
152+ class InterpreterRegisters {
153+ public:
154+ using RegisterT = int ;
155+
156+ InterpreterRegisters (int total_register_count, RegisterT* output_registers,
157+ int output_register_count)
158+ : registers_(total_register_count),
159+ output_registers_ (output_registers),
160+ output_register_count_(output_register_count) {
161+ // TODO(jgruber): Use int32_t consistently for registers. Currently, CSA
162+ // uses int32_t while runtime uses int.
163+ STATIC_ASSERT (sizeof (int ) == sizeof (int32_t ));
164+ DCHECK_GE (output_register_count, 2 ); // At least 2 for the match itself.
165+ DCHECK_GE (total_register_count, output_register_count);
166+ DCHECK_LE (total_register_count, RegExpMacroAssembler::kMaxRegisterCount );
167+ DCHECK_NOT_NULL (output_registers);
168+
169+ // Initialize the output register region to -1 signifying 'no match'.
170+ std::memset (registers_.data (), -1 ,
171+ output_register_count * sizeof (RegisterT));
172+ }
173+
174+ const RegisterT& operator [](size_t index) const { return registers_[index]; }
175+ RegisterT& operator [](size_t index) { return registers_[index]; }
176+
177+ void CopyToOutputRegisters () {
178+ MemCopy (output_registers_, registers_.data (),
179+ output_register_count_ * sizeof (RegisterT));
180+ }
181+
182+ private:
183+ static constexpr int kStaticCapacity = 64 ; // Arbitrary.
184+ base::SmallVector<RegisterT, kStaticCapacity > registers_;
185+ RegisterT* const output_registers_;
186+ const int output_register_count_;
187+ };
188+
147189IrregexpInterpreter::Result ThrowStackOverflow (Isolate* isolate,
148190 RegExp::CallOrigin call_origin) {
149191 CHECK (call_origin == RegExp::CallOrigin::kFromRuntime );
@@ -305,12 +347,12 @@ bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
305347#endif // DEBUG
306348
307349template <typename Char>
308- IrregexpInterpreter::Result RawMatch (Isolate* isolate, ByteArray code_array,
309- String subject_string,
310- Vector<const Char> subject, int * registers ,
311- int current, uint32_t current_char ,
312- RegExp::CallOrigin call_origin,
313- const uint32_t backtrack_limit) {
350+ IrregexpInterpreter::Result RawMatch (
351+ Isolate* isolate, ByteArray code_array, String subject_string,
352+ Vector<const Char> subject, int * output_registers ,
353+ int output_register_count, int total_register_count, int current ,
354+ uint32_t current_char, RegExp::CallOrigin call_origin,
355+ const uint32_t backtrack_limit) {
314356 DisallowHeapAllocation no_gc;
315357
316358#if V8_USE_COMPUTED_GOTO
@@ -364,6 +406,8 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
364406 const byte* pc = code_array.GetDataStartAddress ();
365407 const byte* code_base = pc;
366408
409+ InterpreterRegisters registers (total_register_count, output_registers,
410+ output_register_count);
367411 BacktrackStack backtrack_stack;
368412
369413 uint32_t backtrack_count = 0 ;
@@ -471,6 +515,7 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
471515 BYTECODE (SUCCEED ) {
472516 isolate->counters ()->regexp_backtracks ()->AddSample (
473517 static_cast <int >(backtrack_count));
518+ registers.CopyToOutputRegisters ();
474519 return IrregexpInterpreter::SUCCESS ;
475520 }
476521 BYTECODE (ADVANCE_CP ) {
@@ -952,24 +997,25 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
952997
953998// static
954999IrregexpInterpreter::Result IrregexpInterpreter::Match (
955- Isolate* isolate, JSRegExp regexp, String subject_string, int * registers,
956- int registers_length, int start_position, RegExp::CallOrigin call_origin) {
957- if (FLAG_regexp_tier_up) {
958- regexp.TierUpTick ();
959- }
1000+ Isolate* isolate, JSRegExp regexp, String subject_string,
1001+ int * output_registers, int output_register_count, int start_position,
1002+ RegExp::CallOrigin call_origin) {
1003+ if (FLAG_regexp_tier_up) regexp.TierUpTick ();
9601004
9611005 bool is_one_byte = String::IsOneByteRepresentationUnderneath (subject_string);
9621006 ByteArray code_array = ByteArray::cast (regexp.Bytecode (is_one_byte));
1007+ int total_register_count = regexp.MaxRegisterCount ();
9631008
964- return MatchInternal (isolate, code_array, subject_string, registers ,
965- registers_length, start_position, call_origin ,
966- regexp.BacktrackLimit ());
1009+ return MatchInternal (isolate, code_array, subject_string, output_registers ,
1010+ output_register_count, total_register_count ,
1011+ start_position, call_origin, regexp.BacktrackLimit ());
9671012}
9681013
9691014IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal (
9701015 Isolate* isolate, ByteArray code_array, String subject_string,
971- int * registers, int registers_length, int start_position,
972- RegExp::CallOrigin call_origin, uint32_t backtrack_limit) {
1016+ int * output_registers, int output_register_count, int total_register_count,
1017+ int start_position, RegExp::CallOrigin call_origin,
1018+ uint32_t backtrack_limit) {
9731019 DCHECK (subject_string.IsFlat ());
9741020
9751021 // Note: Heap allocation *is* allowed in two situations if calling from
@@ -980,27 +1026,23 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
9801026 // after interrupts have run.
9811027 DisallowHeapAllocation no_gc;
9821028
983- // Reset registers to -1 (=undefined).
984- // This is necessary because registers are only written when a
985- // capture group matched.
986- // Resetting them ensures that previous matches are cleared.
987- memset (registers, -1 , sizeof (registers[0 ]) * registers_length);
988-
9891029 uc16 previous_char = ' \n ' ;
9901030 String::FlatContent subject_content = subject_string.GetFlatContent (no_gc);
9911031 if (subject_content.IsOneByte ()) {
9921032 Vector<const uint8_t > subject_vector = subject_content.ToOneByteVector ();
9931033 if (start_position != 0 ) previous_char = subject_vector[start_position - 1 ];
9941034 return RawMatch (isolate, code_array, subject_string, subject_vector,
995- registers, start_position, previous_char, call_origin,
996- backtrack_limit);
1035+ output_registers, output_register_count,
1036+ total_register_count, start_position, previous_char,
1037+ call_origin, backtrack_limit);
9971038 } else {
9981039 DCHECK (subject_content.IsTwoByte ());
9991040 Vector<const uc16> subject_vector = subject_content.ToUC16Vector ();
10001041 if (start_position != 0 ) previous_char = subject_vector[start_position - 1 ];
10011042 return RawMatch (isolate, code_array, subject_string, subject_vector,
1002- registers, start_position, previous_char, call_origin,
1003- backtrack_limit);
1043+ output_registers, output_register_count,
1044+ total_register_count, start_position, previous_char,
1045+ call_origin, backtrack_limit);
10041046 }
10051047}
10061048
@@ -1009,11 +1051,11 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
10091051// This method is called through an external reference from RegExpExecInternal
10101052// builtin.
10111053IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs (
1012- Address subject, int32_t start_position, Address, Address, int * registers,
1013- int32_t registers_length, Address, RegExp::CallOrigin call_origin ,
1014- Isolate* isolate, Address regexp) {
1054+ Address subject, int32_t start_position, Address, Address,
1055+ int * output_registers, int32_t output_register_count, Address ,
1056+ RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) {
10151057 DCHECK_NOT_NULL (isolate);
1016- DCHECK_NOT_NULL (registers );
1058+ DCHECK_NOT_NULL (output_registers );
10171059 DCHECK (call_origin == RegExp::CallOrigin::kFromJs );
10181060
10191061 DisallowHeapAllocation no_gc;
@@ -1028,38 +1070,18 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
10281070 return IrregexpInterpreter::RETRY ;
10291071 }
10301072
1031- // In generated code, registers are allocated on the stack. The given
1032- // `registers` argument is only guaranteed to hold enough space for permanent
1033- // registers (i.e. for captures), and not for temporary registers used only
1034- // during matcher execution. We match that behavior in the interpreter by
1035- // using a SmallVector as internal register storage.
1036- static constexpr int kBaseRegisterArraySize = 64 ; // Arbitrary.
1037- const int internal_register_count =
1038- Smi::ToInt (regexp_obj.DataAt (JSRegExp::kIrregexpMaxRegisterCountIndex ));
1039- base::SmallVector<int , kBaseRegisterArraySize > internal_registers (
1040- internal_register_count);
1041-
1042- Result result =
1043- Match (isolate, regexp_obj, subject_string, internal_registers.data (),
1044- internal_register_count, start_position, call_origin);
1045-
1046- // Copy capture registers to the output array.
1047- if (result == IrregexpInterpreter::SUCCESS ) {
1048- CHECK_GE (internal_registers.size (), registers_length);
1049- MemCopy (registers, internal_registers.data (),
1050- registers_length * sizeof (registers[0 ]));
1051- }
1052-
1053- return result;
1073+ return Match (isolate, regexp_obj, subject_string, output_registers,
1074+ output_register_count, start_position, call_origin);
10541075}
10551076
10561077#endif // !COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
10571078
10581079IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime (
10591080 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject_string,
1060- int * registers, int registers_length, int start_position) {
1061- return Match (isolate, *regexp, *subject_string, registers, registers_length,
1062- start_position, RegExp::CallOrigin::kFromRuntime );
1081+ int * output_registers, int output_register_count, int start_position) {
1082+ return Match (isolate, *regexp, *subject_string, output_registers,
1083+ output_register_count, start_position,
1084+ RegExp::CallOrigin::kFromRuntime );
10631085}
10641086
10651087} // namespace internal
0 commit comments