@@ -51,40 +51,16 @@ public static CharSequence rewrite(CharSequence regex, boolean replaceAnchors) {
5151 @ SuppressWarnings ({"CyclomaticComplexity" , "NPathComplexity" })
5252 static CharSequence replaceAnchors (CharSequence input ) {
5353 StringBuilder result = new StringBuilder ();
54- boolean inCharClass = false ;
55- boolean inLiteral = false ;
56- int backslashCount = 0 ;
54+ RegexParseState parse = new RegexParseState (input );
5755
5856 for (int i = 0 ; i < input .length (); i ++) {
59- char c = input .charAt (i );
60-
61- // Count the number of backslashes preceding this character
62- boolean escaped = (backslashCount % 2 ) != 0 ;
63- if (c == '\\' ) {
64- backslashCount += 1 ;
65- } else {
66- backslashCount = 0 ;
67- }
57+ char c = parse .next (i );
6858
69- if (!inLiteral && !inCharClass && !escaped && c == '"' ) {
70- inLiteral = true ;
71- } else if (inLiteral && c == '"' ) {
72- inLiteral = false ;
73- }
74-
75- if (!inLiteral && !escaped ) {
76- if (c == '[' ) {
77- inCharClass = true ;
78- } else if (c == ']' && inCharClass ) {
79- inCharClass = false ;
80- }
81- }
82-
83- if (inLiteral ) {
59+ if (parse .inLiteral ) {
8460 result .append (c );
85- } else if (!inCharClass && !escaped && c == '^' ) {
61+ } else if (!parse . inCharClass && !parse . escaped && c == '^' ) {
8662 result .append (START_ANCHOR_MARKER );
87- } else if (!inCharClass && !escaped && c == '$' ) {
63+ } else if (!parse . inCharClass && !parse . escaped && c == '$' ) {
8864 result .append (END_ANCHOR_MARKER );
8965 } else {
9066 result .append (c );
@@ -134,54 +110,77 @@ private static String expandCharClass(CharSequence charClass) {
134110 @ SuppressFBWarnings (value = "MUI_CONTAINSKEY_BEFORE_GET" , justification = "More obviously correct this way" )
135111 static CharSequence replaceCharClasses (CharSequence input ) {
136112 StringBuilder result = new StringBuilder ();
113+ RegexParseState parse = new RegexParseState (input );
137114 int charClassStart = -1 ;
138- boolean inCharClass = false ;
139- boolean inLiteral = false ;
140- int backslashCount = 0 ;
141115
142116 for (int i = 0 ; i < input .length (); i ++) {
143- char c = input . charAt (i );
117+ char c = parse . next (i );
144118
145- // Count the number of backslashes preceding this character
146- boolean escaped = (backslashCount % 2 ) != 0 ;
147- if (c == '\\' ) {
148- backslashCount += 1 ;
149- } else {
150- backslashCount = 0 ;
151- }
152-
153- if (!inLiteral && !inCharClass && !escaped && c == '"' ) {
154- inLiteral = true ;
155- } else if (inLiteral && c == '"' ) {
156- inLiteral = false ;
157- }
158-
159- if (inLiteral ) {
119+ if (parse .inLiteral ) {
160120 result .append (c );
161- } else if (!inCharClass && !escaped && c == '[' ) {
162- inCharClass = true ;
121+ } else if (parse .inCharClass && charClassStart == -1 ) {
163122 charClassStart = i + 1 ;
164- } else if (inCharClass && ! escaped && c == ']' ) {
123+ } else if (! parse . inCharClass && charClassStart != - 1 ) {
165124 // While expansion could be mixed into this function, it does a similar walk, it
166125 // seemed less confusing to separate into a dedicated routine.
167126 result .append (expandCharClass (input .subSequence (charClassStart , i )));
168- inCharClass = false ;
169127 charClassStart = -1 ;
170- } else if (!inCharClass && !escaped && c == '.' ) {
128+ } else if (!parse . inCharClass && !parse . escaped && c == '.' ) {
171129 // . must not match the anchors
172130 result .append ("[^" ).append (START_ANCHOR_MARKER ).append (END_ANCHOR_MARKER ).append (']' );
173- } else if (!inCharClass && escaped && CHAR_CLASSES .containsKey (c )) {
131+ } else if (!parse . inCharClass && parse . escaped && CHAR_CLASSES .containsKey (c )) {
174132 result .setLength (result .length () - 1 );
175133 result .append ('[' ).append (CHAR_CLASSES .get (c )).append (']' );
176- } else if (!inCharClass ) {
134+ } else if (!parse . inCharClass ) {
177135 result .append (c );
178136 }
179137 }
180- if (inCharClass ) {
138+ if (parse . inCharClass ) {
181139 // unclosed char class
182140 result .append ('[' ).append (input .subSequence (charClassStart , input .length ()));
183141 }
184142
185143 return result .toString ();
186144 }
145+
146+ private static class RegexParseState {
147+ final CharSequence input ;
148+ boolean escaped ;
149+ boolean inCharClass ;
150+ boolean inLiteral ;
151+ int backslashCount ;
152+
153+ RegexParseState (CharSequence input ) {
154+ this .input = input ;
155+ }
156+
157+ @ SuppressWarnings ({"CyclomaticComplexity" })
158+ char next (int i ) {
159+ char c = input .charAt (i );
160+
161+ // Count the number of backslashes preceding this character
162+ escaped = (backslashCount % 2 ) != 0 ;
163+ if (c == '\\' ) {
164+ backslashCount += 1 ;
165+ } else {
166+ backslashCount = 0 ;
167+ }
168+
169+ if (!inLiteral && !inCharClass && !escaped && c == '"' ) {
170+ inLiteral = true ;
171+ } else if (inLiteral && c == '"' ) {
172+ inLiteral = false ;
173+ }
174+
175+ if (!inLiteral && !escaped ) {
176+ if (c == '[' ) {
177+ inCharClass = true ;
178+ } else if (c == ']' && inCharClass ) {
179+ inCharClass = false ;
180+ }
181+ }
182+
183+ return c ;
184+ }
185+ }
187186}
0 commit comments