Skip to content

Commit 354954c

Browse files
committed
changes based on review
1 parent e3ec67d commit 354954c

File tree

2 files changed

+37
-38
lines changed

2 files changed

+37
-38
lines changed

javascript/ql/src/semmle/javascript/security/performance/ReDoSUtil.qll

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import javascript
1717
/**
1818
* A configuration for which parts of a regular expression should be considered relevant for
1919
* the different predicates in `ReDoS.qll`.
20-
* Used to adjust the computations for either superliniear or exponential backtracking.
20+
* Used to adjust the computations for either superlinear or exponential backtracking.
2121
*/
2222
abstract class ReDoSConfiguration extends string {
2323
bindingset[this]
@@ -124,12 +124,12 @@ private class RegexpCharacterConstant extends RegExpConstant {
124124
}
125125

126126
/**
127-
* Holds if `term` is the chosen cannonical representative for all terms with string representation `str`.
127+
* Holds if `term` is the chosen canonical representative for all terms with string representation `str`.
128128
*
129-
* Using cannonical representatives gives a huge performance boost when working with tuples containing multiple `InputSymbol`s.
129+
* Using canonical representatives gives a huge performance boost when working with tuples containing multiple `InputSymbol`s.
130130
* The number of `InputSymbol`s is decreased by 3 orders of magnitude or more in some larger benchmarks.
131131
*/
132-
private predicate isCannonicalTerm(RegExpTerm term, string str) {
132+
private predicate isCanonicalTerm(RegExpTerm term, string str) {
133133
term =
134134
rank[1](RegExpTerm t, Location loc, File file |
135135
loc = t.getLocation() and
@@ -154,7 +154,7 @@ private newtype TInputSymbol =
154154
*/
155155
CharClass(string charClassString) {
156156
exists(RegExpTerm term | term.getRawValue() = charClassString | getRoot(term).isRelevant()) and
157-
exists(RegExpTerm recc | isCannonicalTerm(recc, charClassString) |
157+
exists(RegExpTerm recc | isCanonicalTerm(recc, charClassString) |
158158
recc instanceof RegExpCharacterClass and
159159
not recc.(RegExpCharacterClass).isUniversalClass()
160160
or
@@ -168,6 +168,13 @@ private newtype TInputSymbol =
168168
/** An epsilon transition in the automaton. */
169169
Epsilon()
170170

171+
/**
172+
* Gets the canonical CharClass for `term`.
173+
*/
174+
CharClass getCanonicalCharClass(RegExpTerm term) {
175+
exists(string str | isCanonicalTerm(term, str) | result = CharClass(str))
176+
}
177+
171178
/**
172179
* Holds if `a` and `b` are input symbols from the same regexp.
173180
* (And not a `Dot()`, `Any()` or `Epsilon()`)
@@ -251,10 +258,7 @@ private module CharacterClasses {
251258
*/
252259
pragma[noinline]
253260
predicate hasChildThatMatches(RegExpCharacterClass cc, string char) {
254-
exists(string str |
255-
isCannonicalTerm(cc, str) and
256-
exists(CharClass(str))
257-
) and
261+
exists(getCanonicalCharClass(cc)) and
258262
exists(RegExpTerm child | child = cc.getAChild() |
259263
char = child.(RegexpCharacterConstant).getValue()
260264
or
@@ -350,9 +354,7 @@ private module CharacterClasses {
350354
private class PositiveCharacterClass extends CharacterClass {
351355
RegExpCharacterClass cc;
352356

353-
PositiveCharacterClass() {
354-
exists(string str | isCannonicalTerm(cc, str) | this = CharClass(str) and not cc.isInverted())
355-
}
357+
PositiveCharacterClass() { this = getCanonicalCharClass(cc) and not cc.isInverted() }
356358

357359
override string getARelevantChar() { result = getAMentionedChar(cc) }
358360

@@ -365,9 +367,7 @@ private module CharacterClasses {
365367
private class InvertedCharacterClass extends CharacterClass {
366368
RegExpCharacterClass cc;
367369

368-
InvertedCharacterClass() {
369-
exists(string str | isCannonicalTerm(cc, str) | this = CharClass(str) and cc.isInverted())
370-
}
370+
InvertedCharacterClass() { this = getCanonicalCharClass(cc) and cc.isInverted() }
371371

372372
override string getARelevantChar() {
373373
result = nextChar(getAMentionedChar(cc)) or
@@ -405,9 +405,7 @@ private module CharacterClasses {
405405
RegExpCharacterClassEscape cc;
406406

407407
PositiveCharacterClassEscape() {
408-
exists(string str | isCannonicalTerm(cc, str) |
409-
this = CharClass(str) and cc.getValue() = ["d", "s", "w"]
410-
)
408+
this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
411409
}
412410

413411
override string getARelevantChar() {
@@ -442,9 +440,7 @@ private module CharacterClasses {
442440
RegExpCharacterClassEscape cc;
443441

444442
NegativeCharacterClassEscape() {
445-
exists(string str | isCannonicalTerm(cc, str) |
446-
this = CharClass(str) and cc.getValue() = ["D", "S", "W"]
447-
)
443+
this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
448444
}
449445

450446
override string getARelevantChar() {
@@ -802,14 +798,14 @@ private module PrefixConstruction {
802798
result = prefix(prev) and delta(prev, Epsilon(), state)
803799
or
804800
not delta(prev, Epsilon(), state) and
805-
result = prefix(prev) + getMinimumEdgeChar(prev, state)
801+
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
806802
)
807803
}
808804

809805
/**
810-
* Gets the minimum char for which there exists a transition from `prev` to `next` in the NFA.
806+
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
811807
*/
812-
private string getMinimumEdgeChar(State prev, State next) {
808+
private string getCanonicalEdgeChar(State prev, State next) {
813809
result =
814810
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
815811
}
@@ -882,6 +878,7 @@ private module SuffixConstruction {
882878
// all edges (at least one) with some char leads to another state that is rejectable.
883879
// the `next` states might not share a common suffix, which can cause FPs.
884880
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
881+
// noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
885882
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
886883
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
887884
)
@@ -935,7 +932,7 @@ private module SuffixConstruction {
935932
}
936933

937934
/**
938-
* Holds if there is not edge from `s` labeled with "|", "\n", or "Z" in our NFA.
935+
* Holds if there is no edge from `s` labeled with "|", "\n", or "Z" in our NFA.
939936
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
940937
*/
941938
private predicate hasSimpleRejectEdge(State s) {
@@ -953,6 +950,8 @@ private module SuffixConstruction {
953950
exists(string char, InputSymbol sym |
954951
char = w.charAt(i) and
955952
deltaClosed(prev, sym, result) and
953+
// noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`.
954+
// Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found.
956955
sym = getAProcessInputSymbol(char)
957956
)
958957
)
@@ -1015,8 +1014,8 @@ private string rotate(string str, int i) {
10151014
}
10161015

10171016
/**
1018-
* Holds if `term` may cause superliniear backtracking on strings containing many repetitions of `pump`.
1019-
* Gets the minimum possible string that causes superliniear backtracking.
1017+
* Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`.
1018+
* Gets the shortest string that causes superlinear backtracking.
10201019
*/
10211020
private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
10221021
exists(int i, string c | s = Match(term, i) |

javascript/ql/src/semmle/javascript/security/performance/SuperlinearBackTracking.qll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,10 @@ import ReDoSUtil
4444
*/
4545

4646
/**
47-
* An instantiaion of `ReDoSConfiguration` for superliniear ReDoS.
47+
* An instantiaion of `ReDoSConfiguration` for superlinear ReDoS.
4848
*/
49-
class SuperLiniearReDoSConfiguration extends ReDoSConfiguration {
50-
SuperLiniearReDoSConfiguration() { this = "SuperLiniearReDoSConfiguration" }
49+
class SuperLinearReDoSConfiguration extends ReDoSConfiguration {
50+
SuperLinearReDoSConfiguration() { this = "SuperLinearReDoSConfiguration" }
5151

5252
override predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump) }
5353
}
@@ -109,23 +109,23 @@ private module FeasibleTuple {
109109
pragma[inline]
110110
predicate isFeasibleTuple(State r1, State r2, State r3) {
111111
// The first element is either inside a repetition (or the start state itself)
112-
isRepeitionOrStart(r1) and
112+
isRepetitionOrStart(r1) and
113113
// The last element is inside a repetition
114114
stateInsideRepetition(r3) and
115115
// The states are reachable in the NFA in the order r1 -> r2 -> r3
116116
delta+(r1) = r2 and
117117
delta+(r2) = r3 and
118-
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
119-
canReachATarget(r3) and
120118
// The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
121-
canReachABeginning(r1)
119+
canReachABeginning(r1) and
120+
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
121+
canReachATarget(r3)
122122
}
123123

124124
/**
125125
* Holds if `s` is either inside a repetition, or is the start state (which is a repetition).
126126
*/
127127
pragma[noinline]
128-
private predicate isRepeitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
128+
private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
129129

130130
/**
131131
* Holds if state `s` might be inside a backtracking repetition.
@@ -155,15 +155,15 @@ private module FeasibleTuple {
155155
/**
156156
* Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
157157
*
158-
* There is a slight implementation difference compared to the paper: this predicate require that `pivot != succ`.
158+
* There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
159159
* The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query.
160160
*/
161161
predicate isStartLoops(State pivot, State succ) {
162162
pivot != succ and
163163
succ.getRepr() instanceof InfiniteRepetitionQuantifier and
164164
delta+(pivot) = succ and
165165
(
166-
pivot.getRepr() = any(InfiniteRepetitionQuantifier i)
166+
pivot.getRepr() instanceof InfiniteRepetitionQuantifier
167167
or
168168
pivot = mkMatch(any(RegExpRoot root))
169169
)
@@ -333,7 +333,7 @@ StateTuple getAnEndTuple(State pivot, State succ) {
333333
* From theorem 3 in the paper linked in the top of this file we can therefore conclude that
334334
* the regular expression has polynomial backtracking - if a rejecting suffix exists.
335335
*
336-
* This predicate is used by `SuperLiniearReDoSConfiguration`, and the final results are
336+
* This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
337337
* available in the `hasReDoSResult` predicate.
338338
*/
339339
predicate isPumpable(State pivot, State succ, string pump) {

0 commit comments

Comments
 (0)