Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 78 additions & 41 deletions src/main/java/graphql/parser/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import graphql.parser.antlr.GraphqlParser;
import graphql.parser.exceptions.ParseCancelledException;
import graphql.parser.exceptions.ParseCancelledTooDeepException;
import graphql.parser.exceptions.ParseCancelledTooManyCharsException;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CodePointCharStream;
Expand All @@ -25,6 +26,7 @@
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.jetbrains.annotations.NotNull;

import java.io.IOException;
import java.io.Reader;
Expand All @@ -33,6 +35,7 @@
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;

/**
* This can parse graphql syntax, both Query syntax and Schema Definition Language (SDL) syntax, into an
Expand Down Expand Up @@ -259,6 +262,57 @@ private Node<?> parseImpl(ParserEnvironment environment, BiFunction<GraphqlParse
ParserOptions parserOptions = environment.getParserOptions();
parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());

MultiSourceReader multiSourceReader = setupMultiSourceReader(environment, parserOptions);

SafeTokenReader safeTokenReader = setupSafeTokenReader(environment, parserOptions, multiSourceReader);

CodePointCharStream charStream = setupCharStream(safeTokenReader);

GraphqlLexer lexer = setupGraphqlLexer(environment, multiSourceReader, charStream);

// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
SafeTokenSource safeTokenSource = getSafeTokenSource(environment, parserOptions, multiSourceReader, lexer);

CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);

GraphqlParser parser = new GraphqlParser(tokens);
parser.removeErrorListeners();
parser.getInterpreter().setPredictionMode(PredictionMode.SLL);

ExtendedBailStrategy bailStrategy = new ExtendedBailStrategy(multiSourceReader, environment);
parser.setErrorHandler(bailStrategy);

// preserve old protected call semantics - remove at some point
GraphqlAntlrToLanguage toLanguage = getAntlrToLanguage(tokens, multiSourceReader, environment);

setupParserListener(environment, multiSourceReader, parser, toLanguage);


//
// parsing starts ...... now!
//
Object[] contextAndNode = nodeFunction.apply(parser, toLanguage);
ParserRuleContext parserRuleContext = (ParserRuleContext) contextAndNode[0];
Node<?> node = (Node<?>) contextAndNode[1];

Token stop = parserRuleContext.getStop();
List<Token> allTokens = tokens.getTokens();
if (stop != null && allTokens != null && !allTokens.isEmpty()) {
Token last = allTokens.get(allTokens.size() - 1);
//
// do we have more tokens in the stream than we consumed in the parse?
// if yes then it's invalid. We make sure it's the same channel
boolean notEOF = last.getType() != Token.EOF;
boolean lastGreaterThanDocument = last.getTokenIndex() > stop.getTokenIndex();
boolean sameChannel = last.getChannel() == stop.getChannel();
if (notEOF && lastGreaterThanDocument && sameChannel) {
throw bailStrategy.mkMoreTokensException(last);
}
}
return node;
}

private static MultiSourceReader setupMultiSourceReader(ParserEnvironment environment, ParserOptions parserOptions) {
MultiSourceReader multiSourceReader;
Reader reader = environment.getDocument();
if (reader instanceof MultiSourceReader) {
Expand All @@ -269,13 +323,31 @@ private Node<?> parseImpl(ParserEnvironment environment, BiFunction<GraphqlParse
.trackData(parserOptions.isReaderTrackData())
.build();
}
return multiSourceReader;
}

/**
 * Wraps the source reader in a {@link SafeTokenReader} so that reading is aborted
 * (via {@link ParseCancelledTooManyCharsException}) once the configured maximum
 * number of characters has been consumed.  This is a guard against maliciously
 * large documents burning CPU and memory during parsing.
 */
@NotNull
private static SafeTokenReader setupSafeTokenReader(ParserEnvironment environment, ParserOptions parserOptions, MultiSourceReader multiSourceReader) {
    final int charLimit = parserOptions.getMaxCharacters();
    Consumer<Integer> limitExceededHandler = ignoredCount -> {
        // bail out of the parse entirely - the document is too big to be trusted
        throw new ParseCancelledTooManyCharsException(environment.getI18N(), charLimit);
    };
    return new SafeTokenReader(multiSourceReader, charLimit, limitExceededHandler);
}

/**
 * Turns the character-limited reader into the ANTLR {@link CodePointCharStream}
 * the lexer consumes.  Note that this eagerly reads the whole document, which is
 * why the reader enforces the max-characters limit.
 *
 * @param safeTokenReader the limit-enforcing reader over the source document
 *
 * @return the ANTLR character stream
 *
 * @throws UncheckedIOException if reading the document fails; callers cannot
 *                              reasonably recover from an IO failure here
 */
@NotNull
private static CodePointCharStream setupCharStream(SafeTokenReader safeTokenReader) {
    // the stale pre-refactor assignment that read from multiSourceReader has been
    // removed - only the safe (character limited) reader must ever be consumed here
    try {
        return CharStreams.fromReader(safeTokenReader);
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}

@NotNull
private static GraphqlLexer setupGraphqlLexer(ParserEnvironment environment, MultiSourceReader multiSourceReader, CodePointCharStream charStream) {
GraphqlLexer lexer = new GraphqlLexer(charStream);
lexer.removeErrorListeners();
lexer.addErrorListener(new BaseErrorListener() {
Expand All @@ -296,8 +368,11 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
throw new InvalidSyntaxException(msg, sourceLocation, null, preview, null);
}
});
return lexer;
}

// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
@NotNull
private SafeTokenSource getSafeTokenSource(ParserEnvironment environment, ParserOptions parserOptions, MultiSourceReader multiSourceReader, GraphqlLexer lexer) {
int maxTokens = parserOptions.getMaxTokens();
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems(
Expand All @@ -306,45 +381,7 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
maxTokenCount,
multiSourceReader,
ParseCancelledException.class);
SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);

CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);

GraphqlParser parser = new GraphqlParser(tokens);
parser.removeErrorListeners();
parser.getInterpreter().setPredictionMode(PredictionMode.SLL);

ExtendedBailStrategy bailStrategy = new ExtendedBailStrategy(multiSourceReader, environment);
parser.setErrorHandler(bailStrategy);

// preserve old protected call semantics - remove at some point
GraphqlAntlrToLanguage toLanguage = getAntlrToLanguage(tokens, multiSourceReader, environment);

setupParserListener(environment, multiSourceReader, parser, toLanguage);


//
// parsing starts ...... now!
//
Object[] contextAndNode = nodeFunction.apply(parser, toLanguage);
ParserRuleContext parserRuleContext = (ParserRuleContext) contextAndNode[0];
Node<?> node = (Node<?>) contextAndNode[1];

Token stop = parserRuleContext.getStop();
List<Token> allTokens = tokens.getTokens();
if (stop != null && allTokens != null && !allTokens.isEmpty()) {
Token last = allTokens.get(allTokens.size() - 1);
//
// do we have more tokens in the stream than we consumed in the parse?
// if yes then it's invalid. We make sure it's the same channel
boolean notEOF = last.getType() != Token.EOF;
boolean lastGreaterThanDocument = last.getTokenIndex() > stop.getTokenIndex();
boolean sameChannel = last.getChannel() == stop.getChannel();
if (notEOF && lastGreaterThanDocument && sameChannel) {
throw bailStrategy.mkMoreTokensException(last);
}
}
return node;
return new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
}

private void setupParserListener(ParserEnvironment environment, MultiSourceReader multiSourceReader, GraphqlParser parser, GraphqlAntlrToLanguage toLanguage) {
Expand Down
37 changes: 36 additions & 1 deletion src/main/java/graphql/parser/ParserOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,20 @@
*/
@PublicApi
public class ParserOptions {
/**
* A graphql hacking vector is to send nonsensical queries with large tokens that contain repeated characters
* that burn lots of parsing CPU time and burn memory representing a document that won't ever execute.
* To prevent this for most users, graphql-java sets this value to 1MB.
* ANTLR parsing time is linear to the number of characters presented. The more you
* allow the longer it takes.
* <p>
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_QUERY_CHARACTERS = 1024 * 1024; // 1 MB

/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* A graphql hacking vector is to send nonsensical queries with lots of tokens that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
* sets this value to 15000. ANTLR parsing time is linear to the number of tokens presented. The more you
* allow the longer it takes.
Expand Down Expand Up @@ -47,6 +58,7 @@ public class ParserOptions {
.captureSourceLocation(true)
.captureLineComments(true)
.readerTrackData(true)
.maxCharacters(MAX_QUERY_CHARACTERS)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
Expand All @@ -57,6 +69,7 @@ public class ParserOptions {
.captureSourceLocation(true)
.captureLineComments(false) // #comments are not useful in query parsing
.readerTrackData(true)
.maxCharacters(MAX_QUERY_CHARACTERS)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
Expand All @@ -67,6 +80,7 @@ public class ParserOptions {
.captureSourceLocation(true)
.captureLineComments(true) // #comments are useful in SDL parsing
.readerTrackData(true)
.maxCharacters(Integer.MAX_VALUE)
.maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers
.maxWhitespaceTokens(Integer.MAX_VALUE)
.maxRuleDepth(Integer.MAX_VALUE)
Expand Down Expand Up @@ -171,6 +185,7 @@ public static void setDefaultSdlParserOptions(ParserOptions options) {
private final boolean captureSourceLocation;
private final boolean captureLineComments;
private final boolean readerTrackData;
private final int maxCharacters;
private final int maxTokens;
private final int maxWhitespaceTokens;
private final int maxRuleDepth;
Expand All @@ -181,6 +196,7 @@ private ParserOptions(Builder builder) {
this.captureSourceLocation = builder.captureSourceLocation;
this.captureLineComments = builder.captureLineComments;
this.readerTrackData = builder.readerTrackData;
this.maxCharacters = builder.maxCharacters;
this.maxTokens = builder.maxTokens;
this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
this.maxRuleDepth = builder.maxRuleDepth;
Expand Down Expand Up @@ -233,6 +249,18 @@ public boolean isReaderTrackData() {
return readerTrackData;
}

/**
 * A graphql hacking vector is to send nonsensical queries that contain repeated characters that burn lots of parsing CPU time and burn
 * memory representing a document that won't ever execute. To prevent this for most users, graphql-java
 * sets this value to 1MB.
 *
 * @return the maximum number of characters the parser will accept, after which an exception will be thrown.
 */
public int getMaxCharacters() {
    return maxCharacters;
}


/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse
Expand Down Expand Up @@ -287,6 +315,7 @@ public static class Builder {
private boolean captureLineComments = true;
private boolean readerTrackData = true;
private ParsingListener parsingListener = ParsingListener.NOOP;
private int maxCharacters = MAX_QUERY_CHARACTERS;
private int maxTokens = MAX_QUERY_TOKENS;
private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;
private int maxRuleDepth = MAX_RULE_DEPTH;
Expand All @@ -298,6 +327,7 @@ public static class Builder {
this.captureIgnoredChars = parserOptions.captureIgnoredChars;
this.captureSourceLocation = parserOptions.captureSourceLocation;
this.captureLineComments = parserOptions.captureLineComments;
this.maxCharacters = parserOptions.maxCharacters;
this.maxTokens = parserOptions.maxTokens;
this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
this.maxRuleDepth = parserOptions.maxRuleDepth;
Expand All @@ -324,6 +354,11 @@ public Builder readerTrackData(boolean readerTrackData) {
return this;
}

/**
 * Sets the maximum number of characters the parser will read before cancelling the parse.
 * This protects against maliciously large documents; the default is {@link ParserOptions#MAX_QUERY_CHARACTERS}.
 *
 * @param maxCharacters the maximum number of characters to allow
 *
 * @return this builder, for call chaining
 */
public Builder maxCharacters(int maxCharacters) {
    this.maxCharacters = maxCharacters;
    return this;
}

public Builder maxTokens(int maxTokens) {
this.maxTokens = maxTokens;
return this;
Expand Down
95 changes: 95 additions & 0 deletions src/main/java/graphql/parser/SafeTokenReader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package graphql.parser;

import graphql.Internal;
import org.jetbrains.annotations.NotNull;

import java.io.IOException;
import java.io.Reader;
import java.nio.CharBuffer;
import java.util.function.Consumer;

/**
 * This reader will only emit a maximum number of characters from it. This is used to protect us from evil input.
 * <p>
 * If a graphql system does not have some max HTTP input limit, then this will help protect the system. This is a limit
 * of last resort. Ideally the http input should be limited, but if it's not, we have this.
 */
@Internal
public class SafeTokenReader extends Reader {

    private final Reader delegate;
    private final int maxCharacters;
    private final Consumer<Integer> whenMaxCharactersExceeded;
    // long so the running total cannot wrap around int for very large inputs,
    // which would silently disable the limit check
    private long count;

    /**
     * @param delegate                  the underlying reader to draw characters from
     * @param maxCharacters             the maximum number of characters to emit
     * @param whenMaxCharactersExceeded called (with the limit) when the limit is exceeded;
     *                                  expected to throw to stop further reading
     */
    public SafeTokenReader(Reader delegate, int maxCharacters, Consumer<Integer> whenMaxCharactersExceeded) {
        this.delegate = delegate;
        this.maxCharacters = maxCharacters;
        this.whenMaxCharactersExceeded = whenMaxCharactersExceeded;
        count = 0;
    }

    /**
     * Adds {@code howMany} characters to the running count and invokes the
     * limit callback once the configured maximum is exceeded.
     *
     * @param read    the value the delegate read call returned (-1 at end of stream)
     * @param howMany how many characters that call actually consumed
     *
     * @return {@code read}, unchanged, so callers can return it directly
     */
    private int checkHowMany(int read, int howMany) {
        if (read != -1) {
            count += howMany;
            if (count > maxCharacters) {
                whenMaxCharactersExceeded.accept(maxCharacters);
            }
        }
        return read;
    }

    @Override
    public int read(char @NotNull [] buff, int off, int len) throws IOException {
        int howMany = delegate.read(buff, off, len);
        return checkHowMany(howMany, howMany);
    }

    @Override
    public int read() throws IOException {
        int ch = delegate.read();
        return checkHowMany(ch, 1);
    }

    @Override
    public int read(@NotNull CharBuffer target) throws IOException {
        int howMany = delegate.read(target);
        return checkHowMany(howMany, howMany);
    }

    @Override
    public int read(char @NotNull [] buff) throws IOException {
        int howMany = delegate.read(buff);
        return checkHowMany(howMany, howMany);
    }

    @Override
    public void close() throws IOException {
        delegate.close();
    }

    @Override
    public long skip(long n) throws IOException {
        // skipped characters still come off the underlying stream, so they must be
        // counted too - otherwise skip() would be a way around the character limit
        long skipped = delegate.skip(n);
        if (skipped > 0) {
            count += skipped;
            if (count > maxCharacters) {
                whenMaxCharactersExceeded.accept(maxCharacters);
            }
        }
        return skipped;
    }

    @Override
    public boolean ready() throws IOException {
        return delegate.ready();
    }

    @Override
    public boolean markSupported() {
        return delegate.markSupported();
    }

    @Override
    public void mark(int readAheadLimit) throws IOException {
        delegate.mark(readAheadLimit);
    }

    @Override
    public void reset() throws IOException {
        // NOTE(review): reset() does not rewind 'count', so re-reading after a reset
        // is counted twice; this is conservative (never under-counts) - confirm intended
        delegate.reset();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package graphql.parser.exceptions;

import graphql.Internal;
import graphql.i18n.I18n;
import graphql.parser.InvalidSyntaxException;
import org.jetbrains.annotations.NotNull;

/**
 * Thrown when a parse is cancelled because the input document contained more characters
 * than the configured maximum (a defence against maliciously large documents).
 * The message text is resolved from the i18n bundle key "ParseCancelled.tooManyChars";
 * no source location or offending token is recorded.
 */
@Internal
public class ParseCancelledTooManyCharsException extends InvalidSyntaxException {

    /**
     * @param i18N          the i18n bundle used to build the human-readable message
     * @param maxCharacters the character limit that was exceeded
     */
    @Internal
    public ParseCancelledTooManyCharsException(@NotNull I18n i18N, int maxCharacters) {
        super(i18N.msg("ParseCancelled.tooManyChars", maxCharacters),
                null, null, null, null);
    }
}
Loading