12 changes: 12 additions & 0 deletions src/main/java/graphql/parser/ParseCancelledTooManyCharsException.java
@@ -0,0 +1,12 @@
package graphql.parser;

import graphql.Internal;

@Internal
public class ParseCancelledTooManyCharsException extends InvalidSyntaxException {

@Internal
public ParseCancelledTooManyCharsException(String msg, int maxCharacters) {
super(null, msg, null, null, null);
}
}
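For context, a hedged sketch of how calling code would observe this new exception once the character cap (wired in below via ParserOptions and SafeTokenReader) is exceeded. Parser.parseDocument is graphql-java's existing public entry point; the oversized query string and the demo class name are purely illustrative.

    import graphql.language.Document;
    import graphql.parser.InvalidSyntaxException;
    import graphql.parser.ParseCancelledTooManyCharsException;
    import graphql.parser.Parser;

    public class TooManyCharsDemo {
        public static void main(String[] args) {
            // build a query comfortably over the 1 MB default character cap introduced in this PR
            String padding = "a".repeat(2 * 1024 * 1024);
            String hugeQuery = "{ field(arg: \"" + padding + "\") }";
            try {
                Document doc = new Parser().parseDocument(hugeQuery);
            } catch (ParseCancelledTooManyCharsException e) {
                // parsing stopped early; the document was never fully tokenised
                System.out.println("rejected oversized input: " + e.getMessage());
            } catch (InvalidSyntaxException e) {
                System.out.println("other syntax problem: " + e.getMessage());
            }
        }
    }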
109 changes: 73 additions & 36 deletions src/main/java/graphql/parser/Parser.java
@@ -21,6 +21,7 @@
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.jetbrains.annotations.NotNull;

import java.io.IOException;
import java.io.Reader;
@@ -29,17 +30,18 @@
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;

/**
* This can parse graphql syntax, both Query syntax and Schema Definition Language (SDL) syntax, into an
* Abstract Syntax Tree (AST) represented by a {@link Document}
* <p>
* You should not generally need to call this class as the {@link graphql.GraphQL} code sets this up for you
* but if you are doing specific graphql utilities this class is essential.
*
* <p>
* Graphql syntax has a series of characters, such as spaces, new lines and commas that are not considered relevant
* to the syntax. However they can be captured and associated with the AST elements they belong to.
*
* <p>
* This costs more memory but for certain use cases (like editors) this may be useful. We have chosen not to capture
* ignored characters by default but you can turn this on, either per parse or statically for the whole JVM
* via {@link ParserOptions#setDefaultParserOptions(ParserOptions)}
@@ -205,43 +207,18 @@ private Type<?> parseTypeImpl(String input) throws InvalidSyntaxException {
}

private Node<?> parseImpl(Reader reader, BiFunction<GraphqlParser, GraphqlAntlrToLanguage, Object[]> nodeFunction, ParserOptions parserOptions) throws InvalidSyntaxException {
MultiSourceReader multiSourceReader;
if (reader instanceof MultiSourceReader) {
multiSourceReader = (MultiSourceReader) reader;
} else {
multiSourceReader = MultiSourceReader.newMultiSourceReader()
.reader(reader, null).build();
}
CodePointCharStream charStream;
try {
charStream = CharStreams.fromReader(multiSourceReader);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());

GraphqlLexer lexer = new GraphqlLexer(charStream);
lexer.removeErrorListeners();
lexer.addErrorListener(new BaseErrorListener() {
@Override
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) {
SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, line, charPositionInLine);
String preview = AntlrHelper.createPreview(multiSourceReader, line);
throw new InvalidSyntaxException(sourceLocation, msg, preview, null, null);
}
});
MultiSourceReader multiSourceReader = setupMultiSourceReader(reader, parserOptions);

// default in the parser options if they are not set
parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
SafeTokenReader safeTokenReader = setupSafeTokenReader(parserOptions, multiSourceReader);

CodePointCharStream charStream = setupCharStream(safeTokenReader);

GraphqlLexer lexer = setupGraphqlLexer(multiSourceReader, charStream);

// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
int maxTokens = parserOptions.getMaxTokens();
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems(
token,
maxTokenCount,
multiSourceReader,
ParseCancelledException.class);
SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
SafeTokenSource safeTokenSource = getSafeTokenSource(parserOptions, multiSourceReader, lexer);

CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);

@@ -285,6 +262,65 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
return node;
}

private static MultiSourceReader setupMultiSourceReader(Reader reader, ParserOptions parserOptions) {
MultiSourceReader multiSourceReader;
if (reader instanceof MultiSourceReader) {
multiSourceReader = (MultiSourceReader) reader;
} else {
multiSourceReader = MultiSourceReader.newMultiSourceReader()
.reader(reader, null).build();
}
return multiSourceReader;
}

@NotNull
private static SafeTokenReader setupSafeTokenReader(ParserOptions parserOptions, MultiSourceReader multiSourceReader) {
int maxCharacters = parserOptions.getMaxCharacters();
Consumer<Integer> onTooManyCharacters = it -> {
String msg = String.format("More than %d characters have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxCharacters);
throw new ParseCancelledTooManyCharsException(msg, maxCharacters);
};
return new SafeTokenReader(multiSourceReader, maxCharacters, onTooManyCharacters);
}

@NotNull
private static CodePointCharStream setupCharStream(SafeTokenReader safeTokenReader) {
CodePointCharStream charStream;
try {
charStream = CharStreams.fromReader(safeTokenReader);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
return charStream;
}

@NotNull
private static GraphqlLexer setupGraphqlLexer(MultiSourceReader multiSourceReader, CodePointCharStream charStream) {
GraphqlLexer lexer = new GraphqlLexer(charStream);
lexer.removeErrorListeners();
lexer.addErrorListener(new BaseErrorListener() {
@Override
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) {
SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, line, charPositionInLine);
String preview = AntlrHelper.createPreview(multiSourceReader, line);
throw new InvalidSyntaxException(sourceLocation, msg, preview, null, null);
}
});
return lexer;
}

@NotNull
private SafeTokenSource getSafeTokenSource(ParserOptions parserOptions, MultiSourceReader multiSourceReader, GraphqlLexer lexer) {
int maxTokens = parserOptions.getMaxTokens();
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems(
token,
maxTokenCount,
multiSourceReader,
ParseCancelledException.class);
return new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
}

private void setupParserListener(MultiSourceReader multiSourceReader, GraphqlParser parser, GraphqlAntlrToLanguage toLanguage) {
ParserOptions parserOptions = toLanguage.getParserOptions();
ParsingListener parsingListener = parserOptions.getParsingListener();
@@ -365,7 +401,8 @@ private void throwIfTokenProblems(Token token, int maxLimit, MultiSourceReader m
throw new ParseCancelledTooDeepException(msg, sourceLocation, offendingToken, maxLimit, tokenType);
}
String msg = String.format("More than %d %s tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxLimit, tokenType);
throw new ParseCancelledException(msg, sourceLocation, offendingToken); }
throw new ParseCancelledException(msg, sourceLocation, offendingToken);
}

/**
* Allows you to override the ANTLR to AST code.
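The Parser Javadoc above notes that ignored-character capture is off by default and can be enabled per parse or statically for the whole JVM. A hedged sketch of the JVM-wide route, using the ParserOptions builder methods visible elsewhere in this diff (ParserOptions.newParserOptions() is assumed to be the builder entry point):

    import graphql.parser.ParserOptions;

    public class DefaultOptionsSetup {
        public static void main(String[] args) {
            // JVM-wide defaults, picked up by later parses that do not supply their own options
            ParserOptions options = ParserOptions.newParserOptions()
                    .captureIgnoredChars(true)   // attach whitespace/commas to AST nodes (costs memory)
                    .captureSourceLocation(true)
                    .build();
            ParserOptions.setDefaultParserOptions(options);
        }
    }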
37 changes: 36 additions & 1 deletion src/main/java/graphql/parser/ParserOptions.java
@@ -11,9 +11,20 @@
*/
@PublicApi
public class ParserOptions {
/**
* A graphql hacking vector is to send nonsensical queries with large tokens that contain repeated characters,
* which burn lots of parsing CPU time and memory representing a document that won't ever execute.
* To prevent this for most users, graphql-java sets this value to 1MB.
* ANTLR parsing time is linear to the number of characters presented. The more you
* allow, the longer it takes.
* <p>
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_QUERY_CHARACTERS = 1024 * 1024; // 1 MB

/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* A graphql hacking vector is to send nonsensical queries with lots of tokens that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
* sets this value to 15000. ANTLR parsing time is linear to the number of tokens presented. The more you
* allow the longer it takes.
@@ -46,6 +57,7 @@ public class ParserOptions {
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true)
.maxCharacters(MAX_QUERY_CHARACTERS)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
@@ -55,6 +67,7 @@
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(false) // #comments are not useful in query parsing
.maxCharacters(MAX_QUERY_CHARACTERS)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
@@ -64,6 +77,7 @@
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true) // #comments are useful in SDL parsing
.maxCharacters(Integer.MAX_VALUE)
.maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers
.maxWhitespaceTokens(Integer.MAX_VALUE)
.maxRuleDepth(Integer.MAX_VALUE)
@@ -167,6 +181,7 @@ public static void setDefaultSdlParserOptions(ParserOptions options) {
private final boolean captureIgnoredChars;
private final boolean captureSourceLocation;
private final boolean captureLineComments;
private final int maxCharacters;
private final int maxTokens;
private final int maxWhitespaceTokens;
private final int maxRuleDepth;
@@ -176,6 +191,7 @@ private ParserOptions(Builder builder) {
this.captureIgnoredChars = builder.captureIgnoredChars;
this.captureSourceLocation = builder.captureSourceLocation;
this.captureLineComments = builder.captureLineComments;
this.maxCharacters = builder.maxCharacters;
this.maxTokens = builder.maxTokens;
this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
this.maxRuleDepth = builder.maxRuleDepth;
@@ -219,6 +235,18 @@ public boolean isCaptureLineComments() {
return captureLineComments;
}

/**
* A graphql hacking vector is to send nonsensical queries that contain repeated characters that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
* sets this value to 1MB.
*
* @return the maximum number of characters the parser will accept, after which an exception will be thrown.
*/
public int getMaxCharacters() {
return maxCharacters;
}


/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse
@@ -272,6 +300,7 @@ public static class Builder {
private boolean captureSourceLocation = true;
private boolean captureLineComments = true;
private ParsingListener parsingListener = ParsingListener.NOOP;
private int maxCharacters = MAX_QUERY_CHARACTERS;
private int maxTokens = MAX_QUERY_TOKENS;
private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;
private int maxRuleDepth = MAX_RULE_DEPTH;
@@ -283,6 +312,7 @@ public static class Builder {
this.captureIgnoredChars = parserOptions.captureIgnoredChars;
this.captureSourceLocation = parserOptions.captureSourceLocation;
this.captureLineComments = parserOptions.captureLineComments;
this.maxCharacters = parserOptions.maxCharacters;
this.maxTokens = parserOptions.maxTokens;
this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
this.maxRuleDepth = parserOptions.maxRuleDepth;
@@ -304,6 +334,11 @@ public Builder captureLineComments(boolean captureLineComments) {
return this;
}

public Builder maxCharacters(int maxCharacters) {
this.maxCharacters = maxCharacters;
return this;
}

public Builder maxTokens(int maxTokens) {
this.maxTokens = maxTokens;
return this;
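A hedged sketch of raising the new character cap for a deployment that legitimately accepts very large queries. maxCharacters(int) and setDefaultParserOptions(ParserOptions) appear in this diff; the 10 MB figure is an arbitrary example, not a recommendation.

    import graphql.parser.ParserOptions;

    public class RaiseCharacterCap {
        public static void main(String[] args) {
            ParserOptions relaxed = ParserOptions.newParserOptions()
                    .maxCharacters(10 * 1024 * 1024) // 10 MB instead of the 1 MB default
                    .maxTokens(50_000)               // the token cap can be raised alongside it
                    .build();
            // affects subsequent query parsing JVM-wide
            ParserOptions.setDefaultParserOptions(relaxed);
        }
    }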
95 changes: 95 additions & 0 deletions src/main/java/graphql/parser/SafeTokenReader.java
@@ -0,0 +1,95 @@
package graphql.parser;

import graphql.Internal;
import org.jetbrains.annotations.NotNull;

import java.io.IOException;
import java.io.Reader;
import java.nio.CharBuffer;
import java.util.function.Consumer;

/**
* This reader will only emit a maximum number of characters. This is used to protect us from evil input.
* <p>
* If a graphql system does not have some maximum HTTP input limit, then this will help protect the system. This is a limit
* of last resort. Ideally the HTTP input should be limited, but if it's not, we have this.
*/
@Internal
public class SafeTokenReader extends Reader {

private final Reader delegate;
private final int maxCharacters;
private final Consumer<Integer> whenMaxCharactersExceeded;
private int count;

public SafeTokenReader(Reader delegate, int maxCharacters, Consumer<Integer> whenMaxCharactersExceeded) {
this.delegate = delegate;
this.maxCharacters = maxCharacters;
this.whenMaxCharactersExceeded = whenMaxCharactersExceeded;
count = 0;
}

private int checkHowMany(int read, int howMany) {
if (read != -1) {
count += howMany;
if (count > maxCharacters) {
whenMaxCharactersExceeded.accept(maxCharacters);
}
}
return read;
}

@Override
public int read(char @NotNull [] buff, int off, int len) throws IOException {
int howMany = delegate.read(buff, off, len);
return checkHowMany(howMany, howMany);
}

@Override
public int read() throws IOException {
int ch = delegate.read();
return checkHowMany(ch, 1);
}

@Override
public int read(@NotNull CharBuffer target) throws IOException {
int howMany = delegate.read(target);
return checkHowMany(howMany, howMany);
}

@Override
public int read(char @NotNull [] buff) throws IOException {
int howMany = delegate.read(buff);
return checkHowMany(howMany, howMany);
}

@Override
public void close() throws IOException {
delegate.close();
}

@Override
public long skip(long n) throws IOException {
return delegate.skip(n);
}

@Override
public boolean ready() throws IOException {
return delegate.ready();
}

@Override
public boolean markSupported() {
return delegate.markSupported();
}

@Override
public void mark(int readAheadLimit) throws IOException {
delegate.mark(readAheadLimit);
}

@Override
public void reset() throws IOException {
delegate.reset();
}
}
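SafeTokenReader is @Internal, but a small standalone sketch makes its contract clear: it delegates every read to the wrapped Reader and invokes the callback once the running character count passes the limit. The query text and the 16-character cap below are arbitrary illustrative values.

    import graphql.parser.SafeTokenReader;

    import java.io.IOException;
    import java.io.Reader;
    import java.io.StringReader;
    import java.util.function.Consumer;

    public class SafeTokenReaderDemo {
        public static void main(String[] args) throws IOException {
            int maxCharacters = 16;
            Consumer<Integer> onTooMany = max -> {
                throw new IllegalStateException("more than " + max + " characters presented");
            };
            Reader capped = new SafeTokenReader(
                    new StringReader("{ allTheThings { id name } }"), maxCharacters, onTooMany);

            char[] buffer = new char[8];
            while (capped.read(buffer) != -1) {
                // the third 8-char read pushes the count to 24, past the 16-character cap,
                // so the callback above throws before the reader is exhausted
            }
        }
    }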