4 changes: 4 additions & 0 deletions src/main/java/graphql/ParseAndValidate.java
@@ -11,6 +11,8 @@
import java.util.List;
import java.util.function.Predicate;

import static java.util.Optional.ofNullable;

/**
* This class allows you to parse and validate a graphql query without executing it. It will tell you
* if its syntactically valid and also semantically valid according to the graphql specification
@@ -58,6 +60,8 @@ public static ParseAndValidateResult parse(ExecutionInput executionInput) {
//
// we allow the caller to specify new parser options by context
ParserOptions parserOptions = executionInput.getGraphQLContext().get(ParserOptions.class);
// we use the query parser options by default if they are not specified
parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultOperationParserOptions());
Parser parser = new Parser();
Document document = parser.parseDocument(executionInput.getQuery(), parserOptions);
return ParseAndValidateResult.newResult().document(document).variables(executionInput.getVariables()).build();
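The change above makes ParseAndValidate.parse look up ParserOptions in the ExecutionInput's GraphQLContext and fall back to the JVM-wide operation defaults when none are supplied. A minimal sketch of supplying per-request options this way (the query text and limit values are illustrative, not part of this change):

import graphql.ExecutionInput;
import graphql.ParseAndValidate;
import graphql.ParseAndValidateResult;
import graphql.parser.ParserOptions;

public class PerRequestParserOptionsExample {
    public static void main(String[] args) {
        // illustrative limits only; any values could be used here
        ParserOptions relaxedOptions = ParserOptions.newParserOptions()
                .maxTokens(20_000)
                .maxWhitespaceTokens(400_000)
                .build();

        ExecutionInput executionInput = ExecutionInput.newExecutionInput()
                .query("query { __typename }")
                .build();

        // ParseAndValidate.parse reads ParserOptions.class from this context;
        // when nothing is present it falls back to getDefaultOperationParserOptions()
        executionInput.getGraphQLContext().put(ParserOptions.class, relaxedOptions);

        ParseAndValidateResult result = ParseAndValidate.parse(executionInput);
        System.out.println("parse failed: " + result.isFailure());
    }
}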
11 changes: 6 additions & 5 deletions src/main/java/graphql/parser/GraphqlAntlrToLanguage.java
@@ -76,15 +76,16 @@
import static graphql.Assert.assertShouldNeverHappen;
import static graphql.collect.ImmutableKit.emptyList;
import static graphql.collect.ImmutableKit.map;
import static graphql.parser.Parser.CHANNEL_COMMENTS;
import static graphql.parser.Parser.CHANNEL_WHITESPACE;
import static graphql.parser.StringValueParsing.parseSingleQuotedString;
import static graphql.parser.StringValueParsing.parseTripleQuotedString;
import static java.util.Optional.ofNullable;

@Internal
public class GraphqlAntlrToLanguage {

private static final List<Comment> NO_COMMENTS = ImmutableKit.emptyList();
private static final int CHANNEL_COMMENTS = 2;
private static final int CHANNEL_IGNORED_CHARS = 3;
private final CommonTokenStream tokens;
private final MultiSourceReader multiSourceReader;
private final ParserOptions parserOptions;
@@ -97,7 +98,7 @@ public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiS
public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiSourceReader, ParserOptions parserOptions) {
this.tokens = tokens;
this.multiSourceReader = multiSourceReader;
this.parserOptions = parserOptions == null ? ParserOptions.getDefaultParserOptions() : parserOptions;
this.parserOptions = ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
}

public ParserOptions getParserOptions() {
@@ -791,12 +792,12 @@ private void addIgnoredChars(ParserRuleContext ctx, NodeBuilder nodeBuilder) {
}
Token start = ctx.getStart();
int tokenStartIndex = start.getTokenIndex();
List<Token> leftChannel = tokens.getHiddenTokensToLeft(tokenStartIndex, CHANNEL_IGNORED_CHARS);
List<Token> leftChannel = tokens.getHiddenTokensToLeft(tokenStartIndex, CHANNEL_WHITESPACE);
List<IgnoredChar> ignoredCharsLeft = mapTokenToIgnoredChar(leftChannel);

Token stop = ctx.getStop();
int tokenStopIndex = stop.getTokenIndex();
List<Token> rightChannel = tokens.getHiddenTokensToRight(tokenStopIndex, CHANNEL_IGNORED_CHARS);
List<Token> rightChannel = tokens.getHiddenTokensToRight(tokenStopIndex, CHANNEL_WHITESPACE);
List<IgnoredChar> ignoredCharsRight = mapTokenToIgnoredChar(rightChannel);

nodeBuilder.ignoredChars(new IgnoredChars(ignoredCharsLeft, ignoredCharsRight));
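With the channel constants now owned by Parser, addIgnoredChars reads hidden tokens from the whitespace channel when building IgnoredChars. Capturing them is opt-in; a minimal sketch of turning it on and reading the result back off the AST (the query string is illustrative, and the Node/IgnoredChars accessors are the standard graphql-java ones rather than anything introduced by this change):

import graphql.language.Definition;
import graphql.language.Document;
import graphql.language.IgnoredChars;
import graphql.parser.Parser;
import graphql.parser.ParserOptions;

public class IgnoredCharsExample {
    public static void main(String[] args) {
        ParserOptions options = ParserOptions.newParserOptions()
                .captureIgnoredChars(true) // the JVM-wide defaults below leave this off
                .build();

        Document document = new Parser().parseDocument("query   {  hello  }", options);

        // each AST node records the ignored tokens (whitespace, commas, ...) around it
        Definition<?> firstDefinition = document.getDefinitions().get(0);
        IgnoredChars ignoredChars = firstDefinition.getIgnoredChars();
        System.out.println("ignored chars to the left: " + ignoredChars.getLeft().size());
    }
}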
44 changes: 34 additions & 10 deletions src/main/java/graphql/parser/Parser.java
@@ -1,5 +1,6 @@
package graphql.parser;

import graphql.Internal;
import graphql.PublicApi;
import graphql.language.Document;
import graphql.language.Node;
@@ -24,6 +25,8 @@
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.List;
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;

/**
@@ -45,6 +48,11 @@
@PublicApi
public class Parser {

@Internal
public static final int CHANNEL_COMMENTS = 2;
@Internal
public static final int CHANNEL_WHITESPACE = 3;

/**
* Parses a string input into a graphql AST {@link Document}
*
@@ -195,7 +203,16 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
}
});

CommonTokenStream tokens = new CommonTokenStream(lexer);
// default in the parser options if they are not set
parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());

// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
int maxTokens = parserOptions.getMaxTokens();
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwCancelParseIfTooManyTokens(token, maxTokenCount, multiSourceReader);
SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);

CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);

GraphqlParser parser = new GraphqlParser(tokens);
parser.removeErrorListeners();
@@ -268,21 +285,28 @@ public int getCharPositionInLine() {

count++;
if (count > maxTokens) {
String msg = String.format("More than %d parse tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens);
SourceLocation sourceLocation = null;
String offendingToken = null;
if (token != null) {
offendingToken = node.getText();
sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
}

throw new ParseCancelledException(msg, sourceLocation, offendingToken);
throwCancelParseIfTooManyTokens(token, maxTokens, multiSourceReader);
}
}
};
parser.addParseListener(listener);
}

private void throwCancelParseIfTooManyTokens(Token token, int maxTokens, MultiSourceReader multiSourceReader) throws ParseCancelledException {
String tokenType = "grammar";
SourceLocation sourceLocation = null;
String offendingToken = null;
if (token != null) {
int channel = token.getChannel();
tokenType = channel == CHANNEL_WHITESPACE ? "whitespace" : (channel == CHANNEL_COMMENTS ? "comments" : "grammar");

offendingToken = token.getText();
sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
}
String msg = String.format("More than %d %s tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens, tokenType);
throw new ParseCancelledException(msg, sourceLocation, offendingToken);
}

/**
* Allows you to override the ANTLR to AST code.
*
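SafeTokenSource is new in this change but its source is not part of this excerpt. Conceptually it decorates the lexer and counts each emitted token against a per-channel budget, calling the supplied callback (which throws) once a budget is exceeded. The following is a simplified sketch of that idea, not the real class: it only shows a nextToken-style method rather than the full ANTLR TokenSource contract, and it lumps comment-channel tokens in with grammar tokens purely for brevity:

import java.util.function.BiConsumer;

import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenSource;

import graphql.parser.Parser;

class CountingTokenSourceSketch {
    private final TokenSource delegate;
    private final int maxTokens;
    private final int maxWhitespaceTokens;
    private final BiConsumer<Integer, Token> onTooManyTokens;
    private int grammarTokenCount;
    private int whitespaceTokenCount;

    CountingTokenSourceSketch(TokenSource delegate, int maxTokens, int maxWhitespaceTokens,
                              BiConsumer<Integer, Token> onTooManyTokens) {
        this.delegate = delegate;
        this.maxTokens = maxTokens;
        this.maxWhitespaceTokens = maxWhitespaceTokens;
        this.onTooManyTokens = onTooManyTokens;
    }

    Token nextToken() {
        Token token = delegate.nextToken();
        if (token.getChannel() == Parser.CHANNEL_WHITESPACE) {
            whitespaceTokenCount++;
            if (whitespaceTokenCount > maxWhitespaceTokens) {
                onTooManyTokens.accept(maxWhitespaceTokens, token); // expected to throw ParseCancelledException
            }
        } else {
            grammarTokenCount++;
            if (grammarTokenCount > maxTokens) {
                onTooManyTokens.accept(maxTokens, token); // expected to throw ParseCancelledException
            }
        }
        return token;
    }
}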
120 changes: 113 additions & 7 deletions src/main/java/graphql/parser/ParserOptions.java
@@ -13,32 +13,57 @@
public class ParserOptions {

/**
* An graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that wont ever execute. To prevent this for most users, graphql-java
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
* set this value to 15000. ANTLR parsing time is linear to the number of tokens presented. The more you
* allow the longer it takes.
*
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_QUERY_TOKENS = 15000;
public static final int MAX_QUERY_TOKENS = 15_000;
/**
* Another graphql hacking vector is to send large amounts of whitespace in operations that burn lots of parsing CPU time and burn
* memory representing a document. Whitespace token processing in ANTLR is 2 orders of magnitude faster than grammar token processing
* however it still takes some time to happen.
*
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_WHITESPACE_TOKENS = 200_000;

private static ParserOptions defaultJvmParserOptions = newParserOptions()
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.build();

private static ParserOptions defaultJvmOperationParserOptions = newParserOptions()
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(false) // #comments are not useful in query parsing
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.build();

private static ParserOptions defaultJvmSdlParserOptions = newParserOptions()
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true) // #comments are useful in SDL parsing
.maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers
.maxWhitespaceTokens(Integer.MAX_VALUE)
.build();

/**
* By default the Parser will not capture ignored characters. A static holds this default
* By default, the Parser will not capture ignored characters. A static holds this default
* value in a JVM wide basis options object.
*
* Significant memory savings can be made if we do NOT capture ignored characters,
* especially in SDL parsing.
*
* @return the static default value on whether to capture ignored chars
* @return the static default JVM value
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
@@ -48,7 +73,7 @@ public static ParserOptions getDefaultParserOptions() {
}

/**
* By default the Parser will not capture ignored characters. A static holds this default
* By default, the Parser will not capture ignored characters. A static holds this default
* value in a JVM wide basis options object.
*
* Significant memory savings can be made if we do NOT capture ignored characters,
@@ -65,17 +90,80 @@ public static void setDefaultParserOptions(ParserOptions options) {
defaultJvmParserOptions = assertNotNull(options);
}


/**
* By default, for operation parsing, the Parser will not capture ignored characters, and it will not capture line comments into AST
* elements. A static holds this default value for operation parsing in a JVM wide basis options object.
*
* @return the static default JVM value for operation parsing
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
*/
public static ParserOptions getDefaultOperationParserOptions() {
return defaultJvmOperationParserOptions;
}

/**
* By default, the Parser will not capture ignored characters or line comments. A static holds this default
* value in a JVM wide basis options object for operation parsing.
*
* This static can be set to true to allow the behavior of version 16.x or before.
*
* @param options - the new default JVM parser options for operation parsing
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
*/
public static void setDefaultOperationParserOptions(ParserOptions options) {
defaultJvmOperationParserOptions = assertNotNull(options);
}

/**
* By default, for SDL parsing, the Parser will not capture ignored characters, but it will capture line comments into AST
* elements. The SDL default options allow unlimited tokens and whitespace, since a DOS attack vector is
* not commonly available via schema SDL parsing.
*
* A static holds this default value for SDL parsing in a JVM wide basis options object.
*
* @return the static default JVM value for SDL parsing
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
* @see graphql.schema.idl.SchemaParser
*/
public static ParserOptions getDefaultSdlParserOptions() {
return defaultJvmSdlParserOptions;
}

/**
* By default, for SDL parsing, the Parser will not capture ignored characters, but it will capture line comments into AST
* elements. A static holds this default value for SDL parsing in a JVM wide basis options object.
*
* This static can be set to true to allow the behavior of version 16.x or before.
*
* @param options - the new default JVM parser options for SDL parsing
*
* @see graphql.language.IgnoredChar
* @see graphql.language.SourceLocation
*/
public static void setDefaultSdlParserOptions(ParserOptions options) {
defaultJvmSdlParserOptions = assertNotNull(options);
}

private final boolean captureIgnoredChars;
private final boolean captureSourceLocation;
private final boolean captureLineComments;
private final int maxTokens;
private final int maxWhitespaceTokens;
private final ParsingListener parsingListener;

private ParserOptions(Builder builder) {
this.captureIgnoredChars = builder.captureIgnoredChars;
this.captureSourceLocation = builder.captureSourceLocation;
this.captureLineComments = builder.captureLineComments;
this.maxTokens = builder.maxTokens;
this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
this.parsingListener = builder.parsingListener;
}

@@ -117,7 +205,7 @@ public boolean isCaptureLineComments() {
}

/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burns
* memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse
* tokens that will be accepted before an exception is thrown and the parsing is stopped.
*
@@ -127,6 +215,17 @@ public int getMaxTokens() {
return maxTokens;
}

/**
* A graphql hacking vector is to send large amounts of whitespace that burn lots of parsing CPU time and burn
* memory representing a document. To prevent this you can set a maximum number of whitespace parse
* tokens that will be accepted before an exception is thrown and the parsing is stopped.
*
* @return the maximum number of raw whitespace tokens the parser will accept, after which an exception will be thrown.
*/
public int getMaxWhitespaceTokens() {
return maxWhitespaceTokens;
}

public ParsingListener getParsingListener() {
return parsingListener;
}
@@ -148,6 +247,7 @@ public static class Builder {
private boolean captureLineComments = true;
private int maxTokens = MAX_QUERY_TOKENS;
private ParsingListener parsingListener = ParsingListener.NOOP;
private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;

Builder() {
}
@@ -157,6 +257,7 @@ public static class Builder {
this.captureSourceLocation = parserOptions.captureSourceLocation;
this.captureLineComments = parserOptions.captureLineComments;
this.maxTokens = parserOptions.maxTokens;
this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
this.parsingListener = parserOptions.parsingListener;
}

@@ -180,6 +281,11 @@ public Builder maxTokens(int maxTokens) {
return this;
}

public Builder maxWhitespaceTokens(int maxWhitespaceTokens) {
this.maxWhitespaceTokens = maxWhitespaceTokens;
return this;
}

public Builder parsingListener(ParsingListener parsingListener) {
this.parsingListener = assertNotNull(parsingListener);
return this;
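The three JVM-wide defaults introduced above (general, operation and SDL) can each be replaced once at application start-up through their setters. A minimal sketch of overriding the operation defaults, for example to keep #comments on operations and allow more whitespace (the concrete values are illustrative):

import graphql.parser.ParserOptions;

public class ParserDefaultsBootstrap {
    public static void main(String[] args) {
        ParserOptions operationDefaults = ParserOptions.newParserOptions()
                .captureIgnoredChars(false)
                .captureSourceLocation(true)
                .captureLineComments(true)                 // the shipped operation default is false
                .maxTokens(ParserOptions.MAX_QUERY_TOKENS)
                .maxWhitespaceTokens(500_000)              // illustrative; the shipped default is MAX_WHITESPACE_TOKENS
                .build();

        // picked up by ParseAndValidate.parse and anything else that falls back to
        // getDefaultOperationParserOptions(); setDefaultParserOptions and
        // setDefaultSdlParserOptions work the same way for the other two defaults
        ParserOptions.setDefaultOperationParserOptions(operationDefaults);
    }
}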