4 changes: 4 additions & 0 deletions src/main/java/graphql/parser/GraphqlAntlrToLanguage.java
@@ -99,6 +99,10 @@ public GraphqlAntlrToLanguage(CommonTokenStream tokens, MultiSourceReader multiS
this.parserOptions = parserOptions == null ? ParserOptions.getDefaultParserOptions() : parserOptions;
}

public ParserOptions getParserOptions() {
return parserOptions;
}

Review comment (Member Author): @andimarek - we will want to be careful here with our Nadel hacks.

//MARKER START: Here GraphqlOperation.g4 specific methods begin


12 changes: 12 additions & 0 deletions src/main/java/graphql/parser/ParseCancelledException.java
@@ -0,0 +1,12 @@
package graphql.parser;

import graphql.PublicApi;
import graphql.language.SourceLocation;

@PublicApi
public class ParseCancelledException extends InvalidSyntaxException {

public ParseCancelledException(String msg, SourceLocation sourceLocation, String offendingToken) {
super(sourceLocation, msg, null, offendingToken, null);
}
}
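
Usage note: because ParseCancelledException extends InvalidSyntaxException, existing callers that catch the latter will also see the new cancellation case. A minimal caller-side sketch, not part of this diff; the class and method names are illustrative only:

import graphql.language.Document;
import graphql.parser.InvalidSyntaxException;
import graphql.parser.ParseCancelledException;
import graphql.parser.Parser;

public class UntrustedQueryParsing {
    public static Document parseOrNull(String query) {
        try {
            return new Parser().parseDocument(query);
        } catch (ParseCancelledException e) {
            // too many tokens - treat as a potential denial of service attempt
            return null;
        } catch (InvalidSyntaxException e) {
            // ordinary syntax error in the query text
            return null;
        }
    }
}
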
39 changes: 37 additions & 2 deletions src/main/java/graphql/parser/Parser.java
@@ -5,6 +5,7 @@
import graphql.language.Node;
import graphql.language.SourceLocation;
import graphql.language.Value;
import graphql.parser.antlr.GraphqlBaseListener;
import graphql.parser.antlr.GraphqlLexer;
import graphql.parser.antlr.GraphqlParser;
import org.antlr.v4.runtime.BaseErrorListener;
@@ -16,6 +17,8 @@
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.TerminalNode;

import java.io.IOException;
import java.io.Reader;
@@ -144,7 +147,7 @@ public Document parseDocument(Reader reader, ParserOptions parserOptions) throws
return parseDocumentImpl(reader, parserOptions);
}

private Document parseDocumentImpl(Reader reader, ParserOptions parserOptions) throws InvalidSyntaxException {
private Document parseDocumentImpl(Reader reader, ParserOptions parserOptions) throws InvalidSyntaxException, ParseCancelledException {
BiFunction<GraphqlParser, GraphqlAntlrToLanguage, Object[]> nodeFunction = (parser, toLanguage) -> {
GraphqlParser.DocumentContext documentContext = parser.document();
Document doc = toLanguage.createDocument(documentContext);
@@ -188,7 +191,7 @@ private Node<?> parseImpl(Reader reader, BiFunction<GraphqlParser, GraphqlAntlrT
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) {
SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, line, charPositionInLine);
String preview = AntlrHelper.createPreview(multiSourceReader, line);
throw new InvalidSyntaxException(sourceLocation, "Invalid syntax: " + msg, preview, null, null);
throw new InvalidSyntaxException(sourceLocation, msg, preview, null, null);
}
});

@@ -206,6 +209,13 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
if (toLanguage == null) {
toLanguage = getAntlrToLanguage(tokens, multiSourceReader, parserOptions);
}

setupParserListener(multiSourceReader, parser, toLanguage);


//
// parsing starts ...... now!
//
Object[] contextAndNode = nodeFunction.apply(parser, toLanguage);
ParserRuleContext parserRuleContext = (ParserRuleContext) contextAndNode[0];
Node<?> node = (Node<?>) contextAndNode[1];
@@ -227,6 +237,31 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
return node;
}

private void setupParserListener(MultiSourceReader multiSourceReader, GraphqlParser parser, GraphqlAntlrToLanguage toLanguage) {
int maxTokens = toLanguage.getParserOptions().getMaxTokens();
        // prevent billion laughs attacks by restricting how many tokens we allow
ParseTreeListener listener = new GraphqlBaseListener() {
int count = 0;

@Override
public void visitTerminal(TerminalNode node) {
count++;
if (count > maxTokens) {
String msg = String.format("More than %d parse tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxTokens);
SourceLocation sourceLocation = null;
String offendingToken = null;
if (node.getSymbol() != null) {
offendingToken = node.getText();
sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, node.getSymbol().getLine(), node.getSymbol().getCharPositionInLine());
}

throw new ParseCancelledException(msg, sourceLocation, offendingToken);
}
}
};
parser.addParseListener(listener);
}

/**
* Allows you to override the ANTLR to AST code.
*
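
Usage note: the per-call override works with the parseDocument(..., parserOptions) overloads above. A small sketch mirroring the tests further down in this PR; the 30,000 figure is illustrative:

import graphql.language.Document;
import graphql.parser.Parser;
import graphql.parser.ParserOptions;

public class ParseLargeTrustedDocument {
    public static void main(String[] args) {
        // raise the limit for this one parse only - the JVM-wide default is untouched
        ParserOptions options = ParserOptions.newParserOptions()
                .maxTokens(30_000)
                .build();
        Document doc = new Parser().parseDocument("query { field }", options);
        System.out.println(doc.getDefinitions().size());
    }
}
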
49 changes: 49 additions & 0 deletions src/main/java/graphql/parser/ParserOptions.java
@@ -3,15 +3,30 @@
import graphql.Assert;
import graphql.PublicApi;

import java.util.function.Consumer;

/**
* Options that control how the {@link Parser} behaves.
*/
@PublicApi
public class ParserOptions {

/**
     * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
     * memory representing a document that won't ever execute. To prevent this for most users, graphql-java
     * sets this value to 15000. ANTLR parsing time is linear in the number of tokens presented. The more you
     * allow, the longer it takes.
*
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static int MAX_QUERY_TOKENS = 15000;

private static ParserOptions defaultJvmParserOptions = newParserOptions()
.captureIgnoredChars(false)
.captureSourceLocation(true)
            .maxTokens(MAX_QUERY_TOKENS) // to prevent billion-laughs-style attacks, we set a default for graphql-java

.build();

/**
@@ -50,10 +65,12 @@ public static void setDefaultParserOptions(ParserOptions options) {

private final boolean captureIgnoredChars;
private final boolean captureSourceLocation;
private final int maxTokens;

private ParserOptions(Builder builder) {
this.captureIgnoredChars = builder.captureIgnoredChars;
this.captureSourceLocation = builder.captureSourceLocation;
this.maxTokens = builder.maxTokens;
}

/**
Expand All @@ -79,6 +96,23 @@ public boolean isCaptureSourceLocation() {
return captureSourceLocation;
}

/**
     * A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
     * memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse
* tokens that will be accepted before an exception is thrown and the parsing is stopped.
*
* @return the maximum number of raw tokens the parser will accept, after which an exception will be thrown.
*/
public int getMaxTokens() {
return maxTokens;
}

public ParserOptions transform(Consumer<Builder> builderConsumer) {
Builder builder = new Builder(this);
builderConsumer.accept(builder);
return builder.build();
}

public static Builder newParserOptions() {
return new Builder();
}
@@ -87,6 +121,16 @@ public static class Builder {

private boolean captureIgnoredChars = false;
private boolean captureSourceLocation = true;
private int maxTokens = MAX_QUERY_TOKENS;

Builder() {
}

Builder(ParserOptions parserOptions) {
this.captureIgnoredChars = parserOptions.captureIgnoredChars;
this.captureSourceLocation = parserOptions.captureSourceLocation;
this.maxTokens = parserOptions.maxTokens;
}

public Builder captureIgnoredChars(boolean captureIgnoredChars) {
this.captureIgnoredChars = captureIgnoredChars;
@@ -98,6 +142,11 @@ public Builder captureSourceLocation(boolean captureSourceLocation) {
return this;
}

public Builder maxTokens(int maxTokens) {
this.maxTokens = maxTokens;
return this;
}

public ParserOptions build() {
return new ParserOptions(this);
}
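
Usage note: as the Javadoc above suggests, the JVM-wide default can be raised once at application startup via setDefaultParserOptions. A sketch using the new transform() and maxTokens() methods; the 50,000 figure is illustrative:

import graphql.parser.ParserOptions;

public class ParserOptionsBootstrap {
    public static void main(String[] args) {
        ParserOptions defaults = ParserOptions.getDefaultParserOptions();
        // copy the current defaults and change only the token ceiling
        ParserOptions relaxed = defaults.transform(builder -> builder.maxTokens(50_000));
        ParserOptions.setDefaultParserOptions(relaxed);
    }
}
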
33 changes: 29 additions & 4 deletions src/main/java/graphql/schema/idl/SchemaParser.java
@@ -8,13 +8,14 @@
import graphql.language.SDLDefinition;
import graphql.parser.InvalidSyntaxException;
import graphql.parser.Parser;
import graphql.parser.ParserOptions;
import graphql.schema.idl.errors.NonSDLDefinitionError;
import graphql.schema.idl.errors.SchemaProblem;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.file.Files;
@@ -71,8 +72,22 @@ public TypeDefinitionRegistry parse(InputStream inputStream) throws SchemaProble
* @throws SchemaProblem if there are problems compiling the schema definitions
*/
public TypeDefinitionRegistry parse(Reader reader) throws SchemaProblem {
return parse(reader, null);
}

/**
* Parse a reader of schema definitions and create a {@link TypeDefinitionRegistry}
*
* @param reader the reader to parse
* @param parserOptions the parse options to use while parsing
*
* @return registry of type definitions
*
* @throws SchemaProblem if there are problems compiling the schema definitions
*/
public TypeDefinitionRegistry parse(Reader reader, ParserOptions parserOptions) throws SchemaProblem {
try (Reader input = reader) {
return parseImpl(input);
return parseImpl(input, parserOptions);
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -92,9 +107,19 @@ public TypeDefinitionRegistry parse(String schemaInput) throws SchemaProblem {
}

public TypeDefinitionRegistry parseImpl(Reader schemaInput) {
        // why is this public - (head shake)
return parseImpl(schemaInput, null);
}

private TypeDefinitionRegistry parseImpl(Reader schemaInput, ParserOptions parseOptions) {
try {
if (parseOptions == null) {
                // for SDL we don't limit how many parser tokens there are - it's not the attack vector
                // to be prevented, compared to queries
parseOptions = ParserOptions.getDefaultParserOptions().transform(opts -> opts.maxTokens(Integer.MAX_VALUE));
}
Parser parser = new Parser();
Document document = parser.parseDocument(schemaInput);
Document document = parser.parseDocument(schemaInput, parseOptions);

return buildRegistry(document);
} catch (InvalidSyntaxException e) {
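
Usage note: callers who do want a token ceiling on SDL parsing can pass options through the new parse(Reader, ParserOptions) overload. A sketch; the class name and the 10,000 figure are illustrative:

import java.io.StringReader;

import graphql.parser.ParserOptions;
import graphql.schema.idl.SchemaParser;
import graphql.schema.idl.TypeDefinitionRegistry;

public class SdlParseWithLimit {
    public static void main(String[] args) {
        String sdl = "type Query { hello : String }";
        ParserOptions options = ParserOptions.newParserOptions()
                .maxTokens(10_000)
                .build();
        // exceeding the limit surfaces as a SchemaProblem (see the test further down)
        TypeDefinitionRegistry registry = new SchemaParser().parse(new StringReader(sdl), options);
        System.out.println(registry.types().keySet());
    }
}
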
35 changes: 34 additions & 1 deletion src/test/groovy/graphql/parser/ParserTest.groovy
@@ -1,6 +1,7 @@
package graphql.parser


import graphql.TestUtil
import graphql.language.Argument
import graphql.language.ArrayValue
import graphql.language.AstComparator
@@ -796,7 +797,7 @@ triple3 : """edge cases \\""" "" " \\"" \\" edge cases"""
println document
then:
def e = thrown(InvalidSyntaxException)
e.message.contains("Invalid syntax")
e.message.contains("Invalid Syntax")
e.sourcePreview == input + "\n"
e.location.line == 3
e.location.column == 20
@@ -1071,4 +1072,36 @@ triple3 : """edge cases \\""" "" " \\"" \\" edge cases"""
document.getSourceLocation() == SourceLocation.EMPTY
document.getDefinitions()[0].getSourceLocation() == SourceLocation.EMPTY
}

def "a billion laughs attack will be prevented by default"() {
def lol = "@lol" * 10000 // two tokens = 20000+ tokens
def text = "query { f $lol }"
when:
Parser.parse(text)

then:
def e = thrown(ParseCancelledException)
e.getMessage().contains("parsing has been cancelled")

when: "integration test to prove it cancels by default"

def sdl = """type Query { f : ID} """
def graphQL = TestUtil.graphQL(sdl).build()
def er = graphQL.execute(text)
then:
er.errors.size() == 1
er.errors[0].message.contains("parsing has been cancelled")
}

def "they can shoot themselves if they want to with large documents"() {
def lol = "@lol" * 10000 // two tokens = 20000+ tokens
def text = "query { f $lol }"

def options = ParserOptions.newParserOptions().maxTokens(30000).build()
when:
def doc = new Parser().parseDocument(text, options)

then:
doc != null
}
}
21 changes: 21 additions & 0 deletions src/test/groovy/graphql/schema/idl/SchemaParserTest.groovy
@@ -4,6 +4,7 @@ import graphql.language.EnumTypeDefinition
import graphql.language.InterfaceTypeDefinition
import graphql.language.ObjectTypeDefinition
import graphql.language.ScalarTypeDefinition
import graphql.parser.ParserOptions
import graphql.schema.idl.errors.SchemaProblem
import spock.lang.Specification
import spock.lang.Unroll
@@ -338,5 +339,25 @@ class SchemaParserTest extends Specification {
schemaProblem.getErrors()[2].getMessage().contains("OperationDefinition")
}

def "large schema files can be parsed - there is no limit"() {
def sdl = "type Query {\n"
for (int i = 0; i < 30000; i++) {
sdl += " f" + i + " : ID\n"
}
sdl += "}"

when:
def typeDefinitionRegistry = new SchemaParser().parse(sdl)
then:
typeDefinitionRegistry != null


when: "options are used they will be respected"
def options = ParserOptions.defaultParserOptions.transform({ it.maxTokens(100) })
new SchemaParser().parse(new StringReader(sdl), options)
then:
def e = thrown(SchemaProblem)
e.errors[0].message.contains("parsing has been cancelled")

}
}