12 changes: 12 additions & 0 deletions src/main/java/graphql/parser/ParseCancelledTooManyCharsException.java
@@ -0,0 +1,12 @@
package graphql.parser;

import graphql.Internal;

@Internal
public class ParseCancelledTooManyCharsException extends InvalidSyntaxException {

@Internal
public ParseCancelledTooManyCharsException(String msg, int maxCharacters) {
super(null, msg, null, null, null);
}
}
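For context, a hedged sketch of how calling code would observe this new exception once the character cap (wired in below via ParserOptions and SafeTokenReader) is exceeded. Parser.parseDocument is graphql-java's existing public entry point; the oversized query string and the demo class name are purely illustrative.

    import graphql.language.Document;
    import graphql.parser.InvalidSyntaxException;
    import graphql.parser.ParseCancelledTooManyCharsException;
    import graphql.parser.Parser;

    public class TooManyCharsDemo {
        public static void main(String[] args) {
            // build a query comfortably over the 1 MB default character cap introduced in this PR
            String padding = "a".repeat(2 * 1024 * 1024);
            String hugeQuery = "{ field(arg: \"" + padding + "\") }";
            try {
                Document doc = new Parser().parseDocument(hugeQuery);
            } catch (ParseCancelledTooManyCharsException e) {
                // parsing stopped early; the document was never fully tokenised
                System.out.println("rejected oversized input: " + e.getMessage());
            } catch (InvalidSyntaxException e) {
                System.out.println("other syntax problem: " + e.getMessage());
            }
        }
    }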
109 changes: 73 additions & 36 deletions src/main/java/graphql/parser/Parser.java
@@ -21,6 +21,7 @@
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.jetbrains.annotations.NotNull;

import java.io.IOException;
import java.io.Reader;
@@ -29,17 +30,18 @@
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;

/**
* This can parse graphql syntax, both Query syntax and Schema Definition Language (SDL) syntax, into an
* Abstract Syntax Tree (AST) represented by a {@link Document}
* <p>
* You should not generally need to call this class as the {@link graphql.GraphQL} code sets this up for you
* but if you are doing specific graphql utilities this class is essential.
*
* <p>
* Graphql syntax has a series of characters, such as spaces, new lines and commas that are not considered relevant
* to the syntax. However they can be captured and associated with the AST elements they belong to.
*
* <p>
* This costs more memory but for certain use cases (like editors) this may be useful. We have chosen not to capture
* ignored characters by default but you can turn this on, either per parse or statically for the whole JVM
* via {@link ParserOptions#setDefaultParserOptions(ParserOptions)}
@@ -205,43 +207,18 @@ private Type<?> parseTypeImpl(String input) throws InvalidSyntaxException {
}

private Node<?> parseImpl(Reader reader, BiFunction<GraphqlParser, GraphqlAntlrToLanguage, Object[]> nodeFunction, ParserOptions parserOptions) throws InvalidSyntaxException {
MultiSourceReader multiSourceReader;
if (reader instanceof MultiSourceReader) {
multiSourceReader = (MultiSourceReader) reader;
} else {
multiSourceReader = MultiSourceReader.newMultiSourceReader()
.reader(reader, null).build();
}
CodePointCharStream charStream;
try {
charStream = CharStreams.fromReader(multiSourceReader);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());

GraphqlLexer lexer = new GraphqlLexer(charStream);
lexer.removeErrorListeners();
lexer.addErrorListener(new BaseErrorListener() {
@Override
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) {
SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, line, charPositionInLine);
String preview = AntlrHelper.createPreview(multiSourceReader, line);
throw new InvalidSyntaxException(sourceLocation, msg, preview, null, null);
}
});
MultiSourceReader multiSourceReader = setupMultiSourceReader(reader, parserOptions);

// default in the parser options if they are not set
parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());
SafeTokenReader safeTokenReader = setupSafeTokenReader(parserOptions, multiSourceReader);

CodePointCharStream charStream = setupCharStream(safeTokenReader);

GraphqlLexer lexer = setupGraphqlLexer(multiSourceReader, charStream);

// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
int maxTokens = parserOptions.getMaxTokens();
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems(
token,
maxTokenCount,
multiSourceReader,
ParseCancelledException.class);
SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
SafeTokenSource safeTokenSource = getSafeTokenSource(parserOptions, multiSourceReader, lexer);

CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);

@@ -285,6 +262,65 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
return node;
}

private static MultiSourceReader setupMultiSourceReader(Reader reader, ParserOptions parserOptions) {
MultiSourceReader multiSourceReader;
if (reader instanceof MultiSourceReader) {
multiSourceReader = (MultiSourceReader) reader;
} else {
multiSourceReader = MultiSourceReader.newMultiSourceReader()
.reader(reader, null).build();
}
return multiSourceReader;
}

@NotNull
private static SafeTokenReader setupSafeTokenReader(ParserOptions parserOptions, MultiSourceReader multiSourceReader) {
int maxCharacters = parserOptions.getMaxCharacters();
Consumer<Integer> onTooManyCharacters = it -> {
String msg = String.format("More than %d characters have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxCharacters);
throw new ParseCancelledTooManyCharsException(msg, maxCharacters);
};
return new SafeTokenReader(multiSourceReader, maxCharacters, onTooManyCharacters);
}

@NotNull
private static CodePointCharStream setupCharStream(SafeTokenReader safeTokenReader) {
CodePointCharStream charStream;
try {
charStream = CharStreams.fromReader(safeTokenReader);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
return charStream;
}

@NotNull
private static GraphqlLexer setupGraphqlLexer(MultiSourceReader multiSourceReader, CodePointCharStream charStream) {
GraphqlLexer lexer = new GraphqlLexer(charStream);
lexer.removeErrorListeners();
lexer.addErrorListener(new BaseErrorListener() {
@Override
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) {
SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, line, charPositionInLine);
String preview = AntlrHelper.createPreview(multiSourceReader, line);
throw new InvalidSyntaxException(sourceLocation, msg, preview, null, null);
}
});
return lexer;
}

@NotNull
private SafeTokenSource getSafeTokenSource(ParserOptions parserOptions, MultiSourceReader multiSourceReader, GraphqlLexer lexer) {
int maxTokens = parserOptions.getMaxTokens();
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems(
token,
maxTokenCount,
multiSourceReader,
ParseCancelledException.class);
return new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
}

private void setupParserListener(MultiSourceReader multiSourceReader, GraphqlParser parser, GraphqlAntlrToLanguage toLanguage) {
ParserOptions parserOptions = toLanguage.getParserOptions();
ParsingListener parsingListener = parserOptions.getParsingListener();
@@ -365,7 +401,8 @@ private void throwIfTokenProblems(Token token, int maxLimit, MultiSourceReader m
throw new ParseCancelledTooDeepException(msg, sourceLocation, offendingToken, maxLimit, tokenType);
}
String msg = String.format("More than %d %s tokens have been presented. To prevent Denial Of Service attacks, parsing has been cancelled.", maxLimit, tokenType);
throw new ParseCancelledException(msg, sourceLocation, offendingToken); }
throw new ParseCancelledException(msg, sourceLocation, offendingToken);
}

/**
* Allows you to override the ANTLR to AST code.
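The Parser Javadoc above notes that ignored-character capture is off by default and can be enabled per parse or statically for the whole JVM. A hedged sketch of the JVM-wide route, using the ParserOptions builder methods visible elsewhere in this diff (ParserOptions.newParserOptions() is assumed to be the builder entry point):

    import graphql.parser.ParserOptions;

    public class DefaultOptionsSetup {
        public static void main(String[] args) {
            // JVM-wide defaults, picked up by later parses that do not supply their own options
            ParserOptions options = ParserOptions.newParserOptions()
                    .captureIgnoredChars(true)   // attach whitespace/commas to AST nodes (costs memory)
                    .captureSourceLocation(true)
                    .build();
            ParserOptions.setDefaultParserOptions(options);
        }
    }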
37 changes: 36 additions & 1 deletion src/main/java/graphql/parser/ParserOptions.java
@@ -11,9 +11,20 @@
*/
@PublicApi
public class ParserOptions {
/**
* A graphql hacking vector is to send nonsensical queries with large tokens that contain repeated characters,
* which burn lots of parsing CPU time and memory representing a document that won't ever execute.
* To prevent this for most users, graphql-java sets this value to 1MB.
* ANTLR parsing time is linear to the number of characters presented. The more you
* allow, the longer it takes.
* <p>
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_QUERY_CHARACTERS = 1024 * 1024; // 1 MB

/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* A graphql hacking vector is to send nonsensical queries with lots of tokens that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
* sets this value to 15000. ANTLR parsing time is linear to the number of tokens presented. The more you
* allow the longer it takes.
@@ -46,6 +57,7 @@ public class ParserOptions {
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true)
.maxCharacters(MAX_QUERY_CHARACTERS)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
@@ -55,6 +67,7 @@
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(false) // #comments are not useful in query parsing
.maxCharacters(MAX_QUERY_CHARACTERS)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
@@ -64,6 +77,7 @@
.captureIgnoredChars(false)
.captureSourceLocation(true)
.captureLineComments(true) // #comments are useful in SDL parsing
.maxCharacters(Integer.MAX_VALUE)
.maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers
.maxWhitespaceTokens(Integer.MAX_VALUE)
.maxRuleDepth(Integer.MAX_VALUE)
@@ -167,6 +181,7 @@ public static void setDefaultSdlParserOptions(ParserOptions options) {
private final boolean captureIgnoredChars;
private final boolean captureSourceLocation;
private final boolean captureLineComments;
private final int maxCharacters;
private final int maxTokens;
private final int maxWhitespaceTokens;
private final int maxRuleDepth;
@@ -176,6 +191,7 @@ private ParserOptions(Builder builder) {
this.captureIgnoredChars = builder.captureIgnoredChars;
this.captureSourceLocation = builder.captureSourceLocation;
this.captureLineComments = builder.captureLineComments;
this.maxCharacters = builder.maxCharacters;
this.maxTokens = builder.maxTokens;
this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
this.maxRuleDepth = builder.maxRuleDepth;
@@ -219,6 +235,18 @@ public boolean isCaptureLineComments() {
return captureLineComments;
}

/**
* A graphql hacking vector is to send nonsensical queries that contain repeated characters that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
* sets this value to 1MB.
*
* @return the maximum number of characters the parser will accept, after which an exception will be thrown.
*/
public int getMaxCharacters() {
return maxCharacters;
}


/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse
@@ -272,6 +300,7 @@ public static class Builder {
private boolean captureSourceLocation = true;
private boolean captureLineComments = true;
private ParsingListener parsingListener = ParsingListener.NOOP;
private int maxCharacters = MAX_QUERY_CHARACTERS;
private int maxTokens = MAX_QUERY_TOKENS;
private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;
private int maxRuleDepth = MAX_RULE_DEPTH;
@@ -283,6 +312,7 @@ public static class Builder {
this.captureIgnoredChars = parserOptions.captureIgnoredChars;
this.captureSourceLocation = parserOptions.captureSourceLocation;
this.captureLineComments = parserOptions.captureLineComments;
this.maxCharacters = parserOptions.maxCharacters;
this.maxTokens = parserOptions.maxTokens;
this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
this.maxRuleDepth = parserOptions.maxRuleDepth;
@@ -304,6 +334,11 @@ public Builder captureLineComments(boolean captureLineComments) {
return this;
}

public Builder maxCharacters(int maxCharacters) {
this.maxCharacters = maxCharacters;
return this;
}

public Builder maxTokens(int maxTokens) {
this.maxTokens = maxTokens;
return this;
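A hedged sketch of raising the new character cap for a deployment that legitimately accepts very large queries. maxCharacters(int) and setDefaultParserOptions(ParserOptions) appear in this diff; the 10 MB figure is an arbitrary example, not a recommendation.

    import graphql.parser.ParserOptions;

    public class RaiseCharacterCap {
        public static void main(String[] args) {
            ParserOptions relaxed = ParserOptions.newParserOptions()
                    .maxCharacters(10 * 1024 * 1024) // 10 MB instead of the 1 MB default
                    .maxTokens(50_000)               // the token cap can be raised alongside it
                    .build();
            // affects subsequent query parsing JVM-wide
            ParserOptions.setDefaultParserOptions(relaxed);
        }
    }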
95 changes: 95 additions & 0 deletions src/main/java/graphql/parser/SafeTokenReader.java
@@ -0,0 +1,95 @@
package graphql.parser;

import graphql.Internal;
import org.jetbrains.annotations.NotNull;

import java.io.IOException;
import java.io.Reader;
import java.nio.CharBuffer;
import java.util.function.Consumer;

/**
* This reader will only emit a maximum number of characters. This is used to protect us from evil input.
* <p>
* If a graphql system does not have some maximum HTTP input limit, then this will help protect the system. This is a limit
* of last resort. Ideally the HTTP input should be limited, but if it's not, we have this.
*/
@Internal
public class SafeTokenReader extends Reader {

private final Reader delegate;
private final int maxCharacters;
private final Consumer<Integer> whenMaxCharactersExceeded;
private int count;

public SafeTokenReader(Reader delegate, int maxCharacters, Consumer<Integer> whenMaxCharactersExceeded) {
this.delegate = delegate;
this.maxCharacters = maxCharacters;
this.whenMaxCharactersExceeded = whenMaxCharactersExceeded;
count = 0;
}

private int checkHowMany(int read, int howMany) {
if (read != -1) {
count += howMany;
if (count > maxCharacters) {
whenMaxCharactersExceeded.accept(maxCharacters);
}
}
return read;
}

@Override
public int read(char @NotNull [] buff, int off, int len) throws IOException {
int howMany = delegate.read(buff, off, len);
return checkHowMany(howMany, howMany);
}

@Override
public int read() throws IOException {
int ch = delegate.read();
return checkHowMany(ch, 1);
}

@Override
public int read(@NotNull CharBuffer target) throws IOException {
int howMany = delegate.read(target);
return checkHowMany(howMany, howMany);
}

@Override
public int read(char @NotNull [] buff) throws IOException {
int howMany = delegate.read(buff);
return checkHowMany(howMany, howMany);
}

@Override
public void close() throws IOException {
delegate.close();
}

@Override
public long skip(long n) throws IOException {
return delegate.skip(n);
}

@Override
public boolean ready() throws IOException {
return delegate.ready();
}

@Override
public boolean markSupported() {
return delegate.markSupported();
}

@Override
public void mark(int readAheadLimit) throws IOException {
delegate.mark(readAheadLimit);
}

@Override
public void reset() throws IOException {
delegate.reset();
}
}
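SafeTokenReader is @Internal, but a small standalone sketch makes its contract clear: it delegates every read to the wrapped Reader and invokes the callback once the running character count passes the limit. The query text and the 16-character cap below are arbitrary illustrative values.

    import graphql.parser.SafeTokenReader;

    import java.io.IOException;
    import java.io.Reader;
    import java.io.StringReader;
    import java.util.function.Consumer;

    public class SafeTokenReaderDemo {
        public static void main(String[] args) throws IOException {
            int maxCharacters = 16;
            Consumer<Integer> onTooMany = max -> {
                throw new IllegalStateException("more than " + max + " characters presented");
            };
            Reader capped = new SafeTokenReader(
                    new StringReader("{ allTheThings { id name } }"), maxCharacters, onTooMany);

            char[] buffer = new char[8];
            while (capped.read(buffer) != -1) {
                // the third 8-char read pushes the count to 24, past the 16-character cap,
                // so the callback above throws before the reader is exhausted
            }
        }
    }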