Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 78 additions & 41 deletions src/main/java/graphql/parser/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import graphql.parser.antlr.GraphqlParser;
import graphql.parser.exceptions.ParseCancelledException;
import graphql.parser.exceptions.ParseCancelledTooDeepException;
import graphql.parser.exceptions.ParseCancelledTooManyCharsException;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CodePointCharStream;
Expand All @@ -25,6 +26,7 @@
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.jetbrains.annotations.NotNull;

import java.io.IOException;
import java.io.Reader;
Expand All @@ -33,6 +35,7 @@
import java.util.Optional;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;

/**
* This can parse graphql syntax, both Query syntax and Schema Definition Language (SDL) syntax, into an
Expand Down Expand Up @@ -259,6 +262,57 @@ private Node<?> parseImpl(ParserEnvironment environment, BiFunction<GraphqlParse
ParserOptions parserOptions = environment.getParserOptions();
parserOptions = Optional.ofNullable(parserOptions).orElse(ParserOptions.getDefaultParserOptions());

MultiSourceReader multiSourceReader = setupMultiSourceReader(environment, parserOptions);

SafeTokenReader safeTokenReader = setupSafeTokenReader(environment, parserOptions, multiSourceReader);

CodePointCharStream charStream = setupCharStream(safeTokenReader);

GraphqlLexer lexer = setupGraphqlLexer(environment, multiSourceReader, charStream);

// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
SafeTokenSource safeTokenSource = getSafeTokenSource(environment, parserOptions, multiSourceReader, lexer);

CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);

GraphqlParser parser = new GraphqlParser(tokens);
parser.removeErrorListeners();
parser.getInterpreter().setPredictionMode(PredictionMode.SLL);

ExtendedBailStrategy bailStrategy = new ExtendedBailStrategy(multiSourceReader, environment);
parser.setErrorHandler(bailStrategy);

// preserve old protected call semantics - remove at some point
GraphqlAntlrToLanguage toLanguage = getAntlrToLanguage(tokens, multiSourceReader, environment);

setupParserListener(environment, multiSourceReader, parser, toLanguage);


//
// parsing starts ...... now!
//
Object[] contextAndNode = nodeFunction.apply(parser, toLanguage);
ParserRuleContext parserRuleContext = (ParserRuleContext) contextAndNode[0];
Node<?> node = (Node<?>) contextAndNode[1];

Token stop = parserRuleContext.getStop();
List<Token> allTokens = tokens.getTokens();
if (stop != null && allTokens != null && !allTokens.isEmpty()) {
Token last = allTokens.get(allTokens.size() - 1);
//
// do we have more tokens in the stream than we consumed in the parse?
// if yes then it's invalid. We make sure it's the same channel
boolean notEOF = last.getType() != Token.EOF;
boolean lastGreaterThanDocument = last.getTokenIndex() > stop.getTokenIndex();
boolean sameChannel = last.getChannel() == stop.getChannel();
if (notEOF && lastGreaterThanDocument && sameChannel) {
throw bailStrategy.mkMoreTokensException(last);
}
}
return node;
}

private static MultiSourceReader setupMultiSourceReader(ParserEnvironment environment, ParserOptions parserOptions) {
MultiSourceReader multiSourceReader;
Reader reader = environment.getDocument();
if (reader instanceof MultiSourceReader) {
Expand All @@ -269,13 +323,31 @@ private Node<?> parseImpl(ParserEnvironment environment, BiFunction<GraphqlParse
.trackData(parserOptions.isReaderTrackData())
.build();
}
return multiSourceReader;
}

/**
 * Wraps the source reader in a {@link SafeTokenReader} so that reading is aborted
 * (via {@link ParseCancelledTooManyCharsException}) once the configured maximum
 * number of characters has been consumed.  This is a guard against maliciously
 * large documents burning CPU and memory during parsing.
 */
@NotNull
private static SafeTokenReader setupSafeTokenReader(ParserEnvironment environment, ParserOptions parserOptions, MultiSourceReader multiSourceReader) {
    final int charLimit = parserOptions.getMaxCharacters();
    Consumer<Integer> limitExceededHandler = ignoredCount -> {
        // bail out of the parse entirely - the document is too big to be trusted
        throw new ParseCancelledTooManyCharsException(environment.getI18N(), charLimit);
    };
    return new SafeTokenReader(multiSourceReader, charLimit, limitExceededHandler);
}

/**
 * Turns the character-limited reader into the ANTLR {@link CodePointCharStream}
 * the lexer consumes.  Note that this eagerly reads the whole document, which is
 * why the reader enforces the max-characters limit.
 *
 * @param safeTokenReader the limit-enforcing reader over the source document
 *
 * @return the ANTLR character stream
 *
 * @throws UncheckedIOException if reading the document fails; callers cannot
 *                              reasonably recover from an IO failure here
 */
@NotNull
private static CodePointCharStream setupCharStream(SafeTokenReader safeTokenReader) {
    // the stale pre-refactor assignment that read from multiSourceReader has been
    // removed - only the safe (character limited) reader must ever be consumed here
    try {
        return CharStreams.fromReader(safeTokenReader);
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}

@NotNull
private static GraphqlLexer setupGraphqlLexer(ParserEnvironment environment, MultiSourceReader multiSourceReader, CodePointCharStream charStream) {
GraphqlLexer lexer = new GraphqlLexer(charStream);
lexer.removeErrorListeners();
lexer.addErrorListener(new BaseErrorListener() {
Expand All @@ -296,8 +368,11 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
throw new InvalidSyntaxException(msg, sourceLocation, null, preview, null);
}
});
return lexer;
}

// this lexer wrapper allows us to stop lexing when too many tokens are in place. This prevents DOS attacks.
@NotNull
private SafeTokenSource getSafeTokenSource(ParserEnvironment environment, ParserOptions parserOptions, MultiSourceReader multiSourceReader, GraphqlLexer lexer) {
int maxTokens = parserOptions.getMaxTokens();
int maxWhitespaceTokens = parserOptions.getMaxWhitespaceTokens();
BiConsumer<Integer, Token> onTooManyTokens = (maxTokenCount, token) -> throwIfTokenProblems(
Expand All @@ -306,45 +381,7 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int
maxTokenCount,
multiSourceReader,
ParseCancelledException.class);
SafeTokenSource safeTokenSource = new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);

CommonTokenStream tokens = new CommonTokenStream(safeTokenSource);

GraphqlParser parser = new GraphqlParser(tokens);
parser.removeErrorListeners();
parser.getInterpreter().setPredictionMode(PredictionMode.SLL);

ExtendedBailStrategy bailStrategy = new ExtendedBailStrategy(multiSourceReader, environment);
parser.setErrorHandler(bailStrategy);

// preserve old protected call semantics - remove at some point
GraphqlAntlrToLanguage toLanguage = getAntlrToLanguage(tokens, multiSourceReader, environment);

setupParserListener(environment, multiSourceReader, parser, toLanguage);


//
// parsing starts ...... now!
//
Object[] contextAndNode = nodeFunction.apply(parser, toLanguage);
ParserRuleContext parserRuleContext = (ParserRuleContext) contextAndNode[0];
Node<?> node = (Node<?>) contextAndNode[1];

Token stop = parserRuleContext.getStop();
List<Token> allTokens = tokens.getTokens();
if (stop != null && allTokens != null && !allTokens.isEmpty()) {
Token last = allTokens.get(allTokens.size() - 1);
//
// do we have more tokens in the stream than we consumed in the parse?
// if yes then it's invalid. We make sure it's the same channel
boolean notEOF = last.getType() != Token.EOF;
boolean lastGreaterThanDocument = last.getTokenIndex() > stop.getTokenIndex();
boolean sameChannel = last.getChannel() == stop.getChannel();
if (notEOF && lastGreaterThanDocument && sameChannel) {
throw bailStrategy.mkMoreTokensException(last);
}
}
return node;
return new SafeTokenSource(lexer, maxTokens, maxWhitespaceTokens, onTooManyTokens);
}

private void setupParserListener(ParserEnvironment environment, MultiSourceReader multiSourceReader, GraphqlParser parser, GraphqlAntlrToLanguage toLanguage) {
Expand Down
37 changes: 36 additions & 1 deletion src/main/java/graphql/parser/ParserOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,20 @@
*/
@PublicApi
public class ParserOptions {
/**
* A graphql hacking vector is to send nonsensical queries with large tokens that contain repeated characters
* that burn lots of parsing CPU time and burn memory representing a document that won't ever execute.
* To prevent this for most users, graphql-java sets this value to 1MB.
* ANTLR parsing time is linear to the number of characters presented. The more you
* allow the longer it takes.
* <p>
* If you want to allow more, then {@link #setDefaultParserOptions(ParserOptions)} allows you to change this
* JVM wide.
*/
public static final int MAX_QUERY_CHARACTERS = 1024 * 1024; // 1 MB

/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* A graphql hacking vector is to send nonsensical queries with lots of tokens that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this for most users, graphql-java
* sets this value to 15000. ANTLR parsing time is linear to the number of tokens presented. The more you
* allow the longer it takes.
Expand Down Expand Up @@ -47,6 +58,7 @@ public class ParserOptions {
.captureSourceLocation(true)
.captureLineComments(true)
.readerTrackData(true)
.maxCharacters(MAX_QUERY_CHARACTERS)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
Expand All @@ -57,6 +69,7 @@ public class ParserOptions {
.captureSourceLocation(true)
.captureLineComments(false) // #comments are not useful in query parsing
.readerTrackData(true)
.maxCharacters(MAX_QUERY_CHARACTERS)
.maxTokens(MAX_QUERY_TOKENS) // to prevent a billion laughs style attacks, we set a default for graphql-java
.maxWhitespaceTokens(MAX_WHITESPACE_TOKENS)
.maxRuleDepth(MAX_RULE_DEPTH)
Expand All @@ -67,6 +80,7 @@ public class ParserOptions {
.captureSourceLocation(true)
.captureLineComments(true) // #comments are useful in SDL parsing
.readerTrackData(true)
.maxCharacters(Integer.MAX_VALUE)
.maxTokens(Integer.MAX_VALUE) // we are less worried about a billion laughs with SDL parsing since the call path is not facing attackers
.maxWhitespaceTokens(Integer.MAX_VALUE)
.maxRuleDepth(Integer.MAX_VALUE)
Expand Down Expand Up @@ -171,6 +185,7 @@ public static void setDefaultSdlParserOptions(ParserOptions options) {
private final boolean captureSourceLocation;
private final boolean captureLineComments;
private final boolean readerTrackData;
private final int maxCharacters;
private final int maxTokens;
private final int maxWhitespaceTokens;
private final int maxRuleDepth;
Expand All @@ -181,6 +196,7 @@ private ParserOptions(Builder builder) {
this.captureSourceLocation = builder.captureSourceLocation;
this.captureLineComments = builder.captureLineComments;
this.readerTrackData = builder.readerTrackData;
this.maxCharacters = builder.maxCharacters;
this.maxTokens = builder.maxTokens;
this.maxWhitespaceTokens = builder.maxWhitespaceTokens;
this.maxRuleDepth = builder.maxRuleDepth;
Expand Down Expand Up @@ -233,6 +249,18 @@ public boolean isReaderTrackData() {
return readerTrackData;
}

/**
 * A graphql hacking vector is to send nonsensical queries that contain repeated characters that burn lots of parsing CPU time and burn
 * memory representing a document that won't ever execute. To prevent this for most users, graphql-java
 * sets this value to 1MB.
 *
 * @return the maximum number of characters the parser will accept, after which an exception will be thrown.
 */
public int getMaxCharacters() {
    return maxCharacters;
}


/**
* A graphql hacking vector is to send nonsensical queries that burn lots of parsing CPU time and burn
* memory representing a document that won't ever execute. To prevent this you can set a maximum number of parse
Expand Down Expand Up @@ -287,6 +315,7 @@ public static class Builder {
private boolean captureLineComments = true;
private boolean readerTrackData = true;
private ParsingListener parsingListener = ParsingListener.NOOP;
private int maxCharacters = MAX_QUERY_CHARACTERS;
private int maxTokens = MAX_QUERY_TOKENS;
private int maxWhitespaceTokens = MAX_WHITESPACE_TOKENS;
private int maxRuleDepth = MAX_RULE_DEPTH;
Expand All @@ -298,6 +327,7 @@ public static class Builder {
this.captureIgnoredChars = parserOptions.captureIgnoredChars;
this.captureSourceLocation = parserOptions.captureSourceLocation;
this.captureLineComments = parserOptions.captureLineComments;
this.maxCharacters = parserOptions.maxCharacters;
this.maxTokens = parserOptions.maxTokens;
this.maxWhitespaceTokens = parserOptions.maxWhitespaceTokens;
this.maxRuleDepth = parserOptions.maxRuleDepth;
Expand All @@ -324,6 +354,11 @@ public Builder readerTrackData(boolean readerTrackData) {
return this;
}

/**
 * Sets the maximum number of characters the parser will read before cancelling the parse.
 * This protects against maliciously large documents; the default is {@link ParserOptions#MAX_QUERY_CHARACTERS}.
 *
 * @param maxCharacters the maximum number of characters to allow
 *
 * @return this builder, for call chaining
 */
public Builder maxCharacters(int maxCharacters) {
    this.maxCharacters = maxCharacters;
    return this;
}

public Builder maxTokens(int maxTokens) {
this.maxTokens = maxTokens;
return this;
Expand Down
95 changes: 95 additions & 0 deletions src/main/java/graphql/parser/SafeTokenReader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package graphql.parser;

import graphql.Internal;
import org.jetbrains.annotations.NotNull;

import java.io.IOException;
import java.io.Reader;
import java.nio.CharBuffer;
import java.util.function.Consumer;

/**
 * This reader will only emit a maximum number of characters from it. This is used to protect us from evil input.
 * <p>
 * If a graphql system does not have some max HTTP input limit, then this will help protect the system. This is a limit
 * of last resort. Ideally the http input should be limited, but if it's not, we have this.
 */
@Internal
public class SafeTokenReader extends Reader {

    private final Reader delegate;
    private final int maxCharacters;
    private final Consumer<Integer> whenMaxCharactersExceeded;
    // long so the running total cannot wrap around int for very large inputs,
    // which would silently disable the limit check
    private long count;

    /**
     * @param delegate                  the underlying reader to draw characters from
     * @param maxCharacters             the maximum number of characters to emit
     * @param whenMaxCharactersExceeded called (with the limit) when the limit is exceeded;
     *                                  expected to throw to stop further reading
     */
    public SafeTokenReader(Reader delegate, int maxCharacters, Consumer<Integer> whenMaxCharactersExceeded) {
        this.delegate = delegate;
        this.maxCharacters = maxCharacters;
        this.whenMaxCharactersExceeded = whenMaxCharactersExceeded;
        count = 0;
    }

    /**
     * Adds {@code howMany} characters to the running count and invokes the
     * limit callback once the configured maximum is exceeded.
     *
     * @param read    the value the delegate read call returned (-1 at end of stream)
     * @param howMany how many characters that call actually consumed
     *
     * @return {@code read}, unchanged, so callers can return it directly
     */
    private int checkHowMany(int read, int howMany) {
        if (read != -1) {
            count += howMany;
            if (count > maxCharacters) {
                whenMaxCharactersExceeded.accept(maxCharacters);
            }
        }
        return read;
    }

    @Override
    public int read(char @NotNull [] buff, int off, int len) throws IOException {
        int howMany = delegate.read(buff, off, len);
        return checkHowMany(howMany, howMany);
    }

    @Override
    public int read() throws IOException {
        int ch = delegate.read();
        return checkHowMany(ch, 1);
    }

    @Override
    public int read(@NotNull CharBuffer target) throws IOException {
        int howMany = delegate.read(target);
        return checkHowMany(howMany, howMany);
    }

    @Override
    public int read(char @NotNull [] buff) throws IOException {
        int howMany = delegate.read(buff);
        return checkHowMany(howMany, howMany);
    }

    @Override
    public void close() throws IOException {
        delegate.close();
    }

    @Override
    public long skip(long n) throws IOException {
        // skipped characters still come off the underlying stream, so they must be
        // counted too - otherwise skip() would be a way around the character limit
        long skipped = delegate.skip(n);
        if (skipped > 0) {
            count += skipped;
            if (count > maxCharacters) {
                whenMaxCharactersExceeded.accept(maxCharacters);
            }
        }
        return skipped;
    }

    @Override
    public boolean ready() throws IOException {
        return delegate.ready();
    }

    @Override
    public boolean markSupported() {
        return delegate.markSupported();
    }

    @Override
    public void mark(int readAheadLimit) throws IOException {
        delegate.mark(readAheadLimit);
    }

    @Override
    public void reset() throws IOException {
        // NOTE(review): reset() does not rewind 'count', so re-reading after a reset
        // is counted twice; this is conservative (never under-counts) - confirm intended
        delegate.reset();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package graphql.parser.exceptions;

import graphql.Internal;
import graphql.i18n.I18n;
import graphql.parser.InvalidSyntaxException;
import org.jetbrains.annotations.NotNull;

/**
 * Thrown when a parse is cancelled because the input document contained more characters
 * than the configured maximum (a defence against maliciously large documents).
 * The message text is resolved from the i18n bundle key "ParseCancelled.tooManyChars";
 * no source location or offending token is recorded.
 */
@Internal
public class ParseCancelledTooManyCharsException extends InvalidSyntaxException {

    /**
     * @param i18N          the i18n bundle used to build the human-readable message
     * @param maxCharacters the character limit that was exceeded
     */
    @Internal
    public ParseCancelledTooManyCharsException(@NotNull I18n i18N, int maxCharacters) {
        super(i18N.msg("ParseCancelled.tooManyChars", maxCharacters),
                null, null, null, null);
    }
}
Loading