Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 154 additions & 0 deletions autoresearch-validator/autoresearch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#!/usr/bin/env bash
# Autoresearch loop driver for graphql-java validator optimization.
#
# Usage:
#   ./autoresearch-validator/autoresearch.sh [max_iterations]
#
# Default: 200 iterations (designed for overnight runs)
#
# Safety:
#   The agent runs with --permission-mode plan and explicit --allowedTools.
#   It can read files, edit source code, and run gradle for profiling.
#   Tests, benchmarks, git commits, and reverts are handled by the outer harness.

set -euo pipefail

MAX_ITERATIONS="${1:-200}"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
LOG_FILE="$SCRIPT_DIR/results.tsv"
BEST_SCORE_FILE="$SCRIPT_DIR/.best_score"

cd "$PROJECT_DIR"

if ! command -v claude &>/dev/null; then
  echo "ERROR: 'claude' CLI not found on PATH. Install Claude Code first." >&2
  exit 1
fi

# Create the TSV log with a header row on first run; later runs append.
if [ ! -f "$LOG_FILE" ]; then
  printf "iteration\tcommit\tscore\tdelta\tstatus\tdescription\n" > "$LOG_FILE"
fi

echo "=== Getting baseline score ==="
# run_benchmark.sh exits non-zero on failure. Without the '|| ...' guard,
# 'set -e' would kill this script inside the command substitution and the
# FAILED check below (and its error message) would never execute.
BASELINE=$(bash "$SCRIPT_DIR/run_benchmark.sh") || BASELINE="FAILED"
if [ "$BASELINE" = "FAILED" ]; then
  echo "ERROR: Baseline benchmark failed." >&2
  exit 1
fi
echo "Baseline: $BASELINE ops/s"
echo "$BASELINE" > "$BEST_SCORE_FILE"

BEST_SCORE="$BASELINE"

# Revert the working tree to HEAD, including any brand-new files the agent
# may have created ('git checkout' alone leaves untracked files behind,
# which would pollute every subsequent iteration).
revert_changes() {
  git checkout -- src/
  git clean -fd -- src/main/java/
}

for i in $(seq 1 "$MAX_ITERATIONS"); do
  echo ""
  echo "========================================"
  echo "=== Iteration $i / $MAX_ITERATIONS ==="
  echo "=== Best score: $BEST_SCORE ops/s ==="
  echo "========================================"

  RECENT_LOG=$(tail -10 "$LOG_FILE" 2>/dev/null || echo "No previous iterations")

  PROMPT="You are running iteration $i of an autoresearch optimization loop for graphql-java.

Read autoresearch-validator/program.md for full context and strategy.

Current best benchmark score: $BEST_SCORE ops/s (baseline was: $BASELINE ops/s)

Previous optimization log (last 10 entries):
$RECENT_LOG

YOUR TASK: Make exactly ONE focused optimization to the validation code.
- Read the code files first. If this is iteration 1 or you haven't profiled yet, run the
benchmark with async-profiler first to identify hotspots.
- Pick the most promising strategy from program.md that has NOT already been tried
- Make a minimal, targeted change to ONE or TWO files
- Do NOT run tests or benchmarks — the outer harness handles that
- Do NOT commit — the outer harness handles that
- After editing, output a single-line summary of what you changed and why

SCOPE: Only modify files under src/main/java/graphql/validation/,
or the utility files listed in program.md (ImmutableKit.java, FpKit.java, AstComparator.java).

Make the change now."

  # Allowed tools: read-only exploration + code edits + safe bash commands
  ALLOWED_TOOLS='Read,Glob,Grep,Edit,Write,Bash(./gradlew:*),Bash(cat:*),Bash(wc:*),Bash(head:*),Bash(tail:*),Bash(find:*),Bash(ls:*),Bash(grep:*),Bash(git diff:*),Bash(git status:*),Bash(git log:*),Bash(git show:*),Bash(git checkout:*)'

  echo "--- Asking Claude to make an optimization ---"
  # '|| true': the harness must keep looping even if the agent CLI errors out.
  CLAUDE_OUTPUT=$(claude \
    --model sonnet \
    --permission-mode plan \
    --allowedTools "$ALLOWED_TOOLS" \
    --max-turns 25 \
    --verbose \
    -p "$PROMPT" \
    2>&1) || true

  echo "$CLAUDE_OUTPUT" | tail -5

  # Use 'git status --porcelain' rather than 'git diff --quiet': the latter
  # only sees modifications to tracked files, so newly created (untracked)
  # files written by the agent would be silently missed.
  if [ -z "$(git status --porcelain -- src/main/java/)" ]; then
    echo "No source changes in iteration $i, skipping"
    printf "%s\t-\t-\t-\tskipped\tno changes\n" "$i" >> "$LOG_FILE"
    continue
  fi

  echo "--- Changes made ---"
  git diff --stat src/main/java/

  echo "--- Running tests ---"
  # With 'pipefail' set, the pipeline's status reflects gradle, not 'tail'.
  if ! ./gradlew test --tests "graphql.validation.*" -q 2>&1 | tail -10; then
    echo "Tests FAILED — reverting changes"
    revert_changes
    printf "%s\t-\t-\t-\treverted\ttests failed\n" "$i" >> "$LOG_FILE"
    continue
  fi

  echo "--- Running benchmark ---"
  # Guard the command substitution: run_benchmark.sh exits non-zero on
  # failure, and without '|| ...' a single bad benchmark run would abort
  # the entire overnight loop via 'set -e' instead of reverting here.
  SCORE=$(bash "$SCRIPT_DIR/run_benchmark.sh") || SCORE="FAILED"
  if [ "$SCORE" = "FAILED" ]; then
    echo "Benchmark FAILED — reverting changes"
    revert_changes
    printf "%s\t-\t-\t-\treverted\tbenchmark failed\n" "$i" >> "$LOG_FILE"
    continue
  fi

  # awk does the floating-point comparison/subtraction that bash cannot.
  IMPROVED=$(echo "$SCORE $BEST_SCORE" | awk '{print ($1 > $2) ? "yes" : "no"}')
  DELTA=$(echo "$SCORE $BEST_SCORE" | awk '{printf "%.3f", $1 - $2}')

  if [ "$IMPROVED" = "yes" ]; then
    echo ""
    echo "*** IMPROVED! $BEST_SCORE -> $SCORE ops/s (+$DELTA) ***"
    echo ""
    BEST_SCORE="$SCORE"
    echo "$BEST_SCORE" > "$BEST_SCORE_FILE"

    # One-line summary, e.g. "2 files changed, 14 insertions(+), ..."
    # (xargs trims the leading/trailing whitespace from git's stat line).
    DESCRIPTION=$(git diff --stat src/main/java/ | tail -1 | xargs)

    git add src/main/java/
    git commit -m "autoresearch: iteration $i [+$DELTA ops/s]

$(git diff --cached --stat | head -5)"

    COMMIT=$(git rev-parse --short HEAD)
    printf "%s\t%s\t%s\t+%s\tkept\t%s\n" "$i" "$COMMIT" "$SCORE" "$DELTA" "$DESCRIPTION" >> "$LOG_FILE"
  else
    echo "No improvement: $SCORE vs $BEST_SCORE ops/s ($DELTA) — reverting"
    revert_changes
    printf "%s\t-\t%s\t%s\treverted\tno improvement\n" "$i" "$SCORE" "$DELTA" >> "$LOG_FILE"
  fi
done

# Final report: compute the absolute and percentage improvement over the
# baseline in a single awk pass, then print the banner as one heredoc.
TOTALS=$(awk -v best="$BEST_SCORE" -v base="$BASELINE" \
  'BEGIN { printf "%.3f %.1f", best - base, ((best - base) / base) * 100 }')
TOTAL_DELTA=${TOTALS% *}
TOTAL_PCT=${TOTALS#* }

cat <<EOF

========================================
=== Autoresearch complete ===
=== Baseline: $BASELINE ops/s ===
=== Final best: $BEST_SCORE ops/s ===
=== Improvement: +$TOTAL_DELTA ops/s ($TOTAL_PCT%) ===
========================================

Results log: $LOG_FILE
Review kept commits: git log --oneline --grep='autoresearch'
EOF
93 changes: 93 additions & 0 deletions autoresearch-validator/program.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Autoresearch: Optimize Validator Performance (Overlapping Field Validation)

## Goal

Improve the throughput (ops/sec) of `OverlappingFieldValidationPerformance.overlappingFieldValidationThroughput`
by making targeted optimizations to the validation engine. Validation runs on every query, so improvements here
have broad impact. The benchmark tests overlapping field validation with a large schema and query, plus several
generated scenarios (repeated fields, fragments, deep abstract/concrete types).

Every improvement must pass the relevant test suite locally. Final full-suite verification happens on a clean EC2 instance.

## Metric

- **Primary**: `OverlappingFieldValidationPerformance.overlappingFieldValidationThroughput` — higher is better (ops/sec)
- Run with: `./gradlew jmh -PjmhInclude="performance.OverlappingFieldValidationPerformance.overlappingFieldValidationThroughput" -PjmhFork=1 -PjmhIterations=3 -PjmhWarmupIterations=2`
- **Use async-profiler** to identify hotspots: add `-PjmhProfilers=async` to the JMH command.
- Additional benchmarks for cross-validation: `benchmarkRepeatedFields`, `benchmarkOverlapFrag`, `benchmarkDeepAbstractConcrete`, etc.

## Scope — Files You May Modify

Primary targets under `src/main/java/graphql/validation/`:

- `OperationValidator.java` (1785 lines) — the main validation logic, including overlapping fields check. **This is the primary target.**
- `Validator.java` (54 lines) — top-level validator entry point
- `LanguageTraversal.java` (44 lines) — AST traversal for validation
- `TraversalContext.java` — maintains type context during traversal
- `ValidationContext.java` — validation context
- `ValidationErrorCollector.java` — error collection
- `DocumentVisitor.java` — visitor interface

Also consider utility classes:
- `graphql/collect/ImmutableKit.java` — collection utilities
- `graphql/util/FpKit.java` — functional programming utilities
- `graphql/language/AstComparator.java` — AST comparison (used in field merging checks)

**Do NOT modify**: test files, benchmark files, schema files, build files.

## Constraints

1. **Relevant tests must pass locally**: Run `./gradlew test --tests "graphql.validation.*" -q` for fast iteration (~10 sec). Full suite runs on EC2.
2. **No new dependencies**: This is a firm project policy.
3. **No wildcard imports, no inner classes, no Optional**: Project coding standards.
4. **Preserve public API**: All `@PublicApi` method signatures must remain unchanged.
5. **Thread safety**: The validator may be called concurrently. Don't introduce shared mutable state.
6. **Use `graphql.Assert`** not `Objects.requireNonNull`.

## Optimization Strategies to Explore (ordered by expected impact)

### High Impact
1. **Profile first**: Run async-profiler to identify actual CPU hotspots. The overlapping field validation in OperationValidator is known to have O(n^2) or worse complexity in some cases.
2. **Reduce algorithmic complexity in overlapping field checks**: The `OVERLAPPING_FIELDS_CAN_BE_MERGED` rule compares pairs of fields. Memoization, caching of comparison results, or smarter traversal order can reduce redundant work.
3. **Replace Guava immutable builders with mutable collections in hot paths**: ImmutableMap.Builder and ImmutableList.Builder have expensive hashCode overhead during build(). This was the #1 finding in the ENF optimization.
4. **Cache field-pair comparison results**: If the same field pairs are compared repeatedly across different contexts, cache the results.

### Medium Impact
5. **Optimize AstComparator usage**: Field merging checks use AST comparison. If the same AST nodes are compared multiple times, caching helps.
6. **Reduce object allocation in validation traversal**: Each visited node may create validation state objects.
7. **Optimize type resolution during validation**: Type lookups for overlapping field checks.
8. **Early termination**: Skip validation checks that can't apply to the current node type.

### Lower Impact (but easy wins)
9. **Pre-size collections**: When the number of fields/fragments is known.
10. **Replace stream operations with loops** in hot paths.
11. **Cache schema type lookups** that are repeated during validation.
12. **Reduce string operations**: Error message construction in non-error paths.

## How to Iterate

1. **Profile first** with async-profiler to identify actual hotspots
2. Pick ONE strategy targeting the top hotspot
3. Make a focused, minimal change
4. Run tests locally: `./gradlew test --tests "graphql.validation.*" -q`
5. Run the benchmark — compare to previous best
6. If improved: commit with message "autoresearch: <description> [+X.XX ops/s]"
7. If not improved: revert with `git checkout -- src/`
8. Re-profile to see updated hotspots, then pick next strategy

## Lessons from Previous Autoresearch (ENF Optimization)

- **ImmutableMap.Builder → LinkedHashMap**: Saved 20k ops/s due to Object.hashCode() overhead.
- **ImmutableListMultimap → parallel ArrayList**: Saved 22k ops/s. Same hashCode issue.
- **Avoid groupingBy when only checking group count**: Replaced full map with boolean flag.
- **Short-circuit for empty/single-element cases**: Multiple small wins.
- **Cache lambda captures**: Reuse Supplier fields instead of per-call lambdas.
- **Profile-guided optimization**: The biggest wins came from profiling, not guessing.

## Important Notes

- `OperationValidator.java` at 1785 lines is the main target. It implements all validation rules.
- The overlapping fields check (`OVERLAPPING_FIELDS_CAN_BE_MERGED`) is the most expensive rule and is specifically what the benchmark tests.
- The benchmark uses `@Param({"100"})` for size, generating queries with 100 fields/fragments.
- The validation runs `LanguageTraversal.traverse(document, operationValidator)` which walks the AST.
- Guava is an existing dependency — you can use Guava utilities but nothing else new.
34 changes: 34 additions & 0 deletions autoresearch-validator/run_benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env bash
# Runs the OverlappingFieldValidation throughput benchmark and extracts the score.
# Usage: ./autoresearch-validator/run_benchmark.sh
# Output: prints the benchmark score (ops/sec) to stdout, or "FAILED" on error.
# All progress/diagnostic messages go to stderr so stdout stays machine-readable.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

cd "$PROJECT_DIR"

echo "=== Running OverlappingFieldValidation throughput benchmark ===" >&2
# '|| true': a gradle failure must fall through to the empty-score check
# below. Without it, 'set -e' kills the script inside the command
# substitution and the FAILED diagnostics never run.
BENCHMARK_OUTPUT=$(./gradlew jmh \
  -PjmhInclude="performance.OverlappingFieldValidationPerformance.overlappingFieldValidationThroughput" \
  -PjmhFork=1 \
  -PjmhIterations=3 \
  -PjmhWarmupIterations=2 \
  2>&1) || true

# Extract the score from a JMH result line like:
#   ...overlappingFieldValidationThroughput  100  thrpt  3  XX.XXX ± Y.YYY  ops/s
# Counting from the end: ops/s, error, ±, score -> score is $(NF-3).
# [[:space:]] instead of \s for portability ('\s' in ERE is a GNU extension).
# '|| true': grep exits 1 on no match, which would otherwise abort the
# pipeline under 'set -e'/'pipefail' before we can report FAILED.
SCORE=$(echo "$BENCHMARK_OUTPUT" \
  | grep -E "overlappingFieldValidationThroughput[[:space:]]+" \
  | awk '{print $(NF-3)}') || true

if [ -z "$SCORE" ]; then
  echo "FAILED: could not extract benchmark score" >&2
  echo "Last 20 lines of output:" >&2
  echo "$BENCHMARK_OUTPUT" | tail -20 >&2
  echo "FAILED"
  exit 1
fi

echo "Score: $SCORE ops/s" >&2
echo "$SCORE"