Skip to content

Commit 77006ed

Browse files
committed
Fix the benchmark scripts
* Fail fast if something goes wrong. * Only run the benchmarks if results text file is not present. * Build the conda environment into a local directory, on demand. * Call the environment's python binary directly instead of activating. * Emit feedback about what's happening during the benchmark script. * Do not hardcode 0-SNAPSHOT version string. * Run benchmarks with the same Java that Maven uses ($JAVA_HOME). * Install SLF4J binding to avoid repeated SLF4J warning messages. * Do not redirect human-readable output; instead, save to JSON. * Run each iteration for 5s, not 10s (results appear equivalent). * Make the shell script's output much nicer for humans to read. * Update the Python script to ingest the JSON output, not text. * Use milliseconds, not seconds, for benchmarking unit.
1 parent 7a59134 commit 77006ed

File tree

5 files changed

+82
-40
lines changed

5 files changed

+82
-40
lines changed

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,3 @@ target/
1515

1616
# Jupyter #
1717
.ipynb_checkpoints
18-
19-
# Docs #
20-
_build/

docs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
_build/

docs/ops/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/.benchmark-env/
2+
/scijava-ops-benchmarks_results.json

docs/ops/bin/benchmark.sh

Lines changed: 55 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,68 @@
11
#!/bin/bash
22

3-
conda init
3+
set -e
4+
set -o pipefail
5+
6+
if ! command -v mamba >/dev/null 2>&1
7+
then
8+
echo 'Please install mamba before running this script.'
9+
exit 1
10+
fi
411

5-
# Get the path to the script
612
SCRIPT_PATH=$(dirname "$(realpath -s "$0")")
713
DOCS_OPS_PATH="$SCRIPT_PATH/.."
814
INC_PATH="$DOCS_OPS_PATH/../../"
915
BENCHMARKS_PATH="$INC_PATH/scijava-ops-benchmarks"
16+
BENCH_OUT_FILE=scijava-ops-benchmarks_results.json
17+
BENCH_OUT_PATH=$(realpath -s "$DOCS_OPS_PATH/$BENCH_OUT_FILE")
18+
19+
if [ -f "$BENCH_OUT_PATH" ]
20+
then
21+
echo 'Graphing existing benchmark results from:'
22+
echo " $BENCH_OUT_PATH"
23+
echo 'To rerun benchmarks, delete this file first.'
24+
else
25+
echo
26+
echo '=== BUILDING THE CODE ==='
27+
cd "$INC_PATH"
28+
mvn -Denforcer.skip -Dinvoker.skip -Dmaven.test.skip -P benchmarks clean install -pl scijava-ops-benchmarks -am | grep '\(Building.*[0-9]\]\|ERROR\)'
1029

11-
BENCH_OUT_FILE=scijava-ops-benchmarks_results.txt
30+
echo
31+
echo '=== COPYING DEPENDENCIES ==='
32+
cd "$BENCHMARKS_PATH"
33+
{
34+
mvn dependency:copy-dependencies
35+
mvn dependency:copy -Dartifact=org.slf4j:slf4j-simple:1.7.36 -DoutputDirectory=target/dependency
36+
} | grep Copying | while read line
37+
do
38+
stdbuf -o0 echo -n '.'
39+
done
40+
echo
1241

13-
cd "$INC_PATH"
14-
mvn -P benchmarks clean install -pl scijava-ops-benchmarks -am
42+
# NB: Use the version of Java pointed to by the JAVA_HOME variable, if any.
43+
test -x "$JAVA_HOME/bin/java" && JAVA="$JAVA_HOME/bin/java" || JAVA=java
1544

16-
cd "$BENCHMARKS_PATH"
17-
mvn dependency:copy-dependencies
45+
echo
46+
echo '=== RUNNING BENCHMARKS ==='
47+
cd "$DOCS_OPS_PATH"
48+
"$JAVA" \
49+
-cp "$BENCHMARKS_PATH/target/*:$BENCHMARKS_PATH/target/dependency/*" \
50+
--add-opens=java.base/java.io=ALL-UNNAMED \
51+
org.openjdk.jmh.Main \
52+
-rf json -rff "$BENCH_OUT_FILE" \
53+
-r 5
54+
fi
1855

1956
cd "$DOCS_OPS_PATH"
20-
conda env create -f "environment.yml"
21-
java -cp "$BENCHMARKS_PATH/target/scijava-ops-benchmarks-0-SNAPSHOT.jar:$BENCHMARKS_PATH/target/dependency/*" org.openjdk.jmh.Main -o $BENCH_OUT_FILE
57+
envPath=.benchmark-env
58+
pythonPath="$envPath/bin/python"
59+
if [ ! -x "$pythonPath" ]
60+
then
61+
echo
62+
echo '=== CREATING CONDA ENVIRONMENT ==='
63+
mamba create -y -p "$envPath" python=3.10 plotly=5.19.0
64+
fi
2265

23-
source activate ops-docs
24-
python graph_results.py
25-
source deactivate
66+
echo
67+
echo '=== GRAPHING THE RESULTS ==='
68+
"$pythonPath" graph_results.py

docs/ops/graph_results.py

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
# This script parses JMH benchmarking results into charts developed using plot.ly (https://plotly.com/)
77
# It currently develops one boxplot PER class, with each JMH benchmark method represented as a separate boxplot.
8-
# It expects JMH benchmark results be dumped to a file "scijava-ops-benchmark_results.txt", within its directory.
8+
# It expects JMH benchmark results to be dumped to a file "scijava-ops-benchmarks_results.json", within its directory.
99

1010
# If you'd like to add a title to the plotly charts, add an entry to the following dict.
1111
#
@@ -33,44 +33,39 @@
3333
}
3434

3535
# Read in the benchmark results
36-
with open("scijava-ops-benchmarks_results.txt") as f:
37-
lines = f.readlines()
38-
39-
# Keep only the lines containing our desired results
40-
for i in range(len(lines) - 1, 0, -1):
41-
if (lines[i].startswith("Benchmark ")):
42-
lines = lines[i+1:]
43-
break
36+
with open("scijava-ops-benchmarks_results.json") as f:
37+
data = json.load(f)
4438

4539
# Build a map of results by benchmark class
4640
benchmark_classes = {}
47-
for line in lines:
48-
words = line.split()
49-
test = words[0]
50-
last_period = test.rfind('.')
51-
cls = test[:last_period]
52-
test = test[last_period+1:]
41+
for row in data:
42+
fqdn_tokens = row["benchmark"].split(".")
43+
cls, test = fqdn_tokens[-2], fqdn_tokens[-1]
5344

5445
if cls not in benchmark_classes:
5546
benchmark_classes[cls] = {}
56-
57-
benchmark_classes[cls][test] = words[1:]
47+
48+
# NB: Convert seconds to milliseconds.
49+
benchmark_classes[cls][test] = {
50+
"score": 1000 * row["primaryMetric"]["score"],
51+
"error": 1000 * row["primaryMetric"]["scoreError"],
52+
}
5853

5954
# For each class, build a chart and dump it to JSON
60-
for cls, data in benchmark_classes.items():
61-
period_pos = cls.rfind(".")
62-
if period_pos > -1:
63-
cls = cls[period_pos+1:]
55+
for cls, test in benchmark_classes.items():
56+
print(f"Generating figure for {cls}", end="")
6457
x = []
6558
y = []
6659
error_y = []
6760

6861
# Add each benchmark in the class
69-
for method, line in data.items():
62+
for method, stats in test.items():
63+
print(".", end="")
7064
method = benchmark_categories.get(method, method)
7165
x.append(method)
72-
y.append(float(line[2]))
73-
error_y.append(float(line[4]))
66+
y.append(stats["score"])
67+
error_y.append(stats["error"])
68+
7469
# Create a bar chart
7570
fig = go.Figure()
7671
fig.add_bar(
@@ -80,9 +75,13 @@
8075
)
8176
fig.update_layout(
8277
title_text=figure_titles.get(cls, "TODO: Add title"),
83-
yaxis_title="Performance (s/op)"
78+
yaxis_title="Performance (ms/op)"
8479
)
8580

8681
# Convert to JSON and dump
8782
with open(f"images/{cls}.json", "w") as f:
8883
f.write(io.to_json(fig))
84+
85+
print()
86+
87+
print("Done!")

0 commit comments

Comments
 (0)