chore(storage): add microbenchmark for crc32c#17420
Conversation
There was a problem hiding this comment.
Code Review
This pull request introduces a new microbenchmark script, benchmark_crc32c.py, to measure the execution performance of google_crc32c.value across different data sizes. The feedback recommends adding a warm-up phase before the timed iterations to prevent cold-start overhead from skewing the maximum and mean duration results.
| durations = [] | ||
| for _ in range(args.iterations): | ||
| start = time.perf_counter() | ||
| _ = google_crc32c.value(data) | ||
| end = time.perf_counter() | ||
| durations.append(end - start) |
There was a problem hiding this comment.
Without a warm-up phase, the first few iterations of the benchmark can be significantly slower due to cold-start overhead (such as CPU cache misses or lazy loading of C-extension symbols). This skews the maximum and mean duration results. Adding a small warm-up loop before the timed iterations improves measurement accuracy.
| durations = [] | |
| for _ in range(args.iterations): | |
| start = time.perf_counter() | |
| _ = google_crc32c.value(data) | |
| end = time.perf_counter() | |
| durations.append(end - start) | |
| # Warm up to minimize cold-start effects | |
| for _ in range(10): | |
| _ = google_crc32c.value(data) | |
| durations = [] | |
| for _ in range(args.iterations): | |
| start = time.perf_counter() | |
| _ = google_crc32c.value(data) | |
| end = time.perf_counter() | |
| durations.append(end - start) |
parthea
left a comment
There was a problem hiding this comment.
Please take a look at the feedback from gemini-code-assist. I also saw similar feedback in a separate AI prompt regarding improving the result (removing the loop overhead/cold start from the benchmark). If you're specifically interested in the result with the loop overhead/cold start, please add a comment in the script, or consider testing it separately.
Fix the Micro-Benchmarking Flaw (Loop Overhead)
For very small sizes (like 1KiB), google_crc32c.value(data) executes incredibly fast. When you time it line-by-line inside a Python loop, a non-trivial amount of the measured time is actually Python loop overhead and the cost of calling time.perf_counter() itself.
For micro-benchmarks, it is better to time a nested block running multiple repetitions, or leverage Python's built-in timeit module, which handles this calibration automatically.
See the suggestion from AI below
import argparse
import os
import statistics
import sys
import time
from typing import Tuple
try:
    import google_crc32c
except ImportError:
    # The module is imported as ``google_crc32c`` but is installed from the
    # ``google-crc32c`` distribution, so the message names the installable
    # package to make the fix obvious to the user.
    print(
        "Error: google-crc32c package is not installed in the python environment.",
        file=sys.stderr,
    )
    sys.exit(1)
def parse_size(size_str: str) -> int:
    """Convert a human-readable size such as ``"100KiB"`` into a byte count.

    Suffixes are matched case-insensitively: binary units ``KiB``/``MiB``/
    ``GiB`` (powers of 1024), decimal units ``KB``/``MB``/``GB`` (powers of
    1000) and plain ``B``. A string without a suffix is read as an integer
    number of bytes. Fractional values are accepted with the multi-byte
    units (``"1.5MiB"``) and truncated to whole bytes.

    Args:
        size_str: The size text; surrounding whitespace is ignored.

    Returns:
        The size in bytes, always non-negative.

    Raises:
        ValueError: If the text cannot be parsed, is not a finite number,
            or describes a negative size.
    """
    size_str = size_str.strip().upper()

    # Order matters: every multi-letter suffix also ends with "B", so the
    # bare "B" entry has to be tried last.
    units = {
        "KIB": 1024,
        "MIB": 1024**2,
        "GIB": 1024**3,
        "KB": 1000,
        "MB": 1000**2,
        "GB": 1000**3,
        "B": 1,
    }

    size = None
    for unit, multiplier in units.items():
        if not size_str.endswith(unit):
            continue
        number = size_str[: -len(unit)]
        try:
            if unit == "B":
                size = int(number)
            else:
                # Go through float to support values like "1.5MiB".
                size = int(float(number) * multiplier)
        except (ValueError, OverflowError):
            # int() raises OverflowError for infinities ("inf", "1e400");
            # treat that like any other unparseable number.
            size = None
        break

    if size is None:
        try:
            size = int(size_str)
        except ValueError:
            raise ValueError(f"Unknown size format: {size_str}") from None

    if size < 0:
        raise ValueError(f"Size must be non-negative: {size_str}")
    return size
def format_time(seconds: float) -> str:
    """Render a duration with two decimals in the most readable unit.

    The unit is one of ns, \u03bcs, ms or s. It is chosen *after* rounding to
    two decimals, so a value just below a unit boundary rolls over to the
    next unit (``"1.00 ms"``) instead of printing as ``"1000.00 \u03bcs"``.

    Args:
        seconds: Duration in seconds.

    Returns:
        The formatted duration, e.g. ``"500.00 ns"`` or ``"1.50 s"``.
    """
    for scale, suffix in ((1e9, "ns"), (1e6, "\u03bcs"), (1e3, "ms")):
        text = f"{seconds * scale:.2f}"
        # Compare the rounded text, not the raw value, so the displayed
        # number never reaches 1000 of a sub-second unit.
        if float(text) < 1000.0:
            return f"{text} {suffix}"
    return f"{seconds:.2f} s"
def main():
    """Benchmark ``google_crc32c.value`` over several payload sizes.

    Command-line options:
        --sizes: Comma-separated payload sizes, parsed with ``parse_size``.
        --iterations: Number of timed calls per size (must be > 0).

    For each size, one random payload is generated, one untimed warm-up call
    is made, and then every call is timed individually. A table with the
    min, max, mean and median duration per size is printed to stdout.

    Exits with status 1 (message on stderr) when ``--iterations`` is not
    positive, when ``google_crc32c.implementation`` is not ``"c"``, or when
    a size cannot be parsed or is negative.
    """
    parser = argparse.ArgumentParser(
        description="Benchmark google_crc32c.value execution time."
    )
    parser.add_argument(
        "--sizes",
        type=str,
        default="1KiB,100KiB,2MiB",
        help="Comma-separated list of sizes (e.g. '1KiB,100KiB,2MiB')",
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=100,
        help="Number of iterations for benchmark (default: 100)",
    )
    args = parser.parse_args()

    if args.iterations <= 0:
        print("Error: --iterations must be greater than 0.", file=sys.stderr)
        sys.exit(1)

    # Refuse to run unless the module reports its C implementation.
    impl = getattr(google_crc32c, "implementation", None)
    print(f"google_crc32c implementation: {impl}")
    if impl != "c":
        print(
            f"Error: google_crc32c is not using the accelerated C code (got '{impl}').",
            file=sys.stderr,
        )
        sys.exit(1)

    sizes_to_test = []
    for s in args.sizes.split(","):
        try:
            size_bytes = parse_size(s)
            # os.urandom() rejects negative lengths; fail here with the same
            # clean error path as any other bad size instead of a traceback.
            if size_bytes < 0:
                raise ValueError("size must be non-negative")
        except (ValueError, OverflowError) as e:
            # OverflowError covers non-finite inputs such as "infKiB".
            print(f"Error parsing size '{s}': {e}", file=sys.stderr)
            sys.exit(1)
        sizes_to_test.append((s.strip(), size_bytes))

    print(f"\nBenchmarking google_crc32c.value(data) with {args.iterations} iterations:")
    print("-" * 86)
    print(
        f"{'Size (String)':<15} | {'Size (Bytes)':<12} | {'Min':<11} | {'Max':<11} | {'Mean':<11} | {'Median':<11}"
    )
    print("-" * 86)

    for size_str, size_bytes in sizes_to_test:
        # Generate the random payload once per size; every call reuses it.
        data = os.urandom(size_bytes)

        # One discarded call so first-call costs are not part of the samples.
        google_crc32c.value(data)

        # Each sample brackets a single call, so it also includes the cost of
        # the two perf_counter() calls around it.
        durations = []
        for _ in range(args.iterations):
            start = time.perf_counter()
            google_crc32c.value(data)
            end = time.perf_counter()
            durations.append(end - start)

        min_time = min(durations)
        max_time = max(durations)
        mean_time = statistics.mean(durations)
        median_time = statistics.median(durations)

        # Time columns are 11 wide, matching the header row above.
        print(
            f"{size_str:<15} | {size_bytes:<12} | "
            f"{format_time(min_time):<11} | {format_time(max_time):<11} | "
            f"{format_time(mean_time):<11} | {format_time(median_time):<11}"
        )


if __name__ == "__main__":
    main()
| import google_crc32c | ||
| except ImportError: | ||
| print( | ||
| "Error: google_crc32c package is not installed in the python environment.", |
There was a problem hiding this comment.
nit: Use google-crc32c which matches https://pypi.org/project/google-crc32c/
| "Error: google_crc32c package is not installed in the python environment.", | |
| "Error: google-crc32c package is not installed in the python environment.", |
Add a microbenchmark script for measuring crc32c execution time.