Skip to content

gh-146558: optimize dict access with objects with known hash#146559

Open
kumaraditya303 wants to merge 7 commits into python:main from
kumaraditya303:jit-hash
Open

gh-146558: optimize dict access with objects with known hash#146559
kumaraditya303 wants to merge 7 commits into python:main from
kumaraditya303:jit-hash

Conversation

@kumaraditya303
Copy link
Copy Markdown
Contributor

@kumaraditya303 kumaraditya303 commented Mar 28, 2026

I asked Claude to create a benchmark for this change; it produced the following script, which shows a significant speedup with the JIT enabled.

| Benchmark | No JIT | JIT | Speedup |
| --- | --- | --- | --- |
| dict_get[str] | 55.7M/s | 70.1M/s | +26% |
| dict_get[int] | 55.5M/s | 75.4M/s | +36% |
| dict_get[float] | 50.9M/s | 74.7M/s | +47% |
| dict_get[complex] | 44.9M/s | 74.9M/s | +67% |
| dict_store[str] | 74.9M/s | 85.7M/s | +14% |
| dict_store[int] | 76.0M/s | 94.1M/s | +24% |
| dict_store[float] | 66.6M/s | 97.0M/s | +46% |
| dict_store[complex] | 55.9M/s | 98.0M/s | +75% |
| dict_get[multi] | 23.4M/s | 35.1M/s | +50% |
| dict_get[obj] | 52.9M/s | 68.8M/s | +30% |
| dict_store[obj] | 75.4M/s | 92.5M/s | +23% |
Details
"""Benchmark dict get/store with constant keys to measure
_BINARY_OP_SUBSCR_DICT_KNOWN_HASH and _STORE_SUBSCR_DICT_KNOWN_HASH speedup.

Usage:
  PYTHON_JIT=0 ./python.exe bench_dict_known_hash.py
  PYTHON_JIT=1 ./python.exe bench_dict_known_hash.py
"""

import time
import os
import sys

N = 50_000_000


def bench_get_str(n):
    """Read the literal str key ``'a'`` from a 3-entry dict ``n`` times.

    Returns the running sum of the values read, so the result is ``n``
    (the value stored under ``'a'`` is 1).
    """
    table = {'a': 1, 'b': 2, 'c': 3}
    total = 0
    # The subscript is kept as a literal inside the loop on purpose: the
    # constant-key lookup is the operation being timed.
    for _ in range(n):
        total += table['a']
    return total


def bench_get_int(n):
    """Read the literal int key ``1`` from a 3-entry dict ``n`` times.

    Returns the running sum of the values read, i.e. ``10 * n``.
    """
    table = {1: 10, 2: 20, 3: 30}
    total = 0
    # Literal key in the loop body: the constant-key lookup is what is timed.
    for _ in range(n):
        total += table[1]
    return total


def bench_store_str(n):
    """Assign 1 to the literal str key ``'a'`` of a fresh dict ``n`` times.

    Returns the dict, which is empty when ``n`` is 0 and ``{'a': 1}``
    otherwise.
    """
    target = {}
    # Literal key in the loop body: the constant-key store is what is timed.
    for _ in range(n):
        target['a'] = 1
    return target


def bench_store_int(n):
    """Assign 1 to the literal int key ``1`` of a fresh dict ``n`` times.

    Returns the dict, which is empty when ``n`` is 0 and ``{1: 1}``
    otherwise.
    """
    target = {}
    # Literal key in the loop body: the constant-key store is what is timed.
    for _ in range(n):
        target[1] = 1
    return target


def bench_get_float(n):
    """Read the literal float key ``1.5`` from a 3-entry dict ``n`` times.

    Returns the running sum of the values read, so the result is ``n``
    (the value stored under ``1.5`` is 1).
    """
    table = {1.5: 1, 2.5: 2, 3.5: 3}
    total = 0
    # Literal key in the loop body: the constant-key lookup is what is timed.
    for _ in range(n):
        total += table[1.5]
    return total


def bench_store_float(n):
    """Assign 1 to the literal float key ``1.5`` of a fresh dict ``n`` times.

    Returns the dict, which is empty when ``n`` is 0 and ``{1.5: 1}``
    otherwise.
    """
    target = {}
    # Literal key in the loop body: the constant-key store is what is timed.
    for _ in range(n):
        target[1.5] = 1
    return target


def bench_get_complex(n):
    """Read the key ``1+2j`` from a 2-entry dict ``n`` times.

    Returns the running sum of the values read, so the result is ``n``
    (the value stored under ``1+2j`` is 1).
    """
    table = {1+2j: 1, 3+4j: 2}
    total = 0
    # The key expression is written inline in the loop body, exactly as the
    # benchmark intends it to be timed.
    for _ in range(n):
        total += table[1+2j]
    return total


def bench_store_complex(n):
    """Assign 1 to the key ``1+2j`` of a fresh dict ``n`` times.

    Returns the dict, which is empty when ``n`` is 0 and ``{1+2j: 1}``
    otherwise.
    """
    target = {}
    # The key expression is written inline in the loop body, exactly as the
    # benchmark intends it to be timed.
    for _ in range(n):
        target[1+2j] = 1
    return target


def bench_get_multi(n):
    """Do four lookups per iteration, one each for a str, int, bytes and
    tuple key, against the same dict, repeated ``n`` times.

    Returns the running sum of all values read, i.e. ``10 * n``
    (1 + 2 + 3 + 4 per iteration).
    """
    table = {'a': 1, 1: 2, b'x': 3, (1, 2): 4}
    total = 0
    # All four subscripts stay in a single expression with literal keys so
    # the timed loop body is one mixed-key-type statement.
    for _ in range(n):
        total += table['a'] + table[1] + table[b'x'] + table[(1, 2)]
    return total


class _Key:
    pass

_KEY = _Key()


def bench_get_obj(n):
    """Read the module-level ``_KEY`` object from a 1-entry dict ``n`` times.

    Returns the running sum of the values read, so the result is ``n``
    (the value stored under ``_KEY`` is 1).
    """
    table = {_KEY: 1}
    total = 0
    # The key is referenced through the global name on every iteration, as
    # in the benchmark's intended shape; it is deliberately not hoisted.
    for _ in range(n):
        total += table[_KEY]
    return total


def bench_store_obj(n):
    """Assign 1 under the module-level ``_KEY`` object in a fresh dict,
    ``n`` times.

    Returns the dict, which is empty when ``n`` is 0 and ``{_KEY: 1}``
    otherwise.
    """
    target = {}
    # The key is referenced through the global name on every iteration, as
    # in the benchmark's intended shape; it is deliberately not hoisted.
    for _ in range(n):
        target[_KEY] = 1
    return target


def run_avg(name, func, n, runs=3):
    """Time ``func(n)`` over several runs and report the mean.

    Args:
        name: Label printed at the start of the result line.
        func: Callable taking a single iteration count.
        n: Iteration count passed to ``func`` on every timed run.
        runs: Number of timed runs to average over.

    Returns:
        The mean wall-clock duration of one timed run, in seconds.

    A single untimed ``func(1000)`` call is made first as a warmup. One line
    is printed with the label, the mean time, and the rate in millions of
    iterations per second.
    """
    func(1000)  # untimed warmup call

    elapsed = []
    for _ in range(runs):
        started = time.perf_counter()
        func(n)
        finished = time.perf_counter()
        elapsed.append(finished - started)

    mean_seconds = sum(elapsed) / len(elapsed)
    millions_per_second = n / mean_seconds / 1e6
    print(f"  {name:25s}  {mean_seconds:.3f}s  ({millions_per_second:.1f}M iter/s)")
    return mean_seconds


if __name__ == "__main__":
    # Number of timed runs per benchmark; used both for the banner and for
    # run_avg so the two cannot drift apart.
    runs = 3

    # Prefer the interpreter's own report of whether the JIT is enabled
    # (sys._jit, available on CPython 3.14+). Guessing from the environment
    # alone mislabels the output when PYTHON_JIT is unset on a build where
    # the JIT is absent or disabled by default. Fall back to the environment
    # variable on interpreters without sys._jit.
    jit_api = getattr(sys, "_jit", None)
    if jit_api is not None:
        jit_enabled = jit_api.is_enabled()
    else:
        jit_enabled = os.environ.get("PYTHON_JIT", "1") == "1"
    jit = "JIT" if jit_enabled else "no JIT"
    print(f"Python {sys.version.split()[0]} ({jit}), avg of {runs} runs\n")

    # (label, benchmark function, iteration count), in reporting order.
    benchmarks = (
        ("dict_get[str]", bench_get_str, N),
        ("dict_get[int]", bench_get_int, N),
        ("dict_get[float]", bench_get_float, N),
        ("dict_get[complex]", bench_get_complex, N),
        ("dict_store[str]", bench_store_str, N),
        ("dict_store[int]", bench_store_int, N),
        ("dict_store[float]", bench_store_float, N),
        ("dict_store[complex]", bench_store_complex, N),
        # Four lookups per iteration, so a quarter of the iterations keeps
        # the total lookup count equal to the single-key benchmarks.
        ("dict_get[multi]", bench_get_multi, N // 4),
        ("dict_get[obj]", bench_get_obj, N),
        ("dict_store[obj]", bench_store_obj, N),
    )
    for label, bench, iterations in benchmarks:
        run_avg(label, bench, iterations, runs)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants