runloopai · sid-rl · Dec 20, 2025 · Dec 18, 2025 · Dec 18, 2025 · Dec 19, 2025
diff --git a/src/runloop_api_client/sdk/__init__.py b/src/runloop_api_client/sdk/__init__.py
@@ -22,20 +22,24 @@
 from .scorer import Scorer
 from .scenario import Scenario
 from .snapshot import Snapshot
+from .benchmark import Benchmark
 from .blueprint import Blueprint
 from .execution import Execution
 from .async_agent import AsyncAgent
 from .async_devbox import AsyncDevbox, AsyncNamedShell
 from .async_scorer import AsyncScorer
 from .scenario_run import ScenarioRun
+from .benchmark_run import BenchmarkRun
 from .async_scenario import AsyncScenario
 from .async_snapshot import AsyncSnapshot
 from .storage_object import StorageObject
+from .async_benchmark import AsyncBenchmark
 from .async_blueprint import AsyncBlueprint
 from .async_execution import AsyncExecution
 from .execution_result import ExecutionResult
 from .scenario_builder import ScenarioBuilder
 from .async_scenario_run import AsyncScenarioRun
+from .async_benchmark_run import AsyncBenchmarkRun
 from .async_storage_object import AsyncStorageObject
 from .async_execution_result import AsyncExecutionResult
 from .async_scenario_builder import AsyncScenarioBuilder
@@ -85,4 +89,8 @@
     "AsyncStorageObject",
     "NamedShell",
     "AsyncNamedShell",
+    "BenchmarkRun",
+    "AsyncBenchmarkRun",
+    "Benchmark",
+    "AsyncBenchmark",
 ]
diff --git a/src/runloop_api_client/sdk/_types.py b/src/runloop_api_client/sdk/_types.py
@@ -1,33 +1,39 @@
 from typing import Union, Callable, Optional
 from typing_extensions import TypedDict
 
+from ..types import (
+    InputContext,
+    ScenarioView,
+    AgentListParams,
+    DevboxListParams,
+    ObjectListParams,
+    AgentCreateParams,
+    DevboxCreateParams,
+    ObjectCreateParams,
+    ScenarioListParams,
+    BlueprintListParams,
+    ObjectDownloadParams,
+    ScenarioUpdateParams,
+    BenchmarkUpdateParams,
+    BlueprintCreateParams,
+    DevboxUploadFileParams,
+    DevboxCreateTunnelParams,
+    DevboxDownloadFileParams,
+    DevboxRemoveTunnelParams,
+    DevboxSnapshotDiskParams,
+    DevboxReadFileContentsParams,
+    DevboxWriteFileContentsParams,
+)
 from .._types import Body, Query, Headers, Timeout, NotGiven
 from ..lib.polling import PollingConfig
 from ..types.devboxes import DiskSnapshotListParams, DiskSnapshotUpdateParams
 from ..types.scenarios import ScorerListParams, ScorerCreateParams, ScorerUpdateParams, ScorerValidateParams
 from ..types.benchmarks import RunListScenarioRunsParams
-from ..types.input_context import InputContext
-from ..types.scenario_view import ScenarioView
-from ..types.agent_list_params import AgentListParams
-from ..types.devbox_list_params import DevboxListParams
-from ..types.object_list_params import ObjectListParams
-from ..types.agent_create_params import AgentCreateParams
-from ..types.devbox_create_params import DevboxCreateParams, DevboxBaseCreateParams
-from ..types.object_create_params import ObjectCreateParams
-from ..types.scenario_list_params import ScenarioListParams
-from ..types.blueprint_list_params import BlueprintListParams
-from ..types.object_download_params import ObjectDownloadParams
-from ..types.scenario_update_params import ScenarioUpdateParams
-from ..types.blueprint_create_params import BlueprintCreateParams
-from ..types.devbox_upload_file_params import DevboxUploadFileParams
+from ..types.devbox_create_params import DevboxBaseCreateParams
 from ..types.scenario_start_run_params import ScenarioStartRunBaseParams
-from ..types.devbox_create_tunnel_params import DevboxCreateTunnelParams
-from ..types.devbox_download_file_params import DevboxDownloadFileParams
+from ..types.benchmark_start_run_params import BenchmarkSelfStartRunParams
+from ..types.benchmarks.run_list_params import RunSelfListParams
 from ..types.devbox_execute_async_params import DevboxNiceExecuteAsyncParams
-from ..types.devbox_remove_tunnel_params import DevboxRemoveTunnelParams
-from ..types.devbox_snapshot_disk_params import DevboxSnapshotDiskParams
-from ..types.devbox_read_file_contents_params import DevboxReadFileContentsParams
-from ..types.devbox_write_file_contents_params import DevboxWriteFileContentsParams
 
 LogCallback = Callable[[str], None]
 
@@ -206,6 +212,17 @@ class ScenarioPreview(ScenarioView):
     """The input context for the Scenario."""
 
 
-# Benchmark Run params
+class SDKBenchmarkUpdateParams(BenchmarkUpdateParams, LongRequestOptions):
+    pass  # adds no keys of its own; kwargs shape accepted by AsyncBenchmark.update()
+
+
+class SDKBenchmarkStartRunParams(BenchmarkSelfStartRunParams, LongRequestOptions):
+    pass  # adds no keys of its own; kwargs shape accepted by AsyncBenchmark.start_run()
+
+
+class SDKBenchmarkListRunsParams(RunSelfListParams, BaseRequestOptions):
+    pass  # adds no keys of its own; kwargs shape accepted by AsyncBenchmark.list_runs()
+
+
 class SDKBenchmarkRunListScenarioRunsParams(RunListScenarioRunsParams, BaseRequestOptions):
     pass
diff --git a/src/runloop_api_client/sdk/async_benchmark.py b/src/runloop_api_client/sdk/async_benchmark.py
@@ -0,0 +1,164 @@
+"""AsyncBenchmark resource class for asynchronous operations."""
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Unpack, override
+
+from ..types import BenchmarkView
+from ._types import (
+    BaseRequestOptions,
+    LongRequestOptions,
+    SDKBenchmarkUpdateParams,
+    SDKBenchmarkListRunsParams,
+    SDKBenchmarkStartRunParams,
+)
+from .._types import SequenceNotStr
+from .._client import AsyncRunloop
+from .async_benchmark_run import AsyncBenchmarkRun
+
+
+class AsyncBenchmark:
+    """A benchmark for evaluating agent performance across scenarios (async).
+
+    Provides async methods for retrieving benchmark details, updating the benchmark,
+    adding/removing scenarios, and starting or listing benchmark runs. Obtain instances via
+    ``runloop.benchmark.from_id()`` or ``runloop.benchmark.list()``.
+
+    Example:
+        >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
+        >>> info = await benchmark.get_info()
+        >>> run = await benchmark.start_run(run_name="evaluation-v1")
+        >>> for scenario_id in info.scenario_ids:
+        ...     scenario = runloop.scenario.from_id(scenario_id)  # not awaited, like benchmark.from_id() above
+        ...     scenario_run = await scenario.run(benchmark_run_id=run.id, run_name="evaluation-v1")
+    """
+
+    def __init__(self, client: AsyncRunloop, benchmark_id: str) -> None:
+        """Create an AsyncBenchmark instance.
+
+        :param client: AsyncRunloop client instance
+        :type client: AsyncRunloop
+        :param benchmark_id: Benchmark ID
+        :type benchmark_id: str
+        """
+        self._client = client
+        self._id = benchmark_id
+
+    @override
+    def __repr__(self) -> str:
+        return f"<AsyncBenchmark id={self._id!r}>"
+
+    @property
+    def id(self) -> str:
+        """Return the benchmark ID.
+
+        :return: Unique benchmark ID
+        :rtype: str
+        """
+        return self._id
+
+    async def get_info(
+        self,
+        **options: Unpack[BaseRequestOptions],
+    ) -> BenchmarkView:
+        """Retrieve current benchmark details via ``benchmarks.retrieve``.
+
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.BaseRequestOptions` for available options
+        :return: Current benchmark info
+        :rtype: BenchmarkView
+        """
+        return await self._client.benchmarks.retrieve(
+            self._id,
+            **options,
+        )
+
+    async def update(
+        self,
+        **params: Unpack[SDKBenchmarkUpdateParams],
+    ) -> BenchmarkView:
+        """Update the benchmark via ``benchmarks.update``.
+
+        Only provided fields will be updated.
+
+        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkUpdateParams` for available parameters
+        :return: Updated benchmark info
+        :rtype: BenchmarkView
+        """
+        return await self._client.benchmarks.update(
+            self._id,
+            **params,
+        )
+
+    async def start_run(
+        self,
+        **params: Unpack[SDKBenchmarkStartRunParams],
+    ) -> AsyncBenchmarkRun:
+        """Start a new benchmark run.
+
+        Calls ``benchmarks.start_run`` for this benchmark and wraps the returned run's
+        ``id`` and ``benchmark_id`` in an AsyncBenchmarkRun bound to the same client.
+
+        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkStartRunParams` for available parameters
+        :return: AsyncBenchmarkRun instance for managing the run
+        :rtype: AsyncBenchmarkRun
+        """
+        run_view = await self._client.benchmarks.start_run(
+            benchmark_id=self._id,
+            **params,
+        )
+        return AsyncBenchmarkRun(self._client, run_view.id, run_view.benchmark_id)
+
+    async def add_scenarios(
+        self,
+        scenario_ids: SequenceNotStr[str],
+        **options: Unpack[LongRequestOptions],
+    ) -> BenchmarkView:
+        """Add scenarios to the benchmark.
+
+        :param scenario_ids: Scenario IDs to add (sent as ``scenarios_to_add``)
+        :type scenario_ids: SequenceNotStr[str]
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+        :return: Updated benchmark info
+        :rtype: BenchmarkView
+        """
+        return await self._client.benchmarks.update_scenarios(
+            self._id,
+            scenarios_to_add=scenario_ids,
+            **options,
+        )
+
+    async def remove_scenarios(
+        self,
+        scenario_ids: SequenceNotStr[str],
+        **options: Unpack[LongRequestOptions],
+    ) -> BenchmarkView:
+        """Remove scenarios from the benchmark.
+
+        :param scenario_ids: Scenario IDs to remove (sent as ``scenarios_to_remove``)
+        :type scenario_ids: SequenceNotStr[str]
+        :param options: See :typeddict:`~runloop_api_client.sdk._types.LongRequestOptions` for available options
+        :return: Updated benchmark info
+        :rtype: BenchmarkView
+        """
+        return await self._client.benchmarks.update_scenarios(
+            self._id,
+            scenarios_to_remove=scenario_ids,
+            **options,
+        )
+
+    async def list_runs(
+        self,
+        **params: Unpack[SDKBenchmarkListRunsParams],
+    ) -> List[AsyncBenchmarkRun]:
+        """List runs for this benchmark.
+
+        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListRunsParams` for available parameters
+        :return: Runs from the single page returned by ``benchmarks.runs.list`` (further pages are not fetched here)
+        :rtype: List[AsyncBenchmarkRun]
+        """
+        page = await self._client.benchmarks.runs.list(
+            benchmark_id=self._id,
+            **params,
+        )
+        return [AsyncBenchmarkRun(self._client, run.id, run.benchmark_id) for run in page.runs]
diff --git a/src/runloop_api_client/sdk/async_benchmark_run.py b/src/runloop_api_client/sdk/async_benchmark_run.py
@@ -16,11 +16,11 @@ class AsyncBenchmarkRun:
 
     Provides async methods for monitoring run status, managing the run lifecycle,
     and accessing scenario run results. Obtain instances via
-    ``benchmark.run()`` or ``benchmark.list_runs()``.
+    ``benchmark.start_run()`` or ``benchmark.list_runs()``.
 
     Example:
         >>> benchmark = runloop.benchmark.from_id("bench-xxx")
-        >>> run = await benchmark.run(run_name="evaluation-v1")
+        >>> run = await benchmark.start_run(run_name="evaluation-v1")
         >>> info = await run.get_info()
         >>> scenario_runs = await run.list_scenario_runs()
     """