Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions src/runloop_api_client/sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,17 @@

from __future__ import annotations

from .sync import AgentOps, DevboxOps, ScorerOps, RunloopSDK, ScenarioOps, SnapshotOps, BlueprintOps, StorageObjectOps
from .sync import (
AgentOps,
DevboxOps,
ScorerOps,
RunloopSDK,
ScenarioOps,
SnapshotOps,
BenchmarkOps,
BlueprintOps,
StorageObjectOps,
)
from .agent import Agent
from ._types import ScenarioPreview
from .async_ import (
Expand All @@ -15,6 +25,7 @@
AsyncRunloopSDK,
AsyncScenarioOps,
AsyncSnapshotOps,
AsyncBenchmarkOps,
AsyncBlueprintOps,
AsyncStorageObjectOps,
)
Expand Down Expand Up @@ -51,6 +62,8 @@
# Management interfaces
"AgentOps",
"AsyncAgentOps",
"BenchmarkOps",
"AsyncBenchmarkOps",
"DevboxOps",
"AsyncDevboxOps",
"BlueprintOps",
Expand All @@ -66,6 +79,10 @@
# Resource classes
"Agent",
"AsyncAgent",
"Benchmark",
"AsyncBenchmark",
"BenchmarkRun",
"AsyncBenchmarkRun",
"Devbox",
"AsyncDevbox",
"Execution",
Expand All @@ -89,8 +106,4 @@
"AsyncStorageObject",
"NamedShell",
"AsyncNamedShell",
"BenchmarkRun",
"AsyncBenchmarkRun",
"Benchmark",
"AsyncBenchmark",
]
10 changes: 10 additions & 0 deletions src/runloop_api_client/sdk/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
DevboxCreateParams,
ObjectCreateParams,
ScenarioListParams,
BenchmarkListParams,
BlueprintListParams,
ObjectDownloadParams,
ScenarioUpdateParams,
BenchmarkCreateParams,
BenchmarkUpdateParams,
BlueprintCreateParams,
DevboxUploadFileParams,
Expand Down Expand Up @@ -212,6 +214,14 @@ class ScenarioPreview(ScenarioView):
"""The input context for the Scenario."""


class SDKBenchmarkCreateParams(BenchmarkCreateParams, LongRequestOptions):
    """Keyword arguments accepted when creating a benchmark through the SDK.

    Combines the fields of ``BenchmarkCreateParams`` with those of
    ``LongRequestOptions``; no additional fields are declared here.
    """


class SDKBenchmarkListParams(BenchmarkListParams, BaseRequestOptions):
    """Keyword arguments accepted when listing benchmarks through the SDK.

    Combines the fields of ``BenchmarkListParams`` with those of
    ``BaseRequestOptions``; no additional fields are declared here.
    """


class SDKBenchmarkUpdateParams(BenchmarkUpdateParams, LongRequestOptions):
    """Keyword arguments accepted when updating a benchmark through the SDK.

    Combines the fields of ``BenchmarkUpdateParams`` with those of
    ``LongRequestOptions``; no additional fields are declared here.
    """

Expand Down
56 changes: 56 additions & 0 deletions src/runloop_api_client/sdk/async_.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
SDKObjectCreateParams,
SDKScenarioListParams,
SDKScorerCreateParams,
SDKBenchmarkListParams,
SDKBlueprintListParams,
SDKBenchmarkCreateParams,
SDKBlueprintCreateParams,
SDKDiskSnapshotListParams,
SDKDevboxCreateFromImageParams,
Expand All @@ -34,6 +36,7 @@
from .async_scorer import AsyncScorer
from .async_scenario import AsyncScenario
from .async_snapshot import AsyncSnapshot
from .async_benchmark import AsyncBenchmark
from .async_blueprint import AsyncBlueprint
from ..lib.context_loader import TarFilter, build_directory_tar
from .async_storage_object import AsyncStorageObject
Expand Down Expand Up @@ -815,6 +818,55 @@ async def list(self, **params: Unpack[SDKScenarioListParams]) -> list[AsyncScena
return [AsyncScenario(self._client, item.id) async for item in page]


class AsyncBenchmarkOps:
    """Asynchronous benchmark management, reachable as ``runloop.benchmark``.

    Each method delegates to ``client.benchmarks`` and wraps the resulting
    IDs in :class:`AsyncBenchmark` objects.

    Example:
        >>> runloop = AsyncRunloopSDK()
        >>> benchmarks = await runloop.benchmark.list()
        >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
        >>> run = await benchmark.start_run(run_name="evaluation-v1")
    """

    def __init__(self, client: AsyncRunloop) -> None:
        """Store the API client used by every operation on this object.

        :param client: AsyncRunloop client instance
        :type client: AsyncRunloop
        """
        self._client = client

    async def create(self, **params: Unpack[SDKBenchmarkCreateParams]) -> AsyncBenchmark:
        """Create a benchmark and return a wrapper bound to its ID.

        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkCreateParams` for available parameters
        :return: Wrapper for the benchmark that was just created
        :rtype: AsyncBenchmark
        """
        created = await self._client.benchmarks.create(**params)
        new_id = created.id
        return AsyncBenchmark(self._client, new_id)

    def from_id(self, benchmark_id: str) -> AsyncBenchmark:
        """Wrap an existing benchmark ID without awaiting anything.

        :param benchmark_id: ID of the benchmark
        :type benchmark_id: str
        :return: Wrapper bound to ``benchmark_id``
        :rtype: AsyncBenchmark
        """
        return AsyncBenchmark(self._client, benchmark_id)

    async def list(self, **params: Unpack[SDKBenchmarkListParams]) -> list[AsyncBenchmark]:
        """List benchmarks, forwarding any filter parameters to the API.

        The wrappers are built from the ``benchmarks`` attribute of the
        response returned by ``client.benchmarks.list``.

        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListParams` for available parameters
        :return: One wrapper per benchmark in the response
        :rtype: list[AsyncBenchmark]
        """
        listing = await self._client.benchmarks.list(**params)
        wrapped = []
        for view in listing.benchmarks:
            wrapped.append(AsyncBenchmark(self._client, view.id))
        return wrapped


class AsyncRunloopSDK:
"""High-level asynchronous entry point for the Runloop SDK.

Expand All @@ -826,6 +878,8 @@ class AsyncRunloopSDK:
:vartype api: AsyncRunloop
:ivar agent: High-level async interface for agent management.
:vartype agent: AsyncAgentOps
:ivar benchmark: High-level async interface for benchmark management
:vartype benchmark: AsyncBenchmarkOps
:ivar devbox: High-level async interface for devbox management
:vartype devbox: AsyncDevboxOps
:ivar blueprint: High-level async interface for blueprint management
Expand All @@ -849,6 +903,7 @@ class AsyncRunloopSDK:

api: AsyncRunloop
agent: AsyncAgentOps
benchmark: AsyncBenchmarkOps
devbox: AsyncDevboxOps
blueprint: AsyncBlueprintOps
scenario: AsyncScenarioOps
Expand Down Expand Up @@ -895,6 +950,7 @@ def __init__(
)

self.agent = AsyncAgentOps(self.api)
self.benchmark = AsyncBenchmarkOps(self.api)
self.devbox = AsyncDevboxOps(self.api)
self.blueprint = AsyncBlueprintOps(self.api)
self.scenario = AsyncScenarioOps(self.api)
Expand Down
56 changes: 56 additions & 0 deletions src/runloop_api_client/sdk/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
SDKObjectCreateParams,
SDKScenarioListParams,
SDKScorerCreateParams,
SDKBenchmarkListParams,
SDKBlueprintListParams,
SDKBenchmarkCreateParams,
SDKBlueprintCreateParams,
SDKDiskSnapshotListParams,
SDKDevboxCreateFromImageParams,
Expand All @@ -33,6 +35,7 @@
from ._helpers import detect_content_type
from .scenario import Scenario
from .snapshot import Snapshot
from .benchmark import Benchmark
from .blueprint import Blueprint
from .storage_object import StorageObject
from .scenario_builder import ScenarioBuilder
Expand Down Expand Up @@ -840,6 +843,55 @@ def list(self, **params: Unpack[SDKScenarioListParams]) -> list[Scenario]:
return [Scenario(self._client, item.id) for item in page]


class BenchmarkOps:
    """Synchronous benchmark management, reachable as ``runloop.benchmark``.

    Each method delegates to ``client.benchmarks`` and wraps the resulting
    IDs in :class:`Benchmark` objects.

    Example:
        >>> runloop = RunloopSDK()
        >>> benchmarks = runloop.benchmark.list()
        >>> benchmark = runloop.benchmark.from_id("bmd_xxx")
        >>> run = benchmark.start_run(run_name="evaluation-v1")
    """

    def __init__(self, client: Runloop) -> None:
        """Store the API client used by every operation on this object.

        :param client: Runloop client instance
        :type client: Runloop
        """
        self._client = client

    def create(self, **params: Unpack[SDKBenchmarkCreateParams]) -> Benchmark:
        """Create a benchmark and return a wrapper bound to its ID.

        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkCreateParams` for available parameters
        :return: Wrapper for the benchmark that was just created
        :rtype: Benchmark
        """
        created = self._client.benchmarks.create(**params)
        new_id = created.id
        return Benchmark(self._client, new_id)

    def from_id(self, benchmark_id: str) -> Benchmark:
        """Wrap an existing benchmark ID.

        :param benchmark_id: ID of the benchmark
        :type benchmark_id: str
        :return: Wrapper bound to ``benchmark_id``
        :rtype: Benchmark
        """
        return Benchmark(self._client, benchmark_id)

    def list(self, **params: Unpack[SDKBenchmarkListParams]) -> list[Benchmark]:
        """List benchmarks, forwarding any filter parameters to the API.

        The wrappers are built from the ``benchmarks`` attribute of the
        response returned by ``client.benchmarks.list``.

        :param params: See :typeddict:`~runloop_api_client.sdk._types.SDKBenchmarkListParams` for available parameters
        :return: One wrapper per benchmark in the response
        :rtype: list[Benchmark]
        """
        listing = self._client.benchmarks.list(**params)
        wrapped = []
        for view in listing.benchmarks:
            wrapped.append(Benchmark(self._client, view.id))
        return wrapped


class RunloopSDK:
"""High-level synchronous entry point for the Runloop SDK.

Expand All @@ -851,6 +903,8 @@ class RunloopSDK:
:vartype api: Runloop
:ivar agent: High-level interface for agent management.
:vartype agent: AgentOps
:ivar benchmark: High-level interface for benchmark management
:vartype benchmark: BenchmarkOps
:ivar devbox: High-level interface for devbox management
:vartype devbox: DevboxOps
:ivar blueprint: High-level interface for blueprint management
Expand All @@ -874,6 +928,7 @@ class RunloopSDK:

api: Runloop
agent: AgentOps
benchmark: BenchmarkOps
devbox: DevboxOps
blueprint: BlueprintOps
scenario: ScenarioOps
Expand Down Expand Up @@ -920,6 +975,7 @@ def __init__(
)

self.agent = AgentOps(self.api)
self.benchmark = BenchmarkOps(self.api)
self.devbox = DevboxOps(self.api)
self.blueprint = BlueprintOps(self.api)
self.scenario = ScenarioOps(self.api)
Expand Down
60 changes: 60 additions & 0 deletions tests/sdk/test_async_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
MockScorerView,
MockScenarioView,
MockSnapshotView,
MockBenchmarkView,
MockBlueprintView,
create_mock_httpx_response,
)
Expand All @@ -27,12 +28,14 @@
AsyncAgentOps,
AsyncScenario,
AsyncSnapshot,
AsyncBenchmark,
AsyncBlueprint,
AsyncDevboxOps,
AsyncScorerOps,
AsyncRunloopSDK,
AsyncScenarioOps,
AsyncSnapshotOps,
AsyncBenchmarkOps,
AsyncBlueprintOps,
AsyncStorageObject,
AsyncStorageObjectOps,
Expand Down Expand Up @@ -1200,6 +1203,62 @@ async def async_iter():
mock_async_client.scenarios.list.assert_awaited_once()


class TestAsyncBenchmarkOps:
    """Unit tests covering AsyncBenchmarkOps against a mocked async client."""

    @pytest.mark.asyncio
    async def test_create(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
        """create() forwards its kwargs and wraps the returned ID."""
        scenario_ids = ["scn_001", "scn_002"]
        mock_async_client.benchmarks.create = AsyncMock(return_value=benchmark_view)

        created = await AsyncBenchmarkOps(mock_async_client).create(
            name="test-benchmark",
            scenario_ids=scenario_ids,
        )

        assert isinstance(created, AsyncBenchmark)
        assert created.id == "bmd_123"
        mock_async_client.benchmarks.create.assert_awaited_once_with(
            name="test-benchmark", scenario_ids=["scn_001", "scn_002"]
        )

    def test_from_id(self, mock_async_client: AsyncMock) -> None:
        """from_id() returns a wrapper bound to the given ID."""
        wrapper = AsyncBenchmarkOps(mock_async_client).from_id("bmd_123")

        assert isinstance(wrapper, AsyncBenchmark)
        assert wrapper.id == "bmd_123"

    @pytest.mark.asyncio
    async def test_list_multiple(self, mock_async_client: AsyncMock) -> None:
        """list() wraps every entry of the response, preserving order."""
        views = [
            MockBenchmarkView(id="bmd_001", name="benchmark-1"),
            MockBenchmarkView(id="bmd_002", name="benchmark-2"),
        ]
        mock_async_client.benchmarks.list = AsyncMock(return_value=SimpleNamespace(benchmarks=views))

        listed = await AsyncBenchmarkOps(mock_async_client).list(limit=10)

        assert len(listed) == 2
        first, second = listed
        assert isinstance(first, AsyncBenchmark)
        assert isinstance(second, AsyncBenchmark)
        assert first.id == "bmd_001"
        assert second.id == "bmd_002"
        mock_async_client.benchmarks.list.assert_awaited_once_with(limit=10)

    @pytest.mark.asyncio
    async def test_list_with_name_filter(self, mock_async_client: AsyncMock, benchmark_view: MockBenchmarkView) -> None:
        """list() passes the name filter through to the client unchanged."""
        response = SimpleNamespace(benchmarks=[benchmark_view])
        mock_async_client.benchmarks.list = AsyncMock(return_value=response)

        listed = await AsyncBenchmarkOps(mock_async_client).list(name="test-benchmark", limit=10)

        assert len(listed) == 1
        mock_async_client.benchmarks.list.assert_awaited_once_with(name="test-benchmark", limit=10)


class TestAsyncRunloopSDK:
"""Tests for AsyncRunloopSDK class."""

Expand All @@ -1208,6 +1267,7 @@ def test_init(self) -> None:
runloop = AsyncRunloopSDK(bearer_token="test-token")
assert runloop.api is not None
assert isinstance(runloop.agent, AsyncAgentOps)
assert isinstance(runloop.benchmark, AsyncBenchmarkOps)
assert isinstance(runloop.devbox, AsyncDevboxOps)
assert isinstance(runloop.scorer, AsyncScorerOps)
assert isinstance(runloop.snapshot, AsyncSnapshotOps)
Expand Down
Loading