-
Notifications
You must be signed in to change notification settings - Fork 107
Expand file tree
/
Copy pathbenchmarking.py
More file actions
723 lines (624 loc) · 27.5 KB
/
benchmarking.py
File metadata and controls
723 lines (624 loc) · 27.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
"""
Benchmarking utilities for Feldera pipelines.
Provides functionality to collect pipeline performance metrics, format them as
Bencher Metric Format (BMF), and upload results to a Bencher-compatible server.
This mirrors the `fda bench --upload` CLI functionality so Python-based benchmark
workloads can collect and upload results programmatically.
**Differences from** ``fda bench``: throughput, uptime, and state-amplification
are computed as deltas between the first and last collected sample (i.e. over the
observation window), whereas ``fda bench`` uses absolute values since pipeline
start. This means the two tools will report different numbers for the same run,
but the Python SDK approach is more appropriate when benchmarking a pipeline that
may have been running before collection started.
"""
import json
import logging
import math
import os
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import TYPE_CHECKING, Optional
from urllib.parse import urlparse
import requests
if TYPE_CHECKING:
from feldera.pipeline import Pipeline
from feldera.rest.feldera_client import FelderaClient
logger = logging.getLogger(__name__)
POLL_INTERVAL_S = 0.25
class CompletionCondition(Enum):
    """How a benchmark run decides that metric collection is finished.

    :cvar PIPELINE_COMPLETE: Collection ends once the pipeline reports its
        ``pipeline_complete`` flag. This is the default and suits finite
        sources (files, HTTP) that tell the pipeline when input is exhausted.
    :cvar IDLE: Collection ends after ``total_processed_records`` stays
        constant for a configurable idle window **and** something has been
        processed. Intended for endless sources such as Kafka, which never
        emit an end-of-input signal even when drained.
    """

    PIPELINE_COMPLETE = "pipeline_complete"
    IDLE = "idle"
def _human_readable_bytes(n: int) -> str:
"""Format a byte count as a human-readable string."""
units = ["B", "KiB", "MiB", "GiB", "TiB"]
value = float(n)
exp = 0
while value >= 1024 and exp < len(units) - 1:
value /= 1024
exp += 1
return f"{value:.2f} {units[exp]}"
def _stddev(values: list[float]) -> float:
"""Population standard deviation."""
n = len(values)
if n < 2:
return 0.0
mean = sum(values) / n
return math.sqrt(sum((x - mean) ** 2 for x in values) / n)
def _averaged_metrics(runs: list["BenchmarkResult"]) -> "BenchmarkMetrics":
    """Fold the per-run metrics of *runs* into one :class:`BenchmarkMetrics`.

    Averaged across runs: throughput, uptime_ms, buffered_input_records_avg,
    state_amplification (averaged only over runs where it is not ``None``).
    Min-of-mins: memory_bytes_min, storage_bytes_min, buffered_input_records_min.
    Max-of-maxes: memory_bytes_max, storage_bytes_max, buffered_input_records_max.
    """
    count = len(runs)
    per_run = [run.metrics for run in runs]
    # state_amplification may be None for some runs; average only the known ones.
    amplifications = [
        m.state_amplification for m in per_run if m.state_amplification is not None
    ]
    avg_amplification = (
        sum(amplifications) / len(amplifications) if amplifications else None
    )
    return BenchmarkMetrics(
        throughput=int(sum(m.throughput for m in per_run) / count),
        memory_bytes_max=max(m.memory_bytes_max for m in per_run),
        memory_bytes_min=min(m.memory_bytes_min for m in per_run),
        storage_bytes_max=max(m.storage_bytes_max for m in per_run),
        storage_bytes_min=min(m.storage_bytes_min for m in per_run),
        uptime_ms=int(sum(m.uptime_ms for m in per_run) / count),
        buffered_input_records_avg=int(
            sum(m.buffered_input_records_avg for m in per_run) / count
        ),
        buffered_input_records_min=min(
            m.buffered_input_records_min for m in per_run
        ),
        buffered_input_records_max=max(
            m.buffered_input_records_max for m in per_run
        ),
        state_amplification=avg_amplification,
    )
@dataclass
class RawSample:
    """A single point-in-time snapshot taken from ``pipeline.stats()``.

    :param rss_bytes: Resident set size of the pipeline process in bytes.
    :param uptime_msecs: Wall-clock milliseconds since the pipeline process
        started (not aggregated CPU time).
    :param incarnation_uuid: UUID of the current pipeline incarnation; a new
        value appears if the pipeline restarts.
    :param storage_bytes: Bytes currently held in pipeline storage.
    :param buffered_input_records: Records currently buffered in input connectors.
    :param total_processed_records: Cumulative records processed since start.
    :param input_bytes: Total bytes received across all input connectors.
    :param input_errors: ``True`` if any input connector reported a fatal,
        parse, or transport error.
    """

    rss_bytes: int
    uptime_msecs: int
    incarnation_uuid: str
    storage_bytes: int
    buffered_input_records: int
    total_processed_records: int
    input_bytes: int
    input_errors: bool

    @classmethod
    def from_pipeline_statistics(cls, stats) -> "RawSample":
        """Build a :class:`RawSample` from a
        :class:`~feldera.stats.PipelineStatistics` object.

        :param stats: A :class:`~feldera.stats.PipelineStatistics` instance
            as returned by :meth:`~feldera.pipeline.Pipeline.stats`.
        :returns: A new :class:`RawSample` populated from *stats*.
        """
        metrics = stats.global_metrics
        # Walk the input endpoints once, summing bytes and watching for errors.
        total_input_bytes = 0
        saw_errors = False
        for endpoint in stats.inputs:
            if endpoint.fatal_error is not None:
                saw_errors = True
            ep_metrics = endpoint.metrics
            if ep_metrics is None:
                continue
            total_input_bytes += ep_metrics.total_bytes or 0
            if (ep_metrics.num_transport_errors or 0) > 0:
                saw_errors = True
            if (ep_metrics.num_parse_errors or 0) > 0:
                saw_errors = True
        # Coerce possibly-missing global metrics to zero so fields stay ints.
        return cls(
            rss_bytes=metrics.rss_bytes or 0,
            uptime_msecs=int(metrics.uptime_msecs or 0),
            incarnation_uuid=str(metrics.incarnation_uuid),
            storage_bytes=metrics.storage_bytes or 0,
            buffered_input_records=metrics.buffered_input_records or 0,
            total_processed_records=metrics.total_processed_records or 0,
            input_bytes=total_input_bytes,
            input_errors=saw_errors,
        )
@dataclass
class BenchmarkMetrics:
    """Benchmark metrics aggregated from a sequence of :class:`RawSample`.

    :param throughput: Records processed per second over the measurement
        window (delta between first and last sample).
    :param memory_bytes_max: Peak RSS in bytes across all samples.
    :param memory_bytes_min: Lowest RSS in bytes across all samples.
    :param storage_bytes_max: Peak storage bytes across all samples.
    :param storage_bytes_min: Lowest storage bytes across all samples.
    :param uptime_ms: Elapsed wall-clock milliseconds of the window
        (``uptime_msecs`` delta between first and last sample).
    :param buffered_input_records_avg: Mean buffered input record count
        (integer division).
    :param buffered_input_records_min: Lowest buffered input record count.
    :param buffered_input_records_max: Highest buffered input record count.
    :param state_amplification: Peak storage bytes divided by input bytes
        received within the window; ``None`` when no input bytes arrived.
    """

    throughput: int
    memory_bytes_max: int
    memory_bytes_min: int
    storage_bytes_max: int
    storage_bytes_min: int
    uptime_ms: int
    buffered_input_records_avg: int
    buffered_input_records_min: int
    buffered_input_records_max: int
    state_amplification: Optional[float]

    @classmethod
    def from_samples(cls, samples: list) -> "BenchmarkMetrics":
        """Aggregate :class:`RawSample` objects into benchmark metrics.

        :param samples: Non-empty list of :class:`RawSample` instances
            collected during a benchmark run.
        :returns: A new :class:`BenchmarkMetrics` instance.
        :raises ValueError: If *samples* is empty.
        """
        if not samples:
            raise ValueError(
                "No measurements were recorded. Maybe try to increase `duration`."
            )
        first, last = samples[0], samples[-1]
        # Window deltas: only work done between the first and last snapshot counts.
        window_records = last.total_processed_records - first.total_processed_records
        window_ms = last.uptime_msecs - first.uptime_msecs
        window_s = window_ms / 1000.0
        rate = int(window_records / window_s) if window_s > 0 else 0
        rss = [s.rss_bytes for s in samples]
        storage = [s.storage_bytes for s in samples]
        buffered = [s.buffered_input_records for s in samples]
        if min(buffered) == 0:
            # An empty input buffer suggests the source could not keep the
            # pipeline saturated, so throughput may understate capability.
            logger.warning(
                "Input buffering was 0 for %d samples; the pipeline may not be "
                "receiving enough data to evaluate its true performance.",
                buffered.count(0),
            )
        window_input_bytes = last.input_bytes - first.input_bytes
        amplification = (
            max(storage) / window_input_bytes if window_input_bytes > 0 else None
        )
        return cls(
            throughput=rate,
            memory_bytes_max=max(rss),
            memory_bytes_min=min(rss),
            storage_bytes_max=max(storage),
            storage_bytes_min=min(storage),
            uptime_ms=window_ms,
            buffered_input_records_avg=sum(buffered) // len(buffered),
            buffered_input_records_min=min(buffered),
            buffered_input_records_max=max(buffered),
            state_amplification=amplification,
        )
@dataclass
class BenchmarkResult:
    """A named benchmark result with timing information.

    For a single run, ``_metrics`` is set directly. For multi-run aggregation,
    ``runs`` holds the individual results and ``metrics`` computes averages
    on the fly.

    :param name: Benchmark name, used as the top-level key in BMF output.
    :param start_time: UTC timestamp when metric collection began.
    :param end_time: UTC timestamp when metric collection ended.
    :param runs: List of individual run results (multi-run aggregation).
    :param _metrics: Aggregated performance metrics (single run).
    """

    name: str
    start_time: datetime
    end_time: datetime
    runs: list["BenchmarkResult"] | None = None
    _metrics: BenchmarkMetrics | None = field(default=None, repr=False)

    @property
    def metrics(self) -> BenchmarkMetrics:
        """Return metrics — stored for single run, computed for multi-run."""
        # Stored single-run metrics take precedence over on-the-fly averaging.
        if self._metrics is not None:
            return self._metrics
        if self.runs:
            return _averaged_metrics(self.runs)
        raise ValueError("BenchmarkResult has no metrics and no runs")

    @classmethod
    def aggregate(
        cls, results: list["BenchmarkResult"], name: str | None = None
    ) -> "BenchmarkResult":
        """Create an aggregated result from multiple runs.

        :param results: Non-empty list of individual run results.
        :param name: Optional name override; defaults to the first result's name.
        :returns: A new :class:`BenchmarkResult` with ``runs`` set.
        """
        # The aggregate spans the earliest start to the latest end of all runs;
        # per-run metrics stay in `runs` and are averaged lazily by `metrics`.
        return cls(
            name=name or results[0].name,
            start_time=min(r.start_time for r in results),
            end_time=max(r.end_time for r in results),
            runs=results,
        )

    def to_bmf(self) -> dict:
        """Return the result as a Bencher Metric Format (BMF) dict.

        :returns: A dict ``{name: {metric_name: {value, ...}, ...}}`` suitable
            for serialising to JSON and submitting to a Bencher-compatible server.
        """
        m = self.metrics
        # BMF convention: "value" is the headline number; "lower_value" /
        # "upper_value" give the observed range where one exists.
        entry: dict = {
            "throughput": {"value": m.throughput},
            "memory": {
                "value": m.memory_bytes_max,
                "lower_value": m.memory_bytes_min,
            },
            "storage": {
                "value": m.storage_bytes_max,
                "lower_value": m.storage_bytes_min,
            },
            "uptime": {"value": m.uptime_ms},
            "buffered-input-records": {
                "value": m.buffered_input_records_avg,
                "lower_value": m.buffered_input_records_min,
                "upper_value": m.buffered_input_records_max,
            },
        }
        # state-amplification is omitted entirely when it could not be computed
        # (no input bytes observed in the window).
        if m.state_amplification is not None:
            entry["state-amplification"] = {"value": m.state_amplification}
        return {self.name: entry}

    def to_json(self) -> str:
        """Return the BMF dict serialised as a pretty-printed JSON string.

        :returns: A JSON string representation of :meth:`to_bmf`.
        """
        return json.dumps(self.to_bmf(), indent=2)

    def format_table(self) -> str:
        """Return a human-readable tabular display of the benchmark results.

        For multi-run results (``self.runs is not None``), value cells show
        ``avg (stddev X.Y%)`` computed from per-run metrics.

        :returns: A multi-line string containing an ASCII table with one row
            per metric.
        """
        m = self.metrics
        # stddev columns only make sense with at least two runs to compare.
        is_multi = self.runs is not None and len(self.runs) > 1

        # Format a numeric cell; for multi-run results append the relative
        # standard deviation as a percentage of the average.
        def _val_with_stddev(avg: float, values: list[float], fmt: str = ".0f") -> str:
            if not is_multi:
                return f"{avg:{fmt}}"
            sd = _stddev(values)
            pct = (sd / avg * 100) if avg != 0 else 0.0
            return f"{avg:{fmt}} (stddev {pct:.1f}%)"

        # Same as above but renders the average as a human-readable byte count.
        def _bytes_with_stddev(avg: int, values: list[int]) -> str:
            if not is_multi:
                return _human_readable_bytes(avg)
            sd = _stddev([float(v) for v in values])
            pct = (sd / avg * 100) if avg != 0 else 0.0
            return f"{_human_readable_bytes(avg)} (stddev {pct:.1f}%)"

        # Row 0 is the header; per-run value lists are only materialised when
        # needed (multi-run), otherwise an empty list is passed and unused.
        rows: list[tuple[str, str]] = [
            ("Metric", "Value"),
            (
                "Throughput (records/s)",
                _val_with_stddev(
                    m.throughput,
                    [r.metrics.throughput for r in self.runs] if is_multi else [],
                ),
            ),
            (
                "Memory",
                _bytes_with_stddev(
                    m.memory_bytes_max,
                    [r.metrics.memory_bytes_max for r in self.runs] if is_multi else [],
                ),
            ),
            (
                "Storage",
                _bytes_with_stddev(
                    m.storage_bytes_max,
                    [r.metrics.storage_bytes_max for r in self.runs]
                    if is_multi
                    else [],
                ),
            ),
            (
                "Uptime [ms]",
                _val_with_stddev(
                    m.uptime_ms,
                    [r.metrics.uptime_ms for r in self.runs] if is_multi else [],
                ),
            ),
        ]
        if m.state_amplification is not None:
            rows.append(
                (
                    "State Amplification",
                    _val_with_stddev(
                        m.state_amplification,
                        [
                            r.metrics.state_amplification
                            for r in self.runs
                            if r.metrics.state_amplification is not None
                        ]
                        if is_multi
                        else [],
                        fmt=".2f",
                    ),
                )
            )
        # Each column is as wide as its widest cell (header row included).
        col_widths = [max(len(row[i]) for row in rows) for i in range(len(rows[0]))]
        sep = "+-" + "-+-".join("-" * w for w in col_widths) + "-+"
        n_runs = len(self.runs) if self.runs else 1
        header = f"Benchmark Results ({n_runs} run{'s' if n_runs != 1 else ''}):"
        lines = [header, sep]
        for i, row in enumerate(rows):
            line = (
                "| "
                + " | ".join(cell.ljust(col_widths[j]) for j, cell in enumerate(row))
                + " |"
            )
            lines.append(line)
            # Extra separator directly under the header row.
            if i == 0:
                lines.append(sep)
        lines.append(sep)
        return "\n".join(lines)
def collect_metrics(
    pipeline: "Pipeline",
    duration_secs: Optional[float] = None,
    completion_condition: CompletionCondition = CompletionCondition.PIPELINE_COMPLETE,
    idle_interval_s: float = 1.0,
) -> tuple:
    """
    Poll pipeline stats until completion or ``duration_secs`` elapses.

    :param pipeline: A running :class:`~feldera.pipeline.Pipeline`.
    :param duration_secs: Optional maximum collection duration in seconds.
        If ``None``, polling continues until the completion condition is met.
    :param completion_condition: Strategy for detecting completion.
        See :class:`CompletionCondition`.
    :param idle_interval_s: When using :attr:`CompletionCondition.IDLE`,
        the number of seconds ``total_processed_records`` must remain unchanged
        before collection stops. Ignored for other conditions.
    :returns: ``(samples, start_time, end_time)`` where *samples* is a list
        of :class:`RawSample` objects.
    :raises RuntimeError: If any input connector reported errors during collection.
    """
    samples: list[RawSample] = []
    # Naive UTC timestamp: tzinfo is stripped so callers (e.g. upload_to_bencher)
    # can append a literal "Z" suffix when formatting as ISO-8601.
    start_time = datetime.now(timezone.utc).replace(tzinfo=None)
    # Monotonic clock for durations; immune to wall-clock adjustments.
    loop_start = time.monotonic()
    first_uuid: Optional[str] = None
    # State for IDLE completion tracking
    idle_started_at: Optional[float] = None
    prev_processed: Optional[int] = None
    while True:
        # Always record a sample first so even a single iteration yields data.
        stats = pipeline.stats()
        sample = RawSample.from_pipeline_statistics(stats)
        samples.append(sample)
        logger.info("Collected metrics at %.1fs", time.monotonic() - loop_start)
        # Validate incarnation UUID consistency
        if first_uuid is None:
            first_uuid = sample.incarnation_uuid
        elif sample.incarnation_uuid != first_uuid:
            # A restart invalidates delta-based metrics; warn but keep going.
            logger.warning(
                "Inconsistent incarnation_uuid detected during benchmark "
                "(was %s, now %s). Did the pipeline restart while measuring?",
                first_uuid,
                sample.incarnation_uuid,
            )
        # Check completion based on the chosen strategy
        if completion_condition == CompletionCondition.PIPELINE_COMPLETE:
            if stats.global_metrics.pipeline_complete:
                logger.info("Pipeline completed, stopping benchmark collection.")
                break
        elif completion_condition == CompletionCondition.IDLE:
            now = time.monotonic()
            cur_processed = sample.total_processed_records
            # Idle means: the processed count is unchanged since the previous
            # poll AND at least one record has ever been processed.
            if (
                prev_processed is not None
                and cur_processed == prev_processed
                and cur_processed > 0
            ):
                if idle_started_at is None:
                    # First stable observation: start the idle timer.
                    idle_started_at = now
                elif now - idle_started_at >= idle_interval_s:
                    logger.info(
                        "Pipeline idle for %.1fs at %d records processed, "
                        "stopping benchmark collection.",
                        idle_interval_s,
                        cur_processed,
                    )
                    break
            else:
                # Progress was made (or first poll); reset the idle timer.
                idle_started_at = None
            prev_processed = cur_processed
        # Stop when duration limit reached
        # (acts as an upper bound regardless of the completion condition).
        if duration_secs is not None and time.monotonic() - loop_start >= duration_secs:
            logger.info("Reached duration limit of %.1fs.", duration_secs)
            break
        time.sleep(POLL_INTERVAL_S)
    end_time = datetime.now(timezone.utc).replace(tzinfo=None)
    # Fail after the fact so the caller sees a clean error rather than a
    # result silently computed over faulty input.
    if any(s.input_errors for s in samples):
        raise RuntimeError(
            "Detected errors in input connectors during benchmark collection. "
            "Check pipeline logs for details."
        )
    return samples, start_time, end_time
def bench(
    pipeline: "Pipeline",
    name: Optional[str] = None,
    duration_secs: Optional[float] = None,
    completion_condition: CompletionCondition = CompletionCondition.PIPELINE_COMPLETE,
    idle_interval_s: float = 5.0,
) -> BenchmarkResult:
    """
    Collect benchmark metrics from a running pipeline and return a result.

    :param pipeline: A running :class:`~feldera.pipeline.Pipeline`.
    :param name: Benchmark name. Defaults to the pipeline name.
    :param duration_secs: Optional maximum collection duration in seconds.
        Acts as an upper bound regardless of *completion_condition*.
    :param completion_condition: Strategy for detecting completion.
        See :class:`CompletionCondition`.
    :param idle_interval_s: Seconds of stable ``total_processed_records``
        before :attr:`CompletionCondition.IDLE` triggers. Ignored otherwise.
    :returns: A :class:`BenchmarkResult` with collected metrics.
    """
    samples, started, finished = collect_metrics(
        pipeline,
        duration_secs,
        completion_condition,
        idle_interval_s,
    )
    return BenchmarkResult(
        name=pipeline.name if name is None else name,
        start_time=started,
        end_time=finished,
        _metrics=BenchmarkMetrics.from_samples(samples),
    )
def upload_to_bencher(
    result: BenchmarkResult,
    project: Optional[str] = None,
    *,
    host: Optional[str] = None,
    token: Optional[str] = None,
    branch: str = "main",
    testbed: Optional[str] = None,
    git_hash: Optional[str] = None,
    start_point: Optional[str] = None,
    start_point_hash: Optional[str] = None,
    start_point_max_versions: int = 255,
    start_point_clone_thresholds: bool = False,
    start_point_reset: bool = False,
    feldera_client: Optional["FelderaClient"] = None,
) -> requests.Response:
    """
    Upload a :class:`BenchmarkResult` to a Bencher-compatible server.

    Environment variables (used as defaults when the corresponding parameter is
    ``None``):

    - ``BENCHER_API_TOKEN`` — API token for authentication.
    - ``BENCHER_PROJECT`` — Project slug on the Bencher server.
    - ``BENCHER_HOST`` — Base URL of the Bencher server.

    :param result: The benchmark result to upload.
    :param project: Bencher project slug. Defaults to the ``BENCHER_PROJECT``
        environment variable.
    :param host: Bencher server base URL. Defaults to the ``BENCHER_HOST``
        environment variable, or ``"https://benchmarks.feldera.io"``.
    :param token: Bencher API token. Defaults to the ``BENCHER_API_TOKEN``
        environment variable.
    :param branch: Branch name to report the run under. Defaults to ``"main"``.
    :param testbed: Testbed name. When ``None`` and *feldera_client* is provided,
        the hostname of the Feldera instance is used.
    :param git_hash: Optional git commit hash associated with this run.
    :param start_point: Optional branch name to use as the start point.
    :param start_point_hash: Optional git hash for the start point.
    :param start_point_max_versions: Maximum number of start point versions to
        consider. Defaults to 255.
    :param start_point_clone_thresholds: Whether to clone thresholds from the
        start point branch.
    :param start_point_reset: Whether to reset the start point.
    :param feldera_client: Optional :class:`~feldera.rest.feldera_client.FelderaClient`
        used to enrich the run context with edition and revision information.
    :returns: The :class:`requests.Response` from the Bencher server.
    :raises ValueError: If *project* is ``None`` and ``BENCHER_PROJECT`` is not set.
    :raises RuntimeError: If the server returns a non-2xx status code.
    """
    # Explicit arguments always win over environment-variable fallbacks.
    resolved_host = (
        host or os.environ.get("BENCHER_HOST") or "https://benchmarks.feldera.io"
    )
    resolved_token = token or os.environ.get("BENCHER_API_TOKEN")
    resolved_project = project or os.environ.get("BENCHER_PROJECT")
    if resolved_project is None:
        raise ValueError(
            "project must be provided either as a parameter or via the "
            "BENCHER_PROJECT environment variable."
        )
    # Build run context
    context: dict = {
        "bencher.dev/v0/branch/ref/name": branch,
    }
    # Resolve testbed and enrich context from Feldera instance config
    resolved_testbed = testbed
    if feldera_client is not None:
        # Config enrichment is best-effort: a failure here only degrades the
        # run context, it must not abort the upload.
        try:
            config = feldera_client.get_config()
            context["bencher.dev/v0/repo/name"] = f"feldera {config.edition.value}"
            context["bencher.dev/v0/branch/hash"] = config.revision
            edition = config.edition.value
            # NOTE(review): these repo hashes are hard-coded constants mapped
            # from the edition string — presumably pinned base commits of the
            # respective repositories; confirm they are kept up to date.
            if edition == "Open source":
                context["bencher.dev/v0/repo/hash"] = (
                    "de8879fbda0c9e9392e3b94064c683a1b4bae216"
                )
            elif edition == "Enterprise":
                context["bencher.dev/v0/repo/hash"] = (
                    "751db38ff821d73bcc67c836af421d76d4d42bdd"
                )
            else:
                logger.warning(
                    "Unknown Feldera edition '%s'; not setting repo hash.", edition
                )
            # Auto-resolve git_hash from runtime_revision if not provided by caller,
            # mirroring the fda bench logic (runtime_version takes precedence, then
            # runtime_revision).
            if git_hash is None and config.runtime_revision:
                git_hash = config.runtime_revision
        except Exception as exc:
            logger.warning("Failed to fetch Feldera instance config: %s", exc)
        # Fall back to the Feldera instance hostname as the testbed name.
        if resolved_testbed is None:
            instance_url = feldera_client.config.url
            parsed = urlparse(instance_url)
            resolved_testbed = parsed.hostname or instance_url
    # Build payload
    # NOTE(review): appending "Z" assumes result timestamps are naive UTC (as
    # produced by collect_metrics); an aware datetime would serialise as
    # "...+00:00Z" — confirm callers never pass aware datetimes.
    payload: dict = {
        "branch": branch,
        "project": resolved_project,
        "context": context,
        "start_time": result.start_time.isoformat() + "Z",
        "end_time": result.end_time.isoformat() + "Z",
        # The "json" adapter expects each results entry as a JSON *string*.
        "results": [result.to_json()],
        "settings": {"adapter": "json"},
        "thresholds": None,
    }
    if git_hash is not None:
        payload["hash"] = git_hash
    if resolved_testbed is not None:
        payload["testbed"] = resolved_testbed
    if start_point is not None:
        sp: dict = {
            "branch": start_point,
            "clone_thresholds": start_point_clone_thresholds,
            "max_versions": start_point_max_versions,
            "reset": start_point_reset,
        }
        if start_point_hash is not None:
            sp["hash"] = start_point_hash
        payload["start_point"] = sp
    else:
        payload["start_point"] = None
    # Build headers
    headers: dict = {}
    if resolved_token:
        headers["Authorization"] = f"Bearer {resolved_token}"
    else:
        logger.warning(
            "No Bencher API token provided; attempting upload without authentication."
        )
    url = f"{resolved_host.rstrip('/')}/v0/run"
    response = requests.post(url, json=payload, headers=headers, timeout=15)
    if not response.ok:
        raise RuntimeError(
            f"Failed to upload benchmark result: HTTP {response.status_code} — {response.text}"
        )
    logger.info("Benchmark result uploaded successfully.")
    return response