This repository was archived by the owner on Apr 1, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 68
Expand file tree
/
Copy pathutils.py
More file actions
107 lines (88 loc) · 3.05 KB
/
utils.py
File metadata and controls
107 lines (88 loc) · 3.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import dataclasses
import time
import bigframes
# Page-size constant shared at module level. It is not referenced by the code
# visible in this module; presumably the read_gbq_colab benchmark scripts that
# import this file use it as their page size -- TODO confirm against callers.
READ_GBQ_COLAB_PAGE_SIZE = 100
@dataclasses.dataclass(frozen=True)
class BenchmarkConfig:
    """Immutable bundle of settings for a single benchmark run.

    Instances are built by ``get_configuration`` from command-line flags.
    The dataclass is frozen, so fields cannot be reassigned after creation.
    """

    # BigQuery project ID, taken from the ``--project_id`` flag.
    project_id: str
    # BigQuery dataset ID, taken from the ``--dataset_id`` flag.
    dataset_id: str
    # Session created for the run, or None when no session was started.
    session: bigframes.Session | None
    # Optional suffix from ``--benchmark_suffix``; None when the flag is absent.
    benchmark_suffix: str | None
    # BigQuery table ID from ``--table_id``; None unless the table flag was requested.
    table_id: str | None = None
def get_configuration(
    include_table_id: bool = False, start_session: bool = True
) -> BenchmarkConfig:
    """Parse the benchmark command-line flags into a ``BenchmarkConfig``.

    Args:
        include_table_id: When true, a required ``--table_id`` flag is
            registered and its value is stored on the returned config;
            otherwise ``table_id`` is ``None``.
        start_session: When true, a session is created with the ordering
            requested by ``--ordered``; otherwise ``session`` is ``None``.

    Returns:
        The populated ``BenchmarkConfig``.

    Raises:
        argparse.ArgumentTypeError: If a session is requested and the
            ``--ordered`` value is neither ``"True"`` nor ``"False"``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--project_id",
        type=str,
        required=True,
        help="The BigQuery project ID.",
    )
    parser.add_argument(
        "--dataset_id",
        type=str,
        required=True,
        help="The BigQuery dataset ID.",
    )
    if include_table_id:
        parser.add_argument(
            "--table_id",
            type=str,
            required=True,
            help="The BigQuery table ID to query.",
        )
    parser.add_argument(
        "--ordered",
        type=str,
        # The help text promises an ordered session by default. Without an
        # explicit default an omitted flag parses to None, which
        # _str_to_bool rejects, so the documented default never applied.
        default="True",
        help="Set to True (default) to have an ordered session, or False for an unordered session.",
    )
    parser.add_argument(
        "--benchmark_suffix",
        type=str,
        help="Suffix to append to benchmark names for identification purposes.",
    )
    args = parser.parse_args()

    # The ordering flag is only interpreted when a session is actually needed.
    session = _initialize_session(_str_to_bool(args.ordered)) if start_session else None

    return BenchmarkConfig(
        project_id=args.project_id,
        dataset_id=args.dataset_id,
        table_id=args.table_id if include_table_id else None,
        session=session,
        benchmark_suffix=args.benchmark_suffix,
    )
def get_execution_time(func, current_path, suffix, *args, **kwargs):
    """Run ``func`` once and append its wall-clock duration to a log file.

    Args:
        func: Callable to time; invoked as ``func(*args, **kwargs)``. Its
            return value is discarded.
        current_path: Prefix of the log file path.
        suffix: Joined to ``current_path`` with an underscore to form the
            log file name.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    The elapsed time in seconds (a ``time.perf_counter`` difference) is
    appended as one line to
    ``{current_path}_{suffix}.local_exec_time_seconds``. Nothing is written
    if ``func`` raises.
    """
    started_at = time.perf_counter()
    func(*args, **kwargs)
    runtime = time.perf_counter() - started_at

    # Append mode keeps the timings of earlier runs in the same file.
    with open(f"{current_path}_{suffix}.local_exec_time_seconds", "a") as timings:
        timings.write(f"{runtime}\n")
def _str_to_bool(value):
    """Convert the exact strings ``"True"`` / ``"False"`` to a bool.

    Args:
        value: The raw flag value to interpret.

    Returns:
        ``True`` for ``"True"`` and ``False`` for ``"False"``.

    Raises:
        argparse.ArgumentTypeError: For any other value; matching is
            case-sensitive.
    """
    # Compared in order with ``==``, exactly two literals are accepted.
    for literal, flag in (("True", True), ("False", False)):
        if value == literal:
            return flag
    raise argparse.ArgumentTypeError('Only "True" or "False" expected.')
def _initialize_session(ordered: bool):
    """Create a session in the US location with the requested ordering.

    Args:
        ordered: Truthy selects ``ordering_mode="strict"``; falsy selects
            ``ordering_mode="partial"``.

    Returns:
        The newly created ``bigframes.Session``.
    """
    # TODO(tswast): add a flag to enable the polars semi-executor.
    ordering_mode = "strict" if ordered else "partial"
    options = bigframes.BigQueryOptions(location="US", ordering_mode=ordering_mode)
    new_session = bigframes.Session(context=options)
    print(f"Initialized {'ordered' if ordered else 'unordered'} session.")
    return new_session