python-bigquery-dataframes/tests/benchmark/utils.py at col_numpy · googleapis/python-bigquery-dataframes

This repository was archived by the owner on Apr 1, 2026. It is now read-only.

107 lines (88 loc) · 3.05 KB

# Copyright 2024 Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     https://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import dataclasses
import time
import bigframes
READ_GBQ_COLAB_PAGE_SIZE = 100
@dataclasses.dataclass(frozen=True)
class BenchmarkConfig:
    project_id: str
    dataset_id: str
    session: bigframes.Session | None
    benchmark_suffix: str | None
    table_id: str | None = None
def get_configuration(include_table_id=False, start_session=True) -> BenchmarkConfig:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--project_id",
        type=str,
        required=True,
        help="The BigQuery project ID.",
    parser.add_argument(
        "--dataset_id",
        type=str,
        required=True,
        help="The BigQuery dataset ID.",
    if include_table_id:
        parser.add_argument(
            "--table_id",
            type=str,
            required=True,
            help="The BigQuery table ID to query.",
    parser.add_argument(
        "--ordered",
        type=str,
        help="Set to True (default) to have an ordered session, or False for an unordered session.",
    parser.add_argument(
        "--benchmark_suffix",
        type=str,
        help="Suffix to append to benchmark names for identification purposes.",
    args = parser.parse_args()
    session = _initialize_session(_str_to_bool(args.ordered)) if start_session else None
    return BenchmarkConfig(
        project_id=args.project_id,
        dataset_id=args.dataset_id,
        table_id=args.table_id if include_table_id else None,
        session=session,
        benchmark_suffix=args.benchmark_suffix,
def get_execution_time(func, current_path, suffix, *args, **kwargs):
    start_time = time.perf_counter()
    func(*args, **kwargs)
    end_time = time.perf_counter()
    runtime = end_time - start_time
    clock_time_file_path = f"{current_path}_{suffix}.local_exec_time_seconds"
    with open(clock_time_file_path, "a") as log_file:
        log_file.write(f"{runtime}\n")
def _str_to_bool(value):
    if value == "True":
        return True
    elif value == "False":
        return False
        raise argparse.ArgumentTypeError('Only "True" or "False" expected.')
def _initialize_session(ordered: bool):
    # TODO(tswast): add a flag to enable the polars semi-executor.
    context = bigframes.BigQueryOptions(
        location="US", ordering_mode="strict" if ordered else "partial"
    session = bigframes.Session(context=context)
    print(f"Initialized {'ordered' if ordered else 'unordered'} session.")
    return session

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

utils.py

Latest commit

History

utils.py

File metadata and controls