forked from googleapis/python-bigquery-dataframes
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_sql.py
More file actions
123 lines (114 loc) · 3.7 KB
/
Copy pathtest_sql.py
File metadata and controls
123 lines (114 loc) · 3.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import decimal
import pytest
import shapely # type: ignore
from bigframes.core import sql
@pytest.mark.parametrize(
    "value, expected",
    [
        # Aim for at least one literal per scalar data type listed at
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
        (None, "NULL"),
        # TODO: support ARRAY type (possibly another method?)
        (True, "True"),
        (False, "False"),
        (b"\x01\x02\x03ABC", r"b'\x01\x02\x03ABC'"),
        (datetime.date(2025, 1, 1), "DATE('2025-01-01')"),
        (
            datetime.datetime(2025, 1, 2, 3, 45, 6, 789123),
            "DATETIME('2025-01-02T03:45:06.789123')",
        ),
        (shapely.Point(0, 1), "ST_GEOGFROMTEXT('POINT (0 1)')"),
        # TODO: INTERVAL type (e.g. from dateutil.relativedelta)
        # TODO: JSON type (TBD what Python object that would correspond to)
        (123, "123"),
        (decimal.Decimal("123.75"), "CAST('123.75' AS NUMERIC)"),
        # TODO: support BIGNUMERIC by looking at precision/scale of the DECIMAL
        (123.75, "123.75"),
        # TODO: support RANGE type
        ("abc", "'abc'"),
        # TODO: support STRUCT type (possibly another method?)
        (
            datetime.time(12, 34, 56, 789123),
            "TIME(DATETIME('1970-01-01 12:34:56.789123'))",
        ),
        (
            # Same instant as the DATETIME case above, but timezone-aware (UTC).
            datetime.datetime(
                2025, 1, 2, 3, 45, 6, 789123, tzinfo=datetime.timezone.utc
            ),
            "TIMESTAMP('2025-01-02T03:45:06.789123+00:00')",
        ),
    ],
)
def test_simple_literal(value, expected):
    """Check that ``sql.simple_literal(value)`` returns exactly ``expected``."""
    assert sql.simple_literal(value) == expected
# Checks the exact SQL text returned by sql.create_vector_search_sql when only
# sql_string, base_table and column_to_search are supplied.
def test_create_vector_search_sql_simple():
result_query = sql.create_vector_search_sql(
sql_string="SELECT embedding FROM my_embeddings_table WHERE id = 1",
base_table="my_base_table",
column_to_search="my_embedding_column",
)
# The comparison below is an exact string match, so every character of the
# expected literal (including newlines and spaces) is significant.
# In the expected text the base table is wrapped in backticks, the searched
# column is single-quoted, and the query SQL is wrapped in parentheses.
# NOTE(review): leading indentation appears to have been stripped from this
# view of the file -- confirm the literal's whitespace before editing it.
assert (
result_query
== """
SELECT
query.*,
base.*,
distance,
FROM VECTOR_SEARCH(TABLE `my_base_table`,
'my_embedding_column',
(SELECT embedding FROM my_embeddings_table WHERE id = 1))
"""
)
# Checks the exact SQL text returned by sql.create_vector_search_sql when the
# optional query_column_to_search, top_k, distance_type and options arguments
# are supplied in addition to the three arguments used by the simple test.
def test_create_vector_search_sql_all_named_parameters():
result_query = sql.create_vector_search_sql(
sql_string="SELECT embedding FROM my_embeddings_table WHERE id = 1",
base_table="my_base_table",
column_to_search="my_embedding_column",
query_column_to_search="another_embedding_column",
top_k=10,
distance_type="cosine",
options={
"fraction_lists_to_search": 0.1,
"use_brute_force": False,
},
)
# The comparison below is an exact string match. Per the expected literal:
#   * each optional argument appears as a `name => value` named argument;
#   * the options dict appears as a single-quoted string whose double quotes
#     are backslash-escaped and whose Python False is written as `false`;
#   * `top_k=> 10` has no space before `=>`, unlike the other arguments --
#     presumably this mirrors the implementation's output; verify before
#     "fixing" it here.
# NOTE(review): leading indentation appears to have been stripped from this
# view of the file -- confirm the literal's whitespace before editing it.
assert (
result_query
== """
SELECT
query.*,
base.*,
distance,
FROM VECTOR_SEARCH(TABLE `my_base_table`,
'my_embedding_column',
(SELECT embedding FROM my_embeddings_table WHERE id = 1),
query_column_to_search => 'another_embedding_column',
top_k=> 10,
distance_type => 'cosine',
options => '{\\"fraction_lists_to_search\\": 0.1, \\"use_brute_force\\": false}')
"""
)