Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,11 @@
to_json,
to_json_string,
)
from bigframes.bigquery._operations.search import create_vector_index, vector_search
from bigframes.bigquery._operations.search import (
create_vector_index,
search,
vector_search,
)
from bigframes.bigquery._operations.sql import sql_scalar
from bigframes.bigquery._operations.struct import struct
from bigframes.core import log_adapter
Expand Down Expand Up @@ -99,6 +103,7 @@
to_json_string,
# search ops
create_vector_index,
search,
vector_search,
# sql ops
sql_scalar,
Expand Down Expand Up @@ -150,6 +155,7 @@
"to_json_string",
# search ops
"create_vector_index",
"search",
"vector_search",
# sql ops
"sql_scalar",
Expand Down
70 changes: 66 additions & 4 deletions bigframes/bigquery/_operations/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@

import google.cloud.bigquery as bigquery

import bigframes.core.sql
import bigframes.dataframe
import bigframes.ml.utils as utils

if typing.TYPE_CHECKING:
import bigframes.dataframe as dataframe
import bigframes.series as series
import bigframes.session

Expand Down Expand Up @@ -91,15 +92,15 @@ def create_vector_index(
def vector_search(
base_table: str,
column_to_search: str,
query: Union[dataframe.DataFrame, series.Series],
query: Union[bigframes.dataframe.DataFrame, series.Series],
*,
query_column_to_search: Optional[str] = None,
top_k: Optional[int] = None,
distance_type: Optional[Literal["euclidean", "cosine", "dot_product"]] = None,
fraction_lists_to_search: Optional[float] = None,
use_brute_force: Optional[bool] = None,
allow_large_results: Optional[bool] = None,
) -> dataframe.DataFrame:
) -> bigframes.dataframe.DataFrame:
"""
Conduct vector search which searches embeddings to find semantically similar entities.

Expand All @@ -108,7 +109,6 @@ def vector_search(

**Examples:**


>>> import bigframes.pandas as bpd
>>> import bigframes.bigquery as bbq

Expand Down Expand Up @@ -247,3 +247,65 @@ def vector_search(
df = query._session.read_gbq_query(sql, allow_large_results=allow_large_results)

return df


def search(
    data_to_search: Union[bigframes.dataframe.DataFrame, series.Series],
    search_query: str,
) -> series.Series:
    """
    Test whether the given data contains every term of a search query.

    This wraps the BigQuery SEARCH function: the result is TRUE when all
    search terms (tokens) in ``search_query`` appear in the data, according to
    the rules for search_query and text analysis described in the text
    analyzer, and FALSE otherwise.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq

        >>> data = bpd.read_gbq("SELECT 'Please use foobar@example.com as your email.' AS email")
        >>> bbq.search(data['email'], 'exam')
        0    False
        Name: email, dtype: boolean

        >>> bbq.search(data['email'], 'foobar')
        0    True
        Name: email, dtype: boolean

        >>> bbq.search(data['email'], 'example.com')
        0    True
        Name: email, dtype: boolean

    Args:
        data_to_search (bigframes.dataframe.DataFrame | bigframes.series.Series):
            The data to search over. A DataFrame is converted to a single
            STRUCT-valued Series before the search is applied.
        search_query (str):
            A STRING literal, or a STRING constant expression that represents
            the terms of the search query.

    Returns:
        bigframes.series.Series: A new Series with the boolean result.

    Raises:
        ValueError: If ``data_to_search`` is neither a Series nor a DataFrame.
    """
    import bigframes.operations.search_ops as search_ops
    import bigframes.series

    if isinstance(data_to_search, bigframes.dataframe.DataFrame):
        # SEARCH over a table (or dataframe) treats it as a STRUCT, so the
        # frame is packed into a struct Series up front; the search can then
        # be expressed as an ordinary scalar unary op.
        import bigframes.bigquery._operations.struct as struct_ops

        searchable = struct_ops.struct(data_to_search)
    elif isinstance(data_to_search, bigframes.series.Series):
        searchable = data_to_search
    else:
        raise ValueError("data_to_search must be a Series or DataFrame")

    op = search_ops.SearchOp(search_query=search_query)
    return searchable._apply_unary_op(op)
1 change: 1 addition & 0 deletions bigframes/core/compile/ibis_compiler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,5 @@

import bigframes.core.compile.ibis_compiler.operations.generic_ops # noqa: F401
import bigframes.core.compile.ibis_compiler.operations.geo_ops # noqa: F401
import bigframes.core.compile.ibis_compiler.operations.search_ops # noqa: F401
import bigframes.core.compile.ibis_compiler.scalar_op_registry # noqa: F401
40 changes: 40 additions & 0 deletions bigframes/core/compile/ibis_compiler/operations/search_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
BigFrames -> Ibis compilation for the operations in bigframes.operations.search_ops.

Please keep implementations in sequential order by op name.
"""

from __future__ import annotations

from bigframes_vendored.ibis.expr import types as ibis_types
import bigframes_vendored.ibis.expr.operations.udf as ibis_udf

from bigframes.core.compile.ibis_compiler import scalar_op_compiler
from bigframes.operations import search_ops

register_unary_op = scalar_op_compiler.scalar_op_compiler.register_unary_op


@register_unary_op(search_ops.SearchOp, pass_op=True)
def search_op_impl(x: ibis_types.Value, op: search_ops.SearchOp):
    """Compile a ``SearchOp`` by calling this module's ``search`` function.

    Args:
        x: The Ibis value to search over.
        op: The op instance; its ``search_query`` is forwarded as the second
            argument.

    Returns:
        Whatever ``search(x, op.search_query)`` returns.
    """
    query = op.search_query
    return search(x, query)


@ibis_udf.scalar.builtin(name="search")
def search(data_to_search, search_query) -> bool:
    """Checks to see whether a table or other search data contains a set of search terms.

    Declared via ``ibis_udf.scalar.builtin`` under the name ``"search"``.

    Args:
        data_to_search: The data to search over.
        search_query: The terms of the search query.
    """
    # NOTE(review): presumably this body is never executed and the decorator
    # maps calls to the backend's builtin function - confirm against the
    # vendored ibis UDF implementation. The pragma excludes it from coverage.
    return False  # pragma: NO COVER
1 change: 1 addition & 0 deletions bigframes/core/compile/sqlglot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import bigframes.core.compile.sqlglot.expressions.geo_ops # noqa: F401
import bigframes.core.compile.sqlglot.expressions.json_ops # noqa: F401
import bigframes.core.compile.sqlglot.expressions.numeric_ops # noqa: F401
import bigframes.core.compile.sqlglot.expressions.search_ops # noqa: F401
import bigframes.core.compile.sqlglot.expressions.string_ops # noqa: F401
import bigframes.core.compile.sqlglot.expressions.struct_ops # noqa: F401
import bigframes.core.compile.sqlglot.expressions.timedelta_ops # noqa: F401
Expand Down
29 changes: 29 additions & 0 deletions bigframes/core/compile/sqlglot/expressions/search_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import sqlglot.expressions as sge

from bigframes import operations as ops
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler

register_nary_op = scalar_compiler.scalar_op_compiler.register_nary_op
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op


@register_unary_op(ops.SearchOp, pass_op=True)
def _(expr: TypedExpr, op: ops.SearchOp) -> sge.Expression:
    """Compile a ``SearchOp`` into a ``SEARCH(<operand>, <query>)`` call.

    Args:
        expr: The typed operand; its underlying ``expr`` is the first
            argument of the generated call.
        op: The op instance; its ``search_query`` is passed through
            ``sge.convert`` and used as the second argument.

    Returns:
        The expression built by ``sge.func`` for the ``SEARCH`` call.
    """
    operand = expr.expr
    query_literal = sge.convert(op.search_query)
    return sge.func("SEARCH", operand, query_literal)
3 changes: 3 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@
NaryRemoteFunctionOp,
RemoteFunctionOp,
)
from bigframes.operations.search_ops import SearchOp
from bigframes.operations.string_ops import (
capitalize_op,
EndsWithOp,
Expand Down Expand Up @@ -374,6 +375,8 @@
"BinaryRemoteFunctionOp",
"NaryRemoteFunctionOp",
"RemoteFunctionOp",
# Search ops
"SearchOp",
# Frequency ops
"DatetimeToIntegerLabelOp",
"FloorDtOp",
Expand Down
28 changes: 28 additions & 0 deletions bigframes/operations/search_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import typing

from bigframes import dtypes
from bigframes.operations import base_ops


@dataclasses.dataclass(frozen=True)
class SearchOp(base_ops.UnaryOp):
    """Unary op that carries a search query and always yields a boolean dtype.

    Declared as a frozen dataclass, so ``search_query`` is set at construction
    and instances cannot be reassigned afterwards.
    """

    # Op name; a ClassVar, so it is not a dataclass field.
    name: typing.ClassVar[str] = "search"
    # The search query string carried by this op.
    search_query: str

    def output_type(self, *input_types):
        """Return ``dtypes.BOOL_DTYPE`` regardless of ``input_types``."""
        return dtypes.BOOL_DTYPE
Loading
Loading