Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
03a153c
feat: Add BigFrames.bigquery.st_regionstats method
google-labs-jules[bot] Oct 27, 2025
ab46078
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Oct 27, 2025
92c69aa
feat: Add BigFrames.bigquery.st_regionstats method
google-labs-jules[bot] Oct 27, 2025
8b681c0
Merge branch 'main' into feat-st-regionstats
tswast Oct 27, 2025
21243e4
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Oct 27, 2025
459c5b2
Merge branch 'feat-st-regionstats' of https://github.com/googleapis/p…
gcf-owl-bot[bot] Oct 27, 2025
350d241
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Oct 27, 2025
b4db49c
Merge branch 'feat-st-regionstats' of https://github.com/googleapis/p…
gcf-owl-bot[bot] Oct 27, 2025
a680526
feat: Add BigFrames.bigquery.st_regionstats method
google-labs-jules[bot] Oct 28, 2025
9775fb6
Merge branch 'main' into feat-st-regionstats
tswast Oct 28, 2025
bcd4047
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Oct 28, 2025
01edd21
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Oct 28, 2025
bf00055
Merge branch 'feat-st-regionstats' of https://github.com/googleapis/p…
gcf-owl-bot[bot] Oct 28, 2025
3c172bb
feat: Add BigFrames.bigquery.st_regionstats method
google-labs-jules[bot] Oct 28, 2025
13a67bd
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Oct 28, 2025
6a4b1c9
Merge remote-tracking branch 'origin/main' into feat-st-regionstats
tswast Oct 29, 2025
745211e
add arguments to st_regionstats and make it into a unary op
tswast Oct 29, 2025
4252e90
move to geo_ops
tswast Oct 29, 2025
b44d520
fix the sqlgot compiler
tswast Oct 29, 2025
18a6c76
fix sqlgot compiler
tswast Oct 29, 2025
1b007b9
attempt at ibis compiler
tswast Oct 29, 2025
2483e80
use JSON literals for options parameter
tswast Oct 29, 2025
c8eb266
fix sample
tswast Oct 29, 2025
6d436ff
Merge remote-tracking branch 'origin/main' into feat-st-regionstats
tswast Oct 29, 2025
ad021b7
Merge branch 'main' into feat-st-regionstats
tswast Oct 30, 2025
7a57ddd
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Oct 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
st_intersection,
st_isclosed,
st_length,
st_regionstats,
)
from bigframes.bigquery._operations.json import (
json_extract,
Expand Down Expand Up @@ -80,6 +81,7 @@
st_intersection,
st_isclosed,
st_length,
st_regionstats,
# json ops
json_extract,
json_extract_array,
Expand Down
63 changes: 62 additions & 1 deletion bigframes/bigquery/_operations/geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@

from __future__ import annotations

from typing import Union
import json
from typing import Mapping, Optional, Union

import shapely # type: ignore

from bigframes import operations as ops
import bigframes.dataframe
import bigframes.geopandas
import bigframes.series

Expand Down Expand Up @@ -675,3 +677,62 @@ def st_length(
series = series._apply_unary_op(ops.GeoStLengthOp(use_spheroid=use_spheroid))
series.name = None
return series


def st_regionstats(
    geography: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
    raster_id: str,
    band: Optional[str] = None,
    include: Optional[str] = None,
    options: Optional[Mapping[str, Union[str, int, float]]] = None,
) -> bigframes.series.Series:
    """Summarize the raster pixel values that intersect each geography.

    Returns statistics summarizing the pixel values of the raster image
    referenced by ``raster_id`` that intersect with ``geography``.

    The statistics include the count, minimum, maximum, sum, standard
    deviation, mean, and area of the valid pixels of the raster band selected
    by ``band``. Google Earth Engine computes the results of the function call.

    See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats

    Args:
        geography (bigframes.series.Series | bigframes.geopandas.GeoSeries):
            A series of geography objects to intersect with the raster image.
        raster_id (str):
            A string that identifies a raster image. The following formats are
            supported:

            * A URI from an image table provided by Google Earth Engine in
              BigQuery sharing (formerly Analytics Hub).
            * A URI for a readable GeoTIFF raster file.
            * A Google Earth Engine asset path that references public catalog
              data or project-owned assets with read access.
        band (Optional[str]):
            A string in one of the following formats:

            * A single band within the raster image specified by
              ``raster_id``.
            * A formula to compute a value from the available bands in the
              raster image. The formula uses the Google Earth Engine image
              expression syntax. Bands can be referenced by their name in
              expressions.

            If you don't specify a band, the first band of the image is used.
        include (Optional[str]):
            An optional string formula that uses the Google Earth Engine image
            expression syntax to compute a pixel weight. The formula should
            return values from 0 to 1. Values outside this range are set to the
            nearest limit, either 0 or 1. A value of 0 means that the pixel is
            invalid and it's excluded from analysis. A positive value means that
            a pixel is valid. Values between 0 and 1 represent proportional
            weights for calculations, such as weighted means.
        options (Mapping[str, Union[str, int, float]], optional):
            A mapping of options to pass to the function. It is serialized to
            a JSON string before being attached to the operator; an omitted or
            empty mapping sends no options. See the BigQuery documentation for
            a list of available options.

    Returns:
        bigframes.pandas.Series:
            A STRUCT Series containing the computed statistics.
    """
    # json.dumps only knows how to serialize ``dict`` instances, so copy the
    # input first: any other Mapping implementation (e.g. MappingProxyType)
    # would otherwise raise TypeError despite matching the annotation.
    options_json = json.dumps(dict(options)) if options else None

    op = ops.StRegionStatsOp(
        raster_id=raster_id,
        band=band,
        include=include,
        options=options_json,
    )
    return geography._apply_unary_op(op)
32 changes: 32 additions & 0 deletions bigframes/core/compile/ibis_compiler/operations/geo_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@

from typing import cast

from bigframes_vendored import ibis
from bigframes_vendored.ibis.expr import types as ibis_types
import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes
import bigframes_vendored.ibis.expr.operations.geospatial as ibis_geo
import bigframes_vendored.ibis.expr.operations.udf as ibis_udf

from bigframes.core.compile.ibis_compiler import scalar_op_compiler
Expand Down Expand Up @@ -101,6 +103,36 @@ def geo_st_isclosed_op_impl(x: ibis_types.Value):
return st_isclosed(x)


@register_unary_op(geo_ops.StRegionStatsOp, pass_op=True)
def st_regionstats(
    geography: ibis_types.Value,
    op: geo_ops.StRegionStatsOp,
):
    """Build the ibis ``GeoRegionStats`` expression for a ``StRegionStatsOp``.

    Each optional parameter stored on ``op`` (``band``, ``include``,
    ``options``) becomes a typed literal only when it is truthy; otherwise
    ``None`` is forwarded so the argument is left unset. ``raster_id`` is
    always passed as a string literal.
    """
    band = ibis.literal(op.band, type=ibis_dtypes.string()) if op.band else None
    include = (
        ibis.literal(op.include, type=ibis_dtypes.string()) if op.include else None
    )
    # ``op.options`` is already a JSON-encoded string, so it is typed as JSON.
    options = (
        ibis.literal(op.options, type=ibis_dtypes.json()) if op.options else None
    )

    region_stats = ibis_geo.GeoRegionStats(
        arg=geography,
        raster_id=ibis.literal(op.raster_id, type=ibis_dtypes.string()),
        band=band,
        include=include,
        options=options,
    )
    return region_stats.to_expr()


@register_unary_op(ops.geo_x_op)
def geo_x_op_impl(x: ibis_types.Value):
    """Return the ``x()`` accessor of ``x`` viewed as a geospatial value."""
    # ``cast`` only informs the type checker; it does not convert at runtime.
    geo_value = cast(ibis_types.GeoSpatialValue, x)
    return geo_value.x()
Expand Down
12 changes: 10 additions & 2 deletions bigframes/core/compile/ibis_compiler/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,20 +159,28 @@ def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
return decorator

def register_ternary_op(
self, op_ref: typing.Union[ops.TernaryOp, type[ops.TernaryOp]]
self,
op_ref: typing.Union[ops.TernaryOp, type[ops.TernaryOp]],
pass_op: bool = False,
):
"""
Decorator to register a ternary op implementation.

Args:
op_ref (TernaryOp or TernaryOp type):
Class or instance of operator that is implemented by the decorated function.
pass_op (bool):
Set to true if implementation takes the operator object as the last argument.
This is needed for parameterized ops where parameters are part of op object.
"""
key = typing.cast(str, op_ref.name)

def decorator(impl: typing.Callable[..., ibis_types.Value]):
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
return impl(args[0], args[1], args[2])
if pass_op:
return impl(args[0], args[1], args[2], op)
else:
return impl(args[0], args[1], args[2])

self._register(key, normalized_impl)
return impl
Expand Down
17 changes: 17 additions & 0 deletions bigframes/core/compile/sqlglot/expressions/geo_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,20 @@ def _(expr: TypedExpr) -> sge.Expression:
@register_unary_op(ops.geo_y_op)
def _(expr: TypedExpr) -> sge.Expression:
    """Compile ``geo_y_op`` into a ``SAFE.ST_Y`` function call."""
    operand = expr.expr
    return sge.func("SAFE.ST_Y", operand)


@register_unary_op(ops.StRegionStatsOp, pass_op=True)
def _(
    geography: TypedExpr,
    op: ops.StRegionStatsOp,
):
    """Compile ``StRegionStatsOp`` into an ``ST_REGIONSTATS`` function call.

    The geography expression and the raster id are positional arguments.
    ``band``, ``include`` and ``options`` are appended as keyword arguments,
    in that order, only when they are set (truthy) on ``op``.
    """
    call_args = [geography.expr, sge.convert(op.raster_id)]

    # Plain string-valued keyword arguments share the same construction.
    for kwarg_name, kwarg_value in (("band", op.band), ("include", op.include)):
        if kwarg_value:
            call_args.append(
                sge.Kwarg(this=kwarg_name, expression=sge.convert(kwarg_value))
            )

    if op.options:
        # ``op.options`` holds a JSON-encoded string; wrap it in a JSON node
        # rather than passing it as a bare string literal.
        options_json = sge.JSON(this=sge.convert(op.options))
        call_args.append(sge.Kwarg(this="options", expression=options_json))

    return sge.func("ST_REGIONSTATS", *call_args)
12 changes: 10 additions & 2 deletions bigframes/core/compile/sqlglot/scalar_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,20 +154,28 @@ def normalized_impl(args: typing.Sequence[TypedExpr], op: ops.RowOp):
return decorator

def register_ternary_op(
self, op_ref: typing.Union[ops.TernaryOp, type[ops.TernaryOp]]
self,
op_ref: typing.Union[ops.TernaryOp, type[ops.TernaryOp]],
pass_op: bool = False,
):
"""
Decorator to register a ternary op implementation.

Args:
op_ref (TernaryOp or TernaryOp type):
Class or instance of operator that is implemented by the decorated function.
pass_op (bool):
Set to true if implementation takes the operator object as the last argument.
This is needed for parameterized ops where parameters are part of op object.
"""
key = typing.cast(str, op_ref.name)

def decorator(impl: typing.Callable[..., sge.Expression]):
def normalized_impl(args: typing.Sequence[TypedExpr], op: ops.RowOp):
return impl(args[0], args[1], args[2])
if pass_op:
return impl(args[0], args[1], args[2], op)
else:
return impl(args[0], args[1], args[2])

self._register(key, normalized_impl)
return impl
Expand Down
2 changes: 2 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
GeoStBufferOp,
GeoStDistanceOp,
GeoStLengthOp,
StRegionStatsOp,
)
from bigframes.operations.json_ops import (
JSONExtract,
Expand Down Expand Up @@ -419,6 +420,7 @@
"geo_x_op",
"geo_y_op",
"GeoStDistanceOp",
"StRegionStatsOp",
# AI ops
"AIClassify",
"AIGenerate",
Expand Down
24 changes: 24 additions & 0 deletions bigframes/operations/geo_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import dataclasses
from typing import Optional

from bigframes import dtypes
from bigframes.operations import base_ops
Expand Down Expand Up @@ -126,6 +127,29 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
return dtypes.FLOAT_DTYPE


@dataclasses.dataclass(frozen=True)
class StRegionStatsOp(base_ops.UnaryOp):
    """Unary operator carrying the parameters of an ST_REGIONSTATS call.

    See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats
    """

    name = "st_regionstats"
    # Identifier of the raster image to summarize.
    raster_id: str
    # Optional band selection; None leaves the argument unset.
    band: Optional[str]
    # Optional pixel-weight formula; None leaves the argument unset.
    include: Optional[str]
    # Optional options payload, stored as a string.
    # NOTE(review): callers appear to pass a JSON-encoded string -- confirm.
    options: Optional[str]

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Return the struct dtype of the result, regardless of input types."""
        # NOTE(review): no standard-deviation field is declared here -- confirm
        # against the BigQuery ST_REGIONSTATS return schema.
        stat_fields = [
            ("min", dtypes.FLOAT_DTYPE),
            ("max", dtypes.FLOAT_DTYPE),
            ("sum", dtypes.FLOAT_DTYPE),
            ("count", dtypes.INT_DTYPE),
            ("mean", dtypes.FLOAT_DTYPE),
            ("area", dtypes.FLOAT_DTYPE),
        ]
        return dtypes.struct_type(stat_fields)


@dataclasses.dataclass(frozen=True)
class GeoStLengthOp(base_ops.UnaryOp):
name = "geo_st_length"
Expand Down
14 changes: 14 additions & 0 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2732,6 +2732,20 @@ def _apply_binary_op(
block, result_id = self._block.project_expr(expr, name)
return Series(block.select_column(result_id))

def _apply_ternary_op(
    self,
    other1: typing.Any,
    other2: typing.Any,
    op: ops.TernaryOp,
) -> bigframes.dataframe.DataFrame:
    """Apply a ternary operator to this series and two other operands.

    The three operands are aligned via ``_align3``, the operator expression
    is projected onto the aligned block under this series' name, and the
    resulting single column is returned wrapped in a DataFrame.
    """
    self_col, other1_col, other2_col, aligned_block = self._align3(other1, other2)
    result_name = self._name
    ternary_expr = op.as_expr(self_col, other1_col, other2_col)
    projected_block, result_id = aligned_block.project_expr(
        ternary_expr, result_name
    )
    # NOTE(review): the result is wrapped in a DataFrame although only one
    # column is selected -- confirm this is intentional rather than a Series.
    result_block = projected_block.select_column(result_id)
    return bigframes.dataframe.DataFrame(result_block)

def _apply_nary_op(
self,
op: ops.NaryOp,
Expand Down
67 changes: 67 additions & 0 deletions samples/snippets/st_regionstats_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def test_st_regionstats():
    """Sample: rank countries by mean temperature using ``st_regionstats``.

    Reads country boundaries from a public table, looks up the raster image
    reference for one date in a dataset linked to Earth Engine, computes
    region statistics for every country, and prints the warmest ones.
    """
    project_id = "bigframes-dev"

    # [START bigquery_dataframes_st_regionstats]
    from typing import cast

    import bigframes.bigquery as bbq
    import bigframes.pandas as bpd

    # TODO: Set the project_id to your Google Cloud project ID.
    # project_id = "your-project-id"
    #
    # TODO: Set the dataset_id to the ID of the dataset that contains the
    # `climate` table. This is likely a linked dataset to Earth Engine.
    # See: https://cloud.google.com/bigquery/docs/link-earth-engine
    linked_dataset = "era5_land_daily_aggregated"

    # Load the table of country boundaries.
    bpd.options.bigquery.project = project_id
    countries = bpd.read_gbq("bigquery-public-data.overture_maps.division_area")

    # Filter to just the countries.
    countries = countries[countries["subtype"] == "country"].copy()
    countries["name"] = countries["names"].struct.field("primary")

    # TODO: Add st_simplify when it is available in BigFrames.
    # https://github.com/googleapis/python-bigquery-dataframes/issues/1497
    # countries["simplified_geometry"] = bbq.st_simplify(countries["geometry"], 10000)
    countries["simplified_geometry"] = countries["geometry"]

    # Get the reference to the temperature data from a linked dataset.
    # Note: This sample assumes you have a linked dataset to Earth Engine.
    # See: https://cloud.google.com/bigquery/docs/link-earth-engine
    climate = bpd.read_gbq(f"{project_id}.{linked_dataset}.climate")

    # Select only the matching row(s) with a boolean mask. `where` would keep
    # every row and merely null out the non-matching ones, so a single value
    # could not be extracted afterwards.
    image_href = climate[climate["start_datetime"] == "2025-01-01 00:00:00"]

    # `item` is a method: it must be called to obtain the scalar URI string.
    raster_id = (
        image_href["assets"].struct.field("image").struct.field("href").item()
    )
    stats = bbq.st_regionstats(
        countries["simplified_geometry"],
        raster_id=cast(str, raster_id),
        band="temperature_2m",
    )

    # Extract the mean and convert from Kelvin to Celsius.
    countries["mean_temperature"] = stats.struct.field("mean") - 273.15

    # Sort by the mean temperature to find the warmest countries.
    result = countries[["name", "mean_temperature"]].sort_values(
        "mean_temperature", ascending=False
    )
    print(result.head())
    # [END bigquery_dataframes_st_regionstats]
Loading
Loading