Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions bigframes/core/compile/sqlglot/aggregations/windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,9 @@ def apply_window_if_present(
# Unbound grouping window.
order_by = None
elif window.is_range_bounded:
# Note that, when the window is range-bounded, we only need one ordering key.
# There are two reasons:
# 1. Manipulating null positions requires more than one ordering key, which
# is forbidden by SQL window syntax for range rolling.
# 2. Pandas does not allow range rolling on timeseries with nulls.
order_by = get_window_order_by((window.ordering[0],), override_null_order=False)
order_by = get_window_order_by((window.ordering[0],))
else:
order_by = get_window_order_by(window.ordering, override_null_order=True)
order_by = get_window_order_by(window.ordering)

order = sge.Order(expressions=order_by) if order_by else None

Expand Down Expand Up @@ -102,7 +97,15 @@ def get_window_order_by(
ordering: typing.Tuple[ordering_spec.OrderingExpression, ...],
override_null_order: bool = False,
) -> typing.Optional[tuple[sge.Ordered, ...]]:
"""Returns the SQL order by clause for a window specification."""
"""Returns the SQL order by clause for a window specification.
Args:
ordering (Tuple[ordering_spec.OrderingExpression, ...]):
A tuple of ordering specification objects.
override_null_order (bool):
If True, overrides BigQuery's default null ordering behavior, which
is sometimes incompatible with ordered aggregations. The generated SQL
will include extra expressions to correctly enforce NULLS FIRST/LAST.
"""
if not ordering:
return None

Expand All @@ -115,8 +118,6 @@ def get_window_order_by(
nulls_first = not ordering_spec_item.na_last

if override_null_order:
# Bigquery SQL considers NULLS to be "smallest" values, but we need
# to override in these cases.
is_null_expr = sge.Is(this=expr, expression=sge.Null())
if nulls_first and desc:
order_by.append(
Expand Down
2 changes: 2 additions & 0 deletions bigframes/core/compile/sqlglot/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ def compile_random_sample(
def compile_aggregate(
self, node: nodes.AggregateNode, child: ir.SQLGlotIR
) -> ir.SQLGlotIR:
# BigQuery ordered aggregations do not support NULLS FIRST/LAST,
# so we need to add extra expressions to enforce the null ordering.
ordering_cols = windows.get_window_order_by(
node.order_by, override_null_order=True
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
), `bfcte_1` AS (
SELECT
*,
ROW_NUMBER() OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
ROW_NUMBER() OVER (ORDER BY `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
WITH `bfcte_0` AS (
SELECT
`bool_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE
WHEN `bfcol_0` IS NULL
THEN NULL
ELSE COALESCE(LOGICAL_AND(`bfcol_0`) OVER (), TRUE)
END AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `agg_bool`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
WITH `bfcte_0` AS (
SELECT
`bool_col` AS `bfcol_0`,
`string_col` AS `bfcol_1`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE
WHEN `bfcol_0` IS NULL
THEN NULL
ELSE COALESCE(LOGICAL_AND(`bfcol_0`) OVER (PARTITION BY `bfcol_1`), TRUE)
END AS `bfcol_2`
FROM `bfcte_0`
)
SELECT
`bfcol_2` AS `agg_bool`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE WHEN `bfcol_0` IS NULL THEN NULL ELSE ANY_VALUE(`bfcol_0`) OVER () END AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `agg_int64`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`,
`string_col` AS `bfcol_1`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE
WHEN `bfcol_0` IS NULL
THEN NULL
ELSE ANY_VALUE(`bfcol_0`) OVER (PARTITION BY `bfcol_1`)
END AS `bfcol_2`
FROM `bfcte_0`
)
SELECT
`bfcol_2` AS `agg_int64`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
COUNT(`bfcol_0`) OVER () AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `agg_int64`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`,
`string_col` AS `bfcol_1`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
COUNT(`bfcol_0`) OVER (PARTITION BY `bfcol_1`) AS `bfcol_2`
FROM `bfcte_0`
)
SELECT
`bfcol_2` AS `agg_int64`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,7 @@ WITH `bfcte_0` AS (
), `bfcte_1` AS (
SELECT
*,
CAST(DATE_DIFF(
`bfcol_0`,
LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST),
DAY
) * 86400000000 AS INT64) AS `bfcol_1`
CAST(DATE_DIFF(`bfcol_0`, LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC NULLS LAST), DAY) * 86400000000 AS INT64) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
), `bfcte_1` AS (
SELECT
*,
DENSE_RANK() OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
DENSE_RANK() OVER (ORDER BY `bfcol_0` DESC) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
), `bfcte_1` AS (
SELECT
*,
`bfcol_0` <> LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
`bfcol_0` <> LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` DESC) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
), `bfcte_1` AS (
SELECT
*,
`bfcol_0` - LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
`bfcol_0` - LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@ WITH `bfcte_0` AS (
CASE
WHEN `bfcol_0` IS NULL
THEN NULL
ELSE FIRST_VALUE(`bfcol_0`) OVER (
ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
ELSE FIRST_VALUE(`bfcol_0`) OVER (ORDER BY `bfcol_0` DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
END AS `bfcol_1`
FROM `bfcte_0`
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ WITH `bfcte_0` AS (
SELECT
*,
FIRST_VALUE(`bfcol_0` IGNORE NULLS) OVER (
ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST
ORDER BY `bfcol_0` ASC NULLS LAST
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS `bfcol_1`
FROM `bfcte_0`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@ WITH `bfcte_0` AS (
CASE
WHEN `bfcol_0` IS NULL
THEN NULL
ELSE LAST_VALUE(`bfcol_0`) OVER (
ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
ELSE LAST_VALUE(`bfcol_0`) OVER (ORDER BY `bfcol_0` DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
END AS `bfcol_1`
FROM `bfcte_0`
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ WITH `bfcte_0` AS (
SELECT
*,
LAST_VALUE(`bfcol_0` IGNORE NULLS) OVER (
ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST
ORDER BY `bfcol_0` ASC NULLS LAST
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS `bfcol_1`
FROM `bfcte_0`
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE WHEN `bfcol_0` IS NULL THEN NULL ELSE MAX(`bfcol_0`) OVER () END AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `agg_int64`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`,
`string_col` AS `bfcol_1`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE
WHEN `bfcol_0` IS NULL
THEN NULL
ELSE MAX(`bfcol_0`) OVER (PARTITION BY `bfcol_1`)
END AS `bfcol_2`
FROM `bfcte_0`
)
SELECT
`bfcol_2` AS `agg_int64`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE WHEN `bfcol_0` IS NULL THEN NULL ELSE AVG(`bfcol_0`) OVER () END AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `agg_int64`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`,
`string_col` AS `bfcol_1`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE
WHEN `bfcol_0` IS NULL
THEN NULL
ELSE AVG(`bfcol_0`) OVER (PARTITION BY `bfcol_1`)
END AS `bfcol_2`
FROM `bfcte_0`
)
SELECT
`bfcol_2` AS `agg_int64`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE WHEN `bfcol_0` IS NULL THEN NULL ELSE MIN(`bfcol_0`) OVER () END AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `agg_int64`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`,
`string_col` AS `bfcol_1`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE
WHEN `bfcol_0` IS NULL
THEN NULL
ELSE MIN(`bfcol_0`) OVER (PARTITION BY `bfcol_1`)
END AS `bfcol_2`
FROM `bfcte_0`
)
SELECT
`bfcol_2` AS `agg_int64`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
), `bfcte_1` AS (
SELECT
*,
RANK() OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
RANK() OVER (ORDER BY `bfcol_0` DESC NULLS FIRST) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
), `bfcte_1` AS (
SELECT
*,
LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
LAG(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
), `bfcte_1` AS (
SELECT
*,
LEAD(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` IS NULL ASC NULLS LAST, `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
LEAD(`bfcol_0`, 1) OVER (ORDER BY `bfcol_0` ASC) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE WHEN `bfcol_0` IS NULL THEN NULL ELSE COALESCE(SUM(`bfcol_0`) OVER (), 0) END AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `agg_int64`
FROM `bfcte_1`
Loading