googleapis · TrevorBergeron · May 10, 2024 · May 9, 2024 · May 9, 2024 · May 9, 2024
@@ -71,21 +71,19 @@ def indicate_duplicates(
     if keep == "first":
         # Count how many copies occur up to current copy of value
         # Discard this value if there are copies BEFORE
-        window_spec = windows.WindowSpec(
+        window_spec = windows.cumulative_rows(
             grouping_keys=tuple(columns),
-            following=0,
         )
     elif keep == "last":
         # Count how many copies occur up to current copy of values
         # Discard this value if there are copies AFTER
-        window_spec = windows.WindowSpec(
+        window_spec = windows.inverse_cumulative_rows(
             grouping_keys=tuple(columns),
-            preceding=0,
         )
     else:  # keep == False
         # Count how many copies of the value occur in entire series.
         # Discard this value if there are copies ANYWHERE
-        window_spec = windows.WindowSpec(grouping_keys=tuple(columns))
+        window_spec = windows.unbound(grouping_keys=tuple(columns))
     block, dummy = block.create_constant(1)
     block, val_count_col_id = block.apply_window_op(
         dummy,
@@ -114,7 +112,7 @@ def quantile(
     dropna: bool = False,
 ) -> blocks.Block:
     # TODO: handle windowing and more interpolation methods
-    window = core.WindowSpec(
+    window = windows.unbound(
         grouping_keys=tuple(grouping_column_ids),
     )
     quantile_cols = []
@@ -212,8 +210,8 @@ def _interpolate_column(
     if interpolate_method not in ["linear", "nearest", "ffill"]:
         raise ValueError("interpolate method not supported")
     window_ordering = (ordering.OrderingExpression(ex.free_var(x_values)),)
-    backwards_window = windows.WindowSpec(following=0, ordering=window_ordering)
-    forwards_window = windows.WindowSpec(preceding=0, ordering=window_ordering)
+    backwards_window = windows.rows(following=0, ordering=window_ordering)
+    forwards_window = windows.rows(preceding=0, ordering=window_ordering)
 
     # Note, this method may
     block, notnull = block.apply_unary_op(column, ops.notnull_op)
@@ -364,7 +362,7 @@ def value_counts(
     )
     count_id = agg_ids[0]
     if normalize:
-        unbound_window = windows.WindowSpec()
+        unbound_window = windows.unbound()
         block, total_count_id = block.apply_window_op(
             count_id, agg_ops.sum_op, unbound_window
         )
@@ -388,7 +386,7 @@ def value_counts(
 
 def pct_change(block: blocks.Block, periods: int = 1) -> blocks.Block:
     column_labels = block.column_labels
-    window_spec = windows.WindowSpec(
+    window_spec = windows.rows(
         preceding=periods if periods > 0 else None,
         following=-periods if periods < 0 else None,
     )
@@ -430,23 +428,22 @@ def rank(
             ops.isnull_op,
         )
         nullity_col_ids.append(nullity_col_id)
-        window = windows.WindowSpec(
-            # BigQuery has syntax to reorder nulls with "NULLS FIRST/LAST", but that is unavailable through ibis presently, so must order on a separate nullity expression first.
-            ordering=(
-                ordering.OrderingExpression(
-                    ex.free_var(col),
-                    ordering.OrderingDirection.ASC
-                    if ascending
-                    else ordering.OrderingDirection.DESC,
-                    na_last=(na_option in ["bottom", "keep"]),
-                ),
+        window_ordering = (
+            ordering.OrderingExpression(
+                ex.free_var(col),
+                ordering.OrderingDirection.ASC
+                if ascending
+                else ordering.OrderingDirection.DESC,
+                na_last=(na_option in ["bottom", "keep"]),
             ),
         )
         # Count_op ignores nulls, so if na_option is "top" or "bottom", we instead count the nullity columns, where nulls have been mapped to bools
         block, rownum_id = block.apply_window_op(
             col if na_option == "keep" else nullity_col_id,
             agg_ops.dense_rank_op if method == "dense" else agg_ops.count_op,
-            window_spec=window,
+            window_spec=windows.unbound(ordering=window_ordering)
+            if method == "dense"
+            else windows.rows(following=0, ordering=window_ordering),
             skip_reproject_unsafe=(col != columns[-1]),
         )
         rownum_col_ids.append(rownum_id)
@@ -464,7 +461,7 @@ def rank(
             block, result_id = block.apply_window_op(
                 rownum_col_ids[i],
                 agg_op,
-                window_spec=windows.WindowSpec(grouping_keys=(columns[i],)),
+                window_spec=windows.unbound(grouping_keys=(columns[i],)),
                 skip_reproject_unsafe=(i < (len(columns) - 1)),
             )
             post_agg_rownum_col_ids.append(result_id)
@@ -528,7 +525,7 @@ def nsmallest(
         block, counter = block.apply_window_op(
             column_ids[0],
             agg_ops.rank_op,
-            window_spec=windows.WindowSpec(ordering=tuple(order_refs)),
+            window_spec=windows.unbound(ordering=tuple(order_refs)),
         )
         block, condition = block.project_expr(ops.le_op.as_expr(counter, ex.const(n)))
         block = block.filter_by_id(condition)
@@ -558,7 +555,7 @@ def nlargest(
         block, counter = block.apply_window_op(
             column_ids[0],
             agg_ops.rank_op,
-            window_spec=windows.WindowSpec(ordering=tuple(order_refs)),
+            window_spec=windows.unbound(ordering=tuple(order_refs)),
         )
         block, condition = block.project_expr(ops.le_op.as_expr(counter, ex.const(n)))
         block = block.filter_by_id(condition)
@@ -653,7 +650,7 @@ def _mean_delta_to_power(
     grouping_column_ids: typing.Sequence[str],
 ) -> typing.Tuple[blocks.Block, typing.Sequence[str]]:
     """Calculate (x-mean(x))^n. Useful for calculating moment statistics such as skew and kurtosis."""
-    window = windows.WindowSpec(grouping_keys=tuple(grouping_column_ids))
+    window = windows.unbound(grouping_keys=tuple(grouping_column_ids))
     block, mean_ids = block.multi_apply_window_op(column_ids, agg_ops.mean_op, window)
     delta_ids = []
     for val_id, mean_val_id in zip(column_ids, mean_ids):
@@ -845,7 +842,7 @@ def _idx_extrema(
                 for idx_col in original_block.index_columns
             ],
         ]
-        window_spec = windows.WindowSpec(ordering=tuple(order_refs))
+        window_spec = windows.unbound(ordering=tuple(order_refs))
         idx_col = original_block.index_columns[0]
         block, result_col = block.apply_window_op(
             idx_col, agg_ops.first_op, window_spec

@@ -47,6 +47,7 @@
 import bigframes.core.tree_properties as tree_properties
 import bigframes.core.utils
 import bigframes.core.utils as utils
+import bigframes.core.window_spec as window_specs
 import bigframes.dtypes
 import bigframes.features
 import bigframes.operations as ops
@@ -807,7 +808,7 @@ def multi_apply_window_op(
         self,
         columns: typing.Sequence[str],
         op: agg_ops.WindowOp,
-        window_spec: core.WindowSpec,
+        window_spec: window_specs.WindowSpec,
         *,
         skip_null_groups: bool = False,
         never_skip_nulls: bool = False,
@@ -866,7 +867,7 @@ def apply_window_op(
         self,
         column: str,
         op: agg_ops.WindowOp,
-        window_spec: core.WindowSpec,
+        window_spec: window_specs.WindowSpec,
         *,
         result_label: Label = None,
         skip_null_groups: bool = False,
@@ -2020,7 +2021,7 @@ def _is_monotonic(
             return self._stats_cache[column_name][op_name]
 
         period = 1
-        window = bigframes.core.WindowSpec(
+        window = window_specs.rows(
             preceding=period,
             following=None,
         )

@@ -40,7 +40,7 @@
     OrderingExpression,
 )
 import bigframes.core.schema as schemata
-from bigframes.core.window_spec import WindowSpec
+from bigframes.core.window_spec import RangeWindowBounds, RowsWindowBounds, WindowSpec
 import bigframes.dtypes
 import bigframes.operations.aggregations as agg_ops
 
@@ -735,7 +735,9 @@ def project_window_op(
         skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection
         """
         column = typing.cast(ibis_types.Column, self._get_ibis_column(column_name))
-        window = self._ibis_window_from_spec(window_spec, allow_ties=op.handles_ties)
+        window = self._ibis_window_from_spec(
+            window_spec, require_total_order=op.uses_total_row_ordering
+        )
         bindings = {col: self._get_ibis_column(col) for col in self.column_ids}
 
         window_op = agg_compiler.compile_analytic(
@@ -1162,7 +1164,9 @@ def _create_string_ordering_column(self) -> ibis_types.StringColumn:
     def _compile_expression(self, expr: ex.Expression):
         return op_compiler.compile_expression(expr, self._ibis_bindings)
 
-    def _ibis_window_from_spec(self, window_spec: WindowSpec, allow_ties: bool = False):
+    def _ibis_window_from_spec(
+        self, window_spec: WindowSpec, require_total_order: bool
+    ):
         group_by: typing.List[ibis_types.Value] = (
             [
                 typing.cast(
@@ -1175,26 +1179,40 @@ def _ibis_window_from_spec(self, window_spec: WindowSpec, allow_ties: bool = Fal
         )
         if self._reduced_predicate is not None:
             group_by.append(self._reduced_predicate)
+
+        # Construct ordering. There are basically 3 main cases
+        # 1. Order-independent op (aggregation, cut, rank) with unbound window - no ordering clause needed
+        # 2. Order-independent op (aggregation, cut, rank) with range window - use ordering clause, ties allowed
+        # 3. Order-dependent op (navigation functions, array_agg) or rows bounds - use total row order to break ties.
         if window_spec.ordering:
             order_by = _convert_ordering_to_table_values(
                 {**self._column_names, **self._hidden_ordering_column_names},
                 window_spec.ordering,
             )
-            if not allow_ties:
-                # Most operator need an unambiguous ordering, so the table's total ordering is appended
+            if require_total_order or isinstance(window_spec.bounds, RowsWindowBounds):
+                # Some operators need an unambiguous ordering, so the table's total ordering is appended
                 order_by = tuple([*order_by, *self._ibis_order])
-        elif (window_spec.following is not None) or (window_spec.preceding is not None):
+        elif isinstance(window_spec.bounds, RowsWindowBounds):
             # If window spec has following or preceding bounds, we need to apply an unambiguous ordering.
             order_by = tuple(self._ibis_order)
         else:
             # Unbound grouping window. Suitable for aggregations but not for analytic function application.
             order_by = None
-        return ibis.window(
-            preceding=window_spec.preceding,
-            following=window_spec.following,
-            order_by=order_by,
-            group_by=group_by,
-        )
+
+        bounds = window_spec.bounds
+        window = ibis.window(order_by=order_by, group_by=group_by)
+        if bounds is not None:  # None means an unbound window: keep the frame-less window built above
+            if isinstance(bounds, RangeWindowBounds):
+                window = window.preceding_following(
+                    bounds.preceding, bounds.following, how="range"
+                )
+            elif isinstance(bounds, RowsWindowBounds):  # elif: range bounds must not fall through to the error
+                window = window.preceding_following(
+                    bounds.preceding, bounds.following, how="rows"
+                )
+            else:  # neither range nor rows bounds
+                raise ValueError(f"unrecognized window bounds {bounds}")
+        return window
 
     class Builder:
         def __init__(

@@ -28,6 +28,7 @@
 import bigframes.core.ordering as order
 import bigframes.core.utils as utils
 import bigframes.core.window as windows
+import bigframes.core.window_spec as window_specs
 import bigframes.dataframe as df
 import bigframes.dtypes as dtypes
 import bigframes.operations.aggregations as agg_ops
@@ -217,15 +218,15 @@ def cumprod(self, *args, **kwargs) -> df.DataFrame:
         return self._apply_window_op(agg_ops.product_op, numeric_only=True)
 
     def shift(self, periods=1) -> series.Series:
-        window = core.WindowSpec(
+        window = window_specs.rows(
             grouping_keys=tuple(self._by_col_ids),
             preceding=periods if periods > 0 else None,
             following=-periods if periods < 0 else None,
         )
         return self._apply_window_op(agg_ops.ShiftOp(periods), window=window)
 
     def diff(self, periods=1) -> series.Series:
-        window = core.WindowSpec(
+        window = window_specs.rows(
             grouping_keys=tuple(self._by_col_ids),
             preceding=periods if periods > 0 else None,
             following=-periods if periods < 0 else None,
@@ -234,7 +235,7 @@ def diff(self, periods=1) -> series.Series:
 
     def rolling(self, window: int, min_periods=None) -> windows.Window:
         # To get n size window, need current row and n-1 preceding rows.
-        window_spec = core.WindowSpec(
+        window_spec = window_specs.rows(
             grouping_keys=tuple(self._by_col_ids),
             preceding=window - 1,
             following=0,
@@ -248,9 +249,8 @@ def rolling(self, window: int, min_periods=None) -> windows.Window:
         )
 
     def expanding(self, min_periods: int = 1) -> windows.Window:
-        window_spec = core.WindowSpec(
+        window_spec = window_specs.cumulative_rows(
             grouping_keys=tuple(self._by_col_ids),
-            following=0,
             min_periods=min_periods,
         )
         block = self._block.order_by(
@@ -424,8 +424,8 @@ def _apply_window_op(
         numeric_only: bool = False,
     ):
         """Apply window op to groupby. Defaults to grouped cumulative window."""
-        window_spec = window or core.WindowSpec(
-            grouping_keys=tuple(self._by_col_ids), following=0
+        window_spec = window or window_specs.cumulative_rows(
+            grouping_keys=tuple(self._by_col_ids)
         )
         columns = self._aggregated_columns(numeric_only=numeric_only)
         block, result_ids = self._block.multi_apply_window_op(
@@ -594,15 +594,15 @@ def cumcount(self, *args, **kwargs) -> series.Series:
 
     def shift(self, periods=1) -> series.Series:
         """Shift index by desired number of periods."""
-        window = core.WindowSpec(
+        window = window_specs.rows(
             grouping_keys=tuple(self._by_col_ids),
             preceding=periods if periods > 0 else None,
             following=-periods if periods < 0 else None,
         )
         return self._apply_window_op(agg_ops.ShiftOp(periods), window=window)
 
     def diff(self, periods=1) -> series.Series:
-        window = core.WindowSpec(
+        window = window_specs.rows(
             grouping_keys=tuple(self._by_col_ids),
             preceding=periods if periods > 0 else None,
             following=-periods if periods < 0 else None,
@@ -611,7 +611,7 @@ def diff(self, periods=1) -> series.Series:
 
     def rolling(self, window: int, min_periods=None) -> windows.Window:
         # To get n size window, need current row and n-1 preceding rows.
-        window_spec = core.WindowSpec(
+        window_spec = window_specs.rows(
             grouping_keys=tuple(self._by_col_ids),
             preceding=window - 1,
             following=0,
@@ -629,9 +629,8 @@ def rolling(self, window: int, min_periods=None) -> windows.Window:
         )
 
     def expanding(self, min_periods: int = 1) -> windows.Window:
-        window_spec = core.WindowSpec(
+        window_spec = window_specs.cumulative_rows(
             grouping_keys=tuple(self._by_col_ids),
-            following=0,
             min_periods=min_periods,
         )
         block = self._block.order_by(
@@ -661,8 +660,8 @@ def _apply_window_op(
         window: typing.Optional[core.WindowSpec] = None,
     ):
         """Apply window op to groupby. Defaults to grouped cumulative window."""
-        window_spec = window or core.WindowSpec(
-            grouping_keys=tuple(self._by_col_ids), following=0
+        window_spec = window or window_specs.cumulative_rows(
+            grouping_keys=tuple(self._by_col_ids)
         )
 
         label = self._value_name if not discard_name else None

@@ -19,10 +19,10 @@
 import pandas as pd
 
 import bigframes.constants as constants
-import bigframes.core as core
 import bigframes.core.expression as ex
 import bigframes.core.ordering as order
 import bigframes.core.utils as utils
+import bigframes.core.window_spec as window_specs
 import bigframes.dataframe
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
@@ -159,7 +159,7 @@ def cut(
         )
 
     return x._apply_window_op(
-        agg_ops.CutOp(bins, labels=labels), window_spec=core.WindowSpec()
+        agg_ops.CutOp(bins, labels=labels), window_spec=window_specs.unbound()
     )
 
 
@@ -189,7 +189,7 @@ def qcut(
     block, result = block.apply_window_op(
         x._value_column,
         agg_ops.QcutOp(q),  # type: ignore
-        window_spec=core.WindowSpec(
+        window_spec=window_specs.unbound(
             grouping_keys=(nullity_id,),
             ordering=(order.ascending_over(x._value_column),),
         ),