Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 61 additions & 8 deletions bigframes/core/compile/sqlglot/expressions/datetime_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

import sqlglot.expressions as sge

from bigframes import dtypes
from bigframes import operations as ops
from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler

Expand Down Expand Up @@ -81,22 +83,73 @@ def _(expr: TypedExpr) -> sge.Expression:

@register_unary_op(ops.StrftimeOp, pass_op=True)
def _(expr: TypedExpr, op: ops.StrftimeOp) -> sge.Expression:
return sge.func("FORMAT_TIMESTAMP", sge.convert(op.date_format), expr.expr)
func_name = ""
if expr.dtype == dtypes.DATE_DTYPE:
func_name = "FORMAT_DATE"
elif expr.dtype == dtypes.DATETIME_DTYPE:
func_name = "FORMAT_DATETIME"
elif expr.dtype == dtypes.TIME_DTYPE:
func_name = "FORMAT_TIME"
elif expr.dtype == dtypes.TIMESTAMP_DTYPE:
func_name = "FORMAT_TIMESTAMP"

return sge.func(func_name, sge.convert(op.date_format), expr.expr)


@register_unary_op(ops.time_op)
def _(expr: TypedExpr) -> sge.Expression:
return sge.func("TIME", expr.expr)


@register_unary_op(ops.ToDatetimeOp)
def _(expr: TypedExpr) -> sge.Expression:
return sge.Cast(this=sge.func("TIMESTAMP_SECONDS", expr.expr), to="DATETIME")

@register_unary_op(ops.ToDatetimeOp, pass_op=True)
def _(expr: TypedExpr, op: ops.ToDatetimeOp) -> sge.Expression:
if op.format:
result = expr.expr
if expr.dtype != dtypes.STRING_DTYPE:
result = sge.Cast(this=result, to="STRING")
result = sge.func(
"PARSE_TIMESTAMP", sge.convert(op.format), result, sge.convert("UTC")
)
return sge.Cast(this=result, to="DATETIME")

if expr.dtype == dtypes.STRING_DTYPE:
return sge.TryCast(this=expr.expr, to="DATETIME")

value = expr.expr
unit = op.unit or "ns"
factor = UNIT_TO_US_CONVERSION_FACTORS[unit]
if factor != 1:
value = sge.Mul(this=value, expression=sge.convert(factor))
value = sge.func("TRUNC", value)
return sge.Cast(
this=sge.func("TIMESTAMP_MICROS", sge.Cast(this=value, to="INT64")),
to="DATETIME",
)


@register_unary_op(ops.ToTimestampOp, pass_op=True)
def _(expr: TypedExpr, op: ops.ToTimestampOp) -> sge.Expression:
if op.format:
result = expr.expr
if expr.dtype != dtypes.STRING_DTYPE:
result = sge.Cast(this=result, to="STRING")
return sge.func(
"PARSE_TIMESTAMP", sge.convert(op.format), expr.expr, sge.convert("UTC")
)

@register_unary_op(ops.ToTimestampOp)
def _(expr: TypedExpr) -> sge.Expression:
return sge.func("TIMESTAMP_SECONDS", expr.expr)
if expr.dtype == dtypes.STRING_DTYPE:
return sge.func("TIMESTAMP", expr.expr)

value = expr.expr
unit = op.unit or "ns"
factor = UNIT_TO_US_CONVERSION_FACTORS[unit]
if factor != 1:
value = sge.Mul(this=value, expression=sge.convert(factor))
value = sge.func("TRUNC", value)
return sge.Cast(
this=sge.func("TIMESTAMP_MICROS", sge.Cast(this=value, to="INT64")),
to="TIMESTAMP",
)


@register_unary_op(ops.UnixMicros)
Expand Down
6 changes: 6 additions & 0 deletions bigframes/core/compile/sqlglot/expressions/timedelta_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import sqlglot.expressions as sge

from bigframes import dtypes
from bigframes import operations as ops
from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
Expand All @@ -32,7 +33,12 @@ def _(expr: TypedExpr) -> sge.Expression:
@register_unary_op(ops.ToTimedeltaOp, pass_op=True)
def _(expr: TypedExpr, op: ops.ToTimedeltaOp) -> sge.Expression:
value = expr.expr
if expr.dtype == dtypes.TIMEDELTA_DTYPE:
return value

factor = UNIT_TO_US_CONVERSION_FACTORS[op.unit]
if factor != 1:
value = sge.Mul(this=value, expression=sge.convert(factor))
if expr.dtype == dtypes.FLOAT_DTYPE:
value = sge.Cast(this=sge.Floor(this=value), to=sge.DataType(this="INT64"))
return value
2 changes: 2 additions & 0 deletions bigframes/core/compile/sqlglot/sqlglot_ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,8 @@ def _literal(value: typing.Any, dtype: dtypes.Dtype) -> sge.Expression:
elif dtype == dtypes.BYTES_DTYPE:
return _cast(str(value), sqlglot_type)
elif dtypes.is_time_like(dtype):
if isinstance(value, str):
return _cast(sge.convert(value), sqlglot_type)
if isinstance(value, np.generic):
value = value.item()
return _cast(sge.convert(value.isoformat()), sqlglot_type)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
WITH `bfcte_0` AS (
SELECT
`date_col`,
`datetime_col`,
`time_col`,
`timestamp_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
FORMAT_TIMESTAMP('%Y-%m-%d', `timestamp_col`) AS `bfcol_1`
FORMAT_DATE('%Y-%m-%d', `date_col`) AS `bfcol_8`,
FORMAT_DATETIME('%Y-%m-%d', `datetime_col`) AS `bfcol_9`,
FORMAT_TIME('%Y-%m-%d', `time_col`) AS `bfcol_10`,
FORMAT_TIMESTAMP('%Y-%m-%d', `timestamp_col`) AS `bfcol_11`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `timestamp_col`
`bfcol_8` AS `date_col`,
`bfcol_9` AS `datetime_col`,
`bfcol_10` AS `time_col`,
`bfcol_11` AS `timestamp_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
WITH `bfcte_0` AS (
SELECT
`int64_col`
`float64_col`,
`int64_col`,
`string_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CAST(TIMESTAMP_SECONDS(`int64_col`) AS DATETIME) AS `bfcol_1`
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS DATETIME) AS `bfcol_6`,
SAFE_CAST(`string_col` AS DATETIME) AS `bfcol_7`,
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS DATETIME) AS `bfcol_8`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `int64_col`
`bfcol_6` AS `int64_col`,
`bfcol_7` AS `string_col`,
`bfcol_8` AS `float64_col`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
WITH `bfcte_0` AS (
SELECT
`float64_col`,
`int64_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
TIMESTAMP_SECONDS(`int64_col`) AS `bfcol_1`
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `bfcol_2`,
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `bfcol_3`,
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000000) AS INT64)) AS TIMESTAMP) AS `bfcol_4`,
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000) AS INT64)) AS TIMESTAMP) AS `bfcol_5`,
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col`) AS INT64)) AS TIMESTAMP) AS `bfcol_6`,
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `bfcol_7`
FROM `bfcte_0`
)
SELECT
`bfcol_1` AS `int64_col`
`bfcol_2` AS `int64_col`,
`bfcol_3` AS `float64_col`,
`bfcol_4` AS `int64_col_s`,
`bfcol_5` AS `int64_col_ms`,
`bfcol_6` AS `int64_col_us`,
`bfcol_7` AS `int64_col_ns`
FROM `bfcte_1`
Original file line number Diff line number Diff line change
@@ -1,37 +1,54 @@
WITH `bfcte_0` AS (
SELECT
`float64_col`,
`int64_col`,
`rowindex`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
`rowindex` AS `bfcol_4`,
`int64_col` AS `bfcol_5`,
`int64_col` AS `bfcol_6`
`rowindex` AS `bfcol_6`,
`int64_col` AS `bfcol_7`,
`float64_col` AS `bfcol_8`,
`int64_col` AS `bfcol_9`
FROM `bfcte_0`
), `bfcte_2` AS (
SELECT
*,
`bfcol_4` AS `bfcol_10`,
`bfcol_5` AS `bfcol_11`,
`bfcol_6` AS `bfcol_12`,
`bfcol_5` * 1000000 AS `bfcol_13`
`bfcol_6` AS `bfcol_14`,
`bfcol_7` AS `bfcol_15`,
`bfcol_8` AS `bfcol_16`,
`bfcol_9` AS `bfcol_17`,
CAST(FLOOR(`bfcol_8` * 1000000) AS INT64) AS `bfcol_18`
FROM `bfcte_1`
), `bfcte_3` AS (
SELECT
*,
`bfcol_10` AS `bfcol_18`,
`bfcol_11` AS `bfcol_19`,
`bfcol_12` AS `bfcol_20`,
`bfcol_13` AS `bfcol_21`,
`bfcol_11` * 604800000000 AS `bfcol_22`
`bfcol_14` AS `bfcol_24`,
`bfcol_15` AS `bfcol_25`,
`bfcol_16` AS `bfcol_26`,
`bfcol_17` AS `bfcol_27`,
`bfcol_18` AS `bfcol_28`,
`bfcol_15` * 3600000000 AS `bfcol_29`
FROM `bfcte_2`
), `bfcte_4` AS (
SELECT
*,
`bfcol_24` AS `bfcol_36`,
`bfcol_25` AS `bfcol_37`,
`bfcol_26` AS `bfcol_38`,
`bfcol_27` AS `bfcol_39`,
`bfcol_28` AS `bfcol_40`,
`bfcol_29` AS `bfcol_41`,
`bfcol_27` AS `bfcol_42`
FROM `bfcte_3`
)
SELECT
`bfcol_18` AS `rowindex`,
`bfcol_19` AS `int64_col`,
`bfcol_20` AS `duration_us`,
`bfcol_21` AS `duration_s`,
`bfcol_22` AS `duration_w`
FROM `bfcte_3`
`bfcol_36` AS `rowindex`,
`bfcol_37` AS `int64_col`,
`bfcol_38` AS `float64_col`,
`bfcol_39` AS `duration_us`,
`bfcol_40` AS `duration_s`,
`bfcol_41` AS `duration_w`,
`bfcol_42` AS `duration_on_duration`
FROM `bfcte_4`
37 changes: 22 additions & 15 deletions tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,15 @@ def test_second(scalar_types_df: bpd.DataFrame, snapshot):


def test_strftime(scalar_types_df: bpd.DataFrame, snapshot):
col_name = "timestamp_col"
bf_df = scalar_types_df[[col_name]]
sql = utils._apply_ops_to_sql(
bf_df, [ops.StrftimeOp("%Y-%m-%d").as_expr(col_name)], [col_name]
)
bf_df = scalar_types_df[["timestamp_col", "datetime_col", "date_col", "time_col"]]
ops_map = {
"date_col": ops.StrftimeOp("%Y-%m-%d").as_expr("date_col"),
"datetime_col": ops.StrftimeOp("%Y-%m-%d").as_expr("datetime_col"),
"time_col": ops.StrftimeOp("%Y-%m-%d").as_expr("time_col"),
"timestamp_col": ops.StrftimeOp("%Y-%m-%d").as_expr("timestamp_col"),
}

sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
snapshot.assert_match(sql, "out.sql")


Expand All @@ -161,22 +164,26 @@ def test_time(scalar_types_df: bpd.DataFrame, snapshot):


def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
col_name = "int64_col"
bf_df = scalar_types_df[[col_name]]
sql = utils._apply_ops_to_sql(
bf_df, [ops.ToDatetimeOp().as_expr(col_name)], [col_name]
)
col_names = ["int64_col", "string_col", "float64_col"]
bf_df = scalar_types_df[col_names]
ops_map = {col_name: ops.ToDatetimeOp().as_expr(col_name) for col_name in col_names}

sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
snapshot.assert_match(sql, "out.sql")


def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot):
col_name = "int64_col"
bf_df = scalar_types_df[[col_name]]
sql = utils._apply_ops_to_sql(
bf_df, [ops.ToTimestampOp().as_expr(col_name)], [col_name]
)
bf_df = scalar_types_df[["int64_col", "string_col", "float64_col"]]
ops_map = {
"int64_col": ops.ToTimestampOp().as_expr("int64_col"),
"float64_col": ops.ToTimestampOp().as_expr("float64_col"),
"int64_col_s": ops.ToTimestampOp(unit="s").as_expr("int64_col"),
"int64_col_ms": ops.ToTimestampOp(unit="ms").as_expr("int64_col"),
"int64_col_us": ops.ToTimestampOp(unit="us").as_expr("int64_col"),
"int64_col_ns": ops.ToTimestampOp(unit="ns").as_expr("int64_col"),
}

sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
snapshot.assert_match(sql, "out.sql")


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@


def test_to_timedelta(scalar_types_df: bpd.DataFrame, snapshot):
bf_df = scalar_types_df[["int64_col"]]
bf_df = scalar_types_df[["int64_col", "float64_col"]]
bf_df["duration_us"] = bpd.to_timedelta(bf_df["int64_col"], "us")
bf_df["duration_s"] = bpd.to_timedelta(bf_df["int64_col"], "s")
bf_df["duration_w"] = bpd.to_timedelta(bf_df["int64_col"], "W")
bf_df["duration_s"] = bpd.to_timedelta(bf_df["float64_col"], "s")
bf_df["duration_w"] = bpd.to_timedelta(bf_df["int64_col"], "h")
bf_df["duration_on_duration"] = bpd.to_timedelta(bf_df["duration_us"], "ms")

snapshot.assert_match(bf_df.sql, "out.sql")

Expand Down