-
Notifications
You must be signed in to change notification settings - Fork 107
Expand file tree
/
Copy path_helpers.py
More file actions
164 lines (132 loc) · 4.6 KB
/
_helpers.py
File metadata and controls
164 lines (132 loc) · 4.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import uuid
import pandas as pd
from decimal import Decimal
from datetime import datetime
from typing import Mapping, Any
def sql_type_to_pandas_type(sql_type: str):
    """
    Look up the pandas dtype name that corresponds to a SQL type name.

    The lookup is case-insensitive. ``None`` is returned for SQL types that
    have no entry in the table (UUID, DECIMAL, ARRAY, NULL, BINARY,
    VARBINARY, STRUCT, MAP) as well as for any unrecognised type name.

    :param sql_type: Name of the SQL type, in any letter case.
    :return: The pandas dtype name as a string, or ``None``.
    """

    pandas_dtype_by_sql_type = {
        "BOOLEAN": "boolean",
        "TINYINT": "Int8",
        "SMALLINT": "Int16",
        "INTEGER": "Int32",
        "BIGINT": "Int64",
        "REAL": "Float32",
        "DOUBLE": "Float64",
        "CHAR": "str",
        "VARCHAR": "str",
        "DATE": "datetime64[ns]",
        "TIMESTAMP": "datetime64[ns]",
        "TIME": "timedelta64[ns]",
        "INTERVAL": "timedelta64[ns]",
    }

    # Anything absent from the table deliberately maps to None.
    return pandas_dtype_by_sql_type.get(sql_type.upper())
def ensure_dataframe_has_columns(df: pd.DataFrame):
    """
    Reject DataFrames whose column labels are only the positions 0..n-1.

    :param df: The DataFrame to check.
    :raises ValueError: If the column labels equal ``0, 1, ..., n - 1``
        where ``n`` is the number of columns.
    """

    positional_labels = list(range(df.shape[1]))
    if positional_labels != list(df.columns):
        # At least one label differs from its position: names were set.
        return

    raise ValueError(
        "\n"
        "DataFrame has no column names set.\n"
        "Input DataFrame must have column names set and they must be consistent with the columns in the input table.\n"
    )
def dataframe_from_response(
    buffer: list[list[Mapping[str, Any]]], fields: list[Mapping[str, Any]]
):
    """
    Converts the response from Feldera to a pandas DataFrame.

    Every record in the buffer becomes one row. An extra ``insert_delete``
    column holds ``1`` for records carrying an ``"insert"`` payload and
    ``-1`` for records carrying a ``"delete"`` payload. DECIMAL values are
    converted to :class:`decimal.Decimal` and UUID values to
    :class:`uuid.UUID`; ``None`` values are left as they are.

    When the buffer contains no records, an empty DataFrame is returned that
    still has one column per field plus ``insert_delete``.

    :param buffer: A buffer of a list of JSON formatted output of the view you are listening to.
    :param fields: The schema (list of fields) of the view you are listening to.
    :return: A DataFrame with the schema's dtypes applied.
    """

    pd_schema = {}
    # Columns that need a Python-level conversion because the dtype lookup
    # yields None for them: column name -> constructor.
    converters = {}

    for column in fields:
        column_name = column["name"]
        if not column["case_sensitive"]:
            column_name = column_name.lower()

        column_type = column["columntype"]["type"]
        if column_type == "DECIMAL":
            converters[column_name] = Decimal
        elif column_type == "UUID":
            converters[column_name] = uuid.UUID

        pd_schema[column_name] = sql_type_to_pandas_type(column_type)

    # Flatten the buffer; each payload is copied so the caller's records are
    # not modified by the conversions below.
    data = [
        {**item["insert"], "insert_delete": 1}
        if "insert" in item
        else {**item["delete"], "insert_delete": -1}
        for sublist in buffer
        for item in sublist
    ]

    if not data:
        # pd.DataFrame([]) has no columns, and astype() raises KeyError for
        # every schema key that is not a column, so build the columns
        # explicitly for the empty case.
        empty_schema = {**pd_schema, "insert_delete": "int64"}
        return pd.DataFrame(columns=list(empty_schema)).astype(empty_schema)

    if converters:
        for datum in data:
            for col, convert in converters.items():
                if datum[col] is not None:
                    datum[col] = convert(datum[col])

    # Schema entries whose dtype is None are left unconverted by astype().
    return pd.DataFrame(data).astype(pd_schema)
def chunk_dataframe(df, chunk_size=1000):
    """
    Lazily split a dataframe into consecutive row slices.

    Each yielded slice holds at most ``chunk_size`` rows; the final slice
    holds whatever rows remain. Nothing is yielded for a dataframe of
    length zero.
    """

    row_count = len(df)
    slice_starts = range(0, row_count, chunk_size)
    yield from (df.iloc[start : start + chunk_size] for start in slice_starts)
def parse_datetime(value: str, field: str) -> datetime:
    """Parse RFC3339-like datetime strings returned by the API.

    A ``Z`` suffix is read as ``+00:00``. Fractional seconds of any length
    are accepted: they are padded or truncated to microsecond precision
    before parsing, because ``datetime.fromisoformat`` only accepts exactly
    3 or 6 fractional digits before Python 3.11.

    :param value: The datetime string to parse.
    :param field: Name of the field, used only in the error message.
    :return: The parsed datetime; timezone-aware when ``value`` carries an
        offset or ``Z``.
    :raises ValueError: If ``value`` is not a string or cannot be parsed.
    """
    if not isinstance(value, str):
        # Report a missing/ill-typed value the same way as a malformed one.
        raise ValueError(f"invalid datetime for '{field}': {value!r}")

    text = value.replace("Z", "+00:00")

    # Normalise the fractional-seconds part to exactly six digits.
    head, dot, tail = text.partition(".")
    if dot:
        digits = tail[: len(tail) - len(tail.lstrip("0123456789"))]
        if digits:
            remainder = tail[len(digits) :]
            text = f"{head}.{digits[:6].ljust(6, '0')}{remainder}"

    try:
        return datetime.fromisoformat(text)
    except ValueError as exc:
        raise ValueError(f"invalid datetime for '{field}': {value!r}") from exc
def expect_mapping(d: Mapping[str, Any], key: str) -> Mapping[str, Any]:
    """Fetch ``d[key]`` and require it to be a mapping.

    Raises ValueError when the key is absent or its value is not a mapping.
    """
    candidate = d.get(key)
    if isinstance(candidate, Mapping):
        return candidate
    raise ValueError(f"missing or invalid required object field '{key}'")
def expect_str(d: Mapping[str, Any], key: str) -> str:
    """Fetch ``d[key]`` and require it to be a string.

    Raises ValueError when the key is absent or its value is not a ``str``.
    """
    candidate = d.get(key)
    if isinstance(candidate, str):
        return candidate
    raise ValueError(f"missing or invalid required string field '{key}'")
def expect_int(d: Mapping[str, Any], key: str) -> int:
    """Return required integer field or raise ValueError.

    Booleans are rejected: ``bool`` is a subclass of ``int`` in Python, so a
    plain ``isinstance(value, int)`` check would accept ``True``/``False``
    where an integer is required.

    :param d: Mapping to read the field from.
    :param key: Name of the required field.
    :return: The integer stored under ``key``.
    :raises ValueError: If the key is absent or its value is not an integer.
    """
    value = d.get(key)
    if isinstance(value, bool) or not isinstance(value, int):
        raise ValueError(f"missing or invalid required integer field '{key}'")
    return value
def expect_bool(d: Mapping[str, Any], key: str) -> bool:
    """Fetch ``d[key]`` and require it to be a boolean.

    Raises ValueError when the key is absent or its value is not a ``bool``.
    """
    candidate = d.get(key)
    if isinstance(candidate, bool):
        return candidate
    raise ValueError(f"missing or invalid required boolean field '{key}'")