forked from feldera/feldera
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_variant.py
More file actions
102 lines (77 loc) · 3.42 KB
/
test_variant.py
File metadata and controls
102 lines (77 loc) · 3.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import unittest
from feldera import PipelineBuilder
from tests import TEST_CLIENT
from decimal import Decimal
class TestVariant(unittest.TestCase):
    """End-to-end test of JSON / VARIANT handling in a pipeline."""

    def test_local(self):
        """Feed JSON both as strings and as VARIANT and check every view.

        The pipeline declares two tables:

        * ``json_table`` receives JSON documents as ``VARCHAR``; the views
          ``json_view``, ``json_string_view`` and ``average_view`` are
          derived from it.
        * ``variant_table`` receives ``VARIANT`` values directly; the view
          ``typed_view`` casts fields of it to typed columns.

        Each view's listener output is compared against the expected rows,
        every one of which carries ``"insert_delete": 1``.
        """
        # The SQL text is kept flush-left so the program string handed to the
        # pipeline is exactly the same as before.
        sql = """
-- Ingest JSON as string; output it as VARIANT.
CREATE TABLE json_table (json VARCHAR) with ('materialized' = 'true');
CREATE MATERIALIZED VIEW json_view AS SELECT PARSE_JSON(json) AS json FROM json_table;
CREATE MATERIALIZED VIEW json_string_view AS SELECT TO_JSON(json) AS json FROM json_view;
CREATE MATERIALIZED VIEW average_view AS SELECT
CAST(json['name'] AS VARCHAR) as name,
((CAST(json['scores'][1] AS DECIMAL(8, 2)) + CAST(json['scores'][2] AS DECIMAL(8, 2))) / 2) as average
FROM json_view;
-- Ingest JSON as variant; extract strongly typed columns from it.
CREATE TABLE variant_table(val VARIANT) with ('materialized' = 'true');
CREATE MATERIALIZED VIEW typed_view AS SELECT
CAST(val['name'] AS VARCHAR) as name,
CAST(val['scores'] AS DECIMAL ARRAY) as scores
FROM variant_table;
"""
        pipeline = PipelineBuilder(
            TEST_CLIENT, name="test_variant", sql=sql
        ).create_or_replace()

        # Rows for `json_table`: each JSON document is passed as a string.
        input_strings = [
            {"json": '{"name":"Bob","scores":[8,10]}'},
            {"json": '{"name":"Dunce","scores":[3,4]}'},
            {"json": '{"name":"John","scores":[9,10]}'},
        ]
        # Rows for `variant_table`: the same documents as structured values.
        input_json = [
            {"val": {"name": "Bob", "scores": [8, 10]}},
            {"val": {"name": "Dunce", "scores": [3, 4]}},
            {"val": {"name": "John", "scores": [9, 10]}},
        ]

        # Every expected output row is the payload plus this marker.
        inserted = {"insert_delete": 1}

        expected_strings = [row | inserted for row in input_strings]
        # Decimals are built from strings so the expected values never depend
        # on binary floating-point representation.
        expected_average = [
            row | inserted
            for row in (
                {"name": "Bob", "average": Decimal("9")},
                {"name": "Dunce", "average": Decimal("3.5")},
                {"name": "John", "average": Decimal("9.5")},
            )
        ]
        expected_typed = [
            row | inserted
            for row in (
                {"name": "Bob", "scores": [8, 10]},
                {"name": "Dunce", "scores": [3, 4]},
                {"name": "John", "scores": [9, 10]},
            )
        ]
        expected_variant = [
            row | inserted
            for row in (
                {"json": {"name": "Bob", "scores": [8, 10]}},
                {"json": {"name": "Dunce", "scores": [3, 4]}},
                {"json": {"name": "John", "scores": [9, 10]}},
            )
        ]

        # Listeners are registered for all four views before the pipeline is
        # started.
        variant_out = pipeline.listen("json_view")
        json_out = pipeline.listen("json_string_view")
        average_out = pipeline.listen("average_view")
        typed_out = pipeline.listen("typed_view")

        # NOTE(review): if an assertion below fails, the final
        # wait_for_completion(True) / delete() calls are skipped and the
        # pipeline is left behind; presumably create_or_replace() copes with
        # that on the next run -- confirm.
        pipeline.start()

        # Feed JSON as strings, receive output from `average_view`,
        # `json_view` and `json_string_view`.
        pipeline.input_json("json_table", input_strings)
        # NOTE(review): the boolean presumably selects whether the pipeline
        # is shut down once processing completes -- confirm against the SDK.
        pipeline.wait_for_completion(False)

        # unittest assertions (rather than bare `assert`) still run under
        # `python -O` and report a readable diff on mismatch.
        self.assertEqual(expected_average, average_out.to_dict())
        self.assertEqual(expected_variant, variant_out.to_dict())
        self.assertEqual(expected_strings, json_out.to_dict())

        # Feed VARIANT, read strongly typed columns. Since output columns have
        # the same shape as inputs, output and input should be identical.
        pipeline.input_json("variant_table", input_json)
        pipeline.wait_for_completion(False)
        self.assertEqual(expected_typed, typed_out.to_dict())

        pipeline.wait_for_completion(True)
        pipeline.delete()
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()