-
Notifications
You must be signed in to change notification settings - Fork 107
Expand file tree
/
Copy pathtest_udp.py
More file actions
128 lines (111 loc) · 3.21 KB
/
test_udp.py
File metadata and controls
128 lines (111 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import unittest
from feldera import PipelineBuilder
from tests import TEST_CLIENT, unique_pipeline_name
from feldera.runtime_config import RuntimeConfig
from feldera.testutils import FELDERA_TEST_NUM_WORKERS, FELDERA_TEST_NUM_HOSTS
# Test a user-defined preprocessor (UDP — not the network protocol): a
# connector-level hook that transforms the raw input byte stream before parsing.
class TestUDP(unittest.TestCase):
    """End-to-end test for user-defined preprocessors (UDPs)."""

    def test_local(self):
        """Run a pipeline whose input connector is wrapped in a Rust "logger" UDP.

        The preprocessor is a pass-through that counts bytes and emits a
        ``tracing`` message each time the running total crosses a megabyte
        boundary.  The test verifies both effects:

        1. the log message actually appears in the pipeline logs, and
        2. the ingested data is unmodified, checked via a stable query hash.
        """
        # Table fed by the datagen connector; the connector declares the
        # "logger" preprocessor so every input datagram flows through it.
        sql = """
        CREATE TABLE t (
            i INT,
            ti TINYINT,
            si SMALLINT,
            bi BIGINT,
            r REAL,
            d DOUBLE,
            bin VARBINARY,
            dt DATE,
            t TIME,
            ts TIMESTAMP,
            a INT ARRAY,
            b BOOLEAN,
            dc DECIMAL(7,2),
            s VARCHAR
        ) WITH ('connectors' = '[{
            "name": "t",
            "transport": {
                "name": "datagen",
                "config": {
                    "seed": 1,
                    "plan": [{
                        "limit": 100000
                    }]
                }
            },
            "preprocessor": [{
                "name": "logger",
                "message_oriented": true,
                "config": {}
            }]
        }]');
        CREATE MATERIALIZED VIEW v AS
        SELECT * FROM t;
        """

        # Rust implementation of the "logger" preprocessor.  `process` returns
        # the input unchanged (pass-through) while accumulating a byte count
        # shared across forks via Arc<Mutex<_>>.
        udfs = """
        use tracing::info;
        use std::sync::{Arc, Mutex};
        use feldera_adapterlib::format::{ParseError, Splitter};
        use feldera_adapterlib::preprocess::{
            Preprocessor, PreprocessorCreateError, PreprocessorFactory,
        };
        use feldera_types::preprocess::PreprocessorConfig;

        pub struct LoggerPreprocessor {
            count: Arc<Mutex<u64>>,
        }

        impl Preprocessor for LoggerPreprocessor {
            fn process(&mut self, data: &[u8]) -> (Vec<u8>, Vec<ParseError>) {
                let mut count = self.count.lock().unwrap();
                *count += data.len() as u64;
                // Log a message if the counter has crossed a Megabyte boundary
                if *count / (1024 * 1024) > (*count - data.len() as u64) / (1024 * 1024) {
                    info!("Processed {} bytes of data", *count);
                }
                (data.to_vec(), vec![])
            }

            fn fork(&self) -> Box<dyn Preprocessor> {
                Box::new(LoggerPreprocessor { count: Arc::clone(&self.count) })
            }

            fn splitter(&self) -> Option<Box<dyn Splitter>> {
                None
            }
        }

        pub struct LoggerPreprocessorFactory;

        impl PreprocessorFactory for LoggerPreprocessorFactory {
            fn create(
                &self,
                _config: &PreprocessorConfig,
            ) -> Result<Box<dyn Preprocessor>, PreprocessorCreateError> {
                Ok(Box::new(LoggerPreprocessor { count: Arc::new(Mutex::new(0)) }))
            }
        }
        """

        # Extra Cargo dependency needed by the UDF (`tracing` for `info!`).
        toml = """
        tracing = { version = "0.1.40" }
        """

        pipeline = PipelineBuilder(
            TEST_CLIENT,
            name=unique_pipeline_name("test_udps"),
            sql=sql,
            udf_rust=udfs,
            udf_toml=toml,
            runtime_config=RuntimeConfig(
                workers=FELDERA_TEST_NUM_WORKERS,
                hosts=FELDERA_TEST_NUM_HOSTS,
            ),
        ).create_or_replace()

        pipeline.start_paused()
        pipeline.resume()
        pipeline.wait_for_completion()

        # Confirm the preprocessor's megabyte-boundary log line was emitted.
        # NOTE(review): this scan blocks indefinitely if the message never
        # appears — it relies on the outer test-harness timeout to fail the
        # test; consider a bounded wait if that becomes a problem.
        for log in pipeline.logs():
            # This will loop forever if the message is not found
            if "bytes of data" in log:
                break

        # The preprocessor is a pass-through, so the ingested data must match
        # a known-good run exactly; compare via a deterministic query hash.
        # (Renamed from `hash` to avoid shadowing the `hash` builtin.)
        result_hash = pipeline.query_hash("SELECT * FROM v ORDER BY i, ti, si, bi")
        assert (
            result_hash == "0F5CD4C02B4670AB14FE753523D7D9962E251850D8AD247EC04ABC1531EB4AF3"
        ), "Hash does not match"

        pipeline.stop(force=True)
        pipeline.delete(True)
# Allow running this test module directly (e.g. `python test_udp.py`)
# in addition to discovery via a test runner.
if __name__ == "__main__":
    unittest.main()