-
Notifications
You must be signed in to change notification settings - Fork 108
Expand file tree
/
Copy pathpreprocess.rs
More file actions
103 lines (91 loc) · 3.32 KB
/
preprocess.rs
File metadata and controls
103 lines (91 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
//! Data preprocessing layer for connectors.
//!
//! This module provides a preprocessing framework that allows data transformation
//! before it reaches the parser.
//!
//! The preprocessing layer fits between transport and parsing in the data pipeline:
//!
//! ```text
//! Transport → Preprocessor → Parser → Circuit
//! ```
use crate::format::{ParseError, Splitter};
use feldera_types::preprocess::PreprocessorConfig;
use std::collections::BTreeMap;
use std::fmt::{Display, Formatter, Result as FmtResult};
use std::sync::Arc;
/// Errors that can occur during creation of a preprocessor.
///
/// Every variant carries a `String` payload; the `Display` implementation
/// appends that payload to a variant-specific message prefix.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreprocessorCreateError {
/// Preprocessing configuration is invalid.
///
/// The payload is free-form text describing the problem.
ConfigurationError(String),
/// Implementation for the factory generating the preprocessor was not found.
///
/// The payload is free-form text that is appended to the error message
/// (presumably the name that was looked up; confirm against callers).
FactoryNotFound(String),
}
impl Display for PreprocessorCreateError {
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
match self {
PreprocessorCreateError::ConfigurationError(msg) => {
write!(f, "Configuration error: {}", msg)
}
PreprocessorCreateError::FactoryNotFound(msg) => {
write!(
f,
"Could not locate factory generating preprocessor: {}",
msg
)
}
}
}
}
// Marker implementation so the error can be used wherever a
// `std::error::Error` is expected (e.g. boxed as `dyn Error`). No methods are
// overridden, so the trait's default implementations apply.
impl std::error::Error for PreprocessorCreateError {}
/// Trait for preprocessing raw data before parsing.
///
/// Implementations must be `Send + Sync`.
pub trait Preprocessor: Send + Sync {
/// Process raw input data and return transformed data.
///
/// # Arguments
/// * `data` - Raw input data bytes
///
/// # Returns
/// A pair `(output, errors)`: the transformed data bytes and the list of
/// [`ParseError`]s reported while processing `data`.
fn process(&mut self, data: &[u8]) -> (Vec<u8>, Vec<ParseError>);
/// Create a new preprocessor with the same configuration as `self`.
///
/// Used by multithreaded transport endpoints to create multiple parallel
/// input pipelines.
fn fork(&self) -> Box<dyn Preprocessor>;
/// Returns an object that can be used to break a stream of incoming data
/// into complete records to pass to [`Preprocessor::process`].
///
/// If this returns `None`, the parser's splitter is used instead.
fn splitter(&self) -> Option<Box<dyn Splitter>>;
}
/// A factory that can create a new [`Preprocessor`] object.
///
/// Implementations must be `Send + Sync`.
pub trait PreprocessorFactory: Send + Sync {
/// Create a new preprocessor based on the supplied configuration.
///
/// # Arguments
///
/// * `config` - Preprocessor-specific configuration.
///
/// # Returns
///
/// A boxed [`Preprocessor`] on success.
///
/// # Errors
///
/// Returns a [`PreprocessorCreateError`] when the preprocessor cannot be
/// created (presumably `ConfigurationError` for a rejected `config`; the
/// exact variant is up to the implementation).
fn create(
&self,
config: &PreprocessorConfig,
) -> Result<Box<dyn Preprocessor>, PreprocessorCreateError>;
}
/// A registry where all factories that can create Preprocessors are registered.
///
/// The derived `Default` yields an empty registry.
#[derive(Default)]
pub struct PreprocessorRegistry {
// Factories keyed by the `'static` name they were registered under; each
// factory is held in an `Arc` so lookups can hand out shared handles.
registered: BTreeMap<&'static str, Arc<dyn PreprocessorFactory>>,
}
impl PreprocessorRegistry {
pub fn new() -> Self {
Self {
registered: BTreeMap::new(),
}
}
/// Register a new factory under the specified name
pub fn register(&mut self, name: &'static str, factory: Box<dyn PreprocessorFactory>) {
self.registered.insert(name, Arc::from(factory));
}
pub fn get(&self, name: &str) -> Option<Arc<dyn PreprocessorFactory>> {
self.registered.get(name).cloned()
}
}