Skip to content
Open
401 changes: 322 additions & 79 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ rkyv = { version = "0.7.45", default-features = false }
rmp-serde = "1.3.0"
rmpv = "1.3.0"
rstest = "0.15"
s2-sdk = "0.26.0"
# Make sure this is the same rustls version used by the `tonic` crate.
# See the `ensure_default_crypto_provider` function.
rustls = "0.23.12"
Expand Down
3 changes: 3 additions & 0 deletions crates/adapters/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ default = [
"with-pubsub",
"with-redis",
"with-nats",
"with-s2",
]
with-kafka = ["rdkafka"]
with-deltalake = ["deltalake", "deltalake-catalog-unity"]
Expand All @@ -37,6 +38,7 @@ with-avro = [
with-nexmark = ["dbsp_nexmark"]
with-redis = ["redis", "r2d2"]
with-nats = []
with-s2 = ["s2-sdk"]
# Run delta table tests against an S3 bucket. Requires S3 authentication key
# to be provided via an environment variable.
delta-s3-test = []
Expand Down Expand Up @@ -69,6 +71,7 @@ awc = { workspace = true, features = [
async-nats = { workspace = true }
async-stream = { workspace = true }
anyhow = { workspace = true, features = ["backtrace"] }
s2-sdk = { workspace = true, optional = true }
bytestring = "1.4.0"
circular-queue = { workspace = true, features = ["serde_support"] }
crossbeam = { workspace = true }
Expand Down
111 changes: 109 additions & 2 deletions crates/adapters/src/format/json/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ use feldera_adapterlib::format::Splitter;
use feldera_sqllib::Variant;
use feldera_types::format::json::{JsonLines, JsonParserConfig, JsonUpdateFormat};
use serde::Deserialize;
use serde_json::json;
use serde_json::value::RawValue;
use serde_json::{Value as JsonValue, json};
use serde_urlencoded::Deserializer as UrlDeserializer;
use std::borrow::Cow;

Expand Down Expand Up @@ -255,6 +255,51 @@ fn validate_parser_config(
Ok(())
}

/// Returns `true` if `key` is one of the action keys used by the
/// `insert_delete` envelope format (`insert`, `delete`, or `update`).
fn is_insert_delete_action_key(key: &str) -> bool {
    key == "insert" || key == "delete" || key == "update"
}

/// Heuristically decides whether a parsed JSON value is a single
/// `insert_delete`-style envelope object: an object whose keys are exactly
/// one action key (`insert`/`delete`/`update`) plus an optional `table` key,
/// where the action key maps to an object or an array.
fn looks_like_insert_delete_envelope_object(value: &JsonValue) -> bool {
    let Some(object) = value.as_object() else {
        return false;
    };

    // Scan the keys: allow `table`, require exactly one action key, and
    // reject anything else.
    let mut action = None;
    for key in object.keys() {
        if key == "table" {
            continue;
        }
        // A non-action key, or a second action key, disqualifies the object.
        if !is_insert_delete_action_key(key) || action.replace(key).is_some() {
            return false;
        }
    }

    // The single action key must carry a row payload: an object or an array.
    match action {
        Some(key) => matches!(
            object.get(key),
            Some(JsonValue::Object(_) | JsonValue::Array(_))
        ),
        None => false,
    }
}

/// Heuristically decides whether a raw JSON update looks like an
/// `insert_delete` envelope: either a single envelope object, or a non-empty
/// array made up entirely of envelope objects. Unparseable input is never
/// considered an envelope.
fn looks_like_insert_delete_envelope(update: &RawValue) -> bool {
    let value = match serde_json::from_str::<JsonValue>(update.get()) {
        Ok(value) => value,
        Err(_) => return false,
    };

    if looks_like_insert_delete_envelope_object(&value) {
        return true;
    }

    // Otherwise accept only a non-empty array where every element is an
    // envelope object.
    match value.as_array() {
        Some(items) if !items.is_empty() => {
            items.iter().all(looks_like_insert_delete_envelope_object)
        }
        _ => false,
    }
}

struct JsonParser {
/// Input handle to push parsed data to.
input_stream: Box<dyn DeCollectionStream>,
Expand All @@ -263,6 +308,18 @@ struct JsonParser {
}

impl JsonParser {
/// If a failed raw-format update actually looks like an insert/delete
/// envelope, builds a friendlier error that points the user at the
/// `insert_delete` update format; otherwise returns `None` so the original
/// parse errors stand.
fn raw_format_insert_delete_mismatch_error(update: &RawValue) -> Option<ParseError> {
    if !looks_like_insert_delete_envelope(update) {
        return None;
    }

    Some(ParseError::text_envelope_error(
        "raw JSON update format expects plain rows, but received an insert/delete/update envelope".to_string(),
        update.get(),
        Some(Cow::from(
            "Set `format.config.update_format` to `insert_delete` for payloads like {\"insert\": {...}} or {\"delete\": {...}}.",
        )),
    ))
}

fn new(input_stream: Box<dyn DeCollectionStream>, config: JsonParserConfig) -> Self {
Self {
input_stream,
Expand Down Expand Up @@ -404,7 +461,18 @@ impl Parser for JsonParser {
self.apply_update::<WeightedUpdate<_>>(update, &metadata, &mut errors)
}
JsonUpdateFormat::Raw => {
self.apply_update::<&RawValue>(update, &metadata, &mut errors)
let errors_before = errors.len();
self.apply_update::<&RawValue>(update, &metadata, &mut errors);
// On parse failure, check if the user sent insert/delete
// envelopes with the raw format and provide a better hint.
if errors.len() > errors_before {
if let Some(error) =
Self::raw_format_insert_delete_mismatch_error(update)
{
errors.truncate(errors_before);
errors.push(error);
}
}
}
JsonUpdateFormat::Redis | JsonUpdateFormat::Snowflake => {
panic!("Unexpected update format: {:?}", &self.config.update_format)
Expand Down Expand Up @@ -851,6 +919,45 @@ mod test {
, (r#"[[false, 100, "foo"]]"#.to_string(), Vec::new())],
vec![MockUpdate::with_polarity(TestStruct::new(true, 0, Some("e")), true), MockUpdate::with_polarity(TestStruct::new(false, 100, Some("foo")), true)],
),
// raw: insert/delete envelope hint.
TestCase::new(
JsonParserConfig {
update_format: JsonUpdateFormat::Raw,
json_flavor: JsonFlavor::Default,
array: false,
lines: JsonLines::Single,
},
vec![(
r#"{"insert": {"b": true, "i": 0}}"#.to_string(),
vec![ParseError::text_envelope_error(
"raw JSON update format expects plain rows, but received an insert/delete/update envelope".to_string(),
"{\"insert\": {\"b\": true, \"i\": 0}}",
Some(Cow::from(
"Set `format.config.update_format` to `insert_delete` for payloads like {\"insert\": {...}} or {\"delete\": {...}}.",
)),
)],
)],
Vec::new(),
),
TestCase::new(
JsonParserConfig {
update_format: JsonUpdateFormat::Raw,
json_flavor: JsonFlavor::Default,
array: true,
lines: JsonLines::Single,
},
vec![(
r#"[{"insert": {"b": true, "i": 0}}]"#.to_string(),
vec![ParseError::text_envelope_error(
"raw JSON update format expects plain rows, but received an insert/delete/update envelope".to_string(),
"[{\"insert\": {\"b\": true, \"i\": 0}}]",
Some(Cow::from(
"Set `format.config.update_format` to `insert_delete` for payloads like {\"insert\": {...}} or {\"delete\": {...}}.",
)),
)],
)],
Vec::new(),
),
// raw: invalid json.
TestCase::new(
JsonParserConfig {
Expand Down
17 changes: 16 additions & 1 deletion crates/adapters/src/transport.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ pub(crate) mod kafka;
#[cfg(feature = "with-nats")]
pub(crate) mod nats;

#[cfg(feature = "with-s2")]
pub(crate) mod s2;

#[cfg(feature = "with-nexmark")]
mod nexmark;

Expand All @@ -68,6 +71,9 @@ use crate::transport::kafka::{KafkaFtInputEndpoint, KafkaFtOutputEndpoint, Kafka
#[cfg(feature = "with-nats")]
use crate::transport::nats::NatsInputEndpoint;

#[cfg(feature = "with-s2")]
use crate::transport::s2::{S2InputEndpoint, S2OutputEndpoint};

#[cfg(feature = "with-nexmark")]
use crate::transport::nexmark::NexmarkEndpoint;
use crate::transport::s3::S3InputEndpoint;
Expand Down Expand Up @@ -98,6 +104,10 @@ pub fn input_transport_config_to_endpoint(
TransportConfig::NatsInput(config) => Box::new(NatsInputEndpoint::new(config)?),
#[cfg(not(feature = "with-nats"))]
TransportConfig::NatsInput(_) => return Ok(None),
#[cfg(feature = "with-s2")]
TransportConfig::S2Input(config) => Box::new(S2InputEndpoint::new(config)?),
#[cfg(not(feature = "with-s2"))]
TransportConfig::S2Input(_) => return Ok(None),
#[cfg(feature = "with-pubsub")]
TransportConfig::PubSubInput(config) => Box::new(PubSubInputEndpoint::new(config.clone())?),
#[cfg(not(feature = "with-pubsub"))]
Expand All @@ -120,7 +130,8 @@ pub fn input_transport_config_to_endpoint(
| TransportConfig::PostgresOutput(_)
| TransportConfig::HttpOutput
| TransportConfig::RedisOutput(_)
| TransportConfig::IcebergInput(_) => return Ok(None),
| TransportConfig::IcebergInput(_)
| TransportConfig::S2Output(_) => return Ok(None),
};
Ok(Some(endpoint))
}
Expand Down Expand Up @@ -156,6 +167,10 @@ pub fn output_transport_config_to_endpoint(
TransportConfig::RedisOutput(config) => {
Ok(Some(Box::new(RedisOutputEndpoint::new(config)?)))
}
#[cfg(feature = "with-s2")]
TransportConfig::S2Output(config) => {
Ok(Some(Box::new(S2OutputEndpoint::new(config)?)))
}
_ => Ok(None),
}
}
Loading