Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crates/adapterlib/src/transport.rs
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,8 @@ impl InputQueue<(), Box<dyn InputBuffer>> {
///
/// Use [`TransportInputEndpoint::open`] to obtain an [`InputReader`].
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `as_any` method requires each of the 14 implementors to copy the same one-line body. A helper supertrait with a blanket impl (e.g. `AsAnyArc`) or a derive macro could eliminate that boilerplate. Not a blocker, just worth noting.

pub trait InputReader: Send + Sync {
/// Converts a shared handle to this reader into an
/// `Arc<dyn Any + Send + Sync>`, consuming the `Arc<Self>` receiver.
///
/// The returned handle can be passed to `Arc::downcast` to try to recover
/// the concrete reader type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync>;

/// Requests the input reader to execute `command`.
fn request(&self, command: InputReaderCommand);

Expand Down
33 changes: 30 additions & 3 deletions crates/adapters/src/controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ use dbsp::{Runtime, WeakRuntime};
use enum_map::EnumMap;
use feldera_adapterlib::format::BufferSize;
use feldera_adapterlib::metrics::{ConnectorMetrics, ValueType};
use feldera_adapterlib::transport::{Resume, Watermark};
use feldera_adapterlib::transport::{InputReader, Resume, Watermark};
use feldera_ir::LirCircuit;
use feldera_storage::fbuf::slab::FBufSlabsStats;
use feldera_storage::histogram::{ExponentialHistogram, ExponentialHistogramSnapshot};
Expand Down Expand Up @@ -664,6 +664,10 @@ impl Controller {
.add_input_endpoint(endpoint_name, endpoint_config, Some(endpoint), resume_info)
}

/// Looks up the input reader registered under `endpoint_name`.
///
/// Delegates to the inner controller's `get_input_endpoint` and returns
/// its result unchanged: a shared handle to the reader, or `None` if the
/// inner lookup yields nothing.
pub fn get_input_endpoint(&self, endpoint_name: &str) -> Option<Arc<dyn InputReader>> {
self.inner.get_input_endpoint(endpoint_name)
}

/// Disconnect an existing output endpoint.
///
/// This method is asynchronous and may return before all endpoint
Expand Down Expand Up @@ -4271,6 +4275,20 @@ impl ControllerInit {
)
}

// Transfer HTTP input endpoints that are not affected by the program diff from the checkpoint to the new configuration.
checkpoint_config
.inputs
.iter()
.filter(|(_connector_name, connector_config)| {
connector_config.connector_config.transport.is_http_input()
&& !pipeline_diff.is_affected_relation(&connector_config.stream)
})
.for_each(|(connector_name, connector_config)| {
config
.inputs
.insert(connector_name.clone(), connector_config.clone());
});

// Merge `config` (the configuration provided by the pipeline manager)
// with `checkpoint_config` (the configuration read from the
// checkpoint).
Expand Down Expand Up @@ -5862,7 +5880,7 @@ impl ControllerInner {
.write()
.get_mut(&endpoint_id)
.unwrap()
.reader = Some(reader);
.reader = Some(Arc::from(reader));
}
Err(e) => {
self.status.inputs.write().remove(&endpoint_id);
Expand Down Expand Up @@ -5908,7 +5926,7 @@ impl ControllerInner {
.write()
.get_mut(&endpoint_id)
.unwrap()
.reader = Some(reader);
.reader = Some(Arc::from(reader));
}
Err(e) => {
self.status.inputs.write().remove(&endpoint_id);
Expand All @@ -5935,6 +5953,15 @@ impl ControllerInner {
Ok(endpoint_id)
}

/// Returns a shared handle to the reader of the input endpoint named
/// `endpoint_name`.
///
/// Yields `None` when the name cannot be resolved to an endpoint id, when
/// there is no status entry for that id, or when the entry currently has no
/// reader attached.
fn get_input_endpoint(&self, endpoint_name: &str) -> Option<Arc<dyn InputReader>> {
    let id = self.status.input_endpoint_id_by_name(endpoint_name).ok()?;

    // Keep the read guard alive only while the reader handle is cloned out.
    let inputs = self.status.inputs.read();
    let endpoint_status = inputs.get(&id)?;
    endpoint_status.reader.clone()
}

fn register_api_connection(&self) -> Result<(), u64> {
let num_connections = self.num_api_connections.load(Ordering::Acquire);

Expand Down
4 changes: 2 additions & 2 deletions crates/adapters/src/controller/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1140,7 +1140,7 @@ impl ControllerStatus {
if !self
.input_status()
.values()
.filter(|endpoint_stats| !endpoint_stats.endpoint_name.starts_with("api-ingress"))
.filter(|endpoint_stats| !endpoint_stats.endpoint_name.contains(".api-ingress-"))
.all(|endpoint_stats| endpoint_stats.is_eoi())
{
return false;
Expand Down Expand Up @@ -1887,7 +1887,7 @@ pub struct InputEndpointStatus {
pub progress: Mutex<Option<StepResults>>,

/// May be None during endpoint initialization.
pub reader: Option<Box<dyn InputReader>>,
pub reader: Option<Arc<dyn InputReader>>,

/// Endpoint support for fault tolerance.
pub fault_tolerance: Option<FtModel>,
Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/integrated/delta_table/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,10 @@ impl DeltaTableInputReader {
}

impl InputReader for DeltaTableInputReader {
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't this be a default method in the `InputReader` trait?

self
}

fn request(&self, command: InputReaderCommand) {
match command {
InputReaderCommand::Replay { .. } => panic!(
Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/integrated/postgres/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ impl PostgresInputReader {
}

impl InputReader for PostgresInputReader {
/// Returns this `Arc<PostgresInputReader>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

fn request(&self, command: InputReaderCommand) {
match command.as_nonft().unwrap() {
NonFtInputReaderCommand::Queue => self.inner.queue.queue(),
Expand Down
52 changes: 32 additions & 20 deletions crates/adapters/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ use std::collections::hash_map::Entry;
use std::collections::{HashMap, VecDeque};
use std::convert::Infallible;
use std::ffi::OsStr;
use std::hash::{BuildHasherDefault, DefaultHasher};
use std::hash::{BuildHasherDefault, DefaultHasher, Hash, Hasher};
use std::io::ErrorKind;
use std::mem::take;
use std::ops::{Deref, DerefMut};
Expand Down Expand Up @@ -2005,13 +2005,22 @@ struct IngressArgs {
force: bool,
}

/// Create a new HTTP input endpoint.
async fn create_http_input_endpoint(
/// Lookup or create an HTTP input endpoint.
async fn get_or_create_http_input_endpoint(
state: &WebData<ServerState>,
format: FormatConfig,
table_name: String,
endpoint_name: String,
) -> Result<HttpInputEndpoint, PipelineError> {
let controller = state.controller()?;

// We rely on the name to uniquely encode connector configuration.
if let Some(reader) = controller.get_input_endpoint(&endpoint_name)
&& let Ok(endpoint) = reader.as_any().downcast::<HttpInputEndpoint>()
{
return Ok(endpoint.as_ref().clone());
}

let config = HttpInputConfig {
name: endpoint_name.clone(),
};
Expand All @@ -2035,8 +2044,6 @@ async fn create_http_input_endpoint(
},
};

// Connect endpoint.
let controller = state.controller()?;
if controller.register_api_connection().is_err() {
return Err(PipelineError::ApiConnectionLimit);
}
Expand Down Expand Up @@ -2064,38 +2071,43 @@ async fn input_endpoint(
args: Query<IngressArgs>,
payload: Payload,
) -> Result<HttpResponse, PipelineError> {
// A local cache of HTTP input endpoints. We create one endpoint per (table_name, format) pair
// in the controller. Caching them in a thread-local variable avoids acquiring the global controller
// lock on every HTTP request.
thread_local! {
static TABLE_ENDPOINTS: RefCell<HashMap<(String, FormatConfig), HttpInputEndpoint, BuildHasherDefault<DefaultHasher>>> = const {
static TABLE_ENDPOINTS: RefCell<HashMap<String, HttpInputEndpoint, BuildHasherDefault<DefaultHasher>>> = const {
RefCell::new(HashMap::with_hasher(BuildHasherDefault::new()))
};
}
debug!("{req:?}");

let table_name = path.into_inner();

// Generate endpoint name.
let endpoint_name = format!("api-ingress-{table_name}-{}", Uuid::new_v4());
let format = parser_config_from_http_request(&endpoint_name, &args.format, &req)?;
// Generate deterministic endpoint name per (table_name, FormatConfig).
let parser_endpoint_name = format!("{table_name}.api-ingress-{}", args.format);
let format = parser_config_from_http_request(&parser_endpoint_name, &args.format, &req)?;

let cached_endpoint = TABLE_ENDPOINTS.with(|endpoints| {
endpoints
.borrow()
.get(&(table_name.clone(), format.clone()))
.cloned()
});
let mut endpoint_hasher = DefaultHasher::new();
table_name.hash(&mut endpoint_hasher);
format.hash(&mut endpoint_hasher);
let endpoint_hash = endpoint_hasher.finish();

let endpoint_name = format!("{table_name}.api-ingress-{endpoint_hash:016x}");

let cached_endpoint =
TABLE_ENDPOINTS.with(|endpoints| endpoints.borrow().get(&endpoint_name).cloned());
let endpoint = match cached_endpoint {
Some(endpoint) => endpoint,
None => {
let endpoint = create_http_input_endpoint(
let endpoint = get_or_create_http_input_endpoint(
&state,
format.clone(),
table_name.clone(),
endpoint_name.clone(),
)
.await?;
TABLE_ENDPOINTS.with_borrow_mut(|endpoints| {
endpoints.insert((table_name, format), endpoint.clone())
});
TABLE_ENDPOINTS
.with_borrow_mut(|endpoints| endpoints.insert(endpoint_name, endpoint.clone()));
endpoint
}
};
Expand Down Expand Up @@ -2188,7 +2200,7 @@ async fn output_endpoint(
let table_name = path.into_inner();

// Generate endpoint name depending on the query and output mode.
let endpoint_name = format!("api-{}-{table_name}", Uuid::new_v4());
let endpoint_name = format!("{table_name}.api-{}", Uuid::new_v4());

// debug!("Endpoint name: '{endpoint_name}'");

Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/transport/adhoc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,10 @@ impl TransportInputEndpoint for AdHocInputEndpoint {
}

impl InputReader for AdHocInputEndpoint {
/// Returns this `Arc<AdHocInputEndpoint>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

fn request(&self, command: InputReaderCommand) {
match command {
InputReaderCommand::Replay { data, .. } => {
Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/transport/clock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,10 @@ impl ClockReader {
}

impl InputReader for ClockReader {
/// Returns this `Arc<ClockReader>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

/// Forwards `command` through `self.sender`.
///
/// The result of `send` is explicitly discarded, so a failed send is not
/// reported to the caller.
fn request(&self, command: InputReaderCommand) {
let _ = self.sender.send(command);
}
Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/transport/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,10 @@ impl FileInputReader {
}

impl InputReader for FileInputReader {
/// Returns this `Arc<FileInputReader>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: std::sync::Arc<Self>) -> std::sync::Arc<dyn std::any::Any + Send + Sync> {
self
}

fn request(&self, command: super::InputReaderCommand) {
let _ = self.sender.send(command);
self.unparker.unpark();
Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/transport/http/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,10 @@ impl TransportInputEndpoint for HttpInputEndpoint {
}

impl InputReader for HttpInputEndpoint {
/// Returns this `Arc<HttpInputEndpoint>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

/// Forwards `command` through `self.sender`.
///
/// The result of `send` is explicitly discarded, so a failed send is not
/// reported to the caller.
fn request(&self, command: InputReaderCommand) {
let _ = self.sender.send(command);
}
Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/transport/kafka/ft/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,10 @@ impl TransportInputEndpoint for KafkaFtInputEndpoint {
}

impl InputReader for KafkaFtInputReader {
/// Returns this `Arc<KafkaFtInputReader>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

fn request(&self, command: InputReaderCommand) {
let _ = self.command_sender.send(command);
self.poller_thread.unpark();
Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/transport/nats/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1085,6 +1085,10 @@ impl Canceller {
}

impl InputReader for NatsReader {
/// Returns this `Arc<NatsReader>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

/// Forwards `command` through `self.command_sender`.
///
/// The result of `send` is explicitly discarded, so a failed send is not
/// reported to the caller.
fn request(&self, command: InputReaderCommand) {
let _ = self.command_sender.send(command);
}
Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/transport/nexmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ impl InputGenerator {
}

impl InputReader for InputGenerator {
/// Returns this `Arc<InputGenerator>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

fn request(&self, command: InputReaderCommand) {
match self.table {
NexmarkTable::Bid => {
Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/transport/pubsub/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,10 @@ impl PubSubReader {
}

impl InputReader for PubSubReader {
/// Returns this `Arc<PubSubReader>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

/// Converts `command` with `as_nonft()` and forwards the unwrapped value
/// through `self.state_sender`, discarding the result of `send`.
///
/// # Panics
///
/// Panics via `unwrap()` if `as_nonft()` does not yield a value.
// NOTE(review): presumably that happens for commands that only apply to
// fault-tolerant readers; confirm against `InputReaderCommand::as_nonft`.
fn request(&self, command: InputReaderCommand) {
let _ = self.state_sender.send(command.as_nonft().unwrap());
}
Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/transport/s3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ struct S3InputReader {
}

impl InputReader for S3InputReader {
/// Returns this `Arc<S3InputReader>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

/// Forwards `command` through `self.sender` using `send_blocking`.
///
/// The result of the send is explicitly discarded, so a failed send is not
/// reported to the caller.
fn request(&self, command: InputReaderCommand) {
let _ = self.sender.send_blocking(command);
}
Expand Down
4 changes: 4 additions & 0 deletions crates/adapters/src/transport/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,10 @@ impl UrlInputReader {
}

impl InputReader for UrlInputReader {
/// Returns this `Arc<UrlInputReader>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

/// Forwards `command` through `self.sender`.
///
/// The result of `send` is explicitly discarded, so a failed send is not
/// reported to the caller.
fn request(&self, command: InputReaderCommand) {
let _ = self.sender.send(command);
}
Expand Down
4 changes: 4 additions & 0 deletions crates/datagen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -993,6 +993,10 @@ fn assign_work(
}

impl InputReader for InputGenerator {
/// Returns this `Arc<InputGenerator>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

fn request(&self, command: InputReaderCommand) {
let _ = self.command_sender.send(command);
self.datagen_unparker.unpark();
Expand Down
4 changes: 4 additions & 0 deletions crates/feldera-types/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1669,6 +1669,10 @@ impl TransportConfig {
| TransportConfig::ClockInput(_)
)
}

/// Returns `true` if this transport configuration is the
/// `TransportConfig::HttpInput` variant, and `false` for every other variant.
pub fn is_http_input(&self) -> bool {
matches!(self, TransportConfig::HttpInput(_))
}
}

/// Data format specification used to parse raw data received from the
Expand Down
7 changes: 7 additions & 0 deletions crates/feldera-types/src/pipeline_diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,13 @@ impl PipelineDiff {
|| self.modified_output_connectors.contains(&connector_name)
}

/// Reports whether the program diff considers `relation_name` affected.
///
/// The question is forwarded to the stored program diff; when no program
/// diff is present the answer is `false`.
pub fn is_affected_relation(&self, relation_name: &str) -> bool {
    match self.program_diff.as_ref() {
        Some(program_diff) => program_diff.is_affected_relation(relation_name),
        None => false,
    }
}

/// Returns a shared reference to the stored program diff, or `None` when
/// this pipeline diff holds no program diff.
pub fn program_diff(&self) -> Option<&ProgramDiff> {
self.program_diff.as_ref()
}
Expand Down
4 changes: 4 additions & 0 deletions crates/iceberg/src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ impl IcebergInputReader {
}

impl InputReader for IcebergInputReader {
/// Returns this `Arc<IcebergInputReader>` as an `Arc<dyn Any + Send + Sync>`
/// so that it can later be downcast back to the concrete type.
fn as_any(self: Arc<Self>) -> Arc<dyn std::any::Any + Send + Sync> {
self
}

fn request(&self, command: InputReaderCommand) {
match command.as_nonft().unwrap() {
NonFtInputReaderCommand::Queue => self.inner.queue.queue(),
Expand Down
Loading
Loading