Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions crates/adapters/src/integrated/delta_table/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1118,13 +1118,12 @@ impl DeltaTableInputEndpointInner {
)
.await;

if self.config.end_version.is_some()
&& self.config.end_version <= Some(new_version)
if let Some(end_version) = self.config.end_version
&& end_version <= new_version
{
info!(
"delta_table {}: reached table version {} specified as 'end_version' in connector config: stopping the connector",
&self.endpoint_name,
self.config.end_version.unwrap()
&self.endpoint_name, end_version
);
self.metrics
.phase
Expand Down
2 changes: 1 addition & 1 deletion crates/dbsp/src/circuit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub use dbsp_handle::{
LayoutError, Mode, StorageCacheConfig, StorageConfig, StorageOptions, adaptive_joins_enabled,
balancer_balance_tax, balancer_key_distribution_refresh_threshold,
balancer_min_absolute_improvement_threshold, balancer_min_relative_improvement_threshold,
splitter_output_chunk_size,
max_level0_batch_size_records, splitter_output_chunk_size,
};
pub use runtime::{
Error as RuntimeError, LocalStore, LocalStoreMarker, Runtime, RuntimeHandle, WeakRuntime,
Expand Down
9 changes: 9 additions & 0 deletions crates/dbsp/src/circuit/dbsp_handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::operator::dynamic::balance::{
use crate::storage::backend::StorageError;
use crate::storage::file::BLOOM_FILTER_FALSE_POSITIVE_RATE;
use crate::trace::MergerType;
use crate::trace::spine_async::MAX_LEVEL0_BATCH_SIZE_RECORDS;
use crate::{
Error as DbspError, RootCircuit, Runtime, RuntimeError, circuit::runtime::RuntimeHandle,
profile::Profiler,
Expand Down Expand Up @@ -392,6 +393,9 @@ pub struct DevTweaks {
///
/// Values outside the valid range, such as 0.0, disable Bloom filters.
pub bloom_false_positive_rate: f64,

/// Maximum batch size in records for level 0 merges.
pub max_level0_batch_size_records: u16,
}

impl Default for DevTweaks {
Expand All @@ -409,6 +413,7 @@ impl Default for DevTweaks {
balancer_balance_tax: BALANCE_TAX,
balancer_key_distribution_refresh_threshold: KEY_DISTRIBUTION_REFRESH_THRESHOLD,
adaptive_joins: false,
max_level0_batch_size_records: MAX_LEVEL0_BATCH_SIZE_RECORDS,
}
}
}
Expand Down Expand Up @@ -456,6 +461,10 @@ pub fn adaptive_joins_enabled() -> bool {
Runtime::with_dev_tweaks(|d| d.adaptive_joins)
}

/// Returns the `max_level0_batch_size_records` dev tweak of the current runtime.
///
/// The value is read via [`Runtime::with_dev_tweaks`]. The corresponding
/// `DevTweaks` field is documented as the maximum batch size, in records, for
/// level 0 merges.
pub fn max_level0_batch_size_records() -> u16 {
    Runtime::with_dev_tweaks(|tweaks| tweaks.max_level0_batch_size_records)
}

/// Configuration for storage in a [Runtime]-hosted circuit.
#[derive(Clone, derive_more::Debug)]
pub struct CircuitStorageConfig {
Expand Down
4 changes: 2 additions & 2 deletions crates/dbsp/src/operator/dynamic/balance/balancer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -905,11 +905,11 @@ impl BalancerInner {
// println!("fixed_policy: {:?}, hints: {:?}", fixed_policy, hints);

if fixed_policy.is_some()
&& hints.policy_hint.is_some()
&& let Some(policy_hint) = hints.policy_hint
&& fixed_policy != hints.policy_hint
{
return Err(BalancerError::InvalidPolicyHint(
hints.policy_hint.unwrap(),
policy_hint,
format!(
"the current policy {fixed_policy:?} can no longer be changed during the current transaction"
),
Expand Down
55 changes: 32 additions & 23 deletions crates/dbsp/src/operator/dynamic/group/lag.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use super::{GroupTransformer, Monotonicity};
use crate::Circuit;
use crate::algebra::{OrdIndexedZSetFactories, ZRingValue};
use crate::operator::dynamic::filter_map::DynFilterMap;
use crate::{
Expand Down Expand Up @@ -151,29 +152,37 @@ where
OV: DataTrait + ?Sized,
B: for<'a> DynFilterMap<DynItemRef<'a> = (&'a K, &'a V)>,
{
self.dyn_map_index(
&factories.lag_factories.input_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
encode(v, out_v);
}),
)
.set_persistent_id(
persistent_id
.map(|name| format!("{name}-ordered"))
.as_deref(),
)
.dyn_lag(persistent_id, &factories.lag_factories, offset, project)
.dyn_map_index(
&factories.output_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
let (v1, v2) = v.split();
k.clone_to(out_k);
decode(v1, v2, out_v);
}),
)
let name = if offset > 0 {
format!("lag_custom_order_{offset}")
} else {
format!("lead_custom_order_{}", -offset)
};

self.circuit().region(&name, || {
self.dyn_map_index(
&factories.lag_factories.input_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
encode(v, out_v);
}),
)
.set_persistent_id(
persistent_id
.map(|name| format!("{name}-ordered"))
.as_deref(),
)
.dyn_lag(persistent_id, &factories.lag_factories, offset, project)
.dyn_map_index(
&factories.output_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
let (v1, v2) = v.split();
k.clone_to(out_k);
decode(v1, v2, out_v);
}),
)
})
}
}

Expand Down
218 changes: 113 additions & 105 deletions crates/dbsp/src/operator/dynamic/group/topk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use dyn_clone::clone_box;

use super::{DiffGroupTransformer, Monotonicity, NonIncrementalGroupTransformer};
use crate::{
DBData, DBWeight, DynZWeight, RootCircuit, Stream, ZWeight,
Circuit, DBData, DBWeight, DynZWeight, RootCircuit, Stream, ZWeight,
algebra::{
AddAssignByRef, HasOne, HasZero, IndexedZSet, OrdIndexedZSet, OrdIndexedZSetFactories,
ZCursor, ZRingValue,
Expand Down Expand Up @@ -217,36 +217,38 @@ where
where
V2: DataTrait + ?Sized,
{
self.dyn_map_index(
&factories.inner_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
encode(v, out_v);
}),
)
.set_persistent_id(
persistent_id
.map(|name| format!("{name}-ordered"))
.as_deref(),
)
.dyn_group_transform(
persistent_id,
&factories.inner_factories,
&factories.inner_factories,
Box::new(DiffGroupTransformer::new(
factories.inner_factories.val_factory(),
TopK::asc(factories.inner_factories.val_factory(), k),
)),
)
.dyn_map_index(
&factories.input_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
decode(v).clone_to(out_v);
}),
)
self.circuit().region(&format!("topk_{k}"), || {
self.dyn_map_index(
&factories.inner_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
encode(v, out_v);
}),
)
.set_persistent_id(
persistent_id
.map(|name| format!("{name}-ordered"))
.as_deref(),
)
.dyn_group_transform(
persistent_id,
&factories.inner_factories,
&factories.inner_factories,
Box::new(DiffGroupTransformer::new(
factories.inner_factories.val_factory(),
TopK::asc(factories.inner_factories.val_factory(), k),
)),
)
.dyn_map_index(
&factories.input_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
decode(v).clone_to(out_v);
}),
)
})
}

/// See [`Stream::topk_rank_custom_order`].
Expand All @@ -263,33 +265,35 @@ where
V2: DataTrait + ?Sized,
OV: DataTrait + ?Sized,
{
self.dyn_map_index(
&factories.inner_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
encode(v, out_v);
}),
)
.set_persistent_id(
persistent_id
.map(|name| format!("{name}-ordered"))
.as_deref(),
)
.dyn_group_transform(
persistent_id,
&factories.inner_factories,
&factories.output_factories,
Box::new(DiffGroupTransformer::new(
factories.output_factories.val_factory(),
TopKRank::sparse(
self.circuit().region(&format!("topk_rank_{k}"), || {
self.dyn_map_index(
&factories.inner_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
encode(v, out_v);
}),
)
.set_persistent_id(
persistent_id
.map(|name| format!("{name}-ordered"))
.as_deref(),
)
.dyn_group_transform(
persistent_id,
&factories.inner_factories,
&factories.output_factories,
Box::new(DiffGroupTransformer::new(
factories.output_factories.val_factory(),
k,
rank_eq_func,
output_func,
),
)),
)
TopKRank::sparse(
factories.output_factories.val_factory(),
k,
rank_eq_func,
output_func,
),
)),
)
})
}

/// See [`Stream::topk_dense_rank_custom_order`].
Expand All @@ -306,33 +310,35 @@ where
V2: DataTrait + ?Sized,
OV: DataTrait + ?Sized,
{
self.dyn_map_index(
&factories.inner_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
encode(v, out_v);
}),
)
.set_persistent_id(
persistent_id
.map(|name| format!("{name}-ordered"))
.as_deref(),
)
.dyn_group_transform(
persistent_id,
&factories.inner_factories,
&factories.output_factories,
Box::new(DiffGroupTransformer::new(
factories.output_factories.val_factory(),
TopKRank::dense(
self.circuit().region(&format!("topk_dense_rank_{k}"), || {
self.dyn_map_index(
&factories.inner_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
encode(v, out_v);
}),
)
.set_persistent_id(
persistent_id
.map(|name| format!("{name}-ordered"))
.as_deref(),
)
.dyn_group_transform(
persistent_id,
&factories.inner_factories,
&factories.output_factories,
Box::new(DiffGroupTransformer::new(
factories.output_factories.val_factory(),
k,
rank_eq_func,
output_func,
),
)),
)
TopKRank::dense(
factories.output_factories.val_factory(),
k,
rank_eq_func,
output_func,
),
)),
)
})
}

/// See [`Stream::topk_row_number_custom_order`].
Expand All @@ -348,28 +354,30 @@ where
V2: DataTrait + ?Sized,
OV: DataTrait + ?Sized,
{
self.dyn_map_index(
&factories.inner_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
encode(v, out_v);
}),
)
.set_persistent_id(
persistent_id
.map(|name| format!("{name}-ordered"))
.as_deref(),
)
.dyn_group_transform(
persistent_id,
&factories.inner_factories,
&factories.output_factories,
Box::new(DiffGroupTransformer::new(
factories.output_factories.val_factory(),
TopKRowNumber::new(factories.output_factories.val_factory(), k, output_func),
)),
)
self.circuit().region(&format!("topk_row_number_{k}"), || {
self.dyn_map_index(
&factories.inner_factories,
Box::new(move |(k, v), kv| {
let (out_k, out_v) = kv.split_mut();
k.clone_to(out_k);
encode(v, out_v);
}),
)
.set_persistent_id(
persistent_id
.map(|name| format!("{name}-ordered"))
.as_deref(),
)
.dyn_group_transform(
persistent_id,
&factories.inner_factories,
&factories.output_factories,
Box::new(DiffGroupTransformer::new(
factories.output_factories.val_factory(),
TopKRowNumber::new(factories.output_factories.val_factory(), k, output_func),
)),
)
})
}
}

Expand Down
Loading
Loading