Skip to content

Commit 350e5dd

Browse files
committed
Wire up shared indexing split cache between indexers and compactors
1 parent 00adb52 commit 350e5dd

10 files changed

Lines changed: 132 additions & 25 deletions

File tree

quickwit/quickwit-cli/src/tool.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use std::io::{IsTerminal, Stdout, Write, stdout};
1717
use std::num::NonZeroUsize;
1818
use std::path::PathBuf;
1919
use std::str::FromStr;
20+
use std::sync::Arc;
2021
use std::time::{Duration, Instant};
2122
use std::{env, fmt, io};
2223

@@ -37,9 +38,9 @@ use quickwit_config::{
3738
TransformConfig,
3839
};
3940
use quickwit_index_management::{IndexService, clear_cache_directory};
40-
use quickwit_indexing::IndexingPipeline;
4141
use quickwit_indexing::actors::IndexingService;
4242
use quickwit_indexing::models::{DetachIndexingPipeline, IndexingStatistics, SpawnPipeline};
43+
use quickwit_indexing::{IndexingPipeline, IndexingSplitCache};
4344
use quickwit_ingest::IngesterPool;
4445
use quickwit_metastore::IndexMetadataResponseExt;
4546
use quickwit_proto::indexing::CpuCapacity;
@@ -417,6 +418,8 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result<
417418
&HashSet::from_iter([QuickwitService::Indexer]),
418419
)?;
419420
let universe = Universe::new();
421+
let split_cache =
422+
Arc::new(IndexingSplitCache::from_config(&indexer_config, &config.data_dir_path).await?);
420423
let indexing_server = IndexingService::new(
421424
config.node_id.clone(),
422425
config.data_dir_path.clone(),
@@ -428,6 +431,7 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result<
428431
IngesterPool::default(),
429432
storage_resolver,
430433
EventBroker::default(),
434+
split_cache,
431435
)
432436
.await?;
433437
let (indexing_server_mailbox, indexing_server_handle) =

quickwit/quickwit-compaction/src/compaction_pipeline.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ pub struct PipelineStatusUpdate {
5656
pub index_uid: IndexUid,
5757
pub source_id: SourceId,
5858
pub split_ids: Vec<SplitId>,
59-
pub merge_level: u64,
6059
pub status: PipelineStatus,
6160
}
6261

@@ -270,7 +269,6 @@ impl CompactionPipeline {
270269
.map(|split| split.split_id().to_string())
271270
.collect(),
272271
status: self.status.clone(),
273-
merge_level: self.merge_operation.merge_level() as u64,
274272
}
275273
}
276274

quickwit/quickwit-compaction/src/compactor_supervisor.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ pub struct CompactorSupervisor {
5959
io_throughput_limiter: Option<Limiter>,
6060
metastore: MetastoreServiceClient,
6161
storage_resolver: StorageResolver,
62+
split_cache: Arc<IndexingSplitCache>,
6263
max_concurrent_split_uploads: usize,
6364
event_broker: EventBroker,
6465

@@ -75,6 +76,7 @@ impl CompactorSupervisor {
7576
io_throughput_limiter: Option<Limiter>,
7677
metastore: MetastoreServiceClient,
7778
storage_resolver: StorageResolver,
79+
split_cache: Arc<IndexingSplitCache>,
7880
max_concurrent_split_uploads: usize,
7981
event_broker: EventBroker,
8082
compaction_root_directory: TempDirectory,
@@ -87,6 +89,7 @@ impl CompactorSupervisor {
8789
io_throughput_limiter,
8890
metastore,
8991
storage_resolver,
92+
split_cache,
9093
max_concurrent_split_uploads,
9194
event_broker,
9295
compaction_root_directory,
@@ -182,8 +185,7 @@ impl CompactorSupervisor {
182185

183186
let index_storage_uri = Uri::from_str(&assignment.index_storage_uri)?;
184187
let index_storage = self.storage_resolver.resolve(&index_storage_uri).await?;
185-
let split_cache = Arc::new(IndexingSplitCache::no_caching());
186-
let split_store = IndexingSplitStore::new(index_storage, split_cache);
188+
let split_store = IndexingSplitStore::new(index_storage, self.split_cache.clone());
187189

188190
let doc_mapper = build_doc_mapper(&doc_mapping, &search_settings)?;
189191
let merge_policy = merge_policy_from_settings(&indexing_settings);
@@ -332,6 +334,7 @@ mod tests {
332334
None,
333335
metastore,
334336
StorageResolver::for_test(),
337+
Arc::new(IndexingSplitCache::no_caching()),
335338
2,
336339
EventBroker::default(),
337340
TempDirectory::for_test(),
@@ -541,6 +544,7 @@ mod tests {
541544
None,
542545
metastore,
543546
StorageResolver::for_test(),
547+
Arc::new(IndexingSplitCache::no_caching()),
544548
2,
545549
EventBroker::default(),
546550
TempDirectory::for_test(),
@@ -581,15 +585,13 @@ mod tests {
581585
source_id: "src".to_string(),
582586
split_ids: vec!["s1".to_string(), "s2".to_string()],
583587
status: PipelineStatus::InProgress,
584-
merge_level: 1,
585588
},
586589
PipelineStatusUpdate {
587590
task_id: "task-2".to_string(),
588591
index_uid: quickwit_proto::types::IndexUid::for_test("test-index", 0),
589592
source_id: "src".to_string(),
590593
split_ids: vec!["s3".to_string()],
591594
status: PipelineStatus::Completed,
592-
merge_level: 1,
593595
},
594596
PipelineStatusUpdate {
595597
task_id: "task-3".to_string(),
@@ -599,7 +601,6 @@ mod tests {
599601
status: PipelineStatus::Failed {
600602
error: "boom".to_string(),
601603
},
602-
merge_level: 1,
603604
},
604605
];
605606

quickwit/quickwit-compaction/src/lib.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,22 @@
1414

1515
#![deny(clippy::disallowed_methods)]
1616

17-
#[allow(dead_code)]
1817
mod compaction_pipeline;
19-
#[allow(dead_code)]
2018
mod compactor_supervisor;
2119
mod metrics;
2220
pub mod planner;
2321

2422
pub type TaskId = String;
2523

24+
use std::sync::Arc;
25+
2626
pub use compactor_supervisor::CompactorSupervisor;
2727
use quickwit_actors::{Mailbox, Universe};
2828
use quickwit_common::io;
2929
use quickwit_common::pubsub::EventBroker;
3030
use quickwit_common::temp_dir::TempDirectory;
3131
use quickwit_config::CompactorConfig;
32+
use quickwit_indexing::IndexingSplitCache;
3233
use quickwit_proto::compaction::CompactionPlannerServiceClient;
3334
use quickwit_proto::metastore::MetastoreServiceClient;
3435
use quickwit_proto::types::{IndexUid, NodeId, SourceId};
@@ -47,11 +48,11 @@ pub async fn start_compactor_service(
4748
compactor_config: &CompactorConfig,
4849
metastore: MetastoreServiceClient,
4950
storage_resolver: StorageResolver,
51+
split_cache: Arc<IndexingSplitCache>,
5052
event_broker: EventBroker,
5153
compaction_root_directory: TempDirectory,
5254
) -> anyhow::Result<Mailbox<CompactorSupervisor>> {
5355
info!("starting compactor service");
54-
// TODO: configure this for real
5556
let io_throughput_limiter = compactor_config.max_merge_write_throughput.map(io::limiter);
5657
let supervisor = CompactorSupervisor::new(
5758
node_id,
@@ -60,6 +61,7 @@ pub async fn start_compactor_service(
6061
io_throughput_limiter,
6162
metastore,
6263
storage_resolver,
64+
split_cache,
6365
compactor_config.max_concurrent_split_uploads,
6466
event_broker,
6567
compaction_root_directory,

quickwit/quickwit-config/src/node_config/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ impl Default for IndexerConfig {
230230
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
231231
#[serde(deny_unknown_fields)]
232232
pub struct CompactorConfig {
233-
/// Maximum number of concurrent merge pipelines. Defaults to 2/3 of CPU count.
233+
/// Maximum number of concurrent merge pipelines. Defaults to CPU count.
234234
#[serde(default = "CompactorConfig::default_max_concurrent_pipelines")]
235235
pub max_concurrent_pipelines: NonZeroUsize,
236236
/// Maximum number of concurrent split uploads across all pipelines.

quickwit/quickwit-indexing/src/actors/indexing_service.rs

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ use quickwit_actors::{
2525
Observation,
2626
};
2727
use quickwit_cluster::Cluster;
28-
use quickwit_common::fs::get_cache_directory_path;
2928
use quickwit_common::pubsub::EventBroker;
3029
use quickwit_common::temp_dir;
3130
use quickwit_config::{
@@ -56,7 +55,7 @@ use tracing::{debug, error, info, warn};
5655

5756
use crate::models::{DetachIndexingPipeline, ObservePipeline, SpawnPipeline};
5857
use crate::source::{AssignShards, Assignment};
59-
use crate::split_store::{IndexingSplitCache, SplitStoreQuota};
58+
use crate::split_store::IndexingSplitCache;
6059
use crate::{IndexingPipeline, IndexingPipelineParams, IndexingSplitStore, IndexingStatistics};
6160

6261
/// Name of the indexing directory, usually located at `<data_dir_path>/indexing`.
@@ -94,7 +93,7 @@ pub struct IndexingService {
9493
storage_resolver: StorageResolver,
9594
indexing_pipelines: HashMap<PipelineUid, PipelineHandle>,
9695
counters: IndexingServiceCounters,
97-
local_split_store: Arc<IndexingSplitCache>,
96+
split_cache: Arc<IndexingSplitCache>,
9897
max_concurrent_split_uploads: usize,
9998
cooperative_indexing_permits: Option<Arc<Semaphore>>,
10099
event_broker: EventBroker,
@@ -124,14 +123,8 @@ impl IndexingService {
124123
ingester_pool: IngesterPool,
125124
storage_resolver: StorageResolver,
126125
event_broker: EventBroker,
126+
split_cache: Arc<IndexingSplitCache>,
127127
) -> anyhow::Result<IndexingService> {
128-
let split_store_space_quota = SplitStoreQuota::try_new(
129-
indexer_config.split_store_max_num_splits,
130-
indexer_config.split_store_max_num_bytes,
131-
)?;
132-
let split_cache_dir_path = get_cache_directory_path(&data_dir_path);
133-
let local_split_store =
134-
IndexingSplitCache::open(split_cache_dir_path, split_store_space_quota).await?;
135128
let indexing_root_directory =
136129
temp_dir::create_or_purge_directory(&data_dir_path.join(INDEXING_DIR_NAME)).await?;
137130
let queue_dir_path = data_dir_path.join(QUEUES_DIR_NAME);
@@ -149,7 +142,7 @@ impl IndexingService {
149142
ingest_api_service_opt,
150143
ingester_pool,
151144
storage_resolver,
152-
local_split_store: Arc::new(local_split_store),
145+
split_cache,
153146
indexing_pipelines: Default::default(),
154147
counters: Default::default(),
155148
max_concurrent_split_uploads: indexer_config.max_concurrent_split_uploads,
@@ -245,7 +238,7 @@ impl IndexingService {
245238
})?;
246239
let merge_policy =
247240
crate::merge_policy::merge_policy_from_settings(&index_config.indexing_settings);
248-
let split_store = IndexingSplitStore::new(storage.clone(), self.local_split_store.clone());
241+
let split_store = IndexingSplitStore::new(storage.clone(), self.split_cache.clone());
249242

250243
let doc_mapper = build_doc_mapper(&index_config.doc_mapping, &index_config.search_settings)
251244
.map_err(|error| IndexingError::Internal(error.to_string()))?;
@@ -828,6 +821,7 @@ mod tests {
828821
IngesterPool::default(),
829822
storage_resolver.clone(),
830823
EventBroker::default(),
824+
Arc::new(IndexingSplitCache::no_caching()),
831825
)
832826
.await
833827
.unwrap();
@@ -1435,6 +1429,7 @@ mod tests {
14351429
IngesterPool::default(),
14361430
storage_resolver.clone(),
14371431
EventBroker::default(),
1432+
Arc::new(IndexingSplitCache::no_caching()),
14381433
)
14391434
.await
14401435
.unwrap();

quickwit/quickwit-indexing/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
#![deny(clippy::disallowed_methods)]
1616

17+
use std::sync::Arc;
18+
1719
use quickwit_actors::{Mailbox, Universe};
1820
use quickwit_cluster::Cluster;
1921
use quickwit_common::pubsub::EventBroker;
@@ -71,6 +73,7 @@ pub async fn start_indexing_service(
7173
ingester_pool: IngesterPool,
7274
storage_resolver: StorageResolver,
7375
event_broker: EventBroker,
76+
indexing_split_cache: Arc<IndexingSplitCache>,
7477
) -> anyhow::Result<Mailbox<IndexingService>> {
7578
info!("starting indexer service");
7679
let ingest_api_service_mailbox = universe.get_one::<IngestApiService>();
@@ -85,6 +88,7 @@ pub async fn start_indexing_service(
8588
ingester_pool,
8689
storage_resolver,
8790
event_broker,
91+
indexing_split_cache,
8892
)
8993
.await?;
9094
let (indexing_service, _) = universe.spawn_builder().spawn(indexing_service);

quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ use std::time::{Duration, SystemTime};
2121

2222
use anyhow::Context;
2323
use bytesize::ByteSize;
24+
use quickwit_common::fs::get_cache_directory_path;
2425
use quickwit_common::split_file;
26+
use quickwit_config::IndexerConfig;
2527
use quickwit_directories::BundleDirectory;
2628
use quickwit_storage::StorageResult;
2729
use tantivy::Directory;
@@ -364,6 +366,31 @@ impl IndexingSplitCache {
364366
IndexingSplitCache { inner }
365367
}
366368

369+
/// Builds an [`IndexingSplitCache`] from an [`IndexerConfig`].
370+
///
371+
/// A zero quota for either dimension produces an [`IndexingSplitCache::no_caching`]
372+
/// instance — useful when compaction runs on dedicated nodes and indexers no
373+
/// longer benefit from caching freshly produced splits. Otherwise, opens the
374+
/// cache rooted at `<data_dir>/indexer-split-cache/splits`.
375+
pub async fn from_config(
376+
indexer_config: &IndexerConfig,
377+
data_dir_path: &Path,
378+
) -> anyhow::Result<IndexingSplitCache> {
379+
if indexer_config.split_store_max_num_bytes.as_u64() == 0
380+
|| indexer_config.split_store_max_num_splits == 0
381+
{
382+
return Ok(IndexingSplitCache::no_caching());
383+
}
384+
let cache_path = get_cache_directory_path(data_dir_path);
385+
let quota = SplitStoreQuota::try_new(
386+
indexer_config.split_store_max_num_splits,
387+
indexer_config.split_store_max_num_bytes,
388+
)?;
389+
IndexingSplitCache::open(cache_path, quota)
390+
.await
391+
.context("failed to open indexing split cache")
392+
}
393+
367394
/// Try to open an existing local split store directory.
368395
///
369396
/// If the directory does not exist, it will be created.
@@ -511,6 +538,7 @@ mod tests {
511538
use std::time::Duration;
512539

513540
use bytesize::ByteSize;
541+
use quickwit_config::IndexerConfig;
514542
use quickwit_directories::BundleDirectory;
515543
use quickwit_storage::{PutPayload, SplitPayloadBuilder};
516544
use tantivy::Directory;
@@ -533,6 +561,54 @@ mod tests {
533561
Ok(())
534562
}
535563

564+
#[tokio::test]
565+
async fn test_from_config() {
566+
// A zero quota in either dimension yields a no-caching cache that does
567+
// not touch the filesystem; a positive quota opens (and creates) the
568+
// cache directory at `<data_dir>/indexer-split-cache/splits`.
569+
let zero_bytes = {
570+
let mut config = IndexerConfig::for_test().unwrap();
571+
config.split_store_max_num_bytes = ByteSize(0);
572+
config
573+
};
574+
let zero_splits = {
575+
let mut config = IndexerConfig::for_test().unwrap();
576+
config.split_store_max_num_splits = 0;
577+
config
578+
};
579+
let both_zero = {
580+
let mut config = IndexerConfig::for_test().unwrap();
581+
config.split_store_max_num_bytes = ByteSize(0);
582+
config.split_store_max_num_splits = 0;
583+
config
584+
};
585+
for config in [zero_bytes, zero_splits, both_zero] {
586+
let data_dir = tempdir().unwrap();
587+
let _cache = IndexingSplitCache::from_config(&config, data_dir.path())
588+
.await
589+
.unwrap();
590+
assert!(
591+
!data_dir
592+
.path()
593+
.join("indexer-split-cache")
594+
.try_exists()
595+
.unwrap(),
596+
"no-caching variant must not create the cache directory",
597+
);
598+
}
599+
600+
let data_dir = tempdir().unwrap();
601+
let config = IndexerConfig::for_test().unwrap();
602+
let _cache = IndexingSplitCache::from_config(&config, data_dir.path())
603+
.await
604+
.unwrap();
605+
let cache_dir = data_dir.path().join("indexer-split-cache").join("splits");
606+
assert!(
607+
cache_dir.is_dir(),
608+
"positive quota must open (and create) the cache directory",
609+
);
610+
}
611+
536612
#[tokio::test]
537613
async fn test_local_split_store_load_existing_splits() -> anyhow::Result<()> {
538614
let temp_dir = tempfile::tempdir()?;

0 commit comments

Comments
 (0)