-
Notifications
You must be signed in to change notification settings - Fork 108
Expand file tree
/
Copy pathcontroller.rs
More file actions
7250 lines (6569 loc) · 271 KB
/
controller.rs
File metadata and controls
7250 lines (6569 loc) · 271 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//! An I/O controller that coordinates the creation, reconfiguration,
//! teardown of input/output adapters, and implements runtime flow control.
//!
//! # Design
//!
//! The circuit thread owns the `DBSPHandle` and calls `step()` on it whenever
//! there is some input data available for the circuit. It can be configured
//! to improve batching by slightly delaying the `step()` call if the number of
//! available input records is below some user-defined threshold.
//!
//! The backpressure thread controls the flow of data through transport
//! endpoints, pausing the endpoints either when the amount of data buffered by
//! the endpoint exceeds a user-defined threshold or in response to an explicit
//! user request.
//!
//! Both tasks require monitoring the state of the input buffers. To this end,
//! the controller expects transports to report the number of bytes and records
//! buffered via `InputConsumer::buffered`.
use crate::catalog::OutputCollectionHandles;
use crate::controller::checkpoint::{
CheckpointInputEndpointMetrics, CheckpointOffsets, CheckpointOutputEndpointMetrics,
};
use crate::controller::journal::Journal;
use crate::controller::stats::{InputEndpointMetrics, OutputEndpointMetrics, ProcessedRecords};
use crate::controller::sync::{
CHECKPOINT_SYNC_PULL_DURATION_SECONDS, CHECKPOINT_SYNC_PULL_FAILURES,
CHECKPOINT_SYNC_PULL_SUCCESS, CHECKPOINT_SYNC_PULL_TRANSFER_SPEED,
CHECKPOINT_SYNC_PULL_TRANSFERRED_BYTES, CHECKPOINT_SYNC_PUSH_DURATION_SECONDS,
CHECKPOINT_SYNC_PUSH_FAILURES, CHECKPOINT_SYNC_PUSH_SUCCESS,
CHECKPOINT_SYNC_PUSH_TRANSFER_SPEED, CHECKPOINT_SYNC_PUSH_TRANSFERRED_BYTES, SYNCHRONIZER,
};
use crate::samply::SamplySpan;
use crate::server::metrics::{HistogramDiv, LabelStack, MetricsFormatter, MetricsWriter, Value};
use crate::server::{InitializationState, ServerState};
use crate::transport::Step;
use crate::transport::clock::now_endpoint_config;
use crate::transport::{input_transport_config_to_endpoint, output_transport_config_to_endpoint};
use crate::util::{LongOperationWarning, run_on_thread_pool};
use crate::{
CircuitCatalog, Encoder, InputConsumer, OutputConsumer, OutputEndpoint, ParseError,
PipelineState, TransportInputEndpoint,
};
use crate::{PipelinePhase, create_integrated_output_endpoint};
use anyhow::{Context, Error as AnyError, anyhow};
use arrow::datatypes::Schema;
use arrow::util::pretty::pretty_format_batches;
use atomic::Atomic;
use checkpoint::Checkpoint;
use chrono::{DateTime, Utc};
use crossbeam::{
queue::SegQueue,
sync::{Parker, ShardedLock, Unparker},
};
use datafusion::prelude::*;
use dbsp::circuit::circuit_builder::BootstrapInfo;
use dbsp::circuit::metrics::{
COMPACTION_STALL_TIME_NANOSECONDS, DBSP_OPERATOR_COMMIT_LATENCY_MICROSECONDS, DBSP_STEP,
DBSP_STEP_LATENCY_MICROSECONDS, FILES_CREATED, FILES_DELETED, TOTAL_LATE_RECORDS,
};
use dbsp::circuit::tokio::TOKIO;
use dbsp::circuit::{CheckpointCommitter, CircuitStorageConfig, DevTweaks, Mode};
use dbsp::storage::backend::{StorageBackend, StoragePath};
use dbsp::{
DBSPHandle,
circuit::{CircuitConfig, Layout},
profile::{DbspProfile, GraphProfile},
};
use dbsp::{Runtime, WeakRuntime};
use enum_map::EnumMap;
use feldera_adapterlib::format::BufferSize;
use feldera_adapterlib::metrics::{ConnectorMetrics, ValueType};
use feldera_adapterlib::transport::{Resume, Watermark};
use feldera_ir::LirCircuit;
use feldera_storage::histogram::{ExponentialHistogram, ExponentialHistogramSnapshot};
use feldera_storage::metrics::{
READ_BLOCKS_BYTES, READ_LATENCY_MICROSECONDS, SYNC_LATENCY_MICROSECONDS, WRITE_BLOCKS_BYTES,
WRITE_LATENCY_MICROSECONDS,
};
use feldera_types::adapter_stats::{
ConnectorHealth, ExternalControllerStatus, ExternalInputEndpointStatus,
ExternalOutputEndpointStatus,
};
use feldera_types::checkpoint::CheckpointMetadata;
use feldera_types::coordination::{
self, AdHocCatalog, AdHocTableType, CheckpointCoordination, Completion, StepAction, StepInputs,
StepRequest, StepStatus, TransactionCoordination,
};
use feldera_types::format::json::JsonLines;
use feldera_types::pipeline_diff::PipelineDiff;
use feldera_types::runtime_status::BootstrapPolicy;
use feldera_types::secret_resolver::resolve_secret_references_in_connector_config;
use feldera_types::suspend::{PermanentSuspendError, SuspendError, TemporarySuspendError};
use feldera_types::time_series::SampleStatistics;
use feldera_types::transaction::{StartTransactionResponse, TransactionId};
use governor::DefaultDirectRateLimiter;
use governor::Quota;
use governor::RateLimiter;
use itertools::Itertools;
use journal::StepMetadata;
use memory_stats::memory_stats;
use nonzero_ext::nonzero;
use rmpv::Value as RmpValue;
use serde_json::Value as JsonValue;
use stats::StepResults;
use std::borrow::Cow;
use std::collections::HashMap;
use std::collections::HashSet;
use std::collections::btree_map::Entry;
use std::io::ErrorKind;
use std::mem::replace;
use std::ops::Range;
use std::panic::{AssertUnwindSafe, catch_unwind};
use std::path::PathBuf;
use std::sync::mpsc::{Receiver, SendError, Sender, SyncSender, channel, sync_channel};
use std::sync::{LazyLock, Mutex, Weak};
use std::thread::{self, sleep};
use std::{
collections::{BTreeMap, BTreeSet},
mem,
sync::{
Arc,
atomic::{AtomicBool, AtomicU64, Ordering},
},
thread::JoinHandle,
time::{Duration, Instant},
};
use tokio::sync::Notify;
use tokio::{
fs::File,
io::{AsyncReadExt, BufReader},
sync::{
Mutex as TokioMutex,
oneshot::{self, error::TryRecvError},
},
task::spawn_blocking,
};
use tracing::{debug, debug_span, error, info, trace, warn};
use uuid::Uuid;
use validate::validate_config;
mod checkpoint;
mod error;
mod journal;
mod pipeline_diff;
mod stats;
mod sync;
mod validate;
use crate::adhoc::table::AdHocTable;
use crate::adhoc::{create_session_context, execute_sql};
use crate::catalog::{SerBatchReader, SerTrace};
use crate::format::parquet::relation_to_arrow_fields;
use crate::format::{MessageOrientedPreprocessedParser, StreamingPreprocessedParser};
use crate::format::{get_input_format, get_output_format};
use crate::integrated::create_integrated_input_endpoint;
pub use error::{ConfigError, ControllerError};
pub use feldera_types::config::{
ConnectorConfig, FormatConfig, InputEndpointConfig, OutputEndpointConfig, PipelineConfig,
RuntimeConfig, TransportConfig,
};
use feldera_types::config::{
DEFAULT_MAX_WORKER_BATCH_SIZE, FileBackendConfig, FtConfig, FtModel, OutputBufferConfig,
StorageBackendConfig, SyncConfig,
};
use feldera_types::constants::{STATE_FILE, STEPS_FILE};
use feldera_types::format::json::{JsonFlavor, JsonParserConfig, JsonUpdateFormat};
use feldera_types::program_schema::{SqlIdentifier, canonical_identifier};
pub use pipeline_diff::compute_pipeline_diff;
pub use stats::{CompletionToken, ControllerStatus, InputEndpointStatus};
/// Maximal number of concurrent API connections per circuit
/// (including both input and output connections).
// TODO: make this configurable.
pub(crate) const MAX_API_CONNECTIONS: u64 = 100;
/// Identifier of an input or output endpoint, unique within a controller.
pub type EndpointId = u64;
/// Runtime of checkpoint operations, in microseconds, including time that the
/// pipeline could continue executing while the checkpoint completed.
static CHECKPOINT_RUNTIME: ExponentialHistogram = ExponentialHistogram::new();
/// Sub-duration of [CHECKPOINT_RUNTIME] during which pipeline execution was
/// blocked.
static CHECKPOINT_DELAY: ExponentialHistogram = ExponentialHistogram::new();
/// Amount of storage written during checkpoint operations, in megabytes.
///
/// These values are approximate because they include all storage writes while a
/// checkpoint is being written, which means that they include writes from
/// ongoing background merges.
static CHECKPOINT_WRITTEN_MEGABYTES: ExponentialHistogram = ExponentialHistogram::new();
/// Duration of transaction ingest time, that is, from transaction start to
/// start of commit.
static TRANSACTION_INGEST_TIME: ExponentialHistogram = ExponentialHistogram::new();
/// Duration of transaction commit time, that is, from starting commit to
/// finishing commit.
static TRANSACTION_COMMIT_TIME: ExponentialHistogram = ExponentialHistogram::new();
/// Number of records successfully processed at the time of the last successful
/// checkpoint.
static CHECKPOINT_PROCESSED_RECORDS: AtomicU64 = AtomicU64::new(0);
/// Interval between updating statistics for transaction commit.
static COMMIT_UPDATE_INTERVAL: Duration = Duration::from_secs(1);
/// Interval between logging updates about transaction commit.
static COMMIT_DISPLAY_INTERVAL: Duration = Duration::from_secs(10);
/// Creates a [Controller].
pub struct ControllerBuilder {
    /// The user-provided pipeline configuration, validated by [Self::new].
    pub config: PipelineConfig,
    /// Optionally, specifies a [Layout] to use for the circuit. In the
    /// single-host case, `config.global.workers` is enough to create a layout,
    /// so this isn't necessary. This is for the multihost case, where each
    /// host has a different layout (each host has a different subset of the
    /// workers).
    layout: Option<Layout>,
    /// Storage configuration, opened by [Self::new] when `config` enables
    /// storage; `None` otherwise.
    storage: Option<CircuitStorageConfig>,
}
/// Controller creation is a three-step process:
/// 1. A `ControllerBuilder` object is created with the user-provided pipeline config.
/// The builder is responsible for retrieving the pipeline's checkpoints in Standby mode.
/// 2. A `ControllerInit` object is created by the builder. During initialization, it
/// opens the latest available checkpoint, computes the diff between the current and the
/// checkpointed programs and constructs the final pipeline configuration.
/// 3. The `ControllerInit::init` method instantiates the controller. As part of
/// initialization, the controller uses the computed pipeline diff along with the
/// contents of the initial checkpoint to determine whether the pipeline requires
/// bootstrapping, waits for the user to approve bootstrapping (if necessary),
/// initializes the connectors and finally starts running the circuit.
impl ControllerBuilder {
    /// Prepares to create a new [Controller] configured with `config`.
    ///
    /// If `config` includes storage configuration, this opens the storage
    /// backend, so that it can be used before building the controller.
    ///
    /// Use [build](Self::build) to finish building the controller.
    pub(crate) fn new(config: &PipelineConfig) -> Result<Self, ControllerError> {
        validate_config(config)?;
        // Open the storage backend eagerly so that it is usable before the
        // controller itself is built (e.g., for checkpoint pulls below).
        let storage = config
            .storage()
            .map(|(storage_config, storage_options)| {
                CircuitStorageConfig::for_config(storage_config.clone(), storage_options.clone())
                    .map_err(|error| {
                        ControllerError::storage_error("failed to initialize storage", error)
                    })
            })
            .transpose()?;
        if storage.is_none() {
            // Fault tolerance cannot work without storage; reject the
            // configuration rather than silently degrading.
            if config.global.fault_tolerance.is_enabled() {
                return Err(ControllerError::Config {
                    config_error: Box::new(ConfigError::FtRequiresStorage),
                });
            }
            info!(
                "storage not configured, so suspend-and-resume and fault tolerance will not be available"
            );
        }
        Ok(Self {
            config: config.clone(),
            layout: None,
            storage,
        })
    }
    /// Checks if we need to pull a checkpoint from S3.
    /// Useful to set the pipeline `InitializationState` to `DownloadingCheckpoint`.
    pub(crate) fn is_pull_necessary(&self) -> Option<&SyncConfig> {
        #[cfg(feature = "feldera-enterprise")]
        {
            self.storage
                .as_ref()
                .and_then(|s| sync::is_pull_necessary(s))
        }
        // Checkpoint sync is an enterprise-only feature: nothing to pull in
        // community builds.
        #[cfg(not(feature = "feldera-enterprise"))]
        None
    }
    /// Pulls the latest checkpoint just once from S3.
    pub(crate) fn pull_once(&self, _sync: &SyncConfig) -> Result<(), ControllerError> {
        #[cfg(feature = "feldera-enterprise")]
        if let Some(storage) = &self.storage {
            return sync::pull_once(storage, _sync);
        };
        // No storage configured (or non-enterprise build): nothing to pull.
        Ok(())
    }
    /// Continuously pull the latest checkpoint from S3.
    ///
    /// `_is_activated` is a predicate supplied by the caller; it is consulted
    /// by the enterprise sync implementation (see `sync::continuous_pull`).
    pub(crate) fn continuous_pull<F>(&self, _is_activated: F) -> Result<(), ControllerError>
    where
        F: Fn() -> bool,
    {
        #[cfg(feature = "feldera-enterprise")]
        if let Some(storage) = &self.storage {
            sync::continuous_pull(storage, _is_activated)
        } else {
            Err(ControllerError::InvalidStandby(
                "standby mode requires storage configuration",
            ))
        }
        #[cfg(not(feature = "feldera-enterprise"))]
        Err(ControllerError::EnterpriseFeature("standby"))
    }
    /// Sets the circuit [Layout] to use (multihost deployments).
    pub(crate) fn with_layout(self, layout: Layout) -> Self {
        Self {
            layout: Some(layout),
            ..self
        }
    }
    /// Creates a [ControllerInit] that will open the specific
    /// `checkpoint_uuid`.
    ///
    /// Callers must only invoke this when storage is configured: the
    /// `unwrap` below panics otherwise.
    pub(crate) fn open_checkpoint(
        self,
        checkpoint_uuid: Uuid,
    ) -> Result<ControllerInit, ControllerError> {
        ControllerInit::with_checkpoint(
            self.layout,
            self.config.clone(),
            self.storage.clone().unwrap(),
            checkpoint_uuid,
        )
    }
    /// Creates a [ControllerInit] that will start fresh without using a
    /// checkpoint.
    pub(crate) fn open_without_checkpoint(self) -> Result<ControllerInit, ControllerError> {
        ControllerInit::without_checkpoint(self.layout, self.config.clone(), self.storage.clone())
    }
    /// Creates a [ControllerInit] that will start from the latest checkpoint,
    /// if there is one, or start fresh without a checkpoint otherwise.
    pub(crate) fn open_latest_checkpoint(self) -> Result<ControllerInit, ControllerError> {
        ControllerInit::with_latest_checkpoint(
            self.layout,
            self.config.clone(),
            self.storage.clone(),
        )
    }
    /// Returns a handle to the storage backend, if storage is configured.
    pub(crate) fn storage(&self) -> Option<Arc<dyn StorageBackend>> {
        self.storage.as_ref().map(|storage| storage.backend.clone())
    }
}
/// Controller that coordinates the creation, reconfiguration, teardown of
/// input/output adapters, and implements runtime flow control.
///
/// The controller instantiates the input and output pipelines according to a
/// user-provided [configuration](`PipelineConfig`) and exposes an API to
/// reconfigure and monitor the pipelines at runtime.
///
/// # Lifecycle
///
/// A pipeline process has a [PipelineState], which is the state requested by
/// the client, one of [Running], [Paused], or [Terminated]. This state is
/// initially as set by the `ControllerBuilder`, which defaults to [Paused].
/// Calls to [start], [pause], [initiate_stop], and [stop] change the
/// client-requested state. Once the state is set to [Terminated], it can never
/// be changed back to [Running] or [Paused].
///
/// The following diagram illustrates internal pipeline process states and their
/// possible transitions:
///
/// ```text
/// ┌──Initializing──┐
/// │ │ │
/// │ │ │
/// ▼ │ ▼
/// Replaying │ Bootstrapping
/// │ │ │
/// │ │ │
/// │ │ │
/// ▼ ▼ ▼
/// ┌───────────────────────────┐
/// │ (default) │
/// │ Paused◄────────►Running │
/// │ │ │ │
/// │ │ │ │
/// │ └──►Terminated◄──┘ │
/// └───────────────────────────┘
/// client-requested state
/// ```
///
/// The following list describes states and transitions in more detail:
///
/// * Initializing: Before the circuit thread starts its main loop, the pipeline
/// can be considered to be initializing. This transitions to one of
/// replaying, bootstrapping, or the client-requested state.
///
/// * Replaying (aka restoring): If fault tolerance is enabled (whether
/// [FtModel::AtLeastOnce] or [FtModel::ExactlyOnce]), the pipeline reads and
/// replays the steps in the journal. When replay is done, the pipeline
/// transitions to the client-requested state.
///
/// Adding and removing input and output connectors, and ad-hoc queries, will
/// fail while a pipeline is replaying.
///
/// [is_replaying] reports whether the pipeline is currently replaying.
///
/// * Bootstrapping: If the pipeline is resuming from a checkpoint, and the
/// circuit was modified since the checkpoint, then the pipeline process
/// "bootstrap" the circuit to adjust the results to match the new
/// circuit. When bootstrapping is done, the pipeline transitions to the
/// client-requested state. Bootstrapping and replaying are currently
/// mutually exclusive--if both would be required, the pipeline process gives
/// up and fails the circuit.
///
/// Adding and removing input connectors will fail with an error while the
/// pipeline is bootstrapping.
///
/// * Paused: In this state, the pipeline tells input connectors to stop reading
/// new records into their input buffers. However, if accumulated records
/// already exist in their buffers, the circuit will continue to execute steps
/// until all of them are drained (for pipeline setups with deep buffers or a
/// slow circuit, this can take minutes or longer). This state transitions to
/// running or terminated in response to client request.
///
/// * Running: In this state, the pipeline tells input connectors to read new
/// records into their input buffers (up to a per-connector, configurable
/// buffer limit). When enough records accumulate or a timer expires, or when
/// the clock ticks (all of these are configurable), the circuit steps. This
/// state transitions to paused or terminated in response to client request.
///
/// * Terminated: the circuit is dead and won't come back without creating a new
/// pipeline process. This state never transitions.
///
/// [Running]: PipelineState::Running
/// [Paused]: PipelineState::Paused
/// [Terminated]: PipelineState::Terminated
/// [start]: Controller::start
/// [stop]: Controller::stop
/// [pause]: Controller::pause
/// [initiate_stop]: Controller::initiate_stop
/// [is_replaying]: Controller::is_replaying
#[derive(Clone)]
pub struct Controller {
    /// Shared controller state; all cloned `Controller` handles point at the
    /// same `ControllerInner`.
    inner: Arc<ControllerInner>,
    /// The circuit thread handle (see module-level docs). Wrapped in
    /// `Mutex<Option<..>>` so that the handle can be taken out exactly once
    /// to join the thread — presumably by [Controller::stop]; confirm there.
    #[allow(clippy::type_complexity)]
    circuit_thread_handle: Arc<Mutex<Option<JoinHandle<Result<(), ControllerError>>>>>,
}
/// Type of the callback argument to [`Controller::start_graph_profile`].
pub type GraphProfileCallbackFn = Box<dyn FnOnce(Result<GraphProfile, ControllerError>) + Send>;
/// Type of the callback argument to [`Controller::start_json_profile`].
pub type JsonProfileCallbackFn = Box<dyn FnOnce(Result<DbspProfile, ControllerError>) + Send>;
/// Type of the callback argument to [`Controller::start_checkpoint`].
pub type CheckpointCallbackFn = Box<dyn FnOnce(Result<Checkpoint, Arc<ControllerError>>) + Send>;
/// Type of the callback argument to [`Controller::start_suspend`].
pub type SuspendCallbackFn = Box<dyn FnOnce(Result<(), Arc<ControllerError>>) + Send>;
/// Type of the callback argument to [`Controller::start_sync_checkpoint`].
pub type SyncCheckpointCallbackFn = Box<dyn FnOnce(Result<(), Arc<ControllerError>>) + Send>;
/// Type of the callback argument to [`Controller::rebalance`].
pub type RebalanceCallbackFn = Box<dyn FnOnce(Result<(), ControllerError>) + Send>;
/// A command that [Controller] can send to [Controller::circuit_thread].
///
/// There is no type for a command reply. Instead, the command implementation
/// uses a callback embedded in the command to reply.
enum Command {
    /// Request a graph-level performance profile.
    GraphProfile(GraphProfileCallbackFn),
    /// Request a performance profile in JSON form.
    JsonProfile(JsonProfileCallbackFn),
    /// Request a checkpoint of the circuit.
    Checkpoint(CheckpointCallbackFn),
    /// Request suspending the pipeline.
    Suspend(SuspendCallbackFn),
    /// Request syncing the checkpoint with the given UUID — presumably to
    /// remote storage; confirm semantics at the call site.
    SyncCheckpoint((uuid::Uuid, SyncCheckpointCallbackFn)),
    /// Request rebalancing.
    Rebalance(RebalanceCallbackFn),
}
impl Command {
    /// Completes the command without executing it: invokes the embedded
    /// callback with a `ControllerExit` error so that the requester is
    /// unblocked when the controller shuts down before servicing the command.
    pub fn flush(self) {
        match self {
            // Callbacks taking a plain `ControllerError`.
            Self::GraphProfile(reply) => reply(Err(ControllerError::ControllerExit)),
            Self::JsonProfile(reply) => reply(Err(ControllerError::ControllerExit)),
            Self::Rebalance(reply) => reply(Err(ControllerError::ControllerExit)),
            // Callbacks taking an `Arc`-wrapped error.
            Self::Checkpoint(reply) => reply(Err(Arc::new(ControllerError::ControllerExit))),
            Self::Suspend(reply) => reply(Err(Arc::new(ControllerError::ControllerExit))),
            Self::SyncCheckpoint((_checkpoint, reply)) => {
                reply(Err(Arc::new(ControllerError::ControllerExit)))
            }
        }
    }
}
impl Controller {
#[cfg(test)]
pub(crate) fn with_test_config<F>(
circuit_factory: F,
config: &PipelineConfig,
error_cb: Box<dyn Fn(Arc<ControllerError>, Option<String>) + Send + Sync>,
) -> Result<Self, ControllerError>
where
F: FnOnce(CircuitConfig) -> Result<(DBSPHandle, Box<dyn CircuitCatalog>), ControllerError>
+ Send
+ 'static,
{
let builder = ControllerBuilder::new(config)?;
let mut init = builder.open_latest_checkpoint()?;
if let Some(diff) = init.pipeline_diff.as_mut() {
diff.clear_program_diff()
}
init.init(None, circuit_factory, error_cb)
}
#[cfg(test)]
pub(crate) fn with_test_config_keep_program_diff<F>(
circuit_factory: F,
config: &PipelineConfig,
error_cb: Box<dyn Fn(Arc<ControllerError>, Option<String>) + Send + Sync>,
) -> Result<Self, ControllerError>
where
F: FnOnce(CircuitConfig) -> Result<(DBSPHandle, Box<dyn CircuitCatalog>), ControllerError>
+ Send
+ 'static,
{
let builder = ControllerBuilder::new(config)?;
let init = builder.open_latest_checkpoint()?;
init.init(None, circuit_factory, error_cb)
}
    /// Spawns the circuit thread and completes controller construction.
    ///
    /// `circuit_factory` is invoked on the circuit thread (see comment
    /// below); `error_cb` is invoked for asynchronous controller errors.
    /// Returns once the circuit thread reports its initialization status.
    fn build<F>(
        controller_init: ControllerInit,
        state: Option<Arc<ServerState>>,
        circuit_factory: F,
        error_cb: Box<dyn Fn(Arc<ControllerError>, Option<String>) + Send + Sync>,
    ) -> Result<Self, ControllerError>
    where
        F: FnOnce(CircuitConfig) -> Result<(DBSPHandle, Box<dyn CircuitCatalog>), ControllerError>
            + Send
            + 'static,
    {
        let (circuit_thread_handle, inner) = {
            // A channel to communicate circuit initialization status.
            // The `circuit_factory` closure must be invoked in the context of
            // the circuit thread, because the circuit handle it returns doesn't
            // implement `Send`. So we need this channel to communicate circuit
            // initialization status back to this thread. On success, the worker
            // thread adds a catalog to `inner`, and returns it wrapped in an `Arc`.
            let (init_status_sender, init_status_receiver) =
                sync_channel::<Result<Arc<ControllerInner>, ControllerError>>(0);
            let handle = thread::Builder::new()
                .name("circuit-thread".to_string())
                .spawn(move || {
                    match CircuitThread::new(controller_init, state, circuit_factory, error_cb) {
                        Err(error) => {
                            // Initialization failed: report the error back to
                            // the parent thread and exit cleanly.
                            let _ = init_status_sender.send(Err(error));
                            Ok(())
                        }
                        Ok(mut circuit_thread) => {
                            // `run` reports success via `init_status_sender`
                            // before entering its main loop.
                            if let Err(error) = circuit_thread.run(init_status_sender) {
                                circuit_thread.controller.error(error, None);
                            }
                            circuit_thread.finish().inspect_err(|error| {
                                // Log the error before returning it from the
                                // thread: otherwise, only [Controller::stop]
                                // will join the thread and report the error.
                                error!("circuit thread died with error: {error}")
                            })
                        }
                    }
                })
                .expect("failed to spawn circuit-thread");
            // If `recv` fails, it indicates that the circuit thread panicked
            // during initialization.
            let inner = init_status_receiver
                .recv()
                .map_err(|_| ControllerError::dbsp_panic())??;
            (Arc::new(Mutex::new(Some(handle))), inner)
        };
        Ok(Self {
            inner,
            circuit_thread_handle,
        })
    }
    /// Returns the status of the most recent checkpoint sync operation
    /// (delegates to `ControllerInner`).
    pub(crate) fn last_checkpoint_sync(&self) -> LastCheckpoint {
        self.inner.last_checkpoint_sync()
    }
    /// Returns the circuit's LIR (low-level intermediate representation).
    pub fn lir(&self) -> &LirCircuit {
        &self.inner.lir
    }
    /// Connect a new input endpoint with specified name and configuration.
    ///
    /// Creates an endpoint with data transport and format specified by
    /// `config` and starts streaming data from the endpoint if the pipeline
    /// is running.
    ///
    /// # Errors
    ///
    /// The method may fail for the following reasons:
    ///
    /// * The endpoint configuration is invalid, e.g., specifies an unknown
    ///   transport or data format.
    ///
    /// * The endpoint fails to initialize, e.g., because the network address or
    ///   filename specified in the transport config is unreachable.
    ///
    /// * The pipeline is currently bootstrapping or restoring from a
    ///   checkpoint.
    pub fn connect_input(
        &self,
        endpoint_name: &str,
        config: &InputEndpointConfig,
        resume_info: Option<(JsonValue, CheckpointInputEndpointMetrics)>,
    ) -> Result<EndpointId, ControllerError> {
        debug!("Connecting input endpoint '{endpoint_name}'; config: {config:?}");
        self.inner.fail_if_bootstrapping_or_restoring()?;
        self.inner.connect_input(endpoint_name, config, resume_info)
    }
/// Disconnect an existing input endpoint.
///
/// This method is asynchronous and may return before all endpoint
/// threads have terminated.
pub fn disconnect_input(&self, endpoint_id: &EndpointId) {
self.inner.disconnect_input(endpoint_id)
}
    /// Returns the DataFusion session context used for ad-hoc queries.
    ///
    /// Fails while the pipeline is restoring from a checkpoint.
    pub fn session_context(&self) -> Result<SessionContext, ControllerError> {
        self.inner.fail_if_restoring()?;
        Ok(self.inner.session_ctxt.clone())
    }
    /// Connect a previously instantiated input endpoint.
    ///
    /// Used to connect an endpoint instantiated manually rather than from an
    /// [`InputEndpointConfig`].
    ///
    /// # Arguments
    ///
    /// * `endpoint_name` - endpoint name unique within the pipeline.
    ///
    /// * `endpoint_config` - endpoint config.
    ///
    /// * `endpoint` - transport endpoint object.
    ///
    /// # Errors
    ///
    /// Fails while the pipeline is bootstrapping or restoring from a
    /// checkpoint.
    pub fn add_input_endpoint(
        &self,
        endpoint_name: &str,
        endpoint_config: InputEndpointConfig,
        endpoint: Box<dyn TransportInputEndpoint>,
        resume_info: Option<(JsonValue, CheckpointInputEndpointMetrics)>,
    ) -> Result<EndpointId, ControllerError> {
        self.inner.fail_if_bootstrapping_or_restoring()?;
        self.inner
            .add_input_endpoint(endpoint_name, endpoint_config, Some(endpoint), resume_info)
    }
    /// Disconnect an existing output endpoint.
    ///
    /// This method is asynchronous and may return before all endpoint
    /// threads have terminated.
    ///
    /// See [Self::add_output_endpoint] for creating output endpoints.
    pub fn disconnect_output(&self, endpoint_id: &EndpointId) {
        debug!("Disconnecting output endpoint {endpoint_id}");
        self.inner.disconnect_output(endpoint_id)
    }
    /// Connect a previously instantiated output endpoint.
    ///
    /// Used to connect an endpoint instantiated manually rather than from an
    /// [`OutputEndpointConfig`].
    ///
    /// # Arguments
    ///
    /// * `endpoint_name` - endpoint name unique within the pipeline.
    ///
    /// * `endpoint_config` - (partial) endpoint config. Only `format.name` and
    ///   `stream` fields need to be initialized.
    ///
    /// * `endpoint` - transport endpoint object.
    ///
    /// # Errors
    ///
    /// Fails while the pipeline is restoring from a checkpoint.
    pub fn add_output_endpoint(
        &self,
        endpoint_name: &str,
        endpoint_config: &OutputEndpointConfig,
        endpoint: Box<dyn OutputEndpoint>,
        initial_statistics: Option<&CheckpointOutputEndpointMetrics>,
    ) -> Result<EndpointId, ControllerError> {
        debug!("Adding output endpoint '{endpoint_name}'; config: {endpoint_config:?}");
        self.inner.fail_if_restoring()?;
        self.inner.add_output_endpoint(
            endpoint_name,
            endpoint_config,
            Some(endpoint),
            initial_statistics,
        )
    }
    /// Increment the number of active API connections.
    ///
    /// API connections are created dynamically via the `ingress` and `egress`
    /// REST API endpoints.
    ///
    /// Fails if the number of connections exceeds the current limit
    /// (presumably [MAX_API_CONNECTIONS] — enforced in `ControllerInner`),
    /// returning the number of existing API connections.
    pub fn register_api_connection(&self) -> Result<(), u64> {
        self.inner.register_api_connection()
    }
    /// Decrement the number of active API connections.
    ///
    /// Counterpart to [Self::register_api_connection].
    pub fn unregister_api_connection(&self) {
        self.inner.unregister_api_connection();
    }
    /// Return the number of active API connections, as tracked by
    /// [Self::register_api_connection] / [Self::unregister_api_connection].
    pub fn num_api_connections(&self) -> u64 {
        self.inner.num_api_connections()
    }
    /// Force the circuit to perform a step even if all of its
    /// input buffers are empty or nearly empty.
    ///
    /// Asynchronous: the step itself is performed by the circuit thread,
    /// likely after this call returns.
    pub fn request_step(&self) {
        self.inner.request_step();
    }
    /// Change the state of all input endpoints to running.
    ///
    /// Start streaming data through all connected input endpoints.
    ///
    /// This changes the client-requested state; see the state diagram in the
    /// [Controller] docs.
    pub fn start(&self) {
        debug!("Starting the pipeline");
        self.inner.start();
    }
    /// Pause all input endpoints.
    ///
    /// Sends a pause command to all input endpoints. Upon receiving the
    /// command, the endpoints must stop pushing data to the pipeline. This
    /// method is asynchronous and may return before all endpoints have been
    /// fully paused.
    ///
    /// This changes the client-requested state; see the state diagram in the
    /// [Controller] docs.
    pub fn pause(&self) {
        debug!("Pausing the pipeline");
        self.inner.pause();
    }
    /// Pause specified input endpoint.
    ///
    /// Sets `paused_by_user` flag of the endpoint to `true`.
    /// This method is asynchronous and may return before the endpoint has been
    /// fully paused.
    ///
    /// Error conditions are determined by `ControllerInner` (presumably an
    /// unknown endpoint name — confirm there).
    pub fn pause_input_endpoint(&self, endpoint_name: &str) -> Result<(), ControllerError> {
        self.inner.pause_input_endpoint(endpoint_name)
    }
    /// Start or resume specified input endpoint.
    ///
    /// Sets `paused_by_user` flag of the endpoint to `false`.
    pub fn start_input_endpoint(&self, endpoint_name: &str) -> Result<(), ControllerError> {
        self.inner.start_input_endpoint(endpoint_name)
    }
    /// Returns whether the specified input endpoint is paused by the user.
    pub fn is_input_endpoint_paused(&self, endpoint_name: &str) -> Result<bool, ControllerError> {
        self.inner.is_input_endpoint_paused(endpoint_name)
    }
    /// Returns the status of the specified input endpoint in the external
    /// API format.
    pub fn input_endpoint_status(
        &self,
        endpoint_name: &str,
    ) -> Result<ExternalInputEndpointStatus, ControllerError> {
        self.inner.input_endpoint_status(endpoint_name)
    }
    /// Lookup input endpoint by name.
    ///
    /// Returns the endpoint's [EndpointId], or an error (presumably when no
    /// endpoint with that name exists — confirm in `ControllerInner`).
    pub fn input_endpoint_id_by_name(
        &self,
        endpoint_name: &str,
    ) -> Result<EndpointId, ControllerError> {
        self.inner.input_endpoint_id_by_name(endpoint_name)
    }
/// Returns whether the controller is replaying a fault tolerance log.
///
/// This is a relaxed atomic load of the `restoring` flag, so the result is
/// a best-effort snapshot that may be stale by the time the caller acts on
/// it.
pub fn is_replaying(&self) -> bool {
    self.inner.restoring.load(Ordering::Relaxed)
}
/// Returns the status of the specified output endpoint in the form used by
/// the external API.
///
/// # Errors
///
/// Fails if `endpoint_name` does not refer to a known output endpoint
/// (error produced by the inner controller — TODO confirm exact variant).
pub fn output_endpoint_status(
    &self,
    endpoint_name: &str,
) -> Result<ExternalOutputEndpointStatus, ControllerError> {
    self.inner.output_endpoint_status(endpoint_name)
}
/// Returns the current controller status.
pub fn status(&self) -> &ControllerStatus {
    &self.inner.status
}
/// Returns the current controller status in the form used by the external
/// API.
///
/// Combines the internal [`ControllerStatus`] with the suspendability and
/// completion flags and a snapshot of the current transaction info.
pub fn api_status(&self) -> ExternalControllerStatus {
    let can_suspend = self.can_suspend();
    let complete = self.pipeline_complete();
    // The `transaction_info` mutex guard is a temporary inside the call
    // expression, so it is held for the duration of `to_api_type`, exactly
    // as before.
    self.status().to_api_type(
        can_suspend,
        complete,
        self.inner.transaction_info.lock().unwrap().clone(),
    )
}
/// Returns the pipeline state.
pub fn state(&self) -> PipelineState {
    self.inner.state()
}
/// Returns a reference to the circuit's catalog.
pub fn catalog(&self) -> &Arc<Box<dyn CircuitCatalog>> {
    &self.inner.catalog
}
/// Triggers a dump of the circuit's performance profile to the file system.
/// The profile will be written asynchronously, probably after this function
/// returns.
pub fn dump_profile(&self) {
debug!("Generating DBSP profile dump");
self.start_graph_profile(Box::new(|profile| {
match profile.map(|profile| {
profile
.dump("profile")
.map_err(|e| ControllerError::io_error("dumping profile", e))
}) {
Ok(Ok(path)) => info!("Dumped DBSP profile to {}", path.display()),
Ok(Err(e)) | Err(e) => error!("Failed to write circuit profile: {e}"),
}
}));
}
/// Triggers a profiling operation in the running pipeline. `cb` will be
/// called with the profile when it is ready, probably after this function
/// returns.
///
/// The callback-based nature of this function makes it useful in
/// asynchronous contexts.
pub fn start_graph_profile(&self, cb: GraphProfileCallbackFn) {
    self.inner.send_command(Command::GraphProfile(cb));
}
/// Triggers a profiling operation in the running pipeline. `cb` will be
/// called with the profile (in JSON format) when it is ready, probably after this
/// function returns.
///
/// The callback-based nature of this function makes it useful in
/// asynchronous contexts.
pub fn start_json_profile(&self, cb: JsonProfileCallbackFn) {
    self.inner.send_command(Command::JsonProfile(cb));
}
/// Triggers a checkpoint operation. `cb` will be called when it completes.
///
/// The callback-based nature of this function makes it useful in
/// asynchronous contexts.
pub fn start_checkpoint(&self, cb: CheckpointCallbackFn) {
    self.inner.send_command(Command::Checkpoint(cb));
}
/// Prepares for a checkpoint operation. The checkpoint will not actually
/// start until it is later released with [Self::release_checkpoint].
pub fn prepare_checkpoint(&self) {
    // Raise the coordination flag before queuing the checkpoint command so
    // the checkpoint is held back until `release_checkpoint` clears it.
    let prepare_flag = &self.inner.coordination_prepare_checkpoint;
    prepare_flag.store(true, Ordering::Release);
    // The checkpoint result is intentionally discarded; progress is
    // observed through `checkpoint_watcher` instead.
    self.start_checkpoint(Box::new(|_result| ()));
}
/// Returns an object for monitoring progress of [Self::prepare_checkpoint].
pub fn checkpoint_watcher(
    &self,
) -> tokio::sync::watch::Receiver<Option<CheckpointCoordination>> {
    self.inner.checkpoint_receiver.clone()
}
/// Returns an object for monitoring progress of transactions.
pub fn transaction_watcher(&self) -> tokio::sync::watch::Receiver<TransactionCoordination> {
    self.inner.transaction_receiver.clone()
}
/// Returns an object for monitoring progress of input completion.
pub fn completion_watcher(&self) -> tokio::sync::watch::Receiver<Completion> {
    self.inner.status.completion_notifier.subscribe()
}
/// Releases a checkpoint previously set up with
/// [Self::prepare_checkpoint], allowing it to proceed.
pub fn release_checkpoint(&self) {
    let prepare_flag = &self.inner.coordination_prepare_checkpoint;
    prepare_flag.store(false, Ordering::Release);
    // Unpark the circuit thread so it can notice the cleared flag promptly.
    self.inner.unpark_circuit();
}
/// Performs a checkpoint and waits for it to complete.
///
/// Async wrapper around [Self::start_checkpoint] that bridges the
/// callback through a oneshot channel.
pub async fn async_checkpoint(&self) -> Result<Checkpoint, Arc<ControllerError>> {
    let (tx, rx) = oneshot::channel();
    let callback = Box::new(move |result| {
        if tx.send(result).is_err() {
            error!("checkpoint result could not be sent");
        }
    });
    self.start_checkpoint(callback);
    rx.await.unwrap()
}
/// Produces the circuit's performance profile, waiting for the profiling
/// operation to complete.
///
/// Async wrapper around [Self::start_graph_profile] that bridges the
/// callback through a oneshot channel.
pub async fn async_graph_profile(&self) -> Result<GraphProfile, ControllerError> {
    let (tx, rx) = oneshot::channel();
    let callback = Box::new(move |profile| {
        if tx.send(profile).is_err() {
            error!("`/dump_profile` result could not be sent");
        }
    });
    self.start_graph_profile(callback);
    rx.await.unwrap()
}
/// Produces the circuit's performance profile in JSON form, waiting for
/// the profiling operation to complete.
///
/// Async wrapper around [Self::start_json_profile] that bridges the
/// callback through a oneshot channel.
pub async fn async_json_profile(&self) -> Result<DbspProfile, ControllerError> {
    let (tx, rx) = oneshot::channel();
    let callback = Box::new(move |profile| {
        if tx.send(profile).is_err() {
            error!("`/dump_json_profile` result could not be sent");
        }
    });
    self.start_json_profile(callback);
    rx.await.unwrap()
}
pub async fn async_samply_profile(&self, duration: u64) -> Result<Vec<u8>, AnyError> {
#[cfg(not(unix))]
{
anyhow::bail!(
"samply is not supported on this platform; only supported on unix platforms"
)
}
let err_msg = "is Samply installed and in `$PATH`? try: `curl --proto '=https' --tlsv1.2 -LsSf https://github.com/feldera/samply/releases/download/v0.13.2/samply-installer.sh | sh`".to_string();
let version = tokio::process::Command::new("samply")
.arg("--version")
.output()
.await
.with_context(|| format!("failed to get samply version; {err_msg}"))?;
let version = semver::Version::parse(
String::from_utf8(version.stdout)?
.split_whitespace()
.nth(1)
.unwrap_or("0.0.0"),
)
.with_context(|| format!("failed to parse samply version; {err_msg}"))?;
let req = semver::VersionReq::parse(">=0.13.2").unwrap();
if !req.matches(&version) {
anyhow::bail!(
"samply version is too old (found: {}, required: >= {}); {err_msg}",
version,
req
);
}
info!(
"collecting samply profile for the next {} seconds",
duration
);
let temp = tempfile::Builder::new()
.prefix("samply_profile_")
.suffix(".json.gz")
.rand_bytes(10)
.tempfile()
.context("failed to create tempfile to store samply profiles")?;
let profile_file = temp
.path()
.to_str()
.context("failed to convert path to samply profile to str")?;
let mut cmd = tokio::process::Command::new("samply");
let mut child = cmd
.args([
"record",
"-p",
&std::process::id().to_string(),
"-o",
profile_file,
"--save-only",
"--presymbolicate",
])
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.context("failed to spawn samply process")?;