-
Notifications
You must be signed in to change notification settings - Fork 108
[dbsp] Memory pressure mechanism #5901
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f86ddc4
d1c55fb
c24b947
14d7148
b3c8484
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -61,6 +61,7 @@ use dbsp::circuit::metrics::{ | |
| use dbsp::circuit::tokio::TOKIO; | ||
| use dbsp::circuit::{CheckpointCommitter, CircuitStorageConfig, DevTweaks, Mode}; | ||
| use dbsp::storage::backend::{StorageBackend, StoragePath}; | ||
| use dbsp::utils::process_rss_bytes; | ||
| use dbsp::{ | ||
| DBSPHandle, | ||
| circuit::{CircuitConfig, Layout}, | ||
|
|
@@ -99,7 +100,6 @@ use governor::Quota; | |
| use governor::RateLimiter; | ||
| use itertools::Itertools; | ||
| use journal::StepMetadata; | ||
| use memory_stats::memory_stats; | ||
| use nonzero_ext::nonzero; | ||
| use rmpv::Value as RmpValue; | ||
| use serde_json::Value as JsonValue; | ||
|
|
@@ -810,10 +810,22 @@ impl Controller { | |
| /// Returns the current controller status in the form used by the external | ||
| /// API. | ||
| pub fn api_status(&self) -> ExternalControllerStatus { | ||
| let runtime = self.inner.runtime.upgrade(); | ||
| let memory_pressure = runtime | ||
| .as_ref() | ||
| .map(|runtime| runtime.current_memory_pressure()) | ||
| .unwrap_or_default(); | ||
| let memory_pressure_epoch = runtime | ||
| .as_ref() | ||
| .map(|runtime| runtime.memory_pressure_epoch()) | ||
| .unwrap_or_default(); | ||
|
|
||
| self.status().to_api_type( | ||
| self.can_suspend(), | ||
| self.pipeline_complete(), | ||
| self.inner.transaction_info.lock().unwrap().clone(), | ||
| memory_pressure, | ||
| memory_pressure_epoch, | ||
| ) | ||
| } | ||
|
|
||
|
|
@@ -1188,6 +1200,7 @@ impl Controller { | |
| F: MetricsFormatter, | ||
| { | ||
| metrics.process_metrics(labels); | ||
|
|
||
| metrics.gauge( | ||
| "records_input_buffered", | ||
| "Total amount of data currently buffered by all endpoints, in records.", | ||
|
|
@@ -1346,6 +1359,25 @@ impl Controller { | |
| ); | ||
|
|
||
| write_fbuf_slab_metrics(metrics, labels, &runtime.fbuf_slabs_stats()); | ||
|
|
||
| metrics.gauge( | ||
| "max_rss_bytes", | ||
| "Configured maximum process RSS in bytes. A value of 0 means no limit is configured.", | ||
| labels, | ||
| runtime.max_rss_bytes().unwrap_or(0), | ||
| ); | ||
| metrics.gauge( | ||
| "memory_pressure", | ||
| "Current memory pressure level in [0..3]: low=0, moderate=1, high=2, critical=3.", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i wonder what moderate and high do seems like a binary decision to me
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. they engage different backpressure mechanisms. |
||
| labels, | ||
| runtime.current_memory_pressure() as u8, | ||
| ); | ||
| metrics.gauge( | ||
| "memory_pressure_epoch", | ||
| "Monotonic counter incremented whenever memory pressure transitions to high/critical.", | ||
| labels, | ||
| runtime.memory_pressure_epoch(), | ||
| ); | ||
| } | ||
|
|
||
| metrics.counter( | ||
|
|
@@ -4220,6 +4252,7 @@ impl ControllerInit { | |
| // Can't change number of workers or hosts yet. | ||
| hosts: checkpoint_config.global.hosts, | ||
| workers: checkpoint_config.global.workers, | ||
| max_rss_mb: checkpoint_config.global.max_rss_mb, | ||
|
|
||
| // The checkpoint determines the fault tolerance model, but the | ||
| // pipeline manager can override the details of the | ||
|
|
@@ -4293,21 +4326,50 @@ impl ControllerInit { | |
| } | ||
|
|
||
| pub fn set_incarnation_uuid(&mut self, incarnation_uuid: Uuid) { | ||
| self.incarnation_uuid = incarnation_uuid; | ||
| self.incarnation_uuid = incarnation_uuid | ||
| } | ||
|
|
||
| fn circuit_config( | ||
| layout: Option<Layout>, | ||
| pipeline_config: &PipelineConfig, | ||
| storage: Option<CircuitStorageConfig>, | ||
| ) -> Result<CircuitConfig, ControllerError> { | ||
| let dev_tweaks = DevTweaks::from_config(&pipeline_config.global.dev_tweaks); | ||
|
|
||
| let mut max_rss_mb = pipeline_config.global.max_rss_mb; | ||
|
|
||
| if max_rss_mb.is_none() | ||
| && let Some(memory_mb_max) = &pipeline_config.global.resources.memory_mb_max | ||
| { | ||
| warn!( | ||
| "RSS memory limit ('max_rss_mb') is not set, but a Kubernetes memory limit \ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. print something if max_rss_mb is > than kubernetes limit?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. or if there is a significant difference between the to (>50%)?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that's a good idea, there's no good reason for that config |
||
| ('resources.memory_mb_max' = {memory_mb_max} MB) is configured. \ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. one wonders if there is even need for both
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there are several reasons to keep them separate (I'd love to address them somehow)
|
||
| Using the Kubernetes limit as the RSS memory limit." | ||
| ); | ||
| max_rss_mb = Some(*memory_mb_max); | ||
| } else if max_rss_mb.is_none() && pipeline_config.global.resources.memory_mb_max.is_none() { | ||
| warn!( | ||
| "RSS memory limit ('max_rss_mb') is not set, and no Kubernetes memory limit \ | ||
| ('resources.memory_mb_max') is configured. We recommend configuring at least one of these settings to avoid out-of-memory failures." | ||
| ); | ||
| } else if let Some(max_rss_mb) = max_rss_mb | ||
| && let Some(memory_mb_max) = &pipeline_config.global.resources.memory_mb_max | ||
| && max_rss_mb > *memory_mb_max | ||
| { | ||
| warn!( | ||
| "RSS memory limit ('max_rss_mb') is set to {max_rss_mb} MB exceeds the Kubernetes memory limit \ | ||
| ('resources.memory_mb_max' = {memory_mb_max} MB) is configured. This will likely cause out-of-memory failures." | ||
| ); | ||
| } | ||
|
|
||
| Ok(CircuitConfig { | ||
| layout: layout | ||
| .unwrap_or_else(|| Layout::new_solo(pipeline_config.global.workers as usize)), | ||
| max_rss_bytes: max_rss_mb.map(|mb| mb * 1_000_000), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should be a constant 1_000_000?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't expect the definition of 1MB to ever change. It's probably an overkill
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's more about documenting what you're converting to in the code e.g. you write it as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unit mismatch:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. adhoc.rs is wrong in this case. I don't think it's important the way it's used there, but I'll change it to 1_000_000 there. |
||
| pin_cpus: pipeline_config.global.pin_cpus.clone(), | ||
| storage, | ||
| mode: Mode::Persistent, | ||
| dev_tweaks: DevTweaks::from_config(&pipeline_config.global.dev_tweaks), | ||
| dev_tweaks, | ||
| }) | ||
| } | ||
|
|
||
|
|
@@ -4513,7 +4575,7 @@ impl StatisticsThread { | |
| total_processed_records: controller_status | ||
| .global_metrics | ||
| .num_total_processed_records(), | ||
| memory_bytes: memory_stats().map_or(0, |stats| stats.physical_mem as u64), | ||
| memory_bytes: process_rss_bytes().unwrap_or_default(), | ||
| storage_bytes, | ||
| }; | ||
| let mut time_series = controller_status.time_series.lock().unwrap(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -46,6 +46,7 @@ use base64::{Engine, prelude::BASE64_URL_SAFE_NO_PAD}; | |
| use chrono::{DateTime, Utc}; | ||
| use cpu_time::ProcessTime; | ||
| use crossbeam::sync::Unparker; | ||
| use dbsp::utils::process_rss_bytes; | ||
| use feldera_adapterlib::{ | ||
| errors::journal::ControllerError, | ||
| format::{BufferSize, ParseError}, | ||
|
|
@@ -61,11 +62,11 @@ use feldera_types::{ | |
| }, | ||
| config::{FtModel, PipelineConfig}, | ||
| coordination::Completion, | ||
| memory_pressure::MemoryPressure, | ||
| suspend::SuspendError, | ||
| time_series::SampleStatistics, | ||
| transaction::CommitProgressSummary, | ||
| }; | ||
| use memory_stats::memory_stats; | ||
| use parking_lot::{RwLock, RwLockReadGuard}; | ||
| use serde::{Deserialize, Serialize}; | ||
| use std::{ | ||
|
|
@@ -1171,6 +1172,8 @@ impl ControllerStatus { | |
| suspend_error: Result<(), SuspendError>, | ||
| pipeline_complete: bool, | ||
| transaction_info: TransactionInfo, | ||
| memory_pressure: MemoryPressure, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does the epoch need to be a separate field? could it be inside of MemoryPressure? (havent looked at the structure yet)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They are only loosely related. |
||
| memory_pressure_epoch: u64, | ||
| ) -> feldera_types::adapter_stats::ExternalControllerStatus { | ||
| use feldera_types::adapter_stats; | ||
|
|
||
|
|
@@ -1229,12 +1232,12 @@ impl ControllerStatus { | |
| .num_milliseconds() | ||
| .try_into() | ||
| .unwrap_or(0), | ||
| rss_bytes: if let Some(usage) = memory_stats() { | ||
| usage.physical_mem as u64 | ||
| } else { | ||
| rss_bytes: process_rss_bytes().unwrap_or_else(|| { | ||
| error!("Failed to fetch process RSS"); | ||
| 0 | ||
| }, | ||
| }), | ||
| memory_pressure, | ||
| memory_pressure_epoch, | ||
| cpu_msecs: match ProcessTime::try_now() { | ||
| Ok(time) => time.as_duration().as_millis() as u64, | ||
| Err(e) => { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.