Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions crates/pipeline-manager/src/db/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ pub enum DBError {
},
StorageStatusImmutableUnlessStopped {
resources_status: ResourcesStatus,
resources_desired_status: ResourcesDesiredStatus,
current_status: StorageStatus,
new_status: StorageStatus,
},
Expand All @@ -199,6 +200,7 @@ pub enum DBError {
NoRuntimeStatusWhileProvisioned,
PreconditionViolation(String),
CannotStartWithUndismissedError,
CannotStartWhileClearingStorage,
DismissErrorRestrictedToFullyStopped,
InitialImmutableUnlessStopped,
InitialStandbyNotAllowed,
Expand Down Expand Up @@ -592,13 +594,14 @@ impl Display for DBError {
}
DBError::StorageStatusImmutableUnlessStopped {
resources_status,
resources_desired_status,
current_status,
new_status,
} => {
write!(
f,
"Cannot transition storage status from '{current_status}' to '{new_status}' with resources status '{resources_status}'. \
Storage status cannot be changed unless the pipeline is stopped."
"Cannot transition storage status from '{current_status}' to '{new_status}' with resources status '{resources_status}' and desired status '{resources_desired_status}'. \
Storage status cannot be changed unless the pipeline is fully stopped."
)
}
DBError::IllegalPipelineAction {
Expand Down Expand Up @@ -665,6 +668,13 @@ impl Display for DBError {
first call `/dismiss_error`, after which `/start?dismiss_error=false` is again possible."
)
}
DBError::CannotStartWhileClearingStorage => {
write!(
f,
"Cannot process `/start` if the `storage_status` is `Clearing`. \
Wait for the storage to become cleared before trying again."
)
}
DBError::DismissErrorRestrictedToFullyStopped => {
write!(
f,
Expand Down Expand Up @@ -796,6 +806,7 @@ impl DetailedError for DBError {
Self::PreconditionViolation(..) => Cow::from("PreconditionViolation"),
Self::ResumeWhileNotProvisioned => Cow::from("ResumeWhileNotProvisioned"),
Self::CannotStartWithUndismissedError => Cow::from("CannotStartWithUndismissedError"),
Self::CannotStartWhileClearingStorage => Cow::from("CannotStartWhileClearingStorage"),
Self::DismissErrorRestrictedToFullyStopped => {
Cow::from("DismissErrorRestrictedToFullyStopped")
}
Expand Down Expand Up @@ -873,6 +884,7 @@ impl ResponseError for DBError {
Self::NoRuntimeStatusWhileProvisioned => StatusCode::INTERNAL_SERVER_ERROR,
Self::PreconditionViolation(..) => StatusCode::INTERNAL_SERVER_ERROR,
Self::CannotStartWithUndismissedError => StatusCode::BAD_REQUEST,
Self::CannotStartWhileClearingStorage => StatusCode::BAD_REQUEST,
Self::DismissErrorRestrictedToFullyStopped => StatusCode::BAD_REQUEST,
Self::InitialImmutableUnlessStopped => StatusCode::BAD_REQUEST,
Self::InitialStandbyNotAllowed => StatusCode::BAD_REQUEST,
Expand Down
2 changes: 2 additions & 0 deletions crates/pipeline-manager/src/db/operations/pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1178,6 +1178,7 @@ pub(crate) async fn set_deployment_resources_desired_status(

// Check that the desired status can be set
validate_resources_desired_status_transition(
current.storage_status,
current.deployment_resources_status,
current.deployment_resources_desired_status,
final_deployment_error.clone(),
Expand Down Expand Up @@ -1738,6 +1739,7 @@ pub(crate) async fn set_storage_status(
// Check that the transition is permitted
validate_storage_status_transition(
current.deployment_resources_status,
current.deployment_resources_desired_status,
current.storage_status,
new_storage_status,
)?;
Expand Down
38 changes: 38 additions & 0 deletions crates/pipeline-manager/src/db/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1893,6 +1893,14 @@ async fn pipeline_deployment() {
)
.await
.unwrap();
assert!(matches!(
handle
.db
.transit_storage_status_to_clearing_if_not_cleared(tenant_id, "example1",)
.await
.unwrap_err(),
DBError::StorageStatusImmutableUnlessStopped { .. }
));
handle
.db
.transit_deployment_resources_status_to_provisioning(
Expand Down Expand Up @@ -1962,6 +1970,30 @@ async fn pipeline_deployment() {
.transit_deployment_resources_status_to_stopped(tenant_id, pipeline1.id, Version(1))
.await
.unwrap();
handle
.db
.transit_storage_status_to_clearing_if_not_cleared(tenant_id, &pipeline1.name)
.await
.unwrap();
assert!(matches!(
handle
.db
.set_deployment_resources_desired_status_provisioned(
tenant_id,
"example1",
RuntimeDesiredStatus::Paused,
BootstrapPolicy::default(),
false,
)
.await
.unwrap_err(),
DBError::CannotStartWhileClearingStorage { .. }
));
handle
.db
.transit_storage_status_to_cleared(tenant_id, pipeline1.id)
.await
.unwrap();
handle
.db
.set_deployment_resources_desired_status_provisioned(
Expand Down Expand Up @@ -4256,6 +4288,7 @@ impl Storage for Mutex<DbModel> {
};
let new_resources_desired_status = ResourcesDesiredStatus::Provisioned;
validate_resources_desired_status_transition(
pipeline.storage_status,
pipeline.deployment_resources_status,
pipeline.deployment_resources_desired_status,
new_deployment_error.clone(),
Expand Down Expand Up @@ -4306,6 +4339,7 @@ impl Storage for Mutex<DbModel> {
if pipeline.deployment_resources_status != ResourcesStatus::Provisioned {
let new_resources_desired_status = ResourcesDesiredStatus::Stopped;
validate_resources_desired_status_transition(
pipeline.storage_status,
pipeline.deployment_resources_status,
pipeline.deployment_resources_desired_status,
pipeline.deployment_error.clone(),
Expand Down Expand Up @@ -4340,6 +4374,7 @@ impl Storage for Mutex<DbModel> {
let mut pipeline = self.get_pipeline(tenant_id, pipeline_name).await?;
let new_resources_desired_status = ResourcesDesiredStatus::Stopped;
validate_resources_desired_status_transition(
pipeline.storage_status,
pipeline.deployment_resources_status,
pipeline.deployment_resources_desired_status,
pipeline.deployment_error.clone(),
Expand Down Expand Up @@ -4384,6 +4419,7 @@ impl Storage for Mutex<DbModel> {
}
validate_storage_status_transition(
pipeline.deployment_resources_status,
pipeline.deployment_resources_desired_status,
pipeline.storage_status,
StorageStatus::InUse,
)?;
Expand Down Expand Up @@ -4666,6 +4702,7 @@ impl Storage for Mutex<DbModel> {
let new_status = StorageStatus::Clearing;
validate_storage_status_transition(
pipeline.deployment_resources_status,
pipeline.deployment_resources_desired_status,
pipeline.storage_status,
new_status,
)?;
Expand All @@ -4690,6 +4727,7 @@ impl Storage for Mutex<DbModel> {
let new_status = StorageStatus::Cleared;
validate_storage_status_transition(
pipeline.deployment_resources_status,
pipeline.deployment_resources_desired_status,
pipeline.storage_status,
new_status,
)?;
Expand Down
18 changes: 17 additions & 1 deletion crates/pipeline-manager/src/db/types/resources_status.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use feldera_types::error::ErrorResponse;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::fmt::Display;
use tracing::error;
use utoipa::ToSchema;

/// Pipeline resources status.
Expand Down Expand Up @@ -174,7 +175,7 @@ pub fn validate_resources_status_transition(
// Check rules on transitioning resources status
if matches!(
(storage_status, current_status, new_status),
(StorageStatus::Cleared | StorageStatus::InUse, ResourcesStatus::Stopped, ResourcesStatus::Provisioning)
(StorageStatus::Cleared | StorageStatus::InUse, ResourcesStatus::Stopped, ResourcesStatus::Provisioning)
| (StorageStatus::Cleared | StorageStatus::InUse, ResourcesStatus::Stopped, ResourcesStatus::Stopping)
| (StorageStatus::InUse, ResourcesStatus::Provisioning, ResourcesStatus::Provisioning)
| (StorageStatus::InUse, ResourcesStatus::Provisioning, ResourcesStatus::Provisioned)
Expand All @@ -184,6 +185,16 @@ pub fn validate_resources_status_transition(
| (StorageStatus::Cleared | StorageStatus::InUse, ResourcesStatus::Stopping, ResourcesStatus::Stopped)
) {
Ok(())
} else if matches!(
(storage_status, current_status, new_status),
(_, ResourcesStatus::Stopped | ResourcesStatus::Provisioning | ResourcesStatus::Provisioned, ResourcesStatus::Stopping)
| (_, ResourcesStatus::Stopping, ResourcesStatus::Stopped)
) {
// As a fail-safe, it's always possible to transition from any other status to Stopping and
// from Stopping to Stopped. This however should not occur (instead the first matches should
// have already caught them).
error!("The resources status transition {current_status:?} -> {new_status:?} (storage status: {storage_status:?}) is taking place. This is due to an internal error, and is permitted only in order to recover from an invalid status. Please file a bug report.");
Ok(())
} else {
Err(DBError::InvalidResourcesStatusTransition {
storage_status,
Expand All @@ -195,6 +206,7 @@ pub fn validate_resources_status_transition(

/// Validates the resources desired status transition from current status to a new one.
pub fn validate_resources_desired_status_transition(
storage_status: StorageStatus,
status: ResourcesStatus,
current_desired_status: ResourcesDesiredStatus,
new_deployment_error: Option<ErrorResponse>,
Expand Down Expand Up @@ -222,6 +234,10 @@ pub fn validate_resources_desired_status_transition(
// If it experienced an error, it needs to be dismissed first
return Err(DBError::CannotStartWithUndismissedError);
}
if storage_status == StorageStatus::Clearing {
// If it is still clearing storage, wait for that to complete
return Err(DBError::CannotStartWhileClearingStorage);
}
Ok(())
}
}
Expand Down
25 changes: 24 additions & 1 deletion crates/pipeline-manager/src/db/types/storage.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::db::error::DBError;
use crate::db::types::resources_status::ResourcesStatus;
use crate::db::types::resources_status::{ResourcesDesiredStatus, ResourcesStatus};
use serde::{Deserialize, Serialize};
use std::convert::TryFrom;
use std::fmt;
Expand Down Expand Up @@ -79,12 +79,35 @@ impl Display for StorageStatus {
/// Validates the storage status transition from current status to a new one.
pub fn validate_storage_status_transition(
resources_status: ResourcesStatus,
resources_desired_status: ResourcesDesiredStatus,
current_status: StorageStatus,
new_status: StorageStatus,
) -> Result<(), DBError> {
if resources_status != ResourcesStatus::Stopped {
return Err(DBError::StorageStatusImmutableUnlessStopped {
resources_status,
resources_desired_status,
current_status,
new_status,
});
}
if resources_desired_status != ResourcesDesiredStatus::Stopped
&& !matches!(
(resources_desired_status, current_status, new_status),
(
ResourcesDesiredStatus::Provisioned,
StorageStatus::Cleared,
StorageStatus::InUse
) | (
ResourcesDesiredStatus::Provisioned,
StorageStatus::InUse,
StorageStatus::InUse
)
)
{
return Err(DBError::StorageStatusImmutableUnlessStopped {
resources_status,
resources_desired_status,
current_status,
new_status,
});
Expand Down
5 changes: 5 additions & 0 deletions docs.feldera.com/docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ import TabItem from '@theme/TabItem';

## Unreleased

Starting a pipeline while storage is still clearing (`storage_status=Clearing`) now returns
`CannotStartWhileClearingStorage` instead of succeeding. Clearing storage while a start
is in progress but hasn't yet transitioned to `Provisioning` now returns
`StorageStatusImmutableUnlessStopped` instead of succeeding.

Backward-incompatible Delta Lake output connector change. The new `max_retries` setting configures
the number of times the connector retries failed Delta Lake operations like writing Parquet files
and committing transactions. The setting is unset by default, causing the connector to retry
Expand Down
10 changes: 8 additions & 2 deletions python/tests/platform/test_pipeline_lifecycle.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from unittest import skip
from feldera.enums import PipelineStatus, ProgramStatus, StorageStatus
from feldera.rest.errors import FelderaAPIError
import time
Expand Down Expand Up @@ -387,8 +386,15 @@ def test_pipeline_clear_using_api(pipeline_name):
pipeline.clear_storage(wait=False)
assert pipeline.storage_status() in [StorageStatus.CLEARING, StorageStatus.CLEARED]

# Start just after might yield an error if it is still clearing
try:
pipeline.start()
except FelderaAPIError as e:
assert e.error_code == "CannotStartWhileClearingStorage"
pipeline.stop(force=True)
pipeline.clear_storage()


@skip # Passing this test requires denying clearing when desired resources status is provisioned.
@gen_pipeline_name
def test_pipeline_clear_while_desired_provisioned(pipeline_name):
"""
Expand Down
Loading