Skip to content
19 changes: 14 additions & 5 deletions sdk/python/feast/infra/registry/registry.py
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,12 @@ def list_data_sources(
def apply_data_source(
self, data_source: DataSource, project: str, commit: bool = True
):
"""Apply a data source to the registry with project-scoped deduplication.

Filters existing data sources by both name and project (fixes feast-dev/feast#6206),
preserving the original created_timestamp if the source already exists in the
target project.
"""
now = _utc_now()
if not data_source.created_timestamp:
data_source.created_timestamp = now
Expand All @@ -394,7 +400,10 @@ def apply_data_source(
registry = self._prepare_registry_for_changes(project)

for idx, existing_data_source_proto in enumerate(registry.data_sources):
if existing_data_source_proto.name == data_source.name:
if (
existing_data_source_proto.name == data_source.name
and existing_data_source_proto.project == project
):
existing_data_source = DataSource.from_proto(existing_data_source_proto)
# Check if the data source has actually changed
if existing_data_source == data_source:
Expand Down Expand Up @@ -423,7 +432,7 @@ def delete_data_source(self, name: str, project: str, commit: bool = True):
for idx, data_source_proto in enumerate(
self.cached_registry_proto.data_sources
):
if data_source_proto.name == name:
if data_source_proto.name == name and data_source_proto.project == project:
del self.cached_registry_proto.data_sources[idx]
if commit:
self.commit()
Expand Down Expand Up @@ -688,13 +697,13 @@ def apply_feature_view(

if not is_latest:
# Explicit version: check if it exists (pin/revert) or not (forward declaration).
# Note: The file registry is last-write-wins for true concurrent races
# Note: The file registry is last-write-wins for true concurrent races —
# this is a pre-existing limitation for all file registry operations.
# For multi-client environments, use the SQL registry.
record = self._get_version_record(feature_view.name, project, pin_version)

if record is not None:
# Version exists pin/revert to that snapshot
# Version exists → pin/revert to that snapshot
# Check that the user hasn't also modified the definition.
# Compare user's FV (with version="latest") against active FV.
self._prepare_registry_for_changes(project)
Expand Down Expand Up @@ -735,7 +744,7 @@ def apply_feature_view(
# Apply the restored FV using the standard path below
feature_view = restored_fv
else:
# Version doesn't exist forward declaration: create it
# Version doesn't exist → forward declaration: create it
feature_view.current_version_number = pin_version
feature_view_proto = feature_view.to_proto()
feature_view_proto.spec.project = project
Expand Down
105 changes: 105 additions & 0 deletions sdk/python/tests/integration/registration/test_universal_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,111 @@ def test_apply_data_source_with_timestamps(test_registry):
test_registry.teardown()


@pytest.mark.integration
@pytest.mark.parametrize(
"test_registry",
all_fixtures,
)
def test_apply_data_source_cross_project_isolation(test_registry):
"""Test that apply_data_source uses project-scoped filtering.

Regression test for https://github.com/feast-dev/feast/issues/6206:
applying a data source to one project must not overwrite the data source
with the same name in a different project.

See: feast-dev/feast#6298
"""
project_a = "project_a"
project_b = "project_b"

source_a = FileSource(
name="shared_source_name",
file_format=ParquetFormat(),
path="file://feast/project_a.parquet",
timestamp_field="ts_col",
)
source_b = FileSource(
name="shared_source_name",
file_format=ParquetFormat(),
path="file://feast/project_b.parquet",
timestamp_field="ts_col",
)

test_registry.apply_data_source(source_a, project_a, commit=True)
test_registry.apply_data_source(source_b, project_b, commit=True)

# Each project should have exactly its own source
sources_a = test_registry.list_data_sources(project_a)
sources_b = test_registry.list_data_sources(project_b)
assert len(sources_a) == 1
assert len(sources_b) == 1

# Paths must be project-specific — not overwritten cross-project
assert sources_a[0].path == "file://feast/project_a.parquet"
assert sources_b[0].path == "file://feast/project_b.parquet"

# Re-apply source_b with updated path: must not bleed into project_a
source_b_updated = FileSource(
name="shared_source_name",
file_format=ParquetFormat(),
path="file://feast/project_b_v2.parquet",
timestamp_field="ts_col",
)
test_registry.apply_data_source(source_b_updated, project_b, commit=True)

sources_a_after = test_registry.list_data_sources(project_a)
assert len(sources_a_after) == 1
assert sources_a_after[0].path == "file://feast/project_a.parquet", (
"apply_data_source for project_b must not overwrite project_a's source"
)

test_registry.teardown()


@pytest.mark.integration
@pytest.mark.parametrize(
"test_registry",
all_fixtures,
)
def test_delete_data_source_project_scoped(test_registry):
"""Test that delete_data_source only removes the source from the given project.

Regression test for https://github.com/feast-dev/feast/issues/6206:
deleting a data source from one project must not delete the data source
with the same name from another project.
"""
project_a = "project_a"
project_b = "project_b"

source_a = FileSource(
name="shared_source_name",
file_format=ParquetFormat(),
path="file://feast/project_a.parquet",
timestamp_field="ts_col",
)
source_b = FileSource(
name="shared_source_name",
file_format=ParquetFormat(),
path="file://feast/project_b.parquet",
timestamp_field="ts_col",
)

test_registry.apply_data_source(source_a, project_a, commit=True)
test_registry.apply_data_source(source_b, project_b, commit=True)

# Delete the source from project_a only
test_registry.delete_data_source("shared_source_name", project_a, commit=True)

# project_a should have no sources; project_b should be unaffected
sources_a = test_registry.list_data_sources(project_a)
sources_b = test_registry.list_data_sources(project_b)
assert len(sources_a) == 0, "Source should be deleted from project_a"
assert len(sources_b) == 1, "Source in project_b must not be deleted"
assert sources_b[0].path == "file://feast/project_b.parquet"

test_registry.teardown()


@pytest.mark.integration
@pytest.mark.parametrize(
"test_registry",
Expand Down