Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions sdk/python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from feast import FeatureStore
from feast.wait import wait_retry_backoff
from tests.data.data_creator import create_dataset
from tests.data.data_creator import create_basic_driver_dataset
from tests.integration.feature_repos.integration_test_repo_config import (
IntegrationTestRepoConfig,
)
Expand Down Expand Up @@ -351,7 +351,7 @@ def universal_data_sources(environment) -> TestData:

@pytest.fixture
def e2e_data_sources(environment: Environment):
df = create_dataset()
df = create_basic_driver_dataset()
data_source = environment.data_source_creator.create_data_source(
df, environment.feature_store.project, field_mapping={"ts_1": "ts"},
)
Expand Down
2 changes: 1 addition & 1 deletion sdk/python/tests/data/data_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from feast.types import FeastType, Float32, Int32, Int64, String


def create_dataset(
def create_basic_driver_dataset(
entity_type: FeastType = Int32,
feature_dtype: str = None,
feature_is_list: bool = False,
Expand Down
42 changes: 14 additions & 28 deletions sdk/python/tests/integration/e2e/test_python_feature_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,8 @@

from feast.feast_object import FeastObject
from feast.feature_server import get_app
from tests.integration.feature_repos.integration_test_repo_config import (
IntegrationTestRepoConfig,
)
from tests.integration.feature_repos.repo_configuration import (
construct_test_environment,
construct_universal_feature_views,
construct_universal_test_data,
)
from tests.integration.feature_repos.universal.entities import (
customer,
Expand Down Expand Up @@ -63,16 +58,13 @@ def test_get_online_features(python_fs_client):
@pytest.mark.integration
@pytest.mark.universal_online_stores
def test_push(python_fs_client):
# TODO(felixwang9817): Note that we choose an entity value of 102 here since it is not included
# in the existing range of entity values (1-49). This allows us to push data for this test
# without affecting other tests. This decision is tech debt, and should be resolved by finding a
# better way to isolate data sources across tests.
initial_temp = get_temperatures(python_fs_client, location_ids=[1])[0]
json_data = json.dumps(
{
"push_source_name": "location_stats_push_source",
"df": {
"location_id": [102],
"temperature": [4],
"location_id": [1],
"temperature": [initial_temp * 100],
"event_timestamp": [str(datetime.utcnow())],
"created": [str(datetime.utcnow())],
},
Expand All @@ -82,7 +74,7 @@ def test_push(python_fs_client):

# Check new pushed temperature is fetched
assert response.status_code == 200
assert get_temperatures(python_fs_client, location_ids=[102]) == [4]
assert get_temperatures(python_fs_client, location_ids=[1]) == [initial_temp * 100]


def get_temperatures(client, location_ids: List[int]):
Expand All @@ -102,20 +94,14 @@ def get_temperatures(client, location_ids: List[int]):


@pytest.fixture
def python_fs_client(request):
config = IntegrationTestRepoConfig()
environment = construct_test_environment(config, fixture_request=request)
def python_fs_client(environment, universal_data_sources, request):
fs = environment.feature_store
try:
entities, datasets, data_sources = construct_universal_test_data(environment)
feature_views = construct_universal_feature_views(data_sources)
feast_objects: List[FeastObject] = []
feast_objects.extend(feature_views.values())
feast_objects.extend([driver(), customer(), location()])
fs.apply(feast_objects)
fs.materialize(environment.start_date, environment.end_date)
client = TestClient(get_app(fs))
yield client
finally:
fs.teardown()
environment.data_source_creator.teardown()
entities, datasets, data_sources = universal_data_sources
feature_views = construct_universal_feature_views(data_sources)
feast_objects: List[FeastObject] = []
feast_objects.extend(feature_views.values())
feast_objects.extend([driver(), customer(), location()])
fs.apply(feast_objects)
fs.materialize(environment.start_date, environment.end_date)
client = TestClient(get_app(fs))
yield client
83 changes: 42 additions & 41 deletions sdk/python/tests/integration/offline_store/test_offline_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,52 +7,54 @@

from feast import FeatureView, Field
from feast.types import Float32, Int32
from tests.integration.feature_repos.repo_configuration import (
construct_universal_feature_views,
)
from tests.integration.feature_repos.universal.entities import driver

# TODO(felixwang9817): Add a unit test that checks that write_to_offline_store can reorder columns.
# This should only happen after https://github.com/feast-dev/feast/issues/2797 is fixed.


@pytest.mark.integration
@pytest.mark.universal_offline_stores
@pytest.mark.universal_online_stores(only=["sqlite"])
def test_writing_incorrect_schema_fails(environment, universal_data_sources):
"""Tests that writing a dataframe with an incorrect schema fails."""
def test_reorder_columns(environment, universal_data_sources):
"""Tests that a dataframe with columns in the wrong order is reordered."""
store = environment.feature_store
_, _, data_sources = universal_data_sources
driver_entity = driver()
driver_stats = FeatureView(
name="driver_stats",
entities=[driver_entity],
schema=[
Field(name="avg_daily_trips", dtype=Int32),
Field(name="conv_rate", dtype=Float32),
Field(name="acc_rate", dtype=Float32),
],
source=data_sources.driver,
)
feature_views = construct_universal_feature_views(data_sources)
driver_fv = feature_views.driver
store.apply([driver(), driver_fv])

now = datetime.utcnow()
ts = pd.Timestamp(now).round("ms")

entity_df = pd.DataFrame.from_dict(
{"driver_id": [1001, 1002], "event_timestamp": [ts - timedelta(hours=3), ts]}
# This dataframe has columns in the wrong order.
df_to_write = pd.DataFrame.from_dict(
{
"avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
"created": [ts, ts],
"conv_rate": [random.random(), random.random()],
"event_timestamp": [ts, ts],
"acc_rate": [random.random(), random.random()],
"driver_id": [1001, 1001],
},
)

store.apply([driver_entity, driver_stats])
df = store.get_historical_features(
entity_df=entity_df,
features=[
"driver_stats:conv_rate",
"driver_stats:acc_rate",
"driver_stats:avg_daily_trips",
],
full_feature_names=False,
).to_df()
store.write_to_offline_store(
driver_fv.name, df_to_write, allow_registry_cache=False
)

assert df["conv_rate"].isnull().all()
assert df["acc_rate"].isnull().all()
assert df["avg_daily_trips"].isnull().all()

@pytest.mark.integration
@pytest.mark.universal_offline_stores
def test_writing_incorrect_schema_fails(environment, universal_data_sources):
"""Tests that writing a dataframe with an incorrect schema fails."""
store = environment.feature_store
_, _, data_sources = universal_data_sources
feature_views = construct_universal_feature_views(data_sources)
driver_fv = feature_views.driver
store.apply([driver(), driver_fv])

now = datetime.utcnow()
ts = pd.Timestamp(now).round("ms")

expected_df = pd.DataFrame.from_dict(
{
Expand All @@ -65,13 +67,12 @@ def test_writing_incorrect_schema_fails(environment, universal_data_sources):
)
with pytest.raises(ValueError):
store.write_to_offline_store(
driver_stats.name, expected_df, allow_registry_cache=False
driver_fv.name, expected_df, allow_registry_cache=False
)


@pytest.mark.integration
@pytest.mark.universal_offline_stores
@pytest.mark.universal_online_stores(only=["sqlite"])
def test_writing_consecutively_to_offline_store(environment, universal_data_sources):
store = environment.feature_store
_, _, data_sources = universal_data_sources
Expand All @@ -96,7 +97,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
entity_df = pd.DataFrame.from_dict(
{
"driver_id": [1001, 1001],
"event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3)],
"event_timestamp": [ts + timedelta(hours=3), ts + timedelta(hours=4)],
}
)

Expand All @@ -117,7 +118,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour

first_df = pd.DataFrame.from_dict(
{
"event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3)],
"event_timestamp": [ts + timedelta(hours=3), ts + timedelta(hours=4)],
"driver_id": [1001, 1001],
"conv_rate": [random.random(), random.random()],
"acc_rate": [random.random(), random.random()],
Expand Down Expand Up @@ -155,7 +156,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour

second_df = pd.DataFrame.from_dict(
{
"event_timestamp": [ts - timedelta(hours=1), ts],
"event_timestamp": [ts + timedelta(hours=5), ts + timedelta(hours=6)],
"driver_id": [1001, 1001],
"conv_rate": [random.random(), random.random()],
"acc_rate": [random.random(), random.random()],
Expand All @@ -172,10 +173,10 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
{
"driver_id": [1001, 1001, 1001, 1001],
"event_timestamp": [
ts - timedelta(hours=4),
ts - timedelta(hours=3),
ts - timedelta(hours=1),
ts,
ts + timedelta(hours=3),
ts + timedelta(hours=4),
ts + timedelta(hours=5),
ts + timedelta(hours=6),
],
}
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,30 @@
from tests.integration.feature_repos.repo_configuration import (
construct_universal_feature_views,
)
from tests.integration.feature_repos.universal.entities import (
customer,
driver,
location,
)
from tests.integration.feature_repos.universal.entities import location


@pytest.mark.integration
@pytest.mark.universal_offline_stores
@pytest.mark.universal_online_stores(only=["sqlite"])
def test_push_features_and_read_from_offline_store(environment, universal_data_sources):
def test_push_features_and_read(environment, universal_data_sources):
store = environment.feature_store

(_, _, data_sources) = universal_data_sources
_, _, data_sources = universal_data_sources
feature_views = construct_universal_feature_views(data_sources)
now = pd.Timestamp(datetime.datetime.utcnow()).round("ms")
location_fv = feature_views.pushed_locations
store.apply([location(), location_fv])

store.apply([driver(), customer(), location(), *feature_views.values()])
entity_df = pd.DataFrame.from_dict({"location_id": [100], "event_timestamp": [now]})
now = pd.Timestamp(datetime.datetime.utcnow()).round("ms")
entity_df = pd.DataFrame.from_dict({"location_id": [1], "event_timestamp": [now]})

before_df = store.get_historical_features(
entity_df=entity_df,
features=["pushable_location_stats:temperature"],
full_feature_names=False,
).to_df()

# TODO(felixwang9817): Note that we choose an entity value of 100 here since it is not included
# in the existing range of entity values (1-49). This allows us to push data for this test
# without affecting other tests. This decision is tech debt, and should be resolved by finding a
# better way to isolate data sources across tests.
data = {
"event_timestamp": [now],
"location_id": [100],
"location_id": [1],
"temperature": [4],
"created": [now],
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,20 @@
from tests.integration.feature_repos.repo_configuration import (
construct_universal_feature_views,
)
from tests.integration.feature_repos.universal.entities import (
customer,
driver,
location,
)
from tests.integration.feature_repos.universal.entities import location


@pytest.mark.integration
@pytest.mark.universal_online_stores
def test_push_features_and_read(environment, universal_data_sources):
store = environment.feature_store

(_, datasets, data_sources) = universal_data_sources
_, _, data_sources = universal_data_sources
feature_views = construct_universal_feature_views(data_sources)
location_fv = feature_views.pushed_locations
store.apply([location(), location_fv])

store.apply([driver(), customer(), location(), *feature_views.values()])

# TODO(felixwang9817): Note that we choose an entity value of 101 here since it is not included
# in the existing range of entity values (1-49). This allows us to push data for this test
# without affecting other tests. This decision is tech debt, and should be resolved by finding a
# better way to isolate data sources across tests.
data = {
"location_id": [101],
"location_id": [1],
"temperature": [4],
"event_timestamp": [pd.Timestamp(datetime.datetime.utcnow()).round("ms")],
"created": [pd.Timestamp(datetime.datetime.utcnow()).round("ms")],
Expand All @@ -39,8 +30,8 @@ def test_push_features_and_read(environment, universal_data_sources):

online_resp = store.get_online_features(
features=["pushable_location_stats:temperature"],
entity_rows=[{"location_id": 101}],
entity_rows=[{"location_id": 1}],
)
online_resp_dict = online_resp.to_dict()
assert online_resp_dict["location_id"] == [101]
assert online_resp_dict["location_id"] == [1]
assert online_resp_dict["temperature"] == [4]
Loading