Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions docs/getting-started/concepts/feature-view.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ Feature names must be unique within a [feature view](feature-view.md#feature-vie
On demand feature views allow users to use existing features and request time data (features only available at request time) to transform and create new features. Users define Python transformation logic that is executed in both the historical retrieval and online retrieval paths:

```python
from feast import Field, Float64, RequestSource

# Define a request data source which encodes features / information only
# available at request time (e.g. part of the user initiated HTTP request)
input_request = RequestSource(
Expand All @@ -150,13 +152,13 @@ input_request = RequestSource(

# Use the input data and feature view features to create new features
@on_demand_feature_view(
inputs={
sources={
'driver_hourly_stats': driver_hourly_stats_view,
'vals_to_add': input_request
},
features=[
Feature(name='conv_rate_plus_val1', dtype=ValueType.DOUBLE),
Feature(name='conv_rate_plus_val2', dtype=ValueType.DOUBLE)
schema=[
Field(name='conv_rate_plus_val1', dtype=Float64),
Field(name='conv_rate_plus_val2', dtype=Float64)
]
)
def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:
Expand Down
10 changes: 6 additions & 4 deletions docs/reference/alpha-on-demand-feature-view.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ See [https://github.com/feast-dev/on-demand-feature-views-demo](https://github.c
We register `RequestDataSource` inputs and the transform in `on_demand_feature_view`:

```python
from feast import Field, Float64, RequestSource

# Define a request data source which encodes features / information only
# available at request time (e.g. part of the user initiated HTTP request)
input_request = RequestDataSource(
Expand All @@ -40,13 +42,13 @@ input_request = RequestDataSource(

# Use the input data and feature view features to create new features
@on_demand_feature_view(
inputs={
sources={
'driver_hourly_stats': driver_hourly_stats_view,
'vals_to_add': input_request
},
features=[
Feature(name='conv_rate_plus_val1', dtype=ValueType.DOUBLE),
Feature(name='conv_rate_plus_val2', dtype=ValueType.DOUBLE)
schema=[
Field(name='conv_rate_plus_val1', dtype=Float64),
Field(name='conv_rate_plus_val2', dtype=Float64)
]
)
def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:
Expand Down
12 changes: 6 additions & 6 deletions docs/tutorials/validating-historical-features.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ pyarrow.parquet.write_table(entities_2019_table, "entities.parquet")
import pyarrow.parquet
import pandas as pd

from feast import Feature, FeatureView, Entity, FeatureStore, Field, Float64, Int64
from feast import FeatureView, Entity, FeatureStore, Field, Float64, Int64
from feast.value_type import ValueType
from feast.data_format import ParquetFormat
from feast.on_demand_feature_view import on_demand_feature_view
Expand Down Expand Up @@ -153,11 +153,11 @@ trips_stats_fv = FeatureView(

```python
@on_demand_feature_view(
features=[
Feature("avg_fare", ValueType.DOUBLE),
Feature("avg_speed", ValueType.DOUBLE),
Feature("avg_trip_seconds", ValueType.DOUBLE),
Feature("earned_per_hour", ValueType.DOUBLE),
schema=[
Field("avg_fare", Float64),
Field("avg_speed", Float64),
Field("avg_trip_seconds", Float64),
Field("earned_per_hour", Float64),
],
inputs={
"stats": trips_stats_fv
Expand Down
21 changes: 12 additions & 9 deletions examples/java-demo/feature_repo/driver_repo.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import pandas as pd
from feast import Entity, Feature, FeatureView, FileSource, ValueType
from feast.data_source import RequestSource
from feast.field import Field
from feast.on_demand_feature_view import on_demand_feature_view
from feast.request_feature_view import RequestFeatureView
from feast.types import Float32, Float64, Int64, String
from google.protobuf.duration_pb2 import Duration

from feast import Entity, Feature, FeatureView, FileSource, ValueType

driver_hourly_stats = FileSource(
path="data/driver_stats_with_string.parquet",
timestamp_field="event_timestamp",
Expand All @@ -15,11 +18,11 @@
name="driver_hourly_stats",
entities=["driver_id"],
ttl=Duration(seconds=86400000),
features=[
Feature(name="conv_rate", dtype=ValueType.FLOAT),
Feature(name="acc_rate", dtype=ValueType.FLOAT),
Feature(name="avg_daily_trips", dtype=ValueType.INT64),
Feature(name="string_feature", dtype=ValueType.STRING),
schema=[
Field(name="conv_rate", dtype=Float32),
Field(name="acc_rate", dtype=Float32),
Field(name="avg_daily_trips", dtype=Int64),
Field(name="string_feature", dtype=String),
],
online=True,
batch_source=driver_hourly_stats,
Expand All @@ -40,9 +43,9 @@
"driver_hourly_stats": driver_hourly_stats_view,
"vals_to_add": input_request,
},
features=[
Feature(name="conv_rate_plus_val1", dtype=ValueType.DOUBLE),
Feature(name="conv_rate_plus_val2", dtype=ValueType.DOUBLE),
schema=[
Field(name="conv_rate_plus_val1", dtype=Float64),
Field(name="conv_rate_plus_val2", dtype=Float64),
],
)
def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import pandas as pd

from google.protobuf.duration_pb2 import Duration

from feast.value_type import ValueType
from feast.feature import Feature
from feast.feature_view import FeatureView
from feast.data_source import RequestSource
from feast.entity import Entity
from feast.feature_service import FeatureService
from feast.data_source import RequestSource
from feast.feature_view import FeatureView
from feast.field import Field
from feast.on_demand_feature_view import on_demand_feature_view
from feast import FileSource
from feast.types import Float32, Float64, Int64
from feast.value_type import ValueType
from google.protobuf.duration_pb2 import Duration

from feast import FileSource

file_path = "driver_stats.parquet"
driver_hourly_stats = FileSource(
Expand All @@ -30,10 +29,10 @@
name="driver_hourly_stats",
entities=["driver_id"],
ttl=Duration(seconds=86400 * 7),
features=[
Feature(name="conv_rate", dtype=ValueType.DOUBLE),
Feature(name="acc_rate", dtype=ValueType.FLOAT),
Feature(name="avg_daily_trips", dtype=ValueType.INT64),
schema=[
Field(name="conv_rate", dtype=Float64),
Field(name="acc_rate", dtype=Float32),
Field(name="avg_daily_trips", dtype=Int64),
],
online=True,
batch_source=driver_hourly_stats,
Expand All @@ -43,56 +42,45 @@

input_request = RequestSource(
name="vals_to_add",
schema={
"val_to_add": ValueType.INT64,
"val_to_add_2": ValueType.INT64
}
schema={"val_to_add": ValueType.INT64, "val_to_add_2": ValueType.INT64},
)


@on_demand_feature_view(
sources={
'driver_hourly_stats': driver_hourly_stats_view,
'vals_to_add': input_request
},
features=[
Feature(name='conv_rate_plus_val1', dtype=ValueType.DOUBLE),
Feature(name='conv_rate_plus_val2', dtype=ValueType.DOUBLE)
]
sources={
"driver_hourly_stats": driver_hourly_stats_view,
"vals_to_add": input_request,
},
schema=[
Field(name="conv_rate_plus_val1", dtype=Float64),
Field(name="conv_rate_plus_val2", dtype=Float64),
],
)
def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:
df = pd.DataFrame()
df['conv_rate_plus_val1'] = (features_df['conv_rate'] + features_df['val_to_add'])
df['conv_rate_plus_val2'] = (features_df['conv_rate'] + features_df['val_to_add_2'])
df["conv_rate_plus_val1"] = features_df["conv_rate"] + features_df["val_to_add"]
df["conv_rate_plus_val2"] = features_df["conv_rate"] + features_df["val_to_add_2"]
return df


generated_data_source = FileSource(
path="benchmark_data.parquet",
timestamp_field="event_timestamp",
path="benchmark_data.parquet", timestamp_field="event_timestamp",
)

entity = Entity(
name="entity",
value_type=ValueType.STRING,
)
entity = Entity(name="entity", value_type=ValueType.STRING,)

benchmark_feature_views = [
FeatureView(
name=f"feature_view_{i}",
entities=["entity"],
ttl=Duration(seconds=86400),
features=[
Feature(name=f"feature_{10 * i + j}", dtype=ValueType.INT64)
for j in range(10)
],
schema=[Field(name=f"feature_{10 * i + j}", dtype=Int64) for j in range(10)],
online=True,
batch_source=generated_data_source,
)
for i in range(25)
]

benchmark_feature_service = FeatureService(
name=f"benchmark_feature_service",
features=benchmark_feature_views,
name=f"benchmark_feature_service", features=benchmark_feature_views,
)
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import pandas as pd
import numpy as np

from datetime import datetime, timedelta
from feast import FeatureStore

from definitions import driver_hourly_stats_view, driver, entity,\
benchmark_feature_service, benchmark_feature_views, transformed_conv_rate
import numpy as np
import pandas as pd
from definitions import (
benchmark_feature_service,
benchmark_feature_views,
driver,
driver_hourly_stats_view,
entity,
transformed_conv_rate,
)

from feast import FeatureStore

print("Running materialize.py")

Expand All @@ -21,7 +26,9 @@
df["avg_daily_trips"] = np.arange(0, 1000, 100)

# some of the rows are beyond 7 days to test the OUTSIDE_MAX_AGE status
df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map(lambda days: timedelta(days=days))
df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map(
lambda days: timedelta(days=days)
)

# Store data in parquet files. Parquet is convenient for local development mode. For
# production, you can use your favorite DWH, such as BigQuery. See Feast documentation
Expand All @@ -41,21 +48,27 @@ def generate_data(num_rows: int, num_features: int, destination: str) -> pd.Data
for column in features:
df[column] = np.random.randint(1, num_rows, num_rows)

df["entity"] = "key-" + \
pd.Series(np.arange(1, num_rows + 1)).astype(pd.StringDtype())
df["entity"] = "key-" + pd.Series(np.arange(1, num_rows + 1)).astype(
pd.StringDtype()
)

df.to_parquet(destination)


generate_data(10**3, 250, "benchmark_data.parquet")
generate_data(10 ** 3, 250, "benchmark_data.parquet")


fs = FeatureStore(".")
fs.apply([driver_hourly_stats_view,
transformed_conv_rate,
driver,
entity, benchmark_feature_service,
*benchmark_feature_views])
fs.apply(
[
driver_hourly_stats_view,
transformed_conv_rate,
driver,
entity,
benchmark_feature_service,
*benchmark_feature_views,
]
)

now = datetime.now()
fs.materialize(start, now)
Expand Down
Loading