2 changes: 1 addition & 1 deletion go/internal/feast/ondemandfeatureview.go
@@ -6,7 +6,7 @@ import (
)

type OnDemandFeatureView struct {
base *BaseFeatureView
sourceFeatureViewProjections map[string]*FeatureViewProjection
sourceRequestDataSources map[string]*core.DataSource_RequestDataOptions
}
8 changes: 7 additions & 1 deletion protos/feast/core/DataSource.proto
@@ -26,7 +26,7 @@ import "feast/core/DataFormat.proto";
import "feast/types/Value.proto";

// Defines a Data Source that can be used to source Feature data
// Next available id: 26
message DataSource {
// Field indexes should *not* be reused. Not sure if fields 6-10 were used previously or not,
// but they are going to be reserved for backwards compatibility.
@@ -53,6 +53,12 @@ message DataSource {
// Name of Feast project that this data source belongs to.
string project = 21;

string description = 23;

map<string, string> tags = 24;

string owner = 25;

SourceType type = 1;

// Defines mapping between fields in the sourced data
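For context, a quick sketch of how the three new proto fields behave once the Python bindings are regenerated. This is illustrative only: the import path follows the Feast SDK's generated protos, and the field values are made up, not taken from this PR.

from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto

# The new metadata fields (23-25) serialize like any other scalar/map proto field.
proto = DataSourceProto(
    name="driver_hourly_stats",
    description="Hourly driver statistics",
    tags={"team": "driver_performance"},
    owner="driver-team@example.com",
)
roundtripped = DataSourceProto.FromString(proto.SerializeToString())
assert roundtripped.tags["team"] == "driver_performance"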
98 changes: 92 additions & 6 deletions sdk/python/feast/data_source.py
@@ -162,13 +162,20 @@ class DataSource(ABC):
source to feature names in a feature table or view. Only used for feature
columns, not entity or timestamp columns.
date_partition_column (optional): Timestamp column used for partitioning.
description (optional): A human-readable description.
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
owner (optional): The owner of the data source, typically the email of the primary
maintainer.
"""

name: str
event_timestamp_column: str
created_timestamp_column: str
field_mapping: Dict[str, str]
date_partition_column: str
description: str
tags: Dict[str, str]
owner: str

def __init__(
self,
@@ -177,8 +184,27 @@ def __init__(
created_timestamp_column: Optional[str] = None,
field_mapping: Optional[Dict[str, str]] = None,
date_partition_column: Optional[str] = None,
description: Optional[str] = "",
tags: Optional[Dict[str, str]] = None,
owner: Optional[str] = "",
):
"""Creates a DataSource object."""
"""
Creates a DataSource object.
Args:
name: Name of data source, which should be unique within a project
event_timestamp_column (optional): Event timestamp column used for point in time
joins of feature values.
created_timestamp_column (optional): Timestamp column indicating when the row
was created, used for deduplicating rows.
field_mapping (optional): A dictionary mapping of column names in this data
source to feature names in a feature table or view. Only used for feature
columns, not entity or timestamp columns.
date_partition_column (optional): Timestamp column used for partitioning.
description (optional): A human-readable description.
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
owner (optional): The owner of the data source, typically the email of the primary
maintainer.
"""
self.name = name
self.event_timestamp_column = (
event_timestamp_column if event_timestamp_column else ""
@@ -190,6 +216,9 @@ def __init__(
self.date_partition_column = (
date_partition_column if date_partition_column else ""
)
self.description = description or ""
self.tags = tags or {}
self.owner = owner or ""

def __hash__(self):
return hash((id(self), self.name))
@@ -207,6 +236,9 @@ def __eq__(self, other):
or self.created_timestamp_column != other.created_timestamp_column
or self.field_mapping != other.field_mapping
or self.date_partition_column != other.date_partition_column
or self.tags != other.tags
or self.owner != other.owner
or self.description != other.description
):
return False

@@ -303,13 +335,19 @@ def __init__(
created_timestamp_column: Optional[str] = "",
field_mapping: Optional[Dict[str, str]] = None,
date_partition_column: Optional[str] = "",
description: Optional[str] = "",
tags: Optional[Dict[str, str]] = None,
owner: Optional[str] = "",
):
super().__init__(
name,
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
description=description,
tags=tags,
owner=owner,
)
self.kafka_options = KafkaOptions(
bootstrap_servers=bootstrap_servers,
@@ -346,6 +384,9 @@ def from_proto(data_source: DataSourceProto):
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
description=data_source.description,
tags=dict(data_source.tags),
owner=data_source.owner,
)

def to_proto(self) -> DataSourceProto:
@@ -354,12 +395,14 @@ def to_proto(self) -> DataSourceProto:
type=DataSourceProto.STREAM_KAFKA,
field_mapping=self.field_mapping,
kafka_options=self.kafka_options.to_proto(),
description=self.description,
tags=self.tags,
owner=self.owner,
)

data_source_proto.event_timestamp_column = self.event_timestamp_column
data_source_proto.created_timestamp_column = self.created_timestamp_column
data_source_proto.date_partition_column = self.date_partition_column

return data_source_proto

@staticmethod
@@ -377,16 +420,25 @@ class RequestDataSource(DataSource):
Args:
name: Name of the request data source
schema: Schema mapping from the input feature name to a ValueType
description (optional): A human-readable description.
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
owner (optional): The owner of the request data source, typically the email of the primary
maintainer.
"""

name: str
schema: Dict[str, ValueType]

def __init__(
self,
name: str,
schema: Dict[str, ValueType],
description: Optional[str] = "",
tags: Optional[Dict[str, str]] = None,
owner: Optional[str] = "",
):
"""Creates a RequestDataSource object."""
super().__init__(name, description=description, tags=tags, owner=owner)
self.schema = schema

def validate(self, config: RepoConfig):
@@ -403,7 +455,13 @@ def from_proto(data_source: DataSourceProto):
schema = {}
for key, val in schema_pb.items():
schema[key] = ValueType(val)
return RequestDataSource(
name=data_source.name,
schema=schema,
description=data_source.description,
tags=dict(data_source.tags),
owner=data_source.owner,
)

def to_proto(self) -> DataSourceProto:
schema_pb = {}
@@ -414,6 +472,9 @@ def to_proto(self) -> DataSourceProto:
name=self.name,
type=DataSourceProto.REQUEST_SOURCE,
request_data_options=options,
description=self.description,
tags=self.tags,
owner=self.owner,
)

return data_source_proto
@@ -448,6 +509,9 @@ def from_proto(data_source: DataSourceProto):
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
description=data_source.description,
tags=dict(data_source.tags),
owner=data_source.owner,
)

@staticmethod
@@ -467,13 +531,19 @@ def __init__(
stream_name: str,
field_mapping: Optional[Dict[str, str]] = None,
date_partition_column: Optional[str] = "",
description: Optional[str] = "",
tags: Optional[Dict[str, str]] = None,
owner: Optional[str] = "",
):
super().__init__(
name,
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
description=description,
tags=tags,
owner=owner,
)
self.kinesis_options = KinesisOptions(
record_format=record_format, region=region, stream_name=stream_name
@@ -504,6 +574,9 @@ def to_proto(self) -> DataSourceProto:
type=DataSourceProto.STREAM_KINESIS,
field_mapping=self.field_mapping,
kinesis_options=self.kinesis_options.to_proto(),
description=self.description,
tags=self.tags,
owner=self.owner,
)

data_source_proto.event_timestamp_column = self.event_timestamp_column
@@ -529,6 +602,9 @@ def __init__(
schema: Dict[str, ValueType],
batch_source: DataSource,
event_timestamp_column="timestamp",
description: Optional[str] = "",
tags: Optional[Dict[str, str]] = None,
owner: Optional[str] = "",
):
"""
Creates a PushSource object.
@@ -539,8 +615,12 @@ def __init__(
store to the online store, and when retrieving historical features.
event_timestamp_column (optional): Event timestamp column used for point in time
joins of feature values.
description (optional): A human-readable description.
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
owner (optional): The owner of the data source, typically the email of the primary
maintainer.
"""
super().__init__(name, description=description, tags=tags, owner=owner)
self.schema = schema
self.batch_source = batch_source
if not self.batch_source:
@@ -574,6 +654,9 @@ def from_proto(data_source: DataSourceProto):
schema=schema,
batch_source=batch_source,
event_timestamp_column=data_source.event_timestamp_column,
description=data_source.description,
tags=dict(data_source.tags),
owner=data_source.owner,
)

def to_proto(self) -> DataSourceProto:
@@ -592,6 +675,9 @@ def to_proto(self) -> DataSourceProto:
type=DataSourceProto.PUSH_SOURCE,
push_options=options,
event_timestamp_column=self.event_timestamp_column,
description=self.description,
tags=self.tags,
owner=self.owner,
)

return data_source_proto
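Taken together, the new keyword arguments flow from each concrete source (KafkaSource, RequestDataSource, KinesisSource, PushSource) through DataSource.__init__ and out via the updated to_proto()/from_proto() paths. A usage sketch against the classes in this file; the schema and metadata values are illustrative, not from this PR.

from feast import ValueType
from feast.data_source import RequestDataSource

vals_to_add = RequestDataSource(
    name="vals_to_add",
    schema={"val_to_add": ValueType.INT64},
    description="Request-time values added to the driver rate",
    tags={"stage": "experimental"},
    owner="oncall@example.com",
)

# The metadata survives the to_proto()/from_proto() round trip added above.
restored = RequestDataSource.from_proto(vals_to_add.to_proto())
assert restored.owner == "oncall@example.com"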
19 changes: 19 additions & 0 deletions sdk/python/feast/infra/offline_stores/bigquery_source.py
@@ -24,6 +24,9 @@ def __init__(
date_partition_column: Optional[str] = "",
query: Optional[str] = None,
name: Optional[str] = None,
description: Optional[str] = "",
tags: Optional[Dict[str, str]] = None,
owner: Optional[str] = "",
):
"""Create a BigQuerySource from an existing table or query.

@@ -37,6 +40,10 @@ def __init__(
date_partition_column (optional): Timestamp column used for partitioning.
query (optional): SQL query to execute to generate data for this data source.
name (optional): Name for the source. Defaults to the table_ref if not specified.
description (optional): A human-readable description.
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
owner (optional): The owner of the BigQuery source, typically the email of the primary
maintainer.
Example:
>>> from feast import BigQuerySource
>>> my_bigquery_source = BigQuerySource(table="gcp_project:bq_dataset.bq_table")
@@ -75,6 +82,9 @@ def __init__(
created_timestamp_column,
field_mapping,
date_partition_column,
description=description,
tags=tags,
owner=owner,
)

# Note: Python requires redefining hash in child classes that override __eq__
@@ -94,6 +104,9 @@ def __eq__(self, other):
and self.event_timestamp_column == other.event_timestamp_column
and self.created_timestamp_column == other.created_timestamp_column
and self.field_mapping == other.field_mapping
and self.description == other.description
and self.tags == other.tags
and self.owner == other.owner
)

@property
@@ -117,6 +130,9 @@ def from_proto(data_source: DataSourceProto):
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
query=data_source.bigquery_options.query,
description=data_source.description,
tags=dict(data_source.tags),
owner=data_source.owner,
)

def to_proto(self) -> DataSourceProto:
@@ -125,6 +141,9 @@ def to_proto(self) -> DataSourceProto:
type=DataSourceProto.BATCH_BIGQUERY,
field_mapping=self.field_mapping,
bigquery_options=self.bigquery_options.to_proto(),
description=self.description,
tags=self.tags,
owner=self.owner,
)

data_source_proto.event_timestamp_column = self.event_timestamp_column
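As elsewhere, BigQuerySource now accepts the metadata up front. A sketch extending the docstring's own example (the table reference and values are illustrative); note that since __eq__ now also compares description, tags, and owner, two otherwise-identical sources with different owners no longer compare equal.

from feast import BigQuerySource

my_bigquery_source = BigQuerySource(
    table="gcp_project:bq_dataset.bq_table",
    description="Driver order counts, refreshed hourly",
    tags={"source_system": "orders"},
    owner="data-platform@example.com",
)

other = BigQuerySource(
    table="gcp_project:bq_dataset.bq_table",
    description="Driver order counts, refreshed hourly",
    tags={"source_system": "orders"},
    owner="someone-else@example.com",
)
assert my_bigquery_source != other  # owner now participates in equality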
@@ -40,6 +40,9 @@ def __init__(
created_timestamp_column: Optional[str] = None,
field_mapping: Optional[Dict[str, str]] = None,
date_partition_column: Optional[str] = None,
description: Optional[str] = "",
tags: Optional[Dict[str, str]] = None,
owner: Optional[str] = "",
):
# If no name, use the table_ref as the default name
_name = name
@@ -54,6 +57,9 @@ def __init__(
created_timestamp_column,
field_mapping,
date_partition_column,
description=description,
tags=tags,
owner=owner,
)
warnings.warn(
"The spark data source API is an experimental feature in alpha development. "
@@ -125,6 +131,9 @@ def from_proto(data_source: DataSourceProto) -> Any:
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
description=data_source.description,
tags=dict(data_source.tags),
owner=data_source.owner,
)

def to_proto(self) -> DataSourceProto:
@@ -133,6 +142,9 @@ def to_proto(self) -> DataSourceProto:
type=DataSourceProto.CUSTOM_SOURCE,
field_mapping=self.field_mapping,
custom_options=self.spark_options.to_proto(),
description=self.description,
tags=self.tags,
owner=self.owner,
)

data_source_proto.event_timestamp_column = self.event_timestamp_column
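The Spark source (still experimental, per the warning above) follows the same pattern, carrying the metadata on the top-level proto even though its own options travel in custom_options. A final sketch; the import path is an assumption, since the file header for this last diff was lost and the contrib module has moved between Feast releases.

# Assumed import path; adjust to where SparkSource lives in your Feast version.
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import SparkSource

driver_stats = SparkSource(
    name="driver_stats",
    table="driver_stats",  # assumes a table registered with the Spark session
    event_timestamp_column="event_timestamp",
    description="Driver stats sourced from a Spark table",
    tags={"runtime": "spark"},
    owner="pipelines@example.com",
)
proto = driver_stats.to_proto()
assert proto.description == "Driver stats sourced from a Spark table"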