2 changes: 1 addition & 1 deletion Makefile
@@ -310,7 +310,7 @@ format-python:
cd ${ROOT_DIR}/sdk/python; python -m black --target-version py38 feast tests

lint-python:
cd ${ROOT_DIR}/sdk/python; python -m mypy --exclude=/tests/ --follow-imports=skip feast
cd ${ROOT_DIR}/sdk/python; python -m mypy --exclude=/tests/ feast/
cd ${ROOT_DIR}/sdk/python; python -m isort feast/ tests/ --check-only
cd ${ROOT_DIR}/sdk/python; python -m flake8 feast/ tests/
cd ${ROOT_DIR}/sdk/python; python -m black --check feast tests
11 changes: 3 additions & 8 deletions sdk/python/feast/infra/contrib/spark_kafka_processor.py
@@ -5,7 +5,6 @@
from pyspark.sql import DataFrame, SparkSession
from pyspark.sql.avro.functions import from_avro
from pyspark.sql.functions import col, from_json
from pyspark.sql.streaming import StreamingQuery

from feast.data_format import AvroFormat, JsonFormat
from feast.data_source import KafkaSource, PushMode
@@ -68,13 +67,10 @@ def __init__(
# data_source type has been checked to be an instance of KafkaSource.
self.data_source: KafkaSource = self.data_source # type: ignore

def ingest_stream_feature_view(
self, to: PushMode = PushMode.ONLINE
) -> StreamingQuery:
def ingest_stream_feature_view(self, to: PushMode = PushMode.ONLINE):
ingested_stream_df = self._ingest_stream_data()
transformed_df = self._construct_transformation_plan(ingested_stream_df)
online_store_query = self._write_stream_data(transformed_df, to)
return online_store_query
self._write_stream_data(transformed_df, to)

def _ingest_stream_data(self) -> StreamTable:
"""Only supports json and avro formats currently."""
@@ -129,7 +125,7 @@ def _ingest_stream_data(self) -> StreamTable:
def _construct_transformation_plan(self, df: StreamTable) -> StreamTable:
return self.sfv.udf.__call__(df) if self.sfv.udf else df

def _write_stream_data(self, df: StreamTable, to: PushMode) -> StreamingQuery:
def _write_stream_data(self, df: StreamTable, to: PushMode):
# Validation occurs at the fs.write_to_online_store() phase against the stream feature view schema.
def batch_write(row: DataFrame, batch_id: int):
rows: pd.DataFrame = row.toPandas()
@@ -168,4 +164,3 @@ def batch_write(row: DataFrame, batch_id: int):
)

query.awaitTermination(timeout=self.query_timeout)
return query
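With the StreamingQuery return value removed, the Spark Kafka processor now waits internally on query.awaitTermination(timeout=self.query_timeout), and callers invoke ingest_stream_feature_view purely for its side effects. A minimal sketch of the adjusted calling code, assuming a processor instance created elsewhere:

    from feast.data_source import PushMode

    def run_ingestion(processor) -> None:
        # Previously: query = processor.ingest_stream_feature_view(to=PushMode.ONLINE),
        # with the caller managing the returned StreamingQuery itself. After this
        # change the call waits up to query_timeout and returns None.
        processor.ingest_stream_feature_view(to=PushMode.ONLINE)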
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import uuid
from typing import Dict, List, Optional
from typing import List

import pandas as pd

@@ -47,12 +47,14 @@ def __init__(self, project_name: str, *args, **kwargs):
def create_data_source(
self,
df: pd.DataFrame,
destination_name: str,
suffix: Optional[str] = None,
timestamp_field="ts",
created_timestamp_column="created_ts",
field_mapping: Optional[Dict[str, str]] = None,
**kwargs,
) -> DataSource:
destination_name = kwargs.get("destination_name")
if not destination_name:
raise ValueError("destination_name is required")
timestamp_field = kwargs.get("timestamp_field", "ts")
created_timestamp_column = kwargs.get("created_timestamp_column", "created_ts")
field_mapping = kwargs.get("field_mapping", None)

table_name = destination_name
s3_target = (
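The same refactor is applied to each contrib DataSourceCreator in the files below: the fixed keyword parameters of create_data_source are folded into **kwargs, with destination_name extracted and validated explicitly. A hedged usage sketch, where creator stands in for any concrete creator in this diff and the column names are placeholders:

    import pandas as pd

    def make_source(creator, df: pd.DataFrame):
        # creator is any DataSourceCreator touched by this diff; values are illustrative.
        return creator.create_data_source(
            df,
            destination_name="driver_stats",       # required; ValueError when omitted
            timestamp_field="ts",                   # falls back to "ts"
            created_timestamp_column="created_ts",  # falls back to "created_ts"
            field_mapping=None,                      # optional column rename mapping
        )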
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@
from feast.infra.provider import RetrievalJob
from feast.infra.registry.base_registry import BaseRegistry
from feast.on_demand_feature_view import OnDemandFeatureView
from feast.repo_config import FeastBaseModel, RepoConfig
from feast.repo_config import FeastConfigBaseModel, RepoConfig
from feast.saved_dataset import SavedDatasetStorage
from feast.type_map import pa_to_mssql_type
from feast.usage import log_exceptions_and_usage
@@ -43,7 +43,7 @@
EntitySchema = Dict[str, np.dtype]


class MsSqlServerOfflineStoreConfig(FeastBaseModel):
class MsSqlServerOfflineStoreConfig(FeastConfigBaseModel):
"""Offline store config for SQL Server"""

type: Literal["mssql"] = "mssql"
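MsSqlServerOfflineStoreConfig now derives from FeastConfigBaseModel, the pydantic base Feast store configs use so they can be parsed from feature_store.yaml. A minimal sketch of the pattern, with every field other than the type discriminator purely illustrative:

    from typing import Literal

    from feast.repo_config import FeastConfigBaseModel

    class ExampleOfflineStoreConfig(FeastConfigBaseModel):
        type: Literal["example.offline"] = "example.offline"
        connection_string: str = ""  # hypothetical field, for illustration only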
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, List, Optional
from typing import List

import pandas as pd
import pytest
@@ -63,12 +63,15 @@ def create_offline_store_config(self) -> MsSqlServerOfflineStoreConfig:
def create_data_source(
self,
df: pd.DataFrame,
destination_name: str,
timestamp_field="ts",
created_timestamp_column="created_ts",
field_mapping: Optional[Dict[str, str]] = None,
**kwargs,
) -> DataSource:
destination_name = kwargs.get("destination_name")
if not destination_name:
raise ValueError("destination_name is required")
timestamp_field = kwargs.get("timestamp_field", "ts")
created_timestamp_column = kwargs.get("created_timestamp_column", "created_ts")
field_mapping = kwargs.get("field_mapping", None)

# Make sure the field mapping is correct and convert the datetime datasources.
if timestamp_field in df:
df[timestamp_field] = pd.to_datetime(df[timestamp_field], utc=True).fillna(
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Dict, Optional
from typing import Dict

import pandas as pd
import pytest
@@ -81,12 +81,15 @@ def __init__(
def create_data_source(
self,
df: pd.DataFrame,
destination_name: str,
suffix: Optional[str] = None,
timestamp_field="ts",
created_timestamp_column="created_ts",
field_mapping: Optional[Dict[str, str]] = None,
**kwargs,
) -> DataSource:
destination_name = kwargs.get("destination_name")
if not destination_name:
raise ValueError("destination_name is required")
timestamp_field = kwargs.get("timestamp_field", "ts")
created_timestamp_column = kwargs.get("created_timestamp_column", "created_ts")
field_mapping = kwargs.get("field_mapping", None)

destination_name = self.get_prefixed_table_name(destination_name)

if self.offline_store_config:
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@
import shutil
import tempfile
import uuid
from typing import Dict, List, Optional
from typing import List

import pandas as pd
from pyspark import SparkConf
@@ -67,12 +67,15 @@ def create_offline_store_config(self):
def create_data_source(
self,
df: pd.DataFrame,
destination_name: str,
timestamp_field="ts",
created_timestamp_column="created_ts",
field_mapping: Optional[Dict[str, str]] = None,
**kwargs,
) -> DataSource:
destination_name = kwargs.get("destination_name")
if not destination_name:
raise ValueError("destination_name is required")
timestamp_field = kwargs.get("timestamp_field", "ts")
created_timestamp_column = kwargs.get("created_timestamp_column", "created_ts")
field_mapping = kwargs.get("field_mapping", None)

if timestamp_field in df:
df[timestamp_field] = pd.to_datetime(df[timestamp_field], utc=True)
# Make sure the field mapping is correct and convert the datetime datasources.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pathlib
import uuid
from typing import Dict, List, Optional
from typing import List

import pandas as pd
import pytest
@@ -80,12 +80,15 @@ def teardown(self):
def create_data_source(
self,
df: pd.DataFrame,
destination_name: str,
suffix: Optional[str] = None,
timestamp_field="ts",
created_timestamp_column="created_ts",
field_mapping: Optional[Dict[str, str]] = None,
**kwargs,
) -> DataSource:
destination_name = kwargs.get("destination_name")
if not destination_name:
raise ValueError("destination_name is required")
timestamp_field = kwargs.get("timestamp_field", "ts")
created_timestamp_column = kwargs.get("created_timestamp_column", "created_ts")
field_mapping = kwargs.get("field_mapping", None)

destination_name = self.get_prefixed_table_name(destination_name)
self.client.execute_query(
f"CREATE SCHEMA IF NOT EXISTS memory.{self.project_name}"
46 changes: 3 additions & 43 deletions sdk/python/feast/infra/offline_stores/snowflake.py
@@ -1,6 +1,5 @@
import contextlib
import json
import os
import uuid
import warnings
from datetime import datetime
@@ -23,7 +22,6 @@
import numpy as np
import pandas as pd
import pyarrow
from pydantic import Field, StrictStr
from pydantic.typing import Literal
from pytz import utc

@@ -46,11 +44,12 @@
from feast.infra.registry.base_registry import BaseRegistry
from feast.infra.utils.snowflake.snowflake_utils import (
GetSnowflakeConnection,
SnowflakeStoreConfig,
execute_snowflake_statement,
write_pandas,
write_parquet,
)
from feast.repo_config import FeastConfigBaseModel, RepoConfig
from feast.repo_config import RepoConfig
from feast.saved_dataset import SavedDatasetStorage
from feast.types import (
Array,
@@ -78,51 +77,12 @@
warnings.filterwarnings("ignore", category=DeprecationWarning)


class SnowflakeOfflineStoreConfig(FeastConfigBaseModel):
class SnowflakeOfflineStoreConfig(SnowflakeStoreConfig):
"""Offline store config for Snowflake"""

type: Literal["snowflake.offline"] = "snowflake.offline"
""" Offline store type selector """

config_path: Optional[str] = os.path.expanduser("~/.snowsql/config")
""" Snowflake config path -- absolute path required (Cant use ~)"""

account: Optional[str] = None
""" Snowflake deployment identifier -- drop .snowflakecomputing.com """

user: Optional[str] = None
""" Snowflake user name """

password: Optional[str] = None
""" Snowflake password """

role: Optional[str] = None
""" Snowflake role name """

warehouse: Optional[str] = None
""" Snowflake warehouse name """

authenticator: Optional[str] = None
""" Snowflake authenticator name """

database: StrictStr
""" Snowflake database name """

schema_: Optional[str] = Field("PUBLIC", alias="schema")
""" Snowflake schema name """

storage_integration_name: Optional[str] = None
""" Storage integration name in snowflake """

blob_export_location: Optional[str] = None
""" Location (in S3, Google storage or Azure storage) where data is offloaded """

convert_timestamp_columns: Optional[bool] = None
""" Convert timestamp columns on export to a Parquet-supported format """

class Config:
allow_population_by_field_name = True


class SnowflakeOfflineStore(OfflineStore):
@staticmethod
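Both Snowflake store configs now inherit their connection settings from a shared SnowflakeStoreConfig imported from feast.infra.utils.snowflake.snowflake_utils. That base class is not shown in this diff; the sketch below is an assumed shape, inferred from the fields that were duplicated across both configs and removed above. Where the offline-only export fields (storage_integration_name, blob_export_location, convert_timestamp_columns) now live is not visible in this hunk.

    import os
    from typing import Optional

    from pydantic import Field, StrictStr

    from feast.repo_config import FeastConfigBaseModel

    class SnowflakeStoreConfig(FeastConfigBaseModel):
        # Assumed definition -- fields mirror those deleted from the offline and
        # online configs; the real class lives in snowflake_utils.py.
        config_path: Optional[str] = os.path.expanduser("~/.snowsql/config")
        account: Optional[str] = None  # deployment id, without .snowflakecomputing.com
        user: Optional[str] = None
        password: Optional[str] = None
        role: Optional[str] = None
        warehouse: Optional[str] = None
        authenticator: Optional[str] = None
        database: StrictStr
        schema_: Optional[str] = Field("PUBLIC", alias="schema")

        class Config:
            allow_population_by_field_name = True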
37 changes: 3 additions & 34 deletions sdk/python/feast/infra/online_stores/snowflake.py
@@ -1,11 +1,9 @@
import itertools
import os
from binascii import hexlify
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple

import pandas as pd
from pydantic import Field, StrictStr
from pydantic.schema import Literal

from feast.entity import Entity
@@ -14,53 +12,24 @@
from feast.infra.online_stores.online_store import OnlineStore
from feast.infra.utils.snowflake.snowflake_utils import (
GetSnowflakeConnection,
SnowflakeStoreConfig,
execute_snowflake_statement,
get_snowflake_online_store_path,
write_pandas_binary,
)
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
from feast.protos.feast.types.Value_pb2 import Value as ValueProto
from feast.repo_config import FeastConfigBaseModel, RepoConfig
from feast.repo_config import RepoConfig
from feast.usage import log_exceptions_and_usage
from feast.utils import to_naive_utc


class SnowflakeOnlineStoreConfig(FeastConfigBaseModel):
class SnowflakeOnlineStoreConfig(SnowflakeStoreConfig):
"""Online store config for Snowflake"""

type: Literal["snowflake.online"] = "snowflake.online"
""" Online store type selector """

config_path: Optional[str] = os.path.expanduser("~/.snowsql/config")
""" Snowflake config path -- absolute path required (Can't use ~)"""

account: Optional[str] = None
""" Snowflake deployment identifier -- drop .snowflakecomputing.com """

user: Optional[str] = None
""" Snowflake user name """

password: Optional[str] = None
""" Snowflake password """

role: Optional[str] = None
""" Snowflake role name """

warehouse: Optional[str] = None
""" Snowflake warehouse name """

authenticator: Optional[str] = None
""" Snowflake authenticator name """

database: StrictStr
""" Snowflake database name """

schema_: Optional[str] = Field("PUBLIC", alias="schema")
""" Snowflake schema name """

class Config:
allow_population_by_field_name = True


class SnowflakeOnlineStore(OnlineStore):
@log_exceptions_and_usage(online_store="snowflake")
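With the shared base in place, both Snowflake configs declare little more than their type discriminator and are constructed the same way. A small usage sketch with placeholder values, assuming the SnowflakeStoreConfig shape sketched earlier:

    from feast.infra.offline_stores.snowflake import SnowflakeOfflineStoreConfig
    from feast.infra.online_stores.snowflake import SnowflakeOnlineStoreConfig

    # Placeholder credentials; only "type" differs between the two configs.
    offline = SnowflakeOfflineStoreConfig(database="FEAST", account="my_account", user="feast_user")
    online = SnowflakeOnlineStoreConfig(database="FEAST", account="my_account", user="feast_user")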