Skip to content
47 changes: 47 additions & 0 deletions docs/reference/type-system.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,25 @@ All primitive types (except `Map` and `Json`) have corresponding set types for s
- Set types are best suited for **online serving** use cases where feature values are written as Python sets and retrieved via `get_online_features`.
{% endhint %}

### Nested Collection Types

Feast supports arbitrarily nested collections using a recursive `VALUE_LIST` / `VALUE_SET` design. The outer container determines the proto enum (`VALUE_LIST` for `Array(…)`, `VALUE_SET` for `Set(…)`), while the full inner type structure is persisted via a mandatory `feast:nested_inner_type` Field tag.

| Feast Type | Python Type | ValueType | Description |
|------------|-------------|-----------|-------------|
| `Array(Array(T))` | `List[List[T]]` | `VALUE_LIST` | List of lists |
| `Array(Set(T))` | `List[List[T]]` | `VALUE_LIST` | List of sets |
| `Set(Array(T))` | `List[List[T]]` | `VALUE_SET` | Set of lists |
| `Set(Set(T))` | `List[List[T]]` | `VALUE_SET` | Set of sets |
| `Array(Array(Array(T)))` | `List[List[List[T]]]` | `VALUE_LIST` | 3-level nesting |

Where `T` is any supported primitive type (Int32, Int64, Float32, Float64, String, Bytes, Bool, UnixTimestamp) or another nested collection type.

**Notes:**
- Nesting depth is **unlimited**. `Array(Array(Array(T)))`, `Set(Array(Set(T)))`, etc. are all supported.
- Inner type information is preserved via Field tags (`feast:nested_inner_type`) and restored during deserialization. This tag is mandatory for nested collection types.
- Empty inner collections (`[]`) are stored as empty proto values and round-trip as `None`. For example, `[[1, 2], [], [3]]` becomes `[[1, 2], None, [3]]` after a write-read cycle.

### Map Types

Map types allow storing dictionary-like data structures:
Expand Down Expand Up @@ -221,6 +240,10 @@ user_features = FeatureView(
Field(name="metadata", dtype=Map),
Field(name="activity_log", dtype=Array(Map)),

# Nested collection types
Field(name="weekly_scores", dtype=Array(Array(Float64))),
Field(name="unique_tags_per_category", dtype=Array(Set(String))),

# JSON type
Field(name="raw_event", dtype=Json),

Expand Down Expand Up @@ -250,6 +273,30 @@ tag_list = [100, 200, 300, 100, 200]
tag_ids = set(tag_list) # {100, 200, 300}
```

### Nested Collection Type Usage Examples

Nested collections allow storing multi-dimensional data with unlimited depth:

```python
# List of lists — e.g., weekly score history per user
weekly_scores = [[85.0, 90.5, 78.0], [92.0, 88.5], [95.0, 91.0, 87.5]]

# List of sets — e.g., unique tags assigned per category
unique_tags_per_category = [["python", "ml"], ["rust", "systems"], ["python", "web"]]

# 3-level nesting — e.g., multi-dimensional matrices
Field(name="tensor", dtype=Array(Array(Array(Float64))))

# Mixed nesting
Field(name="grouped_tags", dtype=Array(Set(Array(String))))
```

**Limitation:** Empty inner collections round-trip as `None`:
```python
# Input: [[1, 2], [], [3]]
# Output: [[1, 2], None, [3]] (empty [] becomes None after write-read cycle)
```

### Map Type Usage Examples

Maps can store complex nested data structures:
Expand Down
6 changes: 6 additions & 0 deletions protos/feast/types/Value.proto
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ message ValueType {
JSON_LIST = 33;
STRUCT = 34;
STRUCT_LIST = 35;
// 36-39 were LIST_LIST, LIST_SET, SET_LIST, SET_SET (removed, replaced by VALUE_LIST/VALUE_SET)
reserved 36, 37, 38, 39;
VALUE_LIST = 40;
VALUE_SET = 41;
}
}

Expand Down Expand Up @@ -96,6 +100,8 @@ message Value {
StringList json_list_val = 33;
Map struct_val = 34;
MapList struct_list_val = 35;
RepeatedValue list_val = 40;
RepeatedValue set_val = 41;
}
}

Expand Down
38 changes: 31 additions & 7 deletions sdk/python/feast/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from feast.value_type import ValueType

STRUCT_SCHEMA_TAG = "feast:struct_schema"
NESTED_COLLECTION_INNER_TYPE_TAG = "feast:nested_inner_type"


@typechecked
Expand Down Expand Up @@ -118,7 +119,7 @@ def __str__(self):

def to_proto(self) -> FieldProto:
"""Converts a Field object to its protobuf representation."""
from feast.types import Array
from feast.types import Array, Set

value_type = self.dtype.to_value_type()
vector_search_metric = self.vector_search_metric or ""
Expand All @@ -128,6 +129,11 @@ def to_proto(self) -> FieldProto:
tags[STRUCT_SCHEMA_TAG] = _serialize_struct_schema(self.dtype)
elif isinstance(self.dtype, Array) and isinstance(self.dtype.base_type, Struct):
tags[STRUCT_SCHEMA_TAG] = _serialize_struct_schema(self.dtype.base_type)
# Persist nested collection type info in tags
if isinstance(self.dtype, (Array, Set)) and isinstance(
self.dtype.base_type, (Array, Set)
):
tags[NESTED_COLLECTION_INNER_TYPE_TAG] = _feast_type_to_str(self.dtype)
return FieldProto(
name=self.name,
value_type=value_type.value,
Expand Down Expand Up @@ -155,17 +161,24 @@ def from_proto(cls, field_proto: FieldProto):
# Reconstruct Struct type from persisted schema in tags
from feast.types import Array

internal_tags = {STRUCT_SCHEMA_TAG, NESTED_COLLECTION_INNER_TYPE_TAG}
dtype: FeastType
if value_type == ValueType.STRUCT and STRUCT_SCHEMA_TAG in tags:
dtype = _deserialize_struct_schema(tags[STRUCT_SCHEMA_TAG])
user_tags = {k: v for k, v in tags.items() if k != STRUCT_SCHEMA_TAG}
user_tags = {k: v for k, v in tags.items() if k not in internal_tags}
elif value_type == ValueType.STRUCT_LIST and STRUCT_SCHEMA_TAG in tags:
inner_struct = _deserialize_struct_schema(tags[STRUCT_SCHEMA_TAG])
dtype = Array(inner_struct)
user_tags = {k: v for k, v in tags.items() if k != STRUCT_SCHEMA_TAG}
user_tags = {k: v for k, v in tags.items() if k not in internal_tags}
elif (
value_type in (ValueType.VALUE_LIST, ValueType.VALUE_SET)
and NESTED_COLLECTION_INNER_TYPE_TAG in tags
):
dtype = _str_to_feast_type(tags[NESTED_COLLECTION_INNER_TYPE_TAG])
user_tags = {k: v for k, v in tags.items() if k not in internal_tags}
else:
dtype = from_value_type(value_type=value_type)
user_tags = tags
user_tags = {k: v for k, v in tags.items() if k not in internal_tags}

return cls(
name=field_proto.name,
Expand Down Expand Up @@ -198,6 +211,7 @@ def _feast_type_to_str(feast_type: FeastType) -> str:
from feast.types import (
Array,
PrimitiveFeastType,
Set,
)

if isinstance(feast_type, PrimitiveFeastType):
Expand All @@ -209,6 +223,8 @@ def _feast_type_to_str(feast_type: FeastType) -> str:
return json.dumps({"__struct__": nested})
elif isinstance(feast_type, Array):
return f"Array({_feast_type_to_str(feast_type.base_type)})"
elif isinstance(feast_type, Set):
return f"Set({_feast_type_to_str(feast_type.base_type)})"
else:
return str(feast_type)

Expand All @@ -218,6 +234,7 @@ def _str_to_feast_type(type_str: str) -> FeastType:
from feast.types import (
Array,
PrimitiveFeastType,
Set,
)

# Check if it's an Array type
Expand All @@ -226,6 +243,12 @@ def _str_to_feast_type(type_str: str) -> FeastType:
base_type = _str_to_feast_type(inner)
return Array(base_type)

# Check if it's a Set type
if type_str.startswith("Set(") and type_str.endswith(")"):
inner = type_str[4:-1]
base_type = _str_to_feast_type(inner)
return Set(base_type)

# Check if it's a nested Struct (JSON encoded)
if type_str.startswith("{"):
try:
Expand All @@ -243,9 +266,10 @@ def _str_to_feast_type(type_str: str) -> FeastType:
try:
return PrimitiveFeastType[type_str]
except KeyError:
from feast.types import String

return String
raise ValueError(
f"Unknown FeastType: {type_str!r}. "
f"Valid primitive types: {[t.name for t in PrimitiveFeastType]}"
)


def _serialize_struct_schema(struct_type: Struct) -> str:
Expand Down
16 changes: 15 additions & 1 deletion sdk/python/feast/infra/online_stores/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ def _proto_value_to_transport_value(proto_value: ValueProto) -> Any:
if val_attr == "json_list_val":
return list(getattr(proto_value, val_attr).val)

# Nested collection types use feast_value_type_to_python_type
# which handles recursive conversion of RepeatedValue protos.
if val_attr in ("list_val", "set_val"):
return feast_value_type_to_python_type(proto_value)

# Map/Struct types are converted to Python dicts by
# feast_value_type_to_python_type. Serialise them to JSON strings
# so the server-side DataFrame gets VARCHAR columns instead of
Expand Down Expand Up @@ -183,6 +188,12 @@ def online_read(
logger.debug("Able to retrieve the online features from feature server.")
response_json = json.loads(response.text)
event_ts = self._get_event_ts(response_json)
# Build feature name -> ValueType mapping so we can reconstruct
# complex types (nested collections, sets, etc.) that cannot be
# inferred from raw JSON values alone.
feature_type_map: Dict[str, ValueType] = {
f.name: f.dtype.to_value_type() for f in table.features
}
# Iterating over results and converting the API results in column format to row format.
result_tuples: List[
Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]
Expand All @@ -202,13 +213,16 @@ def online_read(
]
== "PRESENT"
):
feature_value_type = feature_type_map.get(
feature_name, ValueType.UNKNOWN
)
message = python_values_to_proto_values(
[
response_json["results"][index]["values"][
feature_value_index
]
],
ValueType.UNKNOWN,
feature_value_type,
)
feature_values_dict[feature_name] = message[0]
else:
Expand Down
17 changes: 17 additions & 0 deletions sdk/python/feast/proto_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ def to_json_object(printer: _Printer, message: ProtoMessage) -> JsonObject:
# to JSON. The parse back result will be different from original message.
if which is None or which == "null_val":
return None
elif which in ("list_val", "set_val"):
# Nested collection: RepeatedValue containing Values
repeated = getattr(message, which)
value = [
printer._MessageToJsonObject(inner_val) for inner_val in repeated.val
]
elif "_list_" in which:
value = list(getattr(message, which).val)
else:
Expand All @@ -86,6 +92,17 @@ def from_json_object(
if len(value) == 0:
# Clear will mark the struct as modified so it will be created even if there are no values
message.int64_list_val.Clear()
elif isinstance(value[0], list) or any(isinstance(v, list) for v in value):
# Nested collection (list of lists).
# Check any() to handle cases where the first element is None
# (empty inner collections round-trip through proto as None).
# Default to list_val since JSON transport loses the
# outer/inner set distinction.
rv = RepeatedValue()
for inner in value:
inner_val = rv.val.add()
from_json_object(parser, inner, inner_val)
message.list_val.CopyFrom(rv)
elif isinstance(value[0], bool):
message.bool_list_val.val.extend(value)
elif isinstance(value[0], str):
Expand Down
4 changes: 2 additions & 2 deletions sdk/python/feast/protos/feast/core/Aggregation_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion sdk/python/feast/protos/feast/core/Aggregation_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ class Aggregation(google.protobuf.message.Message):
NAME_FIELD_NUMBER: builtins.int
column: builtins.str
function: builtins.str
name: builtins.str
@property
def time_window(self) -> google.protobuf.duration_pb2.Duration: ...
@property
def slide_interval(self) -> google.protobuf.duration_pb2.Duration: ...
name: builtins.str
def __init__(
self,
*,
Expand Down
8 changes: 6 additions & 2 deletions sdk/python/feast/protos/feast/core/FeatureView_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,18 @@ class FeatureViewSpec(google.protobuf.message.Message):
"""
@property
def batch_source(self) -> feast.core.DataSource_pb2.DataSource:
"""Batch/Offline DataSource where this view can retrieve offline feature data."""
"""Batch/Offline DataSource where this view can retrieve offline feature data.
Optional: if not set, the feature view has no associated batch data source (e.g. purely derived views).
"""
online: builtins.bool
"""Whether these features should be served online or not
This is also used to determine whether the features should be written to the online store
"""
@property
def stream_source(self) -> feast.core.DataSource_pb2.DataSource:
"""Streaming DataSource from where this view can consume "online" feature data."""
"""Streaming DataSource from where this view can consume "online" feature data.
Optional: only required for streaming feature views.
"""
description: builtins.str
"""Description of the feature view."""
owner: builtins.str
Expand Down
18 changes: 9 additions & 9 deletions sdk/python/feast/protos/feast/serving/GrpcServer_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ isort:skip_file
"""
import builtins
import collections.abc
import feast.protos.feast.types.Value_pb2
import feast.types.Value_pb2
import google.protobuf.descriptor
import google.protobuf.internal.containers
import google.protobuf.message
Expand Down Expand Up @@ -42,12 +42,12 @@ class PushRequest(google.protobuf.message.Message):
VALUE_FIELD_NUMBER: builtins.int
key: builtins.str
@property
def value(self) -> feast.protos.feast.types.Value_pb2.Value: ...
def value(self) -> feast.types.Value_pb2.Value: ...
def __init__(
self,
*,
key: builtins.str = ...,
value: feast.protos.feast.types.Value_pb2.Value | None = ...,
value: feast.types.Value_pb2.Value | None = ...,
) -> None: ...
def HasField(self, field_name: typing_extensions.Literal["value", b"value"]) -> builtins.bool: ...
def ClearField(self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]) -> None: ...
Expand All @@ -63,15 +63,15 @@ class PushRequest(google.protobuf.message.Message):
allow_registry_cache: builtins.bool
to: builtins.str
@property
def typed_features(self) -> google.protobuf.internal.containers.MessageMap[builtins.str, feast.protos.feast.types.Value_pb2.Value]: ...
def typed_features(self) -> google.protobuf.internal.containers.MessageMap[builtins.str, feast.types.Value_pb2.Value]: ...
def __init__(
self,
*,
features: collections.abc.Mapping[builtins.str, builtins.str] | None = ...,
stream_feature_view: builtins.str = ...,
allow_registry_cache: builtins.bool = ...,
to: builtins.str = ...,
typed_features: collections.abc.Mapping[builtins.str, feast.protos.feast.types.Value_pb2.Value] | None = ...,
typed_features: collections.abc.Mapping[builtins.str, feast.types.Value_pb2.Value] | None = ...,
) -> None: ...
def ClearField(self, field_name: typing_extensions.Literal["allow_registry_cache", b"allow_registry_cache", "features", b"features", "stream_feature_view", b"stream_feature_view", "to", b"to", "typed_features", b"typed_features"]) -> None: ...

Expand Down Expand Up @@ -116,12 +116,12 @@ class WriteToOnlineStoreRequest(google.protobuf.message.Message):
VALUE_FIELD_NUMBER: builtins.int
key: builtins.str
@property
def value(self) -> feast.protos.feast.types.Value_pb2.Value: ...
def value(self) -> feast.types.Value_pb2.Value: ...
def __init__(
self,
*,
key: builtins.str = ...,
value: feast.protos.feast.types.Value_pb2.Value | None = ...,
value: feast.types.Value_pb2.Value | None = ...,
) -> None: ...
def HasField(self, field_name: typing_extensions.Literal["value", b"value"]) -> builtins.bool: ...
def ClearField(self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]) -> None: ...
Expand All @@ -135,14 +135,14 @@ class WriteToOnlineStoreRequest(google.protobuf.message.Message):
feature_view_name: builtins.str
allow_registry_cache: builtins.bool
@property
def typed_features(self) -> google.protobuf.internal.containers.MessageMap[builtins.str, feast.protos.feast.types.Value_pb2.Value]: ...
def typed_features(self) -> google.protobuf.internal.containers.MessageMap[builtins.str, feast.types.Value_pb2.Value]: ...
def __init__(
self,
*,
features: collections.abc.Mapping[builtins.str, builtins.str] | None = ...,
feature_view_name: builtins.str = ...,
allow_registry_cache: builtins.bool = ...,
typed_features: collections.abc.Mapping[builtins.str, feast.protos.feast.types.Value_pb2.Value] | None = ...,
typed_features: collections.abc.Mapping[builtins.str, feast.types.Value_pb2.Value] | None = ...,
) -> None: ...
def ClearField(self, field_name: typing_extensions.Literal["allow_registry_cache", b"allow_registry_cache", "feature_view_name", b"feature_view_name", "features", b"features", "typed_features", b"typed_features"]) -> None: ...

Expand Down
Loading