Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions docs/how-to-guides/entity-reserialization-of-from-v2-to-v3.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Entity Key Re-Serialization from Version 2 to 3

Entity key serialization version 2 will soon be deprecated, so serialization and deserialization need to move to version 3.

The challenge is that existing FeatureViews may already have written features to the stores using version 2. An entity key serialized with version 2 can't be retrieved using the version 3 deserialization algorithm.

## Reserialize the Feature Views entity Keys to version 3

The solution is to reserialize the entity keys from version 2 to version 3.

Follow the steps below to reserialize the entity keys of a feature view to version 3 in an offline / online store.

In short, you need to iterate through all the feature views in your Feast repository, retrieve their serialized entity keys (if stored in version 2), reserialize them to version 3, and then update the online/offline store or wherever the serialized keys are stored.

### 1. Initialize the Feature Store

Load the FeatureStore object to access all feature views in your repository.

### 2. Iterate Through Feature Views

Use the list_feature_views() method to retrieve all feature views in the repository.

### 3. Retrieve Serialized Entity Keys

For each feature view, retrieve the serialized entity keys stored in the online/offline store or other storage.

### 4. Reserialize Entity Keys

Use the `reserialize_entity_v2_key_to_v3` function from the [entity key encoding utils](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/infra/key_encoding_utils.py) module to convert the serialized keys from version 2 to version 3.

### 5. Update the Online/offline Store

Write the reserialized keys back to the online/offline store or the appropriate storage.
46 changes: 46 additions & 0 deletions sdk/python/feast/infra/key_encoding_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,52 @@ def serialize_entity_key_prefix(entity_keys: List[str]) -> bytes:
return b"".join(output)


def reserialize_entity_v2_key_to_v3(
    serialized_key_v2: bytes,
) -> bytes:
    """
    Deserialize version 2 entity key and reserialize it to version 3.

    The input is parsed as: a 4-byte little-endian type tag, the UTF-8 join
    key name (stored without a length prefix), and then one or more value
    records, each made of a 4-byte type tag, a 4-byte length and the value
    payload.

    NOTE: because the join key name carries no length prefix, its end is
    located by assuming the buffer finishes with a single value record of
    4 + 4 + 8 bytes. Only keys with one join key and one 8-byte value are
    therefore parsed as intended; other shapes are not supported here.

    Args:
        serialized_key_v2: serialized entity key of version 2

    Returns: bytes of the serialized entity key in version 3
    """
    # Trailing value record assumed by this parser:
    # 4-byte type tag + 4-byte length + 8-byte payload.
    fixed_tail_size = 4 + 4 + 8

    # The join key's leading type tag is still unpacked so that a buffer too
    # short to hold it raises struct.error, but its value is not needed.
    struct.unpack_from("<I", serialized_key_v2, 0)
    offset = 4

    # Everything between the leading tag and the assumed tail is the key name.
    string_end = len(serialized_key_v2) - fixed_tail_size
    keys = [serialized_key_v2[offset:string_end].decode("utf-8")]
    offset = string_end

    values = []
    while offset < len(serialized_key_v2):
        # Each value record starts with its type tag followed by its length.
        value_type, value_length = struct.unpack_from(
            "<II", serialized_key_v2, offset
        )
        offset += 8

        # Read the value based on its type and length
        value_bytes = serialized_key_v2[offset : offset + value_length]
        values.append(_deserialize_value(value_type, value_bytes))
        offset += value_length

    return serialize_entity_key(
        EntityKeyProto(join_keys=keys, entity_values=values),
        entity_key_serialization_version=3,
    )


def serialize_entity_key(
entity_key: EntityKeyProto, entity_key_serialization_version=1
) -> bytes:
Expand Down
11 changes: 4 additions & 7 deletions sdk/python/feast/repo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,11 @@ def __init__(self, **data: Any):
self.feature_server["type"]
)(**self.feature_server)

if self.entity_key_serialization_version <= 1:
if self.entity_key_serialization_version <= 2:
warnings.warn(
"`entity_key_serialization_version` is either not specified in the feature_store.yaml, "
"or is specified to a value <= 1."
"This serialization version may cause errors when trying to write fields with the `Long` data type"
" into the online store. Specifying `entity_key_serialization_version` to 2 is recommended for"
" new projects. ",
RuntimeWarning,
"The serialization version 2 and below would be deprecated in the next release. "

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not

Suggested change
"The serialization version 2 and below would be deprecated in the next release. "
"The serialization version 2 and below will be deprecated in the next release. "

"Specifying `entity_key_serialization_version` to 3 is recommended.",
DeprecationWarning,
)

@property
Expand Down
24 changes: 24 additions & 0 deletions sdk/python/tests/unit/infra/test_key_encoding_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
_deserialize_value,
_serialize_val,
deserialize_entity_key,
reserialize_entity_v2_key_to_v3,
serialize_entity_key,
)
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
Expand Down Expand Up @@ -115,3 +116,26 @@ def test_deserialize_value():

v = _deserialize_value(ValueType.INT64, b"\x01\x00\x00\x00\x00\x00\x00\x00")
assert v.int64_val == 1


def test_reserialize_entity_v2_key_to_v3():
    """A key written with version 2 must decode under version 3 after reserialization."""
    expected_key = EntityKeyProto(
        join_keys=["user"],
        entity_values=[ValueProto(int64_val=int(2**15))],
    )

    # Produce the version 2 bytes from an equivalent, independently built proto.
    v2_bytes = serialize_entity_key(
        EntityKeyProto(
            join_keys=["user"],
            entity_values=[ValueProto(int64_val=int(2**15))],
        ),
        entity_key_serialization_version=2,
    )

    v3_bytes = reserialize_entity_v2_key_to_v3(v2_bytes)
    round_tripped = deserialize_entity_key(
        v3_bytes,
        entity_key_serialization_version=3,
    )

    assert round_tripped == expected_key
Loading