Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ ibis = [
ikv = [
"ikvpy>=0.0.36",
]
image = [
"feast[pytorch]",
"timm>=0.6.0",
"Pillow>=8.0.0",
"scikit-learn>=1.0.0",
]
k8s = ["kubernetes<=20.13.0"]
milvus = [
"pymilvus==2.4.9",
Expand Down Expand Up @@ -168,9 +174,9 @@ ci = [
"types-setuptools",
"types-tabulate",
"virtualenv<20.24.2",
"feast[aws, azure, cassandra, clickhouse, couchbase, delta, docling, duckdb, elasticsearch, faiss, gcp, ge, go, grpcio, hazelcast, hbase, ibis, ikv, k8s, mcp, milvus, mssql, mysql, opentelemetry, spark, trino, postgres, pytorch, qdrant, rag, ray, redis, singlestore, snowflake, sqlite_vec]"
"feast[aws, azure, cassandra, clickhouse, couchbase, delta, docling, duckdb, elasticsearch, faiss, gcp, ge, go, grpcio, hazelcast, hbase, ibis, ikv, image, k8s, mcp, milvus, mssql, mysql, opentelemetry, spark, trino, postgres, pytorch, qdrant, rag, ray, redis, singlestore, snowflake, sqlite_vec]"
]
nlp = ["feast[docling, milvus, pytorch, rag]"]
nlp = ["feast[docling, image, milvus, pytorch, rag]"]
dev = ["feast[ci]"]
docs = ["feast[ci]"]
# used for the 'feature-server' container image build
Expand Down
108 changes: 103 additions & 5 deletions sdk/python/feast/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2239,6 +2239,12 @@ def retrieve_online_documents_v2(
query: Optional[List[float]] = None,
query_string: Optional[str] = None,
distance_metric: Optional[str] = "L2",
query_image_bytes: Optional[bytes] = None,
Copy link
Member

@franciscojavierarceo franciscojavierarceo Aug 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the right way to do this? I would have updated the proto values to support ImageBytes, the same way we support PDFBytes, and then just queried the embedding the standard way.

You can then enrich the image vector embeddings with text and semantic embeddings and allow a composite search of both. In that sense, you would still use one query, but search across multiple vectors.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I'm realizing my mistake here. ImageBytes is required for retrieval; what you have here is appropriate for image search when the image is passed in for the query.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added IMAGE_BYTES as well 👍

query_image_model: Optional[str] = "resnet34",
combine_with_text: bool = False,
text_weight: float = 0.5,
image_weight: float = 0.5,
combine_strategy: str = "weighted_sum",
) -> OnlineResponse:
"""
Retrieves the top k closest document features. Note, embeddings are a subset of features.
Expand All @@ -2247,13 +2253,105 @@ def retrieve_online_documents_v2(
features: The list of features that should be retrieved from the online document store. These features can be
specified either as a list of string document feature references or as a feature service. String feature
references must have format "feature_view:feature", e.g, "document_fv:document_embeddings".
query: The embedded query to retrieve the closest document features for (optional)
top_k: The number of closest document features to retrieve.
query_string: Text query for hybrid search (alternative to query parameter)
distance_metric: The distance metric to use for retrieval.
query_string: The query string to retrieve the closest document features using keyword search (bm25).
query_image_bytes: Query image as bytes (for image similarity search)
query_image_model: Model name for image embedding generation
combine_with_text: Whether to combine text and image embeddings for multi-modal search
text_weight: Weight for text embedding in combined search (0.0 to 1.0)
image_weight: Weight for image embedding in combined search (0.0 to 1.0)
combine_strategy: Strategy for combining embeddings ("weighted_sum", "concatenate", "average")

Returns:
OnlineResponse with similar documents and metadata

Examples:
Text search only::

results = store.retrieve_online_documents_v2(
features=["documents:embedding", "documents:title"],
query=[0.1, 0.2, 0.3], # text embedding vector
top_k=5
)

Image search only::

results = store.retrieve_online_documents_v2(
features=["images:embedding", "images:filename"],
query_image_bytes=b"image_data", # image bytes
top_k=5
)

Combined text + image search::

results = store.retrieve_online_documents_v2(
features=["documents:embedding", "documents:title"],
query=[0.1, 0.2, 0.3], # text embedding vector
query_image_bytes=b"image_data", # image bytes
combine_with_text=True,
text_weight=0.3,
image_weight=0.7,
top_k=5
)
"""
assert query is not None or query_string is not None, (
"Either query or query_string must be provided."
if query is None and not query_image_bytes and not query_string:
raise ValueError(
"Must provide either query (text embedding), "
"query_image_bytes, or query_string"
)

if combine_with_text and not (query is not None and query_image_bytes):
raise ValueError(
"combine_with_text=True requires both query (text embedding) "
"and query_image_bytes"
)

if combine_with_text and abs(text_weight + image_weight - 1.0) > 1e-6:
raise ValueError("text_weight + image_weight must equal 1.0 when combining")

image_embedding = None
if query_image_bytes is not None:
try:
from feast.image_utils import ImageFeatureExtractor

model_name = query_image_model or "resnet34"
extractor = ImageFeatureExtractor(model_name)
image_embedding = extractor.extract_embedding(query_image_bytes)
except ImportError:
raise ImportError(
"Image processing dependencies are not installed. "
"Please install with: pip install feast[image]"
)

text_embedding = query

if (
combine_with_text
and text_embedding is not None
and image_embedding is not None
):
# Combine text and image embeddings
from feast.image_utils import combine_embeddings

final_query = combine_embeddings(
text_embedding=text_embedding,
image_embedding=image_embedding,
strategy=combine_strategy,
text_weight=text_weight,
image_weight=image_weight,
)
elif image_embedding is not None:
final_query = image_embedding
elif text_embedding is not None:
final_query = text_embedding
else:
final_query = None

effective_query = final_query

assert effective_query is not None or query_string is not None, (
"Either query embedding or query_string must be provided."
)

(
Expand Down Expand Up @@ -2295,7 +2393,7 @@ def retrieve_online_documents_v2(
provider,
requested_feature_view,
requested_features,
query,
effective_query,
top_k,
distance_metric,
query_string,
Expand Down
Loading
Loading