Skip to content

Commit de37f66

Browse files
authored
fix: Search API to return all matching tags in matched_tags field (#5843)
* fix: Search API to return all matching tags in matched_tags field Signed-off-by: Aniket Paluskar <apaluska@redhat.com> * Added unit tests to check if multiple tags are returned if they are matching the query Signed-off-by: Aniket Paluskar <apaluska@redhat.com> --------- Signed-off-by: Aniket Paluskar <apaluska@redhat.com>
1 parent 3c10b6e commit de37f66

File tree

3 files changed

+112
-65
lines changed

3 files changed

+112
-65
lines changed

docs/reference/feature-servers/registry-server.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,28 +1207,33 @@ Please refer the [page](./../../../docs/getting-started/concepts/permission.md)
12071207
"name": "user_id",
12081208
"description": "Primary identifier for users",
12091209
"project": "project1",
1210-
"match_score": 100
1210+
"match_score": 100,
1211+
"matched_tags": {}
12111212
},
12121213
{
12131214
"type": "featureView",
12141215
"name": "user_features",
12151216
"description": "User demographic and behavioral features",
12161217
"project": "project1",
1217-
"match_score": 100
1218+
"match_score": 100,
1219+
"matched_tags": {"team": "user_analytics"}
12181220
},
12191221
{
12201222
"type": "feature",
12211223
"name": "user_age",
12221224
"description": "Age of the user in years",
12231225
"project": "project1",
1224-
"match_score": 80
1226+
"featureView": "user_features",
1227+
"match_score": 80,
1228+
"matched_tags": {}
12251229
},
12261230
{
12271231
"type": "dataSource",
12281232
"name": "user_analytics",
12291233
"description": "Analytics data for user behavior tracking",
12301234
"project": "project2",
1231-
"match_score": 80
1235+
"match_score": 80,
1236+
"matched_tags": {"source": "user_data"}
12321237
}
12331238
],
12341239
"pagination": {

sdk/python/feast/api/registry/rest/rest_utils.py

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
MATCH_SCORE_NAME = 100
1919
MATCH_SCORE_DESCRIPTION = 80
2020
MATCH_SCORE_TAGS = 60
21-
MATCH_SCORE_PARTIAL = 40
2221

2322

2423
def grpc_call(handler_fn, request):
@@ -537,12 +536,42 @@ def filter_search_results_and_match_score(
537536
) -> List[Dict]:
538537
"""Filter search results based on query string"""
539538
if not query:
539+
# Add all tags as matched_tags when no query (all tags match)
540+
for result in results:
541+
result["matched_tags"] = result.get("tags", {})
540542
return results
541543

542544
query_lower = query.lower()
543545
filtered_results = []
544546

545547
for result in results:
548+
matched_tags = {}
549+
best_fuzzy_tag_score = 0.0
550+
551+
# Collect all matching tags (exact and fuzzy) upfront
552+
tags = result.get("tags", {})
553+
has_exact_tag_match = False
554+
555+
for key, value in tags.items():
556+
key_lower = str(key).lower()
557+
value_str = str(value).lower()
558+
tag_combined = f"{key_lower}={value_str}"
559+
560+
# Exact (substring) match against the combined "key=value" string
561+
if query_lower in tag_combined:
562+
has_exact_tag_match = True
563+
matched_tags[key] = value
564+
else:
565+
# Fuzzy match for tags (on combined "key=value" string)
566+
tag_fuzzy_score = fuzzy_match(query_lower, tag_combined)
567+
568+
if tag_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
569+
matched_tags[key] = value
570+
if tag_fuzzy_score > best_fuzzy_tag_score:
571+
best_fuzzy_tag_score = tag_fuzzy_score
572+
573+
result["matched_tags"] = matched_tags
574+
546575
# Search in name
547576
if query_lower in result.get("name", "").lower():
548577
result["match_score"] = MATCH_SCORE_NAME
@@ -555,42 +584,15 @@ def filter_search_results_and_match_score(
555584
filtered_results.append(result)
556585
continue
557586

558-
# Search in tags
559-
tags = result.get("tags", {})
560-
tag_match = False
561-
matched_tag = None
562-
best_fuzzy_score = 0.0
563-
best_fuzzy_tag = None
564-
565-
for key, value in tags.items():
566-
key_lower = key.lower()
567-
value_str = str(value).lower()
568-
569-
# Exact match in key or value
570-
if query_lower in key_lower or query_lower in value_str:
571-
tag_match = True
572-
# Store the matched tag as a dictionary
573-
matched_tag = {key: value}
574-
break
575-
576-
# Fuzzy match for tags (on combined "key:value" string)
577-
tag_combined = f"{key_lower}={value_str}"
578-
tag_fuzzy_score = fuzzy_match(query_lower, tag_combined)
579-
580-
if tag_fuzzy_score > best_fuzzy_score:
581-
best_fuzzy_score = tag_fuzzy_score
582-
best_fuzzy_tag = {key: value}
583-
584-
if tag_match:
587+
# Exact tag match
588+
if has_exact_tag_match:
585589
result["match_score"] = MATCH_SCORE_TAGS
586-
result["matched_tag"] = matched_tag
587590
filtered_results.append(result)
588591
continue
589592

590593
# Fuzzy tag match
591-
if best_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
592-
result["match_score"] = best_fuzzy_score * 100
593-
result["matched_tag"] = best_fuzzy_tag
594+
if best_fuzzy_tag_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
595+
result["match_score"] = best_fuzzy_tag_score * 100
594596
filtered_results.append(result)
595597
continue
596598

sdk/python/tests/unit/api/test_search_api.py

Lines changed: 69 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -734,8 +734,8 @@ def test_search_by_tags(self, shared_search_responses):
734734
f"Expected to find some of {expected_resources} but found none in {found_resources}"
735735
)
736736

737-
def test_search_matched_tag_exact_match(self, search_test_app):
738-
"""Test that matched_tag field is present when a tag matches exactly"""
737+
def test_search_matched_tags_exact_match(self, search_test_app):
738+
"""Test that matched_tags field is present when a tag matches exactly"""
739739
# Search for "data" which should match tag key "team" with value "data"
740740
response = search_test_app.get("/search?query=data")
741741
assert response.status_code == 200
@@ -745,34 +745,75 @@ def test_search_matched_tag_exact_match(self, search_test_app):
745745

746746
# Find results that matched via tags (match_score = 60)
747747
tag_matched_results = [
748-
r for r in results if r.get("match_score") == 60 and "matched_tag" in r
748+
r for r in results if r.get("match_score") == 60 and "matched_tags" in r
749749
]
750750

751751
assert len(tag_matched_results) > 0, (
752-
"Expected to find at least one result with matched_tag from tag matching"
752+
"Expected to find at least one result with matched_tags from tag matching"
753753
)
754754

755-
# Verify matched_tag is present and has a valid dictionary value
755+
# Verify matched_tags is present and has a valid dictionary value
756756
for result in tag_matched_results:
757-
matched_tag = result.get("matched_tag")
758-
assert matched_tag is not None, (
759-
f"matched_tag should not be None for result {result['name']}"
757+
matched_tags = result.get("matched_tags")
758+
assert matched_tags is not None, (
759+
f"matched_tags should not be None for result {result['name']}"
760760
)
761-
assert isinstance(matched_tag, dict), (
762-
f"matched_tag should be a dictionary, got {type(matched_tag)}"
761+
assert isinstance(matched_tags, dict), (
762+
f"matched_tags should be a dictionary, got {type(matched_tags)}"
763763
)
764-
# matched_tag should be a dictionary with key:value format
765-
assert len(matched_tag) > 0, "matched_tag should not be empty"
766-
assert len(matched_tag) == 1, (
767-
f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
764+
# matched_tags should be a non-empty dict for tag-matched results
765+
assert len(matched_tags) > 0, (
766+
"matched_tags should not be empty for tag matches"
768767
)
769768

770769
logger.debug(
771-
f"Found {len(tag_matched_results)} results with matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) for r in tag_matched_results]}"
770+
f"Found {len(tag_matched_results)} results with matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) for r in tag_matched_results]}"
772771
)
773772

774-
def test_search_matched_tag_fuzzy_match(self, search_test_app):
775-
"""Test that matched_tag field is present when a tag matches via fuzzy matching"""
773+
def test_search_matched_tags_multiple_tags(self, search_test_app):
774+
"""Test that multiple matching tags are returned in matched_tags"""
775+
# Search for "a" which should match:
776+
# - Names containing "a" (e.g., user_training_dataset, data sources)
777+
# - Tags where key/value contains "a": "team" (key), "data" (value), "training" (value)
778+
response = search_test_app.get("/search?query=a")
779+
logger.info(response.json())
780+
assert response.status_code == 200
781+
782+
data = response.json()
783+
results = data["results"]
784+
785+
# Find user_training_dataset which has tags: {"environment": "test", "purpose": "training", "team": "data"}
786+
# "team" contains "a", "data" contains "a", "training" contains "a"
787+
# So matched_tags should have at least 2 entries: "purpose" and "team"
788+
dataset_results = [
789+
r for r in results if r.get("name") == "user_training_dataset"
790+
]
791+
792+
assert len(dataset_results) > 0, (
793+
"Expected to find user_training_dataset in results"
794+
)
795+
796+
dataset_result = dataset_results[0]
797+
matched_tags = dataset_result.get("matched_tags", {})
798+
799+
assert isinstance(matched_tags, dict), (
800+
f"matched_tags should be a dictionary, got {type(matched_tags)}"
801+
)
802+
803+
# Should have multiple matching tags: "purpose" and "team"
804+
assert len(matched_tags) >= 2, (
805+
f"Expected at least 2 matching tags for 'a' query, got {len(matched_tags)}: {matched_tags}"
806+
)
807+
808+
# Verify the expected tags are present
809+
assert "team" in matched_tags and "purpose" in matched_tags, (
810+
f"Expected 'team' and 'purpose' in matched_tags, got: {matched_tags}"
811+
)
812+
813+
logger.debug(f"user_training_dataset matched_tags: {matched_tags}")
814+
815+
def test_search_matched_tags_fuzzy_match(self, search_test_app):
816+
"""Test that matched_tags field is present when a tag matches via fuzzy matching"""
776817
# Search for "te" which should fuzzy match tag key "team"
777818
# "te" vs "team": overlap={'t','e'}/union={'t','e','a','m'} = 2/4 = 50% (below threshold)
778819
# Try "tea" which should fuzzy match "team" better
@@ -789,7 +830,7 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):
789830
for r in results
790831
if r.get("match_score", 0) >= 40
791832
and r.get("match_score", 0) < 60
792-
and "matched_tag" in r
833+
and "matched_tags" in r
793834
]
794835

795836
# If we don't find fuzzy matches, try a different query that's more likely to match
@@ -805,30 +846,29 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):
805846
for r in results
806847
if r.get("match_score", 0) >= 40
807848
and r.get("match_score", 0) < 60
808-
and "matched_tag" in r
849+
and "matched_tags" in r
809850
]
810851

811852
if len(fuzzy_tag_matched_results) > 0:
812-
# Verify matched_tag is present for fuzzy matches
853+
# Verify matched_tags is present for fuzzy matches
813854
for result in fuzzy_tag_matched_results:
814-
matched_tag = result.get("matched_tag")
815-
assert matched_tag is not None, (
816-
f"matched_tag should not be None for fuzzy-matched result {result['name']}"
855+
matched_tags = result.get("matched_tags")
856+
assert matched_tags is not None, (
857+
f"matched_tags should not be None for fuzzy-matched result {result['name']}"
817858
)
818-
assert isinstance(matched_tag, dict), (
819-
f"matched_tag should be a dictionary, got {type(matched_tag)}"
859+
assert isinstance(matched_tags, dict), (
860+
f"matched_tags should be a dictionary, got {type(matched_tags)}"
820861
)
821-
assert len(matched_tag) > 0, "matched_tag should not be empty"
822-
assert len(matched_tag) == 1, (
823-
f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
862+
assert len(matched_tags) > 0, (
863+
"matched_tags should not be empty for fuzzy tag matches"
824864
)
825865
# Verify the match_score is in the fuzzy range
826866
assert 40 <= result.get("match_score", 0) < 60, (
827867
f"Fuzzy tag match should have score in [40, 60), got {result.get('match_score')}"
828868
)
829869

830870
logger.debug(
831-
f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
871+
f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
832872
)
833873

834874
def test_search_sorting_functionality(self, shared_search_responses):

0 commit comments

Comments
 (0)