Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion sdk/python/feast/api/registry/rest/rest_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,13 +558,39 @@ def filter_search_results_and_match_score(
# Search in tags
tags = result.get("tags", {})
tag_match = False
matched_tag = None
best_fuzzy_score = 0.0
best_fuzzy_tag = None

for key, value in tags.items():
if query_lower in key.lower() or query_lower in str(value).lower():
key_lower = key.lower()
value_str = str(value).lower()

# Exact match in key or value
if query_lower in key_lower or query_lower in value_str:
tag_match = True
# Store the matched tag as a dictionary
matched_tag = {key: value}
break

# Fuzzy match for tags (on combined "key=value" string)
tag_combined = f"{key_lower}={value_str}"
tag_fuzzy_score = fuzzy_match(query_lower, tag_combined)

if tag_fuzzy_score > best_fuzzy_score:
best_fuzzy_score = tag_fuzzy_score
best_fuzzy_tag = {key: value}

if tag_match:
result["match_score"] = MATCH_SCORE_TAGS
result["matched_tag"] = matched_tag
filtered_results.append(result)
continue

# Fuzzy tag match
if best_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
result["match_score"] = best_fuzzy_score * 100
result["matched_tag"] = best_fuzzy_tag
filtered_results.append(result)
continue

Expand Down
97 changes: 97 additions & 0 deletions sdk/python/tests/unit/api/test_search_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,103 @@ def test_search_by_tags(self, shared_search_responses):
f"Expected to find some of {expected_resources} but found none in {found_resources}"
)

def test_search_matched_tag_exact_match(self, search_test_app):
    """Check that tag-scored results carry a single-entry ``matched_tag`` dict.

    Queries ``/search?query=data``, keeps the results whose ``match_score``
    equals 60 and which contain a ``matched_tag`` key, requires at least one
    such result, and then validates the shape of ``matched_tag`` on each.
    """
    reply = search_test_app.get("/search?query=data")
    assert reply.status_code == 200

    payload = reply.json()
    hits = payload["results"]

    # Tag-based matches are recognised by a score of exactly 60 together
    # with the presence of the "matched_tag" key.
    tag_hits = []
    for hit in hits:
        if hit.get("match_score") == 60 and "matched_tag" in hit:
            tag_hits.append(hit)

    assert len(tag_hits) > 0, (
        "Expected to find at least one result with matched_tag from tag matching"
    )

    for hit in tag_hits:
        tag = hit.get("matched_tag")
        # The key may be present but still hold None, so check the value too.
        assert tag is not None, (
            f"matched_tag should not be None for result {hit['name']}"
        )
        assert isinstance(tag, dict), (
            f"matched_tag should be a dictionary, got {type(tag)}"
        )
        # Exactly one key/value pair is expected: the tag that matched.
        assert len(tag) > 0, "matched_tag should not be empty"
        assert len(tag) == 1, (
            f"matched_tag should contain exactly one key-value pair, got {len(tag)}"
        )

    summary = [
        hit["name"] + " -> " + str(hit.get("matched_tag", "N/A")) for hit in tag_hits
    ]
    logger.debug(f"Found {len(tag_hits)} results with matched_tag: {summary}")

def test_search_matched_tag_fuzzy_match(self, search_test_app):
"""Check the shape of matched_tag on results scored in the fuzzy range [40, 60).

Queries "/search?query=tea" first and, only if that yields no result with a
match_score in [40, 60) and a "matched_tag" key, retries with
"/search?query=dat".

NOTE: the per-result checks run only when at least one such result is found;
if neither query produces one, no assertion beyond the HTTP status checks is
exercised and the test still passes.
"""
# "tea" is intended as a near match for the tag key "team" (the shorter
# "te" was considered too weak a match to clear the threshold).
# NOTE(review): the expected score depends on the fuzzy scoring used by the
# search API, which is not visible from this test — TODO confirm that "tea"
# actually lands in the [40, 60) range for the fixture's tags.
response = search_test_app.get("/search?query=tea")
assert response.status_code == 200

data = response.json()
results = data["results"]

# Keep results whose score falls in the fuzzy range (>= 40 and < 60) and
# that expose a "matched_tag" key.
fuzzy_tag_matched_results = [
r
for r in results
if r.get("match_score", 0) >= 40
and r.get("match_score", 0) < 60
and "matched_tag" in r
]

# Nothing found for the first query: fall back to a second candidate query.
if len(fuzzy_tag_matched_results) == 0:
# "dat" is intended as a near match for the tag value "data".
# NOTE(review): as with "tea", the resulting score is an assumption — TODO confirm.
response = search_test_app.get("/search?query=dat")
assert response.status_code == 200
data = response.json()
results = data["results"]
fuzzy_tag_matched_results = [
r
for r in results
if r.get("match_score", 0) >= 40
and r.get("match_score", 0) < 60
and "matched_tag" in r
]

# The checks below are conditional: with no fuzzy-range results they are skipped.
if len(fuzzy_tag_matched_results) > 0:
# Verify matched_tag is a non-None, single-entry dict for every fuzzy match
for result in fuzzy_tag_matched_results:
matched_tag = result.get("matched_tag")
assert matched_tag is not None, (
f"matched_tag should not be None for fuzzy-matched result {result['name']}"
)
assert isinstance(matched_tag, dict), (
f"matched_tag should be a dictionary, got {type(matched_tag)}"
)
assert len(matched_tag) > 0, "matched_tag should not be empty"
assert len(matched_tag) == 1, (
f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
)
# Re-check the score range; the filter above already guarantees it
assert 40 <= result.get("match_score", 0) < 60, (
f"Fuzzy tag match should have score in [40, 60), got {result.get('match_score')}"
)

# Debug summary: name, matched tag and score of each fuzzy-range result.
logger.debug(
f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
)

def test_search_sorting_functionality(self, shared_search_responses):
"""Test search results sorting using pre-computed responses"""
# Test match_score descending sort
Expand Down
Loading