fix: Search API to return all matching tags in matched_tags field (#5843)

aniketpalu · web-flow · commit de37f6677ad3 · 2026-01-13T18:36:57.000+05:30
* fix: Search API to return all matching tags in matched_tags field

Signed-off-by: Aniket Paluskar &lt;apaluska@redhat.com&gt;

* Added unit tests to check if multiple tags are returned if they are matching the query

Signed-off-by: Aniket Paluskar &lt;apaluska@redhat.com&gt;

---------

Signed-off-by: Aniket Paluskar &lt;apaluska@redhat.com&gt;
diff --git a/docs/reference/feature-servers/registry-server.md b/docs/reference/feature-servers/registry-server.md
@@ -1207,28 +1207,33 @@ Please refer the [page](./../../../docs/getting-started/concepts/permission.md)
         "name": "user_id",
         "description": "Primary identifier for users",
         "project": "project1",
-        "match_score": 100
+        "match_score": 100,
+        "matched_tags": {}
       },
       {
         "type": "featureView",
         "name": "user_features",
         "description": "User demographic and behavioral features",
         "project": "project1",
-        "match_score": 100
+        "match_score": 100,
+        "matched_tags": {"team": "user_analytics"}
       },
       {
         "type": "feature",
         "name": "user_age",
         "description": "Age of the user in years",
         "project": "project1",
-        "match_score": 80
+        "featureView": "user_features",
+        "match_score": 80,
+        "matched_tags": {}
       },
       {
         "type": "dataSource",
         "name": "user_analytics",
         "description": "Analytics data for user behavior tracking",
         "project": "project2",
-        "match_score": 80
+        "match_score": 80,
+        "matched_tags": {"source": "user_data"}
       }
     ],
     "pagination": {
diff --git a/sdk/python/feast/api/registry/rest/rest_utils.py b/sdk/python/feast/api/registry/rest/rest_utils.py
@@ -18,7 +18,6 @@
 MATCH_SCORE_NAME = 100
 MATCH_SCORE_DESCRIPTION = 80
 MATCH_SCORE_TAGS = 60
-MATCH_SCORE_PARTIAL = 40
 
 
 def grpc_call(handler_fn, request):
@@ -537,12 +536,42 @@ def filter_search_results_and_match_score(
 ) -> List[Dict]:
     """Filter search results based on query string"""
     if not query:
+        # Add all tags as matched_tags when no query (all tags match)
+        for result in results:
+            result["matched_tags"] = result.get("tags", {})
         return results
 
     query_lower = query.lower()
     filtered_results = []
 
     for result in results:
+        matched_tags = {}
+        best_fuzzy_tag_score = 0.0
+
+        # Collect all matching tags (exact and fuzzy) upfront
+        tags = result.get("tags", {})
+        has_exact_tag_match = False
+
+        for key, value in tags.items():
+            key_lower = str(key).lower()
+            value_str = str(value).lower()
+            tag_combined = f"{key_lower}={value_str}"
+
+            # Exact match in key or value
+            if query_lower in tag_combined:
+                has_exact_tag_match = True
+                matched_tags[key] = value
+            else:
+                # Fuzzy match for tags (on combined "key:value" string)
+                tag_fuzzy_score = fuzzy_match(query_lower, tag_combined)
+
+                if tag_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
+                    matched_tags[key] = value
+                    if tag_fuzzy_score > best_fuzzy_tag_score:
+                        best_fuzzy_tag_score = tag_fuzzy_score
+
+        result["matched_tags"] = matched_tags
+
         # Search in name
         if query_lower in result.get("name", "").lower():
             result["match_score"] = MATCH_SCORE_NAME
@@ -555,42 +584,15 @@ def filter_search_results_and_match_score(
             filtered_results.append(result)
             continue
 
-        # Search in tags
-        tags = result.get("tags", {})
-        tag_match = False
-        matched_tag = None
-        best_fuzzy_score = 0.0
-        best_fuzzy_tag = None
-
-        for key, value in tags.items():
-            key_lower = key.lower()
-            value_str = str(value).lower()
-
-            # Exact match in key or value
-            if query_lower in key_lower or query_lower in value_str:
-                tag_match = True
-                # Store the matched tag as a dictionary
-                matched_tag = {key: value}
-                break
-
-            # Fuzzy match for tags (on combined "key:value" string)
-            tag_combined = f"{key_lower}={value_str}"
-            tag_fuzzy_score = fuzzy_match(query_lower, tag_combined)
-
-            if tag_fuzzy_score > best_fuzzy_score:
-                best_fuzzy_score = tag_fuzzy_score
-                best_fuzzy_tag = {key: value}
-
-        if tag_match:
+        # Exact tag match
+        if has_exact_tag_match:
             result["match_score"] = MATCH_SCORE_TAGS
-            result["matched_tag"] = matched_tag
             filtered_results.append(result)
             continue
 
         # Fuzzy tag match
-        if best_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
-            result["match_score"] = best_fuzzy_score * 100
-            result["matched_tag"] = best_fuzzy_tag
+        if best_fuzzy_tag_score >= MATCH_SCORE_DEFAULT_THRESHOLD:
+            result["match_score"] = best_fuzzy_tag_score * 100
             filtered_results.append(result)
             continue
 
diff --git a/sdk/python/tests/unit/api/test_search_api.py b/sdk/python/tests/unit/api/test_search_api.py
@@ -734,8 +734,8 @@ def test_search_by_tags(self, shared_search_responses):
             f"Expected to find some of {expected_resources} but found none in {found_resources}"
         )
 
-    def test_search_matched_tag_exact_match(self, search_test_app):
-        """Test that matched_tag field is present when a tag matches exactly"""
+    def test_search_matched_tags_exact_match(self, search_test_app):
+        """Test that matched_tags field is present when a tag matches exactly"""
         # Search for "data" which should match tag key "team" with value "data"
         response = search_test_app.get("/search?query=data")
         assert response.status_code == 200
@@ -745,34 +745,75 @@ def test_search_matched_tag_exact_match(self, search_test_app):
 
         # Find results that matched via tags (match_score = 60)
         tag_matched_results = [
-            r for r in results if r.get("match_score") == 60 and "matched_tag" in r
+            r for r in results if r.get("match_score") == 60 and "matched_tags" in r
         ]
 
         assert len(tag_matched_results) > 0, (
-            "Expected to find at least one result with matched_tag from tag matching"
+            "Expected to find at least one result with matched_tags from tag matching"
         )
 
-        # Verify matched_tag is present and has a valid dictionary value
+        # Verify matched_tags is present and has a valid dictionary value
         for result in tag_matched_results:
-            matched_tag = result.get("matched_tag")
-            assert matched_tag is not None, (
-                f"matched_tag should not be None for result {result['name']}"
+            matched_tags = result.get("matched_tags")
+            assert matched_tags is not None, (
+                f"matched_tags should not be None for result {result['name']}"
             )
-            assert isinstance(matched_tag, dict), (
-                f"matched_tag should be a dictionary, got {type(matched_tag)}"
+            assert isinstance(matched_tags, dict), (
+                f"matched_tags should be a dictionary, got {type(matched_tags)}"
             )
-            # matched_tag should be a dictionary with key:value format
-            assert len(matched_tag) > 0, "matched_tag should not be empty"
-            assert len(matched_tag) == 1, (
-                f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
+            # matched_tags should be a non-empty dict for tag-matched results
+            assert len(matched_tags) > 0, (
+                "matched_tags should not be empty for tag matches"
             )
 
         logger.debug(
-            f"Found {len(tag_matched_results)} results with matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) for r in tag_matched_results]}"
+            f"Found {len(tag_matched_results)} results with matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) for r in tag_matched_results]}"
         )
 
-    def test_search_matched_tag_fuzzy_match(self, search_test_app):
-        """Test that matched_tag field is present when a tag matches via fuzzy matching"""
+    def test_search_matched_tags_multiple_tags(self, search_test_app):
+        """Test that multiple matching tags are returned in matched_tags"""
+        # Search for "a" which should match:
+        # - Names containing "a" (e.g., user_training_dataset, data sources)
+        # - Tags where key/value contains "a": "team" (key), "data" (value), "training" (value)
+        response = search_test_app.get("/search?query=a")
+        logger.info(response.json())
+        assert response.status_code == 200
+
+        data = response.json()
+        results = data["results"]
+
+        # Find user_training_dataset which has tags: {"environment": "test", "purpose": "training", "team": "data"}
+        # "team" contains "a", "data" contains "a", "training" contains "a"
+        # So matched_tags should have at least 2 entries: "purpose" and "team"
+        dataset_results = [
+            r for r in results if r.get("name") == "user_training_dataset"
+        ]
+
+        assert len(dataset_results) > 0, (
+            "Expected to find user_training_dataset in results"
+        )
+
+        dataset_result = dataset_results[0]
+        matched_tags = dataset_result.get("matched_tags", {})
+
+        assert isinstance(matched_tags, dict), (
+            f"matched_tags should be a dictionary, got {type(matched_tags)}"
+        )
+
+        # Should have multiple matching tags: "purpose" and "team"
+        assert len(matched_tags) >= 2, (
+            f"Expected at least 2 matching tags for 'a' query, got {len(matched_tags)}: {matched_tags}"
+        )
+
+        # Verify the expected tags are present
+        assert "team" in matched_tags and "purpose" in matched_tags, (
+            f"Expected 'team' and 'purpose' in matched_tags, got: {matched_tags}"
+        )
+
+        logger.debug(f"user_training_dataset matched_tags: {matched_tags}")
+
+    def test_search_matched_tags_fuzzy_match(self, search_test_app):
+        """Test that matched_tags field is present when a tag matches via fuzzy matching"""
         # Search for "te" which should fuzzy match tag key "team"
         # "te" vs "team": overlap={'t','e'}/union={'t','e','a','m'} = 2/4 = 50% (below threshold)
         # Try "tea" which should fuzzy match "team" better
@@ -789,7 +830,7 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):
             for r in results
             if r.get("match_score", 0) >= 40
             and r.get("match_score", 0) < 60
-            and "matched_tag" in r
+            and "matched_tags" in r
         ]
 
         # If we don't find fuzzy matches, try a different query that's more likely to match
@@ -805,30 +846,29 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):
                 for r in results
                 if r.get("match_score", 0) >= 40
                 and r.get("match_score", 0) < 60
-                and "matched_tag" in r
+                and "matched_tags" in r
             ]
 
         if len(fuzzy_tag_matched_results) > 0:
-            # Verify matched_tag is present for fuzzy matches
+            # Verify matched_tags is present for fuzzy matches
             for result in fuzzy_tag_matched_results:
-                matched_tag = result.get("matched_tag")
-                assert matched_tag is not None, (
-                    f"matched_tag should not be None for fuzzy-matched result {result['name']}"
+                matched_tags = result.get("matched_tags")
+                assert matched_tags is not None, (
+                    f"matched_tags should not be None for fuzzy-matched result {result['name']}"
                 )
-                assert isinstance(matched_tag, dict), (
-                    f"matched_tag should be a dictionary, got {type(matched_tag)}"
+                assert isinstance(matched_tags, dict), (
+                    f"matched_tags should be a dictionary, got {type(matched_tags)}"
                 )
-                assert len(matched_tag) > 0, "matched_tag should not be empty"
-                assert len(matched_tag) == 1, (
-                    f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}"
+                assert len(matched_tags) > 0, (
+                    "matched_tags should not be empty for fuzzy tag matches"
                 )
                 # Verify the match_score is in the fuzzy range
                 assert 40 <= result.get("match_score", 0) < 60, (
                     f"Fuzzy tag match should have score in [40, 60), got {result.get('match_score')}"
                 )
 
             logger.debug(
-                f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
+                f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tags: {[r['name'] + ' -> ' + str(r.get('matched_tags', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}"
             )
 
     def test_search_sorting_functionality(self, shared_search_responses):