@@ -734,8 +734,8 @@ def test_search_by_tags(self, shared_search_responses):
734734 f"Expected to find some of { expected_resources } but found none in { found_resources } "
735735 )
736736
737- def test_search_matched_tag_exact_match (self , search_test_app ):
738- """Test that matched_tag field is present when a tag matches exactly"""
737+ def test_search_matched_tags_exact_match (self , search_test_app ):
738+ """Test that matched_tags field is present when a tag matches exactly"""
739739 # Search for "data" which should match tag key "team" with value "data"
740740 response = search_test_app .get ("/search?query=data" )
741741 assert response .status_code == 200
@@ -745,34 +745,75 @@ def test_search_matched_tag_exact_match(self, search_test_app):
745745
746746 # Find results that matched via tags (match_score = 60)
747747 tag_matched_results = [
748- r for r in results if r .get ("match_score" ) == 60 and "matched_tag " in r
748+ r for r in results if r .get ("match_score" ) == 60 and "matched_tags " in r
749749 ]
750750
751751 assert len (tag_matched_results ) > 0 , (
752- "Expected to find at least one result with matched_tag from tag matching"
752+ "Expected to find at least one result with matched_tags from tag matching"
753753 )
754754
755- # Verify matched_tag is present and has a valid dictionary value
755+ # Verify matched_tags is present and has a valid dictionary value
756756 for result in tag_matched_results :
757- matched_tag = result .get ("matched_tag " )
758- assert matched_tag is not None , (
759- f"matched_tag should not be None for result { result ['name' ]} "
757+ matched_tags = result .get ("matched_tags " )
758+ assert matched_tags is not None , (
759+ f"matched_tags should not be None for result { result ['name' ]} "
760760 )
761- assert isinstance (matched_tag , dict ), (
762- f"matched_tag should be a dictionary, got { type (matched_tag )} "
761+ assert isinstance (matched_tags , dict ), (
762+ f"matched_tags should be a dictionary, got { type (matched_tags )} "
763763 )
764- # matched_tag should be a dictionary with key:value format
765- assert len (matched_tag ) > 0 , "matched_tag should not be empty"
766- assert len (matched_tag ) == 1 , (
767- f"matched_tag should contain exactly one key-value pair, got { len (matched_tag )} "
764+ # matched_tags should be a non-empty dict for tag-matched results
765+ assert len (matched_tags ) > 0 , (
766+ "matched_tags should not be empty for tag matches"
768767 )
769768
770769 logger .debug (
771- f"Found { len (tag_matched_results )} results with matched_tag : { [r ['name' ] + ' -> ' + str (r .get ('matched_tag ' , 'N/A' )) for r in tag_matched_results ]} "
770+ f"Found { len (tag_matched_results )} results with matched_tags : { [r ['name' ] + ' -> ' + str (r .get ('matched_tags ' , 'N/A' )) for r in tag_matched_results ]} "
772771 )
773772
774- def test_search_matched_tag_fuzzy_match (self , search_test_app ):
775- """Test that matched_tag field is present when a tag matches via fuzzy matching"""
773+ def test_search_matched_tags_multiple_tags (self , search_test_app ):
774+ """Test that matched_tags reports every matching tag ("team" and "purpose") for one result"""
775+ # Search for "a", a broad query that should match:
776+ # - Names containing "a" (e.g., user_training_dataset, data sources)
777+ # - Tags whose key or value contains "a": "team" (key), "data" (value), "training" (value)
778+ response = search_test_app .get ("/search?query=a" )
779+ logger .info (response .json ()) # NOTE(review): body is parsed here, before the status check below
780+ assert response .status_code == 200
781+
782+ data = response .json ()
783+ results = data ["results" ]
784+
785+ # Find user_training_dataset, which is expected to carry tags: {"environment": "test", "purpose": "training", "team": "data"}
786+ # Tag "team": "data" matches on both key and value; tag "purpose": "training" matches on its value
787+ # So matched_tags should have at least 2 entries, keyed "purpose" and "team"
788+ dataset_results = [
789+ r for r in results if r .get ("name" ) == "user_training_dataset"
790+ ]
791+
792+ assert len (dataset_results ) > 0 , (
793+ "Expected to find user_training_dataset in results"
794+ )
795+
796+ dataset_result = dataset_results [0 ]
797+ matched_tags = dataset_result .get ("matched_tags" , {}) # {} default: a missing field fails the length assertion below
798+
799+ assert isinstance (matched_tags , dict ), (
800+ f"matched_tags should be a dictionary, got { type (matched_tags )} "
801+ )
802+
803+ # More than one tag must be reported, not just the first match
804+ assert len (matched_tags ) >= 2 , (
805+ f"Expected at least 2 matching tags for 'a' query, got { len (matched_tags )} : { matched_tags } "
806+ )
807+
808+ # Verify both expected tag keys are present
809+ assert "team" in matched_tags and "purpose" in matched_tags , (
810+ f"Expected 'team' and 'purpose' in matched_tags, got: { matched_tags } "
811+ )
812+
813+ logger .debug (f"user_training_dataset matched_tags: { matched_tags } " )
814+
815+ def test_search_matched_tags_fuzzy_match (self , search_test_app ):
816+ """Test that matched_tags field is present when a tag matches via fuzzy matching"""
776817 # Search for "te" which should fuzzy match tag key "team"
777818 # "te" vs "team": overlap={'t','e'}/union={'t','e','a','m'} = 2/4 = 50% (below threshold)
778819 # Try "tea" which should fuzzy match "team" better
@@ -789,7 +830,7 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):
789830 for r in results
790831 if r .get ("match_score" , 0 ) >= 40
791832 and r .get ("match_score" , 0 ) < 60
792- and "matched_tag " in r
833+ and "matched_tags " in r
793834 ]
794835
795836 # If we don't find fuzzy matches, try a different query that's more likely to match
@@ -805,30 +846,29 @@ def test_search_matched_tag_fuzzy_match(self, search_test_app):
805846 for r in results
806847 if r .get ("match_score" , 0 ) >= 40
807848 and r .get ("match_score" , 0 ) < 60
808- and "matched_tag " in r
849+ and "matched_tags " in r
809850 ]
810851
811852 if len (fuzzy_tag_matched_results ) > 0 :
812- # Verify matched_tag is present for fuzzy matches
853+ # Verify matched_tags is present for fuzzy matches
813854 for result in fuzzy_tag_matched_results :
814- matched_tag = result .get ("matched_tag " )
815- assert matched_tag is not None , (
816- f"matched_tag should not be None for fuzzy-matched result { result ['name' ]} "
855+ matched_tags = result .get ("matched_tags " )
856+ assert matched_tags is not None , (
857+ f"matched_tags should not be None for fuzzy-matched result { result ['name' ]} "
817858 )
818- assert isinstance (matched_tag , dict ), (
819- f"matched_tag should be a dictionary, got { type (matched_tag )} "
859+ assert isinstance (matched_tags , dict ), (
860+ f"matched_tags should be a dictionary, got { type (matched_tags )} "
820861 )
821- assert len (matched_tag ) > 0 , "matched_tag should not be empty"
822- assert len (matched_tag ) == 1 , (
823- f"matched_tag should contain exactly one key-value pair, got { len (matched_tag )} "
862+ assert len (matched_tags ) > 0 , (
863+ "matched_tags should not be empty for fuzzy tag matches"
824864 )
825865 # Verify the match_score is in the fuzzy range
826866 assert 40 <= result .get ("match_score" , 0 ) < 60 , (
827867 f"Fuzzy tag match should have score in [40, 60), got { result .get ('match_score' )} "
828868 )
829869
830870 logger .debug (
831- f"Found { len (fuzzy_tag_matched_results )} results with fuzzy matched_tag : { [r ['name' ] + ' -> ' + str (r .get ('matched_tag ' , 'N/A' )) + ' (score: ' + str (r .get ('match_score' , 'N/A' )) + ')' for r in fuzzy_tag_matched_results ]} "
871+ f"Found { len (fuzzy_tag_matched_results )} results with fuzzy matched_tags : { [r ['name' ] + ' -> ' + str (r .get ('matched_tags ' , 'N/A' )) + ' (score: ' + str (r .get ('match_score' , 'N/A' )) + ')' for r in fuzzy_tag_matched_results ]} "
832872 )
833873
834874 def test_search_sorting_functionality (self , shared_search_responses ):
0 commit comments