Skip to content

Commit 1378f31

Browse files
committed
Add tests
Signed-off-by: Junyi Xu <219237550+JunyiXu-nv@users.noreply.github.com>
1 parent ab047fe commit 1378f31

File tree

2 files changed

+79
-18
lines changed

2 files changed

+79
-18
lines changed

tensorrt_llm/serve/responses_utils.py

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,46 @@ def finish_reason_mapping(finish_reason: str) -> str:
620620
raise RuntimeError("Should never reach here!")
621621

622622

623+
def _response_output_item_to_chat_completion_message(
624+
item: Union[Dict,
625+
ResponseInputOutputItem]) -> ChatCompletionMessageParam:
626+
if not isinstance(item, dict):
627+
item = item.model_dump()
628+
629+
item_type = item.get("type", "")
630+
631+
match item_type:
632+
case "":
633+
if "role" in item:
634+
return item
635+
else:
636+
raise ValueError(f"Invalid input message item: {item}")
637+
case "message":
638+
return {
639+
"role": "assistant",
640+
"content": item["content"][0]["text"],
641+
}
642+
case "reasoning":
643+
return {
644+
"role": "assistant",
645+
"reasoning": item["content"][0]["text"],
646+
}
647+
case "function_call":
648+
return {
649+
"role": "function",
650+
"content": item["arguments"],
651+
}
652+
case "function_call_output":
653+
return {
654+
"role": "tool",
655+
"content": item["output"],
656+
"tool_call_id": item["call_id"],
657+
}
658+
case _:
659+
raise ValueError(
660+
f"Unsupported input item type: {item_type}, item: {item}")
661+
662+
623663
async def _create_input_messages(
624664
request: ResponsesRequest,
625665
prev_msgs: List[ChatCompletionMessageParam],
@@ -643,15 +683,8 @@ async def _create_input_messages(
643683
messages.append({"role": "user", "content": request.input})
644684
else:
645685
for inp in request.input:
646-
if inp.get("type", "") == "function_call_output":
647-
tool_call_inp = {
648-
"role": "tool",
649-
"content": inp["output"],
650-
"tool_call_id": inp["call_id"],
651-
}
652-
messages.append(tool_call_inp)
653-
else:
654-
messages.append(inp)
686+
messages.append(
687+
_response_output_item_to_chat_completion_message(inp))
655688

656689
return messages
657690

@@ -824,7 +857,7 @@ async def request_preprocess(
824857
sampling_params.return_perf_metrics = True
825858

826859
prev_msgs = []
827-
if enable_store:
860+
if enable_store and prev_response_id is not None:
828861
prev_msgs = await conversation_store.get_conversation_history(
829862
prev_response_id)
830863

tests/unittest/llmapi/apps/_test_openai_responses.py

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,29 @@
1212
pytestmark = pytest.mark.threadleak(enabled=False)
1313

1414

15-
@pytest.fixture(
    scope="module",
    params=[
        "gpt_oss/gpt-oss-20b",
        "DeepSeek-R1-Distill-Qwen-1.5B",
        "Qwen3/Qwen3-0.6B",
    ],
)
def model(request):
    """Parametrized model id; the whole suite runs once per listed model."""
    return request.param
1822

1923

2024
@pytest.fixture(scope="module")
def server(model: str):
    """Launch a RemoteOpenAIServer for *model* with per-family parser flags."""
    model_path = get_model_path(model)

    # Pick the reasoning parser matching the model family; gpt_oss needs none.
    extra_args = []
    if model.startswith("Qwen3"):
        extra_args += ["--reasoning_parser", "qwen3"]
    elif model.startswith("DeepSeek-R1"):
        extra_args += ["--reasoning_parser", "deepseek-r1"]

    # All non-gpt_oss models under test share the qwen3 tool parser.
    if not model.startswith("gpt_oss"):
        extra_args += ["--tool_parser", "qwen3"]

    with RemoteOpenAIServer(model_path, extra_args) as remote_server:
        yield remote_server
2539

2640

@@ -43,24 +57,30 @@ def check_reponse(response, prefix=""):
4357

4458
def check_tool_calling(response, first_resp=True, prefix=""):
    """Assert the expected mix of output items in a tool-calling exchange.

    First response: reasoning + function_call present, no final message;
    returns the function_call item. Second response: reasoning + message
    present, no function_call; returns None.
    """
    reasoning_exist = tool_call_exist = message_exist = False
    reasoning_content, message_content = "", ""
    function_call = None

    for output in response.output:
        kind = output.type
        if kind == "reasoning":
            reasoning_exist = True
            reasoning_content = output.content[0].text
        elif kind == "function_call":
            tool_call_exist = True
            function_call = output
        elif kind == "message":
            message_exist = True
            message_content = output.content[0].text

    err_msg = f"{prefix}Invalid tool calling {'1st' if first_resp else '2nd'} response:"
    if first_resp:
        assert reasoning_exist, f"{err_msg} reasoning content not exists! ({reasoning_content})"
        assert tool_call_exist, f"{err_msg} tool call content not exists! ({function_call})"
        assert not message_exist, f"{err_msg} message content should not exist! ({message_content})"
        return function_call

    assert reasoning_exist, f"{err_msg} reasoning content not exists! ({reasoning_content})"
    assert message_exist, f"{err_msg} message content not exists! ({message_content})"
    assert not tool_call_exist, f"{err_msg} tool call content should not exist! ({function_call})"
6484

6585

6686
@pytest.mark.asyncio(loop_scope="module")
@@ -124,6 +144,9 @@ def get_current_weather(location: str, format: str = "celsius") -> dict:
124144

125145
@pytest.mark.asyncio(loop_scope="module")
126146
async def test_tool_calls(client: openai.AsyncOpenAI, model: str):
147+
if model.startswith("DeepSeek-R1"):
148+
pytest.skip("DeepSeek-R1 does not support tool calls")
149+
127150
tool_get_current_weather = {
128151
"type": "function",
129152
"name": "get_current_weather",
@@ -193,6 +216,9 @@ async def test_streaming(client: openai.AsyncOpenAI, model: str):
193216

194217
@pytest.mark.asyncio(loop_scope="module")
195218
async def test_streaming_tool_call(client: openai.AsyncOpenAI, model: str):
219+
if model.startswith("DeepSeek-R1"):
220+
pytest.skip("DeepSeek-R1 does not support tool calls")
221+
196222
tool_get_current_weather = {
197223
"type": "function",
198224
"name": "get_current_weather",
@@ -231,6 +257,8 @@ async def test_streaming_tool_call(client: openai.AsyncOpenAI, model: str):
231257
elif isinstance(event, ResponseReasoningTextDeltaEvent):
232258
reasoning_deltas.append(event.delta)
233259

260+
assert function_call is not None, "function call not exists!"
261+
234262
reasoning = "".join(reasoning_deltas)
235263
tool_args = json.loads(function_call.arguments)
236264

0 commit comments

Comments
 (0)