     "hmellor/tiny-random-BambaForCausalLM",
     "ibm-granite/granite-4.0-tiny-preview",
     "tiiuae/Falcon-H1-0.5B-Base",
+    "LiquidAI/LFM2-1.2B",
 ]

 HF_UNSUPPORTED_MODELS = [
@@ -52,13 +53,18 @@
     "hmellor/tiny-random-BambaForCausalLM",
     "ibm-granite/granite-4.0-tiny-preview",
     "tiiuae/Falcon-H1-0.5B-Base",
+    "LiquidAI/LFM2-1.2B",
 ]

 FULL_CUDA_GRAPH_MODELS = [
     "ai21labs/Jamba-tiny-dev",
     "Zyphra/Zamba2-1.2B-instruct",
 ]

+V0_UNSUPPORTED_MODELS = [
+    "LiquidAI/LFM2-1.2B",
+]
+
 # Avoid OOM
 MAX_NUM_SEQS = 4

@@ -94,9 +100,12 @@ def test_models(
         else:
             hf_outputs = None

-    with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
-        vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
-            example_prompts, max_tokens, num_logprobs)
+    if model not in V0_UNSUPPORTED_MODELS:
+        with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
+            vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
+                example_prompts, max_tokens, num_logprobs)
+    else:
+        vllm_v0_outputs = None

     if model in V1_SUPPORTED_MODELS:
         with monkeypatch.context() as m:
@@ -112,7 +121,7 @@ def test_models(
     else:
         vllm_v1_outputs = None

-    if hf_outputs is not None:
+    if hf_outputs is not None and vllm_v0_outputs is not None:
         check_logprobs_close(
             outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v0_outputs,
@@ -122,6 +131,7 @@ def test_models(

     if model in V1_SUPPORTED_MODELS:
         ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
+        assert ref_outputs is not None
         check_logprobs_close(
             outputs_0_lst=ref_outputs,
             outputs_1_lst=vllm_v1_outputs,
@@ -140,6 +150,9 @@ def test_batching(
     max_tokens: int,
     num_logprobs: int,
 ) -> None:
+    if model in V0_UNSUPPORTED_MODELS:
+        pytest.skip(
+            f"Unsupported V0 Engine. Skipping `test_batching` on {model}.")

     try:
         model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
@@ -392,9 +405,12 @@ def test_full_cuda_graph(
         else:
             hf_outputs = None

-    with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
-        vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
-            example_prompts, max_tokens, num_logprobs)
+    if model not in V0_UNSUPPORTED_MODELS:
+        with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
+            vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
+                example_prompts, max_tokens, num_logprobs)
+    else:
+        vllm_v0_outputs = None

     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "1")
@@ -408,7 +424,7 @@ def test_full_cuda_graph(
             vllm_v1_outputs = vllm_model.generate_greedy_logprobs(
                 example_prompts, max_tokens, num_logprobs)

-    if hf_outputs is not None:
+    if hf_outputs is not None and vllm_v0_outputs is not None:
         check_logprobs_close(
             outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v0_outputs,
@@ -417,6 +433,7 @@ def test_full_cuda_graph(
         )

     ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
+    assert ref_outputs is not None
     check_logprobs_close(
         outputs_0_lst=ref_outputs,
         outputs_1_lst=vllm_v1_outputs,