Commit 2e2000f

[Model] Add LFM2 architecture (#22845)
Signed-off-by: Paul Pak <paulpak58@gmail.com>
1 parent 3128240

File tree: 11 files changed, +960 -8 lines
docs/models/supported_models.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -373,6 +373,7 @@ th {
 | `InternLM3ForCausalLM` | InternLM3 | `internlm/internlm3-8b-instruct`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `JAISLMHeadModel` | Jais | `inceptionai/jais-13b`, `inceptionai/jais-13b-chat`, `inceptionai/jais-30b-v3`, `inceptionai/jais-30b-chat-v3`, etc. | | ✅︎ | ✅︎ |
 | `JambaForCausalLM` | Jamba | `ai21labs/AI21-Jamba-1.5-Large`, `ai21labs/AI21-Jamba-1.5-Mini`, `ai21labs/Jamba-v0.1`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `Lfm2ForCausalLM` | LFM2 | `LiquidAI/LFM2-1.2B`, `LiquidAI/LFM2-700M`, `LiquidAI/LFM2-350M`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `LlamaForCausalLM` | Llama 3.1, Llama 3, Llama 2, LLaMA, Yi | `meta-llama/Meta-Llama-3.1-405B-Instruct`, `meta-llama/Meta-Llama-3.1-70B`, `meta-llama/Meta-Llama-3-70B-Instruct`, `meta-llama/Llama-2-70b-hf`, `01-ai/Yi-34B`, etc. | ✅︎ | ✅︎ | ✅︎ |
 | `MambaForCausalLM` | Mamba | `state-spaces/mamba-130m-hf`, `state-spaces/mamba-790m-hf`, `state-spaces/mamba-2.8b-hf`, etc. | | ✅︎ | ✅︎ |
 | `Mamba2ForCausalLM` | Mamba2 | `mistralai/Mamba-Codestral-7B-v0.1`, etc. | | ✅︎ | ✅︎ |
```
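
For reference, a minimal offline-inference sketch against one of the newly listed LFM2 checkpoints. The model name comes from the table above; the prompt and sampling settings are arbitrary, and since the test changes below treat LFM2 as V1-only, the sketch assumes the V1 engine is active (e.g. `VLLM_USE_V1=1`).

```python
# Minimal sketch (not part of this commit): offline generation with the newly
# supported Lfm2ForCausalLM architecture. Assumes the V1 engine is enabled,
# e.g. via VLLM_USE_V1=1, since the tests below mark LFM2 as V0-unsupported.
from vllm import LLM, SamplingParams

llm = LLM(model="LiquidAI/LFM2-1.2B")  # checkpoint listed in the table above
params = SamplingParams(temperature=0.0, max_tokens=64)  # greedy decoding

for out in llm.generate(["What is a short convolution layer?"], params):
    print(out.outputs[0].text)
```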

tests/models/language/generation/test_hybrid.py

Lines changed: 25 additions & 8 deletions

```diff
@@ -31,6 +31,7 @@
     "hmellor/tiny-random-BambaForCausalLM",
     "ibm-granite/granite-4.0-tiny-preview",
     "tiiuae/Falcon-H1-0.5B-Base",
+    "LiquidAI/LFM2-1.2B",
 ]
 
 HF_UNSUPPORTED_MODELS = [
@@ -52,13 +53,18 @@
     "hmellor/tiny-random-BambaForCausalLM",
     "ibm-granite/granite-4.0-tiny-preview",
     "tiiuae/Falcon-H1-0.5B-Base",
+    "LiquidAI/LFM2-1.2B",
 ]
 
 FULL_CUDA_GRAPH_MODELS = [
     "ai21labs/Jamba-tiny-dev",
     "Zyphra/Zamba2-1.2B-instruct",
 ]
 
+V0_UNSUPPORTED_MODELS = [
+    "LiquidAI/LFM2-1.2B",
+]
+
 # Avoid OOM
 MAX_NUM_SEQS = 4
 
@@ -94,9 +100,12 @@ def test_models(
     else:
         hf_outputs = None
 
-    with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
-        vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
-            example_prompts, max_tokens, num_logprobs)
+    if model not in V0_UNSUPPORTED_MODELS:
+        with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
+            vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
+                example_prompts, max_tokens, num_logprobs)
+    else:
+        vllm_v0_outputs = None
 
     if model in V1_SUPPORTED_MODELS:
         with monkeypatch.context() as m:
@@ -112,7 +121,7 @@ def test_models(
     else:
         vllm_v1_outputs = None
 
-    if hf_outputs is not None:
+    if hf_outputs is not None and vllm_v0_outputs is not None:
         check_logprobs_close(
             outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v0_outputs,
@@ -122,6 +131,7 @@ def test_models(
 
     if model in V1_SUPPORTED_MODELS:
         ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
+        assert ref_outputs is not None
         check_logprobs_close(
             outputs_0_lst=ref_outputs,
             outputs_1_lst=vllm_v1_outputs,
@@ -140,6 +150,9 @@ def test_batching(
     max_tokens: int,
     num_logprobs: int,
 ) -> None:
+    if model in V0_UNSUPPORTED_MODELS:
+        pytest.skip(
+            f"Unsupported V0 Engine. Skipping `test_batching` on {model}.")
 
     try:
         model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
@@ -392,9 +405,12 @@ def test_full_cuda_graph(
     else:
         hf_outputs = None
 
-    with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
-        vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
-            example_prompts, max_tokens, num_logprobs)
+    if model not in V0_UNSUPPORTED_MODELS:
+        with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model:
+            vllm_v0_outputs = vllm_model.generate_greedy_logprobs(
+                example_prompts, max_tokens, num_logprobs)
+    else:
+        vllm_v0_outputs = None
 
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "1")
@@ -408,7 +424,7 @@ def test_full_cuda_graph(
             vllm_v1_outputs = vllm_model.generate_greedy_logprobs(
                 example_prompts, max_tokens, num_logprobs)
 
-    if hf_outputs is not None:
+    if hf_outputs is not None and vllm_v0_outputs is not None:
         check_logprobs_close(
             outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v0_outputs,
@@ -417,6 +433,7 @@ def test_full_cuda_graph(
     )
 
     ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
+    assert ref_outputs is not None
     check_logprobs_close(
         outputs_0_lst=ref_outputs,
         outputs_1_lst=vllm_v1_outputs,
```
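
To exercise just the new model in this test module, one hypothetical invocation is sketched below; the `-k` filter relies on the assumption that the parametrized test IDs contain the model name.

```python
# Hypothetical: run only the LFM2 cases of the hybrid-model tests from Python.
# Equivalent to `pytest tests/models/language/generation/test_hybrid.py -k LFM2 -v`.
import pytest

pytest.main([
    "tests/models/language/generation/test_hybrid.py",
    "-k", "LFM2",  # assumes the model name appears in the parametrized IDs
    "-v",
])
```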

tests/models/registry.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -230,6 +230,8 @@ def check_available_online(
                                             "tiny": "ai21labs/Jamba-tiny-dev",
                                             "random": "ai21labs/Jamba-tiny-random",  # noqa: E501
                                         }),
+    "Lfm2ForCausalLM": _HfExamplesInfo("LiquidAI/LFM2-1.2B",
+                                       min_transformers_version="4.54"),
     "LlamaForCausalLM": _HfExamplesInfo("meta-llama/Llama-3.2-1B-Instruct",
                                         extras={"guard": "meta-llama/Llama-Guard-3-1B",  # noqa: E501
                                                 "hermes": "NousResearch/Hermes-3-Llama-3.1-8B",  # noqa: E501
```

tests/models/test_initialization.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -95,6 +95,8 @@ def _initialize_kv_caches_v1(self, vllm_config):
 
 @pytest.mark.parametrize("model_arch", HF_EXAMPLE_MODELS.get_supported_archs())
 def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
+    if model_arch == "Lfm2ForCausalLM":
+        pytest.skip("Skipping until test supports V1-only models")
     can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)
 
 
```
vllm/config/compilation.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -337,6 +337,7 @@ class CompilationConfig:
         "vllm.unified_attention_with_output",
         "vllm.mamba_mixer2",
         "vllm.mamba_mixer",
+        "vllm.short_conv",
     ]
 
     def compute_hash(self) -> str:
```
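
`splitting_ops` names the custom ops at which the compiler splits the model graph for piecewise CUDA-graph capture; registering `vllm.short_conv` lets LFM2's short-convolution mixer act as such a split point. Below is a hedged sketch of overriding the list from user code. The field name follows the diff above, but whether a given vLLM version expects a `CompilationConfig` object or a dict for `compilation_config` may differ, so treat it as illustrative.

```python
# Illustrative sketch: extend the splitting ops when building a CompilationConfig.
# Only the ops visible in the diff above are listed; the real default list may
# contain additional entries.
from vllm.config import CompilationConfig

compilation_config = CompilationConfig(
    splitting_ops=[
        "vllm.unified_attention_with_output",
        "vllm.mamba_mixer2",
        "vllm.mamba_mixer",
        "vllm.short_conv",  # added in this commit for LFM2's conv mixer
    ],
)
# e.g. LLM(model="LiquidAI/LFM2-1.2B", compilation_config=compilation_config)
```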

vllm/model_executor/layers/mamba/mamba_utils.py

Lines changed: 24 additions & 0 deletions

```diff
@@ -54,6 +54,16 @@ def mamba2_state_dtype(
 
         return (conv_state_dtype, temporal_state_dtype)
 
+    @classmethod
+    def short_conv_state_dtype(
+        cls,
+        model_dtype: Union[ModelDType, torch.dtype],
+        mamba_cache_dtype: MambaDType,
+    ) -> tuple[torch.dtype, ...]:
+        conv_state_dtype = get_kv_cache_torch_dtype(mamba_cache_dtype,
+                                                    model_dtype)
+        return (conv_state_dtype, )
+
 
 class MambaStateShapeCalculator:
 
@@ -122,6 +132,20 @@ def mamba2_state_shape(
                                        tp_world_size), head_dim, state_size)
         return conv_state_shape, temporal_state_shape
 
+    @classmethod
+    def short_conv_state_shape(
+        cls,
+        tp_world_size: int,
+        intermediate_size: int,
+        conv_kernel: int,
+        use_v1: bool = True,
+    ) -> tuple[tuple[int, int]]:
+        conv_dim = divide(intermediate_size, tp_world_size)
+        conv_state_shape = (conv_kernel - 1, conv_dim)
+        if not use_v1:
+            conv_state_shape = conv_state_shape[1], conv_state_shape[0]
+        return (conv_state_shape, )
+
     @classmethod
     def extra_groups_for_head_shards(cls, ngroups: int, tp_size: int):
         """Compute the increase in group numbers to account for
```
