fix(proxy): address greptile review on nested access groups

wtfashwin · wtfashwin · commit 4f1252cb71b7 · 2026-05-17T01:13:47.000+05:30
Four issues from the bot review:

1. validate_models_exist no longer short-circuits to "all missing" when llm_router is None. In DB-only deployments (no in-memory router), known_access_groups is still authoritative for nested-group composition; only names absent from it are reported missing. This unblocks the core use case of creating pure-composition parent groups in that config.
2. get_group_memberships_from_db now catches Exception, not just AttributeError/TypeError. Transient DB/network errors degrade to an empty map instead of 500-ing model-listing requests. Matches the docstring's "resilient by design" promise.
3. Added @@index([child_group]) to LiteLLM_AccessGroupMembership across all three schema.prisma files + a new migration. Previously delete_group_membership_edges' WHERE parent_group = X OR child_group = X would full-scan on the child side as the table grew.
4. Added a 60s per-process TTL cache for the membership map. Hot-path callers (get_available_models_for_user, model_info_v1) now go through get_cached_group_memberships, which is explicitly invalidated by every write (upsert, delete edges, parent-edge clear in update_access_group). Matches the per-process cache model already used for llm_router.get_model_access_groups().

Added 9 tests covering the new validate fall-through (2), broader exception handling (1), and cache hit/miss/invalidation/TTL/error-cache semantics (6). Total nested-group tests: 61.

Refs #28032
diff --git a/litellm-proxy-extras/litellm_proxy_extras/migrations/20260517100000_index_child_group_on_access_group_membership/migration.sql b/litellm-proxy-extras/litellm_proxy_extras/migrations/20260517100000_index_child_group_on_access_group_membership/migration.sql
@@ -0,0 +1,2 @@
+-- CreateIndex
+CREATE INDEX IF NOT EXISTS "LiteLLM_AccessGroupMembership_child_group_idx" ON "LiteLLM_AccessGroupMembership"("child_group");
diff --git a/litellm-proxy-extras/litellm_proxy_extras/schema.prisma b/litellm-proxy-extras/litellm_proxy_extras/schema.prisma
@@ -1388,4 +1388,5 @@ model LiteLLM_AccessGroupMembership {
 
   @@unique([parent_group, child_group])
   @@index([parent_group])
+  @@index([child_group])
 }
diff --git a/litellm/proxy/management_endpoints/model_access_group_management_endpoints.py b/litellm/proxy/management_endpoints/model_access_group_management_endpoints.py
@@ -6,6 +6,7 @@
 """
 
 import json
+import time
 from typing import Any, Dict, List, Optional, Set, Tuple
 
 from fastapi import APIRouter, Depends, HTTPException
@@ -32,6 +33,29 @@
 router = APIRouter()
 
 
+# ---------------------------------------------------------------------------
+# Per-process membership-map cache
+# ---------------------------------------------------------------------------
+# get_group_memberships_from_db is called on every /v1/models and /model/info
+# request via get_available_models_for_user. Without a cache that's one extra
+# Prisma roundtrip per request - bad under burst traffic. We cache the map
+# in process memory for a short TTL and invalidate explicitly on writes
+# (upsert/delete) so the consistency window inside the writing process is
+# zero. Across processes, eventual consistency is bounded by the TTL -
+# matches today's behavior for llm_router.get_model_access_groups() which is
+# also per-process.
+
+_MEMBERSHIPS_CACHE_TTL_SECONDS = 60.0
+_MEMBERSHIPS_CACHE: Optional[Tuple[float, Dict[str, List[str]]]] = None
+
+
+def invalidate_group_memberships_cache() -> None:
+    """Drop the in-process membership cache. Call after any write that
+    mutates the LiteLLM_AccessGroupMembership table.
+
+    Only this process's module-level cache is reset; nothing is read from
+    or written to the database here.
+    """
+    global _MEMBERSHIPS_CACHE
+    # None means "no cached entry".
+    _MEMBERSHIPS_CACHE = None
+
+
 def validate_models_exist(
     model_names: List[str],
     llm_router,
@@ -44,11 +68,17 @@ def validate_models_exist(
     Returns:
         Tuple[bool, List[str]]: (all_valid, missing_names)
     """
+    known_groups = known_access_groups or set()
+
     if llm_router is None:
-        return False, model_names
+        # DB-only deployment: no in-memory router means we cannot validate
+        # real model names, but known_access_groups is still authoritative
+        # for nested-group composition. Anything not in known_groups is
+        # reported as missing (fail-closed).
+        missing = [m for m in model_names if m not in known_groups]
+        return (len(missing) == 0, missing)
 
     router_model_names = set(llm_router.get_model_names())
-    known_groups = known_access_groups or set()
     missing = [
         m for m in model_names if m not in router_model_names and m not in known_groups
     ]
@@ -87,17 +117,18 @@ async def get_group_memberships_from_db(
     Build parent_group -> [child_groups] map from the membership table.
     Single query, in-memory bucketing - no N+1.
 
-    Resilient by design: if the table isn't available (Prisma client predates
-    this migration, the proxy started before `prisma migrate deploy` finished,
-    or the membership Prisma model was stripped from a downstream build) we
-    return an empty map. The auth path then falls back to today's flat-group
-    semantics instead of 500-ing the whole request.
+    Resilient by design: any failure to read the membership table (missing
+    Prisma model, migration race, transient DB/network error, query timeout)
+    degrades to an empty map. The auth path then falls back to today's
+    flat-group semantics instead of 500-ing the whole request. We log at
+    debug so ops can correlate fallback periods with incidents without
+    drowning normal traffic in warnings.
     """
     try:
         rows = await prisma_client.db.litellm_accessgroupmembership.find_many()
-    except (AttributeError, TypeError) as e:
+    except Exception as e:  # noqa: BLE001 - intentional broad catch on auth path
         verbose_proxy_logger.debug(
-            "litellm_accessgroupmembership unavailable - "
+            "litellm_accessgroupmembership read failed - "
             "skipping nested group resolution: %s",
             e,
         )
@@ -109,6 +140,25 @@ async def get_group_memberships_from_db(
     return memberships
 
 
+async def get_cached_group_memberships(
+    prisma_client: PrismaClient,
+) -> Dict[str, List[str]]:
+    """
+    TTL-cached wrapper around get_group_memberships_from_db. Hot-path
+    callers (model-listing endpoints) should use this; tests and write
+    paths that need fresh data can call the underlying helper directly.
+
+    Args:
+        prisma_client: client handed through to get_group_memberships_from_db
+            on a cache miss.
+
+    Returns:
+        The parent_group -> [child_groups] map. The cached dict object is
+        returned as-is (not copied), so callers must treat it as read-only.
+
+    Whatever the helper returns is cached, including the empty map it
+    yields on a read failure, so a failing DB is retried at most once per
+    TTL window rather than on every request.
+    """
+    global _MEMBERSHIPS_CACHE
+    now = time.monotonic()
+    if _MEMBERSHIPS_CACHE is not None:
+        cached_at, value = _MEMBERSHIPS_CACHE
+        if now - cached_at < _MEMBERSHIPS_CACHE_TTL_SECONDS:
+            return value
+
+    # Publish a unique, already-expired placeholder before suspending on the
+    # DB read. If a write invalidates the cache (or another reader starts a
+    # newer fetch) while we are awaiting, the module-level slot no longer
+    # holds this exact tuple and we must not store our possibly pre-write
+    # snapshot - otherwise stale edges would be served for a full TTL.
+    in_flight: Tuple[float, Dict[str, List[str]]] = (float("-inf"), {})
+    _MEMBERSHIPS_CACHE = in_flight
+
+    fresh = await get_group_memberships_from_db(prisma_client=prisma_client)
+
+    if _MEMBERSHIPS_CACHE is in_flight:
+        # Stamp with the pre-fetch time so the entry never outlives the TTL.
+        _MEMBERSHIPS_CACHE = (now, fresh)
+    return fresh
+
+
 async def upsert_group_memberships(
     parent_group: str,
     child_groups: List[str],
@@ -142,6 +192,7 @@ async def upsert_group_memberships(
         data=rows,
         skip_duplicates=True,
     )
+    invalidate_group_memberships_cache()
     return result
 
 
@@ -164,6 +215,7 @@ async def delete_group_membership_edges(
             ]
         }
     )
+    invalidate_group_memberships_cache()
     return result
 
 
@@ -850,6 +902,7 @@ async def update_access_group(
         await prisma_client.db.litellm_accessgroupmembership.delete_many(
             where={"parent_group": access_group}
         )
+        invalidate_group_memberships_cache()
 
         # Step 2: re-add membership using the appropriate write path
         if use_model_ids:
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
@@ -367,7 +367,7 @@ def generate_feedback_box():
     router as key_management_router,
 )
 from litellm.proxy.management_endpoints.model_access_group_management_endpoints import (
-    get_group_memberships_from_db,
+    get_cached_group_memberships,
     router as model_access_group_management_router,
 )
 from litellm.proxy.management_endpoints.model_management_endpoints import (
@@ -11978,11 +11978,11 @@ async def model_info_v1(  # noqa: PLR0915
         proxy_model_list = llm_router.get_model_names()
         model_access_groups = llm_router.get_model_access_groups()
 
-    # Parent->child edges for nested access groups. Empty when no DB is
-    # configured, preserving today's flat behavior.
+    # Parent->child edges for nested access groups (TTL-cached per process).
+    # Empty when no DB is configured, preserving today's flat behavior.
     group_memberships: Dict[str, List[str]] = {}
     if prisma_client is not None:
-        group_memberships = await get_group_memberships_from_db(
+        group_memberships = await get_cached_group_memberships(
             prisma_client=prisma_client
         )
 
diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
@@ -1388,4 +1388,5 @@ model LiteLLM_AccessGroupMembership {
 
   @@unique([parent_group, child_group])
   @@index([parent_group])
+  @@index([child_group])
 }
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
@@ -5848,7 +5848,7 @@ async def get_available_models_for_user(
         get_team_models,
     )
     from litellm.proxy.management_endpoints.model_access_group_management_endpoints import (
-        get_group_memberships_from_db,
+        get_cached_group_memberships,
     )
     from litellm.proxy.management_endpoints.team_endpoints import validate_membership
 
@@ -5860,11 +5860,12 @@ async def get_available_models_for_user(
         proxy_model_list = llm_router.get_model_names()
         model_access_groups = llm_router.get_model_access_groups()
 
-    # Parent->child edges for nested access groups. Empty when no DB is
-    # configured (e.g. SDK-only mode), preserving today's flat behavior.
+    # Parent->child edges for nested access groups (TTL-cached per process).
+    # Empty when no DB is configured (e.g. SDK-only mode), preserving
+    # today's flat behavior.
     group_memberships: Dict[str, List[str]] = {}
     if prisma_client is not None:
-        group_memberships = await get_group_memberships_from_db(
+        group_memberships = await get_cached_group_memberships(
             prisma_client=prisma_client
         )
 
diff --git a/schema.prisma b/schema.prisma
@@ -1388,4 +1388,5 @@ model LiteLLM_AccessGroupMembership {
 
   @@unique([parent_group, child_group])
   @@index([parent_group])
+  @@index([child_group])
 }
diff --git a/tests/test_litellm/proxy/auth/test_nested_access_groups.py b/tests/test_litellm/proxy/auth/test_nested_access_groups.py
@@ -541,16 +541,29 @@ def get_model_names(self):
     assert missing == ["z-missing", "y-missing"]
 
 
-def test_validate_models_exist_with_null_router_returns_false():
-    """No router - everything reports as missing (matches today's defensive behavior)."""
+def test_validate_models_exist_with_null_router_still_accepts_known_groups():
+    """DB-only deployment: llm_router is None but known_access_groups is still authoritative
+    for nested-group composition - only names not in known_groups are reported missing.
+    """
+    all_valid, missing = validate_models_exist(
+        model_names=["image", "reasoning"],
+        llm_router=None,
+        known_access_groups={"image", "reasoning"},
+    )
+    assert all_valid is True
+    assert missing == []
+
+
+def test_validate_models_exist_with_null_router_rejects_unknown_real_models():
+    """Without a router we can't validate real model names, so anything not in
+    known_access_groups is fail-closed reported as missing."""
     all_valid, missing = validate_models_exist(
-        model_names=["any"],
+        model_names=["gpt-4", "image"],
         llm_router=None,
-        known_access_groups={"any"},
+        known_access_groups={"image"},
     )
-    # Without a router we can't say what's a model, so we fall back to fail-closed
     assert all_valid is False
-    assert missing == ["any"]
+    assert missing == ["gpt-4"]
 
 
 def test_resolve_with_empty_models_and_empty_memberships_returns_empty():
diff --git a/tests/test_litellm/proxy/auth/test_nested_access_groups_cache.py b/tests/test_litellm/proxy/auth/test_nested_access_groups_cache.py
@@ -0,0 +1,150 @@
+"""
+Cache-behavior tests for the nested-access-group membership map (#28032).
+
+Hot-path callers go through get_cached_group_memberships() which TTL-caches
+get_group_memberships_from_db() and is invalidated by every membership
+write. These tests pin the cache hit/miss/invalidation semantics so the
+optimization can't silently break later.
+"""
+
+import os
+import sys
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+sys.path.insert(0, os.path.abspath("../../.."))
+
+import pytest
+
+import litellm.proxy.management_endpoints.model_access_group_management_endpoints as mgmt
+from litellm.proxy.management_endpoints.model_access_group_management_endpoints import (
+    delete_group_membership_edges,
+    get_cached_group_memberships,
+    invalidate_group_memberships_cache,
+    upsert_group_memberships,
+)
+
+
+def _row(parent: str, child: str) -> SimpleNamespace:
+    """Build a stand-in membership row exposing parent_group/child_group."""
+    edge = SimpleNamespace()
+    edge.parent_group = parent
+    edge.child_group = child
+    return edge
+
+
+def _make_prisma(membership_rows=None):
+    """Return a fake Prisma client whose membership table is fully mocked.
+
+    find_many resolves to ``membership_rows`` (an empty list when omitted
+    or falsy); create_many and delete_many both resolve to 0.
+    """
+    rows = membership_rows if membership_rows else []
+
+    table = MagicMock()
+    table.find_many = AsyncMock(return_value=rows)
+    table.create_many = AsyncMock(return_value=0)
+    table.delete_many = AsyncMock(return_value=0)
+
+    client = MagicMock()
+    client.db = MagicMock()
+    client.db.litellm_accessgroupmembership = table
+    return client
+
+
+@pytest.fixture(autouse=True)
+def _reset_cache_between_tests():
+    """Module-level cache state must not leak between tests.
+
+    Autouse fixture: clears the membership cache before the test body runs
+    and once more after it finishes.
+    """
+    # Start from a cold cache.
+    invalidate_group_memberships_cache()
+    yield
+    # Drop whatever the test left behind.
+    invalidate_group_memberships_cache()
+
+
+@pytest.mark.asyncio
+async def test_cache_miss_then_hit_avoids_second_db_query():
+    """Two back-to-back cached reads issue a single find_many call."""
+    client = _make_prisma(membership_rows=[_row("project-x", "image")])
+    table = client.db.litellm_accessgroupmembership
+
+    results = [
+        await get_cached_group_memberships(prisma_client=client),
+        await get_cached_group_memberships(prisma_client=client),
+    ]
+
+    expected = {"project-x": ["image"]}
+    assert results[0] == results[1] == expected
+    # The second read is served from the cache, so the DB sees one query.
+    table.find_many.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_cache_invalidation_forces_db_refetch():
+    """An explicit invalidation between two reads makes the second hit the DB."""
+    client = _make_prisma(membership_rows=[_row("project-x", "image")])
+    table = client.db.litellm_accessgroupmembership
+
+    await get_cached_group_memberships(prisma_client=client)  # fills cache
+    invalidate_group_memberships_cache()
+    await get_cached_group_memberships(prisma_client=client)  # cold again
+
+    assert table.find_many.await_count == 2
+
+
+@pytest.mark.asyncio
+async def test_upsert_invalidates_cache():
+    """An edge upsert clears the cache, so the following read queries the DB."""
+    client = _make_prisma(membership_rows=[_row("project-x", "image")])
+    table = client.db.litellm_accessgroupmembership
+    table.create_many = AsyncMock(return_value=1)
+
+    # First read warms the cache.
+    await get_cached_group_memberships(prisma_client=client)
+    await upsert_group_memberships(
+        parent_group="project-x",
+        child_groups=["reasoning"],
+        prisma_client=client,
+    )
+    # The write must have dropped the cached map, forcing a second query.
+    await get_cached_group_memberships(prisma_client=client)
+
+    assert table.find_many.await_count == 2
+
+
+@pytest.mark.asyncio
+async def test_delete_edges_invalidates_cache():
+    """Removing edges clears the cache just like an upsert does."""
+    client = _make_prisma(membership_rows=[_row("project-x", "image")])
+    table = client.db.litellm_accessgroupmembership
+    table.delete_many = AsyncMock(return_value=1)
+
+    await get_cached_group_memberships(prisma_client=client)  # warm
+    await delete_group_membership_edges(access_group="project-x", prisma_client=client)
+    await get_cached_group_memberships(prisma_client=client)  # re-fetch
+
+    assert table.find_many.await_count == 2
+
+
+@pytest.mark.asyncio
+async def test_cache_expires_after_ttl(monkeypatch):
+    """When monotonic time advances past the TTL, the next read re-fetches."""
+    prisma = _make_prisma(membership_rows=[_row("project-x", "image")])
+
+    # Freeze time; advance past TTL between calls. Only the `time` name
+    # inside the module under test is swapped for a fake clock: patching
+    # time.monotonic on the real module would also freeze the clock that
+    # the running asyncio event loop reads.
+    now = [1000.0]
+    monkeypatch.setattr(mgmt, "time", SimpleNamespace(monotonic=lambda: now[0]))
+
+    await get_cached_group_memberships(prisma_client=prisma)
+    now[0] += mgmt._MEMBERSHIPS_CACHE_TTL_SECONDS + 1
+    await get_cached_group_memberships(prisma_client=prisma)
+
+    assert prisma.db.litellm_accessgroupmembership.find_many.await_count == 2
+
+
+@pytest.mark.asyncio
+async def test_cache_within_ttl_does_not_refetch(monkeypatch):
+    """Reads inside the TTL window stay served from cache."""
+    prisma = _make_prisma(membership_rows=[_row("project-x", "image")])
+
+    # Fake clock scoped to the module under test only; the real
+    # time.monotonic (also read by the asyncio event loop) stays untouched.
+    now = [1000.0]
+    monkeypatch.setattr(mgmt, "time", SimpleNamespace(monotonic=lambda: now[0]))
+
+    await get_cached_group_memberships(prisma_client=prisma)
+    # One second short of expiry: the entry must still be considered fresh.
+    now[0] += mgmt._MEMBERSHIPS_CACHE_TTL_SECONDS - 1
+    await get_cached_group_memberships(prisma_client=prisma)
+
+    prisma.db.litellm_accessgroupmembership.find_many.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_cache_falls_through_empty_dict_on_error_path():
+    """A failed DB read yields {} and that {} is cached like any other
+    result, so a broken database is not re-queried on every request."""
+    client = _make_prisma()
+    table = client.db.litellm_accessgroupmembership
+    table.find_many = AsyncMock(side_effect=ConnectionError("postgres unreachable"))
+
+    outcomes = [
+        await get_cached_group_memberships(prisma_client=client),
+        await get_cached_group_memberships(prisma_client=client),
+    ]
+
+    assert outcomes[0] == outcomes[1] == {}
+    # The second call is answered from the cached {}; the DB was tried once.
+    table.find_many.assert_awaited_once()
diff --git a/tests/test_litellm/proxy/auth/test_nested_access_groups_db.py b/tests/test_litellm/proxy/auth/test_nested_access_groups_db.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+-- CreateIndex`
	`2`	`+CREATE INDEX IF NOT EXISTS "LiteLLM_AccessGroupMembership_child_group_idx" ON "LiteLLM_AccessGroupMembership"("child_group");`
Original file line number	Diff line number	Diff line change
`@@ -1388,4 +1388,5 @@ model LiteLLM_AccessGroupMembership {`
`1388`	`1388`
`1389`	`1389`	`@@unique([parent_group, child_group])`
`1390`	`1390`	`@@index([parent_group])`
	`1391`	`+ @@index([child_group])`
`1391`	`1392`	`}`