Load per-model settings from a JSON file next to each model file, falling back to the server defaults

D4ve-R · D4ve-R · commit 10a2d3265572 · 2023-11-22T19:21:18.000+01:00
diff --git a/llama_cpp/server/model.py b/llama_cpp/server/model.py
@@ -3,7 +3,7 @@
 from threading import Lock
 import logging
 import llama_cpp
-from llama_cpp.server.settings import Settings, get_settings
+from llama_cpp.server.settings import Settings, ModelSettings, get_settings
 
 FILE_EXT = ".gguf"
 MODEL_ENV_ARG = "MODEL"
@@ -29,22 +29,30 @@ def __init__(self, settings: Settings) -> None:
         if os.path.isfile(settings.model):
             self(settings.model.split(os.path.sep)[-1].split(FILE_EXT)[0])
 
-    def __call__(self, model: str, **kwargs: Any) -> llama_cpp.Llama:
+    def __call__(self, model: Optional[str] = None) -> llama_cpp.Llama:
+        # handle backward compatibility, model param optional
         try:
             model_path = self._models[model]
         except KeyError:
             if self._model:
-                if self._settings.verbose: logger.info(f"Model file for {model} NOT found! Using preloaded")
+                if self._settings.verbose: logger.warn(f"Model file for {model} NOT found! Using preloaded")
                 return self._model
             else: raise Exception(404, f"Model file for {model} NOT found")
-
         
         if self._model:
             if self._model.model_path == model_path:
                 return self._model
             del self._model
 
-        settings = self._settings
+        settings_path = os.path.join(os.path.dirname(model_path), 
+                                     model_path.split(os.path.sep)[-1].split(FILE_EXT)[0] + ".json")
+        try:
+            with open(settings_path, 'rb') as f:
+                settings = ModelSettings.model_validate_json(f.read())
+        except Exception as e:
+            if self._settings.verbose: logger.warn(f"Loading settings for {model} FAILED! Using default")
+            settings = self._settings
+
         self._model = llama_cpp.Llama(
             model_path=model_path,
             # Model Params
@@ -88,14 +96,13 @@ def __call__(self, model: str, **kwargs: Any) -> llama_cpp.Llama:
             cache_size=settings.cache_size,
             # Misc
             verbose=settings.verbose,
-            **kwargs
         )
         return self._model
 
-    def __getitem__(self, model):
+    def __getitem__(self, model: str) -> str:
         return self._models[model]
     
-    def __setitem__(self, model, path):
+    def __setitem__(self, model: str, path: str):
         self._models[model] = path
     
     def __iter__(self):
diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py
@@ -1,20 +1,13 @@
 import multiprocessing
 from typing import Optional, List, Literal
 from pydantic import Field
-from pydantic_settings import BaseSettings
+from pydantic_settings import BaseSettings, SettingsConfigDict
 import llama_cpp
 
 # Disable warning for model and model_alias settings
 BaseSettings.model_config['protected_namespaces'] = ()
 
-class Settings(BaseSettings):
-    model: str = Field(
-        description="The path to the model to use for generating completions."
-    )
-    model_alias: Optional[str] = Field(
-        default=None,
-        description="The alias of the model to use for generating completions.",
-    )
+class ModelSettings(BaseSettings):
     # Model Params
     n_gpu_layers: int = Field(
         default=0,
@@ -133,6 +126,9 @@ class Settings(BaseSettings):
     verbose: bool = Field(
         default=True, description="Whether to print debug information."
     )
+
+class ServerSettings(BaseSettings):
+    model_config = SettingsConfigDict(env_file='.env')
     # Server Params
     host: str = Field(default="localhost", description="Listen address")
     port: int = Field(default=8000, description="Listen port")
@@ -141,6 +137,15 @@ class Settings(BaseSettings):
         description="Whether to interrupt requests when a new request is received.",
     )
 
+class Settings(ModelSettings, ServerSettings):
+    model: str = Field(
+        description="The path to the model to use for generating completions."
+    )
+    model_alias: Optional[str] = Field(
+        default=None,
+        description="The alias of the model to use for generating completions.",
+    )
+
 SETTINGS: Optional[Settings] = None
 
 def set_settings(settings: Settings):