33from threading import Lock
44import logging
55import llama_cpp
6- from llama_cpp .server .settings import Settings , get_settings
6+ from llama_cpp .server .settings import Settings , ModelSettings , get_settings
77
88FILE_EXT = ".gguf"
99MODEL_ENV_ARG = "MODEL"
@@ -29,22 +29,30 @@ def __init__(self, settings: Settings) -> None:
2929 if os .path .isfile (settings .model ):
3030 self (settings .model .split (os .path .sep )[- 1 ].split (FILE_EXT )[0 ])
3131
32- def __call__ (self , model : str , ** kwargs : Any ) -> llama_cpp .Llama :
32+ def __call__ (self , model : Optional [str ] = None ) -> llama_cpp .Llama :
33+ # handle backward compatibility, model param optional
3334 try :
3435 model_path = self ._models [model ]
3536 except KeyError :
3637 if self ._model :
37- if self ._settings .verbose : logger .info (f"Model file for { model } NOT found! Using preloaded" )
38+ if self ._settings .verbose : logger .warn (f"Model file for { model } NOT found! Using preloaded" )
3839 return self ._model
3940 else : raise Exception (404 , f"Model file for { model } NOT found" )
40-
4141
4242 if self ._model :
4343 if self ._model .model_path == model_path :
4444 return self ._model
4545 del self ._model
4646
47- settings = self ._settings
47+ settings_path = os .path .join (os .path .dirname (model_path ),
48+ model_path .split (os .path .sep )[- 1 ].split (FILE_EXT )[0 ] + ".json" )
49+ try :
50+ with open (settings_path , 'rb' ) as f :
51+ settings = ModelSettings .model_validate_json (f .read ())
52+ except Exception as e :
53+ if self ._settings .verbose : logger .warn (f"Loading settings for { model } FAILED! Using default" )
54+ settings = self ._settings
55+
4856 self ._model = llama_cpp .Llama (
4957 model_path = model_path ,
5058 # Model Params
@@ -88,14 +96,13 @@ def __call__(self, model: str, **kwargs: Any) -> llama_cpp.Llama:
8896 cache_size = settings .cache_size ,
8997 # Misc
9098 verbose = settings .verbose ,
91- ** kwargs
9299 )
93100 return self ._model
94101
def __getitem__(self, model: str) -> str:
    """Return the path registered under the alias ``model``.

    Args:
        model: Key to look up in ``self._models``.

    Returns:
        The value stored in ``self._models`` for ``model``.

    Raises:
        KeyError: Propagated from ``self._models`` when ``model`` has no
            entry; no fallback is attempted here.
    """
    return self._models[model]
97104
def __setitem__(self, model: str, path: str) -> None:
    """Register ``path`` under the alias ``model``.

    Any value already stored in ``self._models`` for ``model`` is replaced.

    Args:
        model: Key to store the path under.
        path: Value to associate with ``model``.
    """
    self._models[model] = path
100107
101108 def __iter__ (self ):
0 commit comments