Skip to content

Commit fd731d7

Browse files
committed
refactor cache settings
1 parent ec9a9db commit fd731d7

File tree

2 files changed: +10 additions, -18 deletions

llama_cpp/llama.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -759,10 +759,6 @@ def __init__(
759759
# Chat Format Params
760760
chat_format: str = "llama-2",
761761
chat_handler: Optional[llama_chat_format.LlamaChatCompletionHandler] = None,
762-
# Cache
763-
cache: bool = False,
764-
cache_type: str = "ram",
765-
cache_size: int = 2 << 30,
766762
# Misc
767763
verbose: bool = True,
768764
# Extra Params
@@ -820,9 +816,6 @@ def __init__(
820816
numa: Enable NUMA support. (NOTE: The initial value of this parameter is used for the remainder of the program as this value is set in llama_backend_init)
821817
chat_format: String specifying the chat format to use when calling create_chat_completion.
822818
chat_handler: Optional chat handler to use when calling create_chat_completion.
823-
cache: Optional if true enables caching.
824-
cache_type: String can be "ram" or "disk".
825-
cache_size: Number of bytes to cache, defaults to 2GB
826819
verbose: Print verbose output to stderr.
827820
828821
Raises:
@@ -965,17 +958,6 @@ def __init__(
965958
(n_ctx, self._n_vocab), dtype=np.single
966959
)
967960

968-
if cache:
969-
if cache_type == "disk":
970-
if verbose:
971-
print(f"Using disk cache with size {cache_size}")
972-
cache = LlamaDiskCache(capacity_bytes=cache_size)
973-
else:
974-
if verbose:
975-
print(f"Using ram cache with size {cache_size}")
976-
cache = LlamaRAMCache(capacity_bytes=cache_size)
977-
self.set_cache(cache)
978-
979961
@property
980962
def ctx(self) -> llama_cpp.llama_context_p:
981963
assert self._ctx.ctx is not None

llama_cpp/server/model.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,16 @@ def __call__(self, model: Optional[str] = None) -> llama_cpp.Llama:
7878
chat_handler=chat_handler
7979
)
8080
self._model.alias = model
81+
if settings.cache:
82+
if settings.cache_type == "disk":
83+
if settings.verbose:
84+
print(f"Using disk cache with size {settings.cache_size}")
85+
cache = llama_cpp.LlamaDiskCache(capacity_bytes=settings.cache_size)
86+
else:
87+
if settings.verbose:
88+
print(f"Using ram cache with size {settings.cache_size}")
89+
cache = llama_cpp.LlamaRAMCache(capacity_bytes=settings.cache_size)
90+
self._model.set_cache(cache)
8191
return self._model
8292

8393
def __getitem__(self, model: str):

0 commit comments

Comments (0)