forked from abetlen/llama-cpp-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: main.py
More file actions
36 lines (31 loc) · 908 Bytes
/
main.py
File metadata and controls
36 lines (31 loc) · 908 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import llama_cpp
import llama_cpp.llama_tokenizer

# Pull the Qwen chat model (q8_0 GGUF) from the Hugging Face Hub, pairing it
# with the matching HF tokenizer so prompts are tokenized exactly as the base
# model expects.
llm = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
        "Qwen/Qwen1.5-0.5B"
    ),
    verbose=False,
)

# Request a completion constrained to a JSON object with "country" and
# "capital" string fields, streamed back chunk by chunk.
stream = llm.create_chat_completion(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    response_format={
        "type": "json_object",
        "schema": {
            "type": "object",
            "properties": {
                "country": {"type": "string"},
                "capital": {"type": "string"},
            },
            "required": ["country", "capital"],
        },
    },
    stream=True,
)

# Echo tokens as they arrive; chunks whose delta carries no "content" key
# (e.g. the initial role-only delta) are skipped.
for part in stream:
    delta = part["choices"][0]["delta"]
    if "content" in delta:
        print(delta["content"], end="", flush=True)
print()