-
Notifications
You must be signed in to change notification settings - Fork 107
Expand file tree
/
Copy pathllm.py
More file actions
59 lines (49 loc) · 1.95 KB
/
llm.py
File metadata and controls
59 lines (49 loc) · 1.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from __future__ import annotations
import sys
import time
from abc import ABC, abstractmethod
import anthropic
from felderize.config import Config
class LLMClient(ABC):
    """Abstract interface for an LLM backend that performs translations.

    Concrete subclasses must implement :meth:`translate`; the class cannot be
    instantiated directly because ``translate`` is abstract.
    """

    @abstractmethod
    def translate(self, system_prompt: str, user_prompt: str) -> str:
        """Send a translation request and return the raw response text.

        Args:
            system_prompt: Instructions passed to the model as the system
                prompt.
            user_prompt: The user message for this request.

        Returns:
            The model's response text, unprocessed.
        """
class AnthropicClient(LLMClient):
    """``LLMClient`` implementation that calls the Anthropic Messages API.

    Class attributes (override in a subclass to tune behaviour):
        MAX_ATTEMPTS: Total number of API calls made by one ``translate``
            call when the service keeps answering with a rate-limit error.
        RETRY_BASE_DELAY_S: Backoff step in seconds; after the n-th failed
            attempt the client sleeps ``n * RETRY_BASE_DELAY_S``.
        MAX_TOKENS: Value sent as ``max_tokens`` with every request.
    """

    MAX_ATTEMPTS = 5
    RETRY_BASE_DELAY_S = 60
    MAX_TOKENS = 4096

    def __init__(self, config: Config):
        """Build the SDK client from *config*.

        Reads ``config.api_key``, ``config.base_url`` and ``config.model``.
        """
        self.client = anthropic.Anthropic(
            api_key=config.api_key, base_url=config.base_url
        )
        self.model = config.model

    def translate(self, system_prompt: str, user_prompt: str) -> str:
        """Send one request to the model and return the response text.

        The system prompt is sent as a single text block tagged with
        ``cache_control: ephemeral``. Token usage is logged to stderr after
        every successful call.

        Args:
            system_prompt: Text sent as the system prompt.
            user_prompt: Text sent as the single user message.

        Returns:
            The concatenated text of all text blocks in the response.

        Raises:
            anthropic.RateLimitError: If the final attempt is still rate
                limited.
            RuntimeError: If the response contains no text block.
        """
        attempts = max(1, self.MAX_ATTEMPTS)
        for attempt in range(1, attempts + 1):
            # Keep the try body to the one call that can be rate limited.
            try:
                response = self.client.messages.create(
                    model=self.model,
                    max_tokens=self.MAX_TOKENS,
                    system=[
                        {
                            "type": "text",
                            "text": system_prompt,
                            "cache_control": {"type": "ephemeral"},
                        }
                    ],
                    messages=[{"role": "user", "content": user_prompt}],
                )
            except anthropic.RateLimitError:
                if attempt == attempts:
                    raise
                # Linear backoff: 60s, 120s, 180s, ... with the defaults.
                wait = self.RETRY_BASE_DELAY_S * attempt
                # NOTE(review): this goes to stdout while the usage line goes
                # to stderr; kept as-is to preserve existing output. Confirm
                # whether stdout is also used for translated results.
                print(f"Rate limited — waiting {wait}s before retry...", flush=True)
                time.sleep(wait)
                continue
            self._log_response(response)
            return self._extract_text(response)
        raise AssertionError("unreachable")

    @staticmethod
    def _log_response(response) -> None:
        """Write token usage (and a truncation warning, if any) to stderr."""
        u = response.usage
        # The cache fields may be absent or None; report both cases as 0.
        cache_read = getattr(u, "cache_read_input_tokens", 0) or 0
        cache_write = getattr(u, "cache_creation_input_tokens", 0) or 0
        print(
            f" llm: input={u.input_tokens} "
            f"cache_read={cache_read} "
            f"cache_write={cache_write} "
            f"output={u.output_tokens}",
            file=sys.stderr,
        )
        if getattr(response, "stop_reason", None) == "max_tokens":
            # The caller would otherwise get a silently truncated result.
            print(
                " llm: warning: response was cut off at max_tokens",
                file=sys.stderr,
            )

    @staticmethod
    def _extract_text(response) -> str:
        """Return the joined text of every text block in *response*."""
        parts = [
            block.text
            for block in response.content
            if getattr(block, "type", None) == "text"
        ]
        if not parts:
            stop_reason = getattr(response, "stop_reason", None)
            raise RuntimeError(
                "Anthropic response contained no text content "
                f"(stop_reason={stop_reason!r})"
            )
        return "".join(parts)
def create_client(config: Config) -> LLMClient:
    """Return the LLM client for *config*.

    Always constructs an ``AnthropicClient``; *config* is passed through
    to its constructor unchanged.
    """
    return AnthropicClient(config)