forked from ise-uiuc/magicoder
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
141 lines (107 loc) · 4.08 KB
/
utils.py
File metadata and controls
141 lines (107 loc) · 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import functools
import hashlib
import json
import os
import random
import time
from pathlib import Path
from typing import Any, Iterable, Mapping, Sequence, TypeVar
import openai
import tiktoken
# Half of the available CPU cores, with a floor of 1 when the core count
# is unknown (os.cpu_count() may return None) or reported as 0.
N_CORES = 1 if (count := os.cpu_count()) is None or count == 0 else count // 2
def read_jsonl(path: str | Path) -> list[Any]:
"""Read lines of JSON from a file (including '\n')."""
with Path(path).open("r") as f:
return [json.loads(line) for line in f]
def write_jsonl(path: str | Path, data: Sequence[Mapping]):
# cannot use `dict` here as it is invariant
with Path(path).open("w") as f:
for item in data:
f.write(json.dumps(item) + "\n")
# def reformat_python(code: str) -> str | None:
# """Reformat Python code using Black."""
# try:
# return black.format_str(code, mode=black.Mode())
# except Exception:
# return None
# Generic element type used by `chunked` to preserve the sequence's item type.
_T = TypeVar("_T")
def chunked(seq: Sequence[_T], n: int) -> Iterable[Sequence[_T]]:
    """Lazily split *seq* into consecutive slices of length *n*.

    Every chunk contains exactly *n* items except possibly the last,
    which holds whatever remains.
    """
    for start in range(0, len(seq), n):
        yield seq[start : start + n]
# OpenAI API access
# Use environment variables!
# openai.organization = "org-pQ4H2mEb8OUHqSkIkP8b50k6"
# openai.api_key = os.getenv("OPENAI_API_KEY")
def retry_with_exponential_backoff(
    errors: tuple,
    initial_delay: float = 30,
    exponential_base: float = 2,
    jitter: bool = True,
    max_retries: int = 5,
):
    """Decorator factory: retry the wrapped function with exponential backoff.

    Args:
        errors: Exception classes that trigger a retry; any other
            exception propagates immediately.
        initial_delay: Seconds to wait before the first retry.
        exponential_base: Multiplier applied to the delay after each failure.
        jitter: When True, scale each delay growth by a random factor in
            [1, 2) to avoid synchronized retry storms.
        max_retries: Number of retries allowed before giving up.

    Raises:
        Exception: When more than `max_retries` attempts fail; chained to
            the last underlying error so the root cause is preserved.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            num_retries = 0
            delay = initial_delay
            # Loop until success, a non-retryable error, or the retry
            # budget is exhausted. Non-listed exceptions simply propagate;
            # the original's `except Exception: raise e` clause was a no-op.
            while True:
                try:
                    return func(*args, **kwargs)
                except errors as e:
                    print(f"Error: {e}. Retrying in {delay} seconds...")
                    num_retries += 1
                    if num_retries > max_retries:
                        # Chain the last error (`from e`) so callers can
                        # inspect what actually went wrong.
                        raise Exception(
                            f"Maximum number of retries ({max_retries}) exceeded."
                        ) from e
                    # Grow the delay; `jitter` (bool used as 0/1) enables
                    # the random [1, 2) scaling factor.
                    delay *= exponential_base * (1 + jitter * random.random())
                    time.sleep(delay)

        return wrapper

    return decorator
# OpenAI errors treated as transient and therefore safe to retry
# (passed to `retry_with_exponential_backoff` below).
ERRORS = (
    openai.RateLimitError,
    openai.APIError,
    openai.APIConnectionError,
    openai.InternalServerError,
)
# Build a module-level client at import time; fall back to None when
# construction fails (presumably missing API credentials — openai raises
# OpenAIError in that case), so importing this module never crashes.
try:
    OPENAI_CLIENT: openai.OpenAI | None = openai.OpenAI(
        base_url=os.getenv("OPENAI_BASE_URL")
    )
except openai.OpenAIError:
    OPENAI_CLIENT = None
@retry_with_exponential_backoff(ERRORS)
def chat_completions_with_backoff(*args, **kwargs):
    """Call the Chat Completions API, retrying transient errors with backoff.

    Raises:
        RuntimeError: If the module-level OpenAI client failed to initialize.
    """
    # Explicit check instead of `assert`: asserts are stripped under `-O`,
    # which would turn a missing client into an opaque AttributeError.
    if OPENAI_CLIENT is None:
        raise RuntimeError("OpenAI client is not initialized.")
    return OPENAI_CLIENT.chat.completions.create(*args, **kwargs)
@retry_with_exponential_backoff(ERRORS)
def completions_with_backoff(*args, **kwargs):
    """Call the legacy Completions API, retrying transient errors with backoff.

    Raises:
        RuntimeError: If the module-level OpenAI client failed to initialize.
    """
    # Explicit check instead of `assert`: asserts are stripped under `-O`,
    # which would turn a missing client into an opaque AttributeError.
    if OPENAI_CLIENT is None:
        raise RuntimeError("OpenAI client is not initialized.")
    return OPENAI_CLIENT.completions.create(*args, **kwargs)
# https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
def num_tokens_from_string(string: str, model: str) -> int:
    """Return the number of tokens *string* encodes to under *model*'s tokenizer."""
    # Resolve the tokenizer associated with the model, then measure the
    # length of the encoded token sequence.
    tokenizer = tiktoken.encoding_for_model(model)
    return len(tokenizer.encode(string))
def timestamp() -> str:
    """Return the current local time formatted as ``YYYYMMDD_HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d_%H%M%S", now)
def compute_fingerprint(*args: Any, hash_length: int | None = None) -> str:
combined = "".join(map(str, args))
content = hashlib.sha256(combined.encode()).hexdigest()
if hash_length is not None:
content = content[:hash_length]
return content