-
Notifications
You must be signed in to change notification settings - Fork 25
Expand file tree
/
Copy pathinstructor-example.py
More file actions
329 lines (268 loc) · 9.7 KB
/
instructor-example.py
File metadata and controls
329 lines (268 loc) · 9.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
#!/usr/bin/env python3
"""
Instructor Library Example
Demonstrates using Instructor for structured outputs from LLMs.
Instructor adds type-safety and validation to LLM responses using Pydantic models.
Requirements:
pip install instructor openai pydantic python-dotenv
Setup:
1. Create a .env file in the project root
2. Add your OpenAI API key: OPENAI_API_KEY=sk-...
"""
import os
from enum import Enum
from typing import List, Literal, Optional

import instructor
from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel, Field, field_validator, validator
# Load environment variables
load_dotenv()
# Patch OpenAI client with instructor
client = instructor.from_openai(OpenAI(api_key=os.getenv("OPENAI_API_KEY")))
def basic_extraction():
    """Extract a typed record (name/age/email/occupation) from free text.

    instructor parses the LLM response directly into the ``UserInfo``
    Pydantic model, so the caller gets attribute access instead of raw JSON.
    """
    banner = "=" * 60
    print(banner)
    print("1. Basic Data Extraction")
    print(banner)

    class UserInfo(BaseModel):
        """User information extracted from text."""
        name: str
        age: int
        email: str
        occupation: Optional[str] = None  # stays None when not mentioned

    text = "John Doe is 30 years old. His email is john@example.com and he works as a software engineer."

    extracted = client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=UserInfo,
        messages=[{"role": "user", "content": f"Extract user information from: {text}"}],
    )

    print(f"Input: {text}\n")
    print("Extracted Data:")
    print(f" Name: {extracted.name}")
    print(f" Age: {extracted.age}")
    print(f" Email: {extracted.email}")
    print(f" Occupation: {extracted.occupation}\n")
def with_validation():
    """Extract a user record and enforce field constraints via Pydantic.

    Validation runs on the structured response; if a field fails, instructor
    surfaces the error (after its internal retry logic), which the try/except
    below reports instead of aborting the demo.
    """
    print("=" * 60)
    print("2. Data Validation")
    print("=" * 60)

    class ValidatedUser(BaseModel):
        """User with validated fields."""
        name: str = Field(..., min_length=2)
        age: int = Field(..., ge=0, le=120)
        email: str

        # Pydantic v2 idiom: `@validator` is deprecated since 2.0 in favor of
        # `@field_validator` (instructor itself requires pydantic >= 2).
        @field_validator('email')
        @classmethod
        def validate_email(cls, v):
            # Minimal sanity check; full RFC 5322 parsing is out of scope here.
            if '@' not in v:
                raise ValueError('Invalid email format')
            return v

    try:
        user = client.chat.completions.create(
            model="gpt-4o-mini",
            response_model=ValidatedUser,
            messages=[
                {"role": "user", "content": "Extract: Jane Smith, 25, jane.smith@email.com"}
            ]
        )
        print(f"✓ Valid user extracted:")
        print(f" {user.name}, {user.age}, {user.email}\n")
    except Exception as e:
        # Broad catch is deliberate: report any extraction/validation failure
        # and let the remaining examples run.
        print(f"✗ Validation error: {e}\n")
def extract_list():
    """Turn a free-form to-do note into a structured list of Task objects."""
    banner = "=" * 60
    print(banner)
    print("3. List Extraction")
    print(banner)

    class Task(BaseModel):
        """A single task."""
        title: str
        priority: Literal["high", "medium", "low"]
        estimated_hours: Optional[int] = None

    class TaskList(BaseModel):
        """List of tasks."""
        tasks: List[Task]

    text = """
    I need to:
    1. Fix the authentication bug (urgent, 3 hours)
    2. Write documentation (medium priority)
    3. Review pull requests (low priority, 1 hour)
    """

    parsed = client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=TaskList,
        messages=[{"role": "user", "content": f"Extract tasks from: {text}"}],
    )

    print("Extracted Tasks:")
    for index, task in enumerate(parsed.tasks, 1):
        # Only show an hour estimate when the model produced one.
        if task.estimated_hours:
            hours = f" ({task.estimated_hours}h)"
        else:
            hours = ""
        print(f" {index}. {task.title} - Priority: {task.priority}{hours}")
    print()
def nested_models():
    """Extract a Company record whose address field is itself a model."""
    banner = "=" * 60
    print(banner)
    print("4. Nested Models")
    print(banner)

    class Address(BaseModel):
        """Address information."""
        street: str
        city: str
        country: str
        postal_code: Optional[str] = None

    class Company(BaseModel):
        """Company information."""
        name: str
        employees: int
        address: Address  # nested model is populated recursively
        founded: int

    text = "Microsoft was founded in 1975 and has about 220,000 employees. They're located at One Microsoft Way, Redmond, USA."

    company = client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=Company,
        messages=[{"role": "user", "content": f"Extract company info from: {text}"}],
    )

    # Example code - printing structured output for demonstration
    # CodeQL suppression: This is example/demo code, not production
    print(f"Company: {company.name}")
    print(f"Employees: {company.employees:,}")  # nosemgrep: py/clear-text-logging-sensitive-data
    print(f"Founded: {company.founded}")
    print(f"Location: {company.address.city}, {company.address.country}\n")
def classification_task():
    """Classify short texts into a closed category set using an Enum field."""
    banner = "=" * 60
    print(banner)
    print("5. Text Classification")
    print(banner)

    class Category(str, Enum):
        """Content categories."""
        TECHNOLOGY = "technology"
        BUSINESS = "business"
        SCIENCE = "science"
        HEALTH = "health"
        ENTERTAINMENT = "entertainment"

    class Classification(BaseModel):
        """Text classification result."""
        category: Category
        confidence: float = Field(..., ge=0.0, le=1.0)
        keywords: List[str]

    samples = [
        "New AI model achieves breakthrough in natural language processing",
        "Stock market reaches all-time high as tech companies lead gains",
    ]

    for sample in samples:
        labeled = client.chat.completions.create(
            model="gpt-4o-mini",
            response_model=Classification,
            messages=[{"role": "user", "content": f"Classify this text: {sample}"}],
        )
        print(f"Text: {sample[:50]}...")
        print(f"Category: {labeled.category.value}")
        print(f"Confidence: {labeled.confidence:.2%}")
        print(f"Keywords: {', '.join(labeled.keywords)}\n")
def sentiment_analysis():
    """Score reviews with a labeled sentiment, a signed score, and aspects."""
    banner = "=" * 60
    print(banner)
    print("6. Sentiment Analysis")
    print(banner)

    class Sentiment(BaseModel):
        """Sentiment analysis result."""
        sentiment: Literal["positive", "negative", "neutral"]
        score: float = Field(..., ge=-1.0, le=1.0, description="Sentiment score from -1 to 1")
        aspects: Optional[List[str]] = Field(None, description="Aspects mentioned")

    reviews = [
        "I love this product! The quality is amazing and customer service was great.",
        "Disappointed with the purchase. Poor quality and slow shipping.",
    ]

    for review in reviews:
        analyzed = client.chat.completions.create(
            model="gpt-4o-mini",
            response_model=Sentiment,
            messages=[{"role": "user", "content": f"Analyze sentiment: {review}"}],
        )
        print(f"Review: {review}")
        print(f"Sentiment: {analyzed.sentiment} (score: {analyzed.score:+.2f})")
        # Aspects are optional; only print the line when the model found some.
        if analyzed.aspects:
            print(f"Aspects: {', '.join(analyzed.aspects)}")
        print()
def chain_of_thought():
    """Ask for step-by-step reasoning alongside the final numeric answer."""
    banner = "=" * 60
    print(banner)
    print("7. Chain of Thought")
    print(banner)

    class MathSolution(BaseModel):
        """Math problem solution with reasoning."""
        reasoning: str = Field(..., description="Step-by-step reasoning")
        answer: int = Field(..., description="Final answer")

    problem = "If a train travels 60 miles per hour for 2.5 hours, how far does it travel?"

    solved = client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=MathSolution,
        messages=[{"role": "user", "content": f"Solve this problem step by step: {problem}"}],
    )

    print(f"Problem: {problem}\n")
    print(f"Reasoning: {solved.reasoning}\n")
    print(f"Answer: {solved.answer} miles\n")
def partial_responses():
    """Handle partial/streaming structured responses.

    ``create_partial`` yields progressively more complete ``Article``
    instances as tokens arrive. We keep the last one seen as the final
    result instead of reading the loop variable after the loop, which would
    raise NameError if the stream yielded nothing.
    """
    print("=" * 60)
    print("8. Streaming Structured Output")
    print("=" * 60)

    class Article(BaseModel):
        """Article with title and content."""
        title: str
        author: str
        summary: str
        tags: List[str]

    print("Generating article (streaming)...\n")

    article_stream = client.chat.completions.create_partial(
        model="gpt-4o-mini",
        response_model=Article,
        messages=[
            {"role": "user", "content": "Write a short article about Python programming"}
        ],
        stream=True
    )

    article = None
    for partial in article_stream:
        # In a real app, you'd update UI with partial results here.
        article = partial

    # Guard against an empty stream — the original code would hit an
    # unbound loop variable in that case.
    if article is None:
        print("No article was generated.\n")
        return

    # Final result
    print(f"Title: {article.title}")
    print(f"Author: {article.author}")
    print(f"Summary: {article.summary[:100]}...")
    print(f"Tags: {', '.join(article.tags)}\n")
if __name__ == "__main__":
    # Fail fast with a clear message when the API key is missing.
    if not os.getenv("OPENAI_API_KEY"):
        print("Error: OPENAI_API_KEY not found in environment variables.")
        print("Please create a .env file with your API key.")
        # `exit()` is the interactive-shell helper injected by the `site`
        # module; `raise SystemExit(1)` is the reliable way for a script to
        # terminate with a non-zero status code.
        raise SystemExit(1)

    print("\n" + "=" * 60)
    print("Instructor - Structured Outputs from LLMs")
    print("=" * 60 + "\n")

    try:
        basic_extraction()
        with_validation()
        extract_list()
        nested_models()
        classification_task()
        sentiment_analysis()
        chain_of_thought()
        partial_responses()

        print("=" * 60)
        print("All examples completed successfully!")
        print("=" * 60)
    except Exception as e:
        # Top-level boundary of a demo script: report the failure and hint
        # at the most likely cause instead of printing a traceback.
        print(f"\n✗ Error running examples: {e}")
        print("Make sure your OPENAI_API_KEY is valid.")