# examples.py
# Worked examples for five Python performance practices: measuring before
# optimizing, using built-ins and the standard library, avoiding repeated work
# inside loops, choosing the right data structure, and vectorizing numeric work.
# Install pandas and NumPy to run the section 5 examples (the only ones that import them).
# 1. Measuring Before Optimizing
import time
def load_records(count=100_000):
    """Simulate loading records by returning the integers ``0`` to ``count - 1``.

    Args:
        count: Number of records to produce. Defaults to 100,000, the size
            this example previously hard-coded. Zero or a negative value
            yields an empty list.

    Returns:
        A new list of ``count`` consecutive integers starting at 0.
    """
    return list(range(count))
def filter_records(records):
    """Return a new list holding only the even values from ``records``.

    Input order is preserved; a value is kept when ``value % 2 == 0``.
    """
    evens = [value for value in records if value % 2 == 0]
    return evens
def generate_report(records):
    """Produce the report figure for ``records``: the sum of all its values."""
    grand_total = sum(records)
    return grand_total
# Time each step
# Each stage gets its own perf_counter() window, so the three printed figures
# show where the time goes instead of one combined total.
start = time.perf_counter()
records = load_records()
print(f"Load : {time.perf_counter() - start:.4f}s")
# Restart the clock so filtering is measured on its own.
start = time.perf_counter()
filtered = filter_records(records)
print(f"Filter : {time.perf_counter() - start:.4f}s")
# The report stage runs on the filtered records only, again with a fresh clock.
start = time.perf_counter()
report = generate_report(filtered)
print(f"Report : {time.perf_counter() - start:.4f}s")
# 2. Using Built-in Functions and Standard Library Tools
import time
# One million consecutive integers shared by both timings below.
numbers = list(range(1_000_000))
# Manual loop
# Baseline: accumulate the total one element at a time with a Python-level loop.
start = time.perf_counter()
total = 0
for n in numbers:
    total += n
print(f"Manual loop : {time.perf_counter() - start:.4f}s → {total}")
# Built-in sum()
# Computes the same total; `total` is rebound, so both lines print the same value.
start = time.perf_counter()
total = sum(numbers)
print(f"Built-in : {time.perf_counter() - start:.4f}s → {total}")
# Sample orders, listed out of amount order so that sorting visibly rearranges them.
orders = [
    {"id": "ORD-003", "amount": 250.0},
    {"id": "ORD-001", "amount": 89.99},
    {"id": "ORD-002", "amount": 430.0},
]
# Slow: manual comparison logic
def manual_sort(orders):
    """Sort ``orders`` in place by ascending ``"amount"`` and return the same list.

    Hand-written exchange sort: every position is compared with each later
    position, and the two entries are swapped whenever the earlier one has
    the larger amount. Entries with equal amounts may end up in a different
    relative order than they started in.
    """
    count = len(orders)  # swaps never change the length, so read it once
    for left in range(count):
        for right in range(left + 1, count):
            left_amount = orders[left]["amount"]
            right_amount = orders[right]["amount"]
            if left_amount > right_amount:
                orders[left], orders[right] = orders[right], orders[left]
    return orders
# Fast: built-in sorted() builds a new list ordered by amount and leaves `orders` untouched
sorted_orders = sorted(
    orders,
    key=lambda order: order["amount"],
)
print(sorted_orders)
# 3. Avoiding Repeated Work Inside Loops
import time
# 5,000 entries but only five distinct codes: the five SKUs repeated 1,000 times.
approved = ["SKU-001", "SKU-002", "SKU-003", "SKU-004", "SKU-005"] * 1000
# Codes for 0..4999; numbers shorter than three digits are zero-padded (SKU-000, SKU-001, ...).
incoming = [f"SKU-{str(i).zfill(3)}" for i in range(5000)]
# Slow: list membership check on every iteration
start = time.perf_counter()
valid = []
for code in incoming:
    if code in approved:  # list search is O(n) — slow
        valid.append(code)
print(f"List check : {time.perf_counter() - start:.4f}s → {len(valid)} valid")
# Fast: convert approved to a set once, before the loop
# The set is built after the timer starts, so its one-off construction cost is
# included in the reported time.
start = time.perf_counter()
approved_set = set(approved)  # set lookup is O(1) on average — fast
valid = []
for code in incoming:
    if code in approved_set:
        valid.append(code)
print(f"Set check : {time.perf_counter() - start:.4f}s → {len(valid)} valid")
import re
# Slower: passes the pattern string on every call, so re has to look it up in its compiled-pattern cache each time
def extract_slow(text):
    """Return every run of consecutive digits in ``text`` as a list of strings.

    Goes through the module-level ``re.findall`` helper, handing it the
    pattern string on each call.
    """
    digit_runs = re.findall(r'\d+', text)
    return digit_runs
# Fast: compile once, reuse
DIGIT_PATTERN = re.compile(r'\d+')  # built a single time at import, shared by every call


def extract_fast(text):
    """Return every run of consecutive digits in ``text`` as a list of strings.

    Uses the precompiled module-level ``DIGIT_PATTERN`` rather than passing a
    pattern string to ``re`` on each call.
    """
    digit_runs = DIGIT_PATTERN.findall(text)
    return digit_runs
# 4. Choosing the Right Data Structure
import time
import random
# 100,000 customer IDs, CUST-0 through CUST-99999.
all_customers = [f"CUST-{i}" for i in range(100_000)]
# 10,000 IDs built from numbers that random.sample draws from the same range.
ordered = [f"CUST-{i}" for i in random.sample(range(100_000), 10_000)]
# Slow: ordered is a list
# Each membership test may scan the whole 10,000-item list, and it runs once
# per customer in all_customers.
start = time.perf_counter()
repeat_customers = [c for c in all_customers if c in ordered]
print(f"List : {time.perf_counter() - start:.4f}s → {len(repeat_customers)} found")
# Fast: ordered is a set
# The set is built before the timer starts, so only the lookups are timed.
ordered_set = set(ordered)
start = time.perf_counter()
repeat_customers = [c for c in all_customers if c in ordered_set]
print(f"Set : {time.perf_counter() - start:.4f}s → {len(repeat_customers)} found")
# 5. Vectorizing Operations on Numeric Data
import time
import numpy as np
import pandas as pd
# 500,000 prices starting at 10 and rising by 0.05 per step, each rounded to 2 places.
prices = [round(10 + i * 0.05, 2) for i in range(500_000)]
# Fraction taken off each price; every example below multiplies by (1 - discount_rate).
discount_rate = 0.15
# Slow: Python loop
start = time.perf_counter()
discounted = []
for price in prices:
    discounted.append(round(price * (1 - discount_rate), 2))
print(f"Python loop : {time.perf_counter() - start:.4f}s")
# Fast: NumPy vectorisation
# The list-to-array conversion happens before the timer starts and is not timed.
prices_array = np.array(prices)
start = time.perf_counter()
discounted = np.round(prices_array * (1 - discount_rate), 2)
print(f"NumPy : {time.perf_counter() - start:.4f}s")
# Fast: pandas vectorisation
# As with NumPy, building the Series is left outside the timed region.
prices_series = pd.Series(prices)
start = time.perf_counter()
discounted = (prices_series * (1 - discount_rate)).round(2)
print(f"Pandas : {time.perf_counter() - start:.4f}s")
# One-column DataFrame built from the `prices` list defined earlier in this section.
df = pd.DataFrame({"price": prices})
# Derive the multiplier from discount_rate instead of repeating a literal 0.85,
# so changing the rate updates the DataFrame examples as well.
price_factor = 1 - discount_rate
# Slow: row-by-row with iterrows
# NOTE: pandas' documentation advises against modifying a frame while iterating
# over it; this loop is kept only as the slow baseline for the comparison.
start = time.perf_counter()
for idx, row in df.iterrows():
    df.at[idx, "discounted"] = round(row["price"] * price_factor, 2)
print(f"iterrows : {time.perf_counter() - start:.4f}s")
# Fast: vectorised column operation
# Overwrites the "discounted" column filled in by the loop above in a single expression.
start = time.perf_counter()
df["discounted"] = (df["price"] * price_factor).round(2)
print(f"Vectorised : {time.perf_counter() - start:.4f}s")