forked from anthonydb/practical-sql
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathChapter_12.sql
More file actions
310 lines (254 loc) · 8.46 KB
/
Chapter_12.sql
File metadata and controls
310 lines (254 loc) · 8.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 12 Code Examples
--------------------------------------------------------------
-- Listing 12-1: Using a subquery in a WHERE clause
SELECT geo_name,
state_us_abbreviation,
p0010001
FROM us_counties_2010
WHERE p0010001 >= (
SELECT percentile_cont(.9) WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010
)
ORDER BY p0010001 DESC;
-- Listing 12-2: Using a subquery in a WHERE clause for DELETE
CREATE TABLE us_counties_2010_top10 AS
SELECT * FROM us_counties_2010;
DELETE FROM us_counties_2010_top10
WHERE p0010001 < (
SELECT percentile_cont(.9) WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010_top10
);
SELECT count(*) FROM us_counties_2010_top10;
-- Listing 12-3: Subquery as a derived table in a FROM clause
SELECT round(calcs.average, 0) as average,
calcs.median,
round(calcs.average - calcs.median, 0) AS median_average_diff
FROM (
SELECT avg(p0010001) AS average,
percentile_cont(.5)
WITHIN GROUP (ORDER BY p0010001)::numeric(10,1) AS median
FROM us_counties_2010
)
AS calcs;
-- Listing 12-4: Joining two derived tables
SELECT census.state_us_abbreviation AS st,
census.st_population,
plants.plant_count,
round((plants.plant_count/census.st_population::numeric(10,1)) * 1000000, 1)
AS plants_per_million
FROM
(
SELECT st,
count(*) AS plant_count
FROM meat_poultry_egg_inspect
GROUP BY st
)
AS plants
JOIN
(
SELECT state_us_abbreviation,
sum(p0010001) AS st_population
FROM us_counties_2010
GROUP BY state_us_abbreviation
)
AS census
ON plants.st = census.state_us_abbreviation
ORDER BY plants_per_million DESC;
-- Listing 12-5: Adding a subquery to a column list
SELECT geo_name,
state_us_abbreviation AS st,
p0010001 AS total_pop,
(SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010) AS us_median
FROM us_counties_2010;
-- Listing 12-6: Using a subquery expression in a calculation
SELECT geo_name,
state_us_abbreviation AS st,
p0010001 AS total_pop,
(SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010) AS us_median,
p0010001 - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010) AS diff_from_median
FROM us_counties_2010
WHERE (p0010001 - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010))
BETWEEN -1000 AND 1000;
-- BONUS: Subquery expressions
-- If you'd like to try the IN, EXISTS, and NOT EXISTS expressions on pages 199-200,
-- here's the code to create a retirees table. The queries below are similar
-- to the hypothetical examples on pages 199 and 200. You will need the
-- employees table you created in Chapter 6.
-- Create table and insert data
CREATE TABLE retirees (
id int,
first_name varchar(50),
last_name varchar(50)
);
INSERT INTO retirees
VALUES (2, 'Lee', 'Smith'),
(4, 'Janet', 'King');
-- Generating values for the IN operator
SELECT first_name, last_name
FROM employees
WHERE emp_id IN (
SELECT id
FROM retirees);
-- Checking whether values exist (returns all rows from employees
-- if the expression evaluates as true)
SELECT first_name, last_name
FROM employees
WHERE EXISTS (
SELECT id
FROM retirees);
-- Using a correlated subquery to find matching values from employees
-- in retirees.
SELECT first_name, last_name
FROM employees
WHERE EXISTS (
SELECT id
FROM retirees
WHERE id = employees.emp_id);
-- Listing 12-7: Using a simple CTE to find large counties
WITH
large_counties (geo_name, st, p0010001)
AS
(
SELECT geo_name, state_us_abbreviation, p0010001
FROM us_counties_2010
WHERE p0010001 >= 100000
)
SELECT st, count(*)
FROM large_counties
GROUP BY st
ORDER BY count(*) DESC;
-- Bonus: You can also write this query as:
SELECT state_us_abbreviation, count(*)
FROM us_counties_2010
WHERE p0010001 >= 100000
GROUP BY state_us_abbreviation
ORDER BY count(*) DESC;
-- Listing 12-8: Using CTEs in a table join
WITH
counties (st, population) AS
(SELECT state_us_abbreviation, sum(population_count_100_percent)
FROM us_counties_2010
GROUP BY state_us_abbreviation),
plants (st, plants) AS
(SELECT st, count(*) AS plants
FROM meat_poultry_egg_inspect
GROUP BY st)
SELECT counties.st,
population,
plants,
round((plants/population::numeric(10,1))*1000000, 1) AS per_million
FROM counties JOIN plants
ON counties.st = plants.st
ORDER BY per_million DESC;
-- Listing 12-9: Using CTEs to minimize redundant code
WITH us_median AS
(SELECT percentile_cont(.5)
WITHIN GROUP (ORDER BY p0010001) AS us_median_pop
FROM us_counties_2010)
SELECT geo_name,
state_us_abbreviation AS st,
p0010001 AS total_pop,
us_median_pop,
p0010001 - us_median_pop AS diff_from_median
FROM us_counties_2010 CROSS JOIN us_median
WHERE (p0010001 - us_median_pop)
BETWEEN -1000 AND 1000;
-- Cross tabulations
-- Install the crosstab() function via the tablefunc module
CREATE EXTENSION tablefunc;
-- Listing 12-10: Creating and filling the ice_cream_survey table
CREATE TABLE ice_cream_survey (
response_id integer PRIMARY KEY,
office varchar(20),
flavor varchar(20)
);
COPY ice_cream_survey
FROM 'C:\YourDirectory\ice_cream_survey.csv'
WITH (FORMAT CSV, HEADER);
-- Listing 12-11: Generating the ice cream survey crosstab
SELECT *
FROM crosstab('SELECT office,
flavor,
count(*)
FROM ice_cream_survey
GROUP BY office, flavor
ORDER BY office',
'SELECT flavor
FROM ice_cream_survey
GROUP BY flavor
ORDER BY flavor')
AS (office varchar(20),
chocolate bigint,
strawberry bigint,
vanilla bigint);
-- Listing 12-12: Creating and filling a temperature_readings table
CREATE TABLE temperature_readings (
reading_id bigserial PRIMARY KEY,
station_name varchar(50),
observation_date date,
max_temp integer,
min_temp integer
);
COPY temperature_readings
(station_name, observation_date, max_temp, min_temp)
FROM 'C:\YourDirectory\temperature_readings.csv'
WITH (FORMAT CSV, HEADER);
-- Listing 12-13: Generating the temperature readings crosstab
SELECT *
FROM crosstab('SELECT
station_name,
date_part(''month'', observation_date),
percentile_cont(.5)
WITHIN GROUP (ORDER BY max_temp)
FROM temperature_readings
GROUP BY station_name,
date_part(''month'', observation_date)
ORDER BY station_name',
'SELECT month
FROM generate_series(1,12) month')
AS (station varchar(50),
jan numeric(3,0),
feb numeric(3,0),
mar numeric(3,0),
apr numeric(3,0),
may numeric(3,0),
jun numeric(3,0),
jul numeric(3,0),
aug numeric(3,0),
sep numeric(3,0),
oct numeric(3,0),
nov numeric(3,0),
dec numeric(3,0)
);
-- Listing 12-14: Re-classifying temperature data with CASE
SELECT max_temp,
CASE WHEN max_temp >= 90 THEN 'Hot'
WHEN max_temp BETWEEN 70 AND 89 THEN 'Warm'
WHEN max_temp BETWEEN 50 AND 69 THEN 'Pleasant'
WHEN max_temp BETWEEN 33 AND 49 THEN 'Cold'
WHEN max_temp BETWEEN 20 AND 32 THEN 'Freezing'
ELSE 'Inhumane'
END AS temperature_group
FROM temperature_readings;
-- Listing 12-15: Using CASE in a Common Table Expression
WITH temps_collapsed (station_name, max_temperature_group) AS
(SELECT station_name,
CASE WHEN max_temp >= 90 THEN 'Hot'
WHEN max_temp BETWEEN 70 AND 89 THEN 'Warm'
WHEN max_temp BETWEEN 50 AND 69 THEN 'Pleasant'
WHEN max_temp BETWEEN 33 AND 49 THEN 'Cold'
WHEN max_temp BETWEEN 20 AND 32 THEN 'Freezing'
ELSE 'Inhumane'
END
FROM temperature_readings)
SELECT station_name, max_temperature_group, count(*)
FROM temps_collapsed
GROUP BY station_name, max_temperature_group
ORDER BY station_name, count(*) DESC;