forked from anthonydb/practical-sql
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathChapter_05.sql
More file actions
207 lines (164 loc) · 5.55 KB
/
Chapter_05.sql
File metadata and controls
207 lines (164 loc) · 5.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 5 Code Examples
--------------------------------------------------------------
-- Listing 5-1: Basic addition, subtraction and multiplication with SQL
SELECT 2 + 2; -- addition
SELECT 9 - 1; -- subtraction
SELECT 3 * 4; -- multiplication
-- Listing 5-2: Integer and decimal division with SQL
SELECT 11 / 6; -- integer division
SELECT 11 % 6; -- modulo division
SELECT 11.0 / 6; -- decimal division
SELECT CAST(11 AS numeric(3,1)) / 6;
-- Listing 5-3: Exponents, roots and factorials with SQL
SELECT 3 ^ 4; -- exponentiation
SELECT |/ 10; -- square root (operator)
SELECT sqrt(10); -- square root (function)
SELECT ||/ 10; -- cube root
SELECT 4 !; -- factorial
-- Order of operations
SELECT 7 + 8 * 9; -- answer: 79
SELECT (7 + 8) * 9; -- answer: 135
SELECT 3 ^ 3 - 1; -- answer: 26
SELECT 3 ^ (3 - 1); -- answer: 9
-- Listing 5-4: Selecting Census population columns by race with aliases
SELECT geo_name,
state_us_abbreviation AS "st",
p0010001 AS "Total Population",
p0010003 AS "White Alone",
p0010004 AS "Black or African American Alone",
p0010005 AS "Am Indian/Alaska Native Alone",
p0010006 AS "Asian Alone",
p0010007 AS "Native Hawaiian and Other Pacific Islander Alone",
p0010008 AS "Some Other Race Alone",
p0010009 AS "Two or More Races"
FROM us_counties_2010;
-- Listing 5-5: Adding two columns in us_counties_2010
SELECT geo_name,
state_us_abbreviation AS "st",
p0010003 AS "White Alone",
p0010004 AS "Black Alone",
p0010003 + p0010004 AS "Total White and Black"
FROM us_counties_2010;
-- Listing 5-6: Checking Census data totals
SELECT geo_name,
state_us_abbreviation AS "st",
p0010001 AS "Total",
p0010003 + p0010004 + p0010005 + p0010006 + p0010007
+ p0010008 + p0010009 AS "All Races",
(p0010003 + p0010004 + p0010005 + p0010006 + p0010007
+ p0010008 + p0010009) - p0010001 AS "Difference"
FROM us_counties_2010
ORDER BY "Difference" DESC;
-- Listing 5-7: Calculating the percent of the population that is
-- Asian by county (percent of the whole)
SELECT geo_name,
state_us_abbreviation AS "st",
(CAST(p0010006 AS numeric(8,1)) / p0010001) * 100 AS "pct_asian"
FROM us_counties_2010
ORDER BY "pct_asian" DESC;
-- Listing 5-8: Calculating percent change
CREATE TABLE percent_change (
department varchar(20),
spend_2014 numeric(10,2),
spend_2017 numeric(10,2)
);
INSERT INTO percent_change
VALUES
('Building', 250000, 289000),
('Assessor', 178556, 179500),
('Library', 87777, 90001),
('Clerk', 451980, 650000),
('Police', 250000, 223000),
('Recreation', 199000, 195000);
SELECT department,
spend_2014,
spend_2017,
round( (spend_2017 - spend_2014) /
spend_2014 * 100, 1 ) AS "pct_change"
FROM percent_change;
-- Listing 5-9: Using sum() and avg() aggregate functions
SELECT sum(p0010001) AS "County Sum",
round(avg(p0010001), 0) AS "County Average"
FROM us_counties_2010;
-- Listing 5-10: Testing SQL percentile functions
CREATE TABLE percentile_test (
numbers integer
);
INSERT INTO percentile_test (numbers) VALUES
(1), (2), (3), (4), (5), (6);
SELECT
percentile_cont(.5)
WITHIN GROUP (ORDER BY numbers),
percentile_disc(.5)
WITHIN GROUP (ORDER BY numbers)
FROM percentile_test;
-- Listing 5-11: Using sum(), avg(), and percentile_cont() aggregate functions
SELECT sum(p0010001) AS "County Sum",
round(avg(p0010001), 0) AS "County Average",
percentile_cont(.5)
WITHIN GROUP (ORDER BY p0010001) AS "County Median"
FROM us_counties_2010;
-- Listing 5-12: Passing an array of values to percentile_cont()
-- quartiles
SELECT percentile_cont(array[.25,.5,.75])
WITHIN GROUP (ORDER BY p0010001) AS "quartiles"
FROM us_counties_2010;
-- Extra:
-- quintiles
SELECT percentile_cont(array[.2,.4,.6,.8])
WITHIN GROUP (ORDER BY p0010001) AS "quintiles"
FROM us_counties_2010;
-- deciles
SELECT percentile_cont(array[.1,.2,.3,.4,.5,.6,.7,.8,.9])
WITHIN GROUP (ORDER BY p0010001) AS "deciles"
FROM us_counties_2010;
-- Listing 5-13: Using unnest() to turn an array into rows
SELECT unnest(
percentile_cont(array[.25,.5,.75])
WITHIN GROUP (ORDER BY p0010001)
) AS "quartiles"
FROM us_counties_2010;
-- Listing 5-14: Creating a median() aggregate function in PostgreSQL
-- Source: https://wiki.postgresql.org/wiki/Aggregate_Median
CREATE OR REPLACE FUNCTION _final_median(anyarray)
RETURNS float8 AS
$$
WITH q AS
(
SELECT val
FROM unnest($1) val
WHERE VAL IS NOT NULL
ORDER BY 1
),
cnt AS
(
SELECT COUNT(*) AS c FROM q
)
SELECT AVG(val)::float8
FROM
(
SELECT val FROM q
LIMIT 2 - MOD((SELECT c FROM cnt), 2)
OFFSET GREATEST(CEIL((SELECT c FROM cnt) / 2.0) - 1,0)
) q2;
$$
LANGUAGE sql IMMUTABLE;
CREATE AGGREGATE median(anyelement) (
SFUNC=array_append,
STYPE=anyarray,
FINALFUNC=_final_median,
INITCOND='{}'
);
-- Listing 5-15: Using a median() aggregate function
SELECT sum(p0010001) AS "County Sum",
round(avg(p0010001), 0) AS "County Average",
median(p0010001) AS "County Median",
percentile_cont(.5)
WITHIN GROUP (ORDER BY P0010001) AS "50th Percentile"
FROM us_counties_2010;
-- Listing 5-16: Finding the most-frequent value with mode()
SELECT mode() WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010;