Introduction to Databases and SQLite
### Set-Up ###
```{r}
# install.packages("RSQLite")
library(RSQLite)
```
SQL == Structured Query Language
SQL is the most used language worldwide for relational databases
data tables we are using are: mtcars; iris
```{r}
mtcars$car_names <- rownames(mtcars) # string operator to adds a new column
iris
```
# Create a database
```{r}
db <- dbConnect(RSQLite::SQLite(), "")
summary(db) # base R - what is "db"?
# add data tables in
dbWriteTable(db, "mtcars", mtcars) # database connection, name of the table
# and the actual dataframe mtcars from R
dbWriteTable(db, "iris", iris)
dbListTables(db) # two tables in my database
```
# Write SQL Queries
```{r}
# star * wildcard == select all
dbGetQuery(db, "SELECT * FROM mtcars")
# select certain columns
dbGetQuery(db, "SELECT car_names, mpg, qsec FROM mtcars")
# filter certain rows
# mpg is between 10-15
dbGetQuery(db, "SELECT car_names, mpg, qsec FROM mtcars
WHERE mpg BETWEEN 10 AND 15")
dbGetQuery(db, "SELECT * from mtcars WHERE qsec > 15")
```
# Create a new data table
```{r}
# make a new table in R (not SQL sorry)
car <- c("Camaro", "Mustang", "Explorer", "California") # column 1
make <- c("Chevrolet", "Ferrari", "Ford", "Dodges") # column 2
df1 <- tibble::tibble(car, make)
# write a table by appending the data tables inside our db
dbWriteTable(db, "car_makes", df1)
dbListTables(db)
```
# Make more queries!
```{r}
dbGetQuery(db, "SELECT * FROM car_makes")
dbGetQuery(db, "SELECT * FROM mtcars LIMIT 5")
# this gives top rows to a limit
# there is no way to get bottom
# but we can use GROUP BY and ORDER BY
# Now, let's get into some logic
# mtcars: get car names and horsepower
# but car names start with M and 6 or 8 cylinders
dbGetQuery(db, "SELECT car_names, hp, cyl FROM mtcars
WHERE car_names LIKE 'M%'
AND cyl IN (6, 8)")
# mtcars: select all the columns but not MPG between 10 and 15
dbGetQuery(db, "SELECT * FROM mtcars WHERE mpg NOT BETWEEN 10 AND 15")
# what if you want car names starting with M or disp bigger than 170?
# less strict: car names that don't start with M as long as disp is
# still bigger than 170, or vice versa
dbGetQuery(db, "SELECT car_names, disp FROM mtcars WHERE
disp > 170
OR car_names LIKE 'M%' ")
# make sure to group correctly! use brackets to group conditions
# select all the columns and where mgp is not between 10 and 15
# and (car name start with M or disp is bigger than 170)
dbGetQuery(db, "SELECT * FROM mtcars
WHERE mpg NOT BETWEEN 10 AND 15
AND (car_names LIKE 'M%' OR disp > 170)
")
# select car_names, hp, cyl where car names do not start with T
# and mpg is not between 11 and 16
dbGetQuery(db, "SELECT car_names, hp, cyl FROM mtcars WHERE
car_names NOT LIKE 'T%' AND
mpg NOT BETWEEN 11 AND 16")
```
# Appending our data table
```{r}
car <- c("Corolla", "Lancer", "Sportage", "XE")
make <- c("Toyota Corolla", "Mitsubishi", "Kia", "Jaguar")
df2 <- tibble::tibble(car, make)
# add
dbWriteTable(db, "car_makes", df2, append = TRUE)
dbGetQuery(db, "SELECT * FROM car_makes")
# or overwrite
dbWriteTable(db, "car_makes", df2, overwrite = TRUE)
dbGetQuery(db, "SELECT * FROM car_makes")
```
# Calculations
```{r}
# sort in asc or desc order
dbGetQuery(db, "SELECT cyl, hp FROM mtcars
ORDER BY hp")
dbGetQuery(db, "SELECT cyl, hp FROM mtcars
ORDER BY cyl DESC")
dbGetQuery(db, "SELECT cyl, hp, mpg FROM mtcars
ORDER BY mpg, hp")
# get the average
dbGetQuery(db, "SELECT cyl, AVG(hp) AS 'mean_horse' FROM mtcars
GROUP BY cyl
ORDER BY mean_horse")
# select cyl, hp, mpg
# average hp and average mpg
# group by cylinder
# order descending hp
avg_hpcyl <- dbGetQuery(db, "SELECT cyl, AVG(hp) AS 'mean_horse', AVG(mpg)
AS 'mean_gas'
FROM mtcars
GROUP BY cyl
ORDER BY mean_horse DESC
")
# saved as a dataframe now
avg_hpcyl
class(avg_hpcyl)
```
# Joining data tables
```{r}
dbGetQuery(db, "SELECT * FROM mtcars WHERE car_names = 'Toyota Corolla' ")
dbGetQuery(db, "SELECT * FROM car_makes")
dbGetQuery(db, "
SELECT * FROM mtcars AS t1
INNER JOIN car_makes AS t2
ON car_names = make
")
### select certain columns in join
dbGetQuery(db, "
SELECT t1.hp, t1.cyl, t1.car_names, t2.car
FROM mtcars t1
JOIN car_makes t2
ON t1.car_names = make")
```
# Close connections
```{r}
dbDisconnect(db)
```