Skip to content

Commit e09a7bb

Browse files
authored
Add files via upload
1 parent 402f1ef commit e09a7bb

File tree

4 files changed

+114
-0
lines changed

4 files changed

+114
-0
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
library(arrow)
2+
library(purrr)
3+
4+
# List of all parquet files in the working directory.
# The dot is escaped so that only names ending in the literal extension
# ".parquet" match; an unescaped "." is a regex wildcard and would also accept
# names such as "dataXparquet".
files_to_check <- list.files(
  path = getwd(),
  pattern = "\\.parquet$",
  full.names = TRUE
)
6+
7+
#' Get the column names of a parquet file
#'
#' Only the schema stored in the file is read. The previous implementation
#' called `read_parquet(file, as_data_frame = FALSE)`, which loads the whole
#' table into memory just to inspect its schema.
#'
#' @param file Path to a parquet file.
#' @return A character vector with the column names, in file order.
get_column_names <- function(file) {
  reader <- ParquetFileReader$create(file)
  reader$GetSchema()$names
}
13+
14+
# Get column names for each file
column_names_list <- map(files_to_check, get_column_names)

# Find the intersection of column names across all files.
# reduce() raises an error on an empty list when no `.init` is supplied, so a
# directory without parquet files yields an empty result instead of aborting.
same_columns <- if (length(column_names_list) > 0) {
  reduce(column_names_list, intersect)
} else {
  character(0)
}

# Print common columns
cat("Common columns across all parquet files:\n")
print(same_columns)

# View columns in each file for comparison, labelled by file name
names(column_names_list) <- basename(files_to_check)
column_names_list

december_demo/MissingColumn.R

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
library(arrow)
2+
3+
# List all parquet files below the working directory.
# The pattern is escaped and anchored: the original ".parquet" treated "." as
# a wildcard and matched anywhere in the name, so files such as
# "x.parquet.bak" or "myparquet.txt" were picked up as well.
files <- list.files(
  path = getwd(),
  pattern = "\\.parquet$",
  recursive = TRUE,
  full.names = TRUE
)
5+
6+
# For each file, yield its base name when the 'town' column is absent and
# NULL when it is present. Only the parquet schema is inspected, so the row
# data is never loaded into memory just to look at the column names.
# NOTE(review): files are listed recursively but reported by base name only,
# so two files with the same name in different folders look identical here.
missing_column <- lapply(files, function(file) {
  column_names <- ParquetFileReader$create(file)$GetSchema()$names

  if ("town" %in% column_names) {
    NULL
  } else {
    basename(file)
  }
})
17+
18+
# Drop the NULL entries; what remains are the files lacking 'town'
missing_files <- missing_column[!vapply(missing_column, is.null, logical(1))]

# Report the outcome
if (length(missing_files) == 0) {
  cat("All files contain the expected column.\n")
} else {
  cat("Files missing 'town' column:\n")
  print(missing_files)
}

december_demo/Parquetize.R

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
library(parquetize)
2+
library(arrow)
3+
4+
5+
# Root folder that is searched for CSV files
filesloc <- getwd()

# Collect every CSV below the root folder.
# The pattern is escaped and anchored so that only names ending in ".csv"
# match; the original ".csv" treated "." as a wildcard and matched anywhere in
# the name (e.g. "a.csv.bak" or "mycsv.txt").
files <- list.files(
  filesloc,
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)
11+
12+
# Convert every listed CSV into a parquet file stored next to it.
for (csv in files) {
  # Swap only the trailing file extension. The previous gsub(".csv", ...)
  # treated "." as a wildcard and replaced every match in the full path, so
  # "data/csv_raw/a.csv" became "data.parquet_raw/a.parquet".
  parquet <- paste0(tools::file_path_sans_ext(csv), ".parquet")
  csv_to_parquet(csv, path_to_parquet = parquet)
}
17+
18+

december_demo/Simple_Map.R

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Easy map
2+
library(leaflet)
3+
4+
# Build the quakes map step by step: base widget, tile layer, then markers
quake_map <- leaflet(data = quakes)
quake_map <- addProviderTiles(quake_map, providers$Stadia.AlidadeSmooth)
quake_map <- addCircleMarkers(
  quake_map,
  radius = 1,
  color = "red",
  fillOpacity = 1,
  clusterOptions = markerClusterOptions() # Cluster for performance
)

# Display the finished map
quake_map
13+
14+
15+
library(leaflet)
16+
library(maps)
17+
library(sf)
18+
19+
# US states dataset: fetch the filled state outlines without plotting them,
# then convert the result to sf polygons
states <- maps::map("state", fill = TRUE, plot = FALSE) %>%
  st_as_sf()
22+
23+
# Sample points dataset, written one record per city and then assembled
# into longitude / latitude / label columns
points_data <- local({
  cities <- list(
    list(label = "Hartford", longitude = -72.673370, latitude = 41.765804),
    list(label = "Albany", longitude = -73.759262, latitude = 42.652580),
    list(label = "Philadelphia", longitude = -75.165222, latitude = 39.952583)
  )

  data.frame(
    longitude = vapply(cities, function(city) city$longitude, numeric(1)),
    latitude = vapply(cities, function(city) city$latitude, numeric(1)),
    label = vapply(cities, function(city) city$label, character(1))
  )
})
29+
30+
# Build the map layer by layer: imagery tiles, state polygons, city markers
state_map <- leaflet()
state_map <- addProviderTiles(state_map, providers$Esri.WorldImagery)
state_map <- addPolygons(
  state_map,
  data = states,
  color = "blue",
  weight = 1,
  fillOpacity = 0.3
)
state_map <- addCircleMarkers(
  state_map,
  data = points_data,
  lng = ~longitude,
  lat = ~latitude,
  radius = 5,
  color = "orange",
  fillOpacity = 1,
  label = ~label
)

# Display the finished map
state_map
43+

0 commit comments

Comments
 (0)