-
Notifications
You must be signed in to change notification settings - Fork 57
Expand file tree
/
Copy pathpull_states.py
More file actions
106 lines (92 loc) · 2.73 KB
/
Copy pathpull_states.py
File metadata and controls
106 lines (92 loc) · 2.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
from typing import Dict, List
import wikipedia as wiki
import pandas as pd
# Directory that receives the exported CSV (relative to the working directory).
EXPORT_PATH = "./data"

# Full path of the CSV file holding one Wikipedia summary per city.
EXPORT_FILENAME = os.path.join(EXPORT_PATH, "city_wikipedia_summaries.csv")
# Names of the 50 US cities whose Wikipedia summaries are downloaded, written
# in "City, State" style. pull_state_data passes this list unchanged to
# get_wikipedia_summary, so each entry is used verbatim as the lookup string.
# NOTE(review): looks like a list of the most populous US cities — confirm the
# intended selection criteria before adding or removing entries.
CITIES = [
    "New York, New York",
    "Los Angeles, California",
    "Chicago, Illinois",
    "Houston, Texas",
    "Phoenix, Arizona",
    "Philadelphia, Pennsylvania",
    "San Antonio, Texas",
    "San Diego, California",
    "Dallas, Texas",
    "San Jose, California",
    "Austin, Texas",
    "Jacksonville, Florida",
    "Fort Worth, Texas",
    "Columbus, Ohio",
    "Charlotte, North Carolina",
    "San Francisco, California",
    "Indianapolis, Indiana",
    "Seattle, Washington",
    "Denver, Colorado",
    "Washington, D.C.",
    "Boston, Massachusetts",
    "El Paso, Texas",
    "Nashville, Tennessee",
    "Detroit, Michigan",
    "Oklahoma City, Oklahoma",
    "Portland, Oregon",
    "Las Vegas, Nevada",
    "Memphis, Tennessee",
    "Louisville, Kentucky",
    "Baltimore, Maryland",
    "Milwaukee, Wisconsin",
    "Albuquerque, New Mexico",
    "Tucson, Arizona",
    "Fresno, California",
    "Mesa, Arizona",
    "Sacramento, California",
    "Atlanta, Georgia",
    "Kansas City, Missouri",
    "Colorado Springs, Colorado",
    "Miami, Florida",
    "Raleigh, North Carolina",
    "Omaha, Nebraska",
    "Long Beach, California",
    "Virginia Beach, Virginia",
    "Oakland, California",
    "Minneapolis, Minnesota",
    "Tulsa, Oklahoma",
    "Arlington, Texas",
    "Tampa, Florida",
    "New Orleans, Louisiana",
]
def create_folder_if_not_exists(folder_path: str) -> None:
    """Create a folder if it does not exist.

    Missing parent directories are created as well. Nothing happens (and
    nothing is printed) when the path already exists.

    Args:
        folder_path (str): The path to the folder to create.
    """
    if os.path.exists(folder_path):
        return
    # exist_ok=True makes the call tolerate another process creating the
    # directory between the check above and this line, instead of raising
    # FileExistsError.
    os.makedirs(folder_path, exist_ok=True)
    print(f"Folder created: {folder_path}")
def get_wikipedia_summary(cities: List[str]) -> Dict[str, str]:
    """Fetch a Wikipedia summary for each city name.

    Retrieval is best-effort: a city whose lookup fails is reported on stdout
    and left out of the result, so one failing lookup does not abort the batch.

    Args:
        cities (List[str]): City names, each passed as-is to ``wiki.summary``.

    Returns:
        Dict[str, str]: Mapping of city name to the value returned by
        ``wiki.summary`` for it, containing only the cities that succeeded.
    """
    city_summaries: Dict[str, str] = {}
    for city in cities:
        try:
            city_summaries[city] = wiki.summary(city)
        except Exception as exc:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still stop a long download, and
            # include the cause so failures can be diagnosed.
            print(f"error retrieving {city}: {exc}")
    return city_summaries
def write_data(output_dict: Dict[str, str]) -> None:
    """Export the summaries to EXPORT_FILENAME as a two-column CSV.

    The dictionary keys are written to the "State" column and the values to
    the "Wiki Summary" column. EXPORT_PATH is created first if it is missing.

    Args:
        output_dict (Dict[str, str]): Mapping of name to summary text.
    """
    # One row whose columns are the keys; transposing turns it into one row
    # per key, and reset_index moves the keys from the index into a column.
    single_row = pd.DataFrame([output_dict])
    summary_frame = single_row.transpose().reset_index()
    summary_frame.columns = ["State", "Wiki Summary"]

    create_folder_if_not_exists(EXPORT_PATH)
    summary_frame.to_csv(EXPORT_FILENAME, index=False)
def pull_state_data() -> None:
    """Download and export the city summaries unless the CSV already exists.

    When EXPORT_FILENAME is already present the download is skipped;
    otherwise the summaries for CITIES are fetched with
    get_wikipedia_summary and written with write_data.
    """
    # Test the full path directly: os.listdir() returns bare file names, which
    # never equal the joined EXPORT_FILENAME path, and it raises
    # FileNotFoundError when EXPORT_PATH has not been created yet.
    if os.path.isfile(EXPORT_FILENAME):
        print("data already present...skipping download")
        return

    print("data not found pullling wikipedia state summaries...")
    city_summary_output = get_wikipedia_summary(CITIES)
    write_data(city_summary_output)
    print(f"...data exported to {EXPORT_FILENAME}")
# Run the download/export only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    pull_state_data()