Skip to content

Commit 2cca79f

Browse files
committed
pandas ex 1–4
1 parent 6d4344d commit 2cca79f

File tree

4 files changed

+62
-0
lines changed

4 files changed

+62
-0
lines changed

08-pandas/pd1.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
# Create a pandas DataFrame from CSV data and print a summary of it.
# https://pandas.pydata.org/docs/getting_started/intro_tutorials/02_read_write.html

from pathlib import Path

import pandas as pd

# The path is relative to the current working directory.
infile = Path('./data/publications.csv')
pubs = pd.read_csv(infile)

# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would emit an extra "None" line.
pubs.info()

# Alternative ways to inspect the frame:
# print(pubs)
# print(pubs.dtypes)

08-pandas/pd2.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
# Remove leading/trailing and double spaces from CatCallNumber
# https://pandas.pydata.org/docs/getting_started/intro_tutorials/10_text_data.html

from pathlib import Path

import pandas as pd

# The path is relative to the current working directory.
infile = Path('./data/publications.csv')
pubs = pd.read_csv(infile)

# str.strip() only trims the ends of each value; runs of two or more
# interior spaces are collapsed to a single space in a second step.
pubs["CatCallNumber"] = (
    pubs["CatCallNumber"]
    .str.strip()
    .str.replace(r" {2,}", " ", regex=True)
)

print(pubs["CatCallNumber"])

08-pandas/pd3.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
# Count concatenated CallNumbers. (Find and Replace, or COUNTIF)
# https://pandas.pydata.org/docs/reference/api/pandas.Series.str.contains.html

from pathlib import Path

import pandas as pd

# The path is relative to the current working directory.
infile = Path('./data/publications.csv')
pubs = pd.read_csv(infile)

pubs["CatCallNumber"] = pubs["CatCallNumber"].str.strip()

search_char = "|"
# Boolean mask, one entry per row: True where the value contains the
# delimiter. regex=False because "|" is the alternation operator in a
# regular expression; na=False so missing values count as "no match"
# instead of propagating into the mask.
count_concat = pubs["CatCallNumber"].str.contains(
    search_char, regex=False, na=False)

# Summing the mask counts the True entries, i.e. the matching rows.
print(
    f"The character '{search_char}' appears in {count_concat.sum()} rows in the CatCallNumber column.")

08-pandas/pd4.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
# Split concatenated CallNumbers on "|" delimiter
# https://pandas.pydata.org/docs/reference/api/pandas.Series.str.split.html

from pathlib import Path

import pandas as pd

# The path is relative to the current working directory.
infile = Path('./data/publications.csv')

pubs = pd.read_csv(infile)

pubs["CatCallNumber"] = pubs["CatCallNumber"].str.strip()

delim = "|"
# NOTE(review): `headers` is not used anywhere in this script; presumably
# it was meant for naming the split columns -- confirm or remove.
headers = "CatCallNumber"

# expand=True returns a DataFrame with one column per split part;
# regex=False makes "|" a literal delimiter rather than a regex.
new_df = pubs["CatCallNumber"].str.split(
    pat=delim, regex=False, expand=True)

# Show the split result (printing `pubs` would show the unsplit column).
print(new_df)

0 commit comments

Comments
 (0)