pandas ex 1–4

hughlilly · hughlilly · commit 2cca79f738a3 · 2022-02-18T08:56:48.000+13:00
diff --git a/08-pandas/pd1.py b/08-pandas/pd1.py
@@ -0,0 +1,19 @@
+
+# Create a Pandas DataFrame from CSV data and print a summary of it.
+# https://pandas.pydata.org/docs/getting_started/intro_tutorials/02_read_write.html
+
+from pathlib import Path
+
+import pandas as pd
+
+# Relative path, so it is resolved against the current working directory.
+infile = Path('./data/publications.csv')
+pubs = pd.read_csv(infile)
+
+# DataFrame.info() prints its summary itself (to stdout by default) and
+# returns None; wrapping it in print() would add a stray "None" line.
+pubs.info()
+
+# Alternative views of the same data:
+# print(pubs)
+# print(pubs.dtypes)
diff --git a/08-pandas/pd2.py b/08-pandas/pd2.py
@@ -0,0 +1,20 @@
+
+# Remove leading/trailing and double spaces from CatCallNumber
+# https://pandas.pydata.org/docs/getting_started/intro_tutorials/10_text_data.html
+
+from pathlib import Path
+
+import pandas as pd
+
+infile = Path('./data/publications.csv')
+pubs = pd.read_csv(infile)
+
+# strip() only trims the ends of each value, so runs of two or more spaces
+# inside the value are collapsed to a single space in a second step.
+pubs["CatCallNumber"] = (
+    pubs["CatCallNumber"]
+    .str.strip()
+    .str.replace(r" {2,}", " ", regex=True)
+)
+
+print(pubs["CatCallNumber"])
diff --git a/08-pandas/pd3.py b/08-pandas/pd3.py
@@ -0,0 +1,23 @@
+
+# Count concatenated CallNumbers. (Find and Replace, or COUNTIF)
+# https://pandas.pydata.org/docs/reference/api/pandas.Series.str.contains.html
+
+from pathlib import Path
+
+import pandas as pd
+
+infile = Path('./data/publications.csv')
+pubs = pd.read_csv(infile)
+
+pubs["CatCallNumber"] = pubs["CatCallNumber"].str.strip()
+
+search_char = "|"
+# regex=False: "|" is the regex alternation operator, so match it literally.
+# na=False: a missing call number counts as "no match" instead of NaN, which
+# keeps the mask strictly boolean.
+has_delim = pubs["CatCallNumber"].str.contains(
+    search_char, regex=False, na=False)
+
+# Summing a boolean mask counts its True entries.
+print(
+    f"The character '{search_char}' appears in {has_delim.sum()} rows in the CatCallNumber column.")
diff --git a/08-pandas/pd4.py b/08-pandas/pd4.py
@@ -0,0 +1,22 @@
+
+# Split concatenated CallNumbers on "|" delimiter
+# https://pandas.pydata.org/docs/reference/api/pandas.Series.str.split.html
+
+from pathlib import Path
+
+import pandas as pd
+
+infile = Path('./data/publications.csv')
+
+pubs = pd.read_csv(infile)
+
+pubs["CatCallNumber"] = pubs["CatCallNumber"].str.strip()
+
+delim = "|"
+# expand=True returns a DataFrame with one column per split piece;
+# regex=False treats the delimiter as a literal string, not a pattern.
+new_df = pubs["CatCallNumber"].str.split(
+    pat=delim, regex=False, expand=True)
+
+# Show the split result; the split does not write anything back into pubs.
+print(new_df)