File tree Expand file tree Collapse file tree 4 files changed +62
-0
lines changed
Expand file tree Collapse file tree 4 files changed +62
-0
lines changed Original file line number Diff line number Diff line change 1+
2+ # Create a Pandas DataFrame from CSV data
3+ # https://pandas.pydata.org/docs/getting_started/intro_tutorials/02_read_write.html
4+
5+ import pandas as pd
6+ from pathlib import Path
7+
# Default location of the publications export, relative to the working directory.
infile = Path('./data/publications.csv')


def main(csv_path=infile):
    """Load the publications CSV and print a structural summary of it.

    Args:
        csv_path: Path of the CSV file to read; defaults to ``infile``.
    """
    pubs = pd.read_csv(csv_path)

    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would add a stray "None" line to the output.
    pubs.info()

    # Other quick looks while exploring: print(pubs) shows the rows and
    # print(pubs.dtypes) shows only the column types.


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change 1+
2+ # Remove leading/trailing and double spaces from CatCallNumber
3+ # https://pandas.pydata.org/docs/getting_started/intro_tutorials/10_text_data.html
4+
5+ import pandas as pd
6+ from pathlib import Path
7+
# Default location of the publications export, relative to the working directory.
infile = Path('./data/publications.csv')


def clean_call_numbers(call_numbers):
    """Return a copy of a string Series with its whitespace normalized.

    Leading and trailing whitespace is stripped, and every run of two or
    more spaces inside a value is collapsed to a single space. Missing
    values stay missing.

    Args:
        call_numbers: pandas Series of strings (for example the
            ``CatCallNumber`` column).

    Returns:
        A new Series of the same length with the cleaned values.
    """
    stripped = call_numbers.str.strip()
    # strip() only touches the ends of each value; interior double spaces
    # need an explicit replacement.
    return stripped.str.replace(r" {2,}", " ", regex=True)


def main(csv_path=infile):
    """Load the publications CSV, clean ``CatCallNumber`` and print it.

    Args:
        csv_path: Path of the CSV file to read; defaults to ``infile``.
    """
    pubs = pd.read_csv(csv_path)
    pubs["CatCallNumber"] = clean_call_numbers(pubs["CatCallNumber"])
    print(pubs["CatCallNumber"])


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change 1+
2+ # Count concatenated CallNumbers. (Find and Replace, or COUNTIF)
3+ # https://pandas.pydata.org/docs/reference/api/pandas.Series.str.contains.html
4+
5+ import pandas as pd
6+ from pathlib import Path
7+
# Default location of the publications export, relative to the working directory.
infile = Path('./data/publications.csv')


def count_containing(call_numbers, search_char="|"):
    """Count the values in a string Series that contain ``search_char``.

    The match is literal (not a regular expression), and missing values
    count as "does not contain".

    Args:
        call_numbers: pandas Series of strings.
        search_char: Literal text to look for in each value.

    Returns:
        The number of matching rows as a plain ``int``.
    """
    # na=False keeps the mask strictly boolean even when values are missing.
    matches = call_numbers.str.contains(search_char, regex=False, na=False)
    return int(matches.sum())


def describe_count(search_char, total):
    """Build the one-line report for ``total`` rows containing ``search_char``."""
    return (
        f"The character '{search_char}' appears in {total} rows "
        "in the CatCallNumber column."
    )


def main(csv_path=infile):
    """Load the publications CSV and report how many call numbers contain "|".

    Args:
        csv_path: Path of the CSV file to read; defaults to ``infile``.
    """
    pubs = pd.read_csv(csv_path)
    pubs["CatCallNumber"] = pubs["CatCallNumber"].str.strip()

    search_char = "|"
    total = count_containing(pubs["CatCallNumber"], search_char)
    print(describe_count(search_char, total))


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change 1+
2+ # Split concatenated CallNumbers on "|" delimiter
3+ # https://pandas.pydata.org/docs/reference/api/pandas.Series.str.split.html
4+
5+ import pandas as pd
6+ from pathlib import Path
7+
# Default location of the publications export, relative to the working directory.
infile = Path('./data/publications.csv')


def split_call_numbers(call_numbers, delim="|"):
    """Split each value of a string Series on a literal delimiter.

    Outer whitespace is stripped from each value before splitting. The
    delimiter is treated as plain text, not as a regular expression.

    Args:
        call_numbers: pandas Series of strings.
        delim: Literal delimiter separating the concatenated parts.

    Returns:
        A DataFrame with one column per part (labelled 0, 1, 2, ...); rows
        with fewer parts than the widest row are padded with missing values.
    """
    stripped = call_numbers.str.strip()
    return stripped.str.split(pat=delim, regex=False, expand=True)


def main(csv_path=infile):
    """Load the publications CSV and print the split ``CatCallNumber`` parts.

    Args:
        csv_path: Path of the CSV file to read; defaults to ``infile``.
    """
    pubs = pd.read_csv(csv_path)
    new_df = split_call_numbers(pubs["CatCallNumber"], delim="|")

    # Show the split result; printing the untouched input frame would hide
    # the effect of the split entirely.
    print(new_df)


if __name__ == "__main__":
    main()
You can’t perform that action at this time.
0 commit comments