Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,7 @@ I/O
- Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`)
- Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`)
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
- Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
-

Period
Expand Down
16 changes: 11 additions & 5 deletions pandas/io/json/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
defaultdict,
)
import copy
import sys
from typing import (
Any,
DefaultDict,
Expand Down Expand Up @@ -148,13 +149,18 @@ def _normalise_json(
if isinstance(data, dict):
for key, value in data.items():
new_key = f"{key_string}{separator}{key}"

if not key_string:
if sys.version_info < (3, 9):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MarcoGorelli if we use `if not PY310`, where `PY310` is from `pandas.compat`, would pyupgrade still flag this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It wouldn't, no: pyupgrade just does static analysis (it wouldn't know what the symbol `PY310` means). In fact, I was kinda tempted to replace all the `PY310` and other `pandas.compat` constants with `sys.version_info` checks, so we don't need to remember what to clean up when dropping versions each year.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would be open to this change.

from pandas.util._str_methods import removeprefix

new_key = removeprefix(new_key, separator)
else:
new_key = new_key.removeprefix(separator)

_normalise_json(
data=value,
# to avoid adding the separator to the start of every key
# GH#43831 avoid adding key if key_string blank
key_string=new_key
if new_key[: len(separator)] != separator
else new_key[len(separator) :],
key_string=new_key,
normalized_dict=normalized_dict,
separator=separator,
)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/io/json/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,14 @@ def generator_data():

tm.assert_frame_equal(result, expected)

def test_top_column_with_leading_underscore(self):
    # GH#49861: a top-level key that itself starts with the separator
    # ("_id" with sep="_") must keep its leading character once flattened.
    nested = {"_id": {"a1": 10, "l2": {"l3": 0}}, "gg": 4}
    expected_columns = ["gg", "_id_a1", "_id_l2_l3"]
    expected = DataFrame([[4, 10, 0]], columns=expected_columns)

    result = json_normalize(nested, sep="_")

    tm.assert_frame_equal(result, expected)


class TestNestedToRecord:
def test_flat_stays_flat(self):
Expand Down