Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
empty strings -> nat
  • Loading branch information
MarcoGorelli committed Dec 14, 2022
commit 716d32b845910df9c2f595f61b5ad9e383003b10
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ Datetimelike
- Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`)
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp`, ``datetime.datetime``, ``datetime.date``, or ``np.datetime64`` objects when non-ISO8601 ``format`` was passed (:issue:`49298`, :issue:`50036`)
- Bug in :class:`Timestamp` was showing ``UserWarning`` which was not actionable by users (:issue:`50232`)
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing an empty string and a non-ISO8601 ``format`` was passed. Now, empty strings will be parsed as :class:`NaT`, for compatibility with how this is done for ISO8601 formats (:issue:`50251`)
-

Timedelta
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def array_strptime(
for i in range(n):
val = values[i]
if isinstance(val, str):
if val in nat_strings:
if len(val) == 0 or val in nat_strings:
iresult[i] = NPY_NAT
continue
elif checknull_with_nat_and_na(val):
Expand Down
27 changes: 8 additions & 19 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2019,17 +2019,13 @@ def test_to_datetime_timezone_name(self):
assert result == expected

@td.skip_if_not_us_locale
def test_to_datetime_with_apply_with_empty_str(self, cache):
@pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
def test_to_datetime_with_apply_with_empty_str(self, cache, errors):
# this is only locale tested with US/None locales
# GH 5195
# GH 5195, GH50251
# with a format and coerce a single item to_datetime fails
td = Series(["May 04", "Jun 02", ""], index=[1, 2, 3])
msg = r"time data '' does not match format '%b %y' \(match\)"
with pytest.raises(ValueError, match=msg):
to_datetime(td, format="%b %y", errors="raise", cache=cache)
with pytest.raises(ValueError, match=msg):
td.apply(to_datetime, format="%b %y", errors="raise", cache=cache)
expected = to_datetime(td, format="%b %y", errors="coerce", cache=cache)
expected = to_datetime(td, format="%b %y", errors=errors, cache=cache)

result = td.apply(
lambda x: to_datetime(x, format="%b %y", errors="coerce", cache=cache)
Expand Down Expand Up @@ -2976,24 +2972,17 @@ def test_na_to_datetime(nulls_fixture, klass):
assert result[0] is NaT


def test_empty_string_datetime_coerce_format():
# GH13044
@pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
def test_empty_string_datetime_coerce_format(errors):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Maybe good to update this test name since coerce isn't the only errors argument being tested here

# GH13044, GH50251
td = Series(["03/24/2016", "03/25/2016", ""])
format = "%m/%d/%Y"

# coerce empty string to pd.NaT
result = to_datetime(td, format=format, errors="coerce")
result = to_datetime(td, format=format, errors=errors)
expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[ns]")
tm.assert_series_equal(expected, result)

# raise an exception in case a format is given
with pytest.raises(ValueError, match="does not match format"):
to_datetime(td, format=format, errors="raise")

# still raise an exception in case no format is given
with pytest.raises(ValueError, match="does not match format"):
to_datetime(td, errors="raise")


def test_empty_string_datetime_coerce__unit():
# GH13044
Expand Down