Skip to content

Commit 20ec0fd

Browse files
Salonijain27 and salonijain27
authored and committed
ARROW-7914: [Python] Allow pandas datetime as index for feather
Closes apache#12821 from Salonijain27/ARROW-7914_fetch_update

Lead-authored-by: Salonijain27 <salj7856@gmail.com>
Co-authored-by: salonijain27 <salonijain@Salonis-MacBook-Pro.local>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
1 parent f89c9ae commit 20ec0fd

2 files changed

Lines changed: 31 additions & 3 deletions

File tree

python/pyarrow/feather.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,17 @@ def write_feather(df, dest, compression=None, compression_level=None,
151151
df = df.to_dense()
152152

153153
if _pandas_api.is_data_frame(df):
154-
table = Table.from_pandas(df, preserve_index=False)
154+
# Feather v1 creates a new column in the resultant Table to
155+
# store index information if index type is not RangeIndex
156+
157+
if version == 1:
158+
preserve_index = False
159+
elif version == 2:
160+
preserve_index = None
161+
else:
162+
raise ValueError("Version value should either be 1 or 2")
163+
164+
table = Table.from_pandas(df, preserve_index=preserve_index)
155165

156166
if version == 1:
157167
# Version 1 does not support chunking

python/pyarrow/tests/test_feather.py

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@
3030
from pyarrow.feather import (read_feather, write_feather, read_table,
3131
FeatherDataset)
3232

33-
3433
try:
3534
from pandas.testing import assert_frame_equal
3635
import pandas as pd
@@ -90,13 +89,18 @@ def _check_pandas_roundtrip(df, expected=None, path=None,
9089
if path is None:
9190
path = random_path()
9291

92+
if version is None:
93+
version = 2
94+
9395
TEST_FILES.append(path)
9496
write_feather(df, path, compression=compression,
9597
compression_level=compression_level, version=version)
98+
9699
if not os.path.exists(path):
97100
raise Exception('file not written')
98101

99102
result = read_feather(path, columns, use_threads=use_threads)
103+
100104
if expected is None:
101105
expected = df
102106

@@ -504,8 +508,10 @@ def test_out_of_float64_timestamp_with_nulls(version):
504508
def test_non_string_columns(version):
505509
df = pd.DataFrame({0: [1, 2, 3, 4],
506510
1: [True, False, True, False]})
511+
expected = df
507512

508-
expected = df.rename(columns=str)
513+
if version == 1:
514+
expected = df.rename(columns=str)
509515
_check_pandas_roundtrip(df, expected, version=version)
510516

511517

@@ -820,3 +826,15 @@ def test_feather_v017_experimental_compression_backward_compatibility(datadir):
820826
expected = pa.table({'a': range(5)})
821827
result = read_table(datadir / "v0.17.0.version.2-compression.lz4.feather")
822828
assert result.equals(expected)
829+
830+
831+
@pytest.mark.pandas
832+
def test_preserve_index_pandas(version):
833+
df = pd.DataFrame({'a': [1, 2, 3]}, index=['a', 'b', 'c'])
834+
835+
if version == 1:
836+
expected = df.reset_index(drop=True).rename(columns=str)
837+
else:
838+
expected = df
839+
840+
_check_pandas_roundtrip(df, expected, version=version)

0 commit comments

Comments
 (0)