Skip to content

Commit 5bef985

Browse files
dmadeka authored and xhochy committed
ARROW-2332: Feather Reader option to return Table
Author: Dhruv Madeka <madeka@bu.edu>

Closes apache#1960 from dmadeka/feather-table and squashes the following commits:

cfb4c20 <Dhruv Madeka> Create read_table function
1ae2edd <Dhruv Madeka> Deprecate read and move to read_table
a12e8b7 <Dhruv Madeka> Fix Pep8 Issues causing build fails
14afeec <Dhruv Madeka> ARROW-2332 Table Read
1 parent 9b76ee4 commit 5bef985

2 files changed

Lines changed: 55 additions & 4 deletions

File tree

python/pyarrow/feather.py

Lines changed: 31 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020

2121
import six
2222
import pandas as pd
23+
import warnings
2324

2425
from pyarrow.compat import pdapi
2526
from pyarrow.lib import FeatherError # noqa
@@ -42,7 +43,12 @@ def __init__(self, source):
4243
self.source = source
4344
self.open(source)
4445

45-
def read(self, columns=None, nthreads=1):
46+
def read(self, *args, **kwargs):
    """
    Deprecated alias of ``read_pandas``.

    Emits a ``DeprecationWarning`` and then forwards every positional and
    keyword argument unchanged to ``self.read_pandas``.

    Returns
    -------
    Whatever ``self.read_pandas`` returns for the given arguments.
    """
    # stacklevel=2 attributes the warning to the caller's line rather than
    # to this wrapper, so users can locate the deprecated call site.
    warnings.warn("read has been deprecated. Use read_pandas instead.",
                  DeprecationWarning, stacklevel=2)
    return self.read_pandas(*args, **kwargs)
50+
51+
def read_table(self, columns=None):
4652
if columns is not None:
4753
column_set = set(columns)
4854
else:
@@ -58,7 +64,10 @@ def read(self, columns=None, nthreads=1):
5864
names.append(name)
5965

6066
table = Table.from_arrays(columns, names=names)
61-
return table.to_pandas(nthreads=nthreads)
67+
return table
68+
69+
def read_pandas(self, columns=None, nthreads=1):
    """
    Read the file through ``read_table`` and convert the result with
    ``to_pandas``.

    Parameters
    ----------
    columns : optional
        Forwarded to ``read_table`` as ``columns``.
    nthreads : default 1
        Forwarded to ``to_pandas`` as ``nthreads``.

    Returns
    -------
    The value returned by ``to_pandas`` on the table that was read.
    """
    table = self.read_table(columns=columns)
    return table.to_pandas(nthreads=nthreads)
6271

6372

6473
class FeatherWriter(object):
@@ -129,4 +138,23 @@ def read_feather(source, columns=None, nthreads=1):
129138
df : pandas.DataFrame
130139
"""
131140
reader = FeatherReader(source)
132-
return reader.read(columns=columns, nthreads=nthreads)
141+
return reader.read_pandas(columns=columns, nthreads=nthreads)
142+
143+
144+
def read_table(source, columns=None):
    """
    Load a Feather file as a pyarrow.Table

    Parameters
    ----------
    source : string file path, or file-like object
    columns : sequence, optional
        Restrict the read to these columns; when omitted, every column is
        read

    Returns
    -------
    table : pyarrow.Table
    """
    return FeatherReader(source).read_table(columns=columns)

python/pyarrow/tests/test_feather.py

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929

3030
import pyarrow as pa
3131
from pyarrow.feather import (read_feather, write_feather,
32-
FeatherReader)
32+
read_table, FeatherReader)
3333
from pyarrow.lib import FeatherWriter
3434

3535

@@ -129,6 +129,29 @@ def test_float_no_nulls(self):
129129
df = pd.DataFrame(data)
130130
self._check_pandas_roundtrip(df)
131131

132+
def test_read_table(self):
    """Columns written with FeatherWriter compare equal when read back
    through read_table."""
    shape = (100, 100)
    names = ['col_' + str(i) for i in range(shape[1])]

    path = random_path()
    self.test_files.append(path)

    writer = FeatherWriter()
    writer.open(path)

    # Write each column of a random integer matrix under its generated name.
    values = np.random.randint(0, 100, size=shape)
    for idx, name in enumerate(names):
        writer.write_array(name, values[:, idx])

    writer.close()

    frame = pd.DataFrame(values, columns=names)
    expected = pa.Table.from_pandas(frame)

    result = read_table(path)

    assert_frame_equal(expected.to_pandas(), result.to_pandas())
154+
132155
def test_float_nulls(self):
133156
num_values = 100
134157

0 commit comments

Comments
 (0)