ARROW-6067: [Python] Fix failing large memory Python tests

wesm · wesm · commit 5da8ae80f640 · 2019-08-20T12:20:50.000-05:00
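This change passes the column name explicitly to `check_chunked_overflow` in feather.py instead of reading `col.name`, and changes the type of the `y` field in `test_from_numpy_large` from `pa.binary(bs)` to `pa.binary()`. We should arrange to run the large-memory tests on a regular basis (perhaps at https://ci.ursalabs.org/) so that such failures do not pass silently. Closes apache#5128 from wesm/ARROW-6067 and squashes the following commits: d484105 <Wes McKinney> Fix large memory Python tests Authored-by: Wes McKinney <wesm+git@apache.org> Signed-off-by: Wes McKinney <wesm+git@apache.org>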
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
@@ -58,19 +58,19 @@ def read_pandas(self, columns=None, use_threads=True):
             use_threads=use_threads)
 
 
-def check_chunked_overflow(col):
+def check_chunked_overflow(name, col):
     if col.num_chunks == 1:
         return
 
     if col.type in (ext.binary(), ext.string()):
         raise ValueError("Column '{0}' exceeds 2GB maximum capacity of "
                          "a Feather binary column. This restriction may be "
-                         "lifted in the future".format(col.name))
+                         "lifted in the future".format(name))
     else:
         # TODO(wesm): Not sure when else this might be reached
         raise ValueError("Column '{0}' of type {1} was chunked on conversion "
                          "to Arrow and cannot be currently written to "
-                         "Feather format".format(col.name, str(col.type)))
+                         "Feather format".format(name, str(col.type)))
 
 
 class FeatherWriter(object):
@@ -93,7 +93,7 @@ def write(self, df):
             table = Table.from_pandas(df, preserve_index=False)
             for i, name in enumerate(table.schema.names):
                 col = table[i]
-                check_chunked_overflow(col)
+                check_chunked_overflow(name, col)
                 self.writer.write_array(name, col.chunk(0))
 
         self.writer.close()
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
@@ -2005,7 +2005,7 @@ def test_from_numpy_large(self):
         data['x'][data['x'] < 0.2] = np.nan
 
         ty = pa.struct([pa.field('x', pa.float64()),
-                        pa.field('y', pa.binary(bs))])
+                        pa.field('y', pa.binary())])
         arr = pa.array(data, type=ty, from_pandas=True)
         assert arr.num_chunks == 2