Skip to content

Commit c6bec15

Browse files
jorisvandenbossche authored and pitrou committed
ARROW-6926: [Python] Support __sizeof__ protocol for Python objects
https://issues.apache.org/jira/browse/ARROW-6926

Closes apache#5879 from jorisvandenbossche/ARROW-6926-sizeof and squashes the following commits:

baca02f <Joris Van den Bossche> check size of metadata keys and values
17f75d4 <Joris Van den Bossche> add Schema sizeof
54896e1 <Joris Van den Bossche> object -> super
b36aa5d <Joris Van den Bossche> ARROW-6926: Support __sizeof__ protocol for Python objects

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent 7e0022a commit c6bec15

6 files changed

Lines changed: 44 additions & 1 deletion

File tree

python/pyarrow/array.pxi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -805,6 +805,9 @@ cdef class Array(_PandasConvertible):
805805
size += buf.size
806806
return size
807807

808+
def __sizeof__(self):
    """
    Return the size of this object for ``sys.getsizeof``.

    The result is the parent class's ``__sizeof__`` value plus
    ``self.nbytes``.
    """
    base_size = super(Array, self).__sizeof__()
    return base_size + self.nbytes
810+
808811
def __iter__(self):
809812
for i in range(len(self)):
810813
yield self.getitem(i)

python/pyarrow/table.pxi

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ cdef class ChunkedArray(_PandasConvertible):
125125
size += chunk.nbytes
126126
return size
127127

128+
def __sizeof__(self):
    """
    Return the size of this object for ``sys.getsizeof``.

    The result is the parent class's ``__sizeof__`` value plus
    ``self.nbytes``.
    """
    base_size = super(ChunkedArray, self).__sizeof__()
    return base_size + self.nbytes
130+
128131
def __iter__(self):
129132
for chunk in self.iterchunks():
130133
for item in chunk:
@@ -657,6 +660,9 @@ cdef class RecordBatch(_PandasConvertible):
657660
size += self.column(i).nbytes
658661
return size
659662

663+
def __sizeof__(self):
    """
    Return the size of this object for ``sys.getsizeof``.

    The result is the parent class's ``__sizeof__`` value plus
    ``self.nbytes``.
    """
    base_size = super(RecordBatch, self).__sizeof__()
    return base_size + self.nbytes
665+
660666
def __getitem__(self, key):
661667
if isinstance(key, slice):
662668
return _normalize_slice(self, key)
@@ -1521,6 +1527,9 @@ cdef class Table(_PandasConvertible):
15211527
size += column.nbytes
15221528
return size
15231529

1530+
def __sizeof__(self):
    """
    Return the size of this object for ``sys.getsizeof``.

    The result is the parent class's ``__sizeof__`` value plus
    ``self.nbytes``.
    """
    base_size = super(Table, self).__sizeof__()
    return base_size + self.nbytes
1532+
15241533
def add_column(self, int i, field_, column):
15251534
"""
15261535
Add column to Table at position. Returns new table

python/pyarrow/tests/test_array.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1607,13 +1607,16 @@ def test_buffers_nested():
16071607
assert struct.unpack('4xh', values) == (43,)
16081608

16091609

1610-
def test_nbytes():
1610+
def test_nbytes_sizeof():
    """
    Check ``nbytes`` for a few arrays and that ``sys.getsizeof`` reports
    at least the plain object size plus ``nbytes``.
    """
    cases = [
        # int64 array built from a NumPy array, no nulls
        (pa.array(np.array([4, 5, 6], dtype='int64')), 8 * 3),
        # int64 array containing a null
        (pa.array([1, None, 3], type='int64'), 8*3 + 1),
        # list<int64> array with a null list and a null element
        (pa.array([[1, 2], None, [3, None, 4, 5]],
                  type=pa.list_(pa.int64())),
         1 + 4 * 4 + 1 + 6 * 8),
    ]
    for arr, expected_nbytes in cases:
        assert arr.nbytes == expected_nbytes
        assert sys.getsizeof(arr) >= object.__sizeof__(arr) + arr.nbytes
16171620

16181621

16191622
def test_invalid_tensor_constructor_repr():

python/pyarrow/tests/test_schema.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from collections import OrderedDict
1919
import pickle
20+
import sys
2021

2122
import pytest
2223
import numpy as np
@@ -542,3 +543,17 @@ def test_schema_from_pandas():
542543
schema = pa.Schema.from_pandas(df)
543544
expected = pa.Table.from_pandas(df).schema
544545
assert schema == expected
546+
547+
548+
def test_schema_sizeof():
    """
    Check that ``sys.getsizeof`` of a schema is non-trivial and grows
    when metadata is attached and when the metadata value gets longer.
    """
    fields = [
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
    ]
    schema = pa.schema(fields)
    plain_size = sys.getsizeof(schema)
    assert plain_size > 30

    # Same schema with a short, then a longer, metadata value.
    schema2 = schema.with_metadata({"key": "some metadata"})
    short_meta_size = sys.getsizeof(schema2)
    assert short_meta_size > plain_size

    schema3 = schema.with_metadata({"key": "some more metadata"})
    assert sys.getsizeof(schema3) > short_meta_size

python/pyarrow/tests/test_table.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def test_chunked_array_basics():
4545
assert all(isinstance(c, pa.lib.Int64Array) for c in data.iterchunks())
4646
assert len(data.chunks) == 3
4747
assert data.nbytes == sum(c.nbytes for c in data.iterchunks())
48+
assert sys.getsizeof(data) >= object.__sizeof__(data) + data.nbytes
4849
data.validate()
4950

5051

@@ -276,6 +277,7 @@ def test_recordbatch_basics():
276277
assert batch.num_rows == 5
277278
assert batch.num_columns == len(data)
278279
assert batch.nbytes == 5 * 2 + 1 + 5 * 4 + 1
280+
assert sys.getsizeof(batch) >= object.__sizeof__(batch) + batch.nbytes
279281
pydict = batch.to_pydict()
280282
assert pydict == OrderedDict([
281283
('c0', [0, 1, 2, 3, 4]),
@@ -524,6 +526,7 @@ def test_table_basics():
524526
assert table.num_columns == 2
525527
assert table.shape == (5, 2)
526528
assert table.nbytes == 2 * (5 * 8 + 1)
529+
assert sys.getsizeof(table) >= object.__sizeof__(table) + table.nbytes
527530
pydict = table.to_pydict()
528531
assert pydict == OrderedDict([
529532
('a', [0, 1, 2, 3, 4]),

python/pyarrow/types.pxi

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import atexit
1919
import re
20+
import sys
2021
import warnings
2122

2223
from pyarrow import compat
@@ -891,6 +892,15 @@ cdef class Schema:
891892
def __hash__(self):
892893
return hash((tuple(self), self.metadata))
893894

895+
def __sizeof__(self):
    """
    Return the size of this schema for ``sys.getsizeof``.

    The result is the parent class's ``__sizeof__`` value plus the
    ``sys.getsizeof`` of every metadata key and value, when metadata
    is present.
    """
    metadata = self.metadata
    metadata_size = 0
    if metadata:
        metadata_size = sum(
            sys.getsizeof(key) + sys.getsizeof(value)
            for key, value in metadata.items()
        )

    return metadata_size + super(Schema, self).__sizeof__()
903+
894904
@property
895905
def pandas_metadata(self):
896906
"""

0 commit comments

Comments
 (0)