Skip to content

Commit 4d0fbff

Browse files
authored
ARROW-17320: [Python] Refine pyarrow.parquet API exposure (apache#14096)
Fixes [ARROW-17320](https://issues.apache.org/jira/browse/ARROW-17320?page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel&focusedCommentId=17577330). Adds a deprecation of `_filters_to_expression` in favor of `filters_to_expression` in apache@c7fdff3; let me know if that commit should be dropped. :) Authored-by: Miles Granger <miles59923@gmail.com> Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
1 parent df7babb commit 4d0fbff

2 files changed

Lines changed: 56 additions & 11 deletions

File tree

python/pyarrow/parquet/core.py

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from contextlib import nullcontext
2222
from functools import partial, reduce
2323

24-
import sys
2524
import json
2625
from collections.abc import Collection
2726
import numpy as np
@@ -45,7 +44,7 @@
4544
from pyarrow.fs import (LocalFileSystem, FileSystem,
4645
_resolve_filesystem_and_path, _ensure_filesystem)
4746
from pyarrow import filesystem as legacyfs
48-
from pyarrow.util import guid, _is_path_like, _stringify_path
47+
from pyarrow.util import guid, _is_path_like, _stringify_path, _deprecate_api
4948

5049
_URI_STRIP_SCHEMES = ('hdfs',)
5150

@@ -141,11 +140,27 @@ def _check_filters(filters, check_null_strings=True):
141140
"""
142141

143142

144-
def _filters_to_expression(filters):
143+
def filters_to_expression(filters):
145144
"""
146-
Check if filters are well-formed.
145+
Check if filters are well-formed and convert to an ``Expression``.
146+
147+
Parameters
148+
----------
149+
filters : List[Tuple] or List[List[Tuple]]
150+
151+
Notes
152+
-----
153+
See the internal ``pyarrow.parquet.core._DNF_filter_doc`` attribute for more details.
154+
155+
Examples
156+
--------
157+
158+
>>> filters_to_expression([('foo', '==', 'bar')])
159+
<pyarrow.compute.Expression (foo == "bar")>
147160
148-
See _DNF_filter_doc above for more details.
161+
Returns
162+
-------
163+
pyarrow.compute.Expression
149164
"""
150165
import pyarrow.dataset as ds
151166

@@ -191,6 +206,11 @@ def convert_single_predicate(col, op, val):
191206
return reduce(operator.or_, disjunction_members)
192207

193208

209+
_filters_to_expression = _deprecate_api(
210+
"_filters_to_expression", "filters_to_expression",
211+
filters_to_expression, "10.0.0", DeprecationWarning)
212+
213+
194214
# ----------------------------------------------------------------------
195215
# Reading a single Parquet file
196216

@@ -2343,7 +2363,7 @@ def __init__(self, path_or_paths, filesystem=None, *, filters=None,
23432363

23442364
self._filter_expression = None
23452365
if filters is not None:
2346-
self._filter_expression = _filters_to_expression(filters)
2366+
self._filter_expression = filters_to_expression(filters)
23472367

23482368
# map old filesystems to new one
23492369
if filesystem is not None:
@@ -3506,6 +3526,31 @@ def read_schema(where, memory_map=False, decryption_properties=None,
35063526
return file.schema.to_arrow_schema()
35073527

35083528

3509-
# re-export everything
3510-
# std `from . import *` ignores symbols with leading `_`
3511-
__all__ = list(sys.modules[__name__].__dict__)
3529+
__all__ = (
3530+
"ColumnChunkMetaData",
3531+
"ColumnSchema",
3532+
"FileDecryptionProperties",
3533+
"FileEncryptionProperties",
3534+
"FileMetaData",
3535+
"ParquetDataset",
3536+
"ParquetDatasetPiece",
3537+
"ParquetFile",
3538+
"ParquetLogicalType",
3539+
"ParquetManifest",
3540+
"ParquetPartitions",
3541+
"ParquetReader",
3542+
"ParquetSchema",
3543+
"ParquetWriter",
3544+
"PartitionSet",
3545+
"RowGroupMetaData",
3546+
"Statistics",
3547+
"read_metadata",
3548+
"read_pandas",
3549+
"read_schema",
3550+
"read_table",
3551+
"write_metadata",
3552+
"write_table",
3553+
"write_to_dataset",
3554+
"_filters_to_expression",
3555+
"filters_to_expression",
3556+
)

python/pyarrow/util.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,11 @@ def decorator(g):
3939
return decorator
4040

4141

42-
def _deprecate_api(old_name, new_name, api, next_version):
42+
def _deprecate_api(old_name, new_name, api, next_version, type=FutureWarning):
4343
msg = _DEPR_MSG.format(old_name, next_version, new_name)
4444

4545
def wrapper(*args, **kwargs):
46-
warnings.warn(msg, FutureWarning)
46+
warnings.warn(msg, type)
4747
return api(*args, **kwargs)
4848
return wrapper
4949

0 commit comments

Comments
 (0)