|
21 | 21 | from contextlib import nullcontext |
22 | 22 | from functools import partial, reduce |
23 | 23 |
|
24 | | -import sys |
25 | 24 | import json |
26 | 25 | from collections.abc import Collection |
27 | 26 | import numpy as np |
|
45 | 44 | from pyarrow.fs import (LocalFileSystem, FileSystem, |
46 | 45 | _resolve_filesystem_and_path, _ensure_filesystem) |
47 | 46 | from pyarrow import filesystem as legacyfs |
48 | | -from pyarrow.util import guid, _is_path_like, _stringify_path |
| 47 | +from pyarrow.util import guid, _is_path_like, _stringify_path, _deprecate_api |
49 | 48 |
|
50 | 49 | _URI_STRIP_SCHEMES = ('hdfs',) |
51 | 50 |
|
@@ -141,11 +140,27 @@ def _check_filters(filters, check_null_strings=True): |
141 | 140 | """ |
142 | 141 |
|
143 | 142 |
|
144 | | -def _filters_to_expression(filters): |
| 143 | +def filters_to_expression(filters): |
145 | 144 | """ |
146 | | - Check if filters are well-formed. |
| 145 | + Check if filters are well-formed and convert to an ``Expression``. |
| 146 | +
|
| 147 | + Parameters |
| 148 | + ---------- |
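| 149 | + filters : List[Tuple] or List[List[Tuple]] |
| | + Filter predicates in disjunctive normal form (DNF), each written as a ``(column, op, value)`` tuple. |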
| 150 | +
|
| 151 | + Notes |
| 152 | + ----- |
| 153 | + See the internal ``_DNF_filter_doc`` attribute defined earlier in this module for more details. |
| 154 | +
|
| 155 | + Examples |
| 156 | + -------- |
| 157 | +
|
| 158 | + >>> filters_to_expression([('foo', '==', 'bar')]) |
| 159 | + <pyarrow.compute.Expression (foo == "bar")> |
147 | 160 |
|
148 | | - See _DNF_filter_doc above for more details. |
| 161 | + Returns |
| 162 | + ------- |
| 163 | + pyarrow.compute.Expression |
149 | 164 | """ |
150 | 165 | import pyarrow.dataset as ds |
151 | 166 |
|
@@ -191,6 +206,11 @@ def convert_single_predicate(col, op, val): |
191 | 206 | return reduce(operator.or_, disjunction_members) |
192 | 207 |
|
193 | 208 |
|
| 209 | +_filters_to_expression = _deprecate_api( |
| 210 | + "_filters_to_expression", "filters_to_expression", |
| 211 | + filters_to_expression, "10.0.0", DeprecationWarning) |
| 212 | + |
| 213 | + |
194 | 214 | # ---------------------------------------------------------------------- |
195 | 215 | # Reading a single Parquet file |
196 | 216 |
|
@@ -2343,7 +2363,7 @@ def __init__(self, path_or_paths, filesystem=None, *, filters=None, |
2343 | 2363 |
|
2344 | 2364 | self._filter_expression = None |
2345 | 2365 | if filters is not None: |
2346 | | - self._filter_expression = _filters_to_expression(filters) |
| 2366 | + self._filter_expression = filters_to_expression(filters) |
2347 | 2367 |
|
2348 | 2368 | # map old filesystems to new one |
2349 | 2369 | if filesystem is not None: |
@@ -3506,6 +3526,31 @@ def read_schema(where, memory_map=False, decryption_properties=None, |
3506 | 3526 | return file.schema.to_arrow_schema() |
3507 | 3527 |
|
3508 | 3528 |
|
3509 | | -# re-export everything |
3510 | | -# std `from . import *` ignores symbols with leading `_` |
3511 | | -__all__ = list(sys.modules[__name__].__dict__) |
| 3529 | +__all__ = ( |
| 3530 | + "ColumnChunkMetaData", |
| 3531 | + "ColumnSchema", |
| 3532 | + "FileDecryptionProperties", |
| 3533 | + "FileEncryptionProperties", |
| 3534 | + "FileMetaData", |
| 3535 | + "ParquetDataset", |
| 3536 | + "ParquetDatasetPiece", |
| 3537 | + "ParquetFile", |
| 3538 | + "ParquetLogicalType", |
| 3539 | + "ParquetManifest", |
| 3540 | + "ParquetPartitions", |
| 3541 | + "ParquetReader", |
| 3542 | + "ParquetSchema", |
| 3543 | + "ParquetWriter", |
| 3544 | + "PartitionSet", |
| 3545 | + "RowGroupMetaData", |
| 3546 | + "Statistics", |
| 3547 | + "read_metadata", |
| 3548 | + "read_pandas", |
| 3549 | + "read_schema", |
| 3550 | + "read_table", |
| 3551 | + "write_metadata", |
| 3552 | + "write_table", |
| 3553 | + "write_to_dataset", |
| 3554 | + "_filters_to_expression", |
| 3555 | + "filters_to_expression", |
| 3556 | +) |
0 commit comments