|
17 | 17 |
|
18 | 18 | #include "arrow/dataset/discovery.h" |
19 | 19 |
|
20 | | -#include <memory> |
21 | | -#include <utility> |
22 | | - |
23 | 20 | #include <gmock/gmock.h> |
24 | 21 | #include <gtest/gtest.h> |
25 | 22 |
|
| 23 | +#include <memory> |
| 24 | +#include <utility> |
| 25 | + |
| 26 | +#include "arrow/dataset/filter.h" |
26 | 27 | #include "arrow/dataset/partition.h" |
27 | 28 | #include "arrow/dataset/test_util.h" |
28 | 29 | #include "arrow/filesystem/test_util.h" |
29 | 30 | #include "arrow/testing/gtest_util.h" |
30 | 31 | #include "arrow/type_fwd.h" |
| 32 | +#include "arrow/util/checked_cast.h" |
31 | 33 |
|
32 | 34 | using testing::SizeIs; |
33 | 35 |
|
@@ -198,9 +200,10 @@ TEST_F(FileSystemDatasetFactoryTest, ExplicitPartition) { |
198 | 200 |
|
199 | 201 | TEST_F(FileSystemDatasetFactoryTest, DiscoveredPartition) { |
200 | 202 | selector_.base_dir = "a=ignored/base"; |
| 203 | + selector_.recursive = true; |
201 | 204 | factory_options_.partitioning = HivePartitioning::MakeFactory(); |
202 | 205 |
|
203 | | - auto a_1 = "a=ignored/base/a=1"; |
| 206 | + auto a_1 = "a=ignored/base/a=1/file.data"; |
204 | 207 | MakeFactory({fs::File(a_1)}); |
205 | 208 |
|
206 | 209 | InspectOptions options; |
@@ -340,6 +343,27 @@ TEST_F(FileSystemDatasetFactoryTest, InspectFragmentsLimit) { |
340 | 343 | } |
341 | 344 | } |
342 | 345 |
|
| 346 | +TEST_F(FileSystemDatasetFactoryTest, FilenameNotPartOfPartitions) { |
| 347 | + // ARROW-8726: Ensure the filename is never treated as a partition segment. |
| 348 | + |
| 349 | + // Build a directory partitioning with 2 explicit fields. The type `int32` is |
| 350 | + // chosen for the second field so that parsing would fail if a non-integer |
| 351 | + // string (such as the filename below) were fed to it. |
| 352 | + auto s = schema({field("first", utf8()), field("second", int32())}); |
| 353 | + factory_options_.partitioning = std::make_shared<DirectoryPartitioning>(s); |
| 354 | + |
| 355 | + selector_.recursive = true; |
| 356 | + // The file has no directory component for the second partition column. In |
| 357 | + // that case the filename must not be used; only "first" == "one" is expected. |
| 358 | + MakeFactory({fs::File("one/file.parquet")}); |
| 359 | + |
| 360 | + ASSERT_OK_AND_ASSIGN(auto dataset, factory_->Finish()); |
| 361 | + for (const auto& maybe_fragment : dataset->GetFragments()) { |
| 362 | + ASSERT_OK_AND_ASSIGN(auto fragment, maybe_fragment); |
| 363 | + ASSERT_TRUE(fragment->partition_expression()->Equals(("first"_ == "one"))); |
| 364 | + } |
| 365 | +} |
| 366 | + |
343 | 367 | std::shared_ptr<DatasetFactory> DatasetFactoryFromSchemas( |
344 | 368 | std::vector<std::shared_ptr<Schema>> schemas) { |
345 | 369 | return std::make_shared<MockDatasetFactory>(schemas); |
|
0 commit comments