Skip to content

Commit 34a7522

Browse files
committed
ARROW-7977: [C++] Rename fs::FileStats to fs::FileInfo
If we use FileInfo instead of FileStats, we can use the singular form "info" and the plural form "infos" as variable names instead of "stats" and "stats_vector". This helps in writing readable code. Closes apache#6514 from kou/cpp-file-system-stat and squashes the following commits: de69164 <Sutou Kouhei> Rename fs::FileStats to fs::FileInfo Authored-by: Sutou Kouhei <kou@clear-code.com> Signed-off-by: Sutou Kouhei <kou@clear-code.com>
1 parent 6fa6c91 commit 34a7522

50 files changed

Lines changed: 1213 additions & 1205 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

cpp/src/arrow/dataset/dataset_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ TEST_F(TestEndToEnd, EndToEndSingleDataset) {
309309
// A selector is used to crawl files and directories of a
310310
// filesystem. If the options in FileSelector are not enough, the
311311
// FileSystemDatasetFactory class also supports an explicit list of
312-
// fs::FileStats instead of the selector.
312+
// fs::FileInfo instead of the selector.
313313
fs::FileSelector s;
314314
s.base_dir = "/dataset";
315315
s.recursive = true;

cpp/src/arrow/dataset/discovery.cc

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -127,25 +127,25 @@ Result<fs::PathForest> FileSystemDatasetFactory::Filter(
127127
const std::shared_ptr<fs::FileSystem>& filesystem,
128128
const std::shared_ptr<FileFormat>& format, const FileSystemFactoryOptions& options,
129129
fs::PathForest forest) {
130-
fs::FileStatsVector out;
130+
std::vector<fs::FileInfo> out;
131131

132-
auto& stats = forest.stats();
132+
auto& infos = forest.infos();
133133
RETURN_NOT_OK(forest.Visit([&](fs::PathForest::Ref ref) -> fs::PathForest::MaybePrune {
134-
const auto& path = ref.stats().path();
134+
const auto& path = ref.info().path();
135135

136136
if (StartsWithAnyOf(options.ignore_prefixes, path)) {
137137
return fs::PathForest::Prune;
138138
}
139139

140-
if (ref.stats().IsFile() && options.exclude_invalid_files) {
140+
if (ref.info().IsFile() && options.exclude_invalid_files) {
141141
ARROW_ASSIGN_OR_RAISE(auto supported,
142142
format->IsSupported(FileSource(path, filesystem.get())));
143143
if (!supported) {
144144
return fs::PathForest::Continue;
145145
}
146146
}
147147

148-
out.push_back(std::move(stats[ref.i]));
148+
out.push_back(std::move(infos[ref.i]));
149149
return fs::PathForest::Continue;
150150
}));
151151

@@ -155,25 +155,25 @@ Result<fs::PathForest> FileSystemDatasetFactory::Filter(
155155
Result<std::shared_ptr<DatasetFactory>> FileSystemDatasetFactory::Make(
156156
std::shared_ptr<fs::FileSystem> filesystem, const std::vector<std::string>& paths,
157157
std::shared_ptr<FileFormat> format, FileSystemFactoryOptions options) {
158-
ARROW_ASSIGN_OR_RAISE(auto files, filesystem->GetTargetStats(paths));
158+
ARROW_ASSIGN_OR_RAISE(auto files, filesystem->GetTargetInfos(paths));
159159
ARROW_ASSIGN_OR_RAISE(auto forest, fs::PathForest::Make(std::move(files)));
160160

161-
std::unordered_set<fs::FileStats, fs::FileStats::ByPath> missing;
161+
std::unordered_set<fs::FileInfo, fs::FileInfo::ByPath> missing;
162162
DCHECK_OK(forest.Visit([&](fs::PathForest::Ref ref) {
163163
util::string_view parent_path = options.partition_base_dir;
164164
if (auto parent = ref.parent()) {
165-
parent_path = parent.stats().path();
165+
parent_path = parent.info().path();
166166
}
167167

168168
for (auto&& path :
169-
fs::internal::AncestorsFromBasePath(parent_path, ref.stats().path())) {
170-
ARROW_ASSIGN_OR_RAISE(auto file, filesystem->GetTargetStats(std::move(path)));
169+
fs::internal::AncestorsFromBasePath(parent_path, ref.info().path())) {
170+
ARROW_ASSIGN_OR_RAISE(auto file, filesystem->GetTargetInfo(std::move(path)));
171171
missing.insert(std::move(file));
172172
}
173173
return Status::OK();
174174
}));
175175

176-
files = std::move(forest).stats();
176+
files = std::move(forest).infos();
177177
std::move(missing.begin(), missing.end(), std::back_inserter(files));
178178

179179
ARROW_ASSIGN_OR_RAISE(forest, fs::PathForest::Make(std::move(files)));
@@ -187,7 +187,7 @@ Result<std::shared_ptr<DatasetFactory>> FileSystemDatasetFactory::Make(
187187
Result<std::shared_ptr<DatasetFactory>> FileSystemDatasetFactory::Make(
188188
std::shared_ptr<fs::FileSystem> filesystem, fs::FileSelector selector,
189189
std::shared_ptr<FileFormat> format, FileSystemFactoryOptions options) {
190-
ARROW_ASSIGN_OR_RAISE(auto files, filesystem->GetTargetStats(selector));
190+
ARROW_ASSIGN_OR_RAISE(auto files, filesystem->GetTargetInfos(selector));
191191

192192
ARROW_ASSIGN_OR_RAISE(auto forest, fs::PathForest::Make(std::move(files)));
193193

@@ -210,9 +210,9 @@ Result<std::shared_ptr<Schema>> FileSystemDatasetFactory::PartitionSchema() {
210210
}
211211

212212
std::vector<util::string_view> paths;
213-
for (const auto& stats : forest_.stats()) {
213+
for (const auto& info : forest_.infos()) {
214214
if (auto relative =
215-
fs::internal::RemoveAncestor(options_.partition_base_dir, stats.path())) {
215+
fs::internal::RemoveAncestor(options_.partition_base_dir, info.path())) {
216216
paths.push_back(*relative);
217217
}
218218
}
@@ -223,7 +223,7 @@ Result<std::shared_ptr<Schema>> FileSystemDatasetFactory::PartitionSchema() {
223223
Result<std::vector<std::shared_ptr<Schema>>> FileSystemDatasetFactory::InspectSchemas() {
224224
std::vector<std::shared_ptr<Schema>> schemas;
225225

226-
for (const auto& f : forest_.stats()) {
226+
for (const auto& f : forest_.infos()) {
227227
if (!f.IsFile()) continue;
228228
FileSource src(f.path(), fs_.get());
229229
ARROW_ASSIGN_OR_RAISE(auto schema, format_->Inspect(src));
@@ -259,7 +259,7 @@ Result<std::shared_ptr<Dataset>> FileSystemDatasetFactory::Finish(
259259
// apply partitioning to forest to derive partitions
260260
auto apply_partitioning = [&](fs::PathForest::Ref ref) {
261261
if (auto relative = fs::internal::RemoveAncestor(options_.partition_base_dir,
262-
ref.stats().path())) {
262+
ref.info().path())) {
263263
auto segments = fs::internal::SplitAbstractPath(relative->to_string());
264264

265265
if (segments.size() > 0) {

cpp/src/arrow/dataset/discovery.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ struct FileSystemFactoryOptions {
149149
};
150150

151151
/// \brief FileSystemDatasetFactory creates a Dataset from a vector of
152-
/// fs::FileStats or a fs::FileSelector.
152+
/// fs::FileInfo or a fs::FileSelector.
153153
class ARROW_DS_EXPORT FileSystemDatasetFactory : public DatasetFactory {
154154
public:
155155
/// \brief Build a FileSystemDatasetFactory from an explicit list of
@@ -165,7 +165,7 @@ class ARROW_DS_EXPORT FileSystemDatasetFactory : public DatasetFactory {
165165

166166
/// \brief Build a FileSystemDatasetFactory from a fs::FileSelector.
167167
///
168-
/// The selector will expand to a vector of FileStats. The expansion/crawling
168+
/// The selector will expand to a vector of FileInfo. The expansion/crawling
169169
/// is performed in this function call. Thus, the finalized Dataset is
170170
/// working with a snapshot of the filesystem.
171171
//

cpp/src/arrow/dataset/discovery_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ TEST_F(MockDatasetFactoryTest, UnifySchemas) {
133133

134134
class FileSystemDatasetFactoryTest : public DatasetFactoryTest {
135135
public:
136-
void MakeFactory(const std::vector<fs::FileStats>& files) {
136+
void MakeFactory(const std::vector<fs::FileInfo>& files) {
137137
MakeFileSystem(files);
138138
ASSERT_OK_AND_ASSIGN(factory_, FileSystemDatasetFactory::Make(fs_, selector_, format_,
139139
factory_options_));

cpp/src/arrow/dataset/file_base.cc

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -60,17 +60,17 @@ FileSystemDataset::FileSystemDataset(std::shared_ptr<Schema> schema,
6060
Result<std::shared_ptr<Dataset>> FileSystemDataset::Make(
6161
std::shared_ptr<Schema> schema, std::shared_ptr<Expression> root_partition,
6262
std::shared_ptr<FileFormat> format, std::shared_ptr<fs::FileSystem> filesystem,
63-
fs::FileStatsVector stats) {
64-
ExpressionVector partitions(stats.size(), scalar(true));
63+
std::vector<fs::FileInfo> infos) {
64+
ExpressionVector partitions(infos.size(), scalar(true));
6565
return Make(std::move(schema), std::move(root_partition), std::move(format),
66-
std::move(filesystem), std::move(stats), std::move(partitions));
66+
std::move(filesystem), std::move(infos), std::move(partitions));
6767
}
6868

6969
Result<std::shared_ptr<Dataset>> FileSystemDataset::Make(
7070
std::shared_ptr<Schema> schema, std::shared_ptr<Expression> root_partition,
7171
std::shared_ptr<FileFormat> format, std::shared_ptr<fs::FileSystem> filesystem,
72-
fs::FileStatsVector stats, ExpressionVector partitions) {
73-
ARROW_ASSIGN_OR_RAISE(auto forest, fs::PathForest::Make(std::move(stats), &partitions));
72+
std::vector<fs::FileInfo> infos, ExpressionVector partitions) {
73+
ARROW_ASSIGN_OR_RAISE(auto forest, fs::PathForest::Make(std::move(infos), &partitions));
7474
return Make(std::move(schema), std::move(root_partition), std::move(format),
7575
std::move(filesystem), std::move(forest), std::move(partitions));
7676
}
@@ -88,8 +88,8 @@ std::vector<std::string> FileSystemDataset::files() const {
8888
std::vector<std::string> files;
8989

9090
DCHECK_OK(forest_.Visit([&](fs::PathForest::Ref ref) {
91-
if (ref.stats().IsFile()) {
92-
files.push_back(ref.stats().path());
91+
if (ref.info().IsFile()) {
92+
files.push_back(ref.info().path());
9393
}
9494
return Status::OK();
9595
}));
@@ -105,7 +105,7 @@ std::string FileSystemDataset::ToString() const {
105105
}
106106

107107
DCHECK_OK(forest_.Visit([&](fs::PathForest::Ref ref) {
108-
repr += "\n" + ref.stats().path();
108+
repr += "\n" + ref.info().path();
109109

110110
if (!partitions_[ref.i]->Equals(true)) {
111111
repr += ": " + partitions_[ref.i]->ToString();
@@ -177,9 +177,9 @@ FragmentIterator FileSystemDataset::GetFragmentsImpl(
177177
}
178178
}
179179

180-
if (ref.stats().IsFile()) {
180+
if (ref.info().IsFile()) {
181181
// generate a fragment for this file
182-
FileSource src(ref.stats().path(), filesystem_.get());
182+
FileSource src(ref.info().path(), filesystem_.get());
183183
ARROW_ASSIGN_OR_RAISE(auto fragment, format_->MakeFragment(src, options[ref.i]));
184184
fragments.push_back(std::move(fragment));
185185
}

cpp/src/arrow/dataset/file_base.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -161,34 +161,34 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset {
161161
/// \param[in] root_partition the top-level partition of the DataDataset
162162
/// \param[in] format file format to create fragments from.
163163
/// \param[in] filesystem the filesystem which files are from.
164-
/// \param[in] stats a list of files/directories to consume.
165-
/// attach additional partition expressions to FileStats found in `stats`.
164+
/// \param[in] infos a list of files/directories to consume.
165+
/// attach additional partition expressions to FileInfo found in `infos`.
166166
///
167167
/// The caller is not required to provide a complete coverage of nodes and
168168
/// partitions.
169169
static Result<std::shared_ptr<Dataset>> Make(std::shared_ptr<Schema> schema,
170170
std::shared_ptr<Expression> root_partition,
171171
std::shared_ptr<FileFormat> format,
172172
std::shared_ptr<fs::FileSystem> filesystem,
173-
fs::FileStatsVector stats);
173+
std::vector<fs::FileInfo> infos);
174174

175175
/// \brief Create a FileSystemDataset with file-level partitions.
176176
///
177177
/// \param[in] schema the top-level schema of the DataDataset
178178
/// \param[in] root_partition the top-level partition of the DataDataset
179179
/// \param[in] format file format to create fragments from.
180180
/// \param[in] filesystem the filesystem which files are from.
181-
/// \param[in] stats a list of files/directories to consume.
182-
/// \param[in] partitions partition information associated with `stats`.
183-
/// attach additional partition expressions to FileStats found in `stats`.
181+
/// \param[in] infos a list of files/directories to consume.
182+
/// \param[in] partitions partition information associated with `infos`.
183+
/// attach additional partition expressions to FileInfo found in `infos`.
184184
///
185185
/// The caller is not required to provide a complete coverage of nodes and
186186
/// partitions.
187187
static Result<std::shared_ptr<Dataset>> Make(std::shared_ptr<Schema> schema,
188188
std::shared_ptr<Expression> root_partition,
189189
std::shared_ptr<FileFormat> format,
190190
std::shared_ptr<fs::FileSystem> filesystem,
191-
fs::FileStatsVector stats,
191+
std::vector<fs::FileInfo> infos,
192192
ExpressionVector partitions);
193193

194194
/// \brief Create a FileSystemDataset with file-level partitions.
@@ -199,7 +199,7 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset {
199199
/// \param[in] filesystem the filesystem which files are from.
200200
/// \param[in] forest a PathForest of files/directories to consume.
201201
/// \param[in] partitions partition information associated with `forest`.
202-
/// attach additional partition expressions to FileStats found in `forest`.
202+
/// attach additional partition expressions to FileInfo found in `forest`.
203203
///
204204
/// The caller is not required to provide a complete coverage of nodes and
205205
/// partitions.

cpp/src/arrow/dataset/file_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ TEST_F(TestFileSystemDataset, RootPartitionPruning) {
122122

123123
TEST_F(TestFileSystemDataset, TreePartitionPruning) {
124124
auto source_partition = ("country"_ == "US").Copy();
125-
std::vector<fs::FileStats> regions = {
125+
std::vector<fs::FileInfo> regions = {
126126
fs::Dir("NY"), fs::File("NY/New York"), fs::File("NY/Franklin"),
127127
fs::Dir("CA"), fs::File("CA/San Francisco"), fs::File("CA/Franklin"),
128128
};

cpp/src/arrow/dataset/partition.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,6 @@
3131

3232
namespace arrow {
3333

34-
namespace fs {
35-
struct FileStats;
36-
struct FileSelector;
37-
} // namespace fs
38-
3934
namespace dataset {
4035

4136
// ----------------------------------------------------------------------

cpp/src/arrow/dataset/test_util.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -286,29 +286,29 @@ Result<std::shared_ptr<Fragment>> JSONRecordBatchFileFormat::MakeFragment(
286286

287287
class TestFileSystemDataset : public ::testing::Test {
288288
public:
289-
void MakeFileSystem(const std::vector<fs::FileStats>& stats) {
290-
ASSERT_OK_AND_ASSIGN(fs_, fs::internal::MockFileSystem::Make(fs::kNoTime, stats));
289+
void MakeFileSystem(const std::vector<fs::FileInfo>& infos) {
290+
ASSERT_OK_AND_ASSIGN(fs_, fs::internal::MockFileSystem::Make(fs::kNoTime, infos));
291291
}
292292

293293
void MakeFileSystem(const std::vector<std::string>& paths) {
294-
std::vector<fs::FileStats> stats{paths.size()};
295-
std::transform(paths.cbegin(), paths.cend(), stats.begin(),
294+
std::vector<fs::FileInfo> infos{paths.size()};
295+
std::transform(paths.cbegin(), paths.cend(), infos.begin(),
296296
[](const std::string& p) { return fs::File(p); });
297297

298-
ASSERT_OK_AND_ASSIGN(fs_, fs::internal::MockFileSystem::Make(fs::kNoTime, stats));
298+
ASSERT_OK_AND_ASSIGN(fs_, fs::internal::MockFileSystem::Make(fs::kNoTime, infos));
299299
}
300300

301-
void MakeDataset(const std::vector<fs::FileStats>& stats,
301+
void MakeDataset(const std::vector<fs::FileInfo>& infos,
302302
std::shared_ptr<Expression> source_partition = scalar(true),
303303
ExpressionVector partitions = {}) {
304304
if (partitions.empty()) {
305-
partitions.resize(stats.size(), scalar(true));
305+
partitions.resize(infos.size(), scalar(true));
306306
}
307307

308-
MakeFileSystem(stats);
308+
MakeFileSystem(infos);
309309
auto format = std::make_shared<DummyFileFormat>();
310310
ASSERT_OK_AND_ASSIGN(
311-
source_, FileSystemDataset::Make(schema({}), source_partition, format, fs_, stats,
311+
source_, FileSystemDataset::Make(schema({}), source_partition, format, fs_, infos,
312312
partitions));
313313
}
314314

cpp/src/arrow/dataset/type_fwd.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ namespace fs {
3535

3636
class FileSystem;
3737

38-
struct FileStats;
39-
using FileStatsVector = std::vector<FileStats>;
38+
struct FileInfo;
4039

4140
} // namespace fs
4241

0 commit comments

Comments
 (0)