Skip to content

Commit 9c8ddae

Browse files
romainfrancois authored and
wesm committed
ARROW-3942: [R] Feather api fixes
Some fixes to follow up on apache#3043, plus a new `columns` argument to `read_feather` that can be: - a character vector - an integer vector (1-based, as in R) - NULL, to get all columns (the default). Also adds an `as_tibble` argument to `read_feather` to switch between a data.frame and an arrow::Table return value. Author: Romain Francois <romain@purrple.cat> Closes apache#3106 from romainfrancois/ARROW-3942/feather and squashes the following commits: 13061af <Romain Francois> fixed link in documentation ce414c1 <Romain Francois> + as_tibble argument to read_feather() d6c30a3 <Romain Francois> + columns argument to read_feather() 46a6fbb <Romain Francois> Update feather factories
1 parent 9da4584 commit 9c8ddae

9 files changed

Lines changed: 134 additions & 52 deletions

File tree

r/NAMESPACE

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ S3method("==","arrow::RecordBatch")
88
S3method("==","arrow::ipc::Message")
99
S3method(BufferReader,"arrow::Buffer")
1010
S3method(BufferReader,default)
11+
S3method(FeatherTableReader,"arrow::io::RandomAccessFile")
12+
S3method(FeatherTableReader,"arrow::ipc::feather::TableReader")
13+
S3method(FeatherTableReader,character)
14+
S3method(FeatherTableReader,default)
15+
S3method(FeatherTableReader,fs_path)
16+
S3method(FeatherTableWriter,"arrow::io::OutputStream")
1117
S3method(FixedSizeBufferWriter,"arrow::Buffer")
1218
S3method(FixedSizeBufferWriter,default)
1319
S3method(MessageReader,"arrow::io::InputStream")
@@ -33,12 +39,6 @@ S3method(buffer,default)
3339
S3method(buffer,integer)
3440
S3method(buffer,numeric)
3541
S3method(buffer,raw)
36-
S3method(feather_table_reader,"arrow::io::RandomAccessFile")
37-
S3method(feather_table_reader,"arrow::ipc::feather::TableReader")
38-
S3method(feather_table_reader,character)
39-
S3method(feather_table_reader,default)
40-
S3method(feather_table_reader,fs_path)
41-
S3method(feather_table_writer,"arrow::io::OutputStream")
4242
S3method(length,"arrow::Array")
4343
S3method(names,"arrow::RecordBatch")
4444
S3method(print,"arrow-enum")
@@ -70,6 +70,8 @@ S3method(write_feather_RecordBatch,fs_path)
7070
export(BufferOutputStream)
7171
export(BufferReader)
7272
export(DateUnit)
73+
export(FeatherTableReader)
74+
export(FeatherTableWriter)
7375
export(FileMode)
7476
export(FileOutputStream)
7577
export(FixedSizeBufferWriter)
@@ -95,8 +97,6 @@ export(date64)
9597
export(decimal)
9698
export(default_memory_pool)
9799
export(dictionary)
98-
export(feather_table_reader)
99-
export(feather_table_writer)
100100
export(field)
101101
export(float16)
102102
export(float32)

r/R/RcppExports.R

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/R/feather.R

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@
3535
num_columns = function() ipc___feather___TableReader__num_columns(self),
3636
GetColumnName = function(i) ipc___feather___TableReader__GetColumnName(self, i),
3737
GetColumn = function(i) shared_ptr(`arrow::Column`, ipc___feather___TableReader__GetColumn(self, i)),
38-
Read = function() shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self))
38+
Read = function(columns) {
39+
shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self, columns))
40+
}
3941
)
4042
)
4143

@@ -44,12 +46,12 @@
4446
#' @param stream an OutputStream
4547
#'
4648
#' @export
47-
feather_table_writer <- function(stream) {
48-
UseMethod("feather_table_writer")
49+
FeatherTableWriter <- function(stream) {
50+
UseMethod("FeatherTableWriter")
4951
}
5052

5153
#' @export
52-
`feather_table_writer.arrow::io::OutputStream` <- function(stream){
54+
`FeatherTableWriter.arrow::io::OutputStream` <- function(stream){
5355
unique_ptr(`arrow::ipc::feather::TableWriter`, ipc___feather___TableWriter__Open(stream))
5456
}
5557

@@ -107,7 +109,7 @@ write_feather_RecordBatch <- function(data, stream) {
107109
#' @export
108110
#' @method write_feather_RecordBatch arrow::io::OutputStream
109111
`write_feather_RecordBatch.arrow::io::OutputStream` <- function(data, stream) {
110-
ipc___TableWriter__RecordBatch__WriteFeather(feather_table_writer(stream), data)
112+
ipc___TableWriter__RecordBatch__WriteFeather(FeatherTableWriter(stream), data)
111113
}
112114

113115
#' An arrow::ipc::feather::TableReader to read from a file
@@ -117,44 +119,50 @@ write_feather_RecordBatch <- function(data, stream) {
117119
#' @param ... extra parameters
118120
#'
119121
#' @export
120-
feather_table_reader <- function(file, mmap = TRUE, ...){
121-
UseMethod("feather_table_reader")
122+
FeatherTableReader <- function(file, mmap = TRUE, ...){
123+
UseMethod("FeatherTableReader")
122124
}
123125

124126
#' @export
125-
feather_table_reader.default <- function(file, mmap = TRUE, ...) {
127+
FeatherTableReader.default <- function(file, mmap = TRUE, ...) {
126128
stop("unsupported")
127129
}
128130

129131
#' @export
130-
feather_table_reader.character <- function(file, mmap = TRUE, ...) {
131-
feather_table_reader(fs::path_abs(file), mmap = mmap, ...)
132+
FeatherTableReader.character <- function(file, mmap = TRUE, ...) {
133+
FeatherTableReader(fs::path_abs(file), mmap = mmap, ...)
132134
}
133135

134136
#' @export
135-
feather_table_reader.fs_path <- function(file, mmap = TRUE, ...) {
137+
FeatherTableReader.fs_path <- function(file, mmap = TRUE, ...) {
136138
stream <- if(isTRUE(mmap)) mmap_open(file, ...) else ReadableFile(file, ...)
137-
feather_table_reader(stream)
139+
FeatherTableReader(stream)
138140
}
139141

140142
#' @export
141-
`feather_table_reader.arrow::io::RandomAccessFile` <- function(file, mmap = TRUE, ...){
143+
`FeatherTableReader.arrow::io::RandomAccessFile` <- function(file, mmap = TRUE, ...){
142144
unique_ptr(`arrow::ipc::feather::TableReader`, ipc___feather___TableReader__Open(file))
143145
}
144146

145147
#' @export
146-
`feather_table_reader.arrow::ipc::feather::TableReader` <- function(file, mmap = TRUE, ...){
148+
`FeatherTableReader.arrow::ipc::feather::TableReader` <- function(file, mmap = TRUE, ...){
147149
file
148150
}
149151

150152
#' Read a feather file
151153
#'
152-
#' @param file a arrow::ipc::feather::TableReader or whatever the [feather_table_reader()] function can handle
154+
#' @param file an arrow::ipc::feather::TableReader or whatever the [FeatherTableReader()] function can handle
155+
#' @param columns names (character vector) or 1-based positions (integer vector) of the columns to read. The default `NULL` means all columns
156+
#' @param as_tibble should the [arrow::Table][arrow__Table] be converted to a tibble.
153157
#' @param ... additional parameters
154158
#'
155-
#' @return an arrow::Table
159+
#' @return a data frame if `as_tibble` is `TRUE` (the default), or an [arrow::Table][arrow__Table] otherwise
156160
#'
157161
#' @export
158-
read_feather <- function(file, ...){
159-
feather_table_reader(file, ...)$Read()
162+
read_feather <- function(file, columns = NULL, as_tibble = TRUE, ...){
163+
out <- FeatherTableReader(file, ...)$Read(columns)
164+
if (isTRUE(as_tibble)) {
165+
out <- as_tibble(out)
166+
}
167+
out
160168
}
Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/man/read_feather.Rd

Lines changed: 7 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/src/RcppExports.cpp

Lines changed: 5 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/src/feather.cpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,37 @@ std::shared_ptr<arrow::Column> ipc___feather___TableReader__GetColumn(
115115

116116
// [[Rcpp::export]]
117117
std::shared_ptr<arrow::Table> ipc___feather___TableReader__Read(
118-
const std::unique_ptr<arrow::ipc::feather::TableReader>& reader) {
118+
const std::unique_ptr<arrow::ipc::feather::TableReader>& reader, SEXP columns) {
119119
std::shared_ptr<arrow::Table> table;
120-
STOP_IF_NOT_OK(reader->Read(&table));
120+
121+
switch (TYPEOF(columns)) {
122+
case INTSXP: {
123+
R_xlen_t n = XLENGTH(columns);
124+
std::vector<int> indices(n);
125+
int* p_columns = INTEGER(columns);
126+
for (int i = 0; i < n; i++) {
127+
indices[i] = p_columns[i] - 1;
128+
}
129+
STOP_IF_NOT_OK(reader->Read(indices, &table));
130+
break;
131+
}
132+
case STRSXP: {
133+
R_xlen_t n = XLENGTH(columns);
134+
std::vector<std::string> names(n);
135+
for (R_xlen_t i = 0; i < n; i++) {
136+
names[i] = CHAR(STRING_ELT(columns, i));
137+
}
138+
STOP_IF_NOT_OK(reader->Read(names, &table));
139+
break;
140+
}
141+
case NILSXP:
142+
STOP_IF_NOT_OK(reader->Read(&table));
143+
break;
144+
default:
145+
Rcpp::stop("incompatible column specification");
146+
break;
147+
};
148+
121149
return table;
122150
}
123151

r/tests/testthat/test-feather.R

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,25 +34,66 @@ test_that("feather read/write round trip", {
3434
expect_true(fs::file_exists(tf3))
3535

3636
tab1 <- read_feather(tf1)
37-
expect_is(tab1, "arrow::Table")
37+
expect_is(tab1, "data.frame")
3838

3939
tab2 <- read_feather(tf2)
40-
expect_is(tab2, "arrow::Table")
40+
expect_is(tab2, "data.frame")
4141

4242
tab3 <- read_feather(tf3)
43-
expect_is(tab3, "arrow::Table")
43+
expect_is(tab3, "data.frame")
4444

4545
# reading directly from arrow::io::MemoryMappedFile
4646
tab4 <- read_feather(mmap_open(tf3))
47-
expect_is(tab4, "arrow::Table")
47+
expect_is(tab4, "data.frame")
4848

4949
# reading directly from arrow::io::ReadableFile
5050
tab5 <- read_feather(ReadableFile(tf3))
51-
expect_is(tab5, "arrow::Table")
51+
expect_is(tab5, "data.frame")
52+
53+
expect_equal(tib, tab1)
54+
expect_equal(tib, tab2)
55+
expect_equal(tib, tab3)
56+
expect_equal(tib, tab4)
57+
expect_equal(tib, tab5)
58+
})
59+
60+
test_that("feather handles columns = <names>", {
61+
tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])
62+
63+
tf1 <- local_tempfile()
64+
write_feather(tib, tf1)
65+
expect_true(fs::file_exists(tf1))
66+
67+
tab1 <- read_feather(tf1, columns = c("x", "y"))
68+
expect_is(tab1, "data.frame")
69+
70+
expect_equal(tib[, c("x", "y")], as_tibble(tab1))
71+
})
72+
73+
test_that("feather handles columns = <integer>", {
74+
tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])
75+
76+
tf1 <- local_tempfile()
77+
write_feather(tib, tf1)
78+
expect_true(fs::file_exists(tf1))
79+
80+
tab1 <- read_feather(tf1, columns = 1:2)
81+
expect_is(tab1, "data.frame")
82+
83+
expect_equal(tib[, c("x", "y")], as_tibble(tab1))
84+
})
85+
86+
test_that("feather read/write round trip", {
87+
tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])
88+
89+
tf1 <- local_tempfile()
90+
write_feather(tib, tf1)
91+
expect_true(fs::file_exists(tf1))
92+
93+
tab1 <- read_feather(tf1, as_tibble = FALSE)
94+
expect_is(tab1, "arrow::Table")
5295

5396
expect_equal(tib, as_tibble(tab1))
54-
expect_equal(tib, as_tibble(tab2))
55-
expect_equal(tib, as_tibble(tab3))
56-
expect_equal(tib, as_tibble(tab4))
57-
expect_equal(tib, as_tibble(tab5))
5897
})
98+
99+

0 commit comments

Comments
 (0)