Skip to content

Commit a0c6504

Browse files
dragosmg authored and jonkeane committed
ARROW-13887 [R] Capture error produced when reading in CSV file with headers and using a schema, and add suggestion
Closes apache#11432 from dragosmg/ARROW-13887_csv_header_schema_error Lead-authored-by: Dragos Moldovan-Grünfeld <dragos.mold@gmail.com> Co-authored-by: Dragoș Moldovan-Grünfeld <dragos.mold@gmail.com> Signed-off-by: Jonathan Keane <jkeane@gmail.com>
1 parent 273fab7 commit a0c6504

3 files changed

Lines changed: 42 additions & 1 deletion

File tree

r/R/csv.R

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,12 @@ read_delim_arrow <- function(file,
192192
convert_options = convert_options
193193
)
194194

195-
tab <- reader$Read()
195+
tryCatch(
196+
tab <- reader$Read(),
197+
error = function(e) {
198+
handle_csv_read_error(e, schema)
199+
}
200+
)
196201

197202
# TODO: move this into convert_options using include_columns
198203
col_select <- enquo(col_select)

r/R/util.R

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,3 +193,18 @@ repeat_value_as_array <- function(object, n) {
193193
}
194194
return(Scalar$create(object)$as_array(n))
195195
}
196+
197+
# Re-raise an error caught while reading a CSV file, appending a hint when the
# failure looks like a header row having been parsed as data.
#
# @param e The condition object caught by the caller's `tryCatch()`.
# @param schema The `schema` value the caller was given. The hint is only
#   added when this inherits from "Schema".
# @return Does not return normally: every path signals an error via `abort()`.
handle_csv_read_error <- function(e, schema) {
  msg <- conditionMessage(e)

  # The pattern is a literal phrase, so match it as a fixed string rather than
  # as a regular expression.
  is_conversion_error <- grepl("conversion error", msg, fixed = TRUE)

  if (is_conversion_error && inherits(schema, "Schema")) {
    abort(c(
      msg,
      i = paste(
        "If you have supplied a schema and your data contains a header",
        "row, you should supply the argument `skip = 1` to prevent the",
        "header being read in as data."
      )
    ))
  }

  # `abort()` expects a character message, not a condition object, so pass the
  # extracted text instead of `e` itself.
  abort(msg)
}

r/tests/testthat/test-csv.R

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,27 @@ test_that("Mix of guessing and declaring types", {
266266
expect_identical(df, tbl[, c("dbl", "false", "chr")])
267267
})
268268

269+
test_that("more informative error when reading a CSV with headers and schema", {
  # Write the example data to a temporary CSV; `write.csv()` emits a header
  # row by default. The file is removed when the test exits.
  csv_path <- tempfile()
  on.exit(unlink(csv_path))
  write.csv(example_data, csv_path, row.names = FALSE)

  # Explicit schema covering the columns of the file that was just written.
  declared_schema <- schema(
    int = int32(), dbl = float64(), dbl2 = float64(),
    lgl = boolean(), false = boolean(),
    chr = utf8(), fct = utf8()
  )

  # Reading with a schema but without skipping the header should fail with an
  # error whose message mentions the header row.
  expect_error(
    read_csv_arrow(csv_path, schema = declared_schema),
    "header row"
  )
})
269290

270291
test_that("Write a CSV file with header", {
271292
tbl_out <- write_csv_arrow(tbl_no_dates, csv_file)

0 commit comments

Comments
 (0)