@@ -57,21 +57,53 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
5757 if (.Generic %in% names(.unary_function_map )) {
5858 expr <- array_expression(.unary_function_map [[.Generic ]], e1 )
5959 } else {
60- e1 <- .wrap_arrow(e1 , .Generic , e2 $ type )
61- e2 <- .wrap_arrow(e2 , .Generic , e1 $ type )
60+ e1 <- .wrap_arrow(e1 , .Generic )
61+ e2 <- .wrap_arrow(e2 , .Generic )
62+
63+ # In Arrow, "divide" is one function, which does integer division on
64+ # integer inputs and floating-point division on floats
65+ if (.Generic == " /" ) {
66+ # TODO: omg so many ways it's wrong to assume these types
67+ e1 <- cast_array_expression(e1 , float64())
68+ e2 <- cast_array_expression(e2 , float64())
69+ } else if (.Generic == " %/%" ) {
70+ # In R, integer division works like floor(float division)
71+ out <- build_array_expression(" /" , e1 , e2 )
72+ return (cast_array_expression(out , int32(), allow_float_truncate = TRUE ))
73+ } else if (.Generic == " %%" ) {
74+ # {e1 - e2 * ( e1 %/% e2 )}
75+ # ^^^ form doesn't work because Ops.Array evaluates eagerly,
76+ # but we can build that up
77+ quotient <- build_array_expression(" %/%" , e1 , e2 )
78+ # this cast is to ensure that the result of this and e1 are the same
79+ # (autocasting only applies to scalars)
80+ base <- cast_array_expression(quotient * e2 , e1 $ type )
81+ return (build_array_expression(" -" , e1 , base ))
82+ }
83+
6284 expr <- array_expression(.binary_function_map [[.Generic ]], e1 , e2 , ... )
6385 }
6486 expr
6587}
6688
67- .wrap_arrow <- function (arg , fun , type ) {
89+ cast_array_expression <- function (x , to_type , safe = TRUE , ... ) {
90+ opts <- list (
91+ to_type = to_type ,
92+ allow_int_overflow = ! safe ,
93+ allow_time_truncate = ! safe ,
94+ allow_float_truncate = ! safe
95+ )
96+ array_expression(" cast" , x , options = modifyList(opts , list (... )))
97+ }
98+
99+ .wrap_arrow <- function (arg , fun ) {
68100 if (! inherits(arg , c(" ArrowObject" , " array_expression" ))) {
69101 # TODO: Array$create if lengths are equal?
70102 # TODO: these kernels should autocast like the dataset ones do (e.g. int vs. float)
71103 if (fun == " %in%" ) {
72- arg <- Array $ create(arg , type = type )
104+ arg <- Array $ create(arg )
73105 } else {
74- arg <- Scalar $ create(arg , type = type )
106+ arg <- Scalar $ create(arg )
75107 }
76108 }
77109 arg
@@ -91,6 +123,15 @@ build_array_expression <- function(.Generic, e1, e2, ...) {
91123 " <=" = " less_equal" ,
92124 " &" = " and_kleene" ,
93125 " |" = " or_kleene" ,
126+ " +" = " add_checked" ,
127+ " -" = " subtract_checked" ,
128+ " *" = " multiply_checked" ,
129+ " /" = " divide_checked" ,
130+ " %/%" = " divide_checked" ,
131+ # we don't actually use divide_checked with `%%`, rather it is rewritten to
132+ # use %/% above.
133+ " %%" = " divide_checked" ,
134+ # TODO: "^" (ARROW-11070)
94135 " %in%" = " is_in_meta_binary"
95136)
96137
@@ -104,6 +145,16 @@ eval_array_expression <- function(x) {
104145 a
105146 }
106147 })
148+ if (length(x $ args ) == 2L ) {
149+ # Insert implicit casts
150+ if (inherits(x $ args [[1 ]], " Scalar" )) {
151+ x $ args [[1 ]] <- x $ args [[1 ]]$ cast(x $ args [[2 ]]$ type )
152+ } else if (inherits(x $ args [[2 ]], " Scalar" )) {
153+ x $ args [[2 ]] <- x $ args [[2 ]]$ cast(x $ args [[1 ]]$ type )
154+ } else if (x $ fun == " is_in_meta_binary" && inherits(x $ args [[2 ]], " Array" )) {
155+ x $ args [[2 ]] <- x $ args [[2 ]]$ cast(x $ args [[1 ]]$ type )
156+ }
157+ }
107158 call_function(x $ fun , args = x $ args , options = x $ options %|| % empty_named_list())
108159}
109160
@@ -160,7 +211,16 @@ print.array_expression <- function(x, ...) {
160211# ' @export
161212Expression <- R6Class(" Expression" , inherit = ArrowObject ,
162213 public = list (
163- ToString = function () dataset___expr__ToString(self )
214+ ToString = function () dataset___expr__ToString(self ),
215+ cast = function (to_type , safe = TRUE , ... ) {
216+ opts <- list (
217+ to_type = to_type ,
218+ allow_int_overflow = ! safe ,
219+ allow_time_truncate = ! safe ,
220+ allow_float_truncate = ! safe
221+ )
222+ Expression $ create(" cast" , self , options = modifyList(opts , list (... )))
223+ }
164224 )
165225)
166226Expression $ create <- function (function_name ,
@@ -196,6 +256,21 @@ build_dataset_expression <- function(.Generic, e1, e2, ...) {
196256 if (! inherits(e2 , " Expression" )) {
197257 e2 <- Expression $ scalar(e2 )
198258 }
259+
260+ # In Arrow, "divide" is one function, which does integer division on
261+ # integer inputs and floating-point division on floats
262+ if (.Generic == " /" ) {
263+ # TODO: omg so many ways it's wrong to assume these types
264+ e1 <- e1 $ cast(float64())
265+ e2 <- e2 $ cast(float64())
266+ } else if (.Generic == " %/%" ) {
267+ # In R, integer division works like floor(float division)
268+ out <- build_dataset_expression(" /" , e1 , e2 )
269+ return (out $ cast(int32(), allow_float_truncate = TRUE ))
270+ } else if (.Generic == " %%" ) {
271+ return (e1 - e2 * ( e1 %/% e2 ))
272+ }
273+
199274 expr <- Expression $ create(.binary_function_map [[.Generic ]], e1 , e2 , ... )
200275 }
201276 expr
0 commit comments