Skip to content

Commit 17e2ae6

Browse files
ARROW-11774: [R] macos one line install
Closes apache#9579 from jonkeane/ARROW-11774-macos-one-line-install Lead-authored-by: Jonathan Keane <jkeane@gmail.com> Co-authored-by: Neal Richardson <neal.p.richardson@gmail.com> Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
1 parent 6093e19 commit 17e2ae6

9 files changed

Lines changed: 186 additions & 22 deletions

File tree

cpp/cmake_modules/ThirdpartyToolchain.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2180,6 +2180,10 @@ macro(build_bzip2)
21802180

21812181
set(BZIP2_EXTRA_ARGS "CC=${CMAKE_C_COMPILER}" "CFLAGS=${EP_C_FLAGS}")
21822182

2183+
if(CMAKE_OSX_SYSROOT)
2184+
list(APPEND BZIP2_EXTRA_ARGS "SDKROOT=${CMAKE_OSX_SYSROOT}")
2185+
endif()
2186+
21832187
externalproject_add(bzip2_ep
21842188
${EP_LOG_OPTIONS}
21852189
CONFIGURE_COMMAND ""
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# NOTE: must set "Crossbow" as name to have the badge links working in the
19+
# github comment reports!
20+
name: Crossbow
21+
22+
on:
23+
push:
24+
branches:
25+
- "*-github-*"
26+
27+
jobs:
28+
autobrew:
29+
name: "install from local source"
30+
runs-on: {{ "${{ matrix.os }}" }}
31+
strategy:
32+
fail-fast: false
33+
matrix:
34+
os: [macOS-latest, ubuntu-20.04]
35+
36+
steps:
37+
- name: Checkout Arrow
38+
run: |
39+
git clone --no-checkout {{ arrow.remote }} arrow
40+
git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
41+
git -C arrow checkout FETCH_HEAD
42+
git -C arrow submodule update --init --recursive
43+
- name: Configure non-autobrew dependencies (macos)
44+
run: |
45+
cd arrow/r
46+
brew install openssl
47+
if: contains(matrix.os, 'macOS')
48+
- name: Configure non-autobrew dependencies (linux)
49+
run: |
50+
cd arrow/r
51+
sudo apt install libcurl4-openssl-dev libssl-dev
52+
if: contains(matrix.os, 'ubuntu')
53+
- uses: r-lib/actions/setup-r@v1
54+
- name: Install dependencies
55+
run: |
56+
install.packages("remotes")
57+
remotes::install_deps("arrow/r", dependencies = TRUE)
58+
remotes::install_cran(c("rcmdcheck", "sys", "sessioninfo"))
59+
shell: Rscript {0}
60+
- name: Session info
61+
run: |
62+
options(width = 100)
63+
pkgs <- installed.packages()[, "Package"]
64+
sessioninfo::session_info(pkgs, include_base = TRUE)
65+
shell: Rscript {0}
66+
- name: Install
67+
env:
68+
_R_CHECK_CRAN_INCOMING_: false
69+
ARROW_USE_PKG_CONFIG: false
70+
FORCE_BUNDLED_BUILD: true
71+
LIBARROW_MINIMAL: false
72+
TEST_R_WITH_ARROW: TRUE
73+
ARROW_R_DEV: TRUE
74+
run: |
75+
cd arrow/r
76+
R CMD INSTALL . --install-tests
77+
- name: Run the tests
78+
run: R -e 'if(tools::testInstalledPackage("arrow") != 0L) stop("There was a test failure.")'
79+
- name: Dump test logs
80+
run: cat arrow-tests/testthat.Rout*
81+
if: failure()
82+
- name: Save the test output
83+
uses: actions/upload-artifact@v2
84+
with:
85+
name: test-output
86+
path: arrow-tests/testthat.Rout*
87+
if: always()

dev/tasks/tasks.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1197,7 +1197,7 @@ tasks:
11971197
homebrew-r-autobrew:
11981198
# This tests that the autobrew formula + script work in practice
11991199
ci: github
1200-
template: r/github.macos.yml
1200+
template: r/github.macos.autobrew.yml
12011201

12021202
############################## Gandiva Tasks ################################
12031203

@@ -1630,6 +1630,10 @@ tasks:
16301630
params:
16311631
MATRIX: "${{ matrix.r_version }}"
16321632

1633+
test-r-install-local:
1634+
ci: github
1635+
template: r/github.macos-linux.local.yml
1636+
16331637
test-r-rhub-ubuntu-gcc-release:
16341638
ci: azure
16351639
template: r/azure.linux.yml

r/R/install-arrow.R

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,30 @@ install_arrow <- function(nightly = FALSE,
7777
ARROW_R_DEV = verbose,
7878
ARROW_USE_PKG_CONFIG = use_system
7979
)
80-
if (isTRUE(binary)) {
80+
# On the M1, we can't use the usual autobrew, which pulls Intel dependencies
81+
apple_m1 <- grepl("arm-apple|aarch64.*darwin", R.Version()$platform)
82+
# On Rosetta, we have to build without JEMALLOC, so we also can't autobrew
83+
rosetta <- identical(sysname, "darwin") && identical(system("sysctl -n sysctl.proc_translated", intern = TRUE), "1")
84+
if (rosetta) {
85+
Sys.setenv(ARROW_JEMALLOC = "OFF")
86+
}
87+
if (apple_m1 || rosetta) {
88+
Sys.setenv(FORCE_BUNDLED_BUILD = "true")
89+
}
90+
91+
opts <- list()
92+
if (apple_m1 || rosetta) {
93+
# Skip binaries (esp. for rosetta)
94+
opts$pkgType <- "source"
95+
} else if (isTRUE(binary)) {
8196
# Unless otherwise directed, don't consider newer source packages when
8297
# options(pkgType) == "both" (default on win/mac)
83-
opts <- options(
84-
install.packages.check.source = "no",
85-
install.packages.compile.from.source = "never"
86-
)
87-
on.exit(options(opts))
98+
opts$install.packages.check.source <- "no"
99+
opts$install.packages.compile.from.source <- "never"
100+
}
101+
if (length(opts)) {
102+
old <- options(opts)
103+
on.exit(options(old))
88104
}
89105
install.packages("arrow", repos = arrow_repos(repos, nightly), ...)
90106
}

r/configure

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ PKG_LIBS="-larrow"
3636
# Make some env vars case-insensitive
3737
ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'`
3838
FORCE_AUTOBREW=`echo $FORCE_AUTOBREW | tr '[:upper:]' '[:lower:]'`
39+
FORCE_BUNDLED_BUILD=`echo $FORCE_BUNDLED_BUILD | tr '[:upper:]' '[:lower:]'`
3940
ARROW_USE_PKG_CONFIG=`echo $ARROW_USE_PKG_CONFIG | tr '[:upper:]' '[:lower:]'`
4041

4142
VERSION=`grep '^Version' DESCRIPTION | sed s/Version:\ //`
@@ -60,7 +61,7 @@ if [ -f "tools/apache-arrow.rb" ]; then
6061
fi
6162
fi
6263

63-
if [ "$FORCE_AUTOBREW" = "true" ]; then
64+
if [ "$FORCE_AUTOBREW" = "true" ] || [ "$FORCE_BUNDLED_BUILD" = "true" ]; then
6465
ARROW_USE_PKG_CONFIG="false"
6566
fi
6667

@@ -96,7 +97,7 @@ else
9697
echo "**** or retry with ARROW_USE_PKG_CONFIG=false"
9798
fi
9899
else
99-
if [ "$UNAME" = "Darwin" ]; then
100+
if [ "$UNAME" = "Darwin" ] && [ "$FORCE_BUNDLED_BUILD" != "true" ]; then
100101
if [ "$FORCE_AUTOBREW" != "true" ] && [ "`command -v brew`" ] && [ "`brew ls --versions ${PKG_BREW_NAME}`" != "" ]; then
101102
echo "*** Using Homebrew ${PKG_BREW_NAME}"
102103
BREWDIR=`brew --prefix`
@@ -133,22 +134,45 @@ else
133134
if [ "${LIBARROW_MINIMAL}" = "" ] && [ "${NOT_CRAN}" = "true" ]; then
134135
LIBARROW_MINIMAL=false; export LIBARROW_MINIMAL
135136
fi
136-
${R_HOME}/bin/Rscript tools/linuxlibs.R $VERSION
137+
138+
# find openssl on macos. macOS ships with libressl. openssl is installable
139+
# with brew, but it is generally not linked. We can over-ride this and find
140+
# openssl but setting OPENSSL_ROOT_DIR (which cmake will pick up later in
141+
# the installation process). FWIW, arrow's cmake process uses this
142+
# same process to find openssl, but doing it now allows us to catch it in
143+
# nixlibs.R and throw a nicer error.
144+
if [ "$UNAME" = "Darwin" ] && [ "${OPENSSL_ROOT_DIR}" = "" ]; then
145+
brew --prefix openssl >/dev/null 2>&1
146+
if [ $? -eq 0 ]; then
147+
export OPENSSL_ROOT_DIR="$(brew --prefix openssl)"
148+
fi
149+
fi
150+
151+
${R_HOME}/bin/Rscript tools/nixlibs.R $VERSION
137152
PKG_CFLAGS="-I$(pwd)/libarrow/arrow-${VERSION}/include $PKG_CFLAGS"
138153

139154
LIB_DIR="libarrow/arrow-${VERSION}/lib"
140155
if [ -d "$LIB_DIR" ]; then
141156
# Enumerate the static libs, put their -l flags in BUNDLED_LIBS,
142157
# and put their -L location in PKG_DIRS
143158
#
144-
# If tools/linuxlibs.R fails to produce libs, this dir won't exist
159+
# If tools/nixlibs.R fails to produce libs, this dir won't exist
145160
# so don't try (the error message from `ls` would be misleading)
146-
# Assume linuxlibs.R has handled and messaged about its failure already
161+
# Assume nixlibs.R has handled and messaged about its failure already
147162
#
148163
# TODO: what about non-bundled deps?
149164
BUNDLED_LIBS=`cd $LIB_DIR && ls *.a`
150165
BUNDLED_LIBS=`echo $BUNDLED_LIBS | sed -E "s/lib(.*)\.a/-l\1/" | sed -e "s/\\.a lib/ -l/g"`
151166
PKG_DIRS="-L$(pwd)/$LIB_DIR"
167+
168+
# When using brew's openssl it is not bundled and it is not on the system
169+
# search path and so we must add the lib path to BUNDLED_LIBS if we are
170+
# using it. Note the order is important, this must be after the arrow
171+
# lib path + the pkg and bundled libs above so this is why we're
172+
# appending to BUNDLED_LIBS and not PKG_DIRS
173+
# Append the OpenSSL library directory to the link flags when
# OPENSSL_ROOT_DIR is set.
# The variable must be quoted: when it is unset or empty the unquoted form
# collapses to `[ != "" ]`, which makes `test` print an error, and a path
# containing spaces would be split into several words.
if [ "$OPENSSL_ROOT_DIR" != "" ]; then
  BUNDLED_LIBS="$BUNDLED_LIBS -L$OPENSSL_ROOT_DIR/lib"
fi
152176
fi
153177
fi
154178
fi

r/data-raw/codegen.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
# #if defined(ARROW_R_WITH_FEATURE)
4242
# and each feature is written to its own set of export files.
4343

44-
Sys.setlocale("LC_COLLATE", "C")
44+
invisible(Sys.setlocale("LC_COLLATE", "C"))
4545

4646
features <- c("arrow", "dataset", "parquet", "s3")
4747

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -349,9 +349,14 @@ ensure_cmake <- function() {
349349
# If not found, download it
350350
cat("**** cmake\n")
351351
CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.19.2")
352+
if (tolower(Sys.info()[["sysname"]]) %in% "darwin") {
353+
postfix <- "-macos-universal.tar.gz"
354+
} else {
355+
postfix <- "-Linux-x86_64.tar.gz"
356+
}
352357
cmake_binary_url <- paste0(
353358
"https://github.com/Kitware/CMake/releases/download/v", CMAKE_VERSION,
354-
"/cmake-", CMAKE_VERSION, "-Linux-x86_64.tar.gz"
359+
"/cmake-", CMAKE_VERSION, postfix
355360
)
356361
cmake_tar <- tempfile()
357362
cmake_dir <- tempfile()
@@ -361,7 +366,7 @@ ensure_cmake <- function() {
361366
options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir))
362367
cmake <- paste0(
363368
cmake_dir,
364-
"/cmake-", CMAKE_VERSION, "-Linux-x86_64",
369+
"/cmake-", CMAKE_VERSION, sub(".tar.gz", "", postfix, fixed = TRUE),
365370
"/bin/cmake"
366371
)
367372
}
@@ -402,10 +407,11 @@ with_s3_support <- function(env_vars) {
402407
cat("**** S3 support not available for gcc < 4.9; building with ARROW_S3=OFF\n")
403408
arrow_s3 <- FALSE
404409
} else if (!cmake_find_package("CURL", NULL, env_vars)) {
410+
# curl should already be installed on macOS, so this message needs no macOS-specific hint
405411
cat("**** S3 support requires libcurl-devel (rpm) or libcurl4-openssl-dev (deb); building with ARROW_S3=OFF\n")
406412
arrow_s3 <- FALSE
407413
} else if (!cmake_find_package("OpenSSL", "1.0.2", env_vars)) {
408-
cat("**** S3 support requires openssl-devel (rpm) or libssl-dev (deb), version >= 1.0.2; building with ARROW_S3=OFF\n")
414+
cat("**** S3 support requires version >= 1.0.2 of openssl-devel (rpm), libssl-dev (deb), or openssl (brew); building with ARROW_S3=OFF\n")
409415
arrow_s3 <- FALSE
410416
}
411417
}

r/vignettes/install.Rmd

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
---
2-
title: "Installing the Arrow Package on Linux"
2+
title: "Installing the Arrow Package"
33
output: rmarkdown::html_vignette
44
vignette: >
5-
%\VignetteIndexEntry{Installing the Arrow Package on Linux}
5+
%\VignetteIndexEntry{Installing the Arrow Package}
66
%\VignetteEngine{knitr::rmarkdown}
77
%\VignetteEncoding{UTF-8}
88
---
@@ -100,8 +100,6 @@ satisfy C++ dependencies.
100100

101101
> Note that, unlike packages like `tensorflow`, `blogdown`, and others that require external dependencies, you do not need to run `install_arrow()` after a successful `arrow` installation.
102102
103-
<!-- TODO: does remotes::install_github("apache/arrow/r") work now? -->
104-
105103
## S3 support
106104

107105
The `arrow` package allows you to work with data in AWS S3 or in other cloud
@@ -166,10 +164,35 @@ run `install.packages("arrow")` or `R CMD INSTALL` but not when running `R CMD c
166164
unless you've set the `NOT_CRAN=true` environment variable.
167165

168166
For the mechanics of how all this works, see the R package `configure` script,
169-
which calls `tools/linuxlibs.R`.
167+
which calls `tools/nixlibs.R`.
170168
If the C++ library is built from source, `inst/build_arrow_static.sh` is executed.
171169
This build script is also what is used to generate the prebuilt binaries.
172170

171+
172+
# Using `remotes::install_github(...)`
173+
174+
If you need an Arrow installation from a specific repository or at a specific ref,
175+
`remotes::install_github("apache/arrow/r", build = FALSE)`
176+
should work on most platforms (with the notable exception of Windows).
177+
The `build = FALSE` argument is important so that the installation can access the
178+
C++ source in the `cpp/` directory in `apache/arrow`.
179+
180+
As with other installation methods, setting the environment variables `LIBARROW_MINIMAL=false` and `ARROW_R_DEV=true` will provide a more full-featured version of Arrow and provide more verbose output, respectively.
181+
182+
For example, to install from the (fictional) branch `bugfix` of `apache/arrow`, one could run:
183+
184+
```r
185+
Sys.setenv(LIBARROW_MINIMAL="false")
186+
remotes::install_github("apache/arrow/r@bugfix", build = FALSE)
187+
```
188+
189+
Developers may wish to use this method of installing a specific commit
190+
separate from another Arrow development environment or system installation
191+
(e.g. we use this in [arrowbench](https://github.com/ursacomputing/arrowbench) to install development versions of arrow isolated from the system install). If you already have Arrow C++ libraries installed system-wide, you may need to set some additional variables in order to isolate this build from your system libraries:
192+
193+
* Setting the environment variable `FORCE_BUNDLED_BUILD` to `true` will skip the `pkg-config` search for Arrow libraries and attempt to build from the same source at the repository+ref given.
194+
* You may also need to set the Makevars `CPPFLAGS` and `LDFLAGS` to `""` in order to prevent the installation process from attempting to link to already installed system versions of Arrow. One way to do this temporarily is wrapping your `remotes::install_github()` call like so: `withr::with_makevars(list(CPPFLAGS = "", LDFLAGS = ""), remotes::install_github(...))`.
195+
173196
# Troubleshooting
174197

175198
The intent is that `install.packages("arrow")` will just work and handle all C++
@@ -334,7 +357,7 @@ By default, these are all unset. All boolean variables are case-insensitive.
334357
Use this if you want to avoid compiling the C++ library, which may be slow
335358
and resource-intensive, and ensure that you only use a prebuilt binary.
336359
* `LIBARROW_MINIMAL`: If set to `false`, the build script
337-
will enable some optional features, including compression libraries, S3
360+
will enable some optional features, including compression libraries, S3
338361
support, and additional alternative memory allocators. This will increase the
339362
source build time but results in a more fully functional library.
340363
* `NOT_CRAN`: If this variable is set to `true`, as the `devtools` package does,

0 commit comments

Comments
 (0)