Skip to content

Commit ec6879b

Browse files
Yurui Zhou authored and emkornfield committed
ARROW-4714: [C++][JAVA] Providing JNI interface to Read ORC file via Arrow C++
- setup necessary dev environment for JNI development on JAVA and C++ codebase - implemented JNI interface to enable reading arrow record batch from ORC files - implemented a naive arrow buffer reference manager to ensure c++ memory release Author: Yurui Zhou <yurui.zyr@alibaba-inc.com> Closes apache#4348 from yuruiz/JniOrcReader and squashes the following commits: 41592bf <Yurui Zhou> minor doc fix 44b5420 <Yurui Zhou> make sure lookup operation are performed under lock 706c8dc <Yurui Zhou> resolve comments de8529c <Yurui Zhou> resolve comments fc80175 <Yurui Zhou> resolve comments 9b04b76 <Yurui Zhou> fix style issues and add proper docs 9b13d7f <Yurui Zhou> replace nullptr with NULLPTR macro dd981af <Yurui Zhou> fix lint and clang-format 44505df <Yurui Zhou> Fix cmake format f2a0c04 <Yurui Zhou> destruct schema reader when finish reading 4f89e34 <Yurui Zhou> Make sure resources are properly released. 26d74db <Yurui Zhou> fix minor style check error ce30933 <Yurui Zhou> Add Arrow Jni Reader Unittests 7a80fbd <Yurui Zhou> Minor refactor e4c0630 <Yurui Zhou> remove redundant code e932aa8 <Yurui Zhou> Move jni code to src/jni and change build flag to arrow_jni 1b6a704 <Yurui Zhou> Interface refactor and performance optimization 3604c24 <Yurui Zhou> Resolve merge conflicts 1c0e0b2 <Yurui Zhou> Fix minor build errors e0d9c1f <Yurui Zhou> implement JNI interface on both size a1e80a6 <Yurui Zhou> Add arrow-orc setup
1 parent a2ef7d9 commit ec6879b

22 files changed

Lines changed: 1476 additions & 6 deletions

cpp/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,10 @@ if(MSVC)
256256
set(ARROW_USE_GLOG OFF)
257257
endif()
258258

259+
if(ARROW_JNI)
260+
set(ARROW_BUILD_STATIC ON)
261+
endif()
262+
259263
if(ARROW_ORC)
260264
set(ARROW_WITH_LZ4 ON)
261265
set(ARROW_WITH_SNAPPY ON)
@@ -729,6 +733,10 @@ if(ARROW_PARQUET)
729733
endif()
730734
endif()
731735
736+
if(ARROW_JNI)
737+
add_subdirectory(src/jni)
738+
endif()
739+
732740
if(ARROW_GANDIVA)
733741
add_subdirectory(src/gandiva)
734742
endif()

cpp/build-support/lint_cpp_cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ def lint_file(path):
7777
arrow/visitor_inline.h
7878
gandiva/cache.h
7979
gandiva/jni
80+
jni/
8081
test
8182
internal''')
8283

cpp/cmake_modules/BuildUtils.cmake

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ function(ADD_ARROW_LIB LIB_NAME)
139139
PRIVATE_INCLUDES
140140
DEPENDENCIES
141141
SHARED_INSTALL_INTERFACE_LIBS
142-
STATIC_INSTALL_INTERFACE_LIBS)
142+
STATIC_INSTALL_INTERFACE_LIBS
143+
OUTPUT_PATH)
143144
cmake_parse_arguments(ARG
144145
"${options}"
145146
"${one_value_args}"
@@ -164,6 +165,11 @@ function(ADD_ARROW_LIB LIB_NAME)
164165
else()
165166
set(BUILD_STATIC ${ARROW_BUILD_STATIC})
166167
endif()
168+
if(ARG_OUTPUT_PATH)
169+
set(OUTPUT_PATH ${ARG_OUTPUT_PATH})
170+
else()
171+
set(OUTPUT_PATH ${BUILD_OUTPUT_ROOT_DIRECTORY})
172+
endif()
167173

168174
if(WIN32 OR (CMAKE_GENERATOR STREQUAL Xcode))
169175
# We need to compile C++ separately for each library kind (shared and static)
@@ -234,11 +240,11 @@ function(ADD_ARROW_LIB LIB_NAME)
234240

235241
set_target_properties(${LIB_NAME}_shared
236242
PROPERTIES LIBRARY_OUTPUT_DIRECTORY
237-
"${BUILD_OUTPUT_ROOT_DIRECTORY}"
243+
"${OUTPUT_PATH}"
238244
RUNTIME_OUTPUT_DIRECTORY
239-
"${BUILD_OUTPUT_ROOT_DIRECTORY}"
245+
"${OUTPUT_PATH}"
240246
PDB_OUTPUT_DIRECTORY
241-
"${BUILD_OUTPUT_ROOT_DIRECTORY}"
247+
"${OUTPUT_PATH}"
242248
LINK_FLAGS
243249
"${ARG_SHARED_LINK_FLAGS}"
244250
OUTPUT_NAME
@@ -313,8 +319,7 @@ function(ADD_ARROW_LIB LIB_NAME)
313319
endif()
314320

315321
set_target_properties(${LIB_NAME}_static
316-
PROPERTIES LIBRARY_OUTPUT_DIRECTORY
317-
"${BUILD_OUTPUT_ROOT_DIRECTORY}" OUTPUT_NAME
322+
PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${OUTPUT_PATH}" OUTPUT_NAME
318323
${LIB_NAME_STATIC})
319324

320325
if(ARG_STATIC_INSTALL_INTERFACE_LIBS)

cpp/cmake_modules/DefineOptions.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
151151

152152
define_option(ARROW_ORC "Build the Arrow ORC adapter" OFF)
153153

154+
define_option(ARROW_JNI "Build the Arrow JNI lib" OFF)
155+
154156
define_option(ARROW_TENSORFLOW "Build Arrow with TensorFlow support enabled" OFF)
155157

156158
define_option(ARROW_JEMALLOC "Build the Arrow jemalloc-based allocator" ON)

cpp/src/jni/CMakeLists.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
#
19+
# arrow_jni
20+
#
21+
22+
# Descend into the ORC JNI sources only when the ORC adapter (ARROW_ORC) is
# enabled for this build.
if(ARROW_ORC)
23+
add_subdirectory(orc)
24+
endif()

cpp/src/jni/orc/CMakeLists.txt

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
#
19+
# arrow_orc_jni
20+
#
21+
22+
# Declare the minimum CMake version before project() so that the policy
# settings are in place by the time the project command runs.
cmake_minimum_required(VERSION 3.11)

project(arrow_orc_jni)

# The JNI development files are mandatory; JNI_INCLUDE_DIRS is consumed below.
find_package(JNI REQUIRED)

# Umbrella target. The library target(s) reported by add_arrow_lib through
# ARROW_ORC_JNI_LIBRARIES are attached to it at the end of this file.
add_custom_target(arrow_orc_jni)

# Directory that is added to the include path of the JNI library below.
# NOTE(review): presumably the Java build added next writes the generated JNI
# headers here -- confirm against java/adapter/orc/CMakeLists.txt.
set(JNI_HEADERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")

# The Java ORC adapter lives outside this source directory, so an explicit
# binary directory (./java) has to be supplied.
add_subdirectory(../../../../java/adapter/orc ./java)

# Switch the static-library flag off for this directory before calling
# add_arrow_lib, which is asked for a shared library only (BUILD_SHARED).
# NOTE(review): add_arrow_lib appears to fall back to ARROW_BUILD_STATIC when
# deciding whether to build a static variant -- confirm in BuildUtils.cmake.
set(ARROW_BUILD_STATIC OFF)

add_arrow_lib(arrow_orc_jni
              BUILD_SHARED
              SOURCES
              jni_wrapper.cpp
              OUTPUTS
              ARROW_ORC_JNI_LIBRARIES
              SHARED_PRIVATE_LINK_LIBS
              arrow_static
              EXTRA_INCLUDES
              ${JNI_HEADERS_DIR}
              PRIVATE_INCLUDES
              ${JNI_INCLUDE_DIRS}
              ${CMAKE_CURRENT_BINARY_DIR}
              DEPENDENCIES
              arrow_static
              arrow_orc_java
              OUTPUT_PATH
              ${CMAKE_CURRENT_BINARY_DIR})

add_dependencies(arrow_orc_jni ${ARROW_ORC_JNI_LIBRARIES})

cpp/src/jni/orc/concurrent_map.h

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
* limitations under the License.
16+
*/
17+
18+
#ifndef JNI_ID_TO_MODULE_MAP_H
19+
#define JNI_ID_TO_MODULE_MAP_H
20+
21+
#include <memory>
22+
#include <mutex>
23+
#include <unordered_map>
24+
#include <utility>
25+
26+
#include "arrow/util/macros.h"
27+
28+
namespace arrow {
29+
namespace jni {
30+
31+
/**
32+
* An utility class that map module id to module pointers.
33+
* @tparam Holder class of the object to hold.
34+
*/
35+
template <typename Holder>
36+
class ConcurrentMap {
37+
public:
38+
ConcurrentMap() : module_id_(init_module_id_) {}
39+
40+
jlong Insert(Holder holder) {
41+
std::lock_guard<std::mutex> lock(mtx_);
42+
jlong result = module_id_++;
43+
map_.insert(std::pair<jlong, Holder>(result, holder));
44+
return result;
45+
}
46+
47+
void Erase(jlong module_id) {
48+
std::lock_guard<std::mutex> lock(mtx_);
49+
map_.erase(module_id);
50+
}
51+
52+
Holder Lookup(jlong module_id) {
53+
std::lock_guard<std::mutex> lock(mtx_);
54+
auto it = map_.find(module_id);
55+
if (it != map_.end()) {
56+
return it->second;
57+
}
58+
return NULLPTR;
59+
}
60+
61+
void Clear() {
62+
std::lock_guard<std::mutex> lock(mtx_);
63+
map_.clear();
64+
}
65+
66+
private:
67+
// Initialize the module id starting value to a number greater than zero
68+
// to allow for easier debugging of uninitialized java variables.
69+
static constexpr int init_module_id_ = 4;
70+
71+
int64_t module_id_;
72+
std::mutex mtx_;
73+
// map from module ids returned to Java and module pointers
74+
std::unordered_map<jlong, Holder> map_;
75+
};
76+
77+
} // namespace jni
78+
} // namespace arrow
79+
80+
#endif // JNI_ID_TO_MODULE_MAP_H

0 commit comments

Comments
 (0)