Skip to content

Commit f6e75cd

Browse files
authored
HIVE-29419: Provide a Hive-specific docker image for Tez AM (#6435)
1 parent 4d4fb6a commit f6e75cd

9 files changed

Lines changed: 235 additions & 38 deletions

File tree

llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ protected void startRpcServer() {
257257
int numHandlers =
258258
HiveConf.getIntVar(conf, ConfVars.LLAP_TASK_COMMUNICATOR_LISTENER_THREAD_COUNT);
259259
String[] portRange =
260-
conf.get(HiveConf.ConfVars.LLAP_TASK_UMBILICAL_SERVER_PORT.varname)
260+
HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_TASK_UMBILICAL_SERVER_PORT)
261261
.split("-");
262262
boolean isHadoopSecurityAuthorizationEnabled = conf.getBoolean(
263263
CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false);

packaging/src/docker/Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ ARG UID=1000
7575
ARG HADOOP_VERSION
7676
ARG HIVE_VERSION
7777
ARG TEZ_VERSION
78-
7978
# Install dependencies
8079
RUN set -ex; \
8180
microdnf update -y; \

packaging/src/docker/build.sh

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ HADOOP_VERSION=
2222
TEZ_VERSION=
2323
usage() {
2424
cat <<EOF 1>&2
25-
Usage: $0 [-h] [-hadoop <Hadoop version>] [-tez <Tez version>] [-hive <Hive version>] [-repo <Docker repo>]
25+
Usage: $0 [-h] [-hadoop <Hadoop version>] -tez <Tez release version> [-hive <Hive version>] [-repo <Docker repo>]
2626
Build the Hive Docker image (reused for LLAP too)
2727
-help Display help
28-
-hadoop Build image with the specified Hadoop version
29-
-tez Build image with the specified Tez version
28+
-hadoop Build image with the specified Hadoop version (default: from Maven pom)
29+
-tez Required. Tez release tarball version (apache-tez-\$TEZ_VERSION-bin.tar.gz from archive)
3030
-hive Build image with the specified Hive version
3131
-repo Docker repository
3232
EOF
@@ -64,6 +64,12 @@ while [ $# -gt 0 ]; do
6464
esac
6565
done
6666

67+
# -tez is mandatory: bail out with the usage text when no Tez version was supplied.
[ -n "${TEZ_VERSION}" ] || {
  echo "Error: -tez <Tez version> is required." >&2
  usage
  exit 1
}
72+
6773
# Absolute directory containing this script. Quoted so paths with spaces work;
# abort instead of silently falling back to the caller's cwd if cd fails.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1
6874
SOURCE_DIR=${SOURCE_DIR:-"$SCRIPT_DIR/../../.."}
6975
repo=${REPO:-apache}
@@ -123,12 +129,17 @@ cp "$CACHE_DIR/apache-tez-$TEZ_VERSION-bin.tar.gz" "$WORK_DIR/"
123129
cp -R "$SOURCE_DIR/packaging/src/docker/conf" "$WORK_DIR/"
124130
cp -R "$SOURCE_DIR/packaging/src/docker/entrypoint.sh" "$WORK_DIR/"
125131
cp "$SOURCE_DIR/packaging/src/docker/Dockerfile" "$WORK_DIR/"
132+
133+
DOCKER_BUILD_ARGS=(
134+
--build-arg "HIVE_VERSION=$HIVE_VERSION"
135+
--build-arg "HADOOP_VERSION=$HADOOP_VERSION"
136+
--build-arg "TEZ_VERSION=$TEZ_VERSION"
137+
)
138+
126139
docker build \
127140
"$WORK_DIR" \
128141
-f "$WORK_DIR/Dockerfile" \
129142
-t "$repo/hive:$HIVE_VERSION" \
130-
--build-arg "HIVE_VERSION=$HIVE_VERSION" \
131-
--build-arg "HADOOP_VERSION=$HADOOP_VERSION" \
132-
--build-arg "TEZ_VERSION=$TEZ_VERSION"
143+
"${DOCKER_BUILD_ARGS[@]}"
133144

134145
rm -r "${WORK_DIR}"

packaging/src/docker/conf/hive-site.xml.template

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,4 +88,25 @@
8888
<name>hive.query.results.cache.directory</name>
8989
<value>${HIVE_QUERY_RESULTS_CACHE_DIRECTORY}</value>
9090
</property>
91+
<property>
92+
<name>hive.server2.tez.initialize.default.sessions</name>
93+
<value>false</value>
94+
</property>
95+
<property>
96+
<name>hive.server2.tez.use.external.sessions</name>
97+
<value>${HIVE_SERVER2_TEZ_USE_EXTERNAL_SESSIONS}</value>
98+
</property>
99+
<!--
100+
A registry namespace prefix is a hardcoded prefix for Tez external sessions.
101+
The actual tez.am.registry.namespace value is appended to this prefix.
102+
Once Hive can use the registry client that Tez provides (ZkAMRegistryClient), this property will be removed.
103+
-->
104+
<property>
105+
<name>hive.server2.tez.external.sessions.namespace</name>
106+
<value>/tez-external-sessions${TEZ_AM_REGISTRY_NAMESPACE}</value>
107+
</property>
108+
<property>
109+
<name>hive.server2.tez.external.sessions.registry.class</name>
110+
<value>org.apache.hadoop.hive.ql.exec.tez.ZookeeperExternalSessionsRegistryClient</value>
111+
</property>
91112
</configuration>
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"taskSchedulerDescriptors": [
3+
{
4+
"className": "org.apache.hadoop.hive.llap.tezplugins.LlapTaskSchedulerService",
5+
"entityName": "LLAP"
6+
}
7+
],
8+
"containerLauncherDescriptors": [
9+
{
10+
"className": "org.apache.hadoop.hive.llap.tezplugins.LlapContainerLauncher",
11+
"entityName": "LLAP"
12+
}
13+
],
14+
"taskCommunicatorDescriptors": [
15+
{
16+
"className": "org.apache.hadoop.hive.llap.tezplugins.LlapTaskCommunicator",
17+
"entityName": "LLAP"
18+
}
19+
],
20+
"enableContainers": false,
21+
"enableUber": false
22+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
log4j.rootLogger=INFO, console
2+
log4j.appender.console=org.apache.log4j.ConsoleAppender
3+
log4j.appender.console.Target=System.err
4+
log4j.appender.console.layout=org.apache.log4j.PatternLayout
5+
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
<?xml version="1.0"?>
2+
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3+
<!--
4+
Licensed to the Apache Software Foundation (ASF) under one or more
5+
contributor license agreements. See the NOTICE file distributed with
6+
this work for additional information regarding copyright ownership.
7+
The ASF licenses this file to You under the Apache License, Version 2.0
8+
(the "License"); you may not use this file except in compliance with
9+
the License. You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
-->
19+
<configuration>
20+
<property>
21+
<name>tez.am.mode.session</name>
22+
<value>true</value>
23+
</property>
24+
<property>
25+
<name>tez.am.framework.mode</name>
26+
<value>${TEZ_FRAMEWORK_MODE}</value>
27+
</property>
28+
<property>
29+
<name>tez.am.zookeeper.quorum</name>
30+
<value>${TEZ_AM_ZOOKEEPER_QUORUM}</value>
31+
</property>
32+
<property>
33+
<name>tez.am.registry.namespace</name>
34+
<value>${TEZ_AM_REGISTRY_NAMESPACE}</value>
35+
</property>
36+
<property>
37+
<name>tez.local.mode</name>
38+
<value>false</value>
39+
</property>
40+
<property>
41+
<name>tez.am.tez-ui.webservice.enable</name>
42+
<value>false</value>
43+
</property>
44+
<!-- Tez AM should not time out in ZK mode -->
45+
<property>
46+
<name>tez.session.am.dag.submit.timeout.secs</name>
47+
<value>-1</value>
48+
</property>
49+
<property>
50+
<name>tez.ignore.lib.uris</name>
51+
<value>true</value>
52+
</property>
53+
<property>
54+
<name>tez.am.disable.client-version-check</name>
55+
<value>true</value>
56+
</property>
57+
58+
<!--
59+
In standalone AM mode with LLAP, the task scheduler (LlapTaskSchedulerService) and task
60+
communicator (LlapTaskCommunicator) are instantiated at AM startup from service_plugins_descriptor.json
61+
on the classpath — not from the DAG payload as in YARN-submitted mode. These plugins read their
62+
configuration from TezConfiguration, which is built exclusively from tez-site.xml on the classpath.
63+
Hive-specific properties must therefore appear here so the AM can find LLAP daemons and connect
64+
to them, even though they are conceptually Hive settings.
65+
-->
66+
<property>
67+
<name>hive.zookeeper.quorum</name>
68+
<value>${HIVE_ZOOKEEPER_QUORUM}</value>
69+
</property>
70+
<property>
71+
<name>hive.llap.daemon.service.hosts</name>
72+
<value>${HIVE_LLAP_DAEMON_SERVICE_HOSTS}</value>
73+
</property>
74+
</configuration>

packaging/src/docker/docker-compose.yml

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,14 +132,35 @@ services:
132132
- zookeeper_datalog:/datalog
133133
- zookeeper_logs:/logs
134134

135-
#TODO Tez AM container (in the meantime, the HS2(with local Tez AM) + LLAP daemon setup is working properly)
136-
# 1. Define and use a Tez AM image from HIVE-29419 or TEZ-4682
137-
# 2. Configure TezAM to use Zookeeper Llap Registry to discover the LLAP daemon
138-
# 3. Configure HiveServer2 to use the Tez AM Zookeeper Registry to discover the Tez AM
139-
# Prerequisites:
140-
# - tez-api 1.0.0-SNAPSHOT jar injected into HiveSever2 until Tez 1.0.0 is released
141-
# - make HIVE-29477 happen to let HiveServer2 use Tez external sessions
142-
# 4. Define hadoop components here to be used by all the containers (working example can be found at TEZ-4682), currently a local volume
135+
tezam:
136+
profiles:
137+
- llap
138+
image: apache/hive:${HIVE_VERSION}
139+
container_name: tezam
140+
hostname: tezam
141+
depends_on:
142+
- zookeeper
143+
restart: on-failure:3
144+
environment:
145+
USER: hive
146+
SERVICE_NAME: 'tezam'
147+
148+
TEZ_FRAMEWORK_MODE: STANDALONE_ZOOKEEPER
149+
TEZ_AM_ZOOKEEPER_QUORUM: zookeeper:2181
150+
151+
# LLAP daemon discovery
152+
HIVE_ZOOKEEPER_QUORUM: zookeeper:2181
153+
HIVE_LLAP_DAEMON_SERVICE_HOSTS: '@llap0'
154+
155+
# Directories shared between HiveServer2 and LLAP daemon
156+
HIVE_SCRATCH_DIR: /opt/hive/scratch
157+
HIVE_QUERY_RESULTS_CACHE_DIRECTORY: /opt/hive/scratch/_resultscache_
158+
159+
volumes:
160+
- warehouse:/opt/hive/data/warehouse
161+
- scratch:/opt/hive/scratch
162+
networks:
163+
- hive
143164

144165
llapdaemon:
145166
profiles:

packaging/src/docker/entrypoint.sh

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,17 @@ export HIVE_WAREHOUSE_PATH="${HIVE_WAREHOUSE_PATH:-/opt/hive/data/warehouse}"
4141
export HIVE_SCRATCH_DIR="${HIVE_SCRATCH_DIR:-/opt/hive/scratch}"
4242
# Default from the variable's own value (not HIVE_WAREHOUSE_PATH) so an explicit
# HIVE_QUERY_RESULTS_CACHE_DIRECTORY override is honoured.
export HIVE_QUERY_RESULTS_CACHE_DIRECTORY="${HIVE_QUERY_RESULTS_CACHE_DIRECTORY:-/opt/hive/scratch/_resultscache_}"
4343

44+
export HIVE_ZOOKEEPER_QUORUM="${HIVE_ZOOKEEPER_QUORUM:-zookeeper:2181}"
45+
export HIVE_LLAP_DAEMON_SERVICE_HOSTS="${HIVE_LLAP_DAEMON_SERVICE_HOSTS:-@llap0}"
46+
47+
export HIVE_SERVER2_TEZ_USE_EXTERNAL_SESSIONS="${HIVE_SERVER2_TEZ_USE_EXTERNAL_SESSIONS:-true}"
48+
export TEZ_FRAMEWORK_MODE="${TEZ_FRAMEWORK_MODE:-STANDALONE_ZOOKEEPER}"
49+
export TEZ_AM_REGISTRY_NAMESPACE="${TEZ_AM_REGISTRY_NAMESPACE:-/tez_am/server}"
50+
export TEZ_AM_ZOOKEEPER_QUORUM="${TEZ_AM_ZOOKEEPER_QUORUM:-${HIVE_ZOOKEEPER_QUORUM}}"
51+
4452
envsubst < $HIVE_HOME/conf/core-site.xml.template > $HIVE_HOME/conf/core-site.xml
4553
envsubst < $HIVE_HOME/conf/hive-site.xml.template > $HIVE_HOME/conf/hive-site.xml
54+
envsubst < $HIVE_HOME/conf/tez-site.xml.template > $HIVE_HOME/conf/tez-site.xml
4655
# =========================================================================
4756

4857
: "${DB_DRIVER:=derby}"
@@ -68,9 +77,30 @@ function initialize_hive {
6877
fi
6978
}
7079

80+
#######################################
# Append a fixed set of JVM flags (module --add-opens entries plus the
# ByteBuddy experimental switch) to a space-separated options string,
# skipping any flag that is already present as a whole word.
# Arguments:
#   $1 - NAME of the variable holding the options string. It is updated in
#        place through a nameref (bash 4.3+). The caller's variable must not
#        be named _opts, opens or opt, which are used locally here.
#######################################
function append_java_opens {
  local -n _opts=$1
  local opt
  local opens=(
    "--add-opens=java.base/java.lang=ALL-UNNAMED"
    "--add-opens=java.base/java.util=ALL-UNNAMED"
    "--add-opens=java.base/java.io=ALL-UNNAMED"
    "--add-opens=java.base/java.net=ALL-UNNAMED"
    "--add-opens=java.base/java.nio=ALL-UNNAMED"
    "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED"
    "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED"
    "--add-opens=java.base/java.util.regex=ALL-UNNAMED"
    "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED"
    "--add-opens=java.sql/java.sql=ALL-UNNAMED"
    "--add-opens=java.base/java.text=ALL-UNNAMED"
    "-Dnet.bytebuddy.experimental=true"
  )
  for opt in "${opens[@]}"; do
    # Pad both sides with spaces so only whole-word matches count as present.
    if [[ " ${_opts} " != *" ${opt} "* ]]; then
      _opts="${_opts} ${opt}"
    fi
  done
}
102+
71103
function run_llap {
72-
export HIVE_ZOOKEEPER_QUORUM="${HIVE_ZOOKEEPER_QUORUM:-zookeeper:2181}"
73-
export HIVE_LLAP_DAEMON_SERVICE_HOSTS="${HIVE_LLAP_DAEMON_SERVICE_HOSTS:-@llap0}"
74104
export LLAP_MEMORY_MB="${LLAP_MEMORY_MB:-1024}"
75105
export LLAP_EXECUTORS="${LLAP_EXECUTORS:-1}"
76106

@@ -87,25 +117,8 @@ function run_llap {
87117
export LLAP_DAEMON_CONF_DIR="${LLAP_DAEMON_CONF_DIR:-$HIVE_CONF_DIR}"
88118
export LLAP_DAEMON_USER_CLASSPATH="${LLAP_DAEMON_USER_CLASSPATH:-$TEZ_HOME/*:$TEZ_HOME/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/tools/lib/*}"
89119

90-
JAVA_ADD_OPENS=(
91-
"--add-opens=java.base/java.lang=ALL-UNNAMED"
92-
"--add-opens=java.base/java.util=ALL-UNNAMED"
93-
"--add-opens=java.base/java.io=ALL-UNNAMED"
94-
"--add-opens=java.base/java.net=ALL-UNNAMED"
95-
"--add-opens=java.base/java.nio=ALL-UNNAMED"
96-
"--add-opens=java.base/java.util.concurrent=ALL-UNNAMED"
97-
"--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED"
98-
"--add-opens=java.base/java.util.regex=ALL-UNNAMED"
99-
"--add-opens=java.base/java.lang.reflect=ALL-UNNAMED"
100-
"--add-opens=java.sql/java.sql=ALL-UNNAMED"
101-
"--add-opens=java.base/java.text=ALL-UNNAMED"
102-
"-Dnet.bytebuddy.experimental=true"
103-
)
104-
for opt in "${JAVA_ADD_OPENS[@]}"; do
105-
if [[ " ${LLAP_DAEMON_OPTS:-} " != *" ${opt} "* ]]; then
106-
LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS:-} ${opt}"
107-
fi
108-
done
120+
LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS:-}"
121+
append_java_opens LLAP_DAEMON_OPTS
109122

110123
if [[ -n "${LLAP_EXTRA_OPTS:-}" ]]; then
111124
export LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS:-} ${LLAP_EXTRA_OPTS}"
@@ -121,6 +134,35 @@ function run_llap {
121134
exec "${LLAP_RUN_SCRIPT}" run "$@"
122135
}
123136

137+
#######################################
# Run the Tez DAGAppMaster in session mode in the foreground, then exit the
# script with the JVM's exit status.
# Globals:
#   read:     HIVE_CONF_DIR, JAVA_HOME, HADOOP_CLIENT_OPTS, HADOOP_CLASSPATH,
#             HOSTNAME
#   exported (defaulted when unset or empty): USER, LOCAL_DIRS, LOG_DIRS,
#             APP_SUBMIT_TIME_ENV, TEZ_AM_EXTERNAL_ID, HADOOP_HOME, TEZ_HOME,
#             HIVE_HOME, HADOOP_CONF_DIR, TEZ_CONF_DIR
# Arguments:
#   "$@" - extra arguments passed to DAGAppMaster after --session
# Returns:
#   Never returns; always calls exit with DAGAppMaster's exit code.
#######################################
function run_tezam {
  : "${USER:=hive}"
  : "${LOCAL_DIRS:="/tmp"}"
  : "${LOG_DIRS:="/opt/tez/logs"}"
  # Current time in epoch milliseconds (seconds * 1000).
  : "${APP_SUBMIT_TIME_ENV:=$(($(date +%s) * 1000))}"
  : "${TEZ_AM_EXTERNAL_ID:="tez-session-${HOSTNAME:-tezam}"}"
  export USER LOCAL_DIRS LOG_DIRS APP_SUBMIT_TIME_ENV TEZ_AM_EXTERNAL_ID

  export HADOOP_HOME="${HADOOP_HOME:-/opt/hadoop}"
  export TEZ_HOME="${TEZ_HOME:-/opt/tez}"
  export HIVE_HOME="${HIVE_HOME:-/opt/hive}"
  export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HIVE_CONF_DIR}"
  export TEZ_CONF_DIR="${TEZ_CONF_DIR:-$HADOOP_CONF_DIR}"

  local tezam_cp
  local java_bin
  local tezam_java_opts
  local rc=0

  # service_plugins_descriptor.json references org.apache.hadoop.hive.llap.tezplugins.* (hive-llap-tez, etc.)
  tezam_cp="${HADOOP_CONF_DIR}:${TEZ_CONF_DIR}:${TEZ_HOME}/*:${TEZ_HOME}/lib/*:${HIVE_HOME}/lib/*:${HADOOP_HOME}/share/hadoop/common/*:${HADOOP_HOME}/share/hadoop/common/lib/*:${HADOOP_HOME}/share/hadoop/yarn/*:${HADOOP_HOME}/share/hadoop/yarn/lib/*:${HADOOP_HOME}/share/hadoop/hdfs/*:${HADOOP_HOME}/share/hadoop/hdfs/lib/*:${HADOOP_HOME}/share/hadoop/mapreduce/*:${HADOOP_HOME}/share/hadoop/mapreduce/lib/*:${HADOOP_CLASSPATH:-}"

  # Use $JAVA_HOME/bin/java when JAVA_HOME is set, otherwise whatever "java" resolves to.
  java_bin="${JAVA_HOME:+$JAVA_HOME/bin/}java"
  tezam_java_opts="${HADOOP_CLIENT_OPTS:-} -Dlog4j.configuration=file:${HIVE_CONF_DIR}/tez-log4j.properties"
  append_java_opens tezam_java_opts

  # The options string is intentionally left unquoted so it splits into
  # separate JVM arguments. "|| rc=$?" records the status even when errexit
  # is active.
  # shellcheck disable=SC2086
  "${java_bin}" ${tezam_java_opts} -cp "${tezam_cp}" org.apache.tez.dag.app.DAGAppMaster --session "$@" || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    echo "DAGAppMaster exited with code ${rc}. See logs above for details." >&2
  fi
  exit "${rc}"
}
165+
124166
export HIVE_CONF_DIR=$HIVE_HOME/conf
125167
if [ -d "${HIVE_CUSTOM_CONF_DIR:-}" ]; then
126168
find "${HIVE_CUSTOM_CONF_DIR}" -type f -exec \
@@ -129,7 +171,7 @@ if [ -d "${HIVE_CUSTOM_CONF_DIR:-}" ]; then
129171
export TEZ_CONF_DIR=$HIVE_CONF_DIR
130172
fi
131173

132-
export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Xmx1G $SERVICE_OPTS"
174+
export HADOOP_CLIENT_OPTS="${HADOOP_CLIENT_OPTS:-} -Xmx1G ${SERVICE_OPTS:-}"
133175
if [[ "${SKIP_SCHEMA_INIT}" == "false" && ( "${SERVICE_NAME}" == "hiveserver2" || "${SERVICE_NAME}" == "metastore" ) ]]; then
134176
# handles schema initialization
135177
initialize_hive
@@ -147,4 +189,6 @@ elif [ "${SERVICE_NAME}" == "metastore" ]; then
147189
fi
148190
elif [ "${SERVICE_NAME}" == "llap" ]; then
149191
run_llap "$@"
192+
elif [ "${SERVICE_NAME}" == "tezam" ]; then
193+
run_tezam "$@"
150194
fi

0 commit comments

Comments
 (0)