我有一个 docker-compose.yml 文件。我如何将 Apache Hive 与 Apache Hadoop 连接起来?我想在 Hive 上创建表,从 API 中提取数据并加载到 Hadoop HDFS 中。是否还有另一种方法,比如编写 Dockerfile 来配置 Apache Hive 连接到 Hadoop?
# Docker Compose stack: Hadoop (HDFS NameNode/DataNode + YARN) alongside
# Hive (metastore + HiveServer2), with Postgres backing the Hive metastore.
version: '3.9'

services:
  # Postgres database backing the Hive metastore.
  postgres:
    image: postgres
    restart: unless-stopped
    container_name: postgres
    hostname: postgres
    environment:
      POSTGRES_DB: 'metastore_db'
      POSTGRES_USER: 'hive'
      POSTGRES_PASSWORD: 'hive'
    ports:
      - '5432:5432'
    volumes:
      - hive-db:/var/lib/postgresql

  # HDFS NameNode — filesystem metadata. Web UI on 9870, client RPC on 8020.
  namenode:
    image: apache/hadoop:3
    container_name: namenode
    hostname: namenode
    command: ["hdfs", "namenode"]
    ports:
      - '9870:9870'
      - '8020:8020'
    env_file:
      - ./hadoop.env
    environment:
      ENSURE_NAMENODE_DIR: "/tmp/hadoop-root/dfs/name"
    volumes:
      - ./data/namenode:/hadoop/dfs/name

  # HDFS DataNode — block storage. Web UI on 9864.
  datanode:
    image: apache/hadoop:3
    container_name: datanode
    command: ["hdfs", "datanode"]
    ports:
      - '9864:9864'
    env_file:
      - ./hadoop.env
    volumes:
      - ./data/datanode:/hadoop/dfs/data

  # YARN ResourceManager — cluster scheduler. Web UI on 8088.
  resourcemanager:
    image: apache/hadoop:3
    hostname: resourcemanager
    command: ["yarn", "resourcemanager"]
    ports:
      - '8088:8088'
    env_file:
      - ./hadoop.env
    volumes:
      - ./test.sh:/opt/test.sh

  # YARN NodeManager — per-node container executor.
  nodemanager:
    image: apache/hadoop:3
    command: ["yarn", "nodemanager"]
    env_file:
      - ./hadoop.env

  # Hive Metastore service (Thrift on 9083); persists metadata in Postgres
  # via the JDBC settings passed through SERVICE_OPTS.
  metastore:
    image: apache/hive:4.0.0-alpha-2
    depends_on:
      - postgres
      - namenode
      - resourcemanager
      - datanode
    restart: unless-stopped
    container_name: metastore
    hostname: metastore
    environment:
      DB_DRIVER: postgres
      SERVICE_NAME: 'metastore'
      # HIVE_CUSTOM_CONF_DIR: ./hive_custom_conf
      SERVICE_OPTS: '-Xmx1G
        -Djavax.jdo.option.ConnectionDriverName=org.postgresql.Driver
        -Djavax.jdo.option.ConnectionURL=jdbc:postgresql://postgres:5432/metastore_db
        -Djavax.jdo.option.ConnectionUserName=hive
        -Djavax.jdo.option.ConnectionPassword=hive'
    ports:
      - '9083:9083'
    volumes:
      - warehouse:/opt/hive/data/warehouse
      # - /opt/hive/conf:./hive_custom_conf

  # HiveServer2 — JDBC/Thrift endpoint on 10000, web UI on 10002;
  # talks to the metastore service over Thrift.
  hiveserver2:
    image: apache/hive:4.0.0-alpha-2
    depends_on:
      - metastore
    restart: unless-stopped
    container_name: hiveserver2
    environment:
      HIVE_SERVER2_THRIFT_PORT: '10000'
      SERVICE_OPTS: '-Xmx1G -Dhive.metastore.uris=thrift://metastore:9083'
      IS_RESUME: 'true'
      SERVICE_NAME: 'hiveserver2'
    ports:
      - '10000:10000'
      - '10002:10002'
    volumes:
      - warehouse:/opt/hive/data/warehouse

# Named volumes: shared Hive warehouse and the metastore DB's data directory.
volumes:
  warehouse:
  hive-db:

networks:
  default:
    driver: bridge
如何将 Apache Hive 镜像与 Apache Hadoop 镜像连接:
Hive 的行为就像一个普通的 Hadoop 客户端。您需要把包含集群地址的 core-site.xml 和 hdfs-site.xml 挂载到 Hive 容器内 HADOOP_CONF_DIR 所指向的配置目录(通常是 $HADOOP_HOME/etc/hadoop),并将其中的 fs.defaultFS 指向 namenode 容器,例如 hdfs://namenode:8020。这样 Hive 创建的表数据就会写入 Hadoop 容器提供的 HDFS,而不是本地文件系统。