Hadoop Cluster Deployment: hadoop-2.7.3 + hbase-1.4.13 + hive-2.3.4 + zookeeper-3.4.14

Published on 2020-05-13 · 2.21k views



#!/bin/bash
#
# hadoop cluster deploy

# cluster information:
# set hostname
[hadoop-master]
hostnamectl set-hostname hadoop-master
[hadoop-slave1]
hostnamectl set-hostname hadoop-slave1
[hadoop-slave2]
hostnamectl set-hostname hadoop-slave2

vi /etc/hosts # [all]
  172.26.11.14 hadoop-master
  172.26.11.17 hadoop-slave1
  172.26.11.16 hadoop-slave2

# use a static IP instead of DHCP
[hadoop-master]
nmcli connection modify ens33 ipv4.addresses 172.26.11.14/24 ipv4.gateway 172.26.11.254 ipv4.method manual autoconnect yes
[hadoop-slave1]
nmcli connection modify ens33 ipv4.addresses 172.26.11.17/24 ipv4.gateway 172.26.11.254 ipv4.method manual autoconnect yes
[hadoop-slave2]
nmcli connection modify ens33 ipv4.addresses 172.26.11.16/24 ipv4.gateway 172.26.11.254 ipv4.method manual autoconnect yes
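# nmcli only rewrites the connection profile; reactivate it to apply the new
# addressing (assumes the device/profile name ens33 from above):
nmcli connection up ens33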

# install jdk [all]
yum install -y java-1.8.0-openjdk

# ssh key configuration [all]
ssh-keygen -t rsa -P "" -f /root/.ssh/id_rsa
cat /root/.ssh/id_rsa.pub  >> /root/.ssh/authorized_keys
[m]
ssh-copy-id -i hadoop-slave1
ssh-copy-id -i hadoop-slave2
ssh hadoop-slave1
ssh hadoop-slave2
[s1]
ssh-copy-id -i hadoop-master
ssh hadoop-master
[s2]
ssh-copy-id -i hadoop-master
ssh hadoop-master
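
# the ssh logins above just verify passwordless access (and accept host keys
# on first use); a non-interactive check from any node, assuming the
# hostnames above:
for h in hadoop-master hadoop-slave1 hadoop-slave2; do ssh -o BatchMode=yes "$h" hostname; done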

# stop firewall [all]
systemctl stop firewalld && systemctl disable firewalld
# disable selinux [all]
sed -i "s#SELINUX=enforcing#SELINUX=disabled#" /etc/selinux/config
setenforce 0
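getenforce # should now report Permissive (and Disabled after a reboot)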



# install hadoop start
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz
tar -xvf hadoop-2.7.3.tar.gz -C /usr/local/
mv /usr/local/hadoop-2.7.3 /usr/local/hadoop

vi /etc/profile # [all nodes] JAVA_HOME may differ per node
  export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64
  export PATH=$JAVA_HOME/bin:$PATH
  export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
  export HADOOP_HOME=/usr/local/hadoop
  export PATH=$PATH:$HADOOP_HOME/bin
source /etc/profile
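
# quick sanity check that both the JDK and hadoop are on PATH:
java -version && hadoop version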

vi /usr/local/hadoop/etc/hadoop/core-site.xml
  <configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/hadoop/tmp</value>
<description>A base for other temporary directories.</description>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop-master:9000</value>
    </property>
  </configuration>

vi /usr/local/hadoop/etc/hadoop/hdfs-site.xml
  <configuration>
      <property>
          <name>dfs.replication</name>
          <value>3</value>
      </property>
      <!-- dfs.name.dir / dfs.data.dir are the deprecated 1.x names of
           dfs.namenode.name.dir / dfs.datanode.data.dir; both still work in 2.x -->
      <property>
          <name>dfs.name.dir</name>
          <value>/usr/local/hadoop/hdfs/name</value>
      </property>
      <property>
          <name>dfs.data.dir</name>
          <value>/usr/local/hadoop/hdfs/data</value>
      </property>
  </configuration>

cp /usr/local/hadoop/etc/hadoop/mapred-site.xml.template /usr/local/hadoop/etc/hadoop/mapred-site.xml
vi /usr/local/hadoop/etc/hadoop/mapred-site.xml
  <configuration>
      <property>
          <name>mapreduce.framework.name</name>
          <value>yarn</value>
      </property>
      <!-- legacy MR1 JobTracker address; ignored when the framework is yarn -->
      <property>
          <name>mapred.job.tracker</name>
          <value>http://hadoop-master:9001</value>
      </property>
  </configuration>

vi /usr/local/hadoop/etc/hadoop/yarn-site.xml
  <configuration>
      <!-- Site specific YARN configuration properties -->
      <property>
          <name>yarn.nodemanager.aux-services</name>
          <value>mapreduce_shuffle</value>
      </property>
      <property>
          <name>yarn.resourcemanager.hostname</name>
          <value>hadoop-master</value>
      </property>
  </configuration>

vi /usr/local/hadoop/etc/hadoop/masters
  hadoop-master

[hadoop-master ONLY]
vi /usr/local/hadoop/etc/hadoop/slaves # note: the file name is "slaves"; list only hosts that exist
  hadoop-slave1
  hadoop-slave2

# distribute
[m]
scp -r /usr/local/hadoop  hadoop-slave1:/usr/local/
ssh hadoop-slave1 'rm -rf /usr/local/hadoop/etc/hadoop/slaves'

scp -r /usr/local/hadoop  hadoop-slave2:/usr/local/
ssh hadoop-slave2 'rm -rf /usr/local/hadoop/etc/hadoop/slaves'

# start hadoop
[m]
/usr/local/hadoop/bin/hdfs namenode -format # "hadoop namenode -format" is deprecated in 2.x
/usr/local/hadoop/sbin/start-dfs.sh
/usr/local/hadoop/sbin/start-yarn.sh
/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver
jps
  70803 Jps
  53097 SecondaryNameNode
  53738 JobHistoryServer
  52795 NameNode
  52925 DataNode
  53549 NodeManager

hdfs dfsadmin -report
  Configured Capacity: 117710168064 (109.63 GB)
  Present Capacity: 100732221612 (93.81 GB)
  DFS Remaining: 100083512292 (93.21 GB)
  DFS Used: 648709320 (618.66 MB)
  DFS Used%: 0.64%
  Under replicated blocks: 0
  Blocks with corrupt replicas: 0
  Missing blocks: 0
  Missing blocks (with replication factor 1): 0
  ......

# NameNode web UI:        http://172.26.11.14:50070
# ResourceManager web UI: http://172.26.11.14:8088
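# optional smoke test: run the bundled MapReduce pi example on YARN
/usr/local/hadoop/bin/hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar pi 2 10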
# install hadoop end


# install hive-2.3.4 start

wget http://archive.apache.org/dist/hive/hive-2.3.4/apache-hive-2.3.4-bin.tar.gz
tar -xvf apache-hive-2.3.4-bin.tar.gz -C /usr/local

# JDBC
wget https://downloads.mysql.com/archives/get/p/3/file/mysql-connector-java-5.1.47.tar.gz
tar -xvf mysql-connector-java-5.1.47.tar.gz
cp mysql-connector-java-5.1.47/mysql-connector-java-5.1.47-bin.jar /usr/local/apache-hive-2.3.4-bin/lib/mysql-connector-java.jar

vi /etc/profile # [all]
  # hive
  export HIVE_HOME=/usr/local/apache-hive-2.3.4-bin
  export HIVE_CONF_DIR=$HIVE_HOME/conf
  export PATH=$HIVE_HOME/bin:$PATH

cd /usr/local/apache-hive-2.3.4-bin/conf
cp beeline-log4j2.properties.template beeline-log4j2.properties
cp hive-env.sh.template hive-env.sh
cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties
cp hive-log4j2.properties.template hive-log4j2.properties
cp llap-cli-log4j2.properties.template llap-cli-log4j2.properties
cp llap-daemon-log4j2.properties.template llap-daemon-log4j2.properties
cp hive-default.xml.template hive-site.xml

vi /usr/local/apache-hive-2.3.4-bin/conf/hive-site.xml # modify the configuration items below
    <name>javax.jdo.option.ConnectionURL</name>
    <!-- & must be escaped as &amp; inside XML -->
    <value>jdbc:mysql://127.0.0.1:3306/hive?characterEncoding=UTF8&amp;useSSL=false&amp;createDatabaseIfNotExist=true</value>

    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>

    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>

    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
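
# NOTE (assumption): since hive-site.xml was copied from hive-default.xml.template,
# it still contains ${system:java.io.tmpdir} / ${system:user.name} placeholders
# that Hive 2.x cannot resolve at runtime; one way to point them at a real dir:
mkdir -p /usr/local/apache-hive-2.3.4-bin/tmp
sed -i 's#${system:java.io.tmpdir}#/usr/local/apache-hive-2.3.4-bin/tmp#g;s#${system:user.name}#root#g' /usr/local/apache-hive-2.3.4-bin/conf/hive-site.xml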

vi /usr/local/apache-hive-2.3.4-bin/conf/hive-env.sh
  export HIVE_HOME=/usr/local/apache-hive-2.3.4-bin
  export HADOOP_HEAPSIZE=1024
  export HADOOP_HOME=/usr/local/hadoop
  export HIVE_CONF_DIR=/usr/local/apache-hive-2.3.4-bin/conf/
  export HIVE_AUX_JARS_PATH=/usr/local/apache-hive-2.3.4-bin/lib/
  export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$HIVE_HOME/lib/*

# install mysql
yum install -y https://dev.mysql.com/get/mysql57-community-release-el7-9.noarch.rpm
yum install -y mysql-community-server
systemctl start mysqld && systemctl enable mysqld
    # get MySQL root's temporary password from the log
    grep password /var/log/mysqld.log | awk '{print $NF}'
    # secure the MySQL installation
    mysql_secure_installation
    # mycli is highly recommended; install it with:
    yum install -y python3 && python3 -m pip install mycli

mysql -uroot -p"YOUR_PASSWORD"
  mysql>    set global validate_password_policy=LOW;
            set global validate_password_length=3;
            set global validate_password_mixed_case_count=0;
            set global validate_password_number_count=0;
            set global validate_password_special_char_count=0;

            create database hive DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
            grant all privileges on *.* to 'hive'@'%' identified by 'hive' with grant option;
            flush privileges;
            exit

/usr/local/apache-hive-2.3.4-bin/bin/schematool -initSchema -dbType mysql
  ...........
  Initialization script completed
  schemaTool completed
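
# sanity check: the metastore tables should now exist in MySQL
# (hive/hive credentials as created above)
mysql -uhive -phive -e 'use hive; show tables;'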

hdfs dfs -mkdir -p /tmp
hdfs dfs -mkdir -p /user/hive/warehouse # hive's default warehouse dir is /user/hive/warehouse, not /usr/...
hdfs dfs -chmod g+w /tmp
hdfs dfs -chmod g+w /user/hive/warehouse

hive
  hive > show databases;

# install hive-2.3.4 end


# install zookeeper start

[m]
wget https://downloads.apache.org/zookeeper/zookeeper-3.4.14/zookeeper-3.4.14.tar.gz
tar -xvf zookeeper-3.4.14.tar.gz  -C /usr/local/

vi /etc/profile # [all]
  # zookeeper
  export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.14
  export PATH=$PATH:$ZOOKEEPER_HOME/bin
source /etc/profile

cp  /usr/local/zookeeper-3.4.14/conf/zoo_sample.cfg   /usr/local/zookeeper-3.4.14/conf/zoo.cfg
vi /usr/local/zookeeper-3.4.14/conf/zoo.cfg
  tickTime=2000
  initLimit=10
  syncLimit=5
  dataDir=/usr/local/zookeeper-3.4.14/data
  clientPort=2181
  server.1=hadoop-master:2888:3888
  server.2=hadoop-slave1:2888:3888
  server.3=hadoop-slave2:2888:3888

mkdir -p /usr/local/zookeeper-3.4.14/data # the dataDir must exist before writing myid
echo 1 > /usr/local/zookeeper-3.4.14/data/myid
# distribute
scp -r /usr/local/zookeeper-3.4.14/ hadoop-slave1:/usr/local/
scp -r /usr/local/zookeeper-3.4.14/ hadoop-slave2:/usr/local/
ssh hadoop-slave1  'echo 2 >/usr/local/zookeeper-3.4.14/data/myid'
ssh hadoop-slave2  'echo 3 >/usr/local/zookeeper-3.4.14/data/myid'

# start zookeeper [every node should start zk manually]
/usr/local/zookeeper-3.4.14/bin/zkServer.sh start
/usr/local/zookeeper-3.4.14/bin/zkServer.sh status

ssh hadoop-slave1 '/usr/local/zookeeper-3.4.14/bin/zkServer.sh start'
ssh hadoop-slave1 '/usr/local/zookeeper-3.4.14/bin/zkServer.sh status'
ssh hadoop-slave2 '/usr/local/zookeeper-3.4.14/bin/zkServer.sh start'
ssh hadoop-slave2 '/usr/local/zookeeper-3.4.14/bin/zkServer.sh status'
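# across the three nodes, expect one "Mode: leader" and two "Mode: follower"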

/usr/local/zookeeper-3.4.14/bin/zkCli.sh
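  ls /   # quick smoke test inside the zk shell; a fresh ensemble lists [zookeeper]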

# install zookeeper end


# install hbase-1.4.13 start

# time synchronization [all]
yum install -y ntp
ntpdate ntp1.aliyun.com
echo "ntpdate ntp1.aliyun.com" >> /etc/rc.local

wget http://mirrors.hust.edu.cn/apache/hbase/1.4.13/hbase-1.4.13-bin.tar.gz
tar -xvf hbase-1.4.13-bin.tar.gz -C /usr/local

vi /etc/profile
  # hbase
  export HBASE_HOME=/usr/local/hbase-1.4.13
  export PATH=$PATH:$HBASE_HOME/bin
source /etc/profile

vi /usr/local/hbase-1.4.13/conf/hbase-env.sh
  # JAVA_HOME may differ, even across nodes, if you installed the JDK with yum
  export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64
  export HBASE_MANAGES_ZK=false

vi /usr/local/hbase-1.4.13/conf/hbase-site.xml
  <configuration>
      <property>
          <name>hbase.cluster.distributed</name>
          <value>true</value>
      </property>
      <property>
          <name>hbase.zookeeper.property.dataDir</name>
          <value>/usr/local/zookeeper-3.4.14/data</value>
      </property>
      <property>
          <name>hbase.rootdir</name>
          <value>hdfs://hadoop-master:9000/hbase</value>
      </property>
      <property>
          <name>hbase.zookeeper.property.clientPort</name>
          <value>2181</value>
      </property>
      <property>
          <name>hbase.zookeeper.quorum</name>
          <value>hadoop-master:2181,hadoop-slave1:2181,hadoop-slave2:2181</value>
      </property>
  </configuration>
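# NOTE: hbase.rootdir must use the same authority as fs.defaultFS in
# core-site.xml (hdfs://hadoop-master:9000), or HBase will not find HDFS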

vi /usr/local/hbase-1.4.13/conf/regionservers
  hadoop-master
  hadoop-slave1
  hadoop-slave2

vi /usr/local/hbase-1.4.13/conf/backup-masters
  hadoop-slave1

cp /usr/local/hadoop/etc/hadoop/core-site.xml  /usr/local/hbase-1.4.13/conf/
cp /usr/local/hadoop/etc/hadoop/hdfs-site.xml  /usr/local/hbase-1.4.13/conf/

# distribute
rm -rf /usr/local/hbase-1.4.13/docs/* # the docs are big; dropping them speeds up the scp below
scp -r /usr/local/hbase-1.4.13/ hadoop-slave1:/usr/local/
scp -r /usr/local/hbase-1.4.13/ hadoop-slave2:/usr/local/

# start hbase [m]
/usr/local/hbase-1.4.13/bin/start-hbase.sh
# HBase Master web UI: http://[IP]:16010
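# quick sanity check: "status" from the hbase shell prints live/dead servers
echo "status" | /usr/local/hbase-1.4.13/bin/hbase shell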

# install hbase-1.4.13 end



# start the whole cluster (zookeeper + hadoop + hbase)

# stop all services if they were started before
[m]
/usr/local/hadoop/sbin/stop-dfs.sh
/usr/local/hadoop/sbin/stop-yarn.sh
/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh stop historyserver
/usr/local/zookeeper-3.4.14/bin/zkServer.sh stop
/usr/local/hbase-1.4.13/bin/stop-hbase.sh

[s1, s2]
/usr/local/zookeeper-3.4.14/bin/zkServer.sh stop

# start the cluster STRICTLY IN THIS ORDER
[zookeeper ALL nodes]
/usr/local/zookeeper-3.4.14/bin/zkServer.sh start

[hadoop master node]
/usr/local/hadoop/sbin/start-dfs.sh
/usr/local/hadoop/sbin/start-yarn.sh
/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver

[hbase master node]
/usr/local/hbase-1.4.13/bin/start-hbase.sh

# use jps to check all services' status
/usr/bin/jps
  52578 QuorumPeerMain
  53266 ResourceManager
  54162 HRegionServer
  53097 SecondaryNameNode
  53738 JobHistoryServer
  52795 NameNode
  52925 DataNode
  53549 NodeManager
  55469 RunJar
  54015 HMaster
  75102 Jps


# done