两种配置大数据环境的方法Ambari以及hadoop源代码安装的步骤

2022-10-31 22:00:28

1.Ambari安装

Ambari & HDP（Hortonworks Data Platform）

*****************************************************************************************************

Base：

0.操作系统版本原则上应与所用HDP版本相对应（rhel6 or rhel7）。

1.操作系统原则上完全安装(Desktop)，所有的包都安装。

2.关闭防火墙，IPV6等服务（海涛Python脚本）。SELinux-->>IPv6-->>Iptables

_____________________________________________________________

SELINUX:

vim /etc/selinux/config

SELINUX=disabled

或者：

sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config;

_____________________________________________________________

IPV6：

chkconfig ip6tables off

cat>>/etc/modprobe.d/ECS.conf<<EOF

alias net-pf-10 off

alias ipv6 off

EOF

cat>>/etc/sysconfig/network<<EOF

NETWORKING_IPV6=off

EOF

cat>>/etc/modprobe.d/disable-ipv6.conf<<EOF

install ipv6 /bin/true

EOF

cat>>/etc/modprobe.d/dist.conf<<EOF

alias net-pf-10 off

alias ipv6 off

EOF

cat>>/etc/sysctl.conf<<EOF

net.ipv6.conf.all.disable_ipv6 = 1

EOF

_____________________________________________________________

iptables:

chkconfig iptables off

_____________________________________________________________

ONBOOT:

sed -i 's/ONBOOT=no/ONBOOT=yes/g' /etc/sysconfig/network-scripts/ifcfg-eth0

sed -i 's/ONBOOT=no/ONBOOT=yes/g' /etc/sysconfig/network-scripts/ifcfg-eth1

sed -i 's/ONBOOT=no/ONBOOT=yes/g' /etc/sysconfig/network-scripts/ifcfg-eth2

sed -i 's/ONBOOT=no/ONBOOT=yes/g' /etc/sysconfig/network-scripts/ifcfg-eth3

sed -i 's/ONBOOT=no/ONBOOT=yes/g' /etc/sysconfig/network-scripts/ifcfg-eth4

_____________________________________________________________

Swap Closed

cat >> /etc/sysctl.conf << EOF

vm.swappiness=0

EOF

_____________________________________________________________

Time Zone:

cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

_____________________________________________________________

*****************************************************************************************************

/etc/sysconfig/network

Hostname

*****************************************************************************************************

/etc/hosts:

127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4

::1 localhost

172.31.200.7 data1

172.31.200.8 data2

172.31.200.9 data3

why not?

127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4

::1 localhost localhost.localdomain localhost6 localhost6.localdomain6

172.31.200.7 data1

172.31.200.8 data2

172.31.200.9 data3

*****************************************************************************************************

PackageKit

pkill -9 packagekitd

vim /etc/yum/pluginconf.d/refresh-packagekit.conf

enabled=0

*****************************************************************************************************

THP(Transparent Huge Pages):

echo never > /sys/kernel/mm/redhat_transparent_hugepage/enabled

echo never > /sys/kernel/mm/redhat_transparent_hugepage/defrag

*****************************************************************************************************

ulimit & nproc

[root@data2 yum.repos.d]# vim /etc/security/limits.conf

* soft nproc 16384

* hard nproc 16384

* soft nofile 65536

* hard nofile 65536

*****************************************************************************************************

REBOOT all the machine

*****************************************************************************************************

REPO for rhel:

first:

[root@server2 opt]# cd /etc/yum.repos.d/

[root@server2 yum.repos.d]# ls -al

drwxr-xr-x. 2 root root 4096 3月 22 04:02 .

drwxr-xr-x. 182 root root 16384 4月 14 22:27 ..

-rw-r--r--. 1 root root 1991 10月 23 2014 CentOS-Base.repo

-rw-r--r--. 1 root root 647 10月 23 2014 CentOS-Debuginfo.repo

-rw-r--r--. 1 root root 289 10月 23 2014 CentOS-fasttrack.repo

-rw-r--r--. 1 root root 630 10月 23 2014 CentOS-Media.repo

-rw-r--r--. 1 root root 5394 10月 23 2014 CentOS-Vault.repo

-rw-r--r--. 1 root root 270 12月 15 14:36 cloudera.repo

-rw-r--r--. 1 root root 134 12月 8 08:31 rhel65.repo

rm -rf ALL

---->>>>>>we don't get internet connection.

second:

[root@data2 yum.repos.d]# cat centos6.6.repo

[centos6]

name=cloudera

baseurl=http://172.31.200.216/centos6

enabled=1

gpgcheck=0

gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-redhat-release

scp /etc/yum.repos.d/centos6.6.repo root@Hostname:/etc/yum.repos.d/

yum clean all

yum search lib*

*****************************************************************************************************

SSH:

yum install openssl

yum upgrade openssl

rm -rf ~/.ssh/*

ssh-keygen -t rsa -f ~/.ssh/id_rsa -N ''

cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

scp -r ~/.ssh root@172.31.200.8:~/.ssh

chmod 700 ~/.ssh

注意：chmod 777 为什么不行？？？

*****************************************************************************************************

jdk:

rpm -ivh jdk-7XXX-linux-XXXX.rpm

echo "JAVA_HOME=/usr/java/latest/">> /etc/environment

java -version

*****************************************************************************************************

NTP:

ntp-master node

[root@data1 yum.repos.d]# vim /etc/ntp.conf

server data1 prefer

server 127.127.1.0

fudge 127.127.1.0 stratum 10

service ntpd restart

[root@data1 yum.repos.d]# chkconfig --list ntpd

ntp-client node (all other nodes)

cat >> /var/spool/cron/root << EOF

*/10 * * * * /usr/sbin/ntpdate NameNode && /sbin/clock -w

EOF

service ntpd restart

ntpdate -u NameNode

*****************************************************************************************************

/var/www/html：

which httpd

or

yum install httpd

tar -xzf HDP-UTILS-1.1.0.20-centos6.tar.gz

tar -xzf AMBARI-2.1.2-377-centos6.tar.gz

tar -xzf HDP-2.3.0.0-centos6-rpm.tar.gz

check whether the listening port of http service is blocked.

---->>>>netstat -nltp | grep 80

---->>>>vim /etc/httpd/conf/httpd.conf

change value of the default port

service httpd start

*****************************************************************************************************

Repo for HDP & Ambari

[root@data2 yum.repos.d]# cat ambari.repo

[Updates-ambari-2.1.2]

name=ambari-2.1.2-Updates

baseurl=http://data1/AMBARI-2.1.2/centos6

gpgcheck=0

enabled=1

[HDP-2.3.0.0]

name=HDP Version-HDP-2.3.0.0

baseurl=http://data1/HDP/centos6/2.x/updates/2.3.0.0

gpgcheck=0

enabled=1

[HDP-UTILS-1.1.0.20]

name=HDP Utils Version - HDP-UTILS-1.1.0.20

baseurl=http://data1/HDP-UTILS-1.1.0.20/repos/centos6

gpgcheck=0

enabled=1

scp /etc/yum.repos.d/ambari.repo root@Hostname:/etc/yum.repos.d/

yum clean all

yum search ambari-agent

yum search Oozie

yum search ganglia

*****************************************************************************************************

SO Address:

http://172.31.200.7/HDP/centos6/2.x/updates/2.3.0.0

http://172.31.200.7/HDP-UTILS-1.1.0.20/repos/centos6

*****************************************************************************************************

yum clean all

yum search ambari-server

yum search ambari-agent

yum search oozie

yum remove *****

Master：

yum install ambari-server

yum install ambari-agent

ambari-agent start

conf of ambari server:

/etc/ambari-server/conf/ambari.properties

Slave:

yum install ambari-agent

ambari-agent start

ambari-server start

ambari-server setup -j /usr/java/jdk1.7.0_71/

--->>>>Run the setup command to configure your Ambari Server, Database, JDK, LDAP, and other options:

--->>>>enter numeric number(n means default)

ambari-server start

http://MasterHostName:8080

Account：admin Password:admin

*****************************************************************************************************

Logs to see student:

See the log:

cat /var/log/ambari-agent/ambari-agent.log

cat /var/log/ambari-server/ambari-server.log

*****************************************************************************************************

To Do:

HDFS:

[root@data1 yum.repos.d]# su hdfs -c "hadoop fs -ls /"

[root@data1 yum.repos.d]# su hdfs -c "hadoop fs -mkdir /lgd"

MR:

Spark:

HBase:

Hive:

ES:

*******************************************************************************************************

FAQ

1, The hostname of the machine had better be a Fully Qualified Domain Name---->>>>>>>hostname.domain, such as data.hdp.worker1

2, Zookeeper-Agent端修改Server指向的HOSTNAME, /etc/ambari-agent/conf/ambari-agent.ini,如修改过主机hostname

安装失败后或重新安装时，先执行 ambari-server reset，然后执行 ambari-server setup

3, 最后一步安装可能会失败,多数原因是下载包错误引起的,可重复安装直到成功,本人反复几次最终成功了,网络,网络,尤其就朝民,各种干扰!

4, 如果遇到访问https://xxx:8440/ca的错误，升级openssl就可以。

5，Heartbeat lost for the host错误，检查出错节点的ambari-agent是否停止，ambari-agent是python脚本运行的，

可能遇到没有捕捉到的异常，导致进程crash或者停止了。

6，App Timeline server安装出错，retry解决。

7，如果出现乱码：echo 'LANG="en_US.UTF-8"' > /etc/sysconfig/i18n,修改字符集即可解决！

8, 如果安装linux的时候基础包未选择，缺包可以制作cdrom挂载，来安装即可解决！

9, selinux开启导致本地yum源访问403

10, CentOS 6.5 openssl 版本bug 导致 agent安装失败,解决 yum upgrade openssl

11,

*******************************************************************************************************

总结：

1，日志查看，追溯问题。

2，如果要安装一切顺利，可在安装操作系统时把linux基础组件一并安装！

补救方案为：

yum groupinstall "Compatibility libraries" "Base" "Development tools"

yum groupinstall "debugging Tools" "Dial-up Networking Support"

3，

*******************************************************************************************************

备注: + Ambari安装的环境路径:

各台机器的安装目录:

/usr/lib/hadoop

/usr/lib/hbase

/usr/lib/zookeeper

/usr/lib/hcatalog

/usr/lib/hive

+ Log路径, 这里需要看出错信息都可以在目录下找到相关的日志

/var/log/hadoop

/var/log/hbase

+ 配置文件的路径

/etc/hadoop

/etc/hbase

/etc/hive

+ HDFS的存储路径

/hadoop/hdfs

*******************************************************************************************************

其他1：

安装过程中使用了桌面，火狐等安装命令

yum install firefox

yum groupinstall -y "Desktop" "Desktop Platform" "Desktop Platform Development" "Fonts" "General Purpose Desktop" "Graphical Administration Tools" "Graphics Creation Tools" "Input Methods" "X Window System" "Chinese Support [zh]" "Internet Browser"

iso yum 源来安装一些基础包

sudo mount -o loop /home/whoami/rhel-server-6.7-x86_64-dvd.iso /mnt/cdimg/

$ cat rhel-source.repo

[rhel-Server]

name=Red Hat Server

baseurl=file:///mnt/cdimg

enabled=1

gpgcheck=0

*******************************************************************************************************

其他2：

Ambari配置时在Confirm Hosts的步骤时，中间遇到一个很奇怪的问题：总是报错误：

Ambari agent machine hostname (localhost.localdomain) does not match expected ambari server hostname (xxx).

后来修改的/etc/hosts文件中

修改前：

127.0.0.1 xxx localhost localhost.localdomain localhost4 localhost4.localdomain4

::1 xxx localhost localhost.localdomain localhost6 localhost6.localdomain6

修改后：

127.0.0.1 xxx localhost localhost.localdomain localhost4 localhost4.localdomain4

::1 xxx

感觉应该是走的ipv6协议，很奇怪，不过修改后就可以了。

2.hadoop源代码配置

首先配置hosts文件关联主机名和ip地址

host1=

host2=

host3=

=== security shell

rm -rf ~/.ssh/*

ssh-keygen -t rsa -f ~/.ssh/id_rsa -N ''

ssh-copy-id -o StrictHostKeyChecking=no $remothostname

ssh $remothostname hostname

######################## Hadoop cluster deploy

1. tar -xzf hadoop-2.7.1.tar.gz

2. add profile

Shell> cat << EOF >/etc/profile.d/hadoop.sh

#!/bin/sh

export JAVA_HOME=/root/BIGDATA/jdk1.8.0_65

export HADOOP_PREFIX=/root/BIGDATA/hadoop-2.7.1

export HADOOP_HOME=\$HADOOP_PREFIX

export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop

export YARN_CONF_DIR=\$HADOOP_HOME/etc/hadoop

export JAVA_LIBRARY_PATH=\$HADOOP_HOME/lib/native:\$JAVA_LIBRARY_PATH

export LD_LIBRARY_PATH=\$HADOOP_HOME/lib/native:\$LD_LIBRARY_PATH

export CLASSPATH=.:\$JAVA_HOME/lib/dt.jar:\$JAVA_HOME/lib/tools.jar:

export PATH=\$JAVA_HOME/bin:\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\${PATH}

EOF

Shell> source /etc/profile

3. create hdfs dirs on all hosts

HADOOP_LOCAL_BASE_DIR=/opt/local/hdfs

mkdir -p ${HADOOP_LOCAL_BASE_DIR}

mkdir -p ${HADOOP_LOCAL_BASE_DIR}/dfs/data

mkdir -p ${HADOOP_LOCAL_BASE_DIR}/dfs/name

mkdir -p ${HADOOP_LOCAL_BASE_DIR}/dfs/snn

mkdir -p ${HADOOP_LOCAL_BASE_DIR}/tmp

mkdir -p ${HADOOP_LOCAL_BASE_DIR}/yarn/logs

4. config etc/hadoop/

1. add all slaves to slaves

bigdata1

bigdata3

2.

HADOOP_DFS_MASTER=masternode

HADOOP_DFS_SECONDARY_NAMENODE=masternode

YARN_RESOURCE_MANAGER=masternode

JOBHISTORY_SERVER=masternode

JOBTRACKRT_HOST=masternode

HADOOP_TOOL_INSTALL_DIR=/root/BIGDATA/DOCS/hadoop_doc/hadoop_demo

#core-site.xml

conf_file=core-site.xml

cp -raf ${HADOOP_TOOL_INSTALL_DIR}/${conf_file} ${HADOOP_PREFIX}/etc/hadoop/

sed -i "s^\${HADOOP_LOCAL_BASE_DIR}^${HADOOP_LOCAL_BASE_DIR}^g" "${HADOOP_PREFIX}/etc/hadoop/${conf_file}"

sed -i "s^\${HADOOP_DFS_MASTER}^${HADOOP_DFS_MASTER}^g" "${HADOOP_PREFIX}/etc/hadoop/${conf_file}"

#hdfs-site.xml

conf_file=hdfs-site.xml

cp -raf ${HADOOP_TOOL_INSTALL_DIR}/${conf_file} ${HADOOP_PREFIX}/etc/hadoop/

sed -i "s^\${HADOOP_LOCAL_BASE_DIR}^${HADOOP_LOCAL_BASE_DIR}^g" "${HADOOP_PREFIX}/etc/hadoop/${conf_file}"

sed -i "s^\${HADOOP_DFS_SECONDARY_NAMENODE}^${HADOOP_DFS_SECONDARY_NAMENODE}^g" "${HADOOP_PREFIX}/etc/hadoop/${conf_file}"

sed -i "s^\${HADOOP_DFS_MASTER}^${HADOOP_DFS_MASTER}^g" "${HADOOP_PREFIX}/etc/hadoop/${conf_file}"

#mapred-site.xml

conf_file=mapred-site.xml

cp -raf ${HADOOP_TOOL_INSTALL_DIR}/${conf_file} ${HADOOP_PREFIX}/etc/hadoop/

sed -i "s^\${JOBTRACKRT_HOST}^${JOBTRACKRT_HOST}^g" "${HADOOP_PREFIX}/etc/hadoop/${conf_file}"

sed -i "s^\${JOBHISTORY_SERVER}^${JOBHISTORY_SERVER}^g" "${HADOOP_PREFIX}/etc/hadoop/${conf_file}"

#yarn-site.xml

conf_file=yarn-site.xml

cp -raf ${HADOOP_TOOL_INSTALL_DIR}/${conf_file} ${HADOOP_PREFIX}/etc/hadoop/

sed -i "s^\${YARN_RESOURCE_MANAGER}^${YARN_RESOURCE_MANAGER}^g" "${HADOOP_PREFIX}/etc/hadoop/${conf_file}"

sed -i "s^\${HADOOP_LOCAL_BASE_DIR}^${HADOOP_LOCAL_BASE_DIR}^g" "${HADOOP_PREFIX}/etc/hadoop/${conf_file}"

5. init namenode

Shell>hdfs namenode -format -clusterid cluster1

6. start all

Shell>$HADOOP_HOME/sbin/start-all.sh

Shell> $HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver

===Hadoop check

1. After deploy hadoop.

Shell>hadoop checknative -a

Shell>hadoop jar ${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar pi 4 100

Shell> cat <<EOF >/tmp/file1

Hello World Bye World

EOF

Shell> cat <<EOF >/tmp/file2

Hello Hadoop Goodbye Hadoop

EOF

Shell> hadoop fs -mkdir /tmp

Shell> hadoop fs -copyFromLocal -f /tmp/file1 /tmp/file2 /tmp

Shell> hadoop jar ${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount /tmp/file1 /tmp/file2 /tmp/wordcount

Shell> hadoop fs -cat /tmp/wordcount/part-r-00000

===hadoop Daemon Web Interface

NameNode http://nn_host:port/
Default HTTP port is 50070.

ResourceManager http://rm_host:port/
Default HTTP port is 8088.

#MapReduce JobHistory Server http://jhs_host:port/
Default HTTP port is 19888.

######################## Spark cluster deploy

1. tar -xzf spark-1.6.1-bin-hadoop2.6.tgz

2. add profile

cat << EOF >>/etc/profile.d/hadoop.sh

export SPARK_HOME=/root/BIGDATA/spark-1.6.1-bin-hadoop2.6

export PATH=\${SPARK_HOME}/sbin:\${PATH}:\${SPARK_HOME}/bin:

EOF

Shell>source /etc/profile

3. create local dir

SPARK_LOCAL_BASE_DIR=/opt/local/spark

Shell>mkdir -p ${SPARK_LOCAL_BASE_DIR}/tmp

Shell>hadoop fs -mkdir /sparkHistoryLogs /sparkEventLogs

4. config

1. add all slaves to slaves

Shell>mv slaves.template slaves

bigdata1

bigdata3

2.

SPARK_MASTER=masternode

HADOOP_DFS_MASTER=masternode

Shell> cat << EOF > ${SPARK_HOME}/conf/spark-defaults.conf

spark.master spark://${SPARK_MASTER}:7077

spark.local.dir ${SPARK_LOCAL_BASE_DIR}/tmp

spark.master.rest.port 7177

#Spark UI

spark.eventLog.enabled true

spark.eventLog.dir hdfs://${HADOOP_DFS_MASTER}:9000/sparkEventLogs

spark.ui.killEnabled true

spark.ui.port 4040

spark.history.ui.port 18080

spark.history.fs.logDirectory hdfs://${HADOOP_DFS_MASTER}:9000/sparkHistoryLogs

#

spark.shuffle.service.enabled false

#

spark.yarn.am.extraJavaOptions -Xmx3g

spark.executor.extrajavaoptions -Xmx3g

#Amount of memory to use for the YARN Application Master in client mode

spark.yarn.am.memory 2048m

#The amount of off-heap memory (in megabytes) to be allocated per driver in cluster mode

spark.yarn.driver.memoryOverhead 512

#The amount of off-heap memory (in megabytes) to be allocated per executor.

spark.yarn.executor.memoryOverhead 512

#Same as spark.yarn.driver.memoryOverhead, but for the YARN Application Master in client mode, fix yarn-client OOM, "ERROR yarn.ApplicationMaster: RECEIVED SIGNAL 15: SIGTERM"

spark.yarn.am.memoryOverhead 1024



EOF

Shell> cat << EOF > ${SPARK_HOME}/conf/spark-env.sh

SPARK_WORKER_WEBUI_PORT=8081

SPARK_WORKER_DIR=\${SPARK_HOME}/work

#SPARK_LOCAL_DIRS=\${SPARK_WORKER_DIR}

EOF

5. start all

Shell> ${SPARK_HOME}/sbin/start-all.sh

check cluster status

http://${SPARK_MASTER}:8080

===Spark Daemon Web Interface

spark.history.ui.port 18080

spark master 8080

http://${SPARK_MASTER}:port/

===Spark check

1. Spark Standalone (client, cluster(spark.master.rest.port))

# Run application locally on 1 cores

Shell> ${SPARK_HOME}/bin/spark-submit \

--class org.apache.spark.examples.SparkPi \

--master spark://masternode:7077 \

--deploy-mode client \

${SPARK_HOME}/lib/spark-examples*.jar \

10

# Run on a Spark standalone cluster

Shell> ${SPARK_HOME}/bin/spark-submit \

--class org.apache.spark.examples.SparkPi \

--master spark://$SPARK_MASTER:7177 \

--deploy-mode cluster \

--executor-memory 1G \

--total-executor-cores 1 \

${SPARK_HOME}/lib/spark-examples*.jar \

10



#spark shell

Shell> ${SPARK_HOME}/bin/spark-shell --master spark://$SPARK_MASTER:7077

2. Spark on Yarn (It needn't start spark cluster, only need start hadoop)

#run yarn-client

Shell> ${SPARK_HOME}/bin/spark-submit --class org.apache.spark.examples.SparkPi \

--master yarn-client \

--driver-java-options '-Xmx3096m' \

--conf spark.executor.extrajavaoptions=-Xmx3096m \

--executor-memory 3096m \

--num-executors 1 \

--conf spark.yarn.am.memoryOverhead=1024 \

${SPARK_HOME}/lib/spark-examples*.jar \

10



#run yarn-cluster

Shell> ${SPARK_HOME}/bin/spark-submit --class org.apache.spark.examples.SparkPi \

--master yarn \

--deploy-mode cluster \

--driver-memory 2g \

--executor-memory 2g \

${SPARK_HOME}/lib/spark-examples*.jar \

10

######################## Hbase cluster deploy

1. Shell> tar -xzf hbase-1.1.4-bin.tar.gz

2. add profile

cat << EOF >>/etc/profile.d/hadoop.sh

export HBASE_HOME=/root/BIGDATA/hbase-1.1.4

export PATH=\${PATH}:\${HBASE_HOME}/bin:

EOF

Shell>source /etc/profile

3. create local dir

HBASE_ROOTDIR=/hbase

HBASE_TMP_DIR=/opt/local/hbase

Shell> hadoop fs -mkdir ${HBASE_ROOTDIR}

Shell> mkdir -p ${HBASE_TMP_DIR}

4. config

1. add all hosts to regionservers

bigdata1

bigdata2

2. modify hbase-site.xml

cat <<EOF >${HBASE_HOME}/conf/hbase-site.xml

<configuration>

<property>

<name>hbase.rootdir</name>

<value>hdfs://masternode:9000/hbase</value>

<description>The directory shared by RegionServers.

Default: \${hbase.tmp.dir}/hbase

</description>

</property>

<property>

<name>hbase.zookeeper.quorum</name>

<value>masternode,slavesnode</value>

<description>Comma separated list of servers in the ZooKeeper quorum.

</description>

</property>

<property>

<name>hbase.tmp.dir</name>

<value>/opt/local/hbase</value>

<description>Temporary directory on the local filesystem

Default: \${java.io.tmpdir}/hbase-\${user.name}.

</description>

</property>

<property>

<name>hbase.cluster.distributed</name>

<value>true</value>

<description>The mode the cluster will be in. Possible values are

false: standalone and pseudo-distributed setups with managed Zookeeper

true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh)

</description>

</property>





</configuration>

EOF

3. ln -s $HADOOP_HOME/etc/hadoop/hdfs-site.xml ${HBASE_HOME}/conf/hdfs-site.xml

4. ulimit & nproc

cat <<EOF >> /etc/security/limits.conf

root - nofile 32768

root - nproc 32000

EOF

5. start all

Shell> ${HBASE_HOME}/bin/start-hbase.sh

===Hbase Daemon Web Interface

hbase.master.info.port 16010

hbase.regionserver.info.port 16030

http://${HBASE_MASTER}:port/

===Hbase check

1. run hbase shell

Shell> ${HBASE_HOME}/bin/hbase shell

hbase(main):003:0> create 'test', 'cf'

0 row(s) in 1.2200 seconds

hbase(main):003:0> list 'test'

test

1 row(s) in 0.0550 seconds

hbase(main):004:0> put 'test', 'row1', 'cf:a', 'value1'

0 row(s) in 0.0560 seconds

hbase(main):005:0> put 'test', 'row2', 'cf:b', 'value2'

0 row(s) in 0.0370 seconds

hbase(main):006:0> put 'test', 'row3', 'cf:c', 'value3'

0 row(s) in 0.0450 seconds

hbase(main):007:0> scan 'test'

ROW COLUMN+CELL

row1 column=cf:a, timestamp=1288380727188, value=value1

row2 column=cf:b, timestamp=1288380738440, value=value2

row3 column=cf:c, timestamp=1288380747365, value=value3

3 row(s) in 0.0590 seconds

hbase(main):008:0> get 'test', 'row1'

COLUMN CELL

cf:a timestamp=1288380727188, value=value1

1 row(s) in 0.0400 seconds

hbase(main):012:0> disable 'test'

0 row(s) in 1.0930 seconds

hbase(main):013:0> drop 'test'

0 row(s) in 0.0770 seconds

hbase(main):014:0> exit

######################## Hive cluster deploy

1. tar -xzf apache-hive-2.0.0-bin.tar.gz

2. add profile

cat << EOF >>/etc/profile.d/hadoop.sh

export HIVE_HOME=/root/BIGDATA/apache-hive-2.0.0-bin

export HIVE_CONF_DIR=\${HIVE_HOME}/conf

export PATH=\${HIVE_HOME}/bin:\${PATH}

EOF

Shell>source /etc/profile

3. create local dir

$HADOOP_HOME/bin/hadoop fs -mkdir /tmp

$HADOOP_HOME/bin/hadoop fs -mkdir -p /user/hive/warehouse

$HADOOP_HOME/bin/hadoop fs -chmod g+w /tmp

$HADOOP_HOME/bin/hadoop fs -chmod g+w /user/hive/warehouse

Shell> mkdir -p ${HBASE_TMP_DIR}

4. config

=M1. [ Local Embedded Derby ]

HIVE_LOCAL_WAREHOUSE=/opt/hive/warehouse

Shell> mkdir -p ${HIVE_LOCAL_WAREHOUSE}

Shell>cat <<EOF > ${HIVE_CONF_DIR}/hive-site.xml

<configuration>

<property>

<name>javax.jdo.option.ConnectionURL</name>

<value>jdbc:derby:;databaseName=metastore_db;create=true</value>

<description>JDBC connect string for a JDBC metastore</description>

</property>

<property>

<name>javax.jdo.option.ConnectionDriverName</name>

<value>org.apache.derby.jdbc.EmbeddedDriver</value>

<description>Driver class name for a JDBC metastore</description>

</property>

<property>

<name>javax.jdo.option.ConnectionUserName</name>

<value>APP</value>

<description>username to use against metastore database</description>

</property>

<property>

<name>javax.jdo.option.ConnectionPassword</name>

<value>mine</value>

<description>password to use against metastore database</description>

</property>

<property>

<name>hive.metastore.warehouse.dir</name>

<value>${HIVE_LOCAL_WAREHOUSE}</value>

<description>unit test data goes in here on your local filesystem</description>

</property>

</configuration>

EOF

Shell> $HIVE_HOME/bin/schematool -initSchema -dbType derby

Shell> $HIVE_HOME/bin/schematool -dbType derby -info

Shell> $HIVE_HOME/bin/hive

=M2. [Remote Metastore Server Derby]

Shell> tar -xzf db-derby-10.12.1.1-bin.tar.gz

Shell> cd db-derby-10.12.1.1-bin

Shell> mkdir data

Shell> cd data

Shell> ../bin/startNetworkServer -h 172.31.200.110 -p 1527 &

Shell> cp -raf ../lib/derbyclient.jar ../lib/derbytools.jar $HIVE_HOME/lib/

DERBY_SERVER_HOST=masternode

Shell>cat <<EOF > ${HIVE_CONF_DIR}/hive-site.xml

<configuration>

<property>

<name>javax.jdo.option.ConnectionURL</name>

<value>jdbc:derby://${DERBY_SERVER_HOST}:1527/hive_meta;create=true</value>

<description>JDBC connect string for a JDBC metastore</description>

</property>

<property>

<name>javax.jdo.option.ConnectionDriverName</name>

<value>org.apache.derby.jdbc.ClientDriver</value>

<description>Driver class name for a JDBC metastore</description>

</property>

<property>

<name>datanucleus.schema.autoCreateAll</name>

<value>true</value>

<description>creates necessary schema on a startup if one doesn't exist. set this to false, after creating it once</description>

</property>

<property>

<name>javax.jdo.option.ConnectionUserName</name>

<value>app</value>

<description>username to use against metastore database</description>

</property>

<property>

<name>javax.jdo.option.ConnectionPassword</name>

<value>app</value>

<description>password to use against metastore database</description>

</property>

<property>

<name>hive.metastore.warehouse.dir</name>



<value>/user/hive/warehouse</value>

<description>base hdfs path :location of default database for the warehouse</description>

</property>







<property>

<name>hive.metastore.uris</name>

<value>thrift://masternode:9083</value>

</property>

</configuration>

EOF

#start metastore service

$HIVE_HOME/bin/hive --service metastore &

#start hiveserver2 service

$HIVE_HOME/bin/hiveserver2 &

5. start

$HIVE_HOME/bin/hive

hive> CREATE TABLE pokes (foo INT, bar STRING);

hive> CREATE TABLE invites (foo INT, bar STRING) PARTITIONED BY (ds STRING);

hive> SHOW TABLES;

hive> SHOW TABLES '.*s';

hive> DESCRIBE invites;

hive> LOAD DATA LOCAL INPATH '/root/BIGDATA/apache-hive-2.0.0-bin/examples/files/kv1.txt' OVERWRITE INTO TABLE pokes;

======#

#Remote Metastore Server

$HIVE_HOME/bin/hive --service metastore -p 9083

#Running HiveServer2 and Beeline

$HIVE_HOME/bin/hiveserver2

$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000

#Running HCatalog

$HIVE_HOME/hcatalog/sbin/hcat_server.sh

$HIVE_HOME/hcatalog/bin/hcat

#Running WebHCat

$HIVE_HOME/hcatalog/sbin/webhcat_server.sh

####### pig

2. add profile

cat << EOF >>/etc/profile.d/hadoop.sh

export PIG_HOME=/BIGDATA/pig-0.15.0

export PATH=\${PIG_HOME}/bin:\${PATH}

EOF

Shell>source /etc/profile

码农公寓

1.Ambari安装

2.hadoop源代码配置

相关文章