Hadoop和Yarn的配置文件

  集群参数配置原则重写配置、默认覆盖,否则默认生效。下面总结Haoop常用配置文件参数。常用配置文件:core-site.xml、hdfs-site.xml、mapred-site.xml、yarn-site.xml,配置于Hadoop和Yarn这两个实例中,Hadoop和Yarn两个组件一个是负责存储一个是资源管理框架,相当于计算和存储,有的公司计算节点和存储节点分离,有的没有,按照需求使用。
  
  1. core-site.xml 是 NameNode 的核心配置文件,主要对 NameNode 的属性进行设置,也仅仅在 NameNode 节点生效。
   

<configuration>
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://HadoopHhy</value>
</property>
<property>
    <name>ha.zookeeper.quorum</name>
    <value>zk1:2015,zk2:2015,zk3:2015</value>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>/home/bigdata/hadoop/tmp</value>
    <final>true</final>
</property>

<property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
    <final>true</final>
</property>
<property>
    <name>fs.trash.interval</name>
    <value>1440</value>
</property>

<property>
        <name>hadoop.security.authorization</name>
        <value>true</value>
</property>

</configuration>

   
 参数配置和解释:

Hadoop和Yarn的配置文件

2 . hdfs-site.xml 文件

该文件是 HDFS 的核心配置文件,主要配置 NameNode、DataNode 的一些基于 HDFS 的属性信息、在 NameNode 和 DataNode 节点生效。

<configuration>
<property>
    <name>dfs.nameservices</name>
    <value>test</value>
</property>

<property>
    <name>dfs.ha.namenodes.test</name>
    <value>nn1,nn2</value>
</property>

<property>
    <name>dfs.namenode.rpc-address.test.nn1</name>
    <value>host1:9000</value>
</property>

<property>
    <name>dfs.namenode.rpc-address.test.nn2</name>
    <value>host2:9000</value>
</property>
<property>
    <name>dfs.namenode.http-address.test.nn1</name>
    <value>host1:50070</value>
</property>
<property>
    <name>dfs.namenode.http-address.test.nn2</name>
    <value>host2:50070</value>
</property>
<property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://host1:8485;host2:8485;host3:8485/test</value>
</property>
<property>
    <name>dfs.client.failover.proxy.provider.test</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>

<property>
    <name>dfs.ha.fencing.methods</name>
    <value>shell(/bin/true)</value>
</property>
<property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/data/journal</value>
</property>
<property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
</property>
<property>
    <name>dfs.block.size</name>
    <value>134217728</value>
    <final>true</final>
</property>

<property>
    <name>dfs.replication</name>
    <value>3</value>
</property>
<property>
    <name>dfs.name.dir</name>
    <value>/data/namenode</value>
</property>
<property>
    <name>dfs.data.dir</name>
    <value>/data/datanode</value>
    <final>true</final>
</property>

<property>
    <name>dfs.permissions.enabled</name>
    <value>true</value>
</property>

<property>
    <name>dfs.namenode.acls.enabled</name>
    <value>true</value>
</property>
<property>
        <name>dfs.image.transfer.bandwidthPerSec</name>
        <value>314572800</value>
</property>
<property>
        <name>dfs.image.transfer.timeout</name>
        <value>120000</value>
</property>
<property>
        <name>dfs.namenode.checkpoint.txns</name>
        <value>5000000</value>
</property>

        <name>dfs.namenode.edits.dir</name>
        <value>/data/editlog</value>
</property>

<property>
        <name>dfs.hosts.exclude</name>
        <value>/etc/hadoop/hosts-exclude</value>
</property>
<property>
        <name>dfs.datanode.balance.bandwidthPerSec</name>
        <value>20971520</value>
</property>
<property>
        <name>dfs.namenode.accesstime.precision</name>
        <value>0</value>
</property>
<property>
        <name>dfs.namenode.decommission.interval</name>
        <value>30</value>
</property>
</configuration>

 参数配置和解释:

Hadoop和Yarn的配置文件


3 yarn-site.xml 文件

该文件是 Yarn 资源管理框架的核心配置文件,所有对 Yarn 的配置都在此文件中设置。


 参数配置和解释:

<configuration>

    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>

    <property>
            <name>yarn.resourcemanager.cluster-id</name>
        <value>xxx</value>
        </property>

    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>

    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>host1</value>
    </property>

    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>host2</value>
    </property>

    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>host1:2015,host2:2015,host3:2015</value>
    </property>

    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle,spark_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
        <property>
             <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
             <value>org.apache.spark.network.yarn.YarnShuffleService</value>
        </property>

    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/data/yarn</value>
    </property>

    <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/data/logs</value>
    </property>

    <property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>host1:port</value>
    </property>

    <property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>
        <value>host1:port</value>
        <final>true</final>
    </property>

    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>host1:8088</value>
    </property>

    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>host1:port</value>
    </property>

    <property>
        <name>yarn.resourcemanager.admin.address.rm1</name>
        <value>host1:port</value>
    </property>

    <property>
        <name>yarn.resourcemanager.ha.admin.address.rm1</name>
        <value>host1:port</value>
    </property>

    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>host2:port</value>
    </property>

    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>host2:port</value>
        <final>true</final>
    </property>

    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>host2:8088</value>
    </property>

    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>host2:port</value>
    </property>

    <property>
        <name>yarn.resourcemanager.admin.address.rm2</name>
        <value>host2:port</value>
    </property>

    <property>
        <name>yarn.resourcemanager.ha.admin.address.rm2</name>
        <value>host2:port</value>
    </property>

    <property> 
        <name>yarn.client.failover-proxy-provider</name> 
         <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value> 
    </property>

        <property>
                <name>yarn.resourcemanager.recovery.enabled</name>
                <value>true</value>
        </property>

    <property>
            <name>yarn.resourcemanager.store.class</name>
            <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
        </property>

        <property>  
                <name>yarn.log-aggregation-enable</name>  
                <value>false</value>  
        </property>

        <property>
                <name>yarn.log-aggregation.retain-seconds</name>
                <value>172800</value>
        </property>

        <property>
                <name>yarn.log-aggregation.retain-check-interval-seconds</name>
                <value>21600</value>
        </property>

        <property>
                <name>yarn.scheduler.minimum-allocation-mb</name>
                <value>2048</value>
        </property>

        <property>
                <name>yarn.scheduler.maximum-allocation-mb</name>
                <value>24576</value>
        </property>
        <property>
                <name>yarn.log.server.url</name>
                <value>http://host2:port/jobhistory/logs</value>
        </property>
        <property>
                <name>yarn.nodemanager.resource.memory-mb</name>
                <value>25600</value>
        </property>
        <property>
                <name>yarn.nodemanager.resource.cpu-vcores</name>
                <value>6</value>
        </property>
        <property>
                <name>yarn.resourcemanager.nodemanager-connect-retries</name>
                <value>10</value>
        </property>
</configuration>

Hadoop和Yarn的配置文件

  1. mapred-site.xml 文件

    MR 的配置文件。
    JobHistory用来记录MapReduce任务的完整信息到HDFS目录中。

     参数配置和解释:

    Hadoop和Yarn的配置文件

上一篇:hadoop之yarn详解(基础架构篇)


下一篇:hadoop知识点3