A Collection of Hadoop Problems (Study Notes)

Continuously updated...

Problem 1

After setting up the Hadoop cluster environment, I tested whether HDFS was working by uploading a file to the HDFS file system.
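
The upload was attempted with a command along the following lines (a sketch; the local file name is assumed to match the test2.txt that appears in the error, and in is the target directory under /user/root on HDFS):

bin/hadoop fs -put test2.txt in

The command failed with the error shown here: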

14/01/14 21:42:35 WARN hdfs.DFSClient: DataStreamer Exception: org.apache.hadoop.ipc.RemoteException: java.io.IOException: File /user/root/in/test2.txt could only be replicated to 0 nodes, instead of 1
        at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:1271)
        at org.apache.hadoop.hdfs.server.namenode.NameNode.addBlock(NameNode.java:422)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508)
        at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:959)
        at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:955)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.ipc.Server$Handler.run(Server.java:953)


        at org.apache.hadoop.ipc.Client.call(Client.java:740)
        at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220)
        at com.sun.proxy.$Proxy0.addBlock(Unknown Source)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
        at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
        at com.sun.proxy.$Proxy0.addBlock(Unknown Source)
        at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.locateFollowingBlock(DFSClient.java:2937)
        at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2819)
        at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
        at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)


14/01/14 21:42:35 WARN hdfs.DFSClient: Error Recovery for block null bad datanode[0] nodes == null
14/01/14 21:42:35 WARN hdfs.DFSClient: Could not get block locations. Source file "/user/root/in/test2.txt" - Aborting...
put: java.io.IOException: File /user/root/in/test2.txt could only be replicated to 0 nodes, instead of 1
14/01/14 21:42:35 ERROR hdfs.DFSClient: Exception closing file /user/root/in/test2.txt : org.apache.hadoop.ipc.RemoteException: java.io.IOException: File /user/root/in/test2.txt could only be replicated to 0 nodes, instead of 1
        at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:1271)
        at org.apache.hadoop.hdfs.server.namenode.NameNode.addBlock(NameNode.java:422)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508)
        at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:959)
        at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:955)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.ipc.Server$Handler.run(Server.java:953)


org.apache.hadoop.ipc.RemoteException: java.io.IOException: File /user/root/in/test2.txt could only be replicated to 0 nodes, instead of 1
        at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:1271)
        at org.apache.hadoop.hdfs.server.namenode.NameNode.addBlock(NameNode.java:422)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508)
        at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:959)
        at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:955)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.ipc.Server$Handler.run(Server.java:953)


        at org.apache.hadoop.ipc.Client.call(Client.java:740)
        at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220)
        at com.sun.proxy.$Proxy0.addBlock(Unknown Source)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
        at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
        at com.sun.proxy.$Proxy0.addBlock(Unknown Source)
        at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.locateFollowingBlock(DFSClient.java:2937)
        at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2819)
        at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
        at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)

Solution

In the core-site.xml configuration file, the value

<value>hdfs://192.168.2.100:9000</value> must use the IP address,

whereas I had previously written <value>hdfs://localhost:9000</value>.

The complete configuration is as follows:

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <name>fs.default.name</name>
    <!-- Note: in fully distributed mode this must be the IP address; the same applies below -->
    <value>hdfs://192.168.2.100:9000</value>
  </property>
</configuration>
Note: fs.default.name specifies the NameNode's IP address and port.


In the mapred-site.xml configuration file, the value

<value>192.168.2.100:9001</value> must also use the IP address,

whereas I had previously written <value>localhost:9001</value>.

The complete configuration is as follows:

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <!-- Note: in fully distributed mode this must be the IP address -->
    <value>192.168.2.100:9001</value>
  </property>
</configuration>


Other causes and fixes suggested online include (a few commands for checking each item are sketched after the list):

1. Check whether the system or HDFS has enough free space (in my case the exception was caused by the disk being full)
2. Check whether the DataNodes are up and reporting normally
3. Check whether the NameNode is still in safe mode
4. Check whether the firewall has been disabled
5. Stop Hadoop, reformat the NameNode, then restart Hadoop
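
A rough sketch of how each item can be checked from the shell, assuming a Hadoop 0.20.x installation directory and a RHEL/CentOS-style firewall service (adjust paths and service names to your environment):

df -h                               # 1. free disk space on each node
bin/hadoop dfsadmin -report         # 2. live DataNodes and their capacity
bin/hadoop dfsadmin -safemode get   # 3. whether the NameNode is in safe mode
service iptables status             # 4. firewall status
bin/stop-all.sh                     # 5. stop, reformat (erases HDFS data!), restart
bin/hadoop namenode -format
bin/start-all.sh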

Some relevant links:

http://blog.csdn.net/zuiaituantuan/article/details/6533867

http://blog.csdn.net/wanghai__/article/details/5744158

http://www.docin.com/p-629030380.html

http://www.cnblogs.com/linjiqin/archive/2013/03/13/2957310.html

http://f.dataguru.cn/thread-32858-1-1.html



Problem 2

After setting up the Hadoop cluster environment, I tested whether MapReduce was working.

Running the command bin/hadoop jar hadoop-0.20.2/hadoop-0.20.2-examples.jar wordcount in out produced no response at all.

Solution

When the NameNode starts, it first enters safe mode. If the fraction of blocks missing from DataNode reports exceeds 1 - dfs.safemode.threshold.pct, the system stays in safe mode, i.e. HDFS remains read-only.

dfs.safemode.threshold.pct (default 0.999) means that at startup the NameNode leaves safe mode only once the DataNodes have reported at least 0.999 of the block count recorded in the NameNode's metadata; until then HDFS stays in this read-only state. A value greater than 1 makes safe mode permanent.

Two ways to leave safe mode:
(1) Lower dfs.safemode.threshold.pct to a smaller value (the default is 0.999); see the hdfs-site.xml sketch below.
(2) Force the NameNode out of safe mode with hadoop dfsadmin -safemode leave.
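
A minimal hdfs-site.xml sketch for option (1), assuming you want the NameNode to leave safe mode once 95% of blocks have been reported (0.95 is only an illustrative value):

<property>
  <name>dfs.safemode.threshold.pct</name>
  <!-- leave safe mode once 95% of blocks have been reported -->
  <value>0.95</value>
</property>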

In a shell, change into the directory where Hadoop was unpacked and control safe mode with bin/hadoop dfsadmin -safemode value, where the value parameter is one of:

enter - enter safe mode
leave - force the NameNode to leave safe mode
get - report whether safe mode is currently on
wait - wait until safe mode ends

Problem 3

After setting up the Hadoop cluster environment, I tested whether MapReduce was working.

Running bin/hadoop jar hadoop-0.20.2/hadoop-0.20.2-examples.jar wordcount in out printed the following:

14/01/15 20:04:07 INFO input.FileInputFormat: Total input paths to process : 2
14/01/15 20:04:09 INFO mapred.JobClient: Running job: job_201401151958_0001
14/01/15 20:04:10 INFO mapred.JobClient:  map 0% reduce 0%
14/01/15 20:06:27 INFO mapred.JobClient:  map 50% reduce 0%
14/01/15 20:07:02 INFO mapred.JobClient:  map 100% reduce 0%
14/01/15 20:10:33 INFO mapred.JobClient: Task Id : attempt_201401151958_0001_r_000000_0, Status : FAILED
Shuffle Error: Exceeded MAX_FAILED_UNIQUE_FETCHES; bailing-out.

Solution

Add the IP address and host name of every node to /etc/hosts on the NameNode and on all DataNodes, for example:

192.168.5.191 jx1

192.168.5.192 jx2
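
To confirm the mapping, every node should be able to resolve every other node by host name; a quick check, using the example host names above:

ping -c 1 jx2        # run on jx1; should resolve to 192.168.5.192
ping -c 1 jx1        # run on jx2; should resolve to 192.168.5.191

After editing /etc/hosts it is usually safest to restart the Hadoop daemons before rerunning the job.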


Problem 4

After setting up the Hadoop cluster environment, I tested whether MapReduce was working.

Running bin/hadoop jar hadoop-0.20.2/hadoop-0.20.2-examples.jar wordcount in out printed the following:


14/01/15 20:30:52 INFO input.FileInputFormat: Total input paths to process : 2
14/01/15 20:30:53 INFO mapred.JobClient: Running job: job_201401151958_0003
14/01/15 20:30:55 INFO mapred.JobClient:  map 0% reduce 0%
14/01/15 20:31:05 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000003_0, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000003_0, expected: hdfs://localhost1.localdomain:9000
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
        at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
        at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
        at org.apache.hadoop.mapred.Task.done(Task.java:671)
        at org.apache.hadoop.mapred.Task.runJobSetupTask(Task.java:836)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:296)
        at org.apache.hadoop.mapred.Child.main(Child.java:170)


14/01/15 20:31:14 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_r_000002_0, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_r_000002_0, expected: hdfs://localhost1.localdomain:9000
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
        at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
        at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
        at org.apache.hadoop.mapred.Task.done(Task.java:671)
        at org.apache.hadoop.mapred.Task.runJobSetupTask(Task.java:836)
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:362)
        at org.apache.hadoop.mapred.Child.main(Child.java:170)


14/01/15 20:31:24 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000003_1, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000003_1, expected: hdfs://localhost1.localdomain:9000
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
        at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
        at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
        at org.apache.hadoop.mapred.Task.done(Task.java:671)
        at org.apache.hadoop.mapred.Task.runJobSetupTask(Task.java:836)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:296)
        at org.apache.hadoop.mapred.Child.main(Child.java:170)


14/01/15 20:31:33 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000003_2, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000003_2, expected: hdfs://localhost1.localdomain:9000
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
        at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
        at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
        at org.apache.hadoop.mapred.Task.done(Task.java:671)
        at org.apache.hadoop.mapred.Task.runJobSetupTask(Task.java:836)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:296)
        at org.apache.hadoop.mapred.Child.main(Child.java:170)


14/01/15 20:31:51 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000002_0, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000002_0, expected: hdfs://localhost1.localdomain:9000
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
        at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
        at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
        at org.apache.hadoop.mapred.Task.done(Task.java:671)
        at org.apache.hadoop.mapred.Task.runJobCleanupTask(Task.java:827)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:292)
        at org.apache.hadoop.mapred.Child.main(Child.java:170)


14/01/15 20:32:03 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_r_000001_0, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_r_000001_0, expected: hdfs://localhost1.localdomain:9000
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
        at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
        at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
        at org.apache.hadoop.mapred.Task.done(Task.java:671)
        at org.apache.hadoop.mapred.Task.runJobCleanupTask(Task.java:827)
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:358)
        at org.apache.hadoop.mapred.Child.main(Child.java:170)


14/01/15 20:32:14 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000002_1, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000002_1, expected: hdfs://localhost1.localdomain:9000
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
        at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
        at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
        at org.apache.hadoop.mapred.Task.done(Task.java:671)
        at org.apache.hadoop.mapred.Task.runJobCleanupTask(Task.java:827)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:292)
        at org.apache.hadoop.mapred.Child.main(Child.java:170)


14/01/15 20:32:23 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000002_2, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000002_2, expected: hdfs://localhost1.localdomain:9000
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
        at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
        at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
        at org.apache.hadoop.mapred.Task.done(Task.java:671)
        at org.apache.hadoop.mapred.Task.runJobCleanupTask(Task.java:827)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:292)
        at org.apache.hadoop.mapred.Child.main(Child.java:170)


14/01/15 20:32:32 INFO mapred.JobClient: Job complete: job_201401151958_0003
14/01/15 20:32:32 INFO mapred.JobClient: Counters: 0


Solution

The "Wrong FS" message shows the job addressing HDFS as hdfs://192.168.0.2:9000 while the configuration seen by the tasks expects hdfs://localhost1.localdomain:9000. The likely fix, in line with Problem 1, is to make fs.default.name identical in core-site.xml on every node (preferably the NameNode's IP address), make sure /etc/hosts resolves that name consistently, restart Hadoop, and rerun the job.

Problem 5

On the master node, start-all.sh was run as the hadoop user.

Running jps on the master showed NameNode, JobTracker, and SecondaryNameNode all running normally, but on the slave nodes jps showed only TaskTracker; the DataNode had not started.

Solution

1. Run stop-all.sh to stop all services.

2. On every slave node, delete the tmp and logs directories, then recreate empty tmp and logs directories.

3. On every slave node, delete core-site.xml under /usr/hadoop/conf, and copy the master's core-site.xml to each slave:

   scp /usr/java/hadoop-0.20.2/conf/core-site.xml localhost2.localdomain:/usr/java/hadoop-0.20.2/conf/

4. Reformat the NameNode: hadoop namenode -format

5. Start everything again: start-all.sh (a condensed command sequence for these steps is sketched below)
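
A condensed sketch of the five steps above, assuming Hadoop is installed under /usr/java/hadoop-0.20.2 on every node, the tmp and logs directories live in that Hadoop home, and localhost2.localdomain is the slave (reformatting erases all data in HDFS):

# on the master
cd /usr/java/hadoop-0.20.2
bin/stop-all.sh

# on every slave
cd /usr/java/hadoop-0.20.2
rm -rf tmp logs && mkdir tmp logs
rm conf/core-site.xml

# on the master: push core-site.xml to each slave
scp conf/core-site.xml localhost2.localdomain:/usr/java/hadoop-0.20.2/conf/

# on the master: reformat and restart (destroys existing HDFS data)
bin/hadoop namenode -format
bin/start-all.sh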

Note: this is what happened in my case and is not necessarily what you are hitting. In general, confirm the following (quick checks for items 2 and 3 are sketched after the list):

1. Every XML configuration file is correct.

2. The Java environment variables are configured correctly.

3. Passwordless SSH works between all nodes.
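
Rough commands for checks 2 and 3 (slave1 is a placeholder host name; run these from the master as the hadoop user):

echo $JAVA_HOME        # 2. should print the JDK install path on every node
java -version
ssh slave1 date        # 3. should print the remote date without asking for a password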

