Continuously updated...
Problem 1
After setting up the Hadoop cluster environment, test whether HDFS works (a quick smoke test is sketched below).
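A minimal check is to push a small local file into HDFS and list it back. The file name and target directory here are assumptions taken from the paths in the log that follows:
bin/hadoop fs -put test2.txt in
bin/hadoop fs -ls in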
When uploading a file to the HDFS filesystem, the following error was shown:
14/01/14 21:42:35 WARN hdfs.DFSClient: DataStreamer Exception: org.apache.hadoop.ipc.RemoteException: java.io.IOException: File /user/root/in/test2.txt could only be replicated to 0 nodes, instead of 1
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:1271)
at org.apache.hadoop.hdfs.server.namenode.NameNode.addBlock(NameNode.java:422)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:959)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:955)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:953)
at org.apache.hadoop.ipc.Client.call(Client.java:740)
at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220)
at com.sun.proxy.$Proxy0.addBlock(Unknown Source)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
at com.sun.proxy.$Proxy0.addBlock(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.locateFollowingBlock(DFSClient.java:2937)
at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2819)
at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)
14/01/14 21:42:35 WARN hdfs.DFSClient: Error Recovery for block null bad datanode[0] nodes == null
14/01/14 21:42:35 WARN hdfs.DFSClient: Could not get block locations. Source file "/user/root/in/test2.txt" - Aborting...
put: java.io.IOException: File /user/root/in/test2.txt could only be replicated to 0 nodes, instead of 1
14/01/14 21:42:35 ERROR hdfs.DFSClient: Exception closing file /user/root/in/test2.txt : org.apache.hadoop.ipc.RemoteException: java.io.IOException: File /user/root/in/test2.txt could only be replicated to 0 nodes, instead of 1
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:1271)
at org.apache.hadoop.hdfs.server.namenode.NameNode.addBlock(NameNode.java:422)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:959)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:955)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:953)
org.apache.hadoop.ipc.RemoteException: java.io.IOException: File /user/root/in/test2.txt could only be replicated to 0 nodes, instead of 1
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:1271)
at org.apache.hadoop.hdfs.server.namenode.NameNode.addBlock(NameNode.java:422)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:959)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:955)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:953)
at org.apache.hadoop.ipc.Client.call(Client.java:740)
at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220)
at com.sun.proxy.$Proxy0.addBlock(Unknown Source)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
at com.sun.proxy.$Proxy0.addBlock(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.locateFollowingBlock(DFSClient.java:2937)
at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2819)
at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102)
at org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288)
Solution
The value in the core-site.xml configuration file,
<value>hdfs://192.168.2.100:9000</value>, must use the IP address,
whereas I had previously written <value>hdfs://localhost:9000</value>.
The complete configuration is as follows:
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.default.name</name>
<!-- Note: in fully distributed mode this must be the IP address (same below) -->
<value>hdfs://192.168.2.100:9000</value>
</property>
</configuration>
Note: fs.default.name is the NameNode's IP address and port.
The value in the mapred-site.xml configuration file,
<value>192.168.2.100:9001</value>, must also use the IP address,
whereas I had previously written <value>localhost:9001</value>.
The complete configuration is as follows:
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapred.job.tracker</name>
<!-- Note: in fully distributed mode this must be the IP address -->
<value>192.168.2.100:9001</value>
</property>
</configuration>
Other things to check:
1. Does the system or HDFS have enough free space? (In my case the exception was caused by running out of disk space.)
2. Is the number of DataNodes normal?
3. Is the NameNode in safe mode?
4. Is the firewall turned off?
5. Stop Hadoop, reformat the NameNode, and restart Hadoop.
A quick shell check for items 1-4 is sketched below.
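A rough way to check items 1-4 from the shell, assuming the 0.20.x command layout used elsewhere in these notes (run from the Hadoop install directory; the firewall command assumes a RHEL/CentOS-style system):
df -h                                # 1. local disk space on each node
bin/hadoop dfsadmin -report          # 1. HDFS capacity, and 2. live/dead DataNodes
bin/hadoop dfsadmin -safemode get    # 3. whether safe mode is on
service iptables status              # 4. whether the firewall is running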
A few useful links:
http://blog.csdn.net/zuiaituantuan/article/details/6533867
http://blog.csdn.net/wanghai__/article/details/5744158
http://www.docin.com/p-629030380.html
http://www.cnblogs.com/linjiqin/archive/2013/03/13/2957310.html
http://f.dataguru.cn/thread-32858-1-1.html
Problem 2
After setting up the Hadoop cluster environment, test whether MapReduce works.
Running the command bin/hadoop jar hadoop-0.20.2/hadoop-0.20.2-examples.jar wordcount in out produces no response.
Solution
When the NameNode starts it first enters safe mode. If the proportion of blocks missing from the DataNodes exceeds 1 - dfs.safemode.threshold.pct, the system stays in safe mode, i.e. read-only.
dfs.safemode.threshold.pct (default 0.999f) means that on startup, HDFS leaves safe mode only once the number of blocks reported by the DataNodes reaches 0.999 of the block count recorded in the metadata; until then it remains read-only. If the value is set to 1, HDFS stays in safe mode forever.
There are two ways to leave safe mode:
(1) Change dfs.safemode.threshold.pct to a smaller value (the default is 0.999).
(2) Force the NameNode out with hadoop dfsadmin -safemode leave.
In a shell, change into the Hadoop installation directory and use bin/hadoop dfsadmin -safemode value to operate on safe mode, where the value argument is one of the following (example usage is shown after the list):
enter - enter safe mode
leave - force the NameNode to leave safe mode
get - report whether safe mode is on
wait - block until safe mode ends
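For example, to check safe mode and then clear it (command names as in Hadoop 0.20.x, run from the install directory):
bin/hadoop dfsadmin -safemode get     # reports whether safe mode is ON or OFF
bin/hadoop dfsadmin -safemode leave   # force the NameNode out of safe mode
bin/hadoop dfsadmin -safemode get     # should now report OFF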
Problem 3
After setting up the Hadoop cluster environment, test whether MapReduce works.
Running the command bin/hadoop jar hadoop-0.20.2/hadoop-0.20.2-examples.jar wordcount in out prints the following:
14/01/15 20:04:07 INFO input.FileInputFormat: Total input paths to process : 2
14/01/15 20:04:09 INFO mapred.JobClient: Running job: job_201401151958_0001
14/01/15 20:04:10 INFO mapred.JobClient: map 0% reduce 0%
14/01/15 20:06:27 INFO mapred.JobClient: map 50% reduce 0%
14/01/15 20:07:02 INFO mapred.JobClient: map 100% reduce 0%
14/01/15 20:10:33 INFO mapred.JobClient: Task Id : attempt_201401151958_0001_r_000000_0, Status : FAILED
Shuffle Error: Exceeded MAX_FAILED_UNIQUE_FETCHES; bailing-out.
Solution
Add the IP address and hostname of each node to /etc/hosts on the NameNode and on every DataNode, in the form shown below (a quick verification sketch follows the example):
192.168.5.191 jx1
192.168.5.192 jx2
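To confirm the mapping works, run something like the following on every node (jx1/jx2 are the example hostnames above; substitute your own):
cat /etc/hosts          # every node should list every other node
hostname                # should match the name listed in /etc/hosts
ping -c 1 jx2           # every node should resolve and reach the others by hostname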
Problem 4
After setting up the Hadoop cluster environment, test whether MapReduce works.
Running the command bin/hadoop jar hadoop-0.20.2/hadoop-0.20.2-examples.jar wordcount in out prints the following:
14/01/15 20:30:52 INFO input.FileInputFormat: Total input paths to process : 2
14/01/15 20:30:53 INFO mapred.JobClient: Running job: job_201401151958_0003
14/01/15 20:30:55 INFO mapred.JobClient: map 0% reduce 0%
14/01/15 20:31:05 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000003_0, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000003_0, expected: hdfs://localhost1.localdomain:9000
at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
at org.apache.hadoop.mapred.Task.done(Task.java:671)
at org.apache.hadoop.mapred.Task.runJobSetupTask(Task.java:836)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:296)
at org.apache.hadoop.mapred.Child.main(Child.java:170)
14/01/15 20:31:14 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_r_000002_0, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_r_000002_0, expected: hdfs://localhost1.localdomain:9000
at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
at org.apache.hadoop.mapred.Task.done(Task.java:671)
at org.apache.hadoop.mapred.Task.runJobSetupTask(Task.java:836)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:362)
at org.apache.hadoop.mapred.Child.main(Child.java:170)
14/01/15 20:31:24 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000003_1, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000003_1, expected: hdfs://localhost1.localdomain:9000
at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
at org.apache.hadoop.mapred.Task.done(Task.java:671)
at org.apache.hadoop.mapred.Task.runJobSetupTask(Task.java:836)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:296)
at org.apache.hadoop.mapred.Child.main(Child.java:170)
14/01/15 20:31:33 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000003_2, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000003_2, expected: hdfs://localhost1.localdomain:9000
at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
at org.apache.hadoop.mapred.Task.done(Task.java:671)
at org.apache.hadoop.mapred.Task.runJobSetupTask(Task.java:836)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:296)
at org.apache.hadoop.mapred.Child.main(Child.java:170)
14/01/15 20:31:51 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000002_0, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000002_0, expected: hdfs://localhost1.localdomain:9000
at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
at org.apache.hadoop.mapred.Task.done(Task.java:671)
at org.apache.hadoop.mapred.Task.runJobCleanupTask(Task.java:827)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:292)
at org.apache.hadoop.mapred.Child.main(Child.java:170)
14/01/15 20:32:03 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_r_000001_0, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_r_000001_0, expected: hdfs://localhost1.localdomain:9000
at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
at org.apache.hadoop.mapred.Task.done(Task.java:671)
at org.apache.hadoop.mapred.Task.runJobCleanupTask(Task.java:827)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:358)
at org.apache.hadoop.mapred.Child.main(Child.java:170)
14/01/15 20:32:14 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000002_1, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000002_1, expected: hdfs://localhost1.localdomain:9000
at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
at org.apache.hadoop.mapred.Task.done(Task.java:671)
at org.apache.hadoop.mapred.Task.runJobCleanupTask(Task.java:827)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:292)
at org.apache.hadoop.mapred.Child.main(Child.java:170)
14/01/15 20:32:23 INFO mapred.JobClient: Task Id : attempt_201401151958_0003_m_000002_2, Status : FAILED
java.lang.IllegalArgumentException: Wrong FS: hdfs://192.168.0.2:9000/user/root/out/_temporary/_attempt_201401151958_0003_m_000002_2, expected: hdfs://localhost1.localdomain:9000
at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
at org.apache.hadoop.hdfs.DistributedFileSystem.checkPath(DistributedFileSystem.java:99)
at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:155)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:453)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:648)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.needsTaskCommit(FileOutputCommitter.java:217)
at org.apache.hadoop.mapred.Task.done(Task.java:671)
at org.apache.hadoop.mapred.Task.runJobCleanupTask(Task.java:827)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:292)
at org.apache.hadoop.mapred.Child.main(Child.java:170)
14/01/15 20:32:32 INFO mapred.JobClient: Job complete: job_201401151958_0003
14/01/15 20:32:32 INFO mapred.JobClient: Counters: 0
Solution
The "Wrong FS ... expected: hdfs://localhost1.localdomain:9000" message means the filesystem URI used for the job's paths (hdfs://192.168.0.2:9000) does not match the fs.default.name configured on the node running the task. As in Problems 1 and 3, the likely fix is to make fs.default.name identical in core-site.xml on every node (preferably the IP, or a hostname that every node resolves via /etc/hosts) and then restart the cluster.
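A quick way to spot the inconsistency (the conf path follows the 0.20.2 layout used in Problem 5; run on every node and compare the output):
grep -A 1 fs.default.name /usr/java/hadoop-0.20.2/conf/core-site.xml
# the <value> line should be identical on the master and on every slave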
Problem 5
On the master, run start-all.sh as the hadoop user.
Checking the master with jps shows:
NameNode
JobTracker
SecondaryNameNode
all started normally, but running jps on the slave nodes shows only TaskTracker; the DataNode did not come up.
Solution
1. First run stop-all.sh to stop all services.
2. On every slave node, delete the tmp and logs folders, then recreate empty tmp and logs folders.
3. On every slave node, delete core-site.xml under /usr/hadoop/conf and copy the master's core-site.xml over to each slave, e.g.:
scp /usr/java/hadoop-0.20.2/conf/core-site.xml localhost2.localdomain:/usr/java/hadoop-0.20.2/conf/
4. Reformat the NameNode: hadoop namenode -format
5. Start everything again: start-all.sh (the full sequence is sketched as shell commands below)
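A rough shell sketch of the above, run from the master as the hadoop user. The Hadoop home /usr/java/hadoop-0.20.2 and the slave name localhost2.localdomain are taken from the scp line; where the tmp and logs folders live is an assumption, so adjust the paths to your layout and repeat the per-slave commands for every slave:
cd /usr/java/hadoop-0.20.2
bin/stop-all.sh
ssh localhost2.localdomain 'cd /usr/java/hadoop-0.20.2 && rm -rf tmp logs && mkdir tmp logs'
scp conf/core-site.xml localhost2.localdomain:/usr/java/hadoop-0.20.2/conf/
bin/hadoop namenode -format          # note: this wipes the HDFS metadata
bin/start-all.sh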
Note: this is the situation I ran into; it is not necessarily the problem you are facing. In general, confirm the following (a quick check is sketched after the list):
1. Are the various xml configuration files correct?
2. Are the Java environment variables set correctly?
3. Does passwordless SSH work between the nodes?
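A rough check for items 2 and 3 (the slave hostname is the example used above; JAVA_HOME is normally set in conf/hadoop-env.sh in 0.20.x):
echo $JAVA_HOME
grep JAVA_HOME /usr/java/hadoop-0.20.2/conf/hadoop-env.sh
ssh localhost2.localdomain 'hostname'   # should log in and print the hostname without asking for a password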