1. Creating a directory
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx");
        // mkdirs() creates the directory (and any missing parents);
        // fs.create() would create an empty file instead.
        fs.mkdirs(path);
        fs.close();
    }
}
2. Deleting a directory
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx");
        // The single-argument delete() is deprecated; pass true to
        // delete the directory and its contents recursively.
        fs.delete(path, true);
        fs.close();
    }
}
3. Writing a file
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        FSDataOutputStream out = fs.create(path);
        out.writeUTF("da jia hao,cai shi zhen de hao!");
        // Close the output stream before closing the file system,
        // otherwise the data may never be flushed to HDFS.
        out.close();
        fs.close();
    }
}
4. Reading a file
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        if (fs.exists(path)) {
            FSDataInputStream is = fs.open(path);
            FileStatus status = fs.getFileStatus(path);
            // Allocate a buffer the size of the file and read it fully.
            byte[] buffer = new byte[(int) status.getLen()];
            is.readFully(0, buffer);
            is.close();
            fs.close();
            // buffer.toString() would only print the array reference;
            // build a String from the bytes instead.
            System.out.println(new String(buffer));
        }
    }
}
5. Uploading a local file to HDFS
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path("/home/hadoop/xxxx.txt");   // local source file
        Path dst = new Path("/user/hadoop/hdfs/");      // HDFS target directory
        fs.copyFromLocalFile(src, dst);
        fs.close();
    }
}
6. Deleting a file
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        // For a single file the recursive flag does not matter, but the
        // two-argument delete() is the non-deprecated form.
        fs.delete(path, false);
        fs.close();
    }
}
7. Listing all subdirectories and files under a given directory
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetAllChildFile {
    static Configuration conf = new Configuration();

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop");
        getFile(path, fs);
        //fs.close();
    }

    // Recursively walk the directory tree and print every file path.
    public static void getFile(Path path, FileSystem fs) throws IOException {
        FileStatus[] fileStatus = fs.listStatus(path);
        for (int i = 0; i < fileStatus.length; i++) {
            if (fileStatus[i].isDir()) {
                Path p = new Path(fileStatus[i].getPath().toString());
                getFile(p, fs);
            } else {
                System.out.println(fileStatus[i].getPath().toString());
            }
        }
    }
}
8. Finding the location of a file in the HDFS cluster
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FindFile {

    public static void main(String[] args) throws IOException {
        getFileLocal();
    }

    /**
     * Find where the blocks of a file are stored in the HDFS cluster.
     */
    public static void getFileLocal() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
        for (int i = 0; i < locations.length; i++) {
            // Each block may be replicated on several hosts, so print all of
            // them (indexing hosts[] with the block index would be a bug).
            String[] hosts = locations[i].getHosts();
            for (String host : hosts) {
                System.out.println("block_" + i + "_location:" + host);
            }
        }
    }
}
9. Listing the names of all nodes in the HDFS cluster
package com.hadoop.file;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class FindFile {

    public static void main(String[] args) throws IOException {
        getHDFSNode();
    }

    /**
     * Print the host name of every DataNode in the HDFS cluster.
     */
    public static void getHDFSNode() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Down-cast to DistributedFileSystem to get at the DataNode information.
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();
        for (int i = 0; i < dataNodeStats.length; i++) {
            System.out.println("DataNode_" + i + "_Node:" + dataNodeStats[i].getHostName());
        }
    }
}
When operating on FileSystem in a pseudo-distributed environment, an exception may be thrown.
The Java code is as follows:
FileSystem fs = FileSystem.get(conf);
The exception thrown is:
Exception in thread "main" java.lang.IllegalArgumentException: Wrong FS: hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in, expected: file:///
    at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310)
    at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:47)
    at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:357)
    at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:245)
    at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:125)
    at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:283)
    at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:356)
    at com.netease.hadoop.HDFSCatWithAPI.main(HDFSCatWithAPI.java:23)
Solution: copy Hadoop's core-site.xml and hdfs-site.xml into the current project (under the bin folder of the Eclipse workspace).
Summary: because a remote HDFS is being accessed, the FileSystem must be obtained through a URI.
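A minimal sketch of that approach is shown below; the NameNode address hdfs://localhost:9000 is taken from the exception above, and the class name GetFileSystemByURI is only illustrative:

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetFileSystemByURI {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Pass the HDFS URI explicitly so the client does not fall back
        // to the local file system (file:///).
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        System.out.println(fs.getUri());
        fs.close();
    }
}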