简单API介绍
**github传送门(持续更新):**https://github.com/xiajie520/hadoop-demo.git
我这里用的是父子工程
父工程pom
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.wenjie</groupId>
<artifactId>hadoop-demo</artifactId>
<packaging>pom</packaging>
<version>1.0-SNAPSHOT</version>
<modules>
<module>hdfs-demo</module>
</modules>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<hadoop-version>3.1.3</hadoop-version>
<junit-version>4.12</junit-version>
<slf4j-version>1.7.30</slf4j-version>
<json-version>1.2.76</json-version>
</properties>
<dependencyManagement>
<dependencies>
<!-- hadoop-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop-version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit-version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j-version}</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${json-version}</version>
</dependency>
</dependencies>
</dependencyManagement>
</project>
子工程pom
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>hadoop-demo</artifactId>
<groupId>com.wenjie</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>hdfs-demo</artifactId>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
<!-- hadoop-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</dependency>
<!-- junit -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
<!-- slf4j -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
</dependency>
</dependencies>
</project>
API介绍
package com.wenjie.hdfs.util;
import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
public class HDFSClientTest {
/**
* hdfs客户端
*/
private FileSystem fs;
/**
* Test前置操作
*
* @throws Exception
*/
@Before
public void init() throws Exception {
URI uri = new URI("hdfs://hadoop01:8020");
/**
* 配置优先级 由低到高:hdfs-default.xml -> 环境中的hdfs-site.xml ->程序配置中的hdfs-site.xml -> 代码中编写的conf
*/
Configuration conf = new Configuration();
String user = "hadoop";
fs = FileSystem.get(uri, conf, user);
}
/**
* Test后置操作
*
* @throws IOException
*/
@After
public void close() throws IOException {
fs.close();
}
@Test
public void mkdir() throws IOException {
fs.mkdirs(new Path("/idea/dir2"));
}
/**
* 文件上传测试
*
* @throws IOException
*/
@Test
public void putTest() throws IOException {
// 是否删除源文件
boolean delSrc = false;
// 如目标路径已经有文件了,是否覆盖
boolean overwrite = true;
// 源文件路径
Path src = new Path("J:\\软件\\typora(MD).exe");
// 目标文件路径(这里有个坑)
/**
* 举例,如/idea/dir2这个文件夹不存在,上面的文件会直接上传到/idea文件夹,并且重命名为dir2,你就得到了一个名为dir2的文件
* 如果文件夹存在,则会将文件上传到文件夹下(哪怕你用/idea/dir2/也是一样的结果)
*/
Path dst = new Path("/idea/dir2");
fs.copyFromLocalFile(delSrc, overwrite, src, dst);
}
/**
* 文件下载测试
*/
@Test
public void getTest() throws IOException {
// 源文件是否删除
boolean delSrc = false;
// 源文件路径
Path src = new Path("/idea/dir2/typora(MD).exe");
// 目标文件路径(本地路径)
Path dst = new Path("C:\\Users\\86139\\Desktop\\hadoop");
// 是否开启本地校验 如果是false ,下载完毕之后除了源文件之外还有一个crc格式的文件,是做校验用的
//
boolean useRawLocalFileSystem = false;
fs.copyToLocalFile(delSrc, src, dst, useRawLocalFileSystem);
}
/**
* 删除测试
*
* @throws IOException
*/
@Test
public void delTest() throws IOException {
// 需要删除的文件
Path path = new Path("/idea/dir2");
// 是否递归删除 ,删除空目录或者删除文件的时候可以是false,删除非空目录需要设置为true
boolean isr = true;
fs.delete(path, isr);
}
/**
* 测试更名和移动
*
* @throws Exception
*/
@Test
public void mvTest() throws Exception {
Path path = new Path("/idea/input5");
// 第一个是源文件或文件夹,第二个是目标文件或文件夹
// fs移动时会截取最后一段名称作为文件名或者文件夹的名称,如目标是文件夹名称,则会置入文件夹,如目标名称不存在,则会更名为目标名称
// 与linux中的mv命令大概相同
fs.rename(path, new Path("/idea/dir1/tt"));
}
/**
* 查看文件信息
*
* @throws Exception
*/
@Test
public void getFileInfo() throws Exception {
// 两个参数 final Path f 文件路径, final boolean recursive 是否递归++
RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
while (listFiles.hasNext()) {
LocatedFileStatus next = listFiles.next();
System.out.println("=====" + next.getPath() + "=====");
/**
* 参数
* private static final long serialVersionUID = 332065512L;
* private Path path; 路径
* private long length; 文件大小
* private Boolean isdir; 是否为文件夹
* private short block_replication;
* private long blocksize; 文件块大小
* private long modification_time; 更新时间
* private long access_time; 上传时间
* private FsPermission permission; 权限
* private String owner; 文件所属人
* private String group; 文件所属组
* private Path symlink; 符号链接
* private Set<FileStatus.AttrFlags> attr; 属性标志
* public static final Set<FileStatus.AttrFlags> NONE = Collections.emptySet();
* private BlockLocation[] locations; 所有块信息
*/
System.out.println(next.getReplication());
System.out.println(next.getAccessTime());
if (next.isSymlink()) {
System.out.println(next.getSymlink());
}
System.out.println(Arrays.toString(next.getBlockLocations()));
}
}
}