Abstract: This article introduces the basic HDFS API of Hadoop. It covers the Windows-side dependency setup and the Maven dependency configuration, and then walks through hands-on operations: obtaining a remote HDFS connection and performing a series of operations against it, including directory creation, file upload, file download, file/directory deletion, file renaming or moving, printing detailed file information to the console, and determining whether a path is a file or a directory and printing the corresponding details.
Windows dependency setup before using the Hadoop API
Windows-hadoop-3.1.0
Aliyun download link: Windows-hadoop-3.1.0
CSDN download link: Windows-hadoop-3.1.0
1. Download Windows-hadoop-3.1.0 and extract it to a directory of your choice.
Then configure the environment variables as follows:
Finally, run the exe file boxed in the first screenshot; if no error is reported, the setup succeeded.
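For reference, the configuration typically amounts to setting HADOOP_HOME to the extraction directory and appending %HADOOP_HOME%\bin to Path. A minimal sketch, assuming the archive was extracted to D:\Windows-hadoop-3.1.0 (a hypothetical path):

HADOOP_HOME=D:\Windows-hadoop-3.1.0
Path=%Path%;%HADOOP_HOME%\bin

The exe referred to above is typically winutils.exe under the bin directory of the extracted package.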
IDEA_Maven_Hadoop_API
pom.xml configuration
1. Before using the API, create a Maven project in IDEA (project name: HdfsClientDemo) and add the following to pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>HdfsClientDemo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- Dependencies for the Hadoop client API -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.1.3</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.30</version>
        </dependency>
    </dependencies>
</project>
Notes on the utility class com.lqs.util.HadoopUtil
Note:
The lines used throughout the code below,
//Close resources
HadoopUtil.close(fs);
call a utility class I wrote myself. Its contents are as follows:
//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by FernFlower decompiler)
//
package com.lqs.util;

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;

public class HadoopUtil {

    private HadoopUtil() {
    }

    public static void close(FileSystem fileSystem) {
        try {
            if (fileSystem != null) {
                fileSystem.close();
                System.out.println("Closed the remote Hadoop HDFS connection successfully...");
            }
        } catch (IOException e) {
            System.out.println("Failed to close the remote Hadoop HDFS connection...\nReason: " + e);
        }
    }

}
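Alternatively, since FileSystem implements java.io.Closeable, the connection can also be released with try-with-resources instead of the helper. A minimal sketch (the cluster URI and user match the examples below; the target path is just an illustration):

package com.lqs.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class HdfsTryWithResources {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // try-with-resources closes the FileSystem automatically, even when an exception is thrown
        try (FileSystem fs = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration, "lqs")) {
            fs.mkdirs(new Path("/test/try-with-resources"));
        }
    }
}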
Configuration file
2. Then, under HdfsClientDemo\src\main\resources, create the configuration file log4j.properties and add the following content:
log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
Directory creation
3. Under src\main\java, create the class file HdfsClient.java for directory creation and enter the following:
package com.lqs.hdfs;

import com.lqs.util.HadoopUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * @author qingSong liu
 * @version 1.0
 * @time 2021/12/7 10:51
 *
 * Connection test
 */
public class HdfsClient {

    @Test
    public void testMkdirs() throws IOException, URISyntaxException, InterruptedException {
        //1. Get the file system
        Configuration configuration = new Configuration();
//        FileSystem fileSystem = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration);
        FileSystem fs = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration, "lqs");
        //2. Create the directory
        fs.mkdirs(new Path("/test/test1/lqs.txt"));
        //3. Close resources
        HadoopUtil.close(fs);
    }

}
File upload
4. Under src\main\java, create the class file HdfsClientUpload.java and enter the following:
package com.lqs.hdfs;

import com.lqs.util.HadoopUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * @author qingSong liu
 * @version 1.0
 * @time 2021/12/7 16:24
 */
public class HdfsClientUpload {

    @Test
    public void testCopyFromLocalFile() throws URISyntaxException, IOException, InterruptedException {
        //1. Get the file system and set the replication factor in code
        Configuration configuration = new Configuration();
        configuration.set("dfs.replication", "2");
        FileSystem fs = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration, "lqs");
        //2. Upload the local file to HDFS
        fs.copyFromLocalFile(new Path("F:\\test\\lqs.txt"), new Path("/test/test1"));
        //3. Close resources
        HadoopUtil.close(fs);
    }

}
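To confirm that the dfs.replication value set in code actually applied to the uploaded file, the replication factor can be read back through getFileStatus. A minimal sketch, reusing the same cluster URI, user, and path as above:

package com.lqs.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class HdfsReplicationCheck {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        try (FileSystem fs = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration, "lqs")) {
            // Read back the replication factor recorded for the uploaded file
            FileStatus status = fs.getFileStatus(new Path("/test/test1/lqs.txt"));
            System.out.println("replication = " + status.getReplication());
        }
    }
}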
File download
5. Under src\main\java, create the class file HdfsClientDownload.java and enter the following:
package com.lqs.hdfs;

import com.lqs.util.HadoopUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * @author qingSong liu
 * @version 1.0
 * @time 2021/12/7 16:10
 */
public class HdfsClientDownload {

    @Test
    public void testCopyToLocalFile() throws URISyntaxException, IOException, InterruptedException {
        //1. Get the file system
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration, "lqs");
        //2. Download the file
        // boolean delSrc: whether to delete the source file after downloading
        // Path src: the HDFS path of the file to download
        // Path dst: the local path to download the file to
        // boolean useRawLocalFileSystem: whether to write with the raw local file system; true skips generating the local .crc checksum file
        fs.copyToLocalFile(false, new Path("/test/test1/lqs.txt"), new Path("f:/lqs.txt"), true);
        //3. Close resources
        HadoopUtil.close(fs);
    }

}
File (directory) deletion
6. Under src\main\java, create the class file HdfsClientDelete.java and enter the following:
package com.lqs.hdfs;

import com.lqs.util.HadoopUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * @author qingSong liu
 * @version 1.0
 * @time 2021/12/7 18:36
 */
public class HdfsClientDelete {

    @Test
    public void testDelete() throws URISyntaxException, IOException, InterruptedException {
        //1. Get the file system
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration, "lqs");
        //2. Delete the path; the second argument enables recursive deletion
        fs.delete(new Path("/test"), true);
        //3. Close resources
        HadoopUtil.close(fs);
    }

}
File rename, move, or both at once
7. Under src\main\java, create the class file HdfsClientRename.java and enter the following:
package com.lqs.hdfs;

import com.lqs.util.HadoopUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * @author qingSong liu
 * @version 1.0
 * @time 2021/12/7 17:32
 */
public class HdfsClientRename {

    @Test
    public void testRename() throws URISyntaxException, IOException, InterruptedException {
        //1. Get the file system
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration, "lqs");
        //2. Rename the file
        fs.rename(new Path("/test/test1/lqs.txt"), new Path("/test/test1/lqstest.txt"));
        // Move the file to another directory. Note: the destination directory must already exist, otherwise the move fails
        boolean result = fs.rename(new Path("/test/test1/lqs.txt"), new Path("/lqs/test/test.txt"));
        if (result) {
            System.out.println("Move succeeded");
        } else {
            System.out.println("Move failed");
        }
        // Move the file and rename it at the same time. Note: the destination directory must already exist, otherwise the move fails
        boolean result1 = fs.rename(new Path("/xiyo/test/test1/lqs.txt"), new Path("/lqs/test/test.txt"));
        if (result1) {
            System.out.println("Move and rename succeeded");
        } else {
            System.out.println("Move and rename failed");
        }
        //3. Close resources
        HadoopUtil.close(fs);
    }

}
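Because rename fails when the destination directory does not exist, one way to make the move robust is to create the target's parent directory first. A minimal sketch, reusing the same hypothetical cluster and paths as above:

package com.lqs.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class HdfsClientSafeMove {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        try (FileSystem fs = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration, "lqs")) {
            Path target = new Path("/lqs/test/test.txt");
            // Create the destination directory first so the rename cannot fail for that reason
            fs.mkdirs(target.getParent());
            boolean moved = fs.rename(new Path("/test/test1/lqstest.txt"), target);
            System.out.println(moved ? "Move succeeded" : "Move failed");
        }
    }
}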
Determining the file type (file or directory)
8. Under src\main\java, create the class file HdfsClientListStatus.java and enter the following:
package com.lqs.hdfs;

import com.lqs.util.HadoopUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;

/**
 * @author qingSong liu
 * @version 1.0
 * @time 2021/12/7 23:24
 */
public class HdfsClientListStatus {

    @Test
    public void testListStatus() throws URISyntaxException, IOException, InterruptedException {
        //1. Get the file system
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration, "lqs");
        //2. List the status of the path
        FileStatus[] fileStatuses = fs.listStatus(new Path("/test/test1/lqs.txt"));
        for (FileStatus fileStatus : fileStatuses) {
            // If it is a file, prefix the path with "-", otherwise with "d"
            if (fileStatus.isFile()) {
                System.out.println("-:" + fileStatus.getPath());
            } else {
                System.out.println("d:" + fileStatus.getPath());
            }
            System.out.println("++++++++++++++" + fileStatus.getPath() + "++++++++++++++");
            System.out.println(fileStatus.getPermission());
            System.out.println(fileStatus.getOwner());
            System.out.println(fileStatus.getGroup());
            System.out.println(fileStatus.getLen());
            System.out.println(fileStatus.getModificationTime());
            System.out.println(fileStatus.getReplication());
            System.out.println(fileStatus.getBlockSize());
            System.out.println(fileStatus.getPath().getName());
            // Get the block size (a plain FileStatus does not expose block locations)
            long blockSize = fileStatus.getBlockSize();
            System.out.println(Arrays.toString(new long[]{blockSize}));
        }
        //3. Close resources
        HadoopUtil.close(fs);
    }

}
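listStatus returns plain FileStatus objects, which carry the block size but not the block locations. If actual block location information is needed from this code path, FileSystem.getFileBlockLocations can be called on each status. A minimal sketch, assuming the same cluster, user, and path as above:

package com.lqs.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;
import java.util.Arrays;

public class HdfsBlockLocations {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        try (FileSystem fs = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration, "lqs")) {
            for (FileStatus status : fs.listStatus(new Path("/test/test1/lqs.txt"))) {
                if (status.isFile()) {
                    // Ask the NameNode for the block locations of this file
                    BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
                    System.out.println(status.getPath() + " -> " + Arrays.toString(locations));
                }
            }
        }
    }
}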
File details
9. Under src\main\java, create the class file HdfsClientListFiles.java and enter the following:
package com.lqs.hdfs;

import com.lqs.util.HadoopUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;

/**
 * @author qingSong liu
 * @version 1.0
 * @time 2021/12/7 19:07
 */
public class HdfsClientListFiles {

    @Test
    public void testListFiles() throws URISyntaxException, IOException, InterruptedException {
        //1. Get the file system. The Configuration is built from Hadoop's configuration files: core-default.xml, core-site.xml, hdfs-default.xml, hdfs-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://bdc112:8020"), configuration, "lqs");
        //2. Get file details. Note: this iterates over files only, not directories
        // The second argument (recursive) controls whether subdirectories are searched recursively
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
        System.out.println(listFiles.hasNext());
        while (listFiles.hasNext()) {
            LocatedFileStatus fileStatus = listFiles.next();
            System.out.println("++++++++++++++" + fileStatus.getPath() + "++++++++++++++");
            System.out.println(fileStatus.getPermission());
            System.out.println(fileStatus.getOwner());
            System.out.println(fileStatus.getGroup());
            System.out.println(fileStatus.getLen());
            System.out.println(fileStatus.getModificationTime());
            System.out.println(fileStatus.getReplication());
            System.out.println(fileStatus.getBlockSize());
            System.out.println(fileStatus.getPath().getName());
            // Get the block locations
            BlockLocation[] blockLocations = fileStatus.getBlockLocations();
            System.out.println(Arrays.toString(blockLocations));
        }
        //3. Close resources
        HadoopUtil.close(fs);
    }

}