HDFS的API操作

2023-11-19 16:37:10

文章目录

1）准备好hadoop相关的资料包

2）配置环境变量

新建一个变量名为：HADOOP_HOME，值为所存放的地址

打开PATH，新增一项：%HADOOP_HOME%\bin

4）在IDEA创建一个Maven工程HDFSClient（注：若不会初始配置Maven，可参照：https://www.bilibili.com/video/BV1Kr4y1T7zB?spm_id_from=333.1007.top_right_bar_window_history.content.click）

4.1配置pom.xml文件

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the HDFSClient tutorial project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates. -->
    <groupId>org.example</groupId>
    <artifactId>HDFSClient</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Compile sources as, and emit bytecode for, Java 17. -->
    <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- Hadoop client libraries; supplies the org.apache.hadoop.* classes used by HdfsClient. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.1.3</version>
        </dependency>
        <!-- JUnit 4 (@Before/@After/@Test). No <scope> is declared, so it is on the
             compile classpath as well as the test classpath. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <!-- SLF4J binding that routes log output to Log4j 1.x (configured by log4j.properties). -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.30</version>
        </dependency>
    </dependencies>


</project>

4.2在src/main/resources目录下，新建一个“log4j.properties”文件，内容如下：

# Root logger: level INFO, output goes to the "stdout" appender only.
log4j.rootLogger=INFO, stdout
# Console appender; pattern is: date, level, [logger name] - message, newline.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
# File appender writing to target/spring.log with the same pattern.
# NOTE: "logfile" is not listed in log4j.rootLogger above, so within this file
# no logger is attached to it.
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n

5）在src/main/java下创建包名com.yingzi.hdfs，创建HdfsClient类进行相关操作

package com.yingzi.hdfs;

/**
 * @author 影子
 * @create 2022-01-11-15:46
 **/

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;

/**
 * JUnit 4 walkthrough of common HDFS client operations.
 *
 * <p>Every test follows the usual client pattern (the same pattern applies to
 * other clients such as ZooKeeper):
 * <ol>
 *   <li>obtain a client object ({@link #init()}),</li>
 *   <li>run the operation (the {@code @Test} method),</li>
 *   <li>release the resource ({@link #close()}).</li>
 * </ol>
 *
 * <p>The cluster address, user name and all paths are hard-coded for the
 * tutorial environment; the tests need a reachable NameNode to run.
 */
public class HdfsClient {

    /** HDFS client handle; created before each test and closed after it. */
    private FileSystem fs;

    /**
     * Step 1: connects to the NameNode and stores the resulting client in {@link #fs}.
     *
     * @throws IOException          if the file system cannot be obtained
     * @throws InterruptedException if the connection attempt is interrupted
     * @throws URISyntaxException   if the NameNode address is not a valid URI
     */
    @Before
    public void init() throws IOException, InterruptedException, URISyntaxException {
        // NameNode address of the cluster to connect to.
        URI uri = new URI("hdfs://hadoop102:8020");

        // Client-side configuration. Values set here in code take precedence
        // over configuration files (see testPut for the full precedence order).
        Configuration configuration = new Configuration();
        configuration.set("dfs.replication", "2");

        // User the client acts as on HDFS.
        String user = "yingzi";

        fs = FileSystem.get(uri, configuration, user);
    }

    /**
     * Step 3: releases the client.
     *
     * <p>JUnit 4 runs {@code @After} methods even when {@code @Before} threw, so
     * {@link #fs} may still be {@code null} here; guarding against that keeps the
     * original connection failure from being accompanied by a
     * {@link NullPointerException}.
     *
     * @throws IOException if closing the file system fails
     */
    @After
    public void close() throws IOException {
        if (fs != null) {
            fs.close();
        }
    }

    /**
     * Creates a directory (including any missing parents) on HDFS.
     *
     * @throws URISyntaxException   declared for signature compatibility
     * @throws IOException          if the directory cannot be created
     * @throws InterruptedException declared for signature compatibility
     */
    @Test
    public void testmkdir() throws URISyntaxException, IOException, InterruptedException {
        fs.mkdirs(new Path("/xiyou/huaguoshan1"));
    }

    /**
     * Uploads a local file to HDFS.
     *
     * <p>Configuration precedence, lowest to highest:
     * hdfs-default.xml =&gt; hdfs-site.xml =&gt; configuration file in the project's
     * resources directory =&gt; values set in code.
     *
     * @throws IOException if the upload fails
     */
    @Test
    public void testPut() throws IOException {
        // Arguments: (1) delete the local source afterwards, (2) overwrite an
        // existing destination, (3) local source path, (4) HDFS destination path.
        fs.copyFromLocalFile(false, true, new Path("C:\\Users\\Admin\\Desktop\\sunwukong.txt"), new Path("/xiyou/huaguoshan"));
    }

    /**
     * Downloads a path from HDFS to the local file system.
     *
     * @throws IOException if the download fails
     */
    @Test
    public void testGet() throws IOException {
        // Arguments: (1) delete the HDFS source afterwards, (2) HDFS source path,
        // (3) local (Windows) destination path, (4) use the raw local file
        // system, i.e. do not write local .crc checksum files.
        fs.copyToLocalFile(false, new Path("/xiyou/huaguoshan"), new Path("C:\\Users\\Admin\\Desktop"), true);
    }

    /**
     * Deletes paths from HDFS, first non-recursively and then recursively.
     *
     * @throws IOException if a deletion fails
     */
    @Test
    public void testRm() throws IOException {
        // Arguments: (1) path to delete, (2) whether to delete recursively.
        fs.delete(new Path("/xiyou/huaguoshan1"), false);

        // A non-empty directory can only be removed with recursive = true.
        fs.delete(new Path("/jinguo"), true);
    }

    /**
     * Renames / moves a path on HDFS.
     *
     * @throws IOException if the rename fails
     */
    @Test
    public void testMv() throws IOException {
        // Arguments: (1) source path, (2) destination path.
        // Renaming a file in place:
//        fs.rename(new Path("/input/sunwukon.txt"),new Path("/input/ss.txt"));

        // Moving (and renaming) a file to another directory:
//        fs.rename(new Path("/input/ss.txt"),new Path("/cls.txt"));

        // Renaming a directory.
        fs.rename(new Path("/input"), new Path("/output"));
    }

    /**
     * Prints the metadata and block locations of every file under the root,
     * walking the tree recursively.
     *
     * @throws IOException if listing fails
     */
    @Test
    public void fileDetail() throws IOException {
        // Second argument true = descend into sub-directories.
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);

        while (listFiles.hasNext()) {
            LocatedFileStatus fileStatus = listFiles.next();

            System.out.println("=========" + fileStatus.getPath() + "========");
            System.out.println(fileStatus.getPermission());
            System.out.println(fileStatus.getOwner());
            System.out.println(fileStatus.getGroup());
            System.out.println(fileStatus.getLen());
            System.out.println(fileStatus.getModificationTime());
            System.out.println(fileStatus.getReplication());
            System.out.println(fileStatus.getBlockSize());
            System.out.println(fileStatus.getPath().getName());

            // Block locations of this file.
            BlockLocation[] blockLocations = fileStatus.getBlockLocations();
            System.out.println(Arrays.toString(blockLocations));
        }
    }

    /**
     * Lists the direct children of the root and reports whether each one is a
     * file or a directory.
     *
     * @throws IOException if listing fails
     */
    @Test
    public void testFile() throws IOException {
        FileStatus[] listStatus = fs.listStatus(new Path("/"));

        for (FileStatus status : listStatus) {
            if (status.isFile()) {
                System.out.println("文件：" + status.getPath().getName());
            } else {
                System.out.println("目录：" + status.getPath().getName());
            }
        }
    }

}

码农公寓

文章目录

1）准备好hadoop相关的资料包

2）配置环境变量

4）在IDEA创建一个Maven工程HDFSClient（注：若不会初始配置Maven，可参照：https://www.bilibili.com/video/BV1Kr4y1T7zB?spm_id_from=333.1007.top_right_bar_window_history.content.click）

5）在src/main/java下创建包名com.yingzi.hdfs，创建HdfsClient类进行相关操作

相关文章