HADOOP之HDFS用idea操作(五)

2023-11-01 16:39:10

使用idea操作HDFS、创建文件、上传文件、获取块信息、下载文件

1.搭建maven工程

2.pom依赖

        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.5</version>
        </dependency>


        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.5</version>
        </dependency>

3.将hadoop conf目录中的core-site.xml、hdfs-site.xml放入工程的resources目录下（如 src/main/resources）

4.代码

package com.xiaoke.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;

/**
 * JUnit 4 walkthrough of basic HDFS client operations: creating a directory,
 * uploading a local file, and inspecting block locations / reading from an offset.
 *
 * <p>Each test gets a fresh {@link FileSystem} handle from {@link #conn()} and
 * releases it in {@link #close()}. The tests talk to a real cluster, so they
 * require the cluster configuration files to be on the classpath.
 */
public class TestHDFS {

    /** Offset (in bytes) that {@link #blocks()} seeks to before reading; 1 MiB. */
    private static final long SEEK_OFFSET = 1048576L;

    /** Number of single bytes {@link #blocks()} reads and prints after seeking. */
    private static final int BYTES_TO_PRINT = 12;

    /** Hadoop configuration shared by the tests; populated in {@link #conn()}. */
    public Configuration conf = null;

    /** File system handle shared by the tests; populated in {@link #conn()}. */
    public FileSystem fs = null;

    /**
     * Builds the configuration and opens the file system before every test.
     *
     * @throws Exception if the file system cannot be obtained
     */
    @Before
    public void conn() throws Exception {
        // Passing true asks Configuration to load its default resources, i.e. the
        // configuration files placed on the classpath (the resources directory).
        conf = new Configuration(true);

        // Use the settings from the configuration files, e.g.:
        //   <property>
        //     <name>fs.defaultFS</name>
        //     <value>hdfs://mycluster</value>
        //   </property>
        // The user name is taken from the HADOOP_USER_NAME environment variable
        // ("god" in this setup), which has to be set in the Windows environment.
        fs = FileSystem.get(conf);

        // Alternative that does not rely on the configuration files / environment:
        // fs = FileSystem.get(URI.create("hdfs://mycluster"), conf, "god");
    }

    /**
     * Creates the directory {@code /xiaoke002}, removing any existing one
     * (recursively) first so the test starts from a clean state.
     *
     * @throws Exception if an HDFS call fails
     */
    @Test
    public void mkdir() throws Exception {

        Path dir = new Path("/xiaoke002");
        if (fs.exists(dir)) {
            fs.delete(dir, true);
        }
        fs.mkdirs(dir);

    }

    /**
     * Uploads the local file {@code ./data/hello.txt} to {@code /xiaoke002/out.txt}.
     *
     * @throws Exception if the local file cannot be read or an HDFS call fails
     */
    @Test
    public void upload() throws Exception {

        Path outfile = new Path("/xiaoke002/out.txt");
        BufferedInputStream input = new BufferedInputStream(new FileInputStream(new File("./data/hello.txt")));
        try {
            FSDataOutputStream output = fs.create(outfile);
            // Copies everything from input to output and, because the last
            // argument is true, closes both streams when it is done.
            IOUtils.copyBytes(input, output, conf, true);
        } finally {
            // Covers the case where fs.create() fails before copyBytes takes
            // ownership of the streams; closing an already-closed stream is a no-op.
            IOUtils.closeStream(input);
        }
    }

    /**
     * Prints the block locations of {@code /user/god/data.txt}, then seeks to
     * {@link #SEEK_OFFSET} and prints the next {@link #BYTES_TO_PRINT} bytes as
     * characters, one per line.
     *
     * @throws Exception if the file does not exist, is shorter than the data
     *                   being read, or an HDFS call fails
     */
    @Test
    public void blocks() throws Exception {

        Path file = new Path("/user/god/data.txt");
        FileStatus fss = fs.getFileStatus(file);
        BlockLocation[] blks = fs.getFileBlockLocations(fss, 0, fss.getLen());
        for (BlockLocation b : blks) {
            System.out.println(b);
        }
        // Sample output for the author's file (offset, length, hosts):
        //   0,        1048576,        node04,node02  A
        //   1048576,  540319,         node04,node03  B
        // Idea: move the computation to the data. Users and programs work at the
        // file level and are not aware of blocks.

        // File-oriented input stream: without a seek, reading starts at the
        // beginning of the file.
        FSDataInputStream in = fs.open(file);
        try {
            // Sample content split across the two blocks:
            //   blk01: he
            //   blk02: llo msb 66231

            // With computation moved to the data, each worker reads only the part
            // it cares about (via seek); the framework by default prefers fetching
            // the data from a local/nearby DataNode.
            in.seek(SEEK_OFFSET);
            for (int i = 0; i < BYTES_TO_PRINT; i++) {
                System.out.println((char) in.readByte());
            }
        } finally {
            // The original never closed this stream.
            IOUtils.closeStream(in);
        }
    }


    /**
     * Releases the file system handle after every test.
     *
     * @throws Exception if closing the file system fails
     */
    @After
    public void close() throws Exception {
        // fs stays null when conn() failed; avoid piling an NPE on top of that failure.
        if (fs != null) {
            fs.close();
        }
    }
}

代码地址: https://gitee.com/Xiaokeworksveryhard/big-data.git
文件：hadoop-hdfs

码农公寓

相关文章