我的第一个hadoop程序

2023-07-31 19:27:28
VirtualBox + Windows 的开发环境已经搭建好了:在 Windows 下做开发,在 Linux 虚拟机里以伪分布式模式运行 Hadoop。
下面是第一个程序:
package org.apache.hadoop.examples;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FSDataInputStream;

import org.apache.hadoop.fs.FSDataOutputStream;

import org.apache.hadoop.fs.FileStatus;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

/**
 * Concatenates the files found directly under an HDFS directory into a single
 * HDFS output file.
 *
 * <p>By default the input directory is {@code hdfs://master:9000/user/design} and
 * the output file is {@code hdfs://master:9000/user/design/out}. Both can be
 * overridden from the command line: {@code PutMerge [inputDir [outputFile]]}.
 * The output file is created through the file system that owns the input
 * directory, so both paths must live on the same file system.
 */
public class PutMerge {

    /** Input directory used when no command-line argument is given. */
    private static final String DEFAULT_INPUT_DIR = "hdfs://master:9000/user/design";

    /** Output file used when fewer than two command-line arguments are given. */
    private static final String DEFAULT_OUTPUT_FILE = "hdfs://master:9000/user/design/out";

    /** Size in bytes of the copy buffer. */
    private static final int BUFFER_SIZE = 256;

    /**
     * Program entry point.
     *
     * @param args optional overrides: {@code args[0]} is the input directory,
     *             {@code args[1]} is the output file; missing values fall back
     *             to the built-in defaults
     */
    public static void main(String[] args) {
        // Resolve the input directory and the output file.
        String inputdir = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_DIR;
        String outputdir = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_FILE;

        Path input = new Path(inputdir);
        Path output = new Path(outputdir);

        // Load the Hadoop configuration visible to this process.
        Configuration conf = new Configuration();

        try {
            // The FileSystem is the client handle used to talk to the storage
            // that owns the input path.
            FileSystem hdfs = input.getFileSystem(conf);
            merge(hdfs, input, output);
        } catch (IOException e) {
            System.err.println("Failed to merge " + inputdir + " into " + outputdir);
            e.printStackTrace();
        }
    }

    /**
     * Appends the content of every regular file listed under {@code input} to a
     * freshly created {@code output} file.
     *
     * @param hdfs   file system that owns both paths
     * @param input  directory whose direct children are merged
     * @param output file that receives the concatenated bytes
     * @throws IOException if listing, reading or writing fails
     */
    private static void merge(FileSystem hdfs, Path input, Path output) throws IOException {
        // Each FileStatus carries the entry's path plus metadata such as
        // size, modification time and permissions.
        FileStatus[] inputfiles = hdfs.listStatus(input);

        // Listing entries carry fully qualified paths; qualify the output the
        // same way so the two can be compared reliably.
        Path qualifiedOutput = hdfs.makeQualified(output);

        // create(Path) uses the default block size and overwrites an existing
        // file. try-with-resources closes the stream even if the copy fails.
        try (FSDataOutputStream out = hdfs.create(output)) {
            for (FileStatus fs : inputfiles) {
                // A directory cannot be opened as a stream.
                if (fs.isDirectory()) {
                    continue;
                }
                // The output may live inside the input directory (it does with
                // the defaults); a leftover from a previous run must not be
                // merged into itself.
                if (qualifiedOutput.equals(fs.getPath())) {
                    continue;
                }

                System.out.println("文件:"+fs.getPath().getName());
                copyInto(hdfs, fs.getPath(), out);
            }
        }
    }

    /**
     * Copies all bytes of {@code source} into {@code out}, leaving {@code out} open.
     *
     * @param hdfs   file system used to open {@code source}
     * @param source file to read
     * @param out    destination stream
     * @throws IOException if reading or writing fails
     */
    private static void copyInto(FileSystem hdfs, Path source, FSDataOutputStream out)
            throws IOException {
        try (FSDataInputStream fin = hdfs.open(source)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int bytereaded;
            while ((bytereaded = fin.read(buffer)) > 0) {
                System.out.println("写入"+bytereaded);
                out.write(buffer, 0, bytereaded);
            }
        }
    }

}
码农公寓

相关文章