Sample: Write and Read Data from HDFS with the Java API

2022-11-27 08:34:39

HDFS: hadoop distributed file system

它抽象了整个集群的存储资源，可以存放大文件。

文件采用分块存储、多副本复制的设计。块的默认大小在 Hadoop 1.x 中是 64MB（Hadoop 2.x 及以后为 128MB）。

流式数据访问，一次写入（现支持append），多次读取。

不适合的方面：

低延迟的数据访问

解决方案：HBase

大量的小文件

解决方案：使用 CombineFileInputFormat，或直接把小文件合并成 SequenceFile 存储到 HDFS。

HDFS的块

块是独立的存储单元。但是如果文件小于默认的块大小如64M，它不会占据整个块的空间。

HDFS的块比磁盘的块大，目的是为了最小化寻址开销。

NameNode 管理着文件系统的命名空间，即文件的元数据信息，以及文件的各个块存放在哪些 DataNode 结点上。请求文件的时候，客户端先向 NameNode 查询块的位置信息，再直接到对应的 DataNode 结点上读取数据内容。

package myexamples;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Small demonstration of the Hadoop {@link FileSystem} API: it writes a text
 * file to HDFS, reads it back line by line, and prints the block locations of
 * the file.
 */
public class hdfsexample {

    /**
     * Prints one line per block of {@code file}, covering the whole file
     * (offset 0 up to the length reported by its {@link FileStatus}).
     *
     * @param fs   file system that holds the file
     * @param file path of the file whose block locations are printed
     * @throws IOException if the file status or block locations cannot be fetched
     */
    static void showblock(FileSystem fs, Path file) throws IOException {
        FileStatus fileStatus = fs.getFileStatus(file);
        BlockLocation[] blocks = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        for (BlockLocation bl : blocks) {
            System.out.println(bl.toString());
        }
    }

    /**
     * Reads {@code file} as UTF-8 text and prints every line to standard output.
     *
     * @param fs   file system that holds the file
     * @param file path of the file to read
     * @throws IOException if the file cannot be opened or read
     */
    static void read(FileSystem fs, Path file) throws IOException {
        // try-with-resources closes the stream even when reading fails half-way.
        try (FSDataInputStream inStream = fs.open(file);
             BufferedReader br = new BufferedReader(
                     new InputStreamReader(inStream, StandardCharsets.UTF_8))) {
            String data;
            while ((data = br.readLine()) != null) {
                System.out.println(data);
            }
        }
    }

    /**
     * Creates {@code file} and writes 100 numbered lines of UTF-8 text to it.
     *
     * @param fs   file system in which the file is created
     * @param file path of the file to create
     * @throws IOException if the file cannot be created or written
     */
    static void write(FileSystem fs, Path file) throws IOException {
        // try-with-resources flushes and closes the stream even when a write fails.
        try (FSDataOutputStream outStream = fs.create(file);
             BufferedWriter bw = new BufferedWriter(
                     new OutputStreamWriter(outStream, StandardCharsets.UTF_8))) {
            for (int i = 1; i < 101; i++) {
                bw.write("Line" + i + " welcome to hdfs java api");
                bw.newLine();
            }
        }
    }

    /**
     * Connects to the HDFS name node, recreates the demo file, then reads it
     * back and prints its block locations.
     *
     * @param args unused
     * @throws IOException if any file system operation fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // This setting is important for connecting to Hadoop HDFS; without it
        // FileSystem.get returns the local file system (file:///).
        // NOTE(review): newer Hadoop releases name this key "fs.defaultFS" and
        // treat "fs.default.name" as a deprecated alias - confirm against the
        // Hadoop version in use before renaming.
        conf.set("fs.default.name", "hdfs://namenode:9000");

        // Close the file system handle even if one of the steps below throws.
        try (FileSystem fs = FileSystem.get(conf)) {
            System.out.println(fs.getUri());

            Path file = new Path("/user/hadoop/test/demo2.txt");
            // Start from a clean state so the demo can be re-run.
            if (fs.exists(file)) {
                fs.delete(file, false);
            }

            write(fs, file);
            read(fs, file);
            showblock(fs, file);
        }
    }

}

码农公寓

相关文章