Creating the project
- File -> New -> Module -> Maven, then select the `quickstart` archetype
- Set the module name and location
- Click Finish
- The project directory
- Configure `pom.xml`
- Add the `hadoop` dependency:

```xml
<!-- Add the Hadoop dependency -->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-client</artifactId>
  <version>${hadoop.version}</version>
</dependency>
```
- Define the Hadoop version globally:

```xml
<!-- Define the Hadoop version globally -->
<hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
```
- The finished `pom.xml` (note the `cloudera` repository, which is required to resolve the CDH artifacts):

```xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.syh</groupId>
  <artifactId>hadoopdemo</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>hadoopdemo</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.7</maven.compiler.source>
    <maven.compiler.target>1.7</maven.compiler.target>
    <!-- Define the Hadoop version globally -->
    <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
  </properties>

  <repositories>
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
  </repositories>

  <dependencies>
    <!-- Add the Hadoop dependency -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>
```
- Click the reimport icon shown in the screenshot so Maven downloads the new dependencies
- The Maven dependency tree after the import
Testing that Hadoop is set up correctly
- Put the following code in `AppTest`:
```java
package com.syh;

import static org.junit.Assert.assertTrue;

import org.junit.Test;
import org.apache.hadoop.fs.*;

/**
 * Unit test for simple App.
 */
public class AppTest {
    /**
     * Rigorous Test :-)
     */
    @Test
    public void shouldAnswerWithTrue() {
        assertTrue(true);
    }

    @Test
    public void testHadoop() {
        // Merely referencing the type: if this compiles, hadoop-client resolved
        FileSystem fileSystem = null;
    }
}
```
- Hold `Ctrl` and click `FileSystem`; the IDE should jump to the class inside the Hadoop jar, which shows the dependency resolved correctly (an IDE-free check is sketched below)
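- If you prefer to verify without the IDE, here is a minimal sketch, assuming a helper class named `ClasspathCheck` (not part of the original project), that prints which jar `FileSystem` was loaded from; it should point into your local Maven repository:

```java
package com.syh;

import org.apache.hadoop.fs.FileSystem;

// Hypothetical helper, not part of the original post: print the location
// of the jar that FileSystem was loaded from. With the Maven dependency
// in place, this should be a path inside your local Maven repository.
public class ClasspathCheck {
    public static void main(String[] args) {
        System.out.println(FileSystem.class
                .getProtectionDomain()
                .getCodeSource()
                .getLocation());
    }
}
```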
Connecting to HDFS
- Create a new Java class
- Put the following code in `HDFSApp`; it opens a connection and creates a directory in HDFS:
```java
package com.syh.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSApp {

    // NameNode URI (use your own IP; 8020 is the default NameNode RPC port)
    public static final String HDFS_PATH = "hdfs://192.168.207.128:8020";

    Configuration configuration = null;
    FileSystem fileSystem = null;

    // Talking to HDFS from Java requires a connection first;
    // this runs before each test method
    @Before
    public void setUp() throws Exception {
        System.out.println("Opening connection to HDFS");
        configuration = new Configuration();
        fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "hadoop");
    }

    // Test: create a directory
    @Test
    public void mkdir() throws Exception {
        fileSystem.mkdirs(new Path("/emperorlawd/test"));
    }

    // Runs after each test method
    @After
    public void tearDown() {
        configuration = null;
        fileSystem = null;
        System.out.println("Closing connection to HDFS");
    }
}
```
- A successful run prints the connection messages from `setUp()` and `tearDown()` around the test output
Creating a file
- Add the following method to `HDFSApp` (note the `@Test` annotation and the extra import):

```java
// Create a file and write a string to it
// requires: import org.apache.hadoop.fs.FSDataOutputStream;
@Test
public void create() throws Exception {
    Path path = new Path("/emperorlawd/test/hello.txt");
    FSDataOutputStream outputStream = fileSystem.create(path);
    outputStream.write("hello hadoop".getBytes());
    outputStream.flush();
    outputStream.close();
}
```
- Run `create()`; an existence check is sketched below
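- To double-check the write, a minimal sketch, assuming a test method named `exists()` (an addition of this write-up, not in the original post), asks HDFS whether the new file is there:

```java
// Hypothetical check, not in the original tutorial:
// FileSystem.exists() returns true once create() has run
@Test
public void exists() throws Exception {
    Path path = new Path("/emperorlawd/test/hello.txt");
    System.out.println(fileSystem.exists(path)); // expected: true
}
```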
Renaming a file
- Add the following method to `HDFSApp`:

```java
// Rename a file
@Test
public void rename() throws Exception {
    Path oldPath = new Path("/emperorlawd/test/hello.txt");
    Path newPath = new Path("/emperorlawd/test/rehello.txt");
    fileSystem.rename(oldPath, newPath);
}
```
- Run `rename()`; a variant that checks the return value follows below
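- `rename()` reports failure through its boolean return value rather than an exception, so a hedged variant (hypothetical, not part of the original post) asserts on it:

```java
// Hypothetical variant, not in the original tutorial:
// requires: import static org.junit.Assert.assertTrue;
@Test
public void renameChecked() throws Exception {
    Path oldPath = new Path("/emperorlawd/test/hello.txt");
    Path newPath = new Path("/emperorlawd/test/rehello.txt");
    assertTrue(fileSystem.rename(oldPath, newPath));
}
```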
Viewing a file
- Add the following method to `HDFSApp`:

```java
// Print a file's contents to stdout
// requires: import org.apache.hadoop.fs.FSDataInputStream;
//           import org.apache.hadoop.io.IOUtils;
@Test
public void cat() throws Exception {
    Path path = new Path("/emperorlawd/test/rehello.txt");
    FSDataInputStream inputStream = fileSystem.open(path);
    IOUtils.copyBytes(inputStream, System.out, 1024);
    inputStream.close();
}
```
- Run `cat()`; the console should print `hello hadoop`
Uploading a file
- Add the following method to `HDFSApp`:

```java
// Upload a local file to HDFS
@Test
public void upload() throws Exception {
    Path localPath = new Path("cifar-10-python.tar.gz");
    Path hdfsPath = new Path("/");
    fileSystem.copyFromLocalFile(localPath, hdfsPath);
}
```
- Run `upload()`; the listing sketch below is one way to confirm the file landed in `/`
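- As a minimal sketch (the `listRoot()` method is hypothetical, not part of the original post), you can list the HDFS root directory to confirm the upload:

```java
// Hypothetical check, not in the original tutorial:
// list "/" and print each entry's path and size in bytes.
// requires: import org.apache.hadoop.fs.FileStatus;
@Test
public void listRoot() throws Exception {
    for (FileStatus status : fileSystem.listStatus(new Path("/"))) {
        System.out.println(status.getPath() + "\t" + status.getLen());
    }
}
```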
Downloading a file
- Add the following method to `HDFSApp`:

```java
// Download a file from HDFS to the local filesystem
@Test
public void download() throws Exception {
    Path hdfsPath = new Path("/hadoop-2.6.0-cdh5.7.0.tar.gz");
    Path localPath = new Path("./down/hadoop-2.6.0-cdh5.7.0.tar.gz");
    // delSrc = false keeps the HDFS copy; useRawLocalFileSystem = true
    // skips writing a local .crc checksum file
    fileSystem.copyToLocalFile(false, hdfsPath, localPath, true);
}
```
- Run `download()`