HDFS 学习笔记
简介
HDFS (The Hadoop Distributed File System) 即 Hadoop 分布式文件系统;
架构
启动 hdfs
su morningcat
cd /usr/local/hadoop-3.2.0
./sbin/start-dfs.sh
常用命令
可以访问 http://172.16.206.129:9870/explorer.html#/ 在线查看 HDFS 文件系统中所包含的文件
hdfs dfs
- -mkdir
绝对路径
hdfs dfs -mkdir /user
hdfs dfs -mkdir /user/morningcat
相对路径
hdfs dfs -mkdir input
- -ls
hdfs dfs -ls /
默认家路径 /user/morningcat
hdfs dfs -ls
- -put
hdfs dfs -put etc/hadoop/*.xml input
- -get
hdfs dfs -get output myoutput
- -cat
hdfs dfs -cat output/*
- -touch
hdfs dfs -touch theFile
当执行 hdfs dfs --help 时,提示信息中显示的用法为 hadoop fs,两者的文件系统命令可以互相替换使用:
Usage: hadoop fs [generic options]
[-appendToFile <localsrc> ... <dst>]
[-cat [-ignoreCrc] <src> ...]
[-checksum <src> ...]
[-chgrp [-R] GROUP PATH...]
[-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...]
[-chown [-R] [OWNER][:[GROUP]] PATH...]
[-copyFromLocal [-f] [-p] [-l] [-d] [-t <thread count>] <localsrc> ... <dst>]
[-copyToLocal [-f] [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-count [-q] [-h] [-v] [-t [<storage type>]] [-u] [-x] [-e] <path> ...]
[-cp [-f] [-p | -p[topax]] [-d] <src> ... <dst>]
[-createSnapshot <snapshotDir> [<snapshotName>]]
[-deleteSnapshot <snapshotDir> <snapshotName>]
[-df [-h] [<path> ...]]
[-du [-s] [-h] [-v] [-x] <path> ...]
[-expunge]
[-find <path> ... <expression> ...]
[-get [-f] [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-getfacl [-R] <path>]
[-getfattr [-R] {-n name | -d} [-e en] <path>]
[-getmerge [-nl] [-skip-empty-file] <src> <localdst>]
[-head <file>]
[-help [cmd ...]]
[-ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] [-e] [<path> ...]]
[-mkdir [-p] <path> ...]
[-moveFromLocal <localsrc> ... <dst>]
[-moveToLocal <src> <localdst>]
[-mv <src> ... <dst>]
[-put [-f] [-p] [-l] [-d] <localsrc> ... <dst>]
[-renameSnapshot <snapshotDir> <oldName> <newName>]
[-rm [-f] [-r|-R] [-skipTrash] [-safely] <src> ...]
[-rmdir [--ignore-fail-on-non-empty] <dir> ...]
[-setfacl [-R] [{-b|-k} {-m|-x <acl_spec>} <path>]|[--set <acl_spec> <path>]]
[-setfattr {-n name [-v value] | -x name} <path>]
[-setrep [-R] [-w] <rep> <path> ...]
[-stat [format] <path> ...]
[-tail [-f] <file>]
[-test -[defsz] <path>]
[-text [-ignoreCrc] <src> ...]
[-touch [-a] [-m] [-t TIMESTAMP ] [-c] <path> ...]
[-touchz <path> ...]
[-truncate [-w] <length> <path> ...]
[-usage [cmd ...]]
- -mkdir
hadoop fs -mkdir /home
hadoop fs -ls /
递归创建
hadoop fs -mkdir -p /path1/path2
- -put
hadoop fs -put /usr/local/hadoop-3.2.0/etc/hadoop/core-site.xml /path1/path2
- -get
hadoop fs -get /path1/path2 mypath
- -cat
hadoop fs -cat /path1/path2/core-site.xml
- -touch
hadoop fs -touch theFile
java
依赖
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.2.0</version>
</dependency>
创建文件夹
// HDFS client handle shared by the test methods in this class; opened in init(), closed in destory().
FileSystem fs = null;
// Set up the HDFS client before each test.
@Before
public void init() throws Exception {
Configuration conf = new Configuration();
//conf.set("fs.defaultFS", "hdfs://hadoop1:9000");
// Configuration precedence: 1) values set in client code, 2) user-defined config files on the classpath, 3) server-side defaults.
// Obtain an HDFS client; given these parameters the instance should be a DistributedFileSystem.
//fs = FileSystem.get(conf);
// If obtained the commented way above, conf would not need the "fs.defaultFS" parameter,
// but the client identity would then already be the current (hadoop) user.
// Here the URI and the user ("morningcat") are passed explicitly instead.
fs = FileSystem.get(new URI("hdfs://hadoop1:9000"), conf, "morningcat");
}
// Tear down after each test: release the HDFS client connection.
// NOTE(review): "destory" is a typo for "destroy"; JUnit dispatches on @After so renaming
// would be safe, but the name is kept here to preserve the existing interface.
@After
public void destory() throws Exception {
fs.close();
}
// Verify that a directory can be created at the HDFS root.
@Test
public void mkdir() throws Exception {
// Creates "/hello" (including any missing parents); no-op if it already exists.
fs.mkdirs(new Path("/hello"));
}
// ...