Hadoop -- Basic HDFS Operations
Preface
Add the Hadoop client dependency:
<!-- Hadoop client dependency -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>${hadoop.version}</version>
</dependency>
Add the Cloudera (CDH) repository so the CDH builds of the Hadoop artifacts can be resolved:
<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>
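The ${hadoop.version} placeholder above must be defined in the POM's <properties> block. A minimal sketch; the CDH build shown here (2.6.0-cdh5.15.1) is only an assumed example, not taken from the original post:
<properties>
    <hadoop.version>2.6.0-cdh5.15.1</hadoop.version>
</properties>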
Overview of basic HDFS shell commands
Commonly used hadoop commands:
hadoop fs -ls            list the contents of a directory
hadoop fs -put           upload a local file to HDFS
hadoop fs -copyFromLocal copy a file from the local file system to HDFS
hadoop fs -moveFromLocal move a file from the local file system to HDFS
hadoop fs -cat           print the contents of an existing file
hadoop fs -text          print the contents of a file, decoding it if compressed
hadoop fs -get           copy a file from HDFS to the local file system
hadoop fs -mkdir         create a new directory on HDFS
hadoop fs -mv            move/rename a file on HDFS
hadoop fs -getmerge      merge the files under a directory into one local file
hadoop fs -rm            delete a file on HDFS
hadoop fs -rmdir         delete an empty directory on HDFS
Code Examples
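All of the test methods below share a fileSystem field that the original post initializes elsewhere. A minimal JUnit setup sketch; the NameNode URI (hdfs://hadoop000:8020) and the HDFS user (hadoop) are assumptions to be adjusted for your own cluster:
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.junit.After;
import org.junit.Before;

public class HDFSAppTest {

    // assumed NameNode URI and HDFS user
    public static final String HDFS_PATH = "hdfs://hadoop000:8020";
    public static final String HDFS_USER = "hadoop";

    FileSystem fileSystem = null;
    Configuration configuration = null;

    @Before
    public void setUp() throws Exception {
        configuration = new Configuration();
        // connect to HDFS as the given user
        fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, HDFS_USER);
    }

    @After
    public void tearDown() {
        configuration = null;
        fileSystem = null;
    }
}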
Create an HDFS directory
/**
 * Create a directory on HDFS; mkdirs also creates any missing
 * parent directories, like mkdir -p.
 */
@Test
public void mkdir() throws Exception {
    fileSystem.mkdirs(new Path("/hdfsapi/test"));
}
View the contents of an HDFS file
/**
 * Print the contents of an HDFS file to standard output.
 */
@Test
public void text() throws Exception {
    FSDataInputStream in = fileSystem.open(new Path("/cdh_version.properties"));
    IOUtils.copyBytes(in, System.out, 1024);
    IOUtils.closeStream(in); // close the HDFS input stream; System.out stays open
}
Create a file
/**
 * Create a file and write a string to it.
 */
@Test
public void create() throws Exception {
    // FSDataOutputStream out = fileSystem.create(new Path("/hdfsapi/test/a.txt"));
    FSDataOutputStream out = fileSystem.create(new Path("/hdfsapi/test/b.txt"));
    out.writeUTF("hello pk: replication 1");
    out.flush();
    out.close();
}
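The "replication 1" note in the payload suggests the file was written with a single replica. The original post does not show how; one common approach (an assumption here, not the author's confirmed method) is to set the standard dfs.replication property on the client Configuration before obtaining the FileSystem:
// dfs.replication controls how many replicas newly written blocks get;
// setting it client-side is an assumed approach, not shown in the original
Configuration configuration = new Configuration();
configuration.set("dfs.replication", "1");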
Rename a file
/**
 * Rename a file on HDFS.
 * @throws Exception
 */
@Test
public void rename() throws Exception {
    Path oldPath = new Path("/hdfsapi/test/b.txt");
    Path newPath = new Path("/hdfsapi/test/c.txt");
    boolean result = fileSystem.rename(oldPath, newPath);
    System.out.println(result); // true if the rename succeeded
}
Copy a local file to HDFS
/**
 * Copy a local file to HDFS.
 */
@Test
public void copyFromLocalFile() throws Exception {
    Path src = new Path("/Users/rocky/data/hello.txt");
    Path dst = new Path("/hdfsapi/test/");
    fileSystem.copyFromLocalFile(src, dst);
}
Copy a large file to HDFS, with progress reporting
@Test
public void copyFromLocalBigFile() throws Exception {
    InputStream in = new BufferedInputStream(
            new FileInputStream(new File("/Users/rocky/tmp/software/jdk-8u91-linux-x64.tar.gz")));
    FSDataOutputStream out = fileSystem.create(new Path("/hdfsapi/test/jdk.tgz"),
            new Progressable() {
                public void progress() {
                    System.out.print("."); // print a dot each time progress is reported
                }
            });
    IOUtils.copyBytes(in, out, 4096, true); // the final true closes both streams when done
}
Copy an HDFS file to the local machine (download)
@Test
public void copyToLocalFile() throws Exception {
    Path src = new Path("/hdfsapi/test/hello.txt");
    Path dst = new Path("/Users/rocky/tmp/software");
    fileSystem.copyToLocalFile(src, dst);
}
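A note on copyToLocalFile: on a machine without the native Hadoop libraries (a Windows development box, for instance), the two-argument form can fail while writing local .crc checksum files. The four-argument overload writes through the raw local file system instead; src and dst are the same paths as above:
// delSrc = false keeps the HDFS copy; useRawLocalFileSystem = true
// bypasses the checksummed local file system (no .crc side files)
fileSystem.copyToLocalFile(false, src, dst, true);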
List all files under a target directory
@Test
public void listFiles() throws Exception {
    FileStatus[] statuses = fileSystem.listStatus(new Path("/hdfsapi/test"));
    for (FileStatus file : statuses) {
        String isDir = file.isDirectory() ? "directory" : "file";
        String permission = file.getPermission().toString();
        short replication = file.getReplication();
        long length = file.getLen();
        String path = file.getPath().toString();
        System.out.println(isDir + "\t" + permission
                + "\t" + replication + "\t" + length
                + "\t" + path);
    }
}
Recursively list all files under a target directory
@Test
public void listFilesRecursive() throws Exception {
    RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/hdfsapi/test"), true);
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        String isDir = file.isDirectory() ? "directory" : "file";
        String permission = file.getPermission().toString();
        short replication = file.getReplication();
        long length = file.getLen();
        String path = file.getPath().toString();
        System.out.println(isDir + "\t" + permission
                + "\t" + replication + "\t" + length
                + "\t" + path);
    }
}
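Note that FileSystem.listFiles returns only files, never directories (when the second argument is true it recurses into subdirectories), so the "directory" branch above is never taken in this variant; it is kept only for symmetry with the listStatus version. Unlike listStatus, listFiles also returns a lazy RemoteIterator, which scales better for directories with many entries.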
Inspect a file's block information
@Test
public void getFileBlockLocations() throws Exception {
    FileStatus fileStatus = fileSystem.getFileStatus(new Path("/hdfsapi/test/jdk.tgz"));
    BlockLocation[] blocks = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    for (BlockLocation block : blocks) {
        for (String name : block.getNames()) {
            // getHosts() returns a String[]; format it with java.util.Arrays.toString
            System.out.println(name + " : " + block.getOffset() + " : "
                    + block.getLength() + " : " + Arrays.toString(block.getHosts()));
        }
    }
}
Conclusion
These examples assume a working Hadoop installation and client configuration (see the setup sketch at the top of the code section); they cover only the basics of working with HDFS.