// Problem: a large file contains many lines, each holding a single int value; sort all of the values in ascending order.
package com.example.test;
import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Random;
/**
 * External merge sort for a large text file that holds one int per line.
 *
 * <p>The work is done in three phases: the source file is distributed
 * round-robin over a fixed number of temporary files, each temporary file is
 * sorted in memory, and the sorted temporary files are merged into a single
 * ascending output file.
 */
public class BigFileSort {
    /**
     * Directory that holds the big source file and receives the sorted result.
     */
    private static final String SOURCE_FILE_PATH = "/Users/enjoy/Documents/test";
    /**
     * Directory that holds the intermediate temporary files.
     */
    private static final String TEMP_FILE_PATH = "/Users/enjoy/Documents/test/temp";
    /**
     * Base name (without suffix) of the big source file.
     */
    private static final String SOURCE_FILE_NAME = "data";
    /**
     * Base name (without suffix) of the generated, sorted target file.
     */
    private static final String SORTED_FILE_NAME = "sorted";
    /**
     * Name prefix of the temporary files; the 1-based file index is appended to it.
     */
    private static final String TEMP_FILE_NAME_PREFIX = "temp-";
    /**
     * Suffix shared by every file this class reads or writes.
     */
    private static final String FILE_SUFFIX = ".txt";
    /**
     * Number of lines generated for the mock source file; also used as the
     * exclusive upper bound of the generated random values.
     */
    private static final int SOURCE_DATA_COUNT = 1000000;
    /**
     * Number of temporary files the source file is split into.
     */
    private static final int TEMP_FILE_COUNT = 10;
    /**
     * Line terminator written after every value.
     */
    private static final String LINE_SEPARATOR = "\r\n";

    /**
     * Generates a mock source file, then splits, sorts and merges it, printing
     * a progress message after each phase and the elapsed time at the end.
     *
     * @param args ignored
     * @throws IOException if any file cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        // nanoTime is monotonic, so the measurement is immune to wall-clock adjustments.
        long startNanos = System.nanoTime();
        String sourceFileName = SOURCE_FILE_PATH + "/" + SOURCE_FILE_NAME + FILE_SUFFIX;

        // Generate the test data.
        mockBigDataFile(sourceFileName, SOURCE_DATA_COUNT);
        System.out.println("存储完毕");

        // Distribute the big file over several small files.
        splitBigFile(SOURCE_FILE_PATH, TEMP_FILE_PATH, TEMP_FILE_COUNT);
        System.out.println("文件切割完毕!");

        // Sort the content of every small file.
        sortTempFile(TEMP_FILE_PATH, TEMP_FILE_COUNT);
        System.out.println("每个子文件排序完毕!");

        // Merge the sorted small files into the final result.
        mergeTempSortedFile(SOURCE_FILE_PATH, TEMP_FILE_PATH, TEMP_FILE_COUNT);
        System.out.println("整合完毕");

        long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000L;
        System.out.println("耗时" + elapsedMillis + "毫秒");
    }

    /**
     * Writes {@code count} random non-negative ints, one per line, to {@code fileName}.
     *
     * <p>The parent directory of {@code fileName} is created when it is missing.
     * Any existing content of the file is overwritten.
     *
     * @param fileName full path of the file to generate
     * @param count    number of lines to write; nothing is written when it is not positive
     * @throws IOException if the directory or the file cannot be created or written
     */
    public static void mockBigDataFile(String fileName, int count) throws IOException {
        // Create the directory the file actually lives in, not a hard-coded one.
        File parent = new File(fileName).getAbsoluteFile().getParentFile();
        if (parent != null) {
            makeSureFileExists(parent.getPath(), fileName);
        }
        // One generator for the whole run instead of a new one per line.
        Random random = new Random();
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName))) {
            for (int i = 0; i < count; i++) {
                writer.write(random.nextInt(SOURCE_DATA_COUNT) + LINE_SEPARATOR);
            }
        }
    }

    /**
     * Ensures that the directory {@code filePath} and the file {@code fileName} exist.
     *
     * @param filePath directory to create (including missing parents) when absent
     * @param fileName full path of the file to create when absent
     * @throws IOException if the directory or the file cannot be created
     */
    public static void makeSureFileExists(String filePath, String fileName) throws IOException {
        File directory = new File(filePath);
        // The second isDirectory() check tolerates a concurrent creation of the directory.
        if (!directory.isDirectory() && !directory.mkdirs() && !directory.isDirectory()) {
            throw new IOException("Unable to create directory " + directory);
        }
        File file = new File(fileName);
        if (!file.exists()) {
            // Returns false when the file appeared in the meantime, which is fine here.
            file.createNewFile();
        }
    }

    /**
     * Distributes the lines of the source file round-robin over {@code fileCount}
     * temporary files named {@code temp-1.txt} .. {@code temp-N.txt}.
     *
     * <p>Lines are trimmed and blank lines are skipped, so that the later
     * phases only ever see parseable values. Existing temporary files are
     * overwritten.
     *
     * @param sourceFilePath directory that contains the source file {@code data.txt}
     * @param tempFilePath   directory that receives the temporary files
     * @param fileCount      number of temporary files to produce; must be positive
     * @throws IllegalArgumentException if {@code fileCount} is not positive
     * @throws IOException              if a file cannot be read or written
     */
    public static void splitBigFile(String sourceFilePath, String tempFilePath,
                                    int fileCount) throws IOException {
        if (fileCount <= 0) {
            throw new IllegalArgumentException("fileCount must be positive, but was " + fileCount);
        }
        String sourceFile = sourceFilePath + "/" + SOURCE_FILE_NAME + FILE_SUFFIX;
        List<BufferedWriter> writers = new ArrayList<>(fileCount);
        Throwable failure = null;
        try (BufferedReader reader = new BufferedReader(new FileReader(sourceFile))) {
            for (int j = 1; j <= fileCount; j++) {
                String fileName = tempFileName(tempFilePath, j);
                makeSureFileExists(tempFilePath, fileName);
                writers.add(new BufferedWriter(new FileWriter(fileName, false)));
            }
            int target = 0;
            String line;
            // readLine() returning null is the reliable end-of-file signal.
            while ((line = reader.readLine()) != null) {
                String text = line.trim();
                if (text.isEmpty()) {
                    continue;
                }
                writers.get(target).write(text + LINE_SEPARATOR);
                // After one line, switch to the next file, wrapping around at the end.
                target = (target + 1) % fileCount;
            }
        } catch (IOException | RuntimeException e) {
            failure = e;
            throw e;
        } finally {
            closeAll(writers, failure);
        }
    }

    /**
     * Sorts the content of every temporary file in ascending order, in place.
     *
     * @param filePath  directory that contains the temporary files
     * @param fileCount number of temporary files ({@code temp-1.txt} .. {@code temp-N.txt})
     * @throws UncheckedIOException  if a temporary file cannot be read or written
     * @throws NumberFormatException if a non-blank line is not a valid int
     */
    public static void sortTempFile(String filePath, int fileCount) {
        for (int i = 1; i <= fileCount; i++) {
            String path = tempFileName(filePath, i);
            try {
                // The reader is closed before the same file is rewritten.
                List<Integer> numbers = readNumbers(path);
                Collections.sort(numbers);
                writeNumbers(numbers, path);
            } catch (IOException e) {
                // Continuing with an unsorted file would silently corrupt the merge.
                throw new UncheckedIOException("Failed to sort temporary file " + path, e);
            }
        }
    }

    /**
     * Writes the given values to {@code path}, one per line, in iteration order.
     *
     * @param list values to write
     * @param path full path of the file to overwrite
     * @throws UncheckedIOException if the file cannot be written
     */
    public static void sortedToFile(LinkedList<Integer> list, String path) {
        try {
            writeNumbers(list, path);
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to write " + path, e);
        }
    }

    /**
     * Merges the sorted temporary files into the file {@code sorted.txt} in ascending order.
     *
     * <p>Only the current head value of each temporary file is kept in memory.
     * Blank lines in the temporary files are skipped.
     *
     * @param filepath      directory that receives the sorted result file
     * @param splitFilePath directory that contains the sorted temporary files
     * @param fileCount     number of temporary files to merge; a non-positive
     *                      value produces an empty result file
     * @throws IOException           if a file cannot be read or written
     * @throws NumberFormatException if a non-blank line is not a valid int
     */
    public static void mergeTempSortedFile(String filepath, String splitFilePath, int fileCount) throws IOException {
        String sortedFile = filepath + "/" + SORTED_FILE_NAME + FILE_SUFFIX;
        List<BufferedReader> readers = new ArrayList<>(Math.max(fileCount, 0));
        Throwable failure = null;
        try (BufferedWriter sortedWriter = new BufferedWriter(new FileWriter(sortedFile, false))) {
            Comparator<ReadNode> bySmallestValue = Comparator.comparingInt(node -> node.value);
            // Min-heap over the head value of every temporary file that still has data.
            PriorityQueue<ReadNode> heads = new PriorityQueue<>(Math.max(fileCount, 1), bySmallestValue);
            for (int j = 1; j <= fileCount; j++) {
                BufferedReader reader = new BufferedReader(new FileReader(tempFileName(splitFilePath, j)));
                readers.add(reader);
                Integer first = nextValue(reader);
                if (first != null) {
                    heads.add(new ReadNode(first, reader));
                }
            }
            while (!heads.isEmpty()) {
                ReadNode smallest = heads.poll();
                sortedWriter.write(smallest.value + LINE_SEPARATOR);
                Integer next = nextValue(smallest.getBr());
                if (next != null) {
                    // Re-insert the node with the next value from the same file.
                    smallest.setValue(next);
                    heads.add(smallest);
                }
            }
        } catch (IOException | RuntimeException e) {
            failure = e;
            throw e;
        } finally {
            closeAll(readers, failure);
        }
    }

    /** Builds the full path of the temporary file with the given 1-based index. */
    private static String tempFileName(String directory, int index) {
        return directory + "/" + TEMP_FILE_NAME_PREFIX + index + FILE_SUFFIX;
    }

    /** Reads the next non-blank line as an Integer, or returns null at end of file. */
    private static Integer nextValue(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            String text = line.trim();
            if (!text.isEmpty()) {
                return Integer.valueOf(text);
            }
        }
        return null;
    }

    /** Reads every value of the given file into a list, skipping blank lines. */
    private static List<Integer> readNumbers(String path) throws IOException {
        List<Integer> numbers = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            Integer value;
            while ((value = nextValue(reader)) != null) {
                numbers.add(value);
            }
        }
        return numbers;
    }

    /** Overwrites the given file with the values, one per line, in iteration order. */
    private static void writeNumbers(List<Integer> numbers, String path) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(path))) {
            for (Integer number : numbers) {
                writer.write(number + LINE_SEPARATOR);
            }
        }
    }

    /**
     * Closes every resource. A close failure is attached to {@code primary} as
     * suppressed when one is given, so it never masks the original error;
     * otherwise the first close failure is thrown.
     */
    private static void closeAll(List<? extends Closeable> resources, Throwable primary) throws IOException {
        IOException closeFailure = null;
        for (Closeable resource : resources) {
            try {
                resource.close();
            } catch (IOException e) {
                if (primary != null) {
                    primary.addSuppressed(e);
                } else if (closeFailure == null) {
                    closeFailure = e;
                } else {
                    closeFailure.addSuppressed(e);
                }
            }
        }
        if (closeFailure != null) {
            throw closeFailure;
        }
    }
}

/**
 * Pairs the most recently read value of a sorted temporary file with the
 * reader that supplies the following values. Used as the heap entry during
 * the merge phase.
 */
class ReadNode {
    /** Value most recently read from {@link #br}. */
    Integer value;
    /** Reader positioned right after {@link #value}. */
    BufferedReader br;

    /**
     * @param value first value read from the reader
     * @param br    reader that supplies the following values
     */
    public ReadNode(Integer value, BufferedReader br) {
        this.value = value;
        this.br = br;
    }

    /**
     * Replaces the current value with the next one read from the reader.
     *
     * @param value the new current value
     */
    public void setValue(Integer value) {
        this.value = value;
    }

    /**
     * @return the reader that supplies the values of this node
     */
    public BufferedReader getBr() {
        return br;
    }
}