生成测试文件
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.Random;
/**
 * Generates a large test file made of comma-terminated random integers.
 *
 * <p>Every entry is a decimal number in the range {@code [0, 9999]} followed by a comma, so an
 * entry takes at most five bytes. With the default entry count the resulting file is roughly
 * 4 GB.
 *
 * <p>Usage: {@code java WriteBigFile [outputFile [numberCount]]}. Both arguments are optional;
 * when omitted the original hard-coded path and entry count are used.
 */
public class WriteBigFile {
    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT_FILE = "D:\\testFile\\test.txt";

    /** Number of entries written by default (about 833 * 1024 * 1024). */
    private static final long DEFAULT_NUMBER_COUNT = 873809999L;

    /** Exclusive upper bound of the generated numbers. */
    private static final int NUMBER_BOUND = 10000;

    /** Largest possible entry: four digits plus the trailing comma. */
    private static final int MAX_ENTRY_BYTES = 5;

    /** Upper limit of the output buffer: 50 MB. */
    private static final int MAX_BUFFER_SIZE = 1024 * 1024 * 50;

    /** Lower limit of the output buffer. */
    private static final int MIN_BUFFER_SIZE = 8192;

    /**
     * Writes the test file and prints the elapsed time in milliseconds.
     *
     * @param args optional {@code [outputFile, numberCount]}
     * @throws Exception if the file cannot be written or {@code numberCount} is not a number
     */
    public static void main(String[] args) throws Exception {
        long startTime = System.currentTimeMillis();
        boolean hasPath = args != null && args.length > 0;
        boolean hasCount = args != null && args.length > 1;
        File file = new File(hasPath ? args[0] : DEFAULT_OUTPUT_FILE);
        long numberCount = hasCount ? Long.parseLong(args[1]) : DEFAULT_NUMBER_COUNT;
        writeRandomNumbers(file, numberCount);
        long endTime = System.currentTimeMillis();
        System.out.println(endTime - startTime);
    }

    /** Writes {@code numberCount} random entries to {@code file}, replacing any old content. */
    private static void writeRandomNumbers(File file, long numberCount) throws Exception {
        Random random = new Random();
        // try-with-resources flushes and closes the stream even when a write fails.
        try (OutputStream out =
                new BufferedOutputStream(new FileOutputStream(file), bufferSizeFor(numberCount))) {
            for (long i = 0; i < numberCount; i++) {
                String entry = random.nextInt(NUMBER_BOUND) + ",";
                // Digits and commas are ASCII; an explicit charset keeps the output
                // independent of the platform default.
                out.write(entry.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            }
        }
    }

    /** Picks a buffer size: 50 MB for big files, never more than the file can hold. */
    private static int bufferSizeFor(long numberCount) {
        if (numberCount >= MAX_BUFFER_SIZE / MAX_ENTRY_BYTES) {
            return MAX_BUFFER_SIZE;
        }
        return (int) Math.max(MIN_BUFFER_SIZE, numberCount * MAX_ENTRY_BYTES);
    }
}
读文件
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
/**
 * Reads the big comma-separated number file and splits it into smaller child files.
 *
 * <p>Each number {@code n} is appended to {@code child_<n % 1000>.txt} inside the output
 * directory, so all occurrences of the same number end up in the same child file. Numbers are
 * buffered per child file and written in batches.
 *
 * <p>Child files are opened in append mode, so child files left over from an earlier run should
 * be removed before running again.
 *
 * <p>Usage: {@code java ReadBigFile [inputFile [outputDirectory]]}. Both arguments are optional;
 * when omitted the original hard-coded locations are used.
 */
public class ReadBigFile {
    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT_FILE = "D:\\testFile\\test.txt";

    /** Output directory used when none is given on the command line. */
    private static final String DEFAULT_OUTPUT_DIR = "D:\\testFile";

    /** Numbers are distributed over child files by {@code n % BUCKET_COUNT}. */
    private static final int BUCKET_COUNT = 1000;

    /** A bucket is written to its child file once it holds this many entries. */
    private static final int FLUSH_THRESHOLD = 1024;

    /**
     * Splits the input file and prints the elapsed time in milliseconds.
     *
     * @param args optional {@code [inputFile, outputDirectory]}
     * @throws Exception if reading or writing fails, or a token is not a valid integer
     */
    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();
        boolean hasInput = args != null && args.length > 0;
        boolean hasOutput = args != null && args.length > 1;
        File input = new File(hasInput ? args[0] : DEFAULT_INPUT_FILE);
        String outputDir = hasOutput ? args[1] : DEFAULT_OUTPUT_DIR;

        HashMap<Integer, List<String>> buckets = new HashMap<Integer, List<String>>();
        // NOTE: FileReader decodes with the platform charset; the data is plain ASCII.
        try (BufferedReader reader = new BufferedReader(new FileReader(input))) {
            StringBuilder token = new StringBuilder();
            int c;
            while ((c = reader.read()) != -1) {
                if (c != ',') {
                    token.append((char) c);
                    continue;
                }
                addToBucket(buckets, outputDir, token);
            }
            // The last number may not be followed by a comma.
            addToBucket(buckets, outputDir, token);
        }

        // Write out whatever is still buffered; without this every bucket would lose
        // its final (partial) batch.
        for (Integer remainder : buckets.keySet()) {
            List<String> pending = buckets.get(remainder);
            if (!pending.isEmpty()) {
                writeChildFiles(childFilePath(outputDir, remainder), pending);
            }
        }

        long end = System.currentTimeMillis();
        System.out.println(end - start);
    }

    /**
     * Parses the number collected in {@code token}, queues it in its bucket and resets
     * {@code token}. Blank tokens are skipped. A full bucket is written to its child file.
     */
    private static void addToBucket(
            HashMap<Integer, List<String>> buckets, String outputDir, StringBuilder token)
            throws Exception {
        String text = token.toString().trim();
        token.setLength(0);
        if (text.isEmpty()) {
            return;
        }
        int num = Integer.parseInt(text);
        int remainder = num % BUCKET_COUNT;
        List<String> list =
                buckets.computeIfAbsent(remainder, k -> new ArrayList<String>(FLUSH_THRESHOLD));
        list.add(num + ",");
        if (list.size() >= FLUSH_THRESHOLD) {
            writeChildFiles(childFilePath(outputDir, remainder), list);
        }
    }

    /** Builds the path of the child file for {@code remainder} with the platform separator. */
    private static String childFilePath(String outputDir, int remainder) {
        return new File(outputDir, "child_" + remainder + ".txt").getPath();
    }

    /**
     * Appends every entry of {@code data} to the given file and then empties {@code data}.
     *
     * @param filePath file to append to; it is created when it does not exist
     * @param data entries to write; cleared after a successful write
     * @throws Exception if the file cannot be opened or written
     */
    public static void writeChildFiles(String filePath, List<String> data) throws Exception {
        // try-with-resources flushes and closes the stream even when a write fails.
        try (OutputStream out =
                new BufferedOutputStream(new FileOutputStream(new File(filePath), true))) {
            for (String entry : data) {
                out.write(entry.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            }
        }
        data.clear();
    }
}
获取出现次数最多的数
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Finds the number that occurs most often across the child files.
 *
 * <p>Every file in the directory whose name starts with {@code child} is scanned on its own; the
 * per-file winners are then merged and the overall winner is printed as
 * {@code max:<count>,key:<number>}. When no number is found the output is
 * {@code max:0,key:-999}.
 *
 * <p>Usage: {@code java getChildFileMaxNum [directory]}. The argument is optional; when omitted
 * the original hard-coded directory is used.
 */
public class getChildFileMaxNum {
    /** Directory scanned when none is given on the command line. */
    private static final String DEFAULT_DIRECTORY = "D:\\testFile";

    /** Only files whose name starts with this prefix are scanned. */
    private static final String CHILD_FILE_PREFIX = "child";

    /** Placeholder key reported when no number has been seen. */
    private static final int NO_KEY = -999;

    /**
     * Scans the child files, prints the most frequent number and the elapsed milliseconds.
     *
     * @param args optional {@code [directory]}
     * @throws Exception if a child file cannot be read or contains a non-numeric token
     */
    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();
        boolean hasDirectory = args != null && args.length > 0;
        File directory = new File(hasDirectory ? args[0] : DEFAULT_DIRECTORY);

        Map<Integer, Integer> totals = new HashMap<Integer, Integer>();
        // File.list() returns null when the path is not a directory or cannot be listed.
        String[] names = directory.isDirectory() ? directory.list() : null;
        if (names != null) {
            for (String name : names) {
                if (!name.startsWith(CHILD_FILE_PREFIX)) {
                    continue;
                }
                Map<Integer, Integer> childMax =
                        readChildFileAndWriteMax(new File(directory, name).getPath());
                for (Map.Entry<Integer, Integer> entry : childMax.entrySet()) {
                    totals.merge(entry.getKey(), entry.getValue(), Integer::sum);
                }
            }
        }

        int max = 0;
        int key = NO_KEY;
        for (Map.Entry<Integer, Integer> entry : totals.entrySet()) {
            if (entry.getValue() > max) {
                max = entry.getValue();
                key = entry.getKey();
            }
        }
        System.out.println("max:" + max + ",key:" + key);
        long end = System.currentTimeMillis();
        System.out.println(end - start);
    }

    /**
     * Counts the numbers in one child file and returns its most frequent number.
     *
     * @param filePath path of the comma-separated child file
     * @return a map with exactly one entry, {@code number -> occurrences}; for a file without
     *     numbers the entry is {@code -999 -> 0}
     * @throws Exception if the file cannot be read or contains a non-numeric token
     */
    private static Map<Integer, Integer> readChildFileAndWriteMax(String filePath) throws Exception {
        Map<Integer, Integer> counts = new HashMap<Integer, Integer>();
        // try-with-resources closes the reader even when parsing fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(filePath)))) {
            StringBuilder token = new StringBuilder();
            int c;
            while ((c = reader.read()) != -1) {
                if (c != ',') {
                    token.append((char) c);
                    continue;
                }
                countToken(counts, token);
            }
            // The last number may not be followed by a comma.
            countToken(counts, token);
        }

        int max = 0;
        int key = NO_KEY;
        for (Map.Entry<Integer, Integer> entry : counts.entrySet()) {
            if (entry.getValue() > max) {
                max = entry.getValue();
                key = entry.getKey();
            }
        }
        Map<Integer, Integer> result = new HashMap<Integer, Integer>();
        result.put(key, max);
        // The child file could be deleted here.
        return result;
    }

    /** Counts the number collected in {@code token} and resets it; blank tokens are skipped. */
    private static void countToken(Map<Integer, Integer> counts, StringBuilder token) {
        String text = token.toString().trim();
        token.setLength(0);
        if (text.isEmpty()) {
            return;
        }
        counts.merge(Integer.parseInt(text), 1, Integer::sum);
    }
}
测试结果:
分割文件大概花了10分钟。
从1000个子文件中找出出现次数最多的数大概花费1分钟。