项目需求中,简历信息涉及10张表,需要联表查询后再组装数据。实测查询一条数据需要5s,粗略估算,全部数据导入完成大约需要60个小时。
优化方法:数据分段+多线程
1、以10万条数据为例,将数据分成10份,每一份10000条;
2、每一份数据起一个线程,共10个线程。
代码大致如下:
/**
 * Submits the user-info import to {@code threadExecutorService} as one task per page.
 *
 * <p>Pages 1 through 12 are submitted in ascending order, each with a page size of
 * 10000; every task calls {@code putEsUserInfo(page, pageSize)}. After the last
 * submission the elapsed wall-clock time in milliseconds is printed to standard out.
 *
 * @return {@code Result.ok()} once all tasks have been handed to the executor service
 * @throws ExecutionException declared for callers; nothing in this body throws it directly
 * @throws InterruptedException declared for callers; nothing in this body throws it directly
 */
public Result putAllJobUserInfo() throws ExecutionException, InterruptedException {
    // Segmentation of the id list: one task per page, pages are 1-based.
    final int pageCount = 12;
    final int pageSize = 10000;

    long startMillis = System.currentTimeMillis();
    for (int i = 1; i <= pageCount; i++) {
        // A lambda may only capture effectively final locals, so copy the loop index.
        final int page = i;
        threadExecutorService.runSubmit(() -> this.putEsUserInfo(page, pageSize));
    }
    // NOTE(review): if runSubmit is asynchronous, this figure covers only the time
    // needed to submit the tasks, not the time the import itself takes - confirm.
    long elapsedMillis = System.currentTimeMillis() - startMillis;
    System.out.println(elapsedMillis);
    return Result.ok();
}
/**
 * Imports one page of users: fetches the page's user ids and calls
 * {@code saveEsJobUserInfo} for each id in turn, logging progress after every id.
 *
 * @param page  page number passed to {@code PageHelper.startPage}; also used as the
 *              "thread" label in the log messages
 * @param limit page size passed to {@code PageHelper.startPage}
 * @return {@code Result.ok(message)} where the message is the same completion summary
 *         that is written to the log
 */
private Result putEsUserInfo(Integer page, Integer limit) {
    // Keep startPage directly in front of the mapper call it is meant to page.
    PageHelper.startPage(page, limit);
    List<Long> userIds = jobUserMapper.getUserIdList();

    int imported = 0;
    for (Long userId : userIds) {
        saveEsJobUserInfo(userId);
        imported += 1;
        logger.info("线程#" + page + ":已导入:[" + imported + "]条数据;用户id为{" + userId + "}");
    }

    // The completion summary is both logged and returned to the caller.
    String summary = "{全部完成 ==>:}线程# " + page + " #全部完成,总数为" + imported;
    logger.info(summary);
    return Result.ok(summary);
}