mysql中百万级数据查询sql优化

2022-06-07 00:14:56

1.在做项目的时候遇到这样的问题就是：当数据达到百万级的时候分页查询的速率非常慢，下面个给直观的现象截图：测试数据为500W条数据

　　平时在分页的时候这么查询总数的：但是当数据达到百万级的时候会发现致命问题

SELECT COUNT(*) from test

可以直观看到查询时间达到近乎20S，啥意思你懂的，客户点一下要等待这么长的时间直接导致超时，这是不能容忍的，但是加一点就可以提高10倍查询速率，下面截图可以很清晰看到，所以以后在这么写的要注意别给自己挖坑，当然有其他条件在后面加就可以了

2.本文不是要优化这个问题，而是再次基础上更加优化，正常分页都是先查询数据然后再查询总数，要查2次，后面介绍查询一次就可以解决：

（1）建立自己的表，表中达到500W数据左右即可，添加方法很多，我是在idea中写的代码加进去的，挺慢的，有好的方法欢迎提出来；

（2）表建立完成后就开始写代码了：

　　本次的优化在于mysql的 SQL_CALC_FOUND_ROWS函数，这个是什么可以自行查阅下相关资料，下面基于mybatias，springboot上代码讲述；

（3）开始之前需要注意，需要在配置文件application.yml的数据源加上 &allowMultiQueries=true如下，加这个是可以执行多条sql不然会报错！！！

jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowMultiQueries=true

(4)然后就是xml文件，主要这么写

　　<sql id="whereCaus">
        <if test="name != null">
            and name like '%${name}%'
        </if>
    </sql>

　　<resultMap id="count" type="java.lang.Integer">
        <result column="count" />
    </resultMap>
    <resultMap id="BaseResultMap" type="com.example.demo.model.Test">
        <result column="id" jdbcType="INTEGER" property="id" />
        <result column="name" jdbcType="VARCHAR" property="name" />
    </resultMap>

    <select id="getListData" resultMap="BaseResultMap,count">
           SELECT SQL_CALC_FOUND_ROWS name,id
            FROM test
        <where>
            <include refid="whereCaus"/>
        </where>
        order by id
            LIMIT #{startOff},#{pageSize};
           SELECT FOUND_ROWS() as count;
    </select>
    

    <select id="getListData2" resultType="com.example.demo.model.Test">
        SELECT name,id
        FROM test
        <where>
          <include refid="whereCaus"/>
        </where>
        order by id
        LIMIT #{startOff},#{pageSize};
    </select>

    <select id="getTotal" resultType="java.lang.Integer">
       select count(id) from test where id >0
        <include refid="whereCaus"/>
    </select>

<select id="getTotal2" resultType="java.lang.Integer">
    select count(id) from test
    <where>
        <include refid="whereCaus"/>
    </where>
</select>

　　dao层：(getTotal2这边就不做测试了，前面试过了执行时间达到20S可能更久肯定不行的，可以自行试试)

List<?> getListData(ParamsDto paramsDto);//返回类型必须这么写，否则会出问题

int getTotal(String name); 
List<Test> getListData2(ParamsDto paramsDto);

　　controller层会解析数据，ParamsDto参数对象的话就3个参数private String name;private Integer pageSize;private Integer startOff;自行构建即可

package com.example.demo.controller;


import com.example.demo.dto.ParamsDto;
import com.example.demo.mapper.TestMapper;
import com.example.demo.model.Test;
import com.example.demo.run.RunScan;
import com.example.demo.utils.JsonResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.RequestMapping;

import org.springframework.web.bind.annotation.RestController;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * <p>
 *  前端控制器
 * </p>
 *
 * @author lxp
 * @since 2021-12-29
 */
@RestController
@RequestMapping("/test")
public class TestController {
    @Autowired
    private TestMapper testMapper;
    @RequestMapping("/v1.do")
    public  JsonResult test1(ParamsDto paramsDto) {
        Integer currentPage = paramsDto.getStartOff();
        //每页显示数量
        Integer pageSize = paramsDto.getPageSize();
        paramsDto.setName(paramsDto.getName());
        paramsDto.setPageSize(pageSize);
        paramsDto.setStartOff((currentPage-1)*pageSize);
        long startTime = System.currentTimeMillis();    //获取开始时间
        List<?> listData = testMapper.getListData(paramsDto);
        long endTime = System.currentTimeMillis();    //获取结束时间
        //接收count数据
        Integer totalCount = ((List<Integer>) listData.get(1)).get(0);
        Integer totalPage = totalCount % pageSize == 0 ?
                totalCount / pageSize :
                totalCount / pageSize + 1;
        List<Test> studentManageVoList = (List<Test>)listData.get(0);

        Map map = new HashMap();
        map.put("listData",studentManageVoList);
        map.put("totalPage",totalPage);
        map.put("finishTime",(endTime - startTime) + "ms");
        return JsonResult.successResult(map);
    }
    @RequestMapping("/v2.do")
    public JsonResult test2(ParamsDto paramsDto){
        Integer currentPage = paramsDto.getStartOff();
        //每页显示数量
        Integer pageSize = paramsDto.getPageSize();
        paramsDto.setName(paramsDto.getName());
        paramsDto.setPageSize(pageSize);
        paramsDto.setStartOff((currentPage-1)*pageSize);
        long startTime = System.currentTimeMillis();    //获取开始时间
        List<Test> listData2 = testMapper.getListData2(paramsDto);
        int totalCount = testMapper.getTotal(paramsDto.getName());
        long endTime = System.currentTimeMillis();    //获取结束时间

        Integer totalPage = totalCount % pageSize == 0 ?
                totalCount / pageSize :
                totalCount / pageSize + 1;
        Map map = new HashMap();
        map.put("listData",listData2);
        map.put("totalPage",totalPage);
        map.put("finishTime",(endTime - startTime) + "ms");
        return JsonResult.successResult(map);

    }
}

可以对比下这两个方法的优劣，测试的结果显而易见/test/v1.do的接口速度比/test/v2.do速度要快，差不多2倍速度

下面实验下对比这两个接口访问时间：

就此可以得到简单结论，使用SELECT SQL_CALC_FOUND_ROWS * .... ；SELECT FOUND_ROWS() as count；相对于平时使用select count(*)查询总数+查询数据效率上快上快1倍的速度，分页到后面的越明显，分页靠前的可能差不多，测试的时候可以自行实验，当然2S的时间要是还能在优化就更好了，小伙伴有更加好的建议可以一起探讨，感谢观看！！！

码农公寓

相关文章