由于线上的MySQL实时表数据量太大,即使建了索引查询速度也不理想,上周下班前经理让我对线上MySQL的七张源数据层面的实时表进行归档,现表仅保留近三天的数据,三天之前的数据全部归档到历史表中
一、基本思想
考虑到按照时间进行归档,因此MySQL按时间创建分区表,并且动态维护每张历史表的分区,将三天前的数据插入到历史表中,根据时间的不同会落到不同的分区中;校验数据量在没有丢失的情况下删除原表数据并记录日志。
二、前期准备
所谓磨刀不误砍柴工,首先需要了解MySQL分区表的理论、如何创建分区表、如何动态维护分区表…
2.1 MySQL创建分区表
原表的字段基本都是varchar
类型,为了方便分区创建历史表的同时增加一个归档日期字段,并按照该字段进行分区给定一个初始分区
DROP TABLE IF EXISTS `aj_gaaj_archive `;
CREATE TABLE `aj_gaaj_archive `
(
...,
`BACKUP_TIME` date comment '归档日期'
) PARTITION BY RANGE (to_days(`BACKUP_TIME`))(
partition p20201224 values less than (to_days('20201225'))
);
查看分区表的分区情况
SELECT partition_name part,
partition_expression expr,
partition_description descr,
FROM_DAYS(partition_description) lessthan_sendtime,
table_rows
FROM INFORMATION_SCHEMA.partitions
WHERE TABLE_SCHEMA = SCHEMA()
AND TABLE_NAME = 'aj_gaaj_archive';
2.2 MySQL分区表维护
使用过hive
的都知道,hive
是可以进行动态分区的,根据数据的不同动态的添加分区,据了解MySQL不支持这种功能,因此需要我们手动的去增加分区,从一位老哥的博客中拔下一段方便维护分区表的存储过程 [传送门]
create procedure auto_set_partitions(in databasename varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
in tablename varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
in partition_number int, in partitiontype int, in gaps int)
L_END:
begin
declare max_partition_description varchar(255) default '';
declare p_name varchar(255) default 0;
declare p_description varchar(255) default 0;
declare isexist_partition varchar(255) default 0;
declare i int default 1;
-- 查看对应数据库对应表是否已经有手动分区[自动分区前提是必须有手动分区]
select partition_name
into isexist_partition
from information_schema.partitions
where table_schema = databasename
and table_name = tablename
limit 1;
-- 如果不存在则打印错误并退出存储过程
if isexist_partition <=> '' then
select 'partition table not is exist' as "ERROR";
leave L_END;
end if;
-- 获取最大[降序获取]的分区描述[值]
select partition_description
into max_partition_description
from information_schema.partitions
where table_schema = databasename
and table_name = tablename
order by partition_description desc
limit 1;
-- 如果最大分区没有,说明没有手动分区,则无法创建自动分区
if max_partition_description <=> '' then
select 'partition table is error' as "ERROR";
leave L_END;
end if;
-- 替换前后的单引号[''两个引号表示一个单引号的转义]
-- set max_partition_description = REPLACE(max_partition_description, '''', '');
-- 或使用如下语句
set max_partition_description = REPLACE(max_partition_description - 1, '\'', '');
-- 自动创建number个分区
while (i <= partition_number)
do
if (partitiontype = 0) then
-- 每个分区按天递增,递增gaps天
set p_description = DATE_ADD(FROM_DAYS(max_partition_description), interval i * gaps day);
elseif (partitiontype = 1) then
-- 每个分区按月递增,递增gaps月
set p_description = DATE_ADD(FROM_DAYS(max_partition_description), interval i * gaps month);
else
-- 每个分区按年递增,递增gaps年
set p_description = DATE_ADD(FROM_DAYS(max_partition_description), interval i * gaps year);
end if;
-- 删除空格
set p_name = REPLACE(p_description, ' ', '');
-- 例如10.20的记录实际是less than 10.21
set p_description = DATE_ADD(p_description, interval 1 day);
-- 如果有横杆替换为空
set p_name = REPLACE(p_name, '-', '');
-- 删除时间冒号
set p_name = REPLACE(p_name, ':', '');
-- alter table tablename add partition ( partition pname values less than ('2017-02-20 10:05:56') );
set @sql = CONCAT('ALTER TABLE ', tablename, ' ADD PARTITION ( PARTITION p', p_name,
' VALUES LESS THAN (TO_DAYS(\'', p_description, '\')))');
-- set @sql=CONCAT('ALTER TABLE ', tablename ,' ADD PARTITION ( PARTITION p', p_name ,' VALUES LESS THAN (TO_DAYS(\'', p_description ,'\')))');
-- 打印sql变量
-- select @sql;
-- 准备sql语句
PREPARE stmt from @sql;
-- 执行sql语句
EXECUTE stmt;
-- 释放资源
DEALLOCATE PREPARE stmt;
-- 递增变量
set i = (i + 1);
end while;
end;
三、本地实施
因为归档的表属于ODS级别,不得不慎重啊,稍有不慎就要跑路的