性能优化
策略
分表
3.PostgreSQL 创建分区表,SQL优化之PostgreSQL Table Partitioning
4.PostgreSQL 9.5+ 高效分区表实现 - pg_pathman
测试
EXPLAIN ANALYZE
模拟数据
-- 范例
select
(select
array_agg(i::date)
from generate_series('2015-12-01'::date, '2015-12-30'::date, '1 day'::interval) as t(i)
)[floor(random()*4)+1] as date_key,
floor(random()*24) as hour_key,
floor(random()*1000000)+1 as client_key,
floor(random()*100000)+1 as item_key,
floor(random()*20)+1 as account,
floor(random()*10000)+1 as expense
from
generate_series(1,30,1);
分表[触发器&索引]:
-- 添加主表---------------------------------------------------------------------------------------------------
create sequence seq_auth_detail cycle;
create table tb_auth_detail
(
auth_detail_id integer not null,
person_id varchar(48) not null,
dept_id varchar(48) not null,
dev_id varchar(48) not null,
door_id varchar(48) not null,
auth_status integer not null,
finger_status integer not null,
face_status integer not null,
del_card_number varchar(1024),
auth_person_schedule_id varchar(48),
auth_dept_schedule_id varchar(48),
auth_person_grp_schedule_id varchar(48),
config_time timestamp not null,
download_time timestamp,
dev_group_id integer not null,
download_code varchar(48),
constraint pk_auth_detail primary key (auth_detail_id)
);
comment on table tb_auth_detail is '权限记录表';
comment on column tb_auth_detail.auth_detail_id is '主键';
comment on column tb_auth_detail.person_id is '人员id';
comment on column tb_auth_detail.dept_id is '组织id';
comment on column tb_auth_detail.dev_id is '设备id';
comment on column tb_auth_detail.door_id is '门id';
comment on column tb_auth_detail.auth_status is '权限状态1:待下载;2:待删除;3:已下载;4:已删除;5:正在下载;6:正在删除';
comment on column tb_auth_detail.finger_status is '指纹权限状态1:待下载;2:待删除;3:已下载;4:已禁用;5:正在下载;6:正在删除';
comment on column tb_auth_detail.face_status is '人脸权限状态1:待下载;2:待删除;3:已下载;4:已禁用;5:正在下载;6:正在删除';
comment on column tb_auth_detail.del_card_number is '待删除的卡号';
comment on column tb_auth_detail.auth_person_schedule_id is '人员计划模板';
comment on column tb_auth_detail.auth_dept_schedule_id is '组织计划模板';
comment on column tb_auth_detail.auth_person_grp_schedule_id is '人组计划模板';
comment on column tb_auth_detail.config_time is '配置时间';
comment on column tb_auth_detail.download_time is '下载时间';
comment on column tb_auth_detail.dev_group_id is '设备组id';
comment on column tb_auth_detail.download_code is '下载标识';
-- 添加触发器---------------------------------------------------------------------------------------------------
-- tb_auth_detail权限记录表进行表分区设置:begin
create or replace function func_tri_auth_detail() returns trigger as
$$
declare my_tbname varchar(128);
declare my_dev_grp_id varchar(64);
declare sql_str text;
begin
my_tbname = TG_TABLE_NAME || '_' || NEW.dev_group_id::text;
sql_str = 'INSERT INTO '||my_tbname ||' SELECT $1.* ';
execute sql_str using NEW;
return null;
exception when undefined_table then
begin
my_dev_grp_id = NEW.dev_group_id::text;
execute 'create table ' ||my_tbname || '(constraint ck_'|| my_tbname ||' check (dev_group_id =' || my_dev_grp_id || ')) inherits ('|| tg_table_name || ')';
execute 'alter table '||my_tbname ||' add constraint pk_'||my_tbname||' primary key (auth_detail_id)';
execute 'create index idx_'||my_tbname ||'_pid on ' || my_tbname || ' (person_id)';
execute 'create index idx_'||my_tbname ||'_did on ' || my_tbname || ' (dept_id)';
execute 'create index idx_'||my_tbname ||'_devid on ' || my_tbname || ' (dev_id)';
execute 'create index idx_'||my_tbname ||'_doorid on ' || my_tbname || ' (door_id)';
execute 'create index idx_'||my_tbname ||'_sid on ' || my_tbname || ' (dev_group_id)';
execute 'create index idx_'||my_tbname ||'_conf on ' || my_tbname || ' (config_time)';
execute sql_str using NEW;
return null;
exception when others then
execute sql_str using NEW;
return null;
end;
end;
$$ language plpgsql;
create trigger tri_ins_auth_detail BEFORE insert on tb_auth_detail for each row EXECUTE PROCEDURE func_tri_auth_detail();
-- tb_auth_detail权限记录表进行表分区设置:end
-- 定义ID自增序列---------------------------------------------------------------------------------------------------
alter table tb_auth_detail alter column auth_detail_id set default nextval('seq_auth_detail');
-- 添加索引---------------------------------------------------------------------------------------------------
create index idx_auth_detail_person_id on tb_auth_detail (person_id);
create index idx_auth_detail_dept_id on tb_auth_detail (dept_id);
create index idx_auth_detail_dev_id on tb_auth_detail (dev_id);
create index idx_auth_detail_door_id on tb_auth_detail (door_id);
create index idx_auth_detail_sdev_group_id on tb_auth_detail (dev_group_id);
create index idx_auth_detail_config_time on tb_auth_detail (config_time);
模拟数据构造:
-- N:数据量
-- 随机截取: N=1
select substr('cc3483bf-9d1f-4eaa-a6e2-a376ba312ff1',1,(random()*26)::integer);
-- 随机复制: N=1
select repeat('cc3483bf-9d1f-4eaa-a6e2-a376ba312ff1',(random()*40)::integer);
-- 生成序列
SELECT * FROM generate_series(1,5);
select id from generate_series(1,10) t(id); -- 自定义column
-- 生成随机数
select (random()*100)::int from generate_series(1,10);
-- 序列、随机字符串、时间戳
select generate_series(1,100000),md5(random()::text),clock_timestamp();
-- 生成时间
SELECT date(generate_series(now(), now() + '1 week', '1 day'));
-- 模拟随机数据
select
md5(random()::text) as person_id,
md5(random()::text) as dept_id,
md5(random()::text) as dev_id,
md5(random()::text) as door_id,
floor(random()*2)+1 as auth_status,
floor(random()*3)+1 as finger_status,
floor(random()*4)+1 as face_status,
floor(random()*5)+1 as del_card_number,
md5(random()::text) as auth_person_schedule_id,
md5(random()::text) as auth_dept_schedule_id,
md5(random()::text) as auth_person_grp_schedule_id,
clock_timestamp() as config_time,
clock_timestamp() as download_time,
md5(random()::text) as dev_group_id,
md5(random()::text) as download_code
from
generate_series(1,10,1);
数据插入Example
explain analyze insert into tb_auth_detail(
"person_id",
"dept_id",
"dev_id",
"door_id",
"auth_status",
"finger_status",
"face_status",
"del_card_number",
"auth_person_schedule_id",
"auth_dept_schedule_id",
"auth_person_grp_schedule_id",
"config_time",
"download_time",
"dev_group_id",
"download_code"
)
select
floor(random()*1000000)+1 as person_id,
floor(random()*1000000)+1 as dept_id,
floor(random()*1000000)+1 as dev_id,
floor(random()*1000000)+1 as door_id,
floor(random()*2)+1 as auth_status,
floor(random()*3)+1 as finger_status,
floor(random()*4)+1 as face_status,
floor(random()*5)+1 as del_card_number,
floor(random()*1000000)+1 as auth_person_schedule_id,
floor(random()*1000000)+1 as auth_dept_schedule_id,
floor(random()*1000000)+1 as auth_person_grp_schedule_id,
clock_timestamp() as config_time,
now() as download_time,
floor(random()*200)+1 as dev_group_id,
floor(random()*1000000)+1 as download_code
from
generate_series(1,100000,1);
性能分析
1.创建相同表结构的tb_auth_detail_all不进行分表
2.分别插入10W、100W、1000W数据进行测试
10W 数据
插入性能分析
- 分表
-- 0数据[初次建分区表耗时]
Insert on tb_auth_detail (cost=0.00..242.50 rows=1000 width=120) (actual time=118005.833..118005.833 rows=0 loops=1)
-> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=18.495..7147.848 rows=100000 loops=1)
-> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=14.280..461.711 rows=100000 loops=1)
Planning time: 0.248 ms
Trigger tri_ins_auth_detail: time=110770.614 calls=100000
Execution time: 118008.532 ms
-- 已有10W
Insert on tb_auth_detail (cost=0.00..242.50 rows=1000 width=120) (actual time=33649.149..33649.149 rows=0 loops=1)
-> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=13.439..1652.186 rows=100000 loops=1)
-> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=13.396..411.380 rows=100000 loops=1)
Planning time: 0.187 ms
Trigger tri_ins_auth_detail: time=31926.839 calls=100000
Execution time: 33651.926 ms
- 单表
-- 0数据
Insert on tb_auth_detail_all (cost=0.00..242.50 rows=1000 width=120) (actual time=14424.540..14424.540 rows=0 loops=1)
-> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=20.354..1273.261 rows=100000 loops=1)
-> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=19.205..273.179 rows=100000 loops=1)
Planning time: 0.552 ms
Execution time: 14429.240 ms
-- 已有10W
Insert on tb_auth_detail_all (cost=0.00..242.50 rows=1000 width=120) (actual time=25942.760..25942.760 rows=0 loops=1)
-> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=14.394..1307.630 rows=100000 loops=1)
-> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=14.282..283.456 rows=100000 loops=1)
Planning time: 0.281 ms
Execution time: 25948.271 ms
搜索性能分析
- 分表
explain analyze select * from tb_auth_detail_all;
Seq Scan on tb_auth_detail_all (cost=0.00..2635.00 rows=100000 width=86) (actual time=0.007..6.387 rows=100000 loops=1)
Planning time: 2.292 ms
Execution time: 7.462 ms
- 单表
explain analyze select * from tb_auth_detail;
-> Seq Scan on tb_auth_detail_172 (cost=0.00..14.22 rows=522 width=86) (actual time=0.004..0.172 rows=522 loops=1)
....
-> Seq Scan on tb_auth_detail_100 (cost=0.00..14.06 rows=506 width=86) (actual time=0.004..0.099 rows=506 loops=1)
-> Seq Scan on tb_auth_detail_72 (cost=0.00..12.80 rows=480 width=86) (actual time=0.005..0.197 rows=480 loops=1)
Planning time: 419.074 ms
Execution time: 79.472 ms
配置约束
官网解释 (避免扫描 PostgreSQL 分区表所有分区 )
constraint_exclusion 的含义是:当PG生产执行计划时是否考虑表上的约束,这个参数有三个选项
"off,on ,partition" ,默认参数为 off, 意思不使用表上的 constraint 来生成计划,如果设置成
on ,则对所有表生效,生成 PLAN 时会考虑表上的 constraint, 建议设置成 partition,只对分区表
生效,从而避免扫描分区表所有分区。
-- constraint_exclusion = partition # on, off, or partition
set constraint_exclusion = off;
10W数据
插入
-- off
Insert on tb_auth_detail_all (cost=0.00..242.50 rows=1000 width=120) (actual time=7993.545..7993.545 rows=0 loops=1)
-> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=13.112..1516.601 rows=100000 loops=1)
-> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=12.765..287.764 rows=100000 loops=1)
Planning time: 0.402 ms
Execution time: 7997.900 ms
-- on
Insert on tb_auth_detail_all (cost=0.00..242.50 rows=1000 width=120) (actual time=8746.615..8746.615 rows=0 loops=1)
-> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=13.932..1342.997 rows=100000 loops=1)
-> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=13.331..275.213 rows=100000 loops=1)
Planning time: 0.732 ms
Execution time: 8751.193 ms
-- partition
Insert on tb_auth_detail_all (cost=0.00..242.50 rows=1000 width=120) (actual time=10885.670..10885.670 rows=0 loops=1)
-> Subquery Scan on "*SELECT*" (cost=0.00..242.50 rows=1000 width=120) (actual time=14.209..1342.818 rows=100000 loops=1)
-> Function Scan on generate_series (cost=0.00..145.00 rows=1000 width=0) (actual time=14.165..290.517 rows=100000 loops=1)
Planning time: 0.124 ms
Execution time: 10888.549 ms
搜索
explain analyze select * from tb_auth_detail where dev_group_id = 21;
-- off
Append (cost=0.00..882.51 rows=1176 width=87) (actual time=10.694..13.909 rows=976 loops=1)
-> Seq Scan on tb_auth_detail (cost=0.00..0.00 rows=1 width=1464) (actual time=0.003..0.003 rows=0 loops=1)
Filter: (dev_group_id = 21)
-> Index Scan using idx_tb_auth_detail_192_sid on tb_auth_detail_192 (cost=0.28..4.29 rows=1 width=86) (actual time=0.215..0.215 rows=0 loops=1)
Index Cond: (dev_group_id = 21)
...
-> Index Scan using idx_tb_auth_detail_39_sid on tb_auth_detail_39 (cost=0.28..4.29 rows=1 width=86) (actual time=0.041..0.041 rows=0 loops=1)
Index Cond: (dev_group_id = 21)
Planning time: 652.183 ms
Execution time: 17.949 ms
-- on
Append (cost=0.00..28.20 rows=977 width=87) (actual time=0.110..1.498 rows=976 loops=1)
-> Seq Scan on tb_auth_detail (cost=0.00..0.00 rows=1 width=1464) (actual time=0.003..0.003 rows=0 loops=1)
Filter: (dev_group_id = 21)
-> Seq Scan on tb_auth_detail_21 (cost=0.00..28.20 rows=976 width=86) (actual time=0.105..1.472 rows=976 loops=1)
Filter: (dev_group_id = 21)
Planning time: 536.421 ms
Execution time: 2.257 ms
-- partition
Append (cost=0.00..28.20 rows=977 width=87) (actual time=0.009..0.170 rows=976 loops=1)
-> Seq Scan on tb_auth_detail (cost=0.00..0.00 rows=1 width=1464) (actual time=0.001..0.001 rows=0 loops=1)
Filter: (dev_group_id = 21)
-> Seq Scan on tb_auth_detail_21 (cost=0.00..28.20 rows=976 width=86) (actual time=0.007..0.153 rows=976 loops=1)
Filter: (dev_group_id = 21)
Planning time: 409.550 ms
Execution time: 0.841 ms
explain analyze select * from tb_auth_detail_all where dev_group_id = 20;
-- off
Bitmap Heap Scan on tb_auth_detail_all (cost=68.16..2746.17 rows=999 width=86) (actual time=1.079..48.217 rows=993 loops=1)
Recheck Cond: (dev_group_id = 20)
Heap Blocks: exact=876
-> Bitmap Index Scan on idx_auth_detail_all_sdev_group_id (cost=0.00..67.91 rows=999 width=0) (actual time=0.901..0.901 rows=993 loops=1)
Index Cond: (dev_group_id = 20)
Planning time: 2.386 ms
Execution time: 48.690 ms
-- on
Append (cost=0.00..28.20 rows=977 width=87) (actual time=0.110..1.498 rows=976 loops=1)
-> Seq Scan on tb_auth_detail (cost=0.00..0.00 rows=1 width=1464) (actual time=0.003..0.003 rows=0 loops=1)
Filter: (dev_group_id = 21)
-> Seq Scan on tb_auth_detail_21 (cost=0.00..28.20 rows=976 width=86) (actual time=0.105..1.472 rows=976 loops=1)
Filter: (dev_group_id = 21)
Planning time: 536.421 ms
Execution time: 2.257 ms
-- partition
Bitmap Heap Scan on tb_auth_detail_all (cost=68.16..2746.17 rows=999 width=86) (actual time=0.195..0.751 rows=993 loops=1)
Recheck Cond: (dev_group_id = 20)
Heap Blocks: exact=876
-> Bitmap Index Scan on idx_auth_detail_all_sdev_group_id (cost=0.00..67.91 rows=999 width=0) (actual time=0.112..0.112 rows=993 loops=1)
Index Cond: (dev_group_id = 20)
Planning time: 0.138 ms
Execution time: 0.889 ms
分析约束对于搜索的影响
- 必须将constraint_exclusion设置为on或partition,否则planner将无法正常跳过不符合条件的分区表,也即无法发挥表分区的优势
(当constraint_exclusion为on或者partition时,查询计划器会根据分区表的检查限制将对主表的查询限制在符合检查限制条件的分区表上,直接避免了对不符合条件的分区表的扫描。) - 单表(10W)情况下,性能差异不大
- 分表(10W)情况下,开启约束对于有where条件子句的可以明显提高性能。
其他方案
- 使用Rule将对主表的插入请求重定向到对应的子表
CREATE RULE almart_rule_2015_12_31 AS
ON INSERT TO almart
WHERE
date_key = DATE '2015-12-31'
DO INSTEAD
INSERT INTO almart_2015_12_31 VALUES (NEW.*);
与Trigger相比,Rule会带来更大的额外开销,但每个请求只造成一次开销而非每条数据都引入一次开销,所以该方法对大批量的数据插入操作更具优势。然而,实际上在绝大部分场景下,Trigger比Rule的效率更高。同时,COPY操作会忽略Rule,而可以正常触发Trigger。另外,如果使用Rule方式,没有比较简单的方法处理没有被Rule覆盖到的插入操作。此时该数据会被插入到主表中而不会报错,从而无法有效利用表分区的优势。除了使用表继承外,还可使用UNION ALL的方式达到表分区的效果。
CREATE VIEW almart AS
SELECT * FROM almart_2015_12_10
UNION ALL
SELECT * FROM almart_2015_12_11
UNION ALL
SELECT * FROM almart_2015_12_12
...
UNION ALL
SELECT * FROM almart_2015_12_30;
当有新的分区表时,需要更新该View。实践中,与使用表继承相比,一般不推荐使用该方法。
REFRENCES
1.PostgreSQL 创建分区表,SQL优化之PostgreSQL Table Partitioning
2.PostgreSQL wiki 表分区
微信公众号
扫码关注或搜索架构探险之道
获取最新文章,坚持每周一更,坚持技术分享的我和你们一起成长 ^_^ !