标签
PostgreSQL , GIS , PostGIS , Greenplum , 空间检索 , GiST , B-Tree , geohash
背景
《Greenplum 行存、列存,堆表、AO表的原理和选择》
以上文档详细的介绍了行存、列存,堆表、AO表的原理以及选择的依据。
《一个简单算法可以帮助物联网,金融 用户 节约98%的数据存储成本 (PostgreSQL,Greenplum帮你做到)》
以上文档介绍了提升基于列存的全局数据压缩比的方法。
《解密上帝之手 - 阿里云HDB for PostgreSQL数据库metascan特性(存储级、块级、batch级过滤与数据编排)》
以上文档介绍了局部编排,以及阿里云HDB for PostgreSQL数据库的metascan特性,(在不需要索引的情况下,如何提升任意列的选择性)。
压缩实际上是计算(CPU)换空间(磁盘)的做法,该不该做,我们还是先看看压缩比、性能损耗吧。
堆表
postgres=# create table t_heap(id int, c1 text, c2 int);
CREATE TABLE
postgres=# insert into t_heap select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
INSERT 0 10000000
Time: 120526.098 ms
某个维度count查询。
postgres=# explain analyze select c2,count(*) from t_heap group by c2;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 48:1 (slice2; segments: 48) (cost=1578949.03..1579074.98 rows=10076 width=12)
Rows out: 10001 rows at destination with 1354 ms to end, start offset by 519 ms.
-> HashAggregate (cost=1578949.03..1579074.98 rows=210 width=12)
Group By: t_heap.c2
Rows out: Avg 208.4 rows x 48 workers. Max 223 rows (seg17) with 0.001 ms to first row, 692 ms to end, start offset by 581 ms.
-> Redistribute Motion 48:48 (slice1; segments: 48) (cost=1578596.37..1578797.89 rows=210 width=12)
Hash Key: t_heap.c2
Rows out: Avg 10001.0 rows x 48 workers at destination. Max 10704 rows (seg17) with 596 ms to end, start offset by 581 ms.
-> HashAggregate (cost=1578596.37..1578596.37 rows=210 width=12)
Group By: t_heap.c2
Rows out: Avg 10001.0 rows x 48 workers. Max 10001 rows (seg0) with 0.006 ms to first row, 131 ms to end, start offset by 566 ms.
-> Seq Scan on t_heap (cost=0.00..1528595.58 rows=208337 width=4)
Rows out: Avg 208333.3 rows x 48 workers. Max 208401 rows (seg18) with 26 ms to first row, 901 ms to end, start offset by 573 ms.
Slice statistics:
(slice0) Executor memory: 359K bytes.
(slice1) Executor memory: 724K bytes avg x 48 workers, 724K bytes max (seg0).
(slice2) Executor memory: 388K bytes avg x 48 workers, 388K bytes max (seg0).
Statement statistics:
Memory used: 128000K bytes
Settings: optimizer=off
Optimizer status: legacy query optimizer
Total runtime: 1874.143 ms
(22 rows)
Time: 1879.480 ms
无索引,某个单值查询
postgres=# explain analyze select * from t_heap where c2=1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 48:1 (slice1; segments: 48) (cost=0.00..1553595.98 rows=980 width=40)
Rows out: 1001 rows at destination with 1489 ms to end, start offset by 1.419 ms.
-> Seq Scan on t_heap (cost=0.00..1553595.98 rows=21 width=40)
Filter: c2 = 1
Rows out: Avg 20.9 rows x 48 workers. Max 35 rows (seg6) with 21 ms to first row, 613 ms to end, start offset by 853 ms.
Slice statistics:
(slice0) Executor memory: 295K bytes.
(slice1) Executor memory: 230K bytes avg x 48 workers, 230K bytes max (seg0).
Statement statistics:
Memory used: 128000K bytes
Settings: optimizer=off
Optimizer status: legacy query optimizer
Total runtime: 1490.889 ms
(13 rows)
Time: 1492.516 ms
有索引(IO放大),某个单值查询
create index idx_t_heap on t_heap(c2);
explain analyze select * from t_heap where c2=1;
postgres=# explain analyze select * from t_heap where c2=1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 48:1 (slice1; segments: 48) (cost=107.99..95579.14 rows=980 width=40)
Rows out: 1001 rows at destination with 34 ms to end, start offset by 1.331 ms.
-> Bitmap Heap Scan on t_heap (cost=107.99..95579.14 rows=21 width=40)
Recheck Cond: c2 = 1
Rows out: Avg 20.9 rows x 48 workers. Max 35 rows (seg6) with 0.236 ms to first row, 0.959 ms to end, start offset by 19 ms.
-> Bitmap Index Scan on idx_t_heap (cost=0.00..107.74 rows=21 width=0)
Index Cond: c2 = 1
Bitmaps out: Avg 1.0 x 48 workers. Max 1 (seg0) with 0.169 ms to end, start offset by 19 ms.
Work_mem used: 168K bytes avg, 282K bytes max (seg6).
Slice statistics:
(slice0) Executor memory: 303K bytes.
(slice1) Executor memory: 901K bytes avg x 48 workers, 901K bytes max (seg0). Work_mem: 282K bytes max.
Statement statistics:
Memory used: 128000K bytes
Settings: optimizer=off
Optimizer status: legacy query optimizer
Total runtime: 35.093 ms
(17 rows)
Time: 37.198 ms
APPENDONLY 行存储、列存储
测试脚本如下
vi test.sql
\timing
-- 堆表
create table t_heap(id int, c1 text, c2 int);
insert into t_heap select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_heap group by c2;
explain analyze select * from t_heap where c2=1;
create index idx_t_heap on t_heap(c2);
explain analyze select * from t_heap where c2=1;
-- AO 行存
-- 不压缩, 8K
create table t_ao_row_8k_0(id int, c1 text, c2 int) with (appendonly=true, blocksize=8192, ORIENTATION=row, COMPRESSTYPE=NONE, CHECKSUM=false);
insert into t_ao_row_8k_0 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_row_8k_0 group by c2;
explain analyze select * from t_ao_row_8k_0 where c2=1;
create index idx_t_ao_row_8k_0 on t_ao_row_8k_0(c2);
explain analyze select * from t_ao_row_8k_0 where c2=1;
-- 不压缩, 2M
create table t_ao_row_2m_0(id int, c1 text, c2 int) with (appendonly=true, blocksize=2097152, ORIENTATION=row, COMPRESSTYPE=NONE, CHECKSUM=false);
insert into t_ao_row_2m_0 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_row_2m_0 group by c2;
explain analyze select * from t_ao_row_2m_0 where c2=1;
create index idx_t_ao_row_2m_0 on t_ao_row_2m_0(c2);
explain analyze select * from t_ao_row_2m_0 where c2=1;
-- 压缩比5, 8K
create table t_ao_row_8k_5(id int, c1 text, c2 int) with (appendonly=true, blocksize=8192, ORIENTATION=row, COMPRESSTYPE=zlib, COMPRESSLEVEL=5, CHECKSUM=false);
insert into t_ao_row_8k_5 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_row_8k_5 group by c2;
explain analyze select * from t_ao_row_8k_5 where c2=1;
create index idx_t_ao_row_8k_5 on t_ao_row_8k_5(c2);
explain analyze select * from t_ao_row_8k_5 where c2=1;
-- 压缩比5, 2M
create table t_ao_row_2m_5(id int, c1 text, c2 int) with (appendonly=true, blocksize=2097152, ORIENTATION=row, COMPRESSTYPE=zlib, COMPRESSLEVEL=5, CHECKSUM=false);
insert into t_ao_row_2m_5 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_row_2m_5 group by c2;
explain analyze select * from t_ao_row_2m_5 where c2=1;
create index idx_t_ao_row_2m_5 on t_ao_row_2m_5(c2);
explain analyze select * from t_ao_row_2m_5 where c2=1;
-- 压缩比9, 8K
create table t_ao_row_8k_9(id int, c1 text, c2 int) with (appendonly=true, blocksize=8192, ORIENTATION=row, COMPRESSTYPE=zlib, COMPRESSLEVEL=9, CHECKSUM=false);
insert into t_ao_row_8k_9 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_row_8k_9 group by c2;
explain analyze select * from t_ao_row_8k_9 where c2=1;
create index idx_t_ao_row_8k_9 on t_ao_row_8k_9(c2);
explain analyze select * from t_ao_row_8k_9 where c2=1;
-- 压缩比9, 2M
create table t_ao_row_2m_9(id int, c1 text, c2 int) with (appendonly=true, blocksize=2097152, ORIENTATION=row, COMPRESSTYPE=zlib, COMPRESSLEVEL=9, CHECKSUM=false);
insert into t_ao_row_2m_9 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_row_2m_9 group by c2;
explain analyze select * from t_ao_row_2m_9 where c2=1;
create index idx_t_ao_row_2m_9 on t_ao_row_2m_9(c2);
explain analyze select * from t_ao_row_2m_9 where c2=1;
-- AO 列存
-- 不压缩, 8K
create table t_ao_COLUMN_8k_0(id int, c1 text, c2 int) with (appendonly=true, blocksize=8192, ORIENTATION=COLUMN, COMPRESSTYPE=NONE, CHECKSUM=false);
insert into t_ao_COLUMN_8k_0 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_COLUMN_8k_0 group by c2;
explain analyze select * from t_ao_COLUMN_8k_0 where c2=1;
create index idx_t_ao_COLUMN_8k_0 on t_ao_COLUMN_8k_0(c2);
explain analyze select * from t_ao_COLUMN_8k_0 where c2=1;
-- 不压缩, 2M
create table t_ao_COLUMN_2m_0(id int, c1 text, c2 int) with (appendonly=true, blocksize=2097152, ORIENTATION=COLUMN, COMPRESSTYPE=NONE, CHECKSUM=false);
insert into t_ao_COLUMN_2m_0 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_COLUMN_2m_0 group by c2;
explain analyze select * from t_ao_COLUMN_2m_0 where c2=1;
create index idx_t_ao_COLUMN_2m_0 on t_ao_COLUMN_2m_0(c2);
explain analyze select * from t_ao_COLUMN_2m_0 where c2=1;
-- 压缩比5, 8K
create table t_ao_COLUMN_8k_5(id int, c1 text, c2 int) with (appendonly=true, blocksize=8192, ORIENTATION=COLUMN, COMPRESSTYPE=zlib, COMPRESSLEVEL=5, CHECKSUM=false);
insert into t_ao_COLUMN_8k_5 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_COLUMN_8k_5 group by c2;
explain analyze select * from t_ao_COLUMN_8k_5 where c2=1;
create index idx_t_ao_COLUMN_8k_5 on t_ao_COLUMN_8k_5(c2);
explain analyze select * from t_ao_COLUMN_8k_5 where c2=1;
-- 压缩比5, 2M
create table t_ao_COLUMN_2m_5(id int, c1 text, c2 int) with (appendonly=true, blocksize=2097152, ORIENTATION=COLUMN, COMPRESSTYPE=zlib, COMPRESSLEVEL=5, CHECKSUM=false);
insert into t_ao_COLUMN_2m_5 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_COLUMN_2m_5 group by c2;
explain analyze select * from t_ao_COLUMN_2m_5 where c2=1;
create index idx_t_ao_COLUMN_2m_5 on t_ao_COLUMN_2m_5(c2);
explain analyze select * from t_ao_COLUMN_2m_5 where c2=1;
-- 压缩比9, 8K
create table t_ao_COLUMN_8k_9(id int, c1 text, c2 int) with (appendonly=true, blocksize=8192, ORIENTATION=COLUMN, COMPRESSTYPE=zlib, COMPRESSLEVEL=9, CHECKSUM=false);
insert into t_ao_COLUMN_8k_9 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_COLUMN_8k_9 group by c2;
explain analyze select * from t_ao_COLUMN_8k_9 where c2=1;
create index idx_t_ao_COLUMN_8k_9 on t_ao_COLUMN_8k_9(c2);
explain analyze select * from t_ao_COLUMN_8k_9 where c2=1;
-- 压缩比9, 2M
create table t_ao_COLUMN_2m_9(id int, c1 text, c2 int) with (appendonly=true, blocksize=2097152, ORIENTATION=COLUMN, COMPRESSTYPE=zlib, COMPRESSLEVEL=9, CHECKSUM=false);
insert into t_ao_COLUMN_2m_9 select id, repeat(md5(random()::text), 128), random()*10000 from generate_series(1,10000000) t(id);
explain analyze select c2,count(*) from t_ao_COLUMN_2m_9 group by c2;
explain analyze select * from t_ao_COLUMN_2m_9 where c2=1;
create index idx_t_ao_COLUMN_2m_9 on t_ao_COLUMN_2m_9(c2);
explain analyze select * from t_ao_COLUMN_2m_9 where c2=1;
select tablename, pg_size_pretty(pg_total_relation_size('postgres.'||tablename)) from pg_tables where schemaname='postgres';
nohup psql -f ./test.sql > ./log 2>&1 &
结果对比
存储形态 | 写入1000万耗时 | 空间占用 | 分组聚合 | 单值多行查询(走全表) | 建索引耗时 | 单值多行查询(走索引) |
---|---|---|---|---|---|---|
堆表 | 120秒 | 44GB | 1.8秒 | 1.5秒 | 13秒 | 37毫秒 |
AO行存8K不压缩 | 81秒 | 1.3GB | 168毫秒 | 96毫秒 | 356毫秒 | 32毫秒 |
AO行存2MB不压缩 | 101秒 | 39GB | 1.8秒 | 1.7秒 | 1.9秒 | 158毫秒 |
AO行存8K压缩5级 | 80秒 | 557MB | 322毫秒 | 269毫秒 | 505毫秒 | 54毫秒 |
AO行存2MB压缩5级 | 104秒 | 690MB | 1.6秒 | 1.5秒 | 1.7秒 | 163毫秒 |
AO行存8K压缩9级 | 80秒 | 557MB | 331毫秒 | 247毫秒 | 500毫秒 | 58毫秒 |
AO行存2MB压缩9级 | 106秒 | 690MB | 1.7秒 | 1.38秒 | 1.67秒 | 162毫秒 |
AO列存8K不压缩 | 92.6秒 | 39GB | 362毫秒 | 622毫秒 | 877毫秒 | 36毫秒 |
AO列存2MB不压缩 | 98.8秒 | 38GB | 140毫秒 | 1.62秒 | 1.8秒 | 176毫秒 |
AO列存8K压缩5级 | 83秒 | 1.4GB | 125毫秒 | 2.2秒 | 2.5秒 | 58毫秒 |
AO列存2MB压缩5级 | 104秒 | 593MB | 152毫秒 | 1.37秒 | 1.73秒 | 189毫秒 |
AO列存8K压缩9级 | 83秒 | 1.4GB | 122毫秒 | 2.3秒 | 2.5秒 | 62毫秒 |
AO列存2MB压缩9级 | 106秒 | 593MB | 136毫秒 | 1.5秒 | 1.77秒 | 181毫秒 |