Description:
pg_reorg is an utility program toreorganize tables in PostgreSQL databases. Unlike clusterdb, it doesn‘t blockany selections and updates during reorganization. You can choose one of thefollowing methods to reorganize.
-
Online CLUSTER (ordered by cluster index)
-
Ordered by specified columns
-
Online VACUUM FULL (packing rows only)
NOTICE:
-
Only superusers can use the utility.
-
Target table must have PRIMARY KEY.
1、安装
编译安装
下载地址:http://pgfoundry.org/frs/?group_id=1000411&release_id=2083#pg_reorg-_1.1.10-title-content
[root@masterdb ~]# tar -zxvfpg_reorg-1.1.10.tar.gz
[root@masterdb ~]# cd pg_reorg-1.1.10
[root@masterdb pg_reorg-1.1.10]# ./home/postgres/.bash_profile
[root@masterdb pg_reorg-1.1.10]# make
[root@masterdb pg_reorg-1.1.10]# makeinstall
引入扩展
[postgres@masterdb ~]$ createdb tt
[postgres@masterdb ~]$ psql tt
psql (9.3.4)
Type "help" for help.
tt=# create extension pg_reorg ;
CREATE EXTENSION
tt=# \dx
List of installedextensions
Name | Version | Schema | Description
----------+---------+------------+------------------------------------
pg_reorg | 1.1.10 | public | re-organizes a PostgreSQL database
plpgsql | 1.0 | pg_catalog | PL/pgSQLprocedural language
(2 rows)
2、测试
建立测试表
tt=# create table t1(id int primarykey,name text);
CREATE TABLE
tt=# insert into t1 select generate_series(1,5000000),‘HighGo‘;
INSERT 0 5000000
tt=# \d+
List of relations
Schema | Name | Type | Owner | Size | Description
--------+------+-------+----------+---------+-------------
public | t1 | table | postgres | 211 MB |
(1 row)
tt=# select pg_relation_filepath(‘t1‘);
pg_relation_filepath
----------------------
base/16812/16874
(1 row)
使用vacuum full
tt=# \timing
Timing is on.
tt=# vacuum FULL VERBOSE t1;
INFO: vacuuming "public.t1"
在vacuum full操作进行的同时,在另一终端执行:
tt=# select * from t1 limit 5;
[一直等待vacuum full操作完成]
最终输出信息如下:
tt=# vacuum FULL VERBOSE t1;
INFO: vacuuming "public.t1"
INFO: "t1": found 0 removable, 5000000 nonremovable row versions in27028 pages
DETAIL: 0 dead row versions cannot be removed yet.
CPU 1.11s/2.71u sec elapsed 5.39 sec.
VACUUM
Time: 22358.823 ms
tt=# select pg_relation_filepath(‘t1‘);
pg_relation_filepath
----------------------
base/16812/16878
(1 row)
[表文件发生了变化,索引文件也同时被重组]
使用pg_reorg
[postgres@masterdb ~]$ time pg_reorg -n -tt1 -d tt -e -E DEBUG
LOG: (query) SET statement_timeout = 0
LOG: (query) SET search_path = pg_catalog,pg_temp, public
LOG: (query) SET client_min_messages =warning
LOG: (query) SELECT * FROM reorg.tablesWHERE relid = $1::regclass
LOG: (param:0)= t1
INFO: ---- reorganize one table with 7steps. ----
INFO: target table name : t1
DEBUG: target_oid : 16843
DEBUG: target_toast : 16846
DEBUG: target_tidx : 16848
DEBUG: pkid : 16849
DEBUG: ckid : 0
DEBUG: create_pktype : CREATE TYPE reorg.pk_16843 AS (id integer)
DEBUG: create_log : CREATE TABLE reorg.log_16843 (idbigserial PRIMARY KEY, pk reorg.pk_16843, row t1)
DEBUG: create_trigger : CREATE TRIGGERz_reorg_trigger BEFORE INSERT OR DELETE OR UPDATE ON t1 FOR EACH ROW EXECUTEPROCEDURE reorg.reorg_trigger(‘INSERT INTO reorg.log_16843(pk, row) VALUES(CASE WHEN $1 IS NULL THEN NULL ELSE (ROW($1.id)::reorg.pk_16843) END, $2)‘)
DEBUG: create_table : CREATE TABLE reorg.table_16843 WITH(oids=false) TABLESPACE pg_default AS SELECT id,name FROM ONLY t1
DEBUG: drop_columns : (skipped)
DEBUG: delete_log : DELETE FROM reorg.log_16843
DEBUG: lock_table : LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE
DEBUG: sql_peek : SELECT * FROM reorg.log_16843 ORDER BYid LIMIT $1
DEBUG: sql_insert : INSERT INTO reorg.table_16843 VALUES($1.*)
DEBUG: sql_delete : DELETE FROM reorg.table_16843 WHERE (id)= ($1.id)
DEBUG: sql_update : UPDATE reorg.table_16843 SET (id, name)= ($2.id, $2.name) WHERE (id) = ($1.id)
DEBUG: sql_pop : DELETE FROM reorg.log_16843 WHERE id= $1
INFO: ---- STEP1. setup ----
INFO: This needs EXCLUSIVE LOCK against thetarget table.
LOG: (query) BEGIN ISOLATION LEVEL READCOMMITTED
LOG: (query) SET LOCAL statement_timeout =100
LOG: (query) LOCK TABLE t1 IN ACCESSEXCLUSIVE MODE
LOG: (query) RESET statement_timeout
LOG: (query) SELECTreorg.conflicted_triggers($1)
LOG: (param:0)= 16843
LOG: (query) CREATE TYPE reorg.pk_16843 AS(id integer)
LOG: (query) CREATE TABLE reorg.log_16843(id bigserial PRIMARY KEY, pk reorg.pk_16843, row t1)
LOG: (query) CREATE TRIGGER z_reorg_triggerBEFORE INSERT OR DELETE OR UPDATE ON t1 FOR EACH ROW EXECUTE PROCEDUREreorg.reorg_trigger(‘INSERT INTO reorg.log_16843(pk, row) VALUES( CASE WHEN $1IS NULL THEN NULL ELSE (ROW($1.id)::reorg.pk_16843) END, $2)‘)
LOG: (query) SELECTreorg.disable_autovacuum(‘reorg.log_16843‘)
LOG: (query) COMMIT
INFO: ---- STEP2. copy tuples into temptable----
LOG: (query) BEGIN ISOLATION LEVELSERIALIZABLE
LOG: (query) SELECT set_config(‘work_mem‘,current_setting(‘maintenance_work_mem‘), true)
LOG: (query) SET LOCAL synchronize_seqscans= off
LOG: (query) SELECTreorg.array_accum(virtualtransaction) FROM pg_locks WHERE locktype =‘virtualxid‘ AND pid <> pg_backend_pid() AND (virtualxid, virtualtransaction)<> (‘1/1‘, ‘-1/0‘)
LOG: (query) DELETE FROM reorg.log_16843
LOG: (query) CREATE TABLE reorg.table_16843WITH (oids=false) TABLESPACE pg_default AS SELECT id,name FROM ONLY t1
LOG: (query) SELECTreorg.disable_autovacuum(‘reorg.table_16843‘)
LOG: (query) COMMIT
INFO: ---- STEP3. create indexes ----
LOG: (query) SELECT indexrelid,reorg.reorg_indexdef(indexrelid, indrelid), indisvalid,pg_get_indexdef(indexrelid) FROM pg_index WHERE indrelid = $1
LOG: (param:0)= 16843
DEBUG: [0]
DEBUG: target_oid : 16849
DEBUG: create_index : CREATE UNIQUE INDEXindex_16849 ON reorg.table_16843 USING btree (id)
LOG: (query) CREATE UNIQUE INDEXindex_16849 ON reorg.table_16843 USING btree (id)
INFO: ---- STEP4. apply logs ----
LOG: (query) SELECT reorg.reorg_apply($1,$2, $3, $4, $5, $6)
LOG: (param:0)= SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1
LOG: (param:1)= INSERT INTO reorg.table_16843 VALUES ($1.*)
LOG: (param:2)= DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)
LOG: (param:3)= UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) =($1.id)
LOG: (param:4)= DELETE FROM reorg.log_16843 WHERE id = $1
LOG: (param:5)= 1000
LOG: (query) SELECT pid FROM pg_locks WHERElocktype = ‘virtualxid‘ AND pid <> pg_backend_pid() AND virtualtransaction= ANY($1)
LOG: (param:0)= {}
INFO: ---- STEP5. swap tables ----
INFO: This needs EXCLUSIVE LOCK against thetarget table.
LOG: (query) BEGIN ISOLATION LEVEL READCOMMITTED
LOG: (query) SET LOCAL statement_timeout =100
LOG: (query) LOCK TABLE t1 IN ACCESSEXCLUSIVE MODE
LOG: (query) RESET statement_timeout
LOG: (query) SELECT reorg.reorg_apply($1,$2, $3, $4, $5, $6)
LOG: (param:0)= SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1
LOG: (param:1)= INSERT INTO reorg.table_16843 VALUES ($1.*)
LOG: (param:2)= DELETE FROM reorg.table_16843 WHERE (id) = ($1.id)
LOG: (param:3)= UPDATE reorg.table_16843 SET (id, name) = ($2.id, $2.name) WHERE (id) =($1.id)
LOG: (param:4)= DELETE FROM reorg.log_16843 WHERE id = $1
LOG: (param:5)= 0
LOG: (query) SELECT reorg.reorg_swap($1)
LOG: (param:0)= 16843
LOG: (query) COMMIT
INFO: ---- STEP6. drop old table----
LOG: (query) BEGIN ISOLATION LEVEL READCOMMITTED
LOG: (query) SELECT reorg.reorg_drop($1)
LOG: (param:0)= 16843
LOG: (query) COMMIT
INFO: ---- STEP7. analyze ----
LOG: (query) BEGIN ISOLATION LEVEL READCOMMITTED
LOG: (query) ANALYZE t1
LOG: (query) COMMIT
real 0m21.524s
user 0m0.007s
sys 0m0.006s
[该过程中使用到了一个中间临时表和一个中间日志表以及触发器,通过触发器将重组过程业务中发送的请求语句记录到日志表中,在完成时将日志表中记录的变更同步到中间表中,最后将中间表与实际表调换(通过调换两个表在pg_class中的信息实现),结束后将触发器、中间日志表及新的中间表(原业务表)删除。具体的过程可查看后附的数据库日志。]
在pg_reorg执行的过程中查询数据:
tt=# select * from t1 limit 5;
id| name
----+--------
1 |HighGo
2 |HighGo
3 |HighGo
4 |HighGo
5 |HighGo
(5 rows)
[不会等待pg_reorg执行完毕,因为在重组的过程中未一直将表锁住]
tt=# select pg_relation_filepath(‘t1‘);
pg_relation_filepath
----------------------
base/16812/16900
(1 row)
[数据文件发生了变化,当然,索引文件也同时被重组]
对比结果
表大小 |
索引大小 |
pg_reorg |
vacuum full |
211MB |
107MB |
0m21.524s |
22358.823 ms |
845MB |
428MB |
2m42.015s |
99549.960 ms |
3、限制
Temp tables
pg_reorg cannot reorganize temp tables.
GiST indexes
pg_reorg cannot reorganize tables usingGiST indexes.
DDL commands
You cannot do DDL commands except VACUUMand ANALYZE during pg_reorg. In many cases pg_reorg will fail and rollback collectly,but there are some cases which may result in data-corruption .
TRUNCATE
TRUNCATE islost. Deleted rows still exist after pg_reorg.
CREATE INDEX
It causes indexcorruptions.
ALTER TABLE ... ADD COLUMN
It causes lostof data. Newly added columns are initialized with NULLs.
ALTER TABLE ... ALTER COLUMN TYPE
It causes datacorruptions.
ALTER TABLE ... SET TABLESPACE
It causes datacorruptions by wrong relfilenode.
注意:
-
重组过程中会增大I/O压力,执行重组时应避开系统繁忙的时间段;
-
重组过程中需要创建一些临时对象,所以执行重组时应确保有足够的磁盘空间。
附
LOG: statement: SET statement_timeout = 0
LOG: statement: SET search_path = pg_catalog, pg_temp, public
LOG: statement: SET client_min_messages = warning
LOG: execute <unnamed>: SELECT * FROM reorg.tables WHERE relid =$1::regclass
DETAIL: parameters: $1 = ‘t1‘
LOG: statement: BEGIN ISOLATION LEVEL READ COMMITTED
LOG: statement: SET LOCAL statement_timeout = 100
LOG: statement: LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE
LOG: statement: RESET statement_timeout
LOG: execute <unnamed>: SELECT reorg.conflicted_triggers($1)
DETAIL: parameters: $1 = ‘16843‘
LOG: statement: CREATE TYPE reorg.pk_16843 AS (id integer)
LOG: statement: CREATE TABLE reorg.log_16843 (id bigserial PRIMARY KEY, pkreorg.pk_16843, row t1)
LOG: statement: CREATE TRIGGER z_reorg_trigger BEFORE INSERT OR DELETE ORUPDATE ON t1 FOR EACH ROW EXECUTE PROCEDURE reorg.reorg_trigger(‘INSERT INTOreorg.log_16843(pk, row) VALUES( CASE WHEN $1 IS NULL THEN NULL ELSE(ROW($1.id)::reorg.pk_16843) END, $2)‘)
LOG: statement: SELECT reorg.disable_autovacuum(‘reorg.log_16843‘)
LOG: statement: COMMIT
LOG: statement: BEGIN ISOLATION LEVEL SERIALIZABLE
LOG: statement: SELECT set_config(‘work_mem‘,current_setting(‘maintenance_work_mem‘), true)
LOG: statement: SET LOCAL synchronize_seqscans = off
LOG: statement: SELECT reorg.array_accum(virtualtransaction) FROM pg_locksWHERE locktype = ‘virtualxid‘ AND pid <> pg_backend_pid() AND(virtualxid, virtualtransaction) <> (‘1/1‘, ‘-1/0‘)
LOG: statement: DELETE FROM reorg.log_16843
LOG: statement: CREATE TABLE reorg.table_16843 WITH (oids=false) TABLESPACEpg_default AS SELECT id,name FROM ONLY t1
LOG: statement: SELECT reorg.disable_autovacuum(‘reorg.table_16843‘)
LOG: statement: COMMIT
LOG: execute <unnamed>: SELECT indexrelid,reorg.reorg_indexdef(indexrelid, indrelid), indisvalid,pg_get_indexdef(indexrelid) FROM pg_index WHERE indrelid = $1
DETAIL: parameters: $1 = ‘16843‘
LOG: statement: CREATE UNIQUE INDEX index_16849 ON reorg.table_16843 USINGbtree (id)
LOG: execute <unnamed>: SELECT reorg.reorg_apply($1, $2, $3, $4, $5,$6)
DETAIL: parameters: $1 = ‘SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1‘,$2 = ‘INSERT INTO reorg.table_16843 VALUES ($1.*)‘, $3 = ‘DELETE FROMreorg.table_16843 WHERE (id) = ($1.id)‘, $4 = ‘UPDATE reorg.table_16843 SET(id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)‘, $5 = ‘DELETE FROMreorg.log_16843 WHERE id = $1‘, $6 = ‘1000‘
LOG: execute <unnamed>: SELECT pid FROM pg_locks WHERE locktype =‘virtualxid‘ AND pid <> pg_backend_pid() AND virtualtransaction = ANY($1)
DETAIL: parameters: $1 = ‘{}‘
LOG: statement: BEGIN ISOLATION LEVEL READ COMMITTED
LOG: statement: SET LOCAL statement_timeout = 100
LOG: statement: LOCK TABLE t1 IN ACCESS EXCLUSIVE MODE
LOG: statement: RESET statement_timeout
LOG: execute <unnamed>: SELECT reorg.reorg_apply($1, $2, $3, $4, $5,$6)
DETAIL: parameters: $1 = ‘SELECT * FROM reorg.log_16843 ORDER BY id LIMIT $1‘,$2 = ‘INSERT INTO reorg.table_16843 VALUES ($1.*)‘, $3 = ‘DELETE FROMreorg.table_16843 WHERE (id) = ($1.id)‘, $4 = ‘UPDATE reorg.table_16843 SET(id, name) = ($2.id, $2.name) WHERE (id) = ($1.id)‘, $5 = ‘DELETE FROMreorg.log_16843 WHERE id = $1‘, $6 = ‘0‘
LOG: execute <unnamed>: SELECT reorg.reorg_swap($1)
DETAIL: parameters: $1 = ‘16843‘
LOG: statement: COMMIT
LOG: statement: BEGIN ISOLATION LEVEL READ COMMITTED
LOG: execute <unnamed>: SELECT reorg.reorg_drop($1)
DETAIL: parameters: $1 = ‘16843‘
LOG: statement: COMMIT
LOG: statement: BEGIN ISOLATION LEVEL READ COMMITTED
LOG: statement: ANALYZE t1
LOG: statement: COMMIT