在template1数据库中进行vacuum freeze出现如下报错:
template1=> vacuum freeze template1.pg_catalog.pg_authid;
ERROR: found xmin 1988747257 from before relfrozenxid 2810153180
问题原因
出现这种报错一般第一反应都是因为长事务引起的事务号无法回收,通过查询pg_stat_activity没有发现有长事务,而且是单表无法进行回收。原因是pg会把一些关键的系统表的元数据信息存储在relcache中,rd_isnailed=true就是表明这个表的relcache的relcache是不会去进行更新的,关键的系统表一般也不会去进行表元数据的变更,所以系统表不去更新relcache也是正常的
typedef struct RelationData
{
RelFileNode rd_node; /* relation physical identifier */
/* use "struct" here to avoid needing to include smgr.h: */
struct SMgrRelationData *rd_smgr; /* cached file handle, or NULL */
int rd_refcnt; /* reference count */
BackendId rd_backend; /* owning backend id, if temporary relation */
bool rd_islocaltemp; /* rel is a temp rel of this session */
bool rd_isnailed; /* rel is nailed in cache */
bool rd_isvalid; /* relcache entry is valid */
char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 =
* valid, 2 = temporarily forced */
bool rd_statvalid; /* is rd_statlist valid? */
/*
* rd_createSubid is the ID of the highest subtransaction the rel has
* survived into; or zero if the rel was not created in the current top
* transaction. This can be now be relied on, whereas previously it could
* be "forgotten" in earlier releases. Likewise, rd_newRelfilenodeSubid is
* the ID of the highest subtransaction the relfilenode change has
* survived into, or zero if not changed in the current transaction (or we
* have forgotten changing it). rd_newRelfilenodeSubid can be forgotten
* when a relation has multiple new relfilenodes within a single
* transaction, with one of them occurring in a subsequently aborted
* subtransaction, e.g. BEGIN; TRUNCATE t; SAVEPOINT save; TRUNCATE t;
* ROLLBACK TO save; -- rd_newRelfilenode is now forgotten
*/
SubTransactionId rd_createSubid; /* rel was created in current xact */
SubTransactionId rd_newRelfilenodeSubid; /* new relfilenode assigned in
* current xact */
Form_pg_class rd_rel; /* RELATION tuple */
TupleDesc rd_att; /* tuple descriptor */
Oid rd_id; /* relation's object id */
LockInfoData rd_lockInfo; /* lock mgr's info for locking relation */
RuleLock *rd_rules; /* rewrite rules */
MemoryContext rd_rulescxt; /* private memory cxt for rd_rules, if any */
TriggerDesc *trigdesc; /* Trigger info, or NULL if rel has none */
/* use "struct" here to avoid needing to include rowsecurity.h: */
struct RowSecurityDesc *rd_rsdesc; /* row security policies, or NULL */
/* data managed by RelationGetFKeyList: */
List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */
bool rd_fkeyvalid; /* true if list has been computed */
MemoryContext rd_partkeycxt; /* private memory cxt for the below */
struct PartitionKeyData *rd_partkey; /* partition key, or NULL */
MemoryContext rd_pdcxt; /* private context for partdesc */
struct PartitionDescData *rd_partdesc; /* partitions, or NULL */
List *rd_partcheck; /* partition CHECK quals */
/* data managed by RelationGetIndexList: */
List *rd_indexlist; /* list of OIDs of indexes on relation */
Oid rd_pkindex; /* OID of primary key, if any */
Oid rd_replidindex; /* OID of replica identity index, if any */
/* data managed by RelationGetStatExtList: */
List *rd_statlist; /* list of OIDs of extended stats */
/* data managed by RelationGetIndexAttrBitmap: */
Bitmapset *rd_indexattr; /* identifies columns used in indexes */
Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */
Bitmapset *rd_pkattr; /* cols included in primary key */
Bitmapset *rd_idattr; /* included in replica identity index */
PublicationActions *rd_pubactions; /* publication actions */
/*
* rd_options is set whenever rd_rel is loaded into the relcache entry.
* Note that you can NOT look into rd_rel for this data. NULL means "use
* defaults".
*/
bytea *rd_options; /* parsed pg_class.reloptions */
/* These are non-NULL only for an index relation: */
Form_pg_index rd_index; /* pg_index tuple describing this index */
/* use "struct" here to avoid needing to include htup.h: */
struct HeapTupleData *rd_indextuple; /* all of pg_index tuple */
/*
* index access support info (used only for an index relation)
*
* Note: only default support procs for each opclass are cached, namely
* those with lefttype and righttype equal to the opclass's opcintype. The
* arrays are indexed by support function number, which is a sufficient
* identifier given that restriction.
*
* Note: rd_amcache is available for index AMs to cache private data about
* an index. This must be just a cache since it may get reset at any time
* (in particular, it will get reset by a relcache inval message for the
* index). If used, it must point to a single memory chunk palloc'd in
* rd_indexcxt. A relcache reset will include freeing that chunk and
* setting rd_amcache = NULL.
*/
Oid rd_amhandler; /* OID of index AM's handler function */
MemoryContext rd_indexcxt; /* private memory cxt for this stuff */
/* use "struct" here to avoid needing to include amapi.h: */
struct IndexAmRoutine *rd_indam; /* index AM's API struct */
Oid *rd_opfamily; /* OIDs of op families for each index col */
Oid *rd_opcintype; /* OIDs of opclass declared input data types */
RegProcedure *rd_support; /* OIDs of support procedures */
FmgrInfo *rd_supportinfo; /* lookup info for support procedures */
int16 *rd_indoption; /* per-column AM-specific flags */
List *rd_indexprs; /* index expression trees, if any */
List *rd_indpred; /* index predicate tree, if any */
Oid *rd_exclops; /* OIDs of exclusion operators, if any */
Oid *rd_exclprocs; /* OIDs of exclusion ops' procs, if any */
uint16 *rd_exclstrats; /* exclusion ops' strategy numbers, if any */
void *rd_amcache; /* available for use by index AM */
Oid *rd_indcollation; /* OIDs of index collations */
/*
* foreign-table support
*
* rd_fdwroutine must point to a single memory chunk palloc'd in
* CacheMemoryContext. It will be freed and reset to NULL on a relcache
* reset.
*/
/* use "struct" here to avoid needing to include fdwapi.h: */
struct FdwRoutine *rd_fdwroutine; /* cached function pointers, or NULL */
/*
* Hack for CLUSTER, rewriting ALTER TABLE, etc: when writing a new
* version of a table, we need to make any toast pointers inserted into it
* have the existing toast table's OID, not the OID of the transient toast
* table. If rd_toastoid isn't InvalidOid, it is the OID to place in
* toast pointers inserted into this rel. (Note it's set on the new
* version of the main heap, not the toast table itself.) This also
* causes toast_save_datum() to try to preserve toast value OIDs.
*/
Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */
/* use "struct" here to avoid needing to include pgstat.h: */
struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
} RelationData;
relfrozenxid存储在rd_rel中
CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,RelationRelation_Rowtype_Id) BKI_SCHEMA_MACRO
{
Oid oid; /* oid */
NameData relname; /* class name */
Oid relnamespace; /* OID of namespace containing this class */
Oid reltype; /* OID of entry in pg_type for table's
* implicit row type */
Oid reloftype; /* OID of entry in pg_type for underlying
* composite type */
Oid relowner; /* class owner */
Oid relam; /* index access method; 0 if not an index */
Oid relfilenode; /* identifier of physical storage file */
/* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */
Oid reltablespace; /* identifier of table space for relation */
int32 relpages; /* # of blocks (not always up-to-date) */
float4 reltuples; /* # of tuples (not always up-to-date) */
int32 relallvisible; /* # of all-visible blocks (not always
* up-to-date) */
Oid reltoastrelid; /* OID of toast table; 0 if none */
bool relhasindex; /* T if has (or has had) any indexes */
bool relisshared; /* T if shared across databases */
char relpersistence; /* see RELPERSISTENCE_xxx constants below */
char relkind; /* see RELKIND_xxx constants below */
int16 relnatts; /* number of user attributes */
/*
* Class pg_attribute must contain exactly "relnatts" user attributes
* (with attnums ranging from 1 to relnatts) for this class. It may also
* contain entries with negative attnums for system attributes.
*/
int16 relchecks; /* # of CHECK constraints for class */
bool relhasrules; /* has (or has had) any rules */
bool relhastriggers; /* has (or has had) any TRIGGERs */
bool relhassubclass; /* has (or has had) child tables or indexes */
bool relrowsecurity; /* row security is enabled or not */
bool relforcerowsecurity; /* row security forced for owners or
* not */
bool relispopulated; /* matview currently holds query results */
char relreplident; /* see REPLICA_IDENTITY_xxx constants */
bool relispartition; /* is relation a partition? */
Oid relrewrite; /* heap for rewrite during DDL, link to
* original rel */
TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */
TransactionId relminmxid; /* all multixacts in this rel are >= this.
* this is really a MultiXactId */
#ifdef CATALOG_VARLEN /* variable-length fields start here */
/* NOTE: These fields are not present in a relcache entry's rd_rel field. */
aclitem relacl[1]; /* access permissions */
text reloptions[1]; /* access-method-specific options */
pg_node_tree relpartbound; /* partition bound node tree */
#endif
} FormData_pg_class;
vacuum过程中这个函数会对这行的xid和relfrozenxid进行比较,如果xid早于relfrozenxid就会出现这个报错,
heap_prepare_freeze_tuple(HeapTupleHeader tuple,
TransactionId relfrozenxid, TransactionId relminmxid,
TransactionId cutoff_xid, TransactionId cutoff_multi,
xl_heap_freeze_tuple *frz, bool *totally_frozen_p)
{
bool changed = false;
bool xmax_already_frozen = false;
bool xmin_frozen;
bool freeze_xmax;
TransactionId xid;
frz->frzflags = 0;
frz->t_infomask2 = tuple->t_infomask2;
frz->t_infomask = tuple->t_infomask;
frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
/* Process xmin */
xid = HeapTupleHeaderGetXmin(tuple); #取当前行的xmin,会先判断是否是frozenxid,如果是就会等于FrozenTransactionId,否则就是当前行的xmin。
xmin_frozen = ((xid == FrozenTransactionId) ||
HeapTupleHeaderXminFrozen(tuple));
if (TransactionIdIsNormal(xid))
{
if (TransactionIdPrecedes(xid, relfrozenxid)) #如果xid早于relfrozenxid就会出现上述的报错
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("found xmin %u from before relfrozenxid %u",
xid, relfrozenxid)));
if (TransactionIdPrecedes(xid, cutoff_xid))
{
if (!TransactionIdDidCommit(xid))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("uncommitted xmin %u from before xid cutoff %u needs to be frozen",
xid, cutoff_xid)));
frz->t_infomask |= HEAP_XMIN_FROZEN;
changed = true;
xmin_frozen = true;
}
}
.....
}
xid是如何进行比较的呢,如果不超过21亿是正常比较,超过21亿的需要取模进行比较,如果行上的t_infomask中HEAP_XMIN_COMMITTED,HEAP_XMIN_INVALID同时存在就会被认为是FrozenTransactionId。
#define HEAP_XMIN_COMMITTED 0x0100 /* t_xmin committed */
#define HEAP_XMIN_INVALID 0x0200 /* t_xmin invalid/aborted */
#define HEAP_XMIN_FROZEN (HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID)
#define InvalidTransactionId ((TransactionId) 0)
#define BootstrapTransactionId ((TransactionId) 1)
#define FrozenTransactionId ((TransactionId) 2)
#define FirstNormalTransactionId ((TransactionId) 3)
#define MaxTransactionId ((TransactionId) 0xFFFFFFFF)
所以问题的根因就是vacuum是读到的relfrozexid来自relcache的,更新catalog中的relfrozexid不会去刷新relcache,导致一直读到的是错误的relfrozexid,所以出现vacuum freeze报错的问题。10.2,9.6.7,9.5.11,9.4.16到修复版本之间的版本的pg实例都会存在相关问题。
10.5, 9.6.10, 9.5.14, 9.4.19对这个问题进行了修复。https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=817f9f9a8a1932a0cd8c6bc5c9d3e77f6a80e659
问题解法
目前可以通过两种方式进行修复
1.重启数据库,重启后会重新读入新数据内容到relcache中,相当于刷新relcache。
2.删除$PGDATA/global/pg_internal.init,这个文件就是存储的relcache的内容,有新的连接连入会创建新的pg_internal.init文件。