参考文档: PostgreSQL 9 Administration Cookbook (第二版)中文版 第八章 监控和诊断
select * from pg_stat_user_tables --和pg_stat_all_tables一样,但只显示用户表,当前数据库中每个表一行,显示有关访问指定表的统计信息
select * from pg_stat_user_indexes --和pg_stat_all_indexes一样,但只显示系统表上的索引.
-- 可以使用pgadmin快速查看数据库的当前状态,需要安装adminpack扩展
create extension adminpack;
--检查postgres用户是否已经连接上
select * from pg_stat_activity where usename='postgres'; -- 返回值,表示已经连接
pg_stat_activity,每个服务器进程一行,显示与那个进程的当前活动相关的信息,例如状态和当前查询
select datname,usename,client_addr,client_port,application_name from pg_stat_activity; -- 查看连接信息,比如用户、ip、端口等等
-- 在psql中,反复执行某一个查询语句,比如,先查询时间,然后每隔4秒执行一次该查询语句
mydb=# select now();
now
-------------------------------
2021-02-03 09:28:55.455629+08
(1 row)
mydb=# \watch 4
Wed 03 Feb 2021 09:29:00 AM CST (every 4s)
now
-------------------------------
2021-02-03 09:29:00.447043+08
(1 row)
Wed 03 Feb 2021 09:29:04 AM CST (every 4s)
now
-------------------------------
2021-02-03 09:29:04.452628+08
(1 row)
Wed 03 Feb 2021 09:29:08 AM CST (every 4s)
now
------------------------------
2021-02-03 09:29:08.45936+08
(1 row)
Wed 03 Feb 2021 09:29:12 AM CST (every 4s)
now
------------------------------
2021-02-03 09:29:12.46499+08
(1 row)
--检查那个查询在运行(设置了track_activities=on后,pg会自动收集所有当前运行的查询)
select datname,usename,state,query from pg_stat_activity; --检查已连接的用户正在运行什么
select datname,usename,state,query from pg_stat_activity where state='active'; --检查已连接的用户正在运行什么,只查询活跃的用户
-- 捕获运行时间数毫秒的查询(OLTP查询,一般都很短,在pg中用视图,可能会查询不到)
-- 获得执行时间最长的查询
select current_timestamp - query_start as runtime,datname,usename,query from pg_stat_activity where state='active' order by 1 desc;
select current_timestamp - query_start as runtime,datname,usename,query
from pg_stat_activity where state='active' and current_timestamp - query_start>'1 min' order by 1 desc; -- 获取运行时间超过1分钟的查询
也可以使用ps命令查询,但是不一定每次都能查询的到。需要设置参数update_process_title = on(默认已设置)
-- 检查那个查询正在运行或被阻塞
SELECT datname,usename,current_query FROM pg_stat_activity WHERE waiting = true; -- 10.0中没有这个字段
mydb=# \timing on
Timing is on.
mydb=# select pg_sleep(10); -- 可以使用pg_sleep来制造一个查询,然后捕获该查询,该语句意思是等待10秒再执行
pg_sleep
----------
(1 row)
Time: 10008.825 ms (00:10.009)
mydb=#
-- 确定谁阻塞了一个查询 (因为where条件,没有执行成功)
SELECT
w.query as waiting_query,
w.pid as w_pid,
w.usename as w_user,
l.query as locking_query,
l.pid as l_pid,
l.usename as l_user,
t.schemaname || '.' || t.relname as tablename
from pg_stat_activity w
join pg_locks l1 on w.pid = l1.pid and not l1.granted
join pg_locks l2 on l1.relation = l2.relation and l2.granted
join pg_stat_activity l on l2.pid = l.pid
join pg_stat_user_tables t on l1.relation = t.relid
where w.waiting;
--kill掉指定的会话,可以通过pg_stat_activity查询到pid,再根据pg_terminate_backend(pid)来kill 。
select pg_terminate_backend(pid) from pg_stat_activity; -- 会断开当前的链接
select pg_terminate_backend(21044); -- kill掉会话21044 ,这个是直接kill掉会话了
select pg_cancel_backend(); -- 取消当前查询 。这个只是取消查询,不kill掉会话
select pg_sleep(100); -- 先进行一个查询,100秒后再查询
select * from pg_stat_activity; select pid from pg_stat_activity where query like 'select pg%'; -- 查询到具体的pid
select pg_cancel_backend(36465); -- 取消查询
mydb=# select pg_sleep(100);
ERROR: canceling statement due to user request --取消的结果
Time: 46710.351 ms (00:46.710)
mydb=#
-- 使用语句超时设置来清除超长时间运行的查询,statement_timeout,默认是0,是不设置的。设置为10s,做个测试
mydb=# show statement_timeout;
statement_timeout
-------------------
0
(1 row)
mydb=#
mydb=# set statement_timeout to '10 s'; -- 在当前会话中设置参数为10秒,超过10秒取消查询
SET
mydb=# \timing on
Timing is on.
mydb=# select pg_sleep(100); -- 进行一个查询,100秒,10秒后取消查询
ERROR: canceling statement due to statement timeout
Time: 10002.691 ms (00:10.003)
mydb=#
--kill掉"在事务中空闲(idle in transaction)的查询"
select pg_terminate_backend(pid)
from pg_stat_activity
where current_query = 'idle in transaction' -- idle
and current_timestamp - query_start > '10 min';
-- 确定是否某人在使用某张表(原理是,查询所有与查询数据相关的使用计数器发生变化的表)
create temp table tmp_stat_user_tables as select * from pg_stat_user_tables;
select * from pg_stat_user_tables n
join tmp_stat_user_tables t
on n.relid=t.relid
and (n.seq_scan,n.idx_scan,n.n_tup_ins,n.n_tup_upd,n.n_tup_del) <>
(t.seq_scan,t.idx_scan,t.n_tup_ins,t.n_tup_upd,t.n_tup_del);
select pg_stat_reset(); -- 或重置所有表的统计信息。重置后为0,可以通过查询使用次数不为0的记录来检测表的使用情况。
--确定一张表的最后被使用的时间
--旧版本的pg的做法,需要安装python扩展
CREATE TYPE fileinfo AS (
filename text,
filesize bigint,
ctime abstime,
mtime abstime,
atime abstime
);
CREATE OR REPLACE FUNCTION table_file_info(schemaname text, tablename
text)
RETURNS SETOF fileinfo
AS $$
import datetime, glob, os
db_info = plpy.execute("""
select datname as database_name,
current_setting('data_directory') || '/base/' || db.oid as
data_directory
from pg_database db
where datname = current_database()
""")
#return db_info[0]['data_directory']
table_info_plan = plpy.prepare("""
select nspname as schemaname,
relname as tablename,
relfilenode as filename
from pg_class c
join pg_namespace ns on c.relnamespace=ns.oid
where nspname = $1
and relname = $2;
""", ['text', 'text'])
table_info = plpy.execute(table_info_plan, [schemaname, tablename])
filemask = '%s/%s*' % (db_info[0]['data_directory'], table_info[0]
['filename'])
res = []
for filename in glob.glob(filemask):
fstat = os.stat(filename)
res.append((
filename,
fstat.st_size,
datetime.datetime.fromtimestamp(fstat.st_ctime).isoformat(),
datetime.datetime.fromtimestamp(fstat.st_mtime).isoformat(),
datetime.datetime.fromtimestamp(fstat.st_atime).isoformat()
))
return res
$$ LANGUAGE plpythonu;
select
max(mtime) as latest_mod,
max(atime) as latest_read
from table_file_info(<schemaname>, <tablename>);
-- pg新版本的做法 .该方法使用文件系统作为信息源,获取PG修改和读取文件的时间来初步判断。该方法不是太可靠,因为autovacuum可能也会访问表
create or replace function table_file_access_info (
in schemaname text,in tablename text,
out last_access timestamp with time zone,
out last_change timestamp with time zone ) language plpgsql as $func$
declare
tabledir text;
filenode text;
begin
select regexp_replace(
current_setting('data_directory') || '/' || pg_relation_filepath(c.oid),
pg_relation_filenode(c.oid) || '$',''),
pg_relation_filenode(c.oid)
into tabledir,filenode
from pg_class c
join pg_namespace ns
on c.relnamespace = ns.oid
and c.relname = tablename
and ns.nspname = schemaname ;
raise notice 'tabledir :% -filenode :%',tabledir,filenode;
-- find lastest access and modification times over all segments
select max((pg_stat_file(tabledir || filename)).access),
max((pg_stat_file(tabledir || filename)).modification)
into last_access,last_change from pg_ls_dir(tabledir) as filename
-- only use files matching <basefilename>[.segmentnumber]
where filename ~('^' || filenode || '([.]?[0-9]+)?$');
end;
$func$;
select table_file_access_info('public','eater')
mydb=# select table_file_access_info('public','eater');
NOTICE: tabledir :/postgres/10/data/base/16393/ -filenode :17063
table_file_access_info
-----------------------------------------------------
("2021-02-03 12:44:57+08","2021-01-26 16:31:10+08")
(1 row)
Time: 4.889 ms
mydb=#
-- 临时数据使用的磁盘空间
--旧版本pg的做法,其实就是查询到临时表,使用操作系统命令,查看大小
db=# select current_setting('temp_tablespaces');
current_setting
-----------------
temp1, temp2
(1 row)
db=#
db=# select spcname,spclocation from pg_tablespace where spcname in
('temp1', 'temp2');
spcname | spclocation
---------+---------------
temp1 | /test/pg_tmp1
temp2 | /test/pg_tmp2
(2 rows)
db=# \q
user@host:~$
sudo du -s /test/pg_tmp1 /test/pg_tmp2
102136 /test/pg_tmp1
35144 /test/pg_tmp2
--新版本pg的做法,
--检查数据库是否使用了自定义的表以存放临时文件,空则表示没有使用临时表空间,而临时对象则被放置到每个数据库的默认表空间中
select current_setting('temp_tablespaces');
-- 如果存在临时表,则使用以下的语句查看临时表的大小
with temporary_tablespaces as (
select unnest(string_to_array(
current_setting('temp_tablespaces'),',')
) as temp_tablespace
)
select tt.temp_tablespace,pg_tablespace_location(t.oid) as location,
-- t.spclocation as location , -- for 9.0 and 9.1
pg_tablespace_size(t.oid) as size -- 可以通过pg_tablespace_size(oid) ,pg_tablespace(name) 获取到表空间的大小
from temporary_tablespaces tt
join pg_tablespace t on t.spcname = tt.temp_tablespace
order by 1 ;
-- 如果temp_tablespace设置为空,则临时表放在和普通表相同的位置 ,通常存放在主数据库目录的pgsql_tmp目录下
-- 查询实例的主目录的临时目录
mydb=# select current_setting('data_directory') || '/base/pgsql_tmp';
?column?
----------------------------------
/postgres/10/data/base/pgsql_tmp
(1 row)
mydb=#
-- 数据库使用的临时文件的总量,可以在pg_stat_database视图中查看到
select datname,temp_files,temp_bytes,stats_reset from pg_stat_database;
select sum(pg_total_relation_size(relid)) -- 旧版本的书上的查询方法
from pg_stat_all_tables
where schemaname like 'pg_%temp%';
mydb=# select sum(pg_total_relation_size(relid))
mydb-# from pg_stat_all_tables
mydb-# where schemaname like 'pg_%temp%';
sum
-------
16384
(1 row)
mydb=#
-- 查看表或者索引是否有膨胀(膨胀的原因是,大量的增删改,PG的运行机制,并没有彻底删除掉这些数据)
select pg_relation_size(relid) as tablesize,schemaname,relname,n_live_tup -- n_live_tup活着的行的估计数量,还记录增删改的行数
from pg_stat_user_tables
where relname = <tablename>;
-- 使用pgBadger来处理pg的log日志 (略)
--或者使用pg自己的方式来记录日志,然后使用egrep命令来查看相关的错误 (主要是查看FATAL和ERROR)
-bash-4.2$ egrep "FATAL|ERROR" postgresql-2021-02-03_134453.log
2021-02-03 13:45:37.572 CST [65512] ERROR: relation "pg_stat_statements" does not exist at character 15
2021-02-03 13:46:04.713 CST [65512] ERROR: relation "pg_stat_statements" does not exist at character 15
2021-02-03 13:47:15.839 CST [65512] ERROR: relation "pg_stat_statement" does not exist at character 15
2021-02-03 13:47:22.413 CST [65512] ERROR: relation "pg_stat_statements" does not exist at character 15
2021-02-03 13:53:44.961 CST [65512] ERROR: column "t" does not exist at character 116
-bash-4.2$
-- 分析查询的实时性能,通过pg_stat_statements模块,是个扩展,需要安装,安装后,就有pg_stat_statements表,否则没有的
pg_stat_statements模块提供一种方法追踪一个服务器所执行的所有 SQL 语句的执行统计信息。
该模块必须通过在postgresql.conf的shared_preload_libraries中增加pg_stat_statements来载入,
因为它需要额外的共享内存。这意味着增加或移除该模块需要一次服务器重启。
当pg_stat_statements被载入时,它会跟踪该服务器 的所有数据库的统计信息。该模块提供了
一个视图 pg_stat_statements以及函数pg_stat_statements_reset 和pg_stat_statements用于访问和
操纵这些统计信息。这些视图 和函数不是全局可用的,但是可以用CREATE EXTENSION
pg_stat_statements 为特定数据库启用它们。
-- 编辑pg的参数,编辑后需要重启pg库
#shared_preload_libraries = '' # (change requires restart)
shared_preload_libraries = 'pg_stat_statements'
-- 创建扩展
mydb=# create extension pg_stat_statements;
CREATE EXTENSION
mydb=#
-- 获取查询最频繁的查询 (pg_stat_statements视图里面有很多有用的统计,具体可以参考官方文档)
select query,calls from pg_stat_statements order by calls desc ; -- 按照被执行的次数排序 call 被执行的次数,
-- 获取最高平均执行时间的查询
select query,total_time/calls as avg,calls from pg_stat_statements order by 2 desc ;
END