gp sql

appendonly

-- drop table if exists test_appendonly;

-- create table test_appendonly with(appendonly=true, compresslevel=5) as

-- select generate_series(0, 1000) a, 'helloworld'::varchar(50) b

-- distributed by(a);

select oid from pg_class where relname='test_appendonly';

select oid, oid::regclass from pg_class where relname='test_appendonly' or relname like '%212266%';

-- \d pg_aoseg.pg_aoseg_212266

select * from get_ao_compression_ratio('test_appendonly');

select sum(eofuncompressed)/sum(eof) as compression_ratio from gp_dist_random('pg_aoseg.pg_aoseg_212266');

select * from get_ao_distribution('test_appendonly') order by segmentid;

select gp_segment_id, tupcount from gp_dist_random('pg_aoseg.pg_aoseg_212266') order by gp_segment_id;

drop table if exists public.table_info cascade;

create table public.table_info (

tablename text, -- 表名

subparname text, -- 分区各

tablecount bigint, -- 表的行数

tablesize bigint, -- 表大小

prettysize text, -- 格式化大小输出

max_div_avg float, -- 斜率,最大节点数据量/平均节点数据量

compression_ratio text -- 压缩率

);

-- 获取表信息

create or replace function public.get_table_info(tablename text) returns setof table_info as $$

def one_table_info(plpy, tablename, subparname, aosegname, privilege):
aosegsql = ""
# plpy.info(privilege)
if privilege == '1':
aosegsql = '''
select '%s' tablename, '%s' subparname,
coalesce(sum(tupcount)::bigint, 0) tablecount,
coalesce(sum(eof)::bigint, 0) tablesize,
pg_size_pretty(coalesce(sum(tupcount)::bigint, 0)) prettysize,
coalesce(max(tupcount)::bigint, 1)/(case when coalesce(avg(tupcount), 1.0) = 0 then 1
else coalesce(avg(tupcount), 1.0) end) max_div_avg,
coalesce(sum(eofuncompressed), 1)/(case when coalesce(sum(eof), 1.0) = 0 then 1
else coalesce(sum(eof), 1.0) end) compression_ratio
from gp_dist_random('%s');
'''%(tablename, subparname, aosegname)
else:
aosegsql = '''
select '%s' tablename, '%s' subparname,
0 tablecount, 0 tablesize, 'permission denied' prettysize,
0 max_div_avg, 0 compression_ratio;
'''%(tablename, subparname)
plpy.info(aosegsql)
result_rv=plpy.execute(aosegsql)
# plpy.info(result_rv[0]);
return result_rv[0] try:
table_name = tablename.lower().split('.')[1]
table_schema = tablename.lower().split('.')[0]
except(IndexError):
plpy.error('Please in put "tableschema.table_name"') # check version of database
check_version_sql = """
select substring(version(), 'Database (.*) build') as version;
"""
rv = plpy.execute(check_version_sql)
version = rv[0]['version']
plpy.execute("set enable_seqscan=off") # get table oid
get_table_oid = ''
if version > '3.4.0':
get_table_oid = """
select a.oid, reloptions, b.segrelid, regclass2text(b.segrelid::regclass) aosegname, relstorage,
case has_table_privilege(user, b.segrelid, 'select') when 't' then '1' else '0' end privilege
from pg_class a left join pg_appendonly b on a.oid=b.relid where a.oid='%s'::regclass;
"""%(tablename)
else:
get_table_oid = """
select oid, reloptions, relaosegrelid, regclass2text(relaosegrelid::regclass) aosegname, relstorage,
case has_table_privilege(user, relaosegrelid, 'select') when 't' then '1' else '0' end privilege
from pg_class where oid='%s'::regclass;
"""%(tablename) try:
rv_oid = plpy.execute(get_table_oid, 5)
if not rv_oid:
plpy.error('Did not find any relation named "' + tablename + '".')
except (Error):
plpy.error('Did not find any relation named "' + tablename + '".') #
table_oid = rv_oid[0]['oid']
if rv_oid[0]['relstorage'] != 'a':
plpy.error(tablename + ' is not appendonly table, this function only support appendonly talbe')
# plpy.info('table_oid') # check if table is partitin table
check_par_table="select count(*) from pg_partition where parrelid=%s"%(table_oid)
if version > '3.4.0':
tablecount_sql = """
select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pa.segrelid,
regclass2text(pa.segrelid::regclass) aosegname,
case has_table_privilege(user, pa.segrelid, 'select') when 't' then '1' else '0' end privilege
from pg_partition pp, pg_partition_rule prl, pg_appendonly pa
where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pa.relid=prl.parchildrelid
order by prl.parruleord;
"""%(table_oid)
else:
tablecount_sql = """
select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pc.relaosegrelid,
regclass2text(pc.relaosegrelid::regclass) aosegname,
case has_table_privilege(user, pc.relaosegrelid, 'select') when 't' then '1' else '0' end privilege
from pg_partition pp, pg_partition_rule prl, pg_class pc
where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pc.oid=prl.parchildrelid
and relaosegrelid <> 0 order by prl.parruleord;
"""%(table_oid) rv = plpy.execute(check_par_table) if rv[0]['count'] == 1:
al = plpy.execute(tablecount_sql)
result_rv = []
rv_tmp = [] totalcount = 0
totalsize = 0
unzipsize = 0
compression_ratio = 1
for i in al:
rv_ao = one_table_info(plpy, tablename, i['parname'], i['aosegname'], str(i['privilege']))
rv_tmp.append(rv_ao)
totalsize = totalsize + rv_ao['tablesize']
totalcount = totalcount + rv_ao['tablecount']
unzipsize = unzipsize + rv_ao['tablesize'] * rv_ao['compression_ratio']
if totalsize == 0:
compression_total = 1
else:
compression_ratio = unzipsize/totalsize
total_count_sql = """
select '%s' as tablename, '###ALL###' as subparname, %d as tablecount, %d as tablesize,
pg_size_pretty(%d::bigint) prettysize, null as max_div_avg, %f as compression_ratio;
"""%(tablename, totalcount, totalsize, totalsize, compression_ratio)
a2 = plpy.execute(total_count_sql)
result_rv.append(a2[0]) plpy.info('===' + total_count_sql) for i in rv_tmp:
result_rv.append(i)
return result_rv;
else:
result_rv = []
rv_ao = one_table_info(plpy, tablename, '', rv_oid[0]['aosegname'], str(rv_oid[0]['privilege']));
result_rv.append(rv_ao)
return result_rv

$$ language plpythonu;

select * from get_table_info('public.test_appendonly');

select get_table_info('public.test_appendonly');

hostname

-- create language plpythonu ;

create or replace function public.hostname() returns text as $$

import socket;

return socket.gethostname();

$$ language plpythonu;

create or replace function public.reverse(str text) returns text as $$

if str != None:

return str[::-1]

else:

return None

$$ language plpythonu;

create or replace function public.json_parse(data text) returns text as $$

import json

try:

mydata = json.loads(data)

except:

return ['Parse json error']

returndata = []

try:

for people in mydata['people']:

returndata.append(people['firstName'] + ' ' + people['lastName'])

except:

return ['Parse json error']

return returndata

$$ language plpythonu;

select hostname();

select hostname() from gp_dist_random('gp_id');

select gp_segment_id, count(1) from gp_dist_random('pg_class') group by 1 order by 1;

select * from gp_configuration;

select * from gp_segment_configuration;

select * from pg_filespace_entry;

select * from gp_configuration_history;

-- 判断某个表是否是分区表

select count(1) from pg_partition where parrelid='public.tb_partition_list_yyyymmdd'::regclass;

select * from pg_partition_rule;

drop view public.v_pg_partitions;

create view public.v_pg_partitions as

select pp.parrelid tableoid, prl.parchildrelid, prl.parname as partitionname,

case

when pp.parkind='h'::"char" then 'hash'::text

when pp.parkind='r'::"char" then 'range'::text

when pp.parkind='l'::"char" then 'list'::text

else NULL::text

end as partitiontype,

case

when pg_get_expr(prl.parrangeend, prl.parchildrelid) = ''

then pg_get_expr(prl.parlistvalues, prl.parchildrelid)

else pg_get_expr(prl.parrangeend, prl.parchildrelid)

end as HIGH_VALUE,

pg_get_partition_rule_def(prl.oid, true) as partitionboundary,

prl.parruleord as partitionposition

from pg_partition pp, pg_partition_rule prl

where pp.paristemplate = false and prl.paroid=pp.oid;

select * from public.v_pg_partitions where tableoid='tb_partition_list_yyyymmdd'::regclass order by partitionposition;

partition

drop table if exists public.tb_partition_range_yyyymmdd cascade;

create table public.tb_partition_range_yyyymmdd (

id numeric,

yyyymmdd date

) with(appendonly=true, compresslevel=5)

distributed by(id)

partition by range(yyyymmdd)

(

partition p20120811 start ('2012-08-11'::date) end ('2012-08-12'::date)

-- partition p20120812 start ('2012-08-12'::date) end ('2012-08-13'::date)

);

drop table if exists public.tb_partition_list_yyyymmdd cascade;

create table public.tb_partition_list_yyyymmdd (

id numeric,

yyyymmdd varchar(128)

) with(appendonly=true, compresslevel=5)

distributed by(id)

partition by list(yyyymmdd)

(

partition p20120811 values('20120811'),

partition p20120812 values('20120812')

);

drop view if exists public.v_pg_add_partitions cascade;

create view public.v_pg_add_partitions as

select pp.parrelid tableoid, prl.parchildrelid, prl.parname as partitionname,

case

when pp.parkind='h'::"char" then 'hash'::text

when pp.parkind='r'::"char" then 'range'::text

when pp.parkind='l'::"char" then 'list'::text

else NULL::text

end as partitiontype,

translate(pg_get_expr(prl.parlistvalues, prl.parchildrelid), '-''::date

character varying bpchar numeric double precision timestamp without time zone', '') as partitionlistvalue,

substring(translate(pg_get_expr(prl.parrangestart, prl.parchildrelid), '-''::date

character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangestart,

substring(translate(pg_get_expr(prl.parrangeend, prl.parchildrelid), '-''::date

character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangeend,

prl.parruleord as partitionposition,

substring(parlistvalues, 'consttype ([0-9]+)')::integer::regtype listtype,

substring(parrangeend, 'consttype ([0-9]+)')::integer::regtype rangetype

from pg_partition pp, pg_partition_rule prl where pp.paristemplate=false and prl.paroid=pp.oid;

create or replace function public.add_partition_info(tableoid oid, days_from_now integer) returns setof text as $$

import datetime

def now():
d = datetime.datetime.now()
format = '%Y%m%d'
return datetime.datetime.strftime(d, format) def add_day(d, n):
format = '%Y%m%d'
d2 = datetime.datetime.strptime(d, format)
d3 = d2 + datetime.timedelta(days = n)
return datetime.datetime.strftime(d3, format) def add_month(d, n):
format = '%Y%m%d'
formatymd = '%Y%m01'
if d.__len__() == 6:
format = '%Y%m'
formatymd = '%Y%m'
d2 = datetime.datetime.strptime(d, format)
d3 = d2 + datetime.timedelta(days = 31 * n)
return datetime.datetime.strftime(d3, formatymd) relist = [] # pre_value 是上一个分区的值,主要是 list 分区时使用
sql = """select *, tableoid::regclass tablename, lead(case when partitionrangeend <> '' then partitionrangeend
else partitionlistvalue end) over(partition by tableoid order by partitionposition desc) as pre_value,
row_number() over(partition by tableoid order by partitionposition desc) rn
from v_pg_add_partitions where substr(partitionname, 1, 3) = 'p20' and tableoid=%s;"""%(tableoid)
rv = plpy.execute(sql);
sql_relation = "select array_to_string(reloptions, ',') reloptions from pg_class where oid=%s"%(tableoid)
rv_relation = plpy.execute(sql_relation) if rv.nrows() == -1:
return []
else:
reloptions = rv_relation[0]['reloptions']
tablename = rv[0]['tablename']
partitiontype = rv[0]['partitiontype']
partitionname = rv[0]['partitionname']
pre_value = rv[0]['pre_value']
now_add_7days = add_day(now(), days_from_now) # 处理 range 分区
if partitiontype == 'range':
rangetype = rv[0]['rangetype']
partitionrangestart = rv[0]['partitionrangestart']
partitionrangeend = rv[0]['partitionrangeend']
interval = int(partitionrangeend) - int(partitionrangestart) # 按月分区
if partitionname.__len__() == 7:
func_add = add_month
interval = int(partitionrangeend[0:6]) - int(partitionrangestart[0:6]) # 按天分区
elif partitionname.__len__() == 9:
func_add = add_day # 分区名不规范,不处理
else:
return [] partitionrangestart = now()
while partitionrangestart < now_add_7days:
partitionrangeend = func_add(partitionrangestart, )
partitionname = 'p' + partitionrangestart
add_sql = "alter table %s add partition %s start ('%s'::%s) end ('%s'::%s)"%(tablename, partitionname, partitionrangestart, rangetype, partitionrangeend, rangetype) if reloptions != None and reloptions != '':
add_sql += 'with(%s);'%(reloptions)
else:
add_sql += ';'
plpy.execute(add_sql);
relist.append(add_sql)
partitionrangestart = func_add(partitionrangestart, interval) # 处理 list 分区
if partitiontype == 'list':
listtype = rv[0]['listtype']
partitionlistvalue = rv[0]['partitionlistvalue']
interval = int(partitionlistvalue) - int(pre_value) # 按月分区
if partitionname.__len__() == 7:
func_add = add_month # 按天分区
elif partitionname.__len__() == 9:
func_add = add_day # 分区名不规范,不处理
else:
return [] partitionlistvalue = now()
while partitionlistvalue < now_add_7days:
partitionname = 'p' + partitionlistvalue
add_sql = "alter table %s add partition %s values('%s'::%s)"%(tablename, partitionname, partitionlistvalue, listtype) if reloptions != None and reloptions != '':
add_sql += 'with(%s);'%(reloptions)
else:
add_sql += ';'
plpy.execute(add_sql);
relist.append(add_sql)
partitionlistvalue = func_add(partitionlistvalue, interval)
return relist

$$ language plpythonu;

select add_partition_info('tb_partition_list_yyyymmdd'::regclass, 1);

select 'grant select on ' || nspname || '.' || relname || ' to gpadmin;' from pg_class a, pg_namespace b

where relname not like '%_1_prt%' and relkind='r' and has_table_privilege('gpadmin', a.oid, 'select')='f'

and a.relnamespace=b.oid and nspname not in ('pg_catalog', 'information_schema') and nspname not like '%pg_tmp%';

view

-- 1.获取表的字段信息,表名pg_class,schema在pg_namespace,字段信息pg_attribute

select a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type from pg_catalog.pg_attribute a,

(

select c.oid from pg_catalog.pg_class c left join pg_catalog.pg_namespace n on n.oid=c.relnamespace

where c.relname='pg_class' and n.nspname='pg_catalog'

) b

where a.attrelid=b.oid and a.attnum > 0 and not a.attisdropped order by a.attnum;

-- 1.1 获取表的字段信息

select a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type from pg_catalog.pg_attribute a

where a.attrelid='pg_catalog.pg_class'::regclass and a.attnum > 0 and not a.attisdropped order by a.attnum;

-- 2. 获取表的分布键gp_distribution_policy中

-- 2.1 localoid与pg_class的oid关联,attrnums是一个数组,记录字段attnum,与pg_attribute中的attnum关联

drop table if exists public.cxfa2 cascade;

create table public.cxfa2(a int, b int, c int, d int) distributed by(c, a);

select * from gp_distribution_policy where localoid='cxfa2'::regclass;

select * from gp_distribution_policy a, (select generate_series(1, 10)) i (i), pg_attribute b

where a.attrnums[i.i] is not null and a.localoid=b.attrelid and a.attrnums[i.i]=b.attnum

and a.localoid='public.cxfa2'::regclass order by i.i;

-- 3. 获取建表的时间

select * from pg_stat_file('pg_hba.conf');

drop type if exists public.stat_file cascade;

create type public.stat_file as (

size bigint,

access timestamp(0),

modification timestamp(0),

change timestamp(0)

);

create or replace function public.get_file_stat(filename text) returns stat_file as $$

import os, time

size = None

access = None

modification = None

change = None

try:
a = os.stat(filename)
size = int(a.st_size)
access = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_atime))
modification = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_mtime))
change = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_ctime))
except Exception, e:
pass
return [size, access, modification, change]

$$ language plpythonu;

select * from get_file_stat('pg_hba.conf');

drop view if exists public.v_table_modify_time cascade;

create view public.v_table_modify_time as

select tab_oid, schemaname, tablename, (filestat).access, (filestat).modification, (filestat).change

from

(

select a.oid tab_oid, e.nspname as schemaname, a.relname as tablename,

get_file_stat(fselocation || '/' ||

case when reltablespace=1664 then 'global'

when reltablespace=0 then 'base' || '/' || d.oid

else reltablespace || '/' || d.oid

end

|| '/' || relfilenode) as filestat

from pg_class a, pg_tablespace b, pg_filespace_entry c, pg_namespace e, pg_database d

where d.datname=current_database()

and (case when a.reltablespace = 0 then 1663 else a.reltablespace end)=b.oid

and b.spcfsoid=c.fsefsoid

and e.oid=a.relnamespace

and c.fsedbid=1

and a.relstorage in ('a', 'h')

and a.relkind='r'

) t;

select 'drop table ' || schemaname || '.' || tablename || ' cascade;'

from v_table_modify_time where access < now() - '1 days'::interval and tablename not like '%_1_prt_p%'

and schemaname='public' order by access;

-- 自定义类型转换

select castfunc::regprocedure from pg_cast where castsource='text'::regtype and casttarget='date'::regtype;

-- select '20180526'::date;

-- select date('20180526');

-- select date('2018-05-26');

-- select cast('2018-05-26' as date);

create or replace function public.regclass2text(a regclass) returns text as $$

return a;

$$ language plpythonu;

drop cast if exists (regclass as text) cascade;

create cast(regclass as text) with function regclass2text(a regclass);

select 57377::regclass::text;

create view v_gp_configuration as

select content

from gp_segment_configuration a, pg_filespace_entry b, pg_filespace create

where a.dbid=b.fsedbid and b.fsefsoid=c.oid and c.fsname='pg_system';

https://www.cnblogs.com/someblue/p/4225694.html

https://blog.csdn.net/menggudaoke/article/details/78843749

上一篇:卷积神经网络的一些经典网络(Lenet,AlexNet,VGG16,ResNet)


下一篇:利用远程服务器在docker容器搭建pyspider运行时出错的问题