[20220104]in list 几种写法性能测试.txt
--//以前写过几种in list的写法,从来没有测试过这几种方法的性能测试看看.
1.环境:
SCOTT@book> @ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
create table job_times (sid number, time_ela number,method varchar2(20));
2.in list测试例子:
--//注:我的测试仅仅测试number类型列表,主要我们生产系统用的这类也最多,另外就是xmltable的字符列表估计比较麻烦.
--//也许下篇测试看看.
--//1.使用str2numlist,str2varlist函数,源代码在网上很容易找到.
CREATE OR REPLACE TYPE numtabletype AS TABLE OF NUMBER
/
CREATE OR REPLACE FUNCTION str2numlist (p_string IN VARCHAR2)
RETURN numtabletype
AS
v_str LONG DEFAULT p_string || ',';
v_n NUMBER;
v_data numtabletype := numtabletype ();
BEGIN
LOOP
v_n := TO_NUMBER (INSTR (v_str, ','));
EXIT WHEN (NVL (v_n, 0) = 0);
v_data.EXTEND;
v_data (v_data.COUNT) := LTRIM (RTRIM (SUBSTR (v_str, 1, v_n - 1)));
v_str := SUBSTR (v_str, v_n + 1);
END LOOP;
RETURN v_data;
END;
/
--//select * from table (cast(STR2NURLIST(:st2) as numtabletype));
--//2.使用xmltable,可能仅仅适合11g:
SQL> var a varchar2(60);
SQL> exec :a := '10,20';
PL/SQL procedure successfully completed.
SQL> select * from dept where deptno in (select (column_value).getnumberval() from xmltable(:a));
DEPTNO DNAME LOC
---------- -------------- -------------
10 ACCOUNTING NEW YORK
20 RESEARCH DALLAS
--//3.正则表达式例子:
SELECT *
FROM dept
WHERE deptno IN
( SELECT TO_NUMBER (REGEXP_SUBSTR ( '10,20' ,'[^,]+' ,1 ,LEVEL)) FROM DUAL CONNECT BY REGEXP_SUBSTR ( '10,20' ,'[^,]+' ,1 ,LEVEL) IS NOT NULL);
3.测试脚本:
$ seq -f "%-1.0f" 1e9 90000011 1e10|wc
100 100 1100
$ seq -f "%-1.0f" 1e9 90000011 1e10 | paste -sd',' >|aa.txt
$ cat m16.txt
set verify off
set linesize 32767
variable vmethod varchar2(20);
exec :vmethod := '&&2';
insert into job_times values ( sys_context ('userenv', 'sid') ,dbms_utility.get_time ,:vmethod) ;
commit ;
declare
v_string varchar2(4000);
l_count PLS_INTEGER;
begin
v_string := '1000000000,1090000011,...,9910001089';
for i in 1 .. &&1 loop
select count(*) into l_count from (select * from table (cast(str2numlist(v_string) as numtabletype)));
-- select count(*) into l_count from (select (column_value).getnumberval() from xmltable(v_string));
-- select count(*) into l_count from (select to_number (regexp_substr ( v_string ,'[^,]+' ,1 ,level)) from dual connect by regexp_substr ( v_string ,'[^,]+' ,1 ,level) is not null);
-- DBMS_OUTPUT.PUT_LINE (l_count);
end loop;
end ;
/
update job_times set time_ela = dbms_utility.get_time - time_ela where sid=sys_context ('userenv', 'sid') and method=:vmethod;
commit;
set linesize 270
quit
--//v_string 的值从前面的aa.txt复制过来,我截断了。
4.测试:
--//在测试开始前我猜测使用正则表达式最慢,使用函数应该最快。
$ zzdate ;sqlplus -s -l scott/book @m16.txt 1e6 str2numlist >/dev/null;zzdate
trunc(sysdate)+16/24+33/1440+03/86400 == 2022/01/04 16:33:03 == timestamp'2022-01-04 16:33:03'
trunc(sysdate)+16/24+40/1440+16/86400 == 2022/01/04 16:40:16 == timestamp'2022-01-04 16:40:16'
$ zzdate ;sqlplus -s -l scott/book @m16.txt 1e6 xmltable >/dev/null;zzdate
trunc(sysdate)+16/24+41/1440+00/86400 == 2022/01/04 16:41:00 == timestamp'2022-01-04 16:41:00'
trunc(sysdate)+17/24+12/1440+47/86400 == 2022/01/04 17:12:47 == timestamp'2022-01-04 17:12:47'
$ zzdate ;sqlplus -s -l scott/book @m16.txt 1e6 regexp_substr>/dev/null;zzdate
trunc(sysdate)+17/24+18/1440+50/86400 == 2022/01/04 17:18:50 == timestamp'2022-01-04 17:18:50'
trunc(sysdate)+00/24+43/1440+43/86400 == 2022/01/05 00:43:43 == timestamp'2022-01-05 00:43:43'
METHOD COUNT(*) ROUND(AVG(TIME_ELA),0) SUM(TIME_ELA)
-------------------- ---------- ---------------------- -------------
str2numlist 1 43286 43286
xmltable 1 190635 190635
regexp_substr 1 2668927 2668927
--//没有想到正则表达式执行时间有点夸张,可以明显看出使用函数str2numlist最快。
--//还可以看出正则表达式是一个很耗CPU资源的操作,一些语句即使出现在select部分,输出多条记录对CPU资源但是影响也会很大。