sql 批量查询数据问题处理

从数据库中获取数据

SELECT
    re.tenant_id as adm_div_code,
    re.pa_year, 
  re.pa_bt_bt_no nt_pa_vo_id, 
    #re.pa_bt_bt_no, 
  pi.created_date AS bi_date,
    re.accept_bt_code , 
    ''  as exec_bt_name, 
  '' as bt_fax_proj_code,
    tt.bt_fax_pro_code as bt_fax_code,
    tt.bt_fax_pro_name  as  bt_fax_name,
    tt.bt_type as bt_in_category_code,
    ''  as bt_in_category_name,
    tt.in_sort_code as in_sort_code,
   '' as in_sort_name,
    re.bi_no as bt_fax_pa_no,
  re.pa_bt_name  as act_payer_name, 
  '' as charge_stand_name,
  re.receivable_pb_am as pb_am, 
  re.ac_pb_am as pbd_am,
  p.qty  as pa_number,
  re.trade_time  as pa_date#
FROM
    bt_pa_red re
LEFT JOIN bt_pa_pro_be p ON re.bid = p.ref_pa_record_bid  and p.is_normal='1'
LEFT JOIN nt_prd tt ON tt.bid = p.ref_project_bid  and  tt.is_normal='1'
LEFT JOIN bt_bi pi  ON  pi.bi_no = re.bi_no   and pi.is_normal='1'
where re.is_normal='1'   ;

 

可以看出从bt_pa_red查询联表bt_pa_pro_be,nt_prd, bt_bi。
三张表数据量:
bt_bi 2900461
bt_pa_pro_be 084380
bt_pa_red 2970776
nt_prd 2370

实际查询过程中,执行查询的机器64G内存都不够用,无法查询出想要的数据。


使用python逐行读取bt_pa_red表,再按sql中jion条件去获取数据
在写代码执行发现某一行数据重复很多,重复量达到289052。找到有问题的数据行调试,原来bt_pa_pro_be 表中bi_no 列存在空值,导致匹配数据量很大。
python代码加判断 if bt_fax_pa_no is not None and len(bt_fax_pa_no)>0: python在执行没有重复数据。同时把sql修改条件,最后where 增加 and length(bi_no)>2,sql 也就能查询,最终查询出300万行数据


python 最终代码

#encoding: utf-8
#!/bin/python3

import sys
print(sys.path)
import pymysql
import time

# sql 插入完整版
conn = pymysql.connect(host="127.0.0.1",user="bt",password="123456",db="bt",port=3306)
db1 = pymysql.connect(host="127.0.0.1",user="bt",password="123456",db="bt",port=3306)
db2 = pymysql.connect(host="127.0.0.1",user="bt",password="123456",db="bt",port=3306)
db3 = pymysql.connect(host="127.0.0.1",user="bt",password="123456",db="bt",port=3306)
db4 = pymysql.connect(host="127.0.0.1",user="bt",password="123456",db="bt",port=3306)


if __name__ == "__main__":
    print(time.asctime( time.localtime(time.time()) ))
    cursor = pymysql.cursors.SSCursor(conn)
    cursor.execute("select * from bt_pa_red where is_normal='1' and  trade_time >='2021-01-01 00:00:00'  and trade_time <='2021-06-30 23:59:59'")
    count=0
    while True:
        count=count+1  #计数,放在最开始
        row = cursor.fetchone()
        if not row:
            break
        adm_div_code = row[9]
        pa_year=row[10]
        nt_pa_vo_id=row[17]
        bi_date=None
        accept_bt_code=row[12]
        exec_bt_name=""
        bt_fax_proj_code=""
        bt_fax_code=None
        bt_fax_name=None
        bt_in_category_code=None
        bt_in_category_name=""
        in_sort_code=None
        in_sort_name=""
        bt_fax_pa_no=row[22]
        act_payer_name=row[23]
        charge_stand_name=""
        pb_am=row[18]
        pbd_am=row[31]
        pa_number=None
        pa_date=row[33]

        #联表 bt_pa_pro_be
        rebid = row[11]
        bt_pa_pro_be_sql  = "select * from bt_pa_pro_be where ref_pa_record_bid='"+rebid+"'  and is_normal='1'"
        cursor_bt_pa_pro_be=db1.cursor()
        cursor_bt_pa_pro_be.execute(bt_pa_pro_be_sql)
        bt_pa_pro_be_all_data = cursor_bt_pa_pro_be.fetchall()

        for detail_item in bt_pa_pro_be_all_data:
            pa_number = detail_item[15]
            detail_ref_project_bid = detail_item[13]

            #开始处理 bt_bi 表
            if bt_fax_pa_no is not None and len(bt_fax_pa_no)>0:   #增加长度判断 去掉重复
                bt_bi_sql = "select * from bt_bi where bi_no = '"+bt_fax_pa_no+"' and is_normal='1'"
               # print(bt_bi_sql)
                bt_bi_cursor=db2.cursor()
                bt_bi_cursor.execute(bt_bi_sql)
                bt_bi_all_data=bt_bi_cursor.fetchall()
                for bt_bi_item in bt_bi_all_data:
                    bi_date = bt_bi_item[3]

                    #开始处理nt_prd
                    nt_prd_sql ="select * from nt_prd where bid ='"+detail_ref_project_bid+"' and is_normal='1'"
                    nt_prd_cursor=db3.cursor()
                    nt_prd_cursor.execute(nt_prd_sql)
                    nt_prd_all_data=nt_prd_cursor.fetchall()
                    for nt_prd_item in nt_prd_all_data:
                        bt_fax_code =nt_prd_item[12]
                        bt_fax_name=nt_prd_item[13]
                        bt_in_category_code=nt_prd_item[14]
                        in_sort_code=nt_prd_item[16]

                        insert_sql= """insert into p_result values("{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}")""".format(adm_div_code,adm_div_name,pa_year,nt_pa_vo_id,bi_date,accept_bt_code,exec_bt_name,bt_fax_proj_code,bt_fax_code,bt_fax_name,bt_in_category_code,bt_in_category_name,in_sort_code,in_sort_name,bt_fax_pa_no,act_payer_name,charge_stand_name,pb_am,pbd_am,pa_number,pa_date)
                        content = "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(adm_div_code,adm_div_name,pa_year,nt_pa_vo_id,bi_date,accept_bt_code,exec_bt_name,bt_fax_proj_code,bt_fax_code,bt_fax_name,bt_in_category_code,bt_in_category_name,in_sort_code,in_sort_name,bt_fax_pa_no,act_payer_name,charge_stand_name,pb_am,pbd_am,pa_number,pa_date)
                       # print(content)
                        # 写入文件内容
                        fout = open('result', 'a+', encoding='utf8')
                        fout.write(content)
                        fout.close()

                        #print(insert_sql)
                        insert_cursor = db4.cursor()
                        insert_cursor.execute(insert_sql)
                        db4.commit()

    print(time.asctime( time.localtime(time.time()) ))

 

上一篇:99、静态类型和动态类型,静态绑定和动态绑定的介绍


下一篇:链表上基本操作的实现