浅谈create table as 和 insert into select 复制表遇到的问题

1、 新建一个表,表结构与原表是相同的。建表语句:
CREATE TABLE huayu_test1 (
waybill_id bigint(20) NOT NULL AUTO_INCREMENT COMMENT '?????',
waybill_no varchar(20) NOT NULL COMMENT '???',
post_date datetime NOT NULL COMMENT '????????',
order_id varchar(50) DEFAULT NULL COMMENT '?????????',
batch_no varchar(50) DEFAULT NULL COMMENT '???',
biz_product_id varchar(20) NOT NULL COMMENT '??????',
biz_product_no varchar(20) NOT NULL COMMENT '??????',
biz_product_name varchar(50) NOT NULL COMMENT '??????',
sender_type varchar(20) DEFAULT NULL COMMENT '0 ?? 1????',
sender_id varchar(50) DEFAULT NULL COMMENT '??????',
sender_no varchar(50) DEFAULT NULL COMMENT '??????(??????)',
sender varchar(200) DEFAULT NULL COMMENT '??????',
sender_linker varchar(200) DEFAULT NULL,
sender_fixtel varchar(50) DEFAULT NULL COMMENT '??????',
sender_mobile varchar(50) DEFAULT NULL COMMENT '??????',
sender_addr varchar(200) DEFAULT NULL COMMENT '??????',
sender_country_no varchar(20) DEFAULT NULL COMMENT '??????',
sender_country_name varchar(50) DEFAULT NULL COMMENT '??????',
sender_province_no varchar(20) DEFAULT NULL COMMENT '??????',
sender_province_name varchar(50) DEFAULT NULL COMMENT '??????',
sender_city_no varchar(20) DEFAULT NULL COMMENT '??????',
sender_city_name varchar(50) DEFAULT NULL COMMENT '??????',
sender_county_no varchar(20) DEFAULT NULL COMMENT '??????',
sender_county_name varchar(50) DEFAULT NULL COMMENT '??????',
sender_notes varchar(1000) DEFAULT NULL COMMENT '????',
receiver_no varchar(50) DEFAULT NULL COMMENT '??????????????',
receiver varchar(200) DEFAULT NULL COMMENT '??????',
receiver_linker varchar(200) DEFAULT NULL,
receiver_fixtel varchar(50) DEFAULT NULL COMMENT '??????',
receiver_mobile varchar(50) DEFAULT NULL COMMENT '??????',
receiver_addr varchar(200) DEFAULT NULL COMMENT '??????',
receiver_country_no varchar(20) DEFAULT NULL COMMENT '??????',
receiver_country_name varchar(50) DEFAULT NULL COMMENT '??????',
receiver_province_no varchar(20) DEFAULT NULL COMMENT '??????',
receiver_province_name varchar(50) DEFAULT NULL COMMENT '??????',
receiver_city_no varchar(20) DEFAULT NULL COMMENT '??????',
receiver_city_name varchar(50) DEFAULT NULL COMMENT '??????',
receiver_county_no varchar(20) DEFAULT NULL COMMENT '??????',
receiver_county_name varchar(50) DEFAULT NULL COMMENT '??????',
insurance_flag char(1) DEFAULT NULL COMMENT '????????????1:?? 2:?? 3:??',
insurance_amount decimal(12,2) DEFAULT NULL COMMENT '??????',
pickup_type varchar(20) DEFAULT NULL COMMENT '?????0 ???????1 ??????',
payment_mode varchar(20) DEFAULT NULL COMMENT '????(???) 1:??? 2:??? 3:??? 4:??????? 5:?? 6:?/??? 7:???',
real_weight decimal(8,0) DEFAULT NULL COMMENT '????',
fee_weight decimal(8,0) DEFAULT NULL,
volume_weight decimal(8,0) DEFAULT NULL,
length decimal(8,0) DEFAULT NULL,
width decimal(8,0) DEFAULT NULL,
height decimal(8,0) DEFAULT NULL,
quantity int(11) DEFAULT NULL,
packaging varchar(20) DEFAULT NULL,
package_material varchar(20) DEFAULT NULL,
goods_desc varchar(200) DEFAULT NULL,
contents_quantity int(11) DEFAULT NULL,
cod_flag char(1) DEFAULT NULL,
cod_amount decimal(12,2) DEFAULT NULL,
receipt_flag char(1) DEFAULT NULL,
receipt_waybill_no varchar(20) DEFAULT NULL,
receipt_fee_amount decimal(12,2) DEFAULT NULL,
insurance_premium_amount decimal(12,2) DEFAULT NULL,
valuable_flag char(1) DEFAULT NULL,
cargo_total_price decimal(12,2) DEFAULT NULL,
cargo_total_purchasing_price decimal(12,2) DEFAULT NULL,
allow_fee_flag char(1) DEFAULT NULL,
is_feed_flag char(1) DEFAULT NULL,
manual_fee_type char(1) DEFAULT NULL,
fee_date datetime DEFAULT NULL,
discount_rate decimal(6,2) DEFAULT NULL,
settlement_mode varchar(20) DEFAULT NULL,
payment_state char(1) DEFAULT NULL,
payment_date datetime DEFAULT NULL,
payment_id varchar(50) DEFAULT NULL,
manage_org_code varchar(20) DEFAULT NULL,
postage_suite_code varchar(20) DEFAULT NULL,
fee_area_suite_code varchar(20) DEFAULT NULL,
fee_area_code varchar(20) DEFAULT NULL,
fee_area_name varchar(20) DEFAULT NULL,
is_advance_flag char(1) DEFAULT NULL,
deliver_type char(1) DEFAULT NULL,
deliver_sign varchar(50) DEFAULT NULL,
deliver_date char(1) DEFAULT NULL,
deliver_notes varchar(1000) DEFAULT NULL,
deliver_pre_date date DEFAULT NULL,
battery_flag char(1) DEFAULT NULL,
is_jinguan varchar(20) DEFAULT NULL,
workbench varchar(20) DEFAULT NULL,
electronic_preferential_no varchar(50) DEFAULT NULL,
electronic_preferential_amount decimal(12,2) DEFAULT NULL,
pickup_attribute char(1) DEFAULT NULL,
adjust_type varchar(20) DEFAULT NULL,
postage_revoke decimal(12,2) DEFAULT NULL,
print_flag char(1) DEFAULT NULL,
print_date datetime DEFAULT NULL,
print_times int(11) DEFAULT NULL,
declare_source varchar(20) DEFAULT NULL,
declare_type varchar(20) DEFAULT NULL,
declare_curr_code varchar(20) DEFAULT NULL,
create_user_name varchar(50) DEFAULT NULL,
modify_user_name varchar(50) DEFAULT NULL,
volume decimal(8,0) DEFAULT NULL COMMENT '??',
contents_type_no varchar(20) DEFAULT NULL COMMENT '??????',
contents_type_name varchar(200) DEFAULT NULL COMMENT '??????',
contents_weight decimal(8,0) DEFAULT NULL COMMENT '??????',
transfer_type varchar(20) DEFAULT NULL COMMENT '????',
postage_total decimal(12,2) NOT NULL COMMENT '???=????+????',
postage_standard decimal(12,2) DEFAULT NULL COMMENT '????',
postage_paid decimal(12,2) DEFAULT NULL COMMENT '????',
postage_other decimal(12,2) DEFAULT NULL COMMENT '????',
is_deleted char(1) DEFAULT '0' COMMENT '?????n0??n1??',
create_user_id bigint(20) DEFAULT NULL COMMENT '???id',
gmt_created datetime DEFAULT NULL COMMENT '????',
modify_user_id bigint(20) DEFAULT NULL COMMENT '???id',
gmt_modified datetime DEFAULT NULL COMMENT '????',
reserved1 bigint(20) DEFAULT NULL COMMENT '????1',
reserved2 bigint(20) DEFAULT NULL COMMENT '????2',
reserved3 bigint(20) DEFAULT NULL COMMENT '????3',
reserved4 varchar(200) DEFAULT NULL COMMENT '????4',
reserved5 varchar(200) DEFAULT NULL COMMENT '????5',
reserved6 varchar(200) DEFAULT NULL COMMENT '????6',
reserved7 varchar(200) DEFAULT NULL COMMENT '????7',
reserved8 varchar(200) DEFAULT NULL COMMENT '????8',
reserved9 datetime DEFAULT NULL COMMENT '????9',
reserved10 text COMMENT '????10',
logistics_order_no bigint(50) DEFAULT NULL,
inner_channel varchar(20) DEFAULT NULL,
base_product_id bigint(20) DEFAULT NULL,
base_product_no varchar(20) DEFAULT NULL,
base_product_name varchar(20) DEFAULT NULL,
is_special_marketing char(1) DEFAULT NULL,
product_type varchar(20) DEFAULT NULL,
biz_product_type varchar(20) DEFAULT NULL,
product_reach_area char(1) DEFAULT NULL,
contents_attribute char(1) DEFAULT NULL,
contents_cargo_no varchar(1000) DEFAULT NULL,
cmd_code varchar(20) DEFAULT NULL,
manual_charge_reason varchar(200) DEFAULT NULL,
time_limit char(1) DEFAULT NULL,
io_type varchar(20) DEFAULT NULL,
ecommerce_no varchar(20) DEFAULT NULL,
waybill_type varchar(20) DEFAULT NULL,
pre_waybill_no varchar(50) DEFAULT NULL,
post_batch_id varchar(50) DEFAULT NULL,
biz_occur_date datetime DEFAULT NULL,
post_org_id bigint(20) DEFAULT NULL,
post_org_no varchar(50) DEFAULT NULL,
org_drds_code varchar(50) DEFAULT NULL,
post_org_simple_name varchar(50) DEFAULT NULL,
post_org_product_name varchar(20) DEFAULT NULL,
post_person_id bigint(20) DEFAULT NULL,
post_person_no varchar(50) DEFAULT NULL,
post_person_name varchar(50) DEFAULT NULL,
post_person_mobile varchar(50) DEFAULT NULL,
sender_warehouse_id bigint(20) DEFAULT NULL,
sender_warehouse_name varchar(200) DEFAULT NULL,
sender_safety_code varchar(50) DEFAULT NULL,
sender_im_type varchar(20) DEFAULT NULL,
sender_im_id varchar(50) DEFAULT NULL,
sender_id_type varchar(20) DEFAULT NULL,
sender_id_no varchar(50) DEFAULT NULL,
sender_id_encrypted_code varchar(50) DEFAULT NULL,
sender_agent_id_type varchar(20) DEFAULT NULL,
sender_agent_id_no varchar(50) DEFAULT NULL,
sender_id_encrypted_code_agent varchar(50) DEFAULT NULL,
sender_addr_additional varchar(200) DEFAULT NULL,
sender_district_no varchar(20) DEFAULT NULL,
sender_postcode varchar(20) DEFAULT NULL,
sender_gis varchar(20) DEFAULT NULL,
registered_customer_no varchar(50) DEFAULT NULL,
receiver_type char(1) DEFAULT NULL,
receiver_id bigint(20) DEFAULT NULL,
receiver_warehouse_id bigint(20) DEFAULT NULL,
receiver_warehouse_name varchar(200) DEFAULT NULL,
receiver_safety_code varchar(50) DEFAULT NULL,
receiver_im_type varchar(20) DEFAULT NULL,
receiver_im_id varchar(50) DEFAULT NULL,
receiver_addr_additional varchar(200) DEFAULT NULL,
receiver_district_no varchar(20) DEFAULT NULL,
receiver_postcode varchar(20) DEFAULT NULL,
receiver_gis varchar(20) DEFAULT NULL,
receiver_notes varchar(1000) DEFAULT NULL,
customer_manager_id bigint(20) DEFAULT NULL,
customer_manager_no varchar(50) DEFAULT NULL,
customer_manager_name varchar(50) DEFAULT NULL,
salesman_id bigint(20) DEFAULT NULL,
salesman_no varchar(50) DEFAULT NULL,
salesman_name varchar(50) DEFAULT NULL,
order_weight decimal(8,0) DEFAULT NULL,
post_org_name varchar(200) DEFAULT NULL,
PRIMARY KEY (waybill_id),
KEY auto_shard_key_post_date (post_date),
KEY auto_shard_key_waybill_no (waybill_no)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='???????';
2、 原表数据量

mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='qps_waybill_base_02';
concat(round(sum(data_length/1024/1024/1024),3),'G')
18.636G

1 row in set (0.00 sec)

3、 执行命令insert into 复制表
mysql> insert into huayu_test1 select * from qps_waybill_base_02;
Query OK, 15861881 rows affected (16 min 8.37 sec)
Records: 15861881 Duplicates: 0 Warnings: 0

数据量:

mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_test1';
concat(round(sum(data_length/1024/1024/1024),3),'G')
9.174G

1 row in set (0.00 sec)

4、 执行命令create table as select 复制表
mysql> create table huayu_test2 as select * from qps_waybill_base_02;
Query OK, 15861881 rows affected (14 min 44.98 sec)
Records: 15861881 Duplicates: 0 Warnings: 0

数据量

mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_test2';
concat(round(sum(data_length/1024/1024/1024),3),'G')
9.818G

1 row in set (0.00 sec)

5、数据行数

mysql> select count(*) from huayu_test2;
count(*)
15861881

1 row in set (4.12 sec)

mysql> select count(*) from huayu_test1;
count(*)
15861881

1 row in set (7.00 sec)

mysql>

.ibd文件大小对比

ll -h

total 44G
-rw-rw---- 1 mysql mysql 61 May 3 2017 db.opt
-rw-rw---- 1 mysql mysql 51K Feb 2 09:33 huayu_test1.frm
-rw-rw---- 1 mysql mysql 12G Feb 2 09:49 huayu_test1.ibd
-rw-rw---- 1 mysql mysql 52K Feb 2 09:59 huayu_test2.frm
-rw-rw---- 1 mysql mysql 11G Feb 2 10:13 huayu_test2.ibd

6、由于源表是通过inert into select 将实际的生产上的表拼接起来的,现在以新建的huayu_test1为源表进行测试,这个表是通过inert into select 源表 新建的表,数据量如下分别为insert into select 、 create table as select 和 新的源表:

mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_test1_2';
concat(round(sum(data_length/1024/1024/1024),3),'G')
9.472G

1 row in set (0.00 sec)

mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_test1_1';
concat(round(sum(data_length/1024/1024/1024),3),'G')
9.364G

1 row in set (0.00 sec)

mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_test1';
concat(round(sum(data_length/1024/1024/1024),3),'G')
9.174G

1 row in set (0.00 sec)

mysql>

从这里看,数据量的大小还是差不多的。
7、比较一下:
mysql> show table status like 'qps_waybill_base_02'G
1. row **

       Name: qps_waybill_base_02
     Engine: InnoDB
    Version: 10
 Row_format: Compact
       Rows: 15109587

Avg_row_length: 1324

Data_length: 20009975808

Max_data_length: 0
Index_length: 1240465408

  Data_free: 7340032

Auto_increment: 568964219

Create_time: 2018-01-24 09:42:57
Update_time: NULL
 Check_time: NULL
  Collation: utf8_general_ci
   Checksum: NULL

Create_options:

    Comment: ???????

Block_format: Original
1 row in set (0.00 sec)

mysql> show table status like 'huayu_test1'G
1. row **

       Name: huayu_test1
     Engine: InnoDB
    Version: 10
 Row_format: Compact
       Rows: 15412951

Avg_row_length: 639

Data_length: 9850322944

Max_data_length: 0
Index_length: 1268776960

  Data_free: 7340032

Auto_increment: 568964219

Create_time: 2018-02-02 09:33:40
Update_time: NULL
 Check_time: NULL
  Collation: utf8_general_ci
   Checksum: NULL

Create_options:

    Comment: ???????

Block_format: Original
1 row in set (0.01 sec)

mysql> show table status like 'huayu_test1_1'G
1. row **

       Name: huayu_test1_1
     Engine: InnoDB
    Version: 10
 Row_format: Compact
       Rows: 15310307

Avg_row_length: 656

Data_length: 10054795264

Max_data_length: 0
Index_length: 286179328

  Data_free: 4194304

Auto_increment: 15925006

Create_time: 2018-02-02 10:27:47
Update_time: NULL
 Check_time: NULL
  Collation: utf8_general_ci
   Checksum: NULL

Create_options:

    Comment: 

Block_format: Original
1 row in set (0.00 sec)

mysql> show table status like 'huayu_test1_2'G
1. row **

       Name: huayu_test1_2
     Engine: InnoDB
    Version: 10
 Row_format: Compact
       Rows: 14912979

Avg_row_length: 681

Data_length: 10170138624

Max_data_length: 0
Index_length: 1307574272

  Data_free: 7340032

Auto_increment: 568964219

Create_time: 2018-02-02 10:42:09
Update_time: NULL
 Check_time: NULL
  Collation: utf8_general_ci
   Checksum: NULL

Create_options:

    Comment: ???????

Block_format: Original
1 row in set (0.00 sec)
8、在源表执行了optimize table 之后,数据变成了10G左右,跟复制后的表的数据相近了

mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='qps_waybill_base_02';
concat(round(sum(data_length/1024/1024/1024),3),'G')
10.041G

1 row in set (0.00 sec)
9、初步结论

1、Create table as 比 insert into select 的速度更快,但是经实验创建表后的数据大小insert into select占用的数据量比较小。Create 是ddl语句,insert 是dml语句,insert的时候每条语句都会产生对应的redo和undo日志,所以相对create 语句是慢一些的。
官方文档说明:Create table as 语句不会复制原表的索引,如果想要复制索引的话,需要指定索引
mysql> CREATE TABLE bar (UNIQUE (n)) SELECT n FROM foo;

还有可能会发生数据类型的转换。例如,auto_increment列的属性不会保留,varchar列会转换成char列,
但是原表的数据量相比create 和 insert 复制后的表为什么会这么大,而重新复制一个相同的原表后,用新的原表测试后create 和insert 复制后的数据量就差不多了。怀疑是生产上的原表可能有一些多余的东西,或者表在合并的过程中有空隙只是占用表空间,而没有存储数据 。
2、Optimize table:InnoDB表上进行大量插入、更新或删除操作之后。ibd文件,因为它是通过启用innodb_file_per_table选项创建的。重新组织了表和索引,操作系统可以回收磁盘空间。

在delete 很大空间之后,这个空间不会被立即删除,而是等待新的插入的数据进行填充,后续的insert 数据会占用旧的位置,使用OPTIMIZE TABLE来重新利用未使用的空间,并整理数据文件的碎片,这个是针对myisam表和archive表的。

对于InnoDB表,Optimize table映射到ALTER TABLE…FORCE,它重新构建表以更新索引统计数据和聚集索引中的空闲未使用空间。在InnoDB表上运行时,optimize table 的时候会报这样的信息:Table does not support optimize, doing recreate + analyze instead。 会自动重建一个表再使用analyze命令进行优化
Analyse table是对表的索引分布进行分析,优化表的索引的性能

上一篇:RDS实例的性能测试报告----基础总结篇


下一篇:MongoDB位置查询内存使用优化