以下为生产环境的sqoop配置
一.登录Mysql,创建数据库和表,以及插入数据。
1.1创建数据库
create database mydb;
1.2创建表
use mydb; CREATE TABLE emp( id INT NOT NULL, name VARCHAR(100), deg VARCHAR(100), salary BIGINT, dept VARCHAR(50) ); CREATE TABLE empp( id INT NOT NULL, name VARCHAR(100), deg VARCHAR(100), salary BIGINT, dept VARCHAR(50) );
1.3插入数据
insert into emp values(1201,'gopal','manager','50000','TP'); insert into emp values(1202,'manisha','Proof reader','50000','TP'); insert into emp values(1203,'khalil','php dev','30000','AC'); insert into emp values(1204,'prasanth','php dev','30000','AC'); insert into emp values(1205,'kranthi','admin','20000','TP'); insert into empp values(12011,'gopal','manager','50000','TP'); insert into empp values(12022,'manisha','Proof reader','50000','TP'); insert into empp values(12033,'khalil','php dev','30000','AC'); insert into empp values(12044,'prasanth','php dev','30000','AC'); insert into empp values(12055,'kranthi','admin','20000','TP');
二.######sqoop import 从数据库导入到Hive
一般RDBMS的导出速度控制在60~80MB/s,每个 map 任务的处理速度5~10MB/s 估算,即 -m 参数一般设置4~8,表示启动 4~8 个map 任务并发抽取 ##最好不要超过6个
#sqoop 首先要kinit 才可以 如果是hive 要有hive的权限 如果是hdfs 要有acl的权限 不然无法写入和读取 kint user/user -kt /opt/user.keytab username='admin' psd='123456' table=('emp' 'empp') for mysql_table in ${table[@]} do sqoop import \ --connect jdbc:mysql://192.168.10.41:3307/mydb \ --username ${username} \ --password ${psd} \ --table ${mysql_table} \ --hive-import \ --hive-drop-import-delims \ --hive-database emp \ --hive-table mysql_${mysql_table} \ --fields-terminated-by '\t' \ --m 6 \ --delete-target-dir \ --null-string '\\N' \ --null-non-string '\\N' done
三.######sqoop export 从Hive导入到大数据
kint user/user -kt /opt/user.keytab username='admin' psd='123456' table=('emp' 'empp') for mysql_table in ${table[@]} do sqoop export \ --connect jdbc:mysql://192.168.10.40:3307/mydb \ --username ${username} \ --password ${psd} \ --table ${mysql_table} \ --export-dir '/user/hive/warehouse/mydb.db/'mysql_${mysql_table} \ --input-null-string '\\N' \ --input-null-non-string '\\N' \ --fields-terminated-by '\t' \ --input-fields-terminated-by '\t' \ --m 6 done