以前解决过这个问题,但具体怎么解决的已经忘记了。这次又遇到了同样的问题,用 Bing 搜索了好一段时间才解决,所以这次记录下来,以防再次忘记。
爬虫抓取的数据中含有占四个字节的字符,也就是表情符号(emoji)。
使用的环境是 Python 2.7、MySQL 8.0 和 MySQLdb。
1. 创建数据表
-- Demo table: the table default character set is utf8mb4 so that the
-- varchar columns can hold 4-byte characters such as emoji.
CREATE TABLE `tests` (
  `url_id` int NOT NULL AUTO_INCREMENT,
  `a1` varchar(520) DEFAULT NULL,
  `hashcode` varchar(520) DEFAULT NULL,
  PRIMARY KEY (`url_id`),
  UNIQUE KEY `hashcode` (`hashcode`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
2. 修改数据表
这一步很重要
-- Convert the table default and its existing character columns to utf8mb4
-- with the utf8mb4_0900_ai_ci collation.
alter table tests convert to character set utf8mb4 collate utf8mb4_0900_ai_ci;
3. 插入数据
def insert_data(dbName, data_dict): print "dbname",dbName """ # 插入数据到数据库中 # 新增功能是程序报 2003错误,插入数据停止5秒后重新执行插入操作 :param dbName: 数据表的名称 :param data_dict: 需要插入的一条数据,格式是字典格式。例如 {"author":"张三","pubTime":"2018-05-06"} :return: 插入成功返回1,失败返回0 """ data_values = "(" + "%s," * (len(data_dict)) + ")" data_values = data_values.replace(‘,)‘, ‘)‘) dbField = data_dict.keys() dbField2 = dbField dataTuple = tuple(data_dict.values()) dbField = str(tuple(dbField)).replace("‘", ‘‘)
# Connection settings read by insert_data() when it opens a MySQLdb connection.
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment or a config file that is kept out of version control.
MysqlDataBase = {
    'db': 'spider_2019',
    'USER': 'root',
    'PASSWORD': 'root',
    'HOST': "1.0.1.2",
    'PORT': 33006,
}
def insert_data(dbName, data_dict):
    """Insert one row into a MySQL table.

    The connection is opened with ``charset="utf8mb4"``: MySQL's ``utf8``
    is the 3-byte ``utf8mb3`` encoding and cannot carry 4-byte characters
    such as emoji, even when the table itself is utf8mb4.

    NOTE(review): the original description mentioned pausing 5 seconds and
    retrying on error 2003; no such retry exists in this function.

    :param dbName: name of the target table (interpolated into the SQL
        text as-is, so it must come from trusted code, not user input).
    :param data_dict: the row to insert, mapping column name to value,
        e.g. ``{"author": "Zhang San", "pubTime": "2018-05-06"}``.
        Column names are interpolated into the SQL text as-is; values are
        passed as bound parameters.
    :return: the string ``"1"`` on success, ``None`` on failure.
    """
    print("dbname " + str(dbName))

    # Build "(col1, col2)" and "(%s,%s)" explicitly. str(tuple(keys)) breaks
    # for a single column ("(a,)") and for unicode keys on Python 2 ("(ua)").
    columns = list(data_dict.keys())
    params = tuple(data_dict[column] for column in columns)
    field_list = "(" + ", ".join(str(column) for column in columns) + ")"
    placeholders = "(" + ",".join(["%s"] * len(columns)) + ")"
    sql = """ insert into %s %s values %s """ % (dbName, field_list, placeholders)

    conn = None
    try:
        conn = MySQLdb.connect(
            host=MysqlDataBase['HOST'],
            user=MysqlDataBase['USER'],
            passwd=MysqlDataBase['PASSWORD'],
            db=MysqlDataBase['db'],
            charset="utf8mb4",  # required for 4-byte characters (emoji)
            port=MysqlDataBase['PORT'],
        )
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            conn.commit()
        finally:
            cursor.close()
        print("正在往 " + str(dbName) + " 数据单个插入数据, 插入成功")
        return "1"
    except Exception as e:
        print("正在往 " + str(dbName) + " 插入数据, 插入失败 请查找原因,失败原因是: " + str(e))
        return None
    finally:
        # Always release the connection, including when execute/commit fails.
        if conn is not None:
            conn.close()


# Sample value for the `a1` column. NOTE(review): the "??" runs presumably
# stand in for emoji that were lost when the text was published - confirm.
a1 = '??挑战高薪等你来战??5险3金/房产销售/网络营销j'