/**
 * Scrape the 2019 "statistical division codes and urban-rural classification codes"
 * published by the National Bureau of Statistics and store them via M('position').
 *
 * Source site: http://www.stats.gov.cn/ (National Bureau of Statistics of China).
 *
 * The crawl walks province -> city -> county -> town -> village pages:
 *  - one row is inserted per village (housing_id / housing_name filled in);
 *  - a town whose page yields no village rows is inserted once without housing_* keys;
 *  - when a county-level page contains plain table rows instead of links, those rows
 *    are stored in the town_* fields and no deeper page is requested.
 *
 * Province, city, county and town codes are right-padded with "0" to 12 characters.
 * Pages that cannot be downloaded are treated as empty, so the crawl is best-effort.
 *
 * @author    cq <just_leaf@foxmail.com>
 * @copyright zmtek 2019-12-26
 *
 * @return void
 */
public function getcity()
{
    header("Content-type: text/html; charset=gb2312");

    // The crawl fetches one page per province, city, county and town: lift the time limit.
    set_time_limit(0);

    // Kept from the original implementation: download warnings are silenced and failed
    // pages are simply skipped (see fetchDivisionPage()).
    error_reporting(0);

    $baseUrl = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/';

    // Two-digit province code => province name. The names are source literals, so they
    // are stored verbatim and are NOT run through the GB -> UTF-8 conversion that the
    // scraped names need.
    $provinces = array(
        11 => '北京市',
        12 => '天津市',
        13 => '河北省',
        14 => '山西省',
        15 => '内蒙古自治区',
        21 => '辽宁省',
        22 => '吉林省',
        23 => '黑龙江省',
        31 => '上海市',
        32 => '江苏省',
        33 => '浙江省',
        34 => '安徽省',
        35 => '福建省',
        36 => '江西省',
        37 => '山东省',
        41 => '河南省',
        42 => '湖北省',
        43 => '湖南省',
        44 => '广东省',
        45 => '广西壮族自治区',
        46 => '海南省',
        50 => '重庆市',
        51 => '四川省',
        52 => '贵州省',
        53 => '云南省',
        54 => '西藏自治区',
        61 => '陕西省',
        62 => '甘肃省',
        63 => '青海省',
        64 => '宁夏回族自治区',
        65 => '新疆维吾尔自治区',
    );

    $model = M('position');

    foreach ($provinces as $provinceCode => $provinceName) {
        $provinceCode = (string) $provinceCode;
        $provinceId   = str_pad($provinceCode, 12, "0", STR_PAD_RIGHT);

        // The city list lives on the province's own page, e.g. ".../2019/11.html".
        $cities = $this->parseDivisionRows(
            $this->fetchDivisionPage($baseUrl . $provinceCode . '.html'),
            false
        );

        foreach ($cities['codes'] as $cityIndex => $cityCode) {
            $cityId   = str_pad($cityCode, 12, "0", STR_PAD_RIGHT);
            $cityName = $this->divisionNameToUtf8($cities['names'][$cityIndex]);
            // Digits 3-4 of the city code form the second URL path segment.
            $cityDir  = substr($cityCode, 2, 2);

            $counties = $this->parseDivisionRows(
                $this->fetchDivisionPage($baseUrl . $provinceCode . '/' . $cityCode . '.html'),
                false
            );

            foreach ($counties['codes'] as $countyIndex => $countyCode) {
                $countyId   = str_pad($countyCode, 12, "0", STR_PAD_RIGHT);
                $countyName = $this->divisionNameToUtf8($counties['names'][$countyIndex]);
                // Digits 5-6 of the COUNTY code form the third URL path segment.
                $countyDir  = substr($countyCode, 4, 2);

                // Some provinces/cities use HTML that differs from the majority, so
                // plain table rows are accepted here when no links are found.
                $towns = $this->parseDivisionRows(
                    $this->fetchDivisionPage(
                        $baseUrl . $provinceCode . '/' . $cityDir . '/' . $countyCode . '.html'
                    ),
                    true
                );

                foreach ($towns['codes'] as $townIndex => $townCode) {
                    $row = array(
                        'province_id'   => $provinceId,
                        'province_name' => $provinceName,
                        'city_id'       => $cityId,
                        'city_name'     => $cityName,
                        'county_id'     => $countyId,
                        'county_name'   => $countyName,
                        'town_id'       => str_pad($townCode, 12, "0", STR_PAD_RIGHT),
                        'town_name'     => $this->divisionNameToUtf8($towns['names'][$townIndex]),
                    );

                    $villageCodes = array();
                    $villageNames = array();
                    if ($towns['linked']) {
                        // Only linked rows have a deeper page; it is addressed by the
                        // current TOWN code (indexed by the town loop, not the county loop).
                        $villages = $this->parseDivisionRows(
                            $this->fetchDivisionPage(
                                $baseUrl . $provinceCode . '/' . $cityDir . '/' . $countyDir . '/' . $townCode . '.html'
                            ),
                            true
                        );
                        $villageCodes = $villages['codes'];
                        $villageNames = $villages['names'];
                    }

                    if (!$villageCodes) {
                        // No village level available: store the town on its own.
                        $model->add($row);
                        continue;
                    }

                    foreach ($villageCodes as $villageIndex => $villageCode) {
                        $row['housing_id']   = $villageCode;
                        $row['housing_name'] = $this->divisionNameToUtf8($villageNames[$villageIndex]);
                        $model->add($row);
                    }
                }
            }
        }
    }
}

/**
 * Download one page of the division-code site.
 *
 * @param string $pageUrl Absolute URL of the page.
 *
 * @return string Raw page bytes, or an empty string when the download failed.
 */
private function fetchDivisionPage($pageUrl)
{
    $html = file_get_contents($pageUrl);

    return $html === false ? '' : $html;
}

/**
 * Extract (code, name) pairs from a division listing page.
 *
 * Linked rows (<a href='NN/CODE.html'>NAME</a></td></tr>) are tried first. When none
 * are found and $allowPlainRows is true, three-cell plain rows
 * (<td>CODE</td><td>digits</td><td>NAME</td></tr>) are used instead.
 *
 * @param string $html           Raw page bytes as downloaded (not yet converted to UTF-8).
 * @param bool   $allowPlainRows Whether to fall back to the plain three-cell row pattern.
 *
 * @return array Keys: 'codes' (list of strings), 'names' (list of raw, unconverted
 *               strings, parallel to 'codes') and 'linked' (bool, true when the pairs
 *               came from linked rows).
 */
private function parseDivisionRows($html, $allowPlainRows)
{
    $found = array();
    preg_match_all('/<a href=\'\d{2}\/(.{1,30}).html\'>(.{1,30})<\/a><\/td><\/tr>/', $html, $found);
    $linked = !empty($found[0]);

    if (!$linked && $allowPlainRows) {
        preg_match_all('/<td>(.{1,30})<\/td><td>\d{1,10}<\/td><td>(.{1,30})<\/td><\/tr>/', $html, $found);
    }

    return array(
        'codes'  => isset($found[1]) ? $found[1] : array(),
        'names'  => isset($found[2]) ? $found[2] : array(),
        'linked' => $linked,
    );
}

/**
 * Convert a scraped division name to UTF-8.
 *
 * GB18030 is used as the source charset because it is a superset of GB2312, so names
 * containing characters outside GB2312 still convert.
 *
 * @param string $rawName Name bytes exactly as captured from the page.
 *
 * @return string UTF-8 name, or an empty string when the bytes cannot be converted.
 */
private function divisionNameToUtf8($rawName)
{
    $utf8 = iconv('GB18030', 'UTF-8', $rawName);

    return $utf8 === false ? '' : $utf8;
}
-- Table holding one row per scraped division entry. The column names match the keys
-- that getcity() passes to M('position')->add().
-- NOTE(review): the `gd_` prefix is presumably the project's configured table prefix - confirm.
-- NOTE(review): AUTO_INCREMENT=821 looks like residue from an exported dump; it only
-- sets the first generated id and is kept unchanged here - confirm whether it is wanted.
CREATE TABLE `gd_position` (
  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `province_id` varchar(20) DEFAULT NULL,
  `province_name` varchar(255) CHARACTER SET utf8mb4 DEFAULT '',
  `city_id` varchar(20) DEFAULT NULL,
  `city_name` varchar(255) CHARACTER SET utf8mb4 DEFAULT '',
  `county_id` varchar(20) DEFAULT NULL,
  `county_name` varchar(255) CHARACTER SET utf8mb4 DEFAULT '',
  `town_id` varchar(20) DEFAULT NULL,
  `town_name` varchar(255) CHARACTER SET utf8mb4 DEFAULT '',
  `housing_id` varchar(20) DEFAULT NULL,
  -- utf8mb4 like the other *_name columns; previously this column alone fell back to
  -- the table default charset (utf8).
  `housing_name` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=821 DEFAULT CHARSET=utf8;