需求介绍:
爬取从指定出发站开往全国各站点的所有列车班次详情,并将结果写入 MySQL。
步骤及所遇到的问题:
1.获取全国站点静态信息: https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9002 (这是一个静态的 js 文件)
2.借助浏览器开发者工具(本人使用谷歌浏览器,按 F12 打开),在 12306 相关页面中找出所需要的接口
3.寻找规律,写入 MySQL 时注意去重
直接上代码,看注释:
/**
 * Queries 12306 for the trains running from one departure station to every known
 * station, and stores the stop list of each train that terminates at the queried
 * destination into the `train_schedules` MySQL table.
 *
 * Arguments:
 *   - `args(0)`: one or more travel dates separated by commas, e.g. `2019-01-07,2019-01-08`
 *   - `args(1)`: the departure station name as it appears in the 12306 station list
 */
object TrainSchedulesMain {

  def main(args: Array[String]): Unit = {
    if (args == null || args.length < 2) {
      System.err.println("args is null or missing")
      System.exit(1)
    }
    val dateStrList = args(0).trim
    val station = args(1).trim
    assert(StringUtils.isNotBlank(dateStrList), "dateStrList is null or empty")
    assert(StringUtils.isNotBlank(station), "station is null or empty")
    // Echo the arguments.
    println(args.mkString(" "))

    // Station name -> station telecode, for every station in the 12306 station list.
    val allStationsMap = analysisAllStations()

    // Departure stations. A city can have several, e.g. for Shenzhen:
    // "深圳", "深圳西", "深圳东", "深圳坪山", "深圳北", "福田".
    val fromStations = collection.mutable.ListBuffer(station)

    // Trim each date and drop empty entries so "a, b" or a trailing comma
    // does not produce a malformed request.
    val dates = dateStrList.split(",").map(_.trim).filter(_.nonEmpty)

    dates.foreach { dateStr =>
      fromStations.foreach { fromName =>
        allStationsMap.get(fromName) match {
          case None =>
            // Unknown station name: report it instead of failing with None.get.
            System.err.println(s"unknown departure station: $fromName")
          case Some(fromStationRequest) =>
            var n = 0
            allStationsMap.foreach { case (_, toStationRequest) =>
              // Query 12306 for this departure/arrival pair.
              excuteAnaly(dateStr, fromStationRequest, toStationRequest)
              println(dateStr)
              println("进度:" + fromName)
              n += 1
              println(n)
            }
        }
      }
    }
  }

  /**
   * Fetches all trains between two stations on one date and stores every train whose
   * terminal station is the queried arrival station.
   *
   * @param dateStr            travel date sent as `leftTicketDTO.train_date`
   * @param fromStationRequest telecode of the departure station
   * @param toStationRequest   telecode of the arrival station
   */
  private def excuteAnaly(dateStr: String, fromStationRequest: String, toStationRequest: String): Unit = {
    val url1 =
      s"""https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date=${dateStr}&leftTicketDTO.from_station=${fromStationRequest}&leftTicketDTO.to_station=${toStationRequest}&purpose_codes=ADULT"""
    println("url1: " + url1)
    // All trains for the queried departure -> arrival pair.
    val responseStr = HttpRequest.sendGet(url1)
    println("url1Res: " + responseStr)

    if (StringUtils.isNotBlank(responseStr) && JSONUtil.isJson(responseStr)) {
      val allStationModel = JSONUtil.toJavaBean(responseStr, classOf[AllStationTimes])
      // Proceed only when the query succeeded and returned a result list.
      if (allStationModel != null && allStationModel.getHttpstatus == 200 &&
        allStationModel.getData != null && allStationModel.getData.getResult != null) {
        val resList = allStationModel.getData.getResult
        for (i <- 0 until resList.size()) {
          val row = resList.get(i)
          val start = if (row == null) -1 else row.indexOf("预订")
          if (start > -1) {
            // Fields after the marker, by index:
            // 1 = train no, 2 = train code, 3 = start station, 4 = end station,
            // 5 = from station, 6 = to station.
            val fields = row.substring(start).split("\\|")
            if (fields.length > 6) {
              val trainNo = fields(1)
              val endStation = fields(4)
              val fromStation = fields(5)
              val toStation = fields(6)
              // Store a train only when the arrival station is its terminal station.
              // Otherwise the same train would be written once per pass-through
              // station, and with an incomplete stop list.
              if (toStation.trim.equals(endStation.trim)) {
                saveTrainSchedule(dateStr, trainNo, fromStation, toStation)
              }
            }
          }
        }
      }
    }
  }

  /**
   * Fetches the ordered stop list of one train and inserts it into `train_schedules`
   * unless a row with the same train code, start/end station, start time and date
   * already exists.
   *
   * @param dateStr     travel date sent as `depart_date` and stored in the `dates` column
   * @param trainNo     train number sent as `train_no`
   * @param fromStation telecode sent as `from_station_telecode`
   * @param toStation   telecode sent as `to_station_telecode`
   */
  private def saveTrainSchedule(dateStr: String, trainNo: String, fromStation: String, toStation: String): Unit = {
    val url2 =
      s"""https://kyfw.12306.cn/otn/czxx/queryByTrainNo?train_no=${trainNo}&from_station_telecode=${fromStation}&to_station_telecode=${toStation}&depart_date=${dateStr}"""
    println("url2: " + url2)
    // Ordered stop details of this train.
    val res = HttpRequest.sendGet(url2)
    println("url2Res: " + res)

    if (StringUtils.isNotBlank(res) && JSONUtil.isJson(res)) {
      val trainSchedulesModel = JSONUtil.toJavaBean(res, classOf[TrainSchedules])
      if (trainSchedulesModel != null && trainSchedulesModel.getData != null &&
        trainSchedulesModel.getData.getData != null && trainSchedulesModel.getData.getData.size() > 0) {
        val stops = trainSchedulesModel.getData.getData
        val data0 = stops.get(0)
        val lastStop = stops.get(stops.size() - 1)

        // NOTE(review): both statements are built by string interpolation from values
        // returned by a remote service. If MysqlHandleUtil offers a parameterized
        // (prepared-statement) API, switch to it; a quote in any value breaks the SQL.
        val existsSql =
          s"""select *
             |
             |from train_schedules
             |where train_code='${data0.getStation_train_code}'
             |and start_station_name='${data0.getStart_station_name}'
             |and end_station_name='${data0.getEnd_station_name}'
             |and start_time='${data0.getStart_time}'
             |and dates='$dateStr'
             |""".stripMargin

        // Insert only when no identical schedule exists yet (de-duplication).
        if (!MysqlHandleUtil(MysqlConnect.trainDB).isHasValue(existsSql)) {
          val sql =
            s"""insert into
               |train_schedules(`train_code`,`start_station_name`,`end_station_name`,`start_time`,`arrive_time`,`dates`,`data`)
               |values('${data0.getStation_train_code}','${data0.getStart_station_name}','${data0.getEnd_station_name}','${data0.getStart_time}','${lastStop.getArrive_time}','${dateStr}','${JSONUtil.toJsonString(stops)}')""".stripMargin
          MysqlHandleUtil(MysqlConnect.trainDB).insertData(sql)
        }
      }
    }
  }

  /**
   * Downloads the 12306 station list and parses it into a map.
   *
   * The response is split on `@`; the segment before the first `@` is skipped. Each
   * remaining segment is split on `|`, and field 1 is stored as the key with field 2
   * as the value (Chinese station name -> station code). Segments with fewer than
   * three fields are ignored.
   *
   * @author XXXX
   * @return mutable map from station name to station code
   */
  def analysisAllStations(): collection.mutable.HashMap[String, String] = {
    val hashMap = new collection.mutable.HashMap[String, String]()
    val url = "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9002"
    val response = HttpRequest.sendGet(url)
    assert(StringUtils.isNotBlank(response), "response is null or empty")
    response.split("@").drop(1).foreach { entry =>
      val ars = entry.split("\\|")
      // Skip malformed entries instead of failing on a missing field.
      if (ars.length > 2) {
        hashMap.put(ars(1), ars(2))
      }
    }
    hashMap
  }
}
效果: