<?php

// Minimal crawler: starting from a seed URL, fetch a page, record its <h1>
// title in the `links` table, queue every absolute http(s) link found on the
// page, then continue with the oldest row still marked as not crawled
// (status = 0) until no such row is left.

header("Content-type: text/html;charset=utf-8");

// Make mysqli throw mysqli_sql_exception on failure instead of returning
// false, so a broken connection or query stops the script with a clear error
// rather than letting the main loop spin on the same URL forever.
mysqli_report(MYSQLI_REPORT_ERROR | MYSQLI_REPORT_STRICT);

// Connect to the database.
$link = mysqli_connect("localhost", "root", "root");
mysqli_select_db($link, "crawler");
mysqli_set_charset($link, "utf8");

/**
 * Tell whether a value is an absolute http:// or https:// URL.
 *
 * Used to keep relative paths, "javascript:" pseudo-links and local file
 * paths away from file_get_contents(), which would otherwise happily open
 * local files named by an untrusted href.
 *
 * @param mixed $url Candidate URL.
 * @return bool True when $url is a string starting with http:// or https://.
 */
function crawler_is_http_url($url)
{
    return is_string($url) && preg_match('#^https?://#i', $url) === 1;
}

/**
 * Download a page and convert it from GB2312 to UTF-8.
 *
 * NOTE(review): the source encoding is hard-coded to GB2312, as in the
 * original script; confirm that every crawled page is actually served in it.
 *
 * @param string $url Absolute http(s) URL to download.
 * @return string The UTF-8 page body, or '' when the URL is not http(s), the
 *                download fails, or the conversion fails.
 */
function crawler_fetch_utf8($url)
{
    if (!crawler_is_http_url($url)) {
        return '';
    }

    $raw = file_get_contents($url);
    if ($raw === false) {
        return '';
    }

    // //IGNORE drops bytes that are not valid GB2312 instead of aborting the
    // whole conversion on the first stray character.
    $converted = iconv("gb2312", "utf-8//IGNORE", $raw);

    return $converted === false ? '' : $converted;
}

/**
 * Look up the id of the `links` row holding a given URL.
 *
 * @param mysqli $db  Open database connection.
 * @param string $url URL to look for.
 * @return int|string|null The id of the first matching row, or null when the
 *                         URL is not in the table yet.
 */
function crawler_find_link_id($db, $url)
{
    $stmt = mysqli_prepare($db, "SELECT id FROM links WHERE url = ? LIMIT 1");
    mysqli_stmt_bind_param($stmt, "s", $url);
    mysqli_stmt_execute($stmt);
    mysqli_stmt_bind_result($stmt, $id);
    // Copy the bound value before the statement is closed.
    $found = mysqli_stmt_fetch($stmt) === true ? $id : null;
    mysqli_stmt_close($stmt);

    return $found;
}

/**
 * Crawl one page.
 *
 * First records the current URL (with its <h1> title) as crawled, then writes
 * every absolute http(s) link found on the page into the `links` table so the
 * main loop can pick it up later. A page that cannot be downloaded is still
 * marked as crawled, with an empty title, so it is not retried endlessly.
 *
 * @param string      $url  URL of the page to crawl.
 * @param mysqli|null $link Database connection; defaults to the global $link
 *                          opened at the top of this script.
 * @return void
 */
function crawler($url, $link = null)
{
    $db = $link !== null ? $link : $GLOBALS['link'];

    $content = crawler_fetch_utf8($url);

    // Strip each line break together with the whitespace that follows it, so
    // an <h1> spread over several indented lines can match below.
    $data1 = (string) preg_replace("'([\r\n])[\s]+'", "", $content);
    $title = preg_match('/<h1>(.*?)<\/h1>/', $data1, $arr) === 1 ? $arr[1] : ''; // page title

    if (crawler_find_link_id($db, $url) !== null) {
        // Update by URL rather than by id so duplicate rows of the same URL
        // are all marked and cannot be handed back by the main loop forever.
        $stmt = mysqli_prepare($db, "UPDATE links SET status = 1, title = ? WHERE url = ?");
        mysqli_stmt_bind_param($stmt, "ss", $title, $url);
    } else {
        // Insert the row already marked as crawled; otherwise the page would
        // be fetched a second time on the next iteration.
        $stmt = mysqli_prepare($db, "INSERT INTO links(url, title, status) VALUES (?, ?, 1)");
        mysqli_stmt_bind_param($stmt, "ss", $url, $title);
    }
    mysqli_stmt_execute($stmt);
    mysqli_stmt_close($stmt);

    // Collect every link on the page.
    preg_match_all('/<\s*a\s[^>]*?href="(.*?)"/i', $content, $matches);

    $insert = mysqli_prepare($db, "INSERT INTO links(url) VALUES (?)");
    foreach (array_unique($matches[1]) as $value) {
        if (!crawler_is_http_url($value)) {
            continue;
        }
        if (crawler_find_link_id($db, $value) !== null) {
            continue;
        }
        mysqli_stmt_bind_param($insert, "s", $value);
        mysqli_stmt_execute($insert);
    }
    mysqli_stmt_close($insert);
}

$url = "http://www.xingwei.edu.cn/";
while (true) {
    crawler($url, $link);

    // Fetch the oldest link in the table that has not been crawled yet.
    $result = mysqli_query($link, "SELECT url FROM links WHERE status = 0 ORDER BY id ASC LIMIT 1");
    $row = mysqli_fetch_array($result);
    mysqli_free_result($result);

    if (!$row) {
        break;
    }
    $url = $row['url'];
}