shell抓取

#!/bin/sh

dir=`dirname $`
configDir="$dir/config" ipport="$configDir/ip_port" url="http://www.youdaili.cn/Daili/http/"
indexs=$(curl -s --max-time "$url" |piconv -f utf8 -t gbk|awk '$0~/http:\/\/www.youdaili.cn\/static\/images\/hot.gif/{print substr($2,41,length($2)-46)}') pages="$(curl -s --max-time 200 "${url}${indexs}.html"|piconv -f utf8 -t gbk|awk '$0~/共.*页/{page=gensub(/.*共([^页]+).*/,"\\","",$0);print page}')" for((page=;page<=$pages;page++))
do
if [[ $page -eq ]]
then
curl -s --max-time "${url}${indexs}.html"|piconv -f utf8 -t gbk|awk '$0~/.*@HTTP#.*<br \/>/{gsub(".*<p>","",$0);gsub(".*<span>","",$0);gsub("@HTTP#.*","",$0);print}'
else
link="${url}${indexs}_$page.html"
curl -s --max-time "$link"|piconv -f utf8 -t gbk|awk '$0~/.*@HTTP#.*<br \/>/{gsub(".*<p>","",$0);gsub(".*<span>","",$0);gsub("@HTTP#.*","",$0);print}'
fi
done | sort -u >$ipport
上一篇:剑指offer中二进制中1的个数


下一篇:VCS仿真生成fsdb文件(Verilog)