Downloading Tianya Forum Images in Parallel with ScrapySharp

The following F# script uses HtmlAgilityPack and ScrapySharp to scrape the image addresses from a Tianya forum thread and download them in parallel with Parallel.ForEach.
#r "HtmlAgilityPack.dll"
#r "ScrapySharp.dll"
open System
open System.Threading.Tasks
open HtmlAgilityPack
open ScrapySharp.Extensions
let url = "http://bbs.tianya.cn/post-12-563201-1.shtml"
let web = new ScrapySharp.Network.ScrapingBrowser()
let html = web.DownloadString(new Uri(url))
let doc = new HtmlAgilityPack.HtmlDocument()
doc.LoadHtml( html )
// Pull the image addresses out of the post body; Tianya keeps the real
// image URL in the img element's "original" attribute
let urls =
    doc.DocumentNode.CssSelect("div.bbs-content > img")
    |> Seq.map (fun i -> i.GetAttributeValue("original", ""))
let urls = [ "http://img3.laibafile.cn/p/m/166829011.jpg";
"http://img3.laibafile.cn/p/m/166829027.jpg";
"http://img3.laibafile.cn/p/m/166829000.jpg";
"http://img3.laibafile.cn/p/m/166829039.jpg";
"http://img3.laibafile.cn/p/m/166829034.jpg";
"http://img3.laibafile.cn/p/m/166829030.jpg";
"http://img3.laibafile.cn/p/m/166829016.jpg";
"http://img3.laibafile.cn/p/m/166829024.jpg" ]
// Download one picture and save it under a folder derived from filePath
let GetPicture (filePath: string) (url: string) =
    // Use filePath without its extension as the output folder
    let path = filePath.Substring(0, filePath.LastIndexOf("."))
    // Pick a page to visit first so the browser holds Tianya cookies
    // (likely needed to get past hot-link protection)
    let ty =
        let t = new Uri(url)
        match t.Authority with
        | var when var.Contains("laibafile.cn") -> "http://bbs.tianya.cn"
        | var when var.Contains("tianya.cn") -> "http://bbs.tianya.cn"
        | _ -> t.Scheme + "://" + t.Authority
    // Each parallel call gets its own ScrapingBrowser instance
    let web = new ScrapySharp.Network.ScrapingBrowser()
    web.NavigateToPage(new Uri(ty)) |> ignore
    if not (Directory.Exists(path)) then Directory.CreateDirectory(path) |> ignore
    // The file name is everything after the last "/", including the separator
    let file = url.Substring(url.LastIndexOf("/"))
    // Fetch the image bytes and write them to disk
    let pic = (web.NavigateToPage(new Uri(url))).RawResponse.Body
    printfn "%s" url
    File.WriteAllBytes(path + file, pic)
// Assumed sample base path (the original post does not show this binding);
// the output folder is derived from it by dropping the extension
let filePath = @"C:\temp\tianya.shtml"
let outPic = GetPicture filePath
Parallel.ForEach(urls, fun u -> outPic u) |> ignore
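
To run the downloader against whatever the thread actually contains instead of the fixed sample list, the scraped addresses can feed the same parallel loop. This is a minimal sketch, assuming the page still exposes the image URLs through the "original" attribute; the `scraped` name is introduced here only for illustration, and `filePath` is the binding defined above:

// Sketch (not in the original post): drive the parallel download from the scraped URLs
let scraped =
    doc.DocumentNode.CssSelect("div.bbs-content > img")
    |> Seq.map (fun i -> i.GetAttributeValue("original", ""))
    |> Seq.filter (fun u -> not (String.IsNullOrWhiteSpace u))   // drop img nodes without the attribute
    |> Seq.toArray
Parallel.ForEach(scraped, fun u -> GetPicture filePath u) |> ignore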