html解析图片url,并用继承Thread类的多线程下载
解析指定网页中的img元素,提取其中的图片URL,再通过继承Thread类的多线程方式把这些图片下载下来。
pom依赖
<dependencies>
<!-- commons-io: provides FileUtils, used by the Java code to copy a URL's content into a file -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.11.0</version>
</dependency>
<!-- htmlparser: provides Parser, HtmlPage, ImageTag, etc., used by the Java code to extract img tags from HTML -->
<dependency>
<groupId>org.htmlparser</groupId>
<artifactId>htmlparser</artifactId>
<version>2.1</version>
</dependency>
</dependencies>
Java代码
package testthread;
import org.apache.commons.io.FileUtils;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.HtmlPage;
import java.io.File;
import java.io.IOException;
import java.net.URL;
/**
 * Downloads images in parallel, one {@link Thread} subclass instance per image.
 *
 * <p>{@link #parser(String)} collects the {@code <img>} tags of a web page and
 * {@link #main(String[])} starts one download thread for each of the first
 * {@value #MAX_DOWNLOADS} images found.
 */
public class DownloadThread1 extends Thread {

    /** Upper bound on the number of images {@link #main(String[])} downloads. */
    private static final int MAX_DOWNLOADS = 11;

    /** Address of the image to download. */
    private final String url;

    /** Name of the file the image is saved to. */
    private final String fileName;

    /**
     * Creates a download task for a single image.
     *
     * @param url      address of the image
     * @param fileName name of the file to save the image to
     */
    public DownloadThread1(String url, String fileName) {
        this.url = url;
        this.fileName = fileName;
    }

    /**
     * Thread body: downloads the image and then prints the target file name.
     *
     * <p>The message is printed after the download call returns, whether or not
     * the download succeeded, because {@code Downloader.downloader} handles
     * I/O failures internally and does not report them to the caller.
     */
    @Override
    public void run() {
        Downloader downloader = new Downloader();
        downloader.downloader(url, fileName);
        System.out.println("下载文件名为" + fileName);
    }

    /**
     * Parses the page at {@code url} and returns all of its {@code <img>} nodes.
     * The image URL of every node found is also printed to standard output.
     *
     * @param url address of the HTML page to parse
     * @return the {@code IMG} nodes found (recursively) under the page body
     * @throws ParserException if the page cannot be opened or parsed
     */
    public static NodeList parser(String url) throws ParserException {
        Parser parser = new Parser(url);
        // The encoding has to match the encoding of the page being parsed.
        parser.setEncoding("utf-8");

        // Let an HtmlPage visitor walk the document so the body nodes can be read from it.
        HtmlPage htmlPage = new HtmlPage(parser);
        parser.visitAllNodesWith(htmlPage);
        NodeList bodyNodes = htmlPage.getBody();

        // Keep only IMG tags; 'true' makes the search recurse into child nodes.
        NodeFilter imgFilter = new TagNameFilter("IMG");
        NodeList images = bodyNodes.extractAllNodesThatMatch(imgFilter, true);

        for (int i = 0; i < images.size(); i++) {
            ImageTag imageTag = (ImageTag) images.elementAt(i);
            System.out.println(imageTag.getImageURL());
        }
        return images;
    }

    /**
     * Parses a fixed page and downloads its first images concurrently into
     * {@code 1.png}, {@code 2.png}, ... in the working directory.
     *
     * <p>At most {@value #MAX_DOWNLOADS} images are downloaded. If the page
     * contains fewer images, only those are downloaded instead of failing on a
     * missing list element.
     *
     * <p>NOTE: every file is saved with a {@code .png} extension regardless of
     * the actual image type of the source URL.
     *
     * <p>Sample run: the page yielded 22 image URLs and the completion messages
     * appeared in the order 1, 7, 9, 6, 8, 11, 3, 10, 5, 4, 2 -- the order in
     * which the threads finish is not deterministic.
     *
     * @param args unused
     * @throws ParserException if the page cannot be opened or parsed
     */
    public static void main(String[] args) throws ParserException {
        NodeList list = DownloadThread1.parser("http://2t6y.mydown.com/yuanqidesktop/tianji.html?softid=585&tid1=256&tid2=1001&tod1=17111");

        // Never index past the images actually found on the page.
        int count = Math.min(list.size(), MAX_DOWNLOADS);

        // Build every task first, then start them all, so that a failure while
        // reading an image URL happens before any download has begun.
        DownloadThread1[] threads = new DownloadThread1[count];
        for (int i = 0; i < count; i++) {
            String imageUrl = ((ImageTag) list.elementAt(i)).getImageURL();
            threads[i] = new DownloadThread1(imageUrl, (i + 1) + ".png");
        }
        for (DownloadThread1 thread : threads) {
            thread.start();
        }
    }
}
/**
 * Downloader: copies the content behind a URL into a local file.
 */
class Downloader {

    /** Maximum time to wait for the connection to be established, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10_000;

    /** Maximum time to wait for data once connected, in milliseconds. */
    private static final int READ_TIMEOUT_MILLIS = 30_000;

    /**
     * Downloads the resource at {@code url} and stores it in {@code fileName}.
     *
     * <p>I/O failures (including a malformed URL or a timeout) are not
     * propagated: the stack trace and a short message are printed instead.
     *
     * @param url      address of the resource to download
     * @param fileName path of the file to write the resource to
     */
    public void downloader(String url, String fileName) {
        try {
            // Explicit timeouts: the two-argument overload sets none and can
            // block the calling thread forever on an unresponsive server.
            FileUtils.copyURLToFile(new URL(url), new File(fileName),
                    CONNECT_TIMEOUT_MILLIS, READ_TIMEOUT_MILLIS);
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("IO异常,Downloader方法异常");
        }
    }
}