html解析图片url,并用继承Thread类的多线程下载

html解析图片url,并用继承Thread类的多线程下载

html解析图片url,并用继承Thread类的多线程下载
解析指定网页中的所有img元素,取得每张图片的URL,再为每张图片启动一个继承Thread类的线程,把图片下载保存到本地文件。

pom依赖

<dependencies>
    <!-- Commons IO: provides FileUtils.copyURLToFile, used to download a URL into a file -->
    <dependency>
        <groupId>commons-io</groupId>
        <artifactId>commons-io</artifactId>
        <version>2.11.0</version>
    </dependency>
    <!-- HTMLParser: used to parse the HTML page and extract its IMG tags -->
    <dependency>
        <groupId>org.htmlparser</groupId>
        <artifactId>htmlparser</artifactId>
        <version>2.1</version>
    </dependency>
</dependencies>

Java代码

package testthread;

import org.apache.commons.io.FileUtils;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.HtmlPage;

import java.io.File;
import java.io.IOException;
import java.net.URL;

/**
 * A thread that downloads one image from a URL into a local file.
 *
 * <p>{@link #main(String[])} parses a web page for {@code IMG} tags and starts one
 * {@code DownloadThread1} per image, for at most {@code MAX_DOWNLOADS} images.
 */
public class DownloadThread1 extends Thread {

    /** Page whose images are downloaded by {@link #main(String[])}. */
    private static final String PAGE_URL =
            "http://2t6y.mydown.com/yuanqidesktop/tianji.html?softid=585&tid1=256&tid2=1001&tod1=17111";

    /** Upper bound on the number of images {@link #main(String[])} downloads. */
    private static final int MAX_DOWNLOADS = 11;

    /** Address of the image to download. */
    private final String url;

    /** Name of the file the image is saved to. */
    private final String fileName;

    /**
     * Creates a download task; nothing is fetched until the thread is started.
     *
     * @param url      address of the image to download
     * @param fileName name of the file the image is saved to
     */
    public DownloadThread1(String url, String fileName) {
        this.url = url;
        this.fileName = fileName;
    }

    /**
     * Thread body: downloads the image, then prints the target file name.
     *
     * <p>The message is printed after the downloader call returns, whether or not the
     * download succeeded, because {@code Downloader.downloader} handles I/O failures itself.
     */
    @Override
    public void run() {
        Downloader downloader = new Downloader();
        downloader.downloader(url, fileName);
        System.out.println("下载文件名为" + fileName);
    }

    /**
     * Fetches the page at {@code url}, collects every {@code IMG} node found under its body
     * and prints each image URL to standard output.
     *
     * @param url address of the HTML page to parse
     * @return the matching {@code IMG} nodes, in the order the parser returned them
     * @throws ParserException if the page cannot be fetched or parsed
     */
    public static NodeList parser(String url) throws ParserException {
        // Create the parser for the page.
        Parser parser = new Parser(url);

        // The encoding set here must match the encoding of the page itself.
        parser.setEncoding("utf-8");

        // Let an HtmlPage visitor walk the document so the body nodes can be read from it.
        HtmlPage htmlPage = new HtmlPage(parser);
        parser.visitAllNodesWith(htmlPage);

        // Start from all nodes under the body, then keep only IMG tags (searching recursively).
        NodeList list = htmlPage.getBody();
        NodeFilter filter = new TagNameFilter("IMG");
        list = list.extractAllNodesThatMatch(filter, true);

        // Print the URL of every image that was found.
        for (int c = 0; c < list.size(); c++) {
            ImageTag imageTag = (ImageTag) list.elementAt(c);
            System.out.println(imageTag.getImageURL());
        }
        return list;
    }

    /**
     * Parses {@code PAGE_URL} and downloads its first images concurrently, saving them as
     * {@code 1.png}, {@code 2.png}, and so on.
     *
     * <p>The number of downloads is bounded by both {@code MAX_DOWNLOADS} and the number of
     * images actually found, so a page with fewer images no longer fails on a missing element.
     *
     * <p>Sample run recorded by the original author (JDK 1.8.0_271): the parser printed 22
     * image URLs, then the eleven completion messages appeared in the order
     * 1, 7, 9, 6, 8, 11, 3, 10, 5, 4, 2 and the process exited with code 0. Completion order
     * therefore does not follow start order.
     *
     * @param args unused
     * @throws ParserException if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws ParserException {
        NodeList list = DownloadThread1.parser(PAGE_URL);

        // Never index past the images that were actually found.
        int count = Math.min(list.size(), MAX_DOWNLOADS);

        // Build every task first, then start them all, as the original code did.
        DownloadThread1[] threads = new DownloadThread1[count];
        for (int i = 0; i < count; i++) {
            String imageUrl = ((ImageTag) list.elementAt(i)).getImageURL();
            threads[i] = new DownloadThread1(imageUrl, (i + 1) + ".png");
        }
        for (DownloadThread1 thread : threads) {
            thread.start();
        }
    }
}

/**
 * Helper that copies the content behind a URL into a local file.
 */
class Downloader {

    /** Maximum time, in milliseconds, to wait for the connection to be established. */
    private static final int CONNECT_TIMEOUT_MS = 10_000;

    /** Maximum time, in milliseconds, to wait for data once connected. */
    private static final int READ_TIMEOUT_MS = 30_000;

    /**
     * Downloads {@code url} into the file named {@code fileName}.
     *
     * <p>I/O failures (including a malformed URL or a timeout) are not propagated: the stack
     * trace and a short message are printed and the method returns normally.
     *
     * @param url      address of the resource to download
     * @param fileName name of the destination file
     */
    public void downloader(String url, String fileName) {
        try {
            // Use the overload with explicit timeouts: without them a stalled server would
            // block the calling thread indefinitely.
            FileUtils.copyURLToFile(new URL(url), new File(fileName),
                    CONNECT_TIMEOUT_MS, READ_TIMEOUT_MS);
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("IO异常,Downloader方法异常");
        }
    }
}
上一篇:【坑2】maven通过nexus私服服务器下载jar包提示无权限Authentication failed for http://localhost:8081/repository/maven-pub


下一篇:Maven插件maven-install-plugin的详解