Downloading Bing wallpapers one by one is tedious, so here is a small crawler that batch-downloads them. The code is as follows:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.*;
import java.net.*;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Bing wallpaper downloader.
 * Compile: javac -encoding UTF-8 -cp .;C:\Users\administered\Downloads\jsoup-1.12.1.jar WallpaperDownload.java
 * Run:     java -cp .;C:\Users\administered\Downloads\jsoup-1.12.1.jar WallpaperDownload E:\wallpaper
 */
public class WallpaperDownload {
    // Base URL of the mirror site; relative /photo/... links are resolved against it
    private static final String BY_PREFIX = "https://bing.ioliu.cn";
    public static void main(String[] args) {
        // An optional local download directory can be passed as the first argument;
        // fall back to a default location when none is given
        String path = args.length > 0 ? args[0] : "E:\\wallpaper";
        File filePath = new File(path);
        if (!filePath.exists()) {
            System.out.println("Creating directory: " + filePath.getAbsolutePath());
            filePath.mkdirs();
        }
        System.out.println("Download location: " + filePath.getAbsolutePath());
        download(path);
    }
    public static void download(String path) {
        long start = System.currentTimeMillis();
        String baseUrl = "https://bing.ioliu.cn/ranking";
        // Walk the ranking pages: page 1 is the bare URL, later pages append ?p=<n>
        for (int i = 1; i <= 105; i++) {
            String pageHtml = (i == 1) ? baseUrl : baseUrl + "?p=" + i;
            try {
                String[] links = getAddress(pageHtml);
                execute(links, path);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        long end = System.currentTimeMillis();
        long seconds = (end - start) / 1000;
        System.out.println("Download took " + seconds + " seconds");
    }
    /**
     * Download the images behind the given links.
     *
     * @param links download links extracted from a ranking page
     * @param path  local download directory
     * @throws IOException
     */
    public static void execute(String[] links, String path) throws IOException {
        if (!path.endsWith("\\")) {
            path = path + "\\";
        }
        for (int i = 0; i < links.length; i++) {
            // A link looks like .../photo/<name>?force=download; keep only <name> for the file name
            String imageName = links[i].substring(links[i].lastIndexOf("/") + 1).split("\\?")[0];
            File file = new File(path + imageName + ".jpg");
            if (file.exists()) {
                System.out.println(file.getName() + " already exists, skipping ...");
                continue;
            }
            HttpURLConnection urlConnection = getConnection(links[i]);
            // try-with-resources closes both streams even if the copy fails
            try (InputStream ins = urlConnection.getInputStream();
                 OutputStream outputStream = new FileOutputStream(file)) {
                int readCount;
                byte[] bytes = new byte[10240];
                while ((readCount = ins.read(bytes)) != -1) {
                    outputStream.write(bytes, 0, readCount);
                }
            }
            System.out.println("[" + imageName + "] download finished ...");
        }
    }
    /**
     * Fetch a ranking page and extract the download links it contains.
     *
     * @param htmlPage URL of the ranking page
     * @return download links found on the page
     * @throws IOException
     */
    public static String[] getAddress(String htmlPage) throws IOException {
        System.out.println("get [" + htmlPage + "] info ...");
        HttpURLConnection connection = getConnection(htmlPage);
        String newLine = System.getProperty("line.separator");
        StringBuilder result = new StringBuilder();
        // Read the whole page into a string, closing the reader when done
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                result.append(line).append(newLine);
            }
        }
        // Let Jsoup normalize the markup before running the regex over it
        Document doc = Jsoup.parseBodyFragment(result.toString());
        String html = doc.body().html();
        return extractLinks(html);
    }
    /**
     * Extract the image download links from the page HTML.
     *
     * @param html normalized page HTML
     * @return absolute download links
     */
    static String[] extractLinks(String html) {
        List<String> list = new ArrayList<>();
        // Match relative links such as /photo/<name>_<size>?force=download;
        // reluctant quantifiers keep one match from swallowing the next link on the same line
        String pattern = "/photo/.*?_.*?\\?force=download";
        Pattern r = Pattern.compile(pattern);
        Matcher m = r.matcher(html);
        while (m.find()) {
            list.add(m.group());
        }
        // Turn the relative links into absolute URLs
        String[] results = new String[list.size()];
        for (int i = 0; i < list.size(); i++) {
            results[i] = BY_PREFIX + list.get(i);
        }
        return results;
    }
    /**
     * Open an HTTP connection to the given URL.
     *
     * @param urlStr target URL
     * @return the opened connection, or null if the URL is invalid or opening it fails
     */
    public static HttpURLConnection getConnection(String urlStr) {
        HttpURLConnection urlConnection = null;
        try {
            URL url = new URI(urlStr).toURL();
            urlConnection = (HttpURLConnection) url.openConnection();
        } catch (URISyntaxException | IOException e) {
            // MalformedURLException is an IOException, so one multi-catch covers all failure cases
            e.printStackTrace();
        }
        return urlConnection;
    }
}
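As an aside, on Java 7 and later the byte-buffer copy loop in execute() could be replaced with java.nio.file.Files.copy. A minimal sketch of that variant, reusing the getConnection() helper above (plus the java.io.* and java.net.* imports already in the listing); the save() method name and the use of REPLACE_EXISTING are illustrative choices, not part of the original code:

import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;

// Sketch: stream the HTTP response body straight to disk in one call
static void save(HttpURLConnection connection, String targetFile) throws IOException {
    try (InputStream ins = connection.getInputStream()) {
        // Overwrites an existing file; check file.exists() first to keep the original skip behavior
        Files.copy(ins, Paths.get(targetFile), StandardCopyOption.REPLACE_EXISTING);
    }
}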
Attached dependency: the Jsoup HTML parser jar (org.jsoup:jsoup:1.12.1 on Maven Central).
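Since Jsoup is already on the classpath, the link extraction in extractLinks() could also be written with a CSS selector instead of a regex. A rough sketch, assuming the download links appear as <a> elements whose href contains force=download (the same marker the regex above relies on); extractLinksWithSelector is a hypothetical name:

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.util.ArrayList;
import java.util.List;

// Sketch: let Jsoup find the anchors carrying the force=download marker
static String[] extractLinksWithSelector(String html) {
    Document doc = Jsoup.parseBodyFragment(html);
    List<String> list = new ArrayList<>();
    for (Element a : doc.select("a[href*=force=download]")) {
        // hrefs on the page are relative, so prefix the site base (BY_PREFIX in the listing above)
        list.add("https://bing.ioliu.cn" + a.attr("href"));
    }
    return list.toArray(new String[0]);
}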