数据猎手:使用Java和Apache HttpComponents库下载Facebook图像

import org.apache.http.HttpHost; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; public class FacebookImageDownloader { // 亿牛云爬虫代理的配置信息 private static final String PROXY_HOST = "www.16yun.cn"; private static final int PROXY_PORT = 3128; private static final String PROXY_USER = "username"; private static final String PROXY_PASS = "password"; public static void main(String[] args) { // 配置代理 HttpHost proxy = new HttpHost(PROXY_HOST, PROXY_PORT); RequestConfig config = RequestConfig.custom() .setProxy(proxy) .build(); // 创建HttpClient实例 try (CloseableHttpClient httpClient = HttpClients.custom() .setDefaultRequestConfig(config) .build()) { // 创建线程池 ExecutorService executorService = Executors.newFixedThreadPool(5); // 待下载图像的URL数组 String[] imageUrls = { "http://www.example.com/image1.jpg", "http://www.example.com/image2.jpg", "http://www.example.com/image3.jpg" }; // 发送请求、处理响应、解析HTML、下载图像 for (String imageUrl : imageUrls) { executorService.execute(() -> { try { // 发送请求 HttpGet request = new HttpGet(imageUrl); CloseableHttpResponse response = httpClient.execute(request); // 处理响应 if (response.getStatusLine().getStatusCode() == 200) { // 解析HTML(如果需要的话) // 下载图像 byte[] imageData = EntityUtils.toByteArray(response.getEntity()); File destinationFile = new File("C:\\Downloads\\" + getImageName(imageUrl)); FileOutputStream fos = new FileOutputStream(destinationFile); fos.write(imageData); fos.close(); System.out.println("图像下载完成,保存到:" + destinationFile.getAbsolutePath()); } else { System.err.println("图像下载失败:" + response.getStatusLine()); } } catch (IOException e) { e.printStackTrace(); } }); } // 关闭线程池 executorService.shutdown(); } catch (Exception e) { e.printStackTrace(); } } // 从URL中获取图像文件名 private static String getImageName(String imageUrl) { int lastIndexOfSlash = imageUrl.lastIndexOf('/'); return imageUrl.substring(lastIndexOfSlash + 1); } }
上一篇:【Linux】进程exec函数族以及守护进程


下一篇:从离线到实时:无锡锡商银行基于 Apache Doris 的数据仓库演进实践