程序逻辑:
从配置文件(例如 config.txt,路径由第一个命令行参数指定)读取第一行的 URL,根据该 URL 打开输入流,并将接收到的网页内容保存到第二个命令行参数所指定目录下的 url001.html 文件中。
package com.changying.spider;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;

/**
 * Minimal page fetcher: reads a single URL from the first line of a config file, opens a stream
 * to that URL and writes the received bytes to {@code url001.html} inside an output directory.
 */
public class Spider {

    /** Name of the file the downloaded content is written to. */
    private static final String RESULT_FILE_NAME = "url001.html";

    /** Size in bytes of the buffer used while copying the response. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the path of the config file whose first line is the URL to
     *     fetch; {@code args[1]} is the directory that receives {@code url001.html}
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws IOException if the config file is empty or unreadable, the URL is malformed or
     *     cannot be opened, or the result file cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException("Usage: Spider <config-file> <output-directory>");
        }
        System.out.println(args[0]);
        System.out.println(args[1]);

        File config = new File(args[0]);
        String strUrl = readFirstLine(config);
        System.out.println(strUrl);
        if (strUrl == null || strUrl.trim().isEmpty()) {
            throw new IOException("No URL found on the first line of " + config.getPath());
        }

        URL url = new URL(strUrl);
        // File(parent, child) joins with the platform separator; a literal "\\" only forms a
        // directory path on Windows and becomes part of the file name elsewhere.
        File resultFile = new File(args[1], RESULT_FILE_NAME);
        download(url, resultFile);
    }

    /**
     * Returns the first line of {@code file}, or {@code null} when the file has no content.
     * The reader is closed even if reading fails.
     */
    private static String readFirstLine(File file) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            return reader.readLine();
        }
    }

    /**
     * Copies everything served at {@code url} into {@code target}, creating the file if needed
     * and overwriting any previous content. Both streams are closed even if the copy fails.
     */
    private static void download(URL url, File target) throws IOException {
        // The URL stream is opened first so that an unreachable URL does not leave an empty
        // result file behind.
        try (InputStream in = url.openStream();
                OutputStream out = new FileOutputStream(target)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int bytesRead;
            while ((bytesRead = in.read(buffer)) != -1) {
                out.write(buffer, 0, bytesRead);
            }
        }
    }
}