<<< Java提取网页源码

package com.sevennight;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Command-line entry point that downloads one web page and saves the
 * returned text to a local file.
 */
public class zidongwenzi {

    /**
     * Fetches the page at the hard-coded URL through {@link GetWebContent}
     * and writes the resulting string to the hard-coded output file.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the output file cannot be opened, written or closed
     */
    public static void main(String[] args) throws IOException {
        GetWebContent con = new GetWebContent();
        // Local path the page text is saved to. try-with-resources guarantees the
        // writer is flushed and closed even when fetching or writing throws.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter("F:/WebContent.txt"))) {
            bw.write(con.getWebCon("http://www.baidu.com")); // address of the page to fetch
        }
    }
}
/**
 * Helper that reads the text behind a URL into a single string.
 */
class GetWebContent {

    /**
     * Opens the given URL, decodes its stream as GBK and returns the text,
     * with every line terminated by {@code '\n'}. Each line is also echoed to
     * standard output as it is read.
     *
     * <p>This method does not throw. If anything fails (malformed URL, I/O
     * error, ...) the exception's {@code toString()} is appended to whatever
     * was read so far, the exception is reported on standard error, and the
     * accumulated string is returned.
     *
     * @param domain the URL to read, e.g. {@code "http://www.baidu.com"}
     * @return the text read from the URL, possibly followed by the text of an
     *         exception that interrupted the read
     */
    public String getWebCon(String domain) {
        StringBuilder content = new StringBuilder();
        try {
            java.net.URL url = new java.net.URL(domain);
            // "GBK" is the charset the target page is expected to be encoded in.
            // try-with-resources closes the reader (and the underlying URL
            // stream) even when readLine() fails part-way through.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(url.openStream(), "GBK"))) {
                String line;
                while ((line = in.readLine()) != null) {
                    content.append(line).append('\n');
                    System.out.println(line);
                }
            }
        } catch (Exception e) {
            // Report any error that arises and surface it in the returned text.
            content.append(e.toString());
            System.err.println(e);
            // NOTE(review): this usage text names "HttpClient", which is not a class
            // in this file; kept verbatim so the program's output is unchanged.
            System.err.println("Usage:   java   HttpClient   <URL>   [<filename>]");
        }
        return content.toString();
    }
}

  

上一篇:Pandas之DataFrame——Part 1


下一篇:将String类型的json字符串转换成java对象