本文介绍Java编程技巧之小爬虫程序的编程方法。
马萨玛索(http://www.masamaso.com/index.shtml)每天10点都会推出5件一折商品,也就是"秒购"。男装质量还不错,所以我经常去抢,但感觉手动太慢了,就写了一个小爬虫程序,让它自己去爬:如果是金子页面(免费商品),程序就会自动把它打开,这样我就可以抢到了。在这里和大家分享一下。
思路:
1. 把所有想要的商品的链接读到程序中。
2. 分别打开每一个链接读取源代码
3. 验证是否是金子商品(源代码中含有free_msg字符串)
4. 如果是金子就把该链接用IE打开
源代码:
读链接文件:
package com.gogler.net;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;
/**
 * Reads a text file and returns its content as a list of lines.
 *
 * <p>I/O failures are reported with {@code printStackTrace()} and never propagate to the
 * caller; whatever was read before the failure is still returned.
 */
public class FileReader {
    /** Path of the file to read; stays {@code null} when the no-arg constructor is used. */
    private String fileName;

    /** Creates a reader with no file configured; {@link #getLines()} then returns an empty list. */
    public FileReader() {}

    /**
     * Creates a reader for the given file.
     *
     * @param fileName path of the file to read
     */
    public FileReader(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Reads every line of the configured file, in file order.
     *
     * @return the lines read; an empty list (never {@code null}) when no file name is set or
     *     the file cannot be opened, and a partial list when reading fails midway
     */
    public List<String> getLines() {
        List<String> lines = new LinkedList<String>();
        if (this.fileName == null) {
            return lines;
        }

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(this.fileName)));
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException (a subclass): report it and fall through so the
            // caller gets the lines collected so far instead of a NullPointerException.
            e.printStackTrace();
        } finally {
            // Always release the file handle, whether reading succeeded or not.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return lines;
    }
}
Url类:
package com.gogler.net;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
/**
 * Wraps a URL string and offers helpers to open it and to search the content it returns.
 *
 * <p>Every failure (malformed URL, connection or read error) is reported with
 * {@code printStackTrace()} and turned into a {@code null} / {@code false} result rather
 * than an exception.
 */
public class Url {
    /** The address as text; it is only parsed when a connection is requested. */
    String url;

    /** Creates an instance with no address; set one with {@link #setUrl(String)}. */
    public Url() {}

    /**
     * Creates an instance for the given address.
     *
     * @param url the address as text
     */
    public Url(String url) {
        this.url = url;
    }

    /**
     * Returns the address as text.
     *
     * @return the stored address, possibly {@code null}
     */
    public String getUrl() {
        return url;
    }

    /**
     * Replaces the stored address.
     *
     * @param url the new address as text
     */
    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * Parses the stored address and creates a connection object for it.
     *
     * @return the connection, or {@code null} when the address is malformed or the
     *     connection object cannot be created
     */
    public URLConnection getConnection() {
        URL httpURL;
        try {
            httpURL = new URL(this.url);
        } catch (MalformedURLException e) {
            e.printStackTrace();
            return null;
        }

        try {
            return httpURL.openConnection();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Connects to the stored address and wraps the response in a line-oriented reader.
     *
     * <p>The caller owns the returned reader and must close it.
     *
     * @return a reader over the response, or {@code null} when no connection could be
     *     created or connecting/opening the stream failed
     */
    public BufferedReader getBuffer() {
        URLConnection conn = this.getConnection();
        if (conn == null) {
            return null;
        }

        // Give up on connecting after ten seconds.
        conn.setConnectTimeout(1000 * 10);
        try {
            conn.connect();
            return new BufferedReader(new InputStreamReader(conn.getInputStream()));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Tells whether any line of the content behind the stored address contains {@code str}.
     *
     * <p>Matching is per line, so a marker split across a line break is not found. Reading
     * stops at the first matching line.
     *
     * @param str the text to look for
     * @return {@code true} if some line contains {@code str}; {@code false} if none does, if
     *     {@code str} is {@code null}, or if the content could not be opened or read
     */
    public boolean isExit(String str) {
        if (str == null) {
            return false;
        }

        BufferedReader bis = getBuffer();
        if (bis == null) {
            // getBuffer() has already reported why; an unreachable page simply has no match.
            return false;
        }

        boolean exit = false;
        try {
            String line;
            while ((line = bis.readLine()) != null) {
                if (line.contains(str)) {
                    exit = true;
                    break;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                bis.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return exit;
    }
}
Digger类:
package com.gogler.net;
import java.io.IOException;
import java.util.List;
publicclassDigger extends Thread{
private Url url;
public Digger(){
super();
}
public Digger(Url url){
this.url = url;
}
/**
* @param args
*/
publicstaticvoid main(String[] args) {
FileReaderreader = newFileReader("D:/allan/craber.txt");
List<String>urls = reader.getLines();
for(String s :urls){
Urlurl = newUrl(s);
Digger digger = new Digger(url);
digger.start();
}
}
@Override
publicvoid run(){
if(url.isExit("配置文件")){
try{
Runtime.getRuntime().exec("C:/Program Files/Internet Explorer/iexplore.exe" + url.getUrl());
}catch(IOException e){
e.printStackTrace();
}
}
}
}