一、IDHTTP的基本用法
IDHttp和WebBrowser一样,都可以实现抓取远端网页的功能,但是http方式更快、更节约资源,缺点是需要手动维护cook,连接等
IDHttp的创建,需要引入IDHttp

{ Creates the shared TIdHTTP instance `http` and configures its timeout,
  redirect handler, default request headers and proxy settings.
  `http` and `OnRedirect` are declared outside this snippet. }
procedure InitHttp();
begin
  http := TIdHTTP.Create(nil);
  // NOTE(review): the numeric literal was lost from the article text;
  // 30000 ms is assumed here - confirm the intended read timeout.
  http.ReadTimeout := 30000;
  http.OnRedirect := OnRedirect;
  http.Request.Accept := 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, */*';
  http.Request.AcceptLanguage := 'zh-cn';
  http.Request.ContentType := 'application/x-www-form-urlencoded';
  http.Request.UserAgent := 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)';
  // The two proxy values below are placeholders from the article and must be
  // replaced with the real proxy address and port before use.
  // NOTE(review): Indy appears to declare ProxyPort as an Integer, so the final
  // value should be a number rather than a string - confirm.
  http.ProxyParams.ProxyServer := '代理服务器地址';
  http.ProxyParams.ProxyPort := '代理服务器端口';
end;

二、如何取得服务端返回的cookie信息,并添加到http的request对象中

procedure Setcookie;
{ Setcookie: collects the name=value part of every Set-Cookie header found in
  http.Response.RawHeaders, joins them with '; ' and stores the result as a
  single 'cookie: ' line in http.Request.RawHeaders, replacing the first
  existing cookie header if there is one. Does nothing when the response
  carried no cookies. `http` is the TIdHTTP instance declared elsewhere. }
var
  i, p: Integer;
  tmp, cookie: String;
begin
  cookie := '';
  for i := 0 to http.Response.RawHeaders.Count - 1 do
  begin
    tmp := http.Response.RawHeaders[i];
    // Locate the header name case-insensitively and reuse that position for
    // the extraction, so 'Set-Cookie: ' and 'set-cookie: ' are handled alike.
    p := Pos('set-cookie: ', LowerCase(tmp));
    if p = 0 then Continue;
    tmp := Trim(Copy(tmp, p + Length('set-cookie: '), Length(tmp)));
    // Drop the attributes (path, expires, ...) after the first ';'.
    // A cookie without attributes has no ';' and is kept whole.
    p := Pos(';', tmp);
    if p > 0 then tmp := Trim(Copy(tmp, 1, p - 1));
    if tmp = '' then Continue;
    if cookie = '' then cookie := tmp else cookie := cookie + '; ' + tmp;
  end;
  if cookie <> '' then
  begin
    for i := 0 to http.Request.RawHeaders.Count - 1 do
    begin
      tmp := http.Request.RawHeaders[i];
      // Only a line that starts with the header name is a cookie header;
      // matching 'cookie' anywhere could delete an unrelated header.
      if Pos('cookie:', LowerCase(tmp)) <> 1 then Continue;
      http.Request.RawHeaders.Delete(i);
      Break; // the list was modified, so stop iterating
    end;
    http.Request.RawHeaders.Add('cookie: ' + cookie);
  end;
end;

三、如何取得网页中的所有连接,对代码做修改你也可以实现查找所有图片等等

function GetURLList(Data: String): TStringList;
{ GetURLList: extracts link targets from the HTML text in Data.
  The text is split on 'href=', each piece is trimmed down to the URL,
  relative URLs are prefixed with the host/port/path of the current `http`
  URL, and duplicates are dropped.
  Returns a new TStringList that the caller must free.
  Q_ReplaceStr, Compare, CI.Key and `http` are defined outside this snippet. }
var
  i: Integer;
  List: TStringList;
  tmp: String;

  { Splits Data on every occurrence of Node; returns a new list owned by
    the caller. The text after the last Node is always the final item. }
  function Split(Data, Node: String): TStringList;
  var
    Count, i, j: Integer;

    { Number of occurrences of Node in Data, or -1 when there is none. }
    function GetFieldCount(Data, Node: String): Integer;
    var
      i: Integer;
    begin
      Result := -1;
      i := Pos(Node, Data);
      if i = 0 then Exit;
      Result := 0;
      while i <> 0 do
      begin
        Inc(Result);
        Delete(Data, 1, i + Length(Node) - 1);
        i := Pos(Node, Data);
      end;
    end;

  begin
    Result := TStringList.Create;
    Count := GetFieldCount(Data, Node);
    // One item per separator; a Count of -1 simply skips the loop.
    for i := 0 to Count - 1 do
    begin
      j := Pos(Node, Data);
      Result.Add(Copy(Data, 1, j - 1));
      Delete(Data, 1, j + Length(Node) - 1);
    end;
    Result.Add(Data);
  end;

begin
  Result := TStringList.Create;
  List := nil;
  try
    try
      List := Split(Data, 'href=');
      for i := 0 to List.Count - 1 do
      begin
        tmp := List[i];
        // Cut the piece down to the attribute value: up to '</a>', then up
        // to the closing '>', then up to the first space.
        tmp := Copy(tmp, 1, Pos('</a>', tmp) - 1);
        tmp := Copy(tmp, 1, Pos('>', tmp) - 1);
        if Pos(' ', tmp) <> 0 then tmp := Copy(tmp, 1, Pos(' ', tmp) - 1);
        // NOTE(review): the character codes were lost from the article text;
        // 34 (") and 39 (') are assumed, i.e. strip the quotes - confirm.
        tmp := Q_ReplaceStr(tmp, Char(34), '');
        tmp := Q_ReplaceStr(tmp, Char(39), '');
        if not Compare(CI.Key, tmp) then Continue;
        if Copy(tmp, 1, 7) <> 'http://' then
        begin
          // Remove up to two leading dots ('./x' or '../x') before joining.
          if Copy(tmp, 1, 1) = '.' then tmp := StringReplace(tmp, '.', '', []);
          if Copy(tmp, 1, 1) = '.' then tmp := StringReplace(tmp, '.', '', []);
          try
            tmp := 'http://' + http.URL.Host + ':' + http.URL.Port + http.URL.Path + tmp;
          except
            // Deliberate best effort: keep the relative URL if the base
            // URL cannot be read.
          end;
        end;
        if Result.IndexOf(tmp) <> -1 then Continue;
        Result.Add(tmp);
      end;
    finally
      // Always release the intermediate list, even when parsing fails.
      FreeAndNil(List);
    end;
  except
    // Deliberate best effort: on any error return the links found so far.
  end;
end;

四、如何模拟http的get方法打开一个网页

function GetMethod(http: TIDhttp; URL: String; Max: Integer): String;
{ GetMethod: fetches URL with an HTTP GET through the given TIdHTTP object and
  returns the response body as a string. On success the URL is stored as the
  Referer for the next request. On an exception the call is retried; Max is
  the maximum number of attempts. Returns '' once the attempts are used up. }
var
  RespData: TStringStream;
begin
  Result := '';
  RespData := TStringStream.Create('');
  try
    try
      Http.Get(URL, RespData);
      Http.Request.Referer := URL;
      Result := RespData.DataString;
    except
      Dec(Max);
      // '<=' rather than '=': a caller passing Max <= 0 must not recurse
      // without end.
      if Max <= 0 then
      begin
        Result := '';
        Exit;
      end;
      Result := GetMethod(http, URL, Max);
    end;
  finally
    FreeAndNil(RespData);
  end;
end;

五、如何模拟http的post方法提交一个网页

function PostMethod(URL, Data: String; max: Integer): String;
{ PostMethod: sends Data as the body of an HTTP POST to URL through the shared
  `http` object (declared elsewhere) and returns the response body.
  On success the URL is stored as the Referer for the next request. On an
  exception the call is retried; max is the maximum number of attempts.
  Returns '' when `http` is not assigned or the attempts are used up.
  The connection is closed after every attempt. }
var
  PostData, RespData: TStringStream;
begin
  Result := '';
  // Check before entering the try block: the finally section below calls
  // http.Disconnect and would fail on a nil object.
  if http = nil then Exit;
  PostData := nil;
  RespData := TStringStream.Create('');
  try
    PostData := TStringStream.Create(Data);
    try
      Http.Post(URL, PostData, RespData);
      Result := RespData.DataString;
      http.Request.Referer := URL;
    except
      Dec(Max);
      // '<=' rather than '=': a caller passing max <= 0 must not recurse
      // without end.
      if Max <= 0 then
      begin
        Result := '';
        Exit;
      end;
      Result := PostMethod(URL, Data, Max);
    end;
  finally
    http.Disconnect;
    FreeAndNil(RespData);
    FreeAndNil(PostData);
  end;
end;

六、伪造session

var
My_Cookie, tmpcookie: string;
begin
  // Request the page once so the server issues its session cookie.
  aIdHttp.Get('http://www.huochepiao.net/');
  // Set-Cookie is sent by the server, so it has to be read from the response
  // headers; the request's CustomHeaders never contain it.
  tmpcookie := aIdHttp.Response.RawHeaders.Values['Set-Cookie'];
  // Keep only the name=value part in front of the first ';'.
  if Pos(';', tmpcookie) > 0 then
    My_Cookie := LeftBStr(tmpcookie, Pos(';', tmpcookie) - 1)
  else
    My_Cookie := tmpcookie;
  // Send the captured cookie back on every following request.
  aIdHTTP.Request.CustomHeaders.Clear;
  aIdHTTP.Request.CustomHeaders.Add('Cookie:' + My_COOKIE);
end;
http://blog.csdn.net/yanjiaye520/article/details/8199016