HttpWebRequest抓数据遇到的问题

1、有些网站访问速度慢(比如全球内衣,另外对于女生各种什么内衣不懂的也可以上去查看了解哈),而且到这个网站的连接因为没有及时关闭,连接数越积越多,造成抓取页面数据的时候超时很严重。

解决:调用相应的HttpWebResponse.Close()、HttpWebRequest.Abort(),并设置HttpWebRequest.KeepAlive=false,还有把超时时间设置长一点,之后连接超时的几率就降低了。还可以在C盘的hosts文件里把域名直接指向某个IP,减少去DNS服务器查找的时间。

2、抓中国供应商的时候开了多线程,跑得太快,抓了几十条就出现拉动(滑块)类的验证码。

解决:使用代理,或者使用移动的宽带,通过断开后重新拨号来解决:

RASDisplay ras = new RASDisplay();
ras.Disconnect();// hang up the current connection
ras.Connect("ADSL");// redial the entry named "ADSL"

// Dialing does not complete immediately, so wait before continuing
Thread.Sleep(5000);

下面是封装请求的类库

/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET and returns the response body as text.
/// </summary>
/// <param name="url">Absolute HTTP/HTTPS address to fetch.</param>
/// <param name="charset">
/// Name of the encoding used to decode the response body (for example "utf-8" or "gb2312").
/// It is also sent in the custom "content" request header.
/// </param>
/// <returns>
/// The decoded response body, or an empty string when the request could not be completed
/// (bad URL, unknown charset, timeout, network or HTTP error). This method is best-effort
/// and does not throw; failures are written to <see cref="System.Diagnostics.Trace"/>.
/// </returns>
public static string getRequest(string url, string charset = "utf-8")
{
    HttpWebRequest myreq = null;
    HttpWebResponse myres = null;
    string result = "";

    try
    {
        myreq = (HttpWebRequest)WebRequest.Create(url);
        myreq.Timeout = 20000;
        myreq.Method = "GET";
        // Keep-alive stays off so that connections to slow hosts are released right away
        // instead of piling up; the previous code switched it back on just before sending.
        myreq.KeepAlive = false;
        myreq.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
        myreq.Headers.Add("content", "text/html; charset=" + charset);
        myreq.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36";

        myres = (HttpWebResponse)myreq.GetResponse();

        // using-blocks release the stream and reader even when decoding or reading fails.
        using (Stream stream = myres.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.GetEncoding(charset)))
        {
            result = reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Best-effort contract: callers get "" on failure, but the reason is no longer lost.
        System.Diagnostics.Trace.TraceWarning("getRequest failed for '" + url + "': " + ex.Message);
    }
    finally
    {
        // The response is null whenever the failure happened before GetResponse returned,
        // so it must be checked itself (the old code tested the request instead).
        if (myres != null)
        {
            myres.Close();
        }
        if (myreq != null)
        {
            myreq.Abort();
        }
    }

    return result;
}

#region 自动拨号
/* 自动拨号
* 1、右击“网上邻居”--属性;
2、选择“宽带连接”,右击“属性”- >“选项”;
3、把“提示名称、密码和证书等”前面的对号去掉,点“确定”退出;

4、生成模式务必改成x86
*/
/// <summary>
/// Managed mirror of the native connection record filled in by
/// <c>RAS.RasEnumConnections</c> (bound to the ANSI export <c>RasEnumConnectionsA</c>).
/// </summary>
/// <remarks>
/// The layout is spelled out explicitly: sequential field order with ANSI strings, which
/// is what the C# compiler and marshaller apply by default to a struct like this one.
/// The field order and the fixed buffer sizes must not be changed.
/// </remarks>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
public struct RASCONN
{
    /// <summary>Size of this structure in bytes; the caller sets it before the native call.</summary>
    public int dwSize;

    /// <summary>Native handle of the connection.</summary>
    public IntPtr hrasconn;

    /// <summary>Entry name of the connection (inline buffer of 257 characters).</summary>
    [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 257)]
    public string szEntryName;

    /// <summary>Device type string (inline buffer of 17 characters).</summary>
    [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 17)]
    public string szDeviceType;

    /// <summary>Device name string (inline buffer of 129 characters).</summary>
    [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 129)]
    public string szDeviceName;
}

/// <summary>
/// Statistics buffer handed to <c>RAS.RasGetConnectionStatistics</c>; fifteen 32-bit
/// fields in the order of the Win32 <c>RAS_STATS</c> structure.
/// </summary>
/// <remarks>
/// This is a class rather than a struct on purpose. The P/Invoke declares the parameter as
/// <c>[In, Out] RasStats</c> without <c>ref</c>; a sequential-layout class in that position is
/// marshalled as a pointer to its fields, which is what the native function expects, and the
/// values it writes are visible to the caller afterwards. A struct in the same position is
/// copied by value, so the native call never received a valid buffer and the managed copy
/// was never updated.
/// <see cref="dwSize"/> is pre-set because the native API requires the structure size on input.
/// </remarks>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
public class RasStats
{
    /// <summary>Size of this structure in bytes; initialised automatically.</summary>
    public int dwSize = Marshal.SizeOf(typeof(RasStats));
    /// <summary>Bytes transmitted.</summary>
    public int dwBytesXmited;
    /// <summary>Bytes received.</summary>
    public int dwBytesRcved;
    /// <summary>Frames transmitted.</summary>
    public int dwFramesXmited;
    /// <summary>Frames received.</summary>
    public int dwFramesRcved;
    /// <summary>CRC error count.</summary>
    public int dwCrcErr;
    /// <summary>Timeout error count.</summary>
    public int dwTimeoutErr;
    /// <summary>Alignment error count.</summary>
    public int dwAlignmentErr;
    /// <summary>Hardware overrun error count.</summary>
    public int dwHardwareOverrunErr;
    /// <summary>Framing error count.</summary>
    public int dwFramingErr;
    /// <summary>Buffer overrun error count.</summary>
    public int dwBufferOverrunErr;
    /// <summary>Inbound compression ratio.</summary>
    public int dwCompressionRatioIn;
    /// <summary>Outbound compression ratio.</summary>
    public int dwCompressionRatioOut;
    /// <summary>Link speed in bits per second.</summary>
    public int dwBps;
    /// <summary>Connection duration; the consumer in this file divides it by 1000 to get seconds.</summary>
    public int dwConnectionDuration;
}

/// <summary>
/// One phone-book entry as returned by <c>RAS.RasEnumEntries</c>.
/// </summary>
/// <remarks>
/// The native record stores the entry name inline as a fixed buffer of
/// RAS_MaxEntryName + 1 = 257 characters. Without the <c>ByValTStr</c> attribute the string
/// is marshalled as a pointer instead, which makes <c>Marshal.SizeOf</c> (and therefore
/// <see cref="dwSize"/>) far too small and leaves the name unpopulated.
/// The newer optional fields (dwFlags, szPhonebookPath) are intentionally not declared;
/// this is the original short form of the record.
/// </remarks>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
public struct RasEntryName
{
    /// <summary>Size of this structure in bytes; the caller sets it before the native call.</summary>
    public int dwSize;

    /// <summary>Phone-book entry name (inline buffer of 257 characters).</summary>
    [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 257)]
    public string szEntryName;
}
/// <summary>
/// P/Invoke declarations for the native dial-up functions used by this file
/// (rasapi32.dll and wininet.dll).
/// </summary>
public class RAS
{
    /// <summary>Enumerates active RAS connections (bound to the ANSI export).</summary>
    /// <param name="lprasconn">Buffer that receives connection data.</param>
    /// <param name="lpcb">Size of the buffer in bytes.</param>
    /// <param name="lpcConnections">Number of connections written to the buffer.</param>
    /// <returns>Zero on success, otherwise an error code.</returns>
    [DllImport("Rasapi32.dll", EntryPoint = "RasEnumConnectionsA", SetLastError = true)]
    internal static extern int RasEnumConnections(
        ref RASCONN lprasconn,
        ref int lpcb,
        ref int lpcConnections);

    /// <summary>Retrieves statistics for a connection.</summary>
    /// <param name="hRasConn">Handle to the connection.</param>
    /// <param name="lpStatistics">Buffer that receives the statistics.</param>
    /// <remarks>
    /// NOTE(review): the buffer is declared without <c>ref</c>. The native function expects
    /// a pointer, so results can only come back if <c>RasStats</c> is marshalled by
    /// reference (for example when it is a sequential-layout class) — confirm.
    /// </remarks>
    [DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
    internal static extern uint RasGetConnectionStatistics(
        IntPtr hRasConn,
        [In, Out] RasStats lpStatistics);

    /// <summary>Hangs up a RAS connection.</summary>
    /// <param name="hrasconn">Handle to the RAS connection to hang up.</param>
    [DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
    public static extern uint RasHangUp(IntPtr hrasconn);

    /// <summary>Enumerates phone-book entries.</summary>
    /// <param name="reserved">Reserved, must be null.</param>
    /// <param name="lpszPhonebook">Full path and file name of the phone-book file.</param>
    /// <param name="lprasentryname">Buffer that receives the phone-book entries.</param>
    /// <param name="lpcb">Size of the buffer in bytes.</param>
    /// <param name="lpcEntries">Number of entries written to the buffer.</param>
    [DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
    public static extern uint RasEnumEntries(
        string reserved,
        string lpszPhonebook,
        [In, Out] RasEntryName[] lprasentryname,
        ref int lpcb,
        out int lpcEntries);

    /// <summary>Dials the named connection through WinINet.</summary>
    /// <param name="hwnd">Parent window handle.</param>
    /// <param name="lpszConnectoid">Name of the dial-up connection to use.</param>
    /// <param name="dwFlags">Dialing option flags.</param>
    /// <param name="lpdwConnection">Receives a connection identifier.</param>
    /// <param name="dwReserved">Reserved value; the caller in this file passes 0.</param>
    [DllImport("wininet.dll", CharSet = CharSet.Auto)]
    public static extern int InternetDial(
        IntPtr hwnd,
        [In] string lpszConnectoid,
        uint dwFlags,
        ref int lpdwConnection,
        uint dwReserved);

    // No constructor is declared: the compiler-generated public parameterless
    // constructor keeps `new RAS()` working exactly as before.
}
/// <summary>Kind of cached data to delete.</summary>
public enum DEL_CACHE_TYPE
{
    /// <summary>Temporary Internet files.</summary>
    File = 0,

    /// <summary>Cookies.</summary>
    Cookie = 1
}

/// <summary>
/// Snapshot of the machine's dial-up state plus helpers to hang up and redial.
/// </summary>
/// <remarks>
/// The constructor queries the first active RAS connection and the list of phone-book
/// entries once. The properties report that snapshot; create a new instance to observe the
/// state after <see cref="Connect"/> has been called.
/// </remarks>
public class RASDisplay
{
    // NOTE(review): the Win32 DeleteUrlCacheEntry export is documented as taking a URL
    // string, not an enum. The declaration is kept unchanged for callers; confirm before use.
    [DllImport("wininet.dll", CharSet = CharSet.Auto)]
    public static extern bool DeleteUrlCacheEntry(
        DEL_CACHE_TYPE type
    );

    private string m_duration;
    private string m_ConnectionName;
    private string[] m_ConnectionNames = new string[0];
    private double m_TX;
    private double m_RX;
    private bool m_connected;
    private IntPtr m_ConnectedRasHandle;

    /// <summary>
    /// Captures the current connection (if any) and the phone-book entry names.
    /// </summary>
    public RASDisplay()
    {
        m_connected = ReadActiveConnection();

        // The entry list is read even when no connection is active, so Connections
        // is never left null.
        m_ConnectionNames = ReadPhonebookEntries();
    }

    /// <summary>
    /// Queries the first active connection and stores its handle, name, duration and
    /// byte counters.
    /// </summary>
    /// <returns>True when an active connection was found.</returns>
    private bool ReadActiveConnection()
    {
        RASCONN conn = new RASCONN();
        conn.dwSize = Marshal.SizeOf(typeof(RASCONN));
        conn.hrasconn = IntPtr.Zero;

        int bufferSize = Marshal.SizeOf(typeof(RASCONN));
        int connectionCount = 0;

        int rc = RAS.RasEnumConnections(ref conn, ref bufferSize, ref connectionCount);
        if (rc != 0 || connectionCount <= 0)
        {
            return false;
        }

        m_ConnectedRasHandle = conn.hrasconn;
        m_ConnectionName = conn.szEntryName;

        // NOTE(review): the statistics buffer is passed without ref, so the values read
        // below are only populated if RasStats is marshalled by reference — confirm against
        // the RasStats declaration. dwSize is deliberately not assigned here.
        RasStats stats = new RasStats();
        RAS.RasGetConnectionStatistics(conn.hrasconn, stats);

        // dwConnectionDuration is treated as milliseconds.
        int totalSeconds = stats.dwConnectionDuration / 1000;
        int hours = totalSeconds / 3600;
        int minutes = (totalSeconds / 60) - (hours * 60);
        int seconds = totalSeconds - (minutes * 60) - (hours * 3600);

        m_duration = hours + " hours " + minutes + " minutes " + seconds + " secs";
        m_TX = stats.dwBytesXmited;
        m_RX = stats.dwBytesRcved;
        return true;
    }

    /// <summary>
    /// Enumerates the phone-book entries, growing the buffer once if more than one entry exists.
    /// </summary>
    /// <returns>The entry names; an empty array when enumeration fails or finds nothing.</returns>
    private static string[] ReadPhonebookEntries()
    {
        int entrySize = Marshal.SizeOf(typeof(RasEntryName));
        int entryCount = 1;
        int bufferSize = entryCount * entrySize;

        RasEntryName[] entries = new RasEntryName[entryCount];
        entries[0].dwSize = entrySize;

        uint rc = RAS.RasEnumEntries(null, null, entries, ref bufferSize, out entryCount);

        // More than one entry: the first call only reported the count, so call again
        // with a buffer that is large enough.
        if (entryCount > 1)
        {
            entries = new RasEntryName[entryCount];
            for (int i = 0; i < entries.Length; i++)
            {
                entries[i].dwSize = entrySize;
            }

            rc = RAS.RasEnumEntries(null, null, entries, ref bufferSize, out entryCount);
        }

        if (rc != 0 || entryCount <= 0)
        {
            return new string[0];
        }

        // Never read past the buffer, and never report slots the native call did not fill.
        int usable = entryCount < entries.Length ? entryCount : entries.Length;
        string[] result = new string[usable];
        for (int i = 0; i < usable; i++)
        {
            result[i] = entries[i].szEntryName;
        }
        return result;
    }

    /// <summary>Duration of the active connection as text, or "" when not connected.</summary>
    public string Duration
    {
        get { return m_connected ? m_duration : ""; }
    }

    /// <summary>Names of the phone-book entries; never null.</summary>
    public string[] Connections
    {
        get { return m_ConnectionNames; }
    }

    /// <summary>Bytes transmitted on the active connection, or 0 when not connected.</summary>
    public double BytesTransmitted
    {
        get { return m_connected ? m_TX : 0; }
    }

    /// <summary>Bytes received on the active connection, or 0 when not connected.</summary>
    public double BytesReceived
    {
        get { return m_connected ? m_RX : 0; }
    }

    /// <summary>Entry name of the active connection, or "" when not connected.</summary>
    public string ConnectionName
    {
        get { return m_connected ? m_ConnectionName : ""; }
    }

    /// <summary>Whether an active connection was found (and has not been hung up through this instance).</summary>
    public bool IsConnected
    {
        get { return m_connected; }
    }

    /// <summary>Dials the named phone-book entry without user interaction.</summary>
    /// <param name="Connection">Name of the dial-up entry, for example "ADSL".</param>
    /// <returns>The value returned by InternetDial (0 indicates success).</returns>
    public int Connect(string Connection)
    {
        const uint INTERNET_AUTO_DIAL_UNATTENDED = 2;
        int connectionId = 0;
        return RAS.InternetDial(IntPtr.Zero, Connection, INTERNET_AUTO_DIAL_UNATTENDED, ref connectionId, 0);
    }

    /// <summary>
    /// Hangs up the connection captured by the constructor. Does nothing when no
    /// connection was captured or it has already been hung up.
    /// </summary>
    public void Disconnect()
    {
        if (m_ConnectedRasHandle == IntPtr.Zero)
        {
            return;
        }

        uint rc = RAS.RasHangUp(m_ConnectedRasHandle);
        if (rc == 0)
        {
            // The handle is no longer valid after a successful hang-up; forget it so a
            // second Disconnect does not act on a stale handle.
            m_ConnectedRasHandle = IntPtr.Zero;
            m_connected = false;
        }
    }
}

#endregion 自动拨号

上一篇:ASP.NET MVC中的ActionFilter介绍学习


下一篇:Chapter 18_0 数学库