C# 采集网页用的几个函数

C# 采集网页用的几个函数代码:
/// <summary>
/// Downloads the resource at <paramref name="Url"/> over HTTP and returns its body decoded as text.
/// </summary>
/// <param name="Url">Address handed to <c>WebRequest.Create</c>; the resulting request is cast to <c>HttpWebRequest</c>.</param>
/// <param name="charset">Name of the encoding used to decode the body. Null or empty selects "gb2312".</param>
/// <returns>The decoded response body, or an empty string when the download or decoding fails.</returns>
public string GetHtmlSource(string Url, string charset)
{
    if (string.IsNullOrEmpty(charset))
    {
        charset = "gb2312";
    }

    string html = "";

    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

        // The using blocks release the connection, stream and reader even when
        // reading or decoding throws part-way through.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding(charset)))
        {
            html = reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Deliberate best-effort: any failure (malformed URL, network error,
        // unknown charset, ...) is reported to the caller as whatever text was
        // read so far, which is normally the empty string.
    }

    return html;
}

 
/// <summary>
/// Searches <paramref name="code"/> for text enclosed between <paramref name="wordsBegin"/> and
/// <paramref name="wordsEnd"/> and returns the text captured by the last match.
/// </summary>
/// <param name="code">Text to search.</param>
/// <param name="wordsBegin">Opening delimiter; inserted into the regular expression unescaped.</param>
/// <param name="wordsEnd">Closing delimiter; inserted into the regular expression unescaped.</param>
/// <returns>The lazily-matched text between the delimiters of the last match, or an empty string when nothing matches.</returns>
public string SniffwebCode(string code, string wordsBegin, string wordsEnd)
{
    string pattern = wordsBegin + @"(?<title>[\s\S]+?)" + wordsEnd;
    Regex delimiterRegex = new Regex(pattern, RegexOptions.Compiled | RegexOptions.IgnoreCase);

    string lastCaptured = "";
    Match current = delimiterRegex.Match(code);
    while (current.Success)
    {
        // Each hit overwrites the previous one, so the final match wins.
        lastCaptured = current.Groups["title"].Value;
        current = current.NextMatch();
    }

    return lastCaptured;
}

/// <summary>
/// Collects every piece of text in <paramref name="code"/> that is enclosed between
/// <paramref name="wordsBegin"/> and <paramref name="wordsEnd"/>.
/// </summary>
/// <param name="code">Text to search.</param>
/// <param name="wordsBegin">Opening delimiter; inserted into the regular expression unescaped.</param>
/// <param name="wordsEnd">Closing delimiter; inserted into the regular expression unescaped.</param>
/// <returns>An <c>ArrayList</c> of strings, one per match in order of appearance; empty when nothing matches.</returns>
public ArrayList SniffwebCodeReturnList(string code, string wordsBegin, string wordsEnd)
{
    string pattern = wordsBegin + @"(?<title>[\s\S]+?)" + wordsEnd;
    Regex delimiterRegex = new Regex(pattern, RegexOptions.Compiled | RegexOptions.IgnoreCase);

    ArrayList captured = new ArrayList();
    foreach (Match hit in delimiterRegex.Matches(code))
    {
        captured.Add(hit.Groups["title"].Value);
    }

    return captured;
}

 

上一篇:linux memcached状态查询


下一篇:教你设置office word/excel 2007/2010默认保存格式是2003兼容格式