using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;

namespace allen
{
    /// <summary>
    /// Console tool that downloads a fixed range of pages from a web site,
    /// extracts the article text from each page and appends it to a local text file.
    /// </summary>
    class Program
    {
        /// <summary>URL template of a page; {0} is replaced by the page number.</summary>
        private const string PageUrlFormat = "http://www.txt66.com/read2.asp?id=8480&PageNum={0}";

        /// <summary>File the extracted text is appended to.</summary>
        private const string OutputPath = @"F:\g.txt";

        /// <summary>First page number that is requested.</summary>
        private const int FirstPage = 1;

        /// <summary>Last page number that is requested (inclusive).</summary>
        private const int LastPage = 123;

        /// <summary>Pause, in milliseconds, after each page has been written.</summary>
        private const int DelayBetweenPagesMs = 600;

        /// <summary>Matches any run of whitespace; used to collapse the page before matching.</summary>
        private static readonly Regex WhitespaceRegex = new Regex(@"\s+");

        /// <summary>
        /// Matches from the download-link marker up to the last closing
        /// table-cell marker of the whitespace-stripped page (".*" is greedy).
        /// </summary>
        private static readonly Regex BodyRegex =
            new Regex("点此下载封神演义.txt</font></font></a></div></td>.*</div></td></tr><tr><tdclass=");

        /// <summary>
        /// Downloads the resource at the given URL and returns its content as text.
        /// </summary>
        /// <param name="url">Address of the page to fetch; must be an HTTP(S) URL.</param>
        /// <returns>The complete response body decoded with <see cref="Encoding.Default"/>.</returns>
        /// <exception cref="InvalidCastException">The URL scheme does not produce an HTTP request.</exception>
        /// <exception cref="WebException">The request failed.</exception>
        static string GetHtml(string url)
        {
            // Direct casts fail with a descriptive InvalidCastException instead of a
            // NullReferenceException on the next line when the URL is not HTTP(S).
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            // Dispose response, stream and reader even when reading throws, so the
            // underlying connection is always released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            // NOTE(review): Encoding.Default is platform-dependent; the page's real
            // charset is not visible from this file - confirm it decodes correctly.
            using (StreamReader reader = new StreamReader(stream, Encoding.Default))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Filter that keeps only the article body of a downloaded page.
        /// </summary>
        /// <param name="htmlStr">Raw HTML of one page.</param>
        /// <returns>
        /// The text between the download-link marker and the closing table-cell marker,
        /// with all whitespace removed, the site watermark stripped and every
        /// <c>&lt;br&gt;</c> turned into a line break; an empty string when the input
        /// is null/empty or the markers are not found.
        /// </returns>
        static string MyFilter(string htmlStr)
        {
            if (string.IsNullOrEmpty(htmlStr))
            {
                return string.Empty;
            }

            // Remove every whitespace character first; the body pattern is written
            // against the whitespace-free form of the page (note "<tdclass=").
            string compact = WhitespaceRegex.Replace(htmlStr, "");

            Match match = BodyRegex.Match(compact);
            if (!match.Success)
            {
                return string.Empty;
            }

            // Strip the delimiters the pattern matched, leftover table markup and the
            // site watermark, then convert HTML line breaks to real ones.
            string result = match.Value;
            result = result.Replace("点此下载封神演义.txt</font></font></a></div></td>", "");
            result = result.Replace("</div></td></tr><tr><tdclass=", "");
            result = result.Replace("</tr></table>", "");
            result = result.Replace("本文章下载于www.Txt66.com", "");
            result = result.Replace("<br>", Environment.NewLine);
            return result;
        }

        /// <summary>
        /// Fetches every page from <see cref="FirstPage"/> to <see cref="LastPage"/>,
        /// filters it and appends the text to <see cref="OutputPath"/>.
        /// </summary>
        static void WriteFile()
        {
            // "using" guarantees the writer is flushed and closed even if a download
            // fails part-way, so text already written is not lost.
            // The file is opened in append mode and written with Encoding.Unicode.
            using (StreamWriter writer = new StreamWriter(OutputPath, true, Encoding.Unicode))
            {
                for (int page = FirstPage; page <= LastPage; page++)
                {
                    string pageUrl = string.Format(PageUrlFormat, page);
                    string html = GetHtml(pageUrl);

                    writer.Write(MyFilter(html));
                    Console.WriteLine("写入第{0}页", page);

                    // Pause between requests (presumably to go easy on the server).
                    System.Threading.Thread.Sleep(DelayBetweenPagesMs);
                }
            }
        }

        /// <summary>
        /// Program entry point: runs the download, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            WriteFile();
            Console.ReadKey();
        }
    }
}