掌握 C# 爬虫技术:使用 HttpClient 获取今日头条内容

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace CSharpWebScraper
{
    /// <summary>
    /// Console sample that downloads the Toutiao home page through an HTTP proxy
    /// and prints every headline title matched in the returned HTML.
    /// </summary>
    class Program
    {
        // Address of the page to download.
        private const string TargetUrl = "https://www.toutiao.com/";

        // Matches headline anchors and captures the anchor text in group 1.
        // Built once and reused instead of being re-created on every call.
        private static readonly Regex TitleLinkRegex =
            new Regex("<a class=\"link title\".*?>(.*?)</a>", RegexOptions.IgnoreCase);

        /// <summary>
        /// Program entry point: fetches <see cref="TargetUrl"/> through the configured
        /// proxy, extracts the headline titles and writes them, followed by their
        /// count, to the console. Request failures are reported on the console
        /// instead of being propagated.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            // Proxy server settings. "代理域名", 端口号, "用户名" and "密码" are
            // placeholders for the proxy host, port, user name and password; they
            // must be replaced with real values (端口号 with an integer literal)
            // before the program will compile and run.
            var proxy = new HttpClientHandler
            {
                Proxy = new WebProxy("代理域名", 端口号)
                {
                    Credentials = new NetworkCredential("用户名", "密码")
                }
            };

            // Disposing the client also disposes the handler that was passed to it.
            using (var client = new HttpClient(proxy))
            {
                // Send a browser-like User-Agent header with every request.
                client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0...");

                try
                {
                    // Issue the GET request; the response is released when the block exits.
                    using (var response = await client.GetAsync(TargetUrl))
                    {
                        if (!response.IsSuccessStatusCode)
                        {
                            // Non-2xx status: report it and stop.
                            Console.WriteLine("请求失败: " + response.StatusCode);
                            return;
                        }

                        string content = await response.Content.ReadAsStringAsync();
                        List<string> hotTopics = ExtractHotTopics(content);

                        // Print every title followed by the total count.
                        Console.WriteLine("热点新闻标题:");
                        foreach (var topic in hotTopics)
                        {
                            Console.WriteLine(topic);
                        }
                        Console.WriteLine($"共发现 {hotTopics.Count} 条热点新闻。");
                    }
                }
                catch (HttpRequestException e)
                {
                    // Network-level failure (DNS, connection, proxy, TLS, ...).
                    Console.WriteLine("请求异常: " + e.Message);
                }
                catch (TaskCanceledException e)
                {
                    // HttpClient reports a request timeout as a cancelled task,
                    // which the HttpRequestException handler above does not catch.
                    Console.WriteLine("请求超时: " + e.Message);
                }
            }
        }

        // Returns the text of capture group 1 for every headline anchor found in html.
        private static List<string> ExtractHotTopics(string html)
        {
            var titles = new List<string>();
            foreach (Match match in TitleLinkRegex.Matches(html))
            {
                titles.Add(match.Groups[1].Value);
            }
            return titles;
        }
    }
}
上一篇:StringBuilder和StringJoiner来实现字符串拼接


下一篇:Python 启动Appium Service然后执行APP自动化case