// 掌握 C# 爬虫技术:使用 HttpClient 获取今日头条内容
using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace CSharpWebScraper
{
    /// <summary>
    /// Console scraper that downloads the Toutiao home page through an
    /// authenticated HTTP proxy and prints the hot-topic titles found in the HTML.
    /// </summary>
    class Program
    {
        // Yiniu Cloud crawler proxy settings. These are placeholders: replace them
        // with the values issued by the proxy provider before running.
        // NOTE(review): credentials should not live in source; consider loading
        // them from configuration or environment variables.
        private const string ProxyHost = "代理域名";
        // Placeholder port. The original code referenced an undeclared identifier
        // here, which did not compile. Replace with the real proxy port.
        private const int ProxyPort = 8080;
        private const string ProxyUser = "用户名";
        private const string ProxyPassword = "密码";

        // Page to scrape.
        private const string TargetUrl = "https://www.toutiao.com/";

        // Matches anchors carrying the "link title" class and captures their inner
        // text in group 1. Built once instead of on every run of the parsing code.
        private static readonly Regex HotTopicRegex = new Regex(
            "<a class=\"link title\".*?>(.*?)</a>",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Program entry point: fetches the target page via the proxy, extracts the
        /// hot-topic titles and writes them, followed by their count, to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <returns>A task that completes when the scrape has finished.</returns>
        static async Task Main(string[] args)
        {
            // HttpClient disposes the handler it was given when it is disposed
            // itself, so a single using covers both objects.
            using (var client = new HttpClient(CreateProxyHandler()))
            {
                // Present a browser-like User-Agent header with the request.
                client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0...");

                try
                {
                    using (var response = await client.GetAsync(TargetUrl))
                    {
                        if (!response.IsSuccessStatusCode)
                        {
                            // Non-2xx answer: report the status code and stop.
                            Console.WriteLine("请求失败: " + response.StatusCode);
                            return;
                        }

                        string content = await response.Content.ReadAsStringAsync();
                        List<string> hotTopics = ExtractHotTopics(content);

                        Console.WriteLine("热点新闻标题:");
                        foreach (var topic in hotTopics)
                        {
                            Console.WriteLine(topic);
                        }

                        int hotTopicsCount = hotTopics.Count;
                        Console.WriteLine($"共发现 {hotTopicsCount} 条热点新闻。");
                    }
                }
                catch (HttpRequestException e)
                {
                    // Transport-level failure (DNS, connection, proxy, TLS, ...).
                    Console.WriteLine("请求异常: " + e.Message);
                }
                catch (TaskCanceledException e)
                {
                    // HttpClient signals an elapsed timeout with this exception,
                    // which the HttpRequestException handler does not cover.
                    Console.WriteLine("请求超时: " + e.Message);
                }
            }
        }

        // Builds the message handler that routes requests through the authenticated proxy.
        private static HttpClientHandler CreateProxyHandler()
        {
            return new HttpClientHandler
            {
                Proxy = new WebProxy(ProxyHost, ProxyPort)
                {
                    Credentials = new NetworkCredential(ProxyUser, ProxyPassword)
                }
            };
        }

        // Returns the text captured by HotTopicRegex for every match in the given HTML, in document order.
        private static List<string> ExtractHotTopics(string html)
        {
            var hotTopics = new List<string>();
            if (string.IsNullOrEmpty(html))
            {
                return hotTopics;
            }

            foreach (Match match in HotTopicRegex.Matches(html))
            {
                hotTopics.Add(match.Groups[1].Value);
            }

            return hotTopics;
        }
    }
}