C# 使用 URL 读取网页的标题内容

一、直接加载数据的网页

 /// <summary>
 /// Downloads the page at <paramref name="url"/> and returns its
 /// <c>&lt;title&gt;</c> element.
 /// </summary>
 /// <param name="url">Address of the page; surrounding whitespace is trimmed.</param>
 /// <returns>
 /// The matched markup, including the <c>&lt;title&gt;</c> and
 /// <c>&lt;/title&gt;</c> tags, with every double-quote character removed.
 /// An empty string when the page has no title element. The fixed message
 /// "输入的网址不存在或非法..." when the URL is missing or malformed, or when the
 /// request fails.
 /// </returns>
 public String getTitle(String url)
        {
            const String invalidUrlMessage = "输入的网址不存在或非法...";

            // A missing URL can never be requested; report it the same way as any
            // other bad address instead of failing on url.Trim().
            if (String.IsNullOrWhiteSpace(url))
            {
                return invalidUrlMessage;
            }

            WebResponse webRes = null;
            Stream webStream = null;
            try
            {
                // Create() throws on a malformed URL, so it has to sit inside the
                // try block for the "invalid URL" message to actually be returned.
                System.Net.WebRequest wb = System.Net.WebRequest.Create(url.Trim());
                webRes = wb.GetResponse();
                webStream = webRes.GetResponseStream();
            }
            catch (Exception)
            {
                // Release the connection if the response was obtained but the
                // stream could not be opened.
                if (webRes != null)
                {
                    webRes.Close();
                }
                return invalidUrlMessage;
            }

            // Page text with all lines joined (ReadLine drops the line breaks).
            StringBuilder sb = new StringBuilder();

            // The using blocks close the reader, stream and response even if
            // reading fails. The page is decoded as UTF-8; change the encoding
            // here if the output is garbled.
            using (webRes)
            using (webStream)
            using (StreamReader sr = new StreamReader(webStream, System.Text.Encoding.UTF8))
            {
                String str;
                while ((str = sr.ReadLine()) != null)
                {
                    sb.Append(str);
                }
            }

            // Lazy quantifier: stop at the FIRST closing tag, otherwise a later
            // </title> (e.g. inside inline SVG) would be swallowed into the match.
            // IgnoreCase also accepts <TITLE>.
            String regex = @"<title>.+?</title>";
            String title = Regex.Match(sb.ToString(), regex, RegexOptions.IgnoreCase).ToString();

            // Strip double quotes from the result.
            title = Regex.Replace(title, @"[\""]+", "");
            return title;
        }

 

 

二、JS 动态加载数据的网页

/// <summary>
/// Fetches <paramref name="url"/> with an HTTP GET and returns the text inside
/// the page's <c>&lt;title&gt;</c> element.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="post_parament">Not used by this method; kept so existing callers still compile.</param>
/// <returns>
/// The title text with surrounding whitespace trimmed, or an empty string when
/// the page contains no title element.
/// </returns>
/// <remarks>
/// Request and read failures (malformed URL, timeout, HTTP error) are not
/// caught here and propagate to the caller. The body is decoded as UTF-8.
/// </remarks>
public static string GetTitel(string url, string post_parament)
        {
            HttpWebRequest Web_Request = (HttpWebRequest)WebRequest.Create(url);
            Web_Request.Timeout = 30000;
            Web_Request.Method = "GET";
            Web_Request.UserAgent = "Mozilla/4.0";

            // Let the framework send Accept-Encoding and undo the compression.
            // Advertising "gzip, deflate" by hand while only decoding gzip would
            // return garbage whenever the server picked deflate.
            Web_Request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

            // To go through a proxy, assign a WebProxy to Web_Request.Proxy here,
            // before the request is sent.

            string html; // page source
            using (HttpWebResponse Web_Response = (HttpWebResponse)Web_Request.GetResponse())
            using (Stream Stream_Receive = Web_Response.GetResponseStream())
            using (StreamReader Stream_Reader = new StreamReader(Stream_Receive, Encoding.UTF8))
            {
                html = Stream_Reader.ReadToEnd();
            }

            // Lazy capture stops at the first closing tag; IgnoreCase accepts
            // <TITLE>; Singleline lets the title span several lines; the optional
            // attribute group accepts e.g. <title id="t">.
            Match m = Regex.Match(
                html,
                @"<title(?:\s[^>]*)?>(.*?)</title>",
                RegexOptions.IgnoreCase | RegexOptions.Singleline);

            // Groups.Count reflects the pattern, not the outcome, so Success is
            // the only reliable "was a title found" check.
            return m.Success ? m.Groups[1].Value.Trim() : "";
        }

 

C# 使用url读取 网页的标题内容

上一篇:[Php] windows下使用composer出现SHA384 is not supported by your openssl extension


下一篇:android 数据存储的几种方式