/// <summary>
/// Strips HTML markup from a string and returns the remaining text.
/// </summary>
/// <remarks>
/// Script elements are removed together with their content, then any remaining
/// tags, line breaks followed by whitespace, and comment delimiters. A fixed set
/// of character entities is decoded, every other decimal numeric entity is
/// dropped, and finally all literal angle brackets (including those produced by
/// decoding the lt/gt entities) are removed, so the result contains no tag
/// delimiters.
/// </remarks>
/// <param name="Htmlstring">Source text that may contain HTML.</param>
/// <returns>The text with markup removed; an empty string when the input is null or empty.</returns>
public static string RemoveHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script elements including their content. Singleline lets '.' match
    // line breaks, so scripts spanning several lines are removed as a whole
    // instead of leaking their body into the output.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove the remaining tags, line breaks followed by whitespace, and comment delimiters.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the supported character entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);

    // Drop every other decimal numeric entity.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // Decode the ampersand last so that an escaped entity such as "&amp;lt;"
    // yields the text "&lt;" rather than being decoded a second time.
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the result must be assigned back,
    // otherwise stray angle brackets and line breaks are never removed.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    return Htmlstring;
}
36:
37:
38: #region 正则表达式替换包含script脚本攻击的script代码
/// <summary>
/// Neutralizes script tags in a string by rewriting their delimiters with
/// square brackets via regular-expression replacement.
/// author:Andrew.He
/// </summary>
/// <param name="scriptString">Text that may contain script tags.</param>
/// <returns>
/// The text with each opening "&lt;script" rewritten to "[script " and each
/// closing "/script&gt;" rewritten to " /script]"; null or empty input is
/// returned unchanged.
/// </returns>
public static string RemoveScript(string scriptString)
{
    // Only non-empty input needs rewriting; null and "" fall through as-is.
    if (!string.IsNullOrEmpty(scriptString))
    {
        const RegexOptions matchOptions = RegexOptions.IgnoreCase;

        // Opening delimiter first ("<" plus optional spaces plus "script"),
        // then the closing delimiter ("/" plus optional spaces plus "script" plus ">").
        string openingRewritten = Regex.Replace(scriptString, @"<[ ]*script", "[script ", matchOptions);
        scriptString = Regex.Replace(openingRewritten, @"/[ ]*script[ ]*>", " /script]", matchOptions);
    }

    return scriptString;
}
58: #endregion