C# 正则表达式过滤危险HTML

下面是两个过滤的方法

/// <summary>
/// 此处过滤危险HTML方法
/// </summary>
/// <param name="html">html</param>
/// <returns></returns>
private string FilterHTML(string html)
{
if (html == null)
return ""; //过滤 script
Regex regex_script1 = new Regex("(<script[//s//S]*?///script//s*>)", RegexOptions.IgnoreCase);
Regex regex_script2 = new Regex("(<(script[//s//S]*?)>)", RegexOptions.IgnoreCase);
html = regex_script1.Replace(html, "");
html = regex_script1.Replace(html, ""); //过滤 <iframe> 标签
Regex regex_iframe1 = new Regex("(<iframe [//s//S]+<iframe//s*>)", RegexOptions.IgnoreCase);
Regex regex_iframe2 = new Regex("(<(iframe [//s//S]*?)>)", RegexOptions.IgnoreCase);
html = regex_iframe1.Replace(html, "");
html = regex_iframe2.Replace(html, ""); //过滤 <frameset> 标签
Regex regex_frameset1 = new Regex("(<frameset [//s//S]+<frameset //s*>)", RegexOptions.IgnoreCase);
Regex regex_frameset2 = new Regex("(<(frameset [//s//S]*?)>)", RegexOptions.IgnoreCase);
html = regex_frameset1.Replace(html, "");
html = regex_frameset2.Replace(html, ""); //过滤 <frame> 标签
Regex regex_frame1 = new Regex("(<frame[//s//S]+<frame //s*>)", RegexOptions.IgnoreCase);
Regex regex_frame2 = new Regex("(<(frame[//s//S]*?)>)", RegexOptions.IgnoreCase);
html = regex_frame1.Replace(html, "");
html = regex_frame2.Replace(html, ""); //过滤 <form> 标签
Regex regex_form1 = new Regex("(<(form [//s//S]*?)>)", RegexOptions.IgnoreCase);
Regex regex_form2 = new Regex("(<(/form[//s//S]*?)>)", RegexOptions.IgnoreCase);
html = regex_form1.Replace(html, "");
html = regex_form2.Replace(html, ""); //过滤 on: 的事件
//过滤on 带单引号的 过滤on 带双引号的 过滤on 不带有引号的
string regOn = @"<[//s//S]+ (on)[a-zA-Z]{4,20} *= *[//S ]{3,}>";
string regOn2 = @"((on)[a-zA-Z]{4,20} *= *'[^']{3,}')|((on)[a-zA-Z]{4,20} *= */""[^/""]{3,}/"")|((on)[a-zA-Z]{4,20} *= *[^>/ ]{3,})";
html = GetReplace(html, regOn, regOn2, ""); //过滤 javascript: 的事件
regOn = @"<[//s//S]+ (href|src|background|url|dynsrc|expression|codebase) *= *[ /""/']? *(javascript:)[//S]{1,}>";
regOn2 = @"(' *(javascript|vbscript):([//S^'])*')|(/"" *(javascript|vbscript):[//S^/""]*/"")|([^=]*(javascript|vbscript):[^/> ]*)";
html = GetReplace(html, regOn, regOn2, ""); return html;
} /// <summary>
/// 正则双重过滤
/// </summary>
/// <param name="content"></param>
/// <param name="splitKey1"></param>
/// <param name="splitKey2"></param>
/// <param name="newChars"></param>
/// <returns></returns>
private string GetReplace(string content, string splitKey1, string splitKey2, string newChars)
{
//splitKey1 第一个正则式匹配 //splitKey2 匹配结果中再次匹配进行替换 if (splitKey1 != null && splitKey1 != "" && splitKey2 != null && splitKey2 != "")
{
Regex rg = new Regex(splitKey1);
System.Text.RegularExpressions.MatchCollection mc = rg.Matches(content); foreach (System.Text.RegularExpressions.Match mc1 in mc)
{
string oldChar = mc1.ToString();
string newChar = new Regex(splitKey2, RegexOptions.IgnoreCase).Replace(oldChar, newChars);
content = content.Replace(oldChar, newChar);
}
return content;
}
else
{
if (splitKey2 != null && splitKey2 != "")
{
Regex rg = new Regex(splitKey2, RegexOptions.IgnoreCase);
return rg.Replace(content, newChars);
}
}
return content;
}

使用的时候

this.content.InnerHtml = FilterHTML(studentQuestionInfo.Description);

上一篇:Java基础(31):String的大小写转换、分离成数组、==与equals()的区别(String类)


下一篇:HTML5 respond.js 解决IE6~8的响应式布局问题