/// <summary>
/// Removes HTML markup from a string and decodes a small set of character entities,
/// producing plain text that contains no angle brackets and no CR-LF line breaks.
/// </summary>
/// <param name="strHtml">The HTML fragment to clean. May be null or empty.</param>
/// <returns>
/// The text with script blocks, tags and comments removed; an empty string when
/// <paramref name="strHtml"/> is null or empty.
/// </returns>
/// <remarks>
/// Only the entities listed in the pattern table are decoded; every other decimal
/// numeric entity (<c>&amp;#NNN;</c>) is dropped. Any '&lt;' or '&gt;' left in the
/// result, including ones produced by decoding <c>&amp;lt;</c> / <c>&amp;gt;</c>,
/// is removed at the end so the output cannot contain markup.
/// </remarks>
public static string StripHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // patterns[i] is replaced by replacements[i]; the order of the entries matters.
    string[] patterns =
    {
        // Whole script blocks. [\s\S] is used instead of '.' so the body may span lines.
        @"<script[^>]*?>[\s\S]*?</script>",
        // Opening/closing tags, with optional attributes whose values may be bare,
        // double-quoted or single-quoted (\7 refers back to the opening quote).
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        // A line break together with the whitespace that follows it.
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        // Any remaining decimal numeric entity is discarded.
        @"&#(\d+);",
        // Comment handling: end the line at each comment terminator, then delete
        // from the comment opener through the end of that line.
        @"-->",
        @"<!--.*\n"
    };

    string[] replacements =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\u00A1",
        "\u00A2",
        "\u00A3",
        "\u00A9",
        "",
        "\r\n",
        ""
    };

    string result = strHtml;
    for (int i = 0; i < patterns.Length; i++)
    {
        // The static overload reuses the framework's regex cache instead of
        // constructing a new Regex object for every pattern on every call.
        result = Regex.Replace(result, patterns[i], replacements[i], RegexOptions.IgnoreCase);
    }

    // Strings are immutable: Replace returns a new string, so the result must be kept.
    result = result.Replace("<", "").Replace(">", "").Replace("\r\n", "");
    return result;
}