本文非本人编写,转载自:http://www.cnblogs.com/youngerliu/archive/2013/05/27/3101488.html
今天用 LumiSoft.Net 这个组件收取邮件时,遇到邮件中含有类似于 =?utf-8?B?5rWL6K+V6YKu5Lu2?= 和 =?gbk?Q?=C6=BD=B0=B2=D6=A4=C8=AF*=C3=BF=D6=DC=B1=A8?= 这两种格式的“乱码”。
随后 Google 了一下,原因是邮件本身的编码(字符集)跟传输过程采用的编码不一致:邮件头只能传输 ASCII 字符,所以非 ASCII 文本会按 RFC 2047 写成 =?字符集?传输编码?内容?= 的形式(encoded-word)。
例如 =?utf-8?B?5rWL6K+V6YKu5Lu2?= 表示邮件编码是 utf-8,传输采用 Base64 编码格式(B);第二个例子中的 Q 表示传输格式为 Quoted-Printable。
对于这种格式的字符,Google大神帮我搜到了相关的处理代码,然后综合项目,整理出来了,留个记号。
/// <summary>
/// Matches one RFC 2047 encoded-word: =?charset?B|Q?text?=.
/// The optional tail swallows whitespace that separates this word from the
/// next encoded-word, because RFC 2047 says such whitespace is not part of
/// the decoded text (long subjects are folded into several words).
/// </summary>
private static readonly Regex MailEncodedWordRegex = new Regex(
    @"=\?(?<encode>[^?\s]+)\?(?<type>[BQ])\?(?<body>[^?]*)\?=(?:\s+(?==\?[^?\s]+\?[BQ]\?[^?]*\?=))?",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

/// <summary>
/// Decodes the RFC 2047 encoded-words (=?charset?B?...?= and =?charset?Q?...?=)
/// found in a mail header value such as the subject.
/// </summary>
/// <param name="input">Raw header value; may be null or empty.</param>
/// <returns>
/// The header value with every decodable encoded-word replaced by its text.
/// Words that cannot be decoded (unknown charset, invalid Base64) are left
/// untouched. This method never throws; on an unexpected failure it returns
/// whatever has been decoded so far.
/// </returns>
private string GetMailSubject(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    string decoded = input;
    try
    {
        bool hasEncodedWords = MailEncodedWordRegex.IsMatch(input);
        decoded = MailEncodedWordRegex.Replace(input, new MatchEvaluator(DecodeMailEncodedWord));

        if (hasEncodedWords)
        {
            // Log the ORIGINAL value next to the decoded one.
            FileLogHelper.WriteInfo(string.Format("原邮件标题:[{0}]\r\n解析后标题:[{1}]", input, decoded));
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: a header that cannot be decoded (or a
        // logging failure) must not stop the mail from being processed.
    }

    return decoded;
}

/// <summary>
/// Decodes a single encoded-word match; returns the matched text unchanged
/// when its charset is unknown or its payload is not valid.
/// </summary>
private static string DecodeMailEncodedWord(Match word)
{
    try
    {
        Encoding charset = ResolveMailCharset(word.Groups["encode"].Value);
        string body = word.Groups["body"].Value;

        // The regex is case-insensitive, so the type may be "q" as well as "Q".
        if (string.Equals(word.Groups["type"].Value, "Q", StringComparison.OrdinalIgnoreCase))
        {
            // In header Q encoding "_" stands for a space; a literal
            // underscore is always transmitted as "=5F".
            return QuotedPrintable.Decode(body.Replace('_', ' '), charset);
        }

        return charset.GetString(Convert.FromBase64String(body));
    }
    catch (FormatException)
    {
        return word.Value; // payload is not valid Base64
    }
    catch (ArgumentException)
    {
        return word.Value; // charset name is not known to this runtime
    }
    catch (NotSupportedException)
    {
        return word.Value; // charset exists but is not supported on this platform
    }
}

/// <summary>
/// Maps the charset label of an encoded-word to an Encoding, accepting the
/// non-standard spellings seen in real mail ("utf8", "euccn", "euc-cn").
/// </summary>
private static Encoding ResolveMailCharset(string charset)
{
    // RFC 2231 allows a language suffix: "utf-8*en".
    int languageSuffix = charset.IndexOf('*');
    if (languageSuffix >= 0)
    {
        charset = charset.Substring(0, languageSuffix);
    }

    string name = charset.Trim().ToLowerInvariant();
    if (name.Contains("utf8") || name.Contains("utf-8"))
    {
        name = "utf-8";
    }
    else if (name.Contains("euccn") || name.Contains("euc-cn"))
    {
        name = "gb2312";
    }
    else if (name.Contains("gbk"))
    {
        name = "gbk";
    }

    return Encoding.GetEncoding(name);
}
针对这两种传输编码(Base64 和 Quoted-Printable)的字符分别做了处理,其中 Quoted-Printable 的解码调用了一个 Quoted-Printable 编码/解码类,代码如下:
using System;
using System.Collections;
using System.Text;

namespace Wind.MailRobot.BLL
{
    /// <summary>
    /// Encodes and decodes text using the quoted-printable transfer encoding:
    /// bytes that are not plain printable ASCII are written as "=XX"
    /// (two upper-case hex digits), and "=" followed by a line break is a
    /// soft line break that is dropped on decoding.
    /// </summary>
    public class QuotedPrintable
    {
        private const byte EQUALS = 61;
        private const byte CR = 13;
        private const byte LF = 10;
        private const byte SPACE = 32;

        // Longest encoded line allowed, counting the "=" of a soft line break.
        private const int MaxLineLength = 76;

        /// <summary>
        /// Encodes a string to quoted-printable.
        /// </summary>
        /// <param name="_ToEncode">String to encode.</param>
        /// <param name="encoding">Character encoding used to turn the string into bytes.</param>
        /// <returns>Quoted-printable encoded string with no line longer than 76 characters.</returns>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        public static string Encode(string _ToEncode, Encoding encoding)
        {
            if (_ToEncode == null)
            {
                throw new ArgumentNullException("_ToEncode");
            }
            if (encoding == null)
            {
                throw new ArgumentNullException("encoding");
            }

            byte[] bytes = encoding.GetBytes(_ToEncode);
            StringBuilder encoded = new StringBuilder(bytes.Length);
            int lineLength = 0; // characters already written on the current output line

            for (int i = 0; i < bytes.Length; i++)
            {
                byte current = bytes[i];

                // Hard line breaks are copied through and start a new output line.
                if (current == CR || current == LF)
                {
                    encoded.Append((char)current);
                    lineLength = 0;
                    continue;
                }

                string token = MustEscape(bytes, i)
                    ? "=" + current.ToString("X2")
                    : ((char)current).ToString();

                // Insert a soft line break BEFORE a token that would not fit,
                // so an "=XX" escape is never split and the length limit is
                // measured in output characters, not input bytes.
                if (lineLength + token.Length > MaxLineLength - 1)
                {
                    encoded.Append("=\r\n");
                    lineLength = 0;
                }

                encoded.Append(token);
                lineLength += token.Length;
            }

            return encoded.ToString();
        }

        /// <summary>
        /// Decodes a quoted-printable encoded string.
        /// </summary>
        /// <param name="_ToDecode">The encoded string to decode.</param>
        /// <param name="encoding">Character encoding of the decoded bytes.</param>
        /// <returns>
        /// The decoded string. Best effort, never throws: a null/empty input or
        /// a null encoding returns the input unchanged, as does input that
        /// contains a character above U+00FF (it cannot be raw quoted-printable
        /// data). An "=" that is not followed by two hex digits is kept as a
        /// literal "=".
        /// </returns>
        public static string Decode(string _ToDecode, Encoding encoding)
        {
            if (string.IsNullOrEmpty(_ToDecode) || encoding == null)
            {
                return _ToDecode;
            }

            // Remove soft line breaks first (CRLF, and bare LF from
            // line-ending-normalised input).
            string text = _ToDecode.Replace("=\r\n", "").Replace("=\n", "");

            byte[] bytes = new byte[text.Length];
            int bytesCount = 0;

            for (int i = 0; i < text.Length; i++)
            {
                char current = text[i];

                if (current == '=' && i + 2 < text.Length + 0 && HexValue(text[i + 1]) >= 0 && HexValue(text[i + 2]) >= 0)
                {
                    bytes[bytesCount++] = (byte)((HexValue(text[i + 1]) << 4) | HexValue(text[i + 2]));
                    i += 2;
                }
                else if (current > byte.MaxValue)
                {
                    // Not representable as a single byte: this is not raw
                    // quoted-printable data, so hand the input back untouched.
                    return _ToDecode;
                }
                else
                {
                    bytes[bytesCount++] = (byte)current;
                }
            }

            try
            {
                return encoding.GetString(bytes, 0, bytesCount);
            }
            catch (ArgumentException)
            {
                // Thrown by encodings configured with an exception fallback
                // when the bytes are invalid for the charset.
                return _ToDecode;
            }
        }

        /// <summary>
        /// Tells whether the byte at <paramref name="index"/> must be written as "=XX".
        /// CR and LF are handled by the caller and never reach this method.
        /// </summary>
        private static bool MustEscape(byte[] bytes, int index)
        {
            byte value = bytes[index];

            if (value == SPACE)
            {
                // A space is only safe when something other than a line break
                // follows it; trailing whitespace may be stripped in transit.
                bool atEndOfLine = index + 1 >= bytes.Length
                    || bytes[index + 1] == CR
                    || bytes[index + 1] == LF;
                return atEndOfLine;
            }

            // "=", control characters (including TAB) and non-ASCII bytes.
            return value == EQUALS || value < 33 || value > 126;
        }

        /// <summary>
        /// Returns the value of a hexadecimal digit, or -1 when the character is not one.
        /// </summary>
        private static int HexValue(char digit)
        {
            if (digit >= '0' && digit <= '9')
            {
                return digit - '0';
            }
            if (digit >= 'A' && digit <= 'F')
            {
                return digit - 'A' + 10;
            }
            if (digit >= 'a' && digit <= 'f')
            {
                return digit - 'a' + 10;
            }
            return -1;
        }
    }
}