LumiSoft.Net邮件接收乱码问题解决

本文非本人编写,转载自:http://www.cnblogs.com/youngerliu/archive/2013/05/27/3101488.html

今天遇到用LumiSoft.Net这个组件收取邮件时,标题中含有类似于 =?utf-8?B?5rWL6K+V6YKu5Lu2?= 和 =?gbk?Q?=C6=BD=B0=B2=D6=A4=C8=AF*=C3=BF=D6=DC=B1=A8?= 这两种格式的乱码。随后Google了一下,原因是邮件本身的字符编码跟传输过程采用的编码不一致:这类字符串的格式是 =?字符集?传输编码?内容?=(RFC 2047),需要先按传输编码还原成字节,再按字符集解码。第一个 =?utf-8?B?5rWL6K+V6YKu5Lu2?= 表示邮件字符集是utf-8,传输采用Base64编码(B);第二个表示邮件字符集是gbk,其中的Q表示传输编码为Quoted-Printable。
对于这种格式的字符,Google大神帮我搜到了相关的处理代码,然后综合项目,整理出来了,留个记号。

LumiSoft.Net邮件接收乱码问题解决
 1 /// <summary>
 2     /// 乱码解析
 3     /// </summary>
 4     /// <param name="input"></param>
 5     /// <returns></returns>
 6     private string GetMailSubject(string input)
 7     {
 8         try
 9         {
10             string regex = @"=\?(?<encode>.*?)\?(?<type>[B|Q])\?(?<body>.*?)\?=";
11             Regex re = new Regex(regex, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
12             MatchCollection mcs = re.Matches(input);
13             foreach (Match mc in mcs)
14             {
15                 string encode = mc.Groups["encode"].Value;
16                 string type = mc.Groups["type"].Value;
17                 Encoding encod = null;
18                 if (!string.IsNullOrEmpty(encode))
19                 {
20                     if ((encode.ToLower().Contains("gbk") || encode.ToLower().Contains("utf8") || encode.ToLower().Contains("utf-8")) && type == "Q")
21                     {
22                         if (encode.ToLower().Contains("utf-8"))
23                         {
24                             encod = Encoding.UTF8;
25                         }
26                         else if (encode.ToLower().Contains("gbk"))
27                         {
28                             encod = Encoding.GetEncoding("gb2312");
29                         }
30                         input = input.Replace(mc.Value, QuotedPrintable.Decode(mc.Groups["body"].Value, encod));
31                     }
32                     else
33                     {
34                         if (encode.ToLower().Contains("euccn") || encode.ToLower().Contains("euc-cn"))
35                         {
36                             encode = "gb2312";
37                         }
38                         else if (encode.ToLower().Contains("utf8"))
39                         {
40                             encode = "utf-8";
41                         }
42                         string body = mc.Groups["body"].Value;
43                         byte[] bytes = Convert.FromBase64String(body);
44                         string result = Encoding.GetEncoding(encode).GetString(bytes);
45                         input = input.Replace(mc.Value, result);
46                     }
47                 }
48  
49             }
50             if (mcs.Count > 0)
51             {
52                 FileLogHelper.WriteInfo(string.Format("原邮件标题:[{0}]\r\n解析后标题:[{1}]", input, input));
53             }
54             return input;
55         }
56         catch (Exception)
57         {
58             return input;
59         }
60     }
View Code

 

  针对两种不同类型的字符做了处理,这里调用了一个Quoted-Printable编码解码类,代码如下:

 

LumiSoft.Net邮件接收乱码问题解决
  1 using System;
  2 using System.Collections;
  3 using System.Text;
  4 namespace Wind.MailRobot.BLL
  5 {
  6     public class QuotedPrintable
  7     {
  8         private const byte EQUALS = 61;
  9         private const byte CR = 13;
 10         private const byte LF = 10;
 11         private const byte SPACE = 32;
 12         private const byte TAB = 9;
 13  
 14         /// <summary>
 15         /// Encodes a string to QuotedPrintable
 16         /// </summary>
 17         /// <param name="_ToEncode">String to encode</param>
 18         /// <returns>QuotedPrintable encoded string</returns>
 19         public static string Encode(string _ToEncode, Encoding encoding)
 20         {
 21             StringBuilder Encoded = new StringBuilder();
 22             string hex = string.Empty;
 23             byte[] bytes = encoding.GetBytes(_ToEncode);
 24             int count = 0;
 25  
 26             for (int i = 0; i < bytes.Length; i++)
 27             {
 28                 //these characters must be encoded
 29                 if ((bytes[i] < 33 || bytes[i] > 126 || bytes[i] == EQUALS) && bytes[i] != CR && bytes[i] != LF && bytes[i] != SPACE)
 30                 {
 31                     if (bytes[i].ToString("X").Length < 2)
 32                     {
 33                         hex = "0" + bytes[i].ToString("X");
 34                         Encoded.Append("=" + hex);
 35                     }
 36                     else
 37                     {
 38                         hex = bytes[i].ToString("X");
 39                         Encoded.Append("=" + hex);
 40                     }
 41                 }
 42                 else
 43                 {
 44                     //check if index out of range
 45                     if ((i + 1) < bytes.Length)
 46                     {
 47                         //if TAB is at the end of the line - encode it!
 48                         if ((bytes[i] == TAB && bytes[i + 1] == LF) || (bytes[i] == TAB && bytes[i + 1] == CR))
 49                         {
 50                             Encoded.Append("=0" + bytes[i].ToString("X"));
 51                         }
 52                         //if SPACE is at the end of the line - encode it!
 53                         else if ((bytes[i] == SPACE && bytes[i + 1] == LF) || (bytes[i] == SPACE && bytes[i + 1] == CR))
 54                         {
 55                             Encoded.Append("=" + bytes[i].ToString("X"));
 56                         }
 57                         else
 58                         {
 59                             Encoded.Append(System.Convert.ToChar(bytes[i]));
 60                         }
 61                     }
 62                     else
 63                     {
 64                         Encoded.Append(System.Convert.ToChar(bytes[i]));
 65                     }
 66                 }
 67                 if (count == 75)
 68                 {
 69                     Encoded.Append("=\r\n"); //insert soft-linebreak
 70                     count = 0;
 71                 }
 72                 count++;
 73             }
 74  
 75             return Encoded.ToString();
 76         }
 77  
 78         /// <summary>
 79         /// Decodes a QuotedPrintable encoded string 
 80         /// </summary>
 81         /// <param name="_ToDecode">The encoded string to decode</param>
 82         /// <returns>Decoded string</returns>
 83         public static string Decode(string _ToDecode, Encoding encoding)
 84         {
 85             try
 86             {
 87                 //remove soft-linebreaks first
 88                 _ToDecode = _ToDecode.Replace("=\r\n", "");
 89                 char[] chars = _ToDecode.ToCharArray();
 90                 byte[] bytes = new byte[chars.Length];
 91                 int bytesCount = 0;
 92                 for (int i = 0; i < chars.Length; i++)
 93                 {
 94                     // if encoded character found decode it
 95                     if (chars[i] == =)
 96                     {
 97                         bytes[bytesCount++] = System.Convert.ToByte(int.Parse(chars[i + 1].ToString() + chars[i + 2].ToString(), System.Globalization.NumberStyles.HexNumber));
 98                         i += 2;
 99                     }
100                     else
101                     {
102                         bytes[bytesCount++] = System.Convert.ToByte(chars[i]);
103                     }
104                 }
105                 return encoding.GetString(bytes, 0, bytesCount);
106             }
107             catch (Exception)
108             {
109  
110                 return _ToDecode;
111             }
112         }
113     }
114 }
View Code

LumiSoft.Net邮件接收乱码问题解决

上一篇:XHTML教会我的一些东西-2


下一篇:myeclipse 保存含中文的jsp失败,提示内容含有 ISO-8859-1 不支持的字符