本文非本人编写,转载自:http://www.cnblogs.com/youngerliu/archive/2013/05/27/3101488.html
今天遇到用LumiSoft.Net这个组件收取邮件时,标题中含有类似于 =?utf-8?B?5rWL6K+V6YKu5Lu2?= 和 =?gbk?Q?=C6=BD=B0=B2=D6=A4=C8=AF*=C3=BF=D6=DC=B1=A8?= 这两种格式的乱码。随后Google了一下,原因是邮件本身的字符编码与传输过程采用的编码不一致(即RFC 2047定义的encoded-word格式:=?字符集?传输编码?内容?=)。例如 =?utf-8?B?5rWL6K+V6YKu5Lu2?= 表示邮件字符集是utf-8,传输采用Base64编码(B);第二个例子中的Q表示传输编码为Quoted-Printable。
对于这种格式的字符,Google大神帮我搜到了相关的处理代码,然后综合项目,整理出来了,留个记号。
/// <summary>
/// Decodes every RFC 2047 encoded-word (<c>=?charset?B?...?=</c> or
/// <c>=?charset?Q?...?=</c>) found in a mail header value such as the subject.
/// </summary>
/// <param name="input">Raw header text; may be null or contain no encoded-words.</param>
/// <returns>
/// The text with each decodable encoded-word replaced by its decoded form.
/// Words that cannot be decoded (unknown charset, invalid Base64) are left
/// untouched. This method never throws.
/// </returns>
private string GetMailSubject(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    string decoded = input;
    try
    {
        if (!EncodedWordRegex.IsMatch(input))
        {
            return input;
        }

        // RFC 2047 section 6.2: whitespace that only separates two
        // encoded-words is not part of the text and must be dropped.
        string joined = AdjacentEncodedWordsRegex.Replace(input, "${word}");
        decoded = EncodedWordRegex.Replace(joined, new MatchEvaluator(DecodeEncodedWord));

        // Log the untouched original next to the decoded result.
        FileLogHelper.WriteInfo(string.Format("原邮件标题:[{0}]\r\n解析后标题:[{1}]", input, decoded));
        return decoded;
    }
    catch (Exception)
    {
        // Deliberate best effort: a failure (e.g. in logging) must not
        // prevent the caller from getting the best text available.
        return decoded;
    }
}

// One encoded-word: =?charset?B|Q?payload?=
private static readonly Regex EncodedWordRegex = new Regex(
    @"=\?(?<encode>[^?\s]+)\?(?<type>[BQ])\?(?<body>.*?)\?=",
    RegexOptions.IgnoreCase);

// An encoded-word followed by whitespace and another encoded-word.
private static readonly Regex AdjacentEncodedWordsRegex = new Regex(
    @"(?<word>=\?[^?\s]+\?[BQ]\?[^?\s]*\?=)\s+(?==\?[^?\s]+\?[BQ]\?[^?\s]*\?=)",
    RegexOptions.IgnoreCase);

// Decodes a single encoded-word match; returns the original text when it cannot be decoded.
private static string DecodeEncodedWord(Match word)
{
    try
    {
        Encoding charset = ResolveCharset(word.Groups["encode"].Value);
        string body = word.Groups["body"].Value;

        if (string.Equals(word.Groups["type"].Value, "Q", StringComparison.OrdinalIgnoreCase))
        {
            // In the Q encoding '_' stands for a space; a literal underscore is sent as =5F.
            return QuotedPrintable.Decode(body.Replace('_', ' '), charset);
        }

        return charset.GetString(Convert.FromBase64String(body));
    }
    catch (FormatException)
    {
        // Payload is not valid Base64.
        return word.Value;
    }
    catch (ArgumentException)
    {
        // Charset name is not known to the runtime.
        return word.Value;
    }
    catch (NotSupportedException)
    {
        // Charset is known but not supported on this platform.
        return word.Value;
    }
}

// Maps the charset label of an encoded-word to an Encoding, normalising common aliases.
private static Encoding ResolveCharset(string name)
{
    // RFC 2231 allows a language suffix: "utf-8*zh-CN".
    int languageTag = name.IndexOf('*');
    if (languageTag >= 0)
    {
        name = name.Substring(0, languageTag);
    }

    string key = name.ToLowerInvariant();
    if (key.Contains("utf-8") || key.Contains("utf8"))
    {
        return Encoding.UTF8;
    }

    if (key.Contains("gbk") || key.Contains("euccn") || key.Contains("euc-cn"))
    {
        return Encoding.GetEncoding("gb2312");
    }

    return Encoding.GetEncoding(name);
}
针对两种不同类型的字符做了处理,这里调用了一个Quoted-Printable编码解码类,代码如下:
using System;
using System.Collections;
using System.Text;
namespace Wind.MailRobot.BLL
{
    /// <summary>
    /// Quoted-Printable (RFC 2045 section 6.7) encoder and decoder.
    /// </summary>
    public class QuotedPrintable
    {
        private const byte EQUALS = 61;          // '='
        private const byte CR = 13;              // '\r'
        private const byte LF = 10;              // '\n'
        private const byte SPACE = 32;           // ' '
        private const byte FIRST_LITERAL = 33;   // '!' - lowest byte emitted as-is
        private const byte LAST_LITERAL = 126;   // '~' - highest byte emitted as-is
        private const int MAX_LINE_CHARS = 75;   // 76-char limit minus the soft-break '='

        /// <summary>
        /// Encodes a string as Quoted-Printable.
        /// </summary>
        /// <param name="_ToEncode">Text to encode.</param>
        /// <param name="encoding">Character encoding used to turn the text into bytes.</param>
        /// <returns>
        /// The encoded text. Output lines never exceed 76 characters; longer
        /// lines are folded with soft line breaks ("=\r\n").
        /// </returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static string Encode(string _ToEncode, Encoding encoding)
        {
            if (_ToEncode == null)
            {
                throw new ArgumentNullException("_ToEncode");
            }

            if (encoding == null)
            {
                throw new ArgumentNullException("encoding");
            }

            byte[] bytes = encoding.GetBytes(_ToEncode);
            StringBuilder encoded = new StringBuilder(bytes.Length);
            int lineLength = 0;

            for (int i = 0; i < bytes.Length; i++)
            {
                byte current = bytes[i];

                // Hard line breaks are copied through and start a new output line.
                if (current == CR || current == LF)
                {
                    encoded.Append((char)current);
                    lineLength = 0;
                    continue;
                }

                string token;
                if (current == SPACE)
                {
                    // Trailing whitespace may be stripped in transit, so a space
                    // that ends a line (or the whole text) has to be escaped.
                    bool endsLine = i + 1 == bytes.Length || bytes[i + 1] == CR || bytes[i + 1] == LF;
                    token = endsLine ? "=20" : " ";
                }
                else if (current < FIRST_LITERAL || current > LAST_LITERAL || current == EQUALS)
                {
                    // Control bytes (TAB included), '=' and everything above '~'.
                    token = "=" + current.ToString("X2");
                }
                else
                {
                    token = ((char)current).ToString();
                }

                // Fold before the token so that an "=XX" escape is never split.
                if (lineLength + token.Length > MAX_LINE_CHARS)
                {
                    encoded.Append("=\r\n");
                    lineLength = 0;
                }

                encoded.Append(token);
                lineLength += token.Length;
            }

            return encoded.ToString();
        }

        /// <summary>
        /// Decodes a Quoted-Printable encoded string.
        /// </summary>
        /// <param name="_ToDecode">The encoded string to decode.</param>
        /// <param name="encoding">Character encoding of the decoded bytes.</param>
        /// <returns>
        /// The decoded text. A malformed or truncated "=" escape is kept
        /// literally. The input is returned unchanged when it is null or empty,
        /// when <paramref name="encoding"/> is null, when it contains a
        /// character above U+00FF, or when the bytes cannot be decoded.
        /// </returns>
        public static string Decode(string _ToDecode, Encoding encoding)
        {
            if (string.IsNullOrEmpty(_ToDecode) || encoding == null)
            {
                return _ToDecode;
            }

            int length = _ToDecode.Length;
            byte[] bytes = new byte[length]; // decoding never grows the data
            int bytesCount = 0;

            for (int i = 0; i < length; i++)
            {
                char current = _ToDecode[i];

                if (current != '=')
                {
                    if (current > byte.MaxValue)
                    {
                        // Not a single byte, so the input is not Quoted-Printable.
                        return _ToDecode;
                    }

                    bytes[bytesCount++] = (byte)current;
                    continue;
                }

                // Soft line break: "=\r\n" (or the bare-LF variant "=\n").
                if (i + 2 < length && _ToDecode[i + 1] == '\r' && _ToDecode[i + 2] == '\n')
                {
                    i += 2;
                    continue;
                }

                if (i + 1 < length && _ToDecode[i + 1] == '\n')
                {
                    i += 1;
                    continue;
                }

                int high = i + 2 < length ? HexValue(_ToDecode[i + 1]) : -1;
                int low = i + 2 < length ? HexValue(_ToDecode[i + 2]) : -1;
                if (high >= 0 && low >= 0)
                {
                    bytes[bytesCount++] = (byte)((high << 4) | low);
                    i += 2;
                }
                else
                {
                    // Malformed escape: keep the '=' and carry on with the next character.
                    bytes[bytesCount++] = EQUALS;
                }
            }

            try
            {
                return encoding.GetString(bytes, 0, bytesCount);
            }
            catch (ArgumentException)
            {
                // Includes DecoderFallbackException from strict encodings.
                return _ToDecode;
            }
        }

        // Returns the value of a hexadecimal digit, or -1 when the character is not one.
        private static int HexValue(char digit)
        {
            if (digit >= '0' && digit <= '9')
            {
                return digit - '0';
            }

            if (digit >= 'A' && digit <= 'F')
            {
                return digit - 'A' + 10;
            }

            if (digit >= 'a' && digit <= 'f')
            {
                return digit - 'a' + 10;
            }

            return -1;
        }
    }
}