UTF-8与UCS4编码转换之C++篇

BOOST OFFICIAL DOCUMENTATION: http://www.boost.org/doc/libs/1_55_0/libs/serialization/doc/codecvt.html

官方文档中的示例（节选，省略了头文件和 ucs4_data 的定义）

UTF-8与UCS4编码转换之C++篇
 1 //... (excerpt) Writes UCS-4 data to a file as UTF-8, then reads it back, using a codecvt facet
 2   // My encoding type
 3   typedef wchar_t ucs4_t;  // NOTE(review): presumably relies on wchar_t being wide enough for UCS-4 values — confirm for the target platform
 4 
 5   std::locale old_locale;  // default-constructed: a copy of the current global locale
 6   std::locale utf8_locale(old_locale,new utf8_codecvt_facet<ucs4_t>);  // old_locale with the UTF-8 conversion facet added
 7 
 8   // Set a New global locale
 9   std::locale::global(utf8_locale);
10 
11   // Send the UCS-4 data out, converting to UTF-8
12   {
13     std::wofstream ofs("data.ucd");
14     ofs.imbue(utf8_locale);  // imbued before any output, so every character goes through the facet
15     std::copy(ucs4_data.begin(),ucs4_data.end(),
16           std::ostream_iterator<ucs4_t,ucs4_t>(ofs));  // ucs4_data is not declared in this excerpt
17   }  // ofs is destroyed here, so data.ucd is closed before it is reopened for reading
18 
19   // Read the UTF-8 data back in, converting to UCS-4 on the way in
20   std::vector<ucs4_t> from_file;
21   {
22     std::wifstream ifs("data.ucd");
23     ifs.imbue(utf8_locale);
24     ucs4_t item = 0;
25     while (ifs >> item) from_file.push_back(item);  // formatted extraction: whitespace is skipped (skipws is on by default)
26   }
27   //...
UTF-8与UCS4编码转换之C++篇

完整示例（从 data.ucd 读入 UTF-8 文本，输出字符数，再以 UTF-8 写入文件 123）

UTF-8与UCS4编码转换之C++篇
 1 #include <iostream>
 2 #include <fstream>
 3 #include <vector>
 4 #include <locale>
 5 #include <iterator>
 6 #include <algorithm>
 7 using namespace std;
 8 #include "boost/program_options/detail/convert.hpp"
 9 #include "boost/program_options/detail/utf8_codecvt_facet.hpp"
10 using namespace boost;
11 
12 typedef wchar_t ucs4_t;
13 
14 int main()  // Reads data.ucd through a UTF-8 codecvt facet, prints the character count, writes the text to file "123"
15 {
16     std::locale old_locale;  // default-constructed: a copy of the current global locale
17     std::locale utf8_locale(old_locale,new boost::program_options::detail::utf8_codecvt_facet());  // old_locale with Boost's UTF-8 facet added
18 
19     // std::locale::global(utf8_locale);
20     // std::locale::global(std::locale("en_US.UTF-8"));
21     
22     // std::vector<ucs4_t> from_file;
23     std::wifstream ifs("data.ucd");  // open failure is not checked; if it fails, the loop below never runs and s stays empty
24     ifs.imbue(utf8_locale);  // imbued before the first read, so input is converted by the UTF-8 facet
25     ucs4_t item = 0;
26     std::wstring s;
27     while (ifs >> item)  // formatted extraction: whitespace is skipped (skipws is on by default), so it never reaches s
28     {
29         // from_file.push_back(item);
30         s.push_back(item);
31     }
32     
33     wcout<<s.size()<<endl;  // number of wide characters collected in s
34     std::wofstream ofs("123");
35     ofs.imbue(utf8_locale);  // imbued before the first write, so output is converted by the UTF-8 facet
36   
37     ofs<<s<<endl;  // endl appends a newline and flushes the stream
38 
39     return 0;
40 }
UTF-8与UCS4编码转换之C++篇

UTF-8与UCS4编码转换之C++篇,布布扣,bubuko.com

UTF-8与UCS4编码转换之C++篇

上一篇:c语言中argc和argv是什么意思?


下一篇:JQuery