// Conversion between UTF-16 (std::wstring) and UTF-8 (std::string), both for
// in-memory strings and transparently during file I/O.
//
// On MS Windows wchar_t is a 2-byte type holding UTF-16 code units. Where
// wchar_t is wider, std::codecvt_utf8_utf16<wchar_t> still stores exactly one
// UTF-16 code unit per wchar_t element.
//
// NOTE: std::wstring_convert and <codecvt> are deprecated since C++17 (and
// removed in C++26). They remain the only standard-library option for this
// conversion in C++11/14/17; expect deprecation warnings on newer toolchains.
#include <codecvt>
#include <fstream>
#include <iostream>
#include <locale>
#include <string>

/// Facet translating between UTF-8 bytes and UTF-16 code units stored in wchar_t.
using Utf8Utf16Facet = std::codecvt_utf8_utf16<wchar_t>;

/// Converts a UTF-16 wide string to a UTF-8 byte string.
/// @throws std::range_error if `wide` is not valid UTF-16.
inline std::string to_utf8(const std::wstring& wide) {
    // A converter per call avoids sharing mutable conversion state globally.
    std::wstring_convert<Utf8Utf16Facet, wchar_t> converter;
    return converter.to_bytes(wide);
}

/// Converts a UTF-8 byte string to a UTF-16 wide string.
/// @throws std::range_error if `narrow` is not valid UTF-8.
inline std::wstring from_utf8(const std::string& narrow) {
    std::wstring_convert<Utf8Utf16Facet, wchar_t> converter;
    return converter.from_bytes(narrow);
}

/// Returns a copy of `base` whose character-conversion facet is replaced by
/// the UTF-8 <-> UTF-16 facet, suitable for imbuing into wide file streams.
inline std::locale utf8_locale(const std::locale& base = std::locale()) {
    // std::locale takes ownership of the facet and deletes it when the last
    // locale referencing it goes away, so the raw `new` here does not leak.
    return std::locale(base, new Utf8Utf16Facet);
}

/// Demonstrates both conversion styles: explicit string conversion, and
/// implicit conversion performed by wide file streams.
///
/// Writes two identical lines to `path` (stored on disk as UTF-8), reads them
/// back through a wide input stream, and checks the round trip.
///
/// @param path File to create/overwrite and then read back.
/// @return true if the file could be written and read and the line read back
///         equals the original wide string; false otherwise (a diagnostic is
///         printed to std::cerr when a stream cannot be opened or written).
inline bool utf_conversion_demo(const std::string& path = "test.txt") {
    // --- string conversion ---
    std::wstring wide = L"Hello, World! 안녕하세요?";  // wide string with UTF-16 encoding
    const std::string narrow = to_utf8(wide);          // conversion from UTF-16 to UTF-8
    wide = from_utf8(narrow);                          // back from UTF-8 to UTF-16

    // --- conversion during file I/O ---
    {
        std::wofstream fout;  // wide output stream
        // Install the facet before the file is opened so that no I/O can
        // happen with the default conversion.
        fout.imbue(utf8_locale(fout.getloc()));
        fout.open(path, std::ios_base::out);
        if (!fout) {
            std::cerr << "cannot open '" << path << "' for writing\n";
            return false;
        }
        fout << wide << L'\n';               // this stream is stored as UTF-8
        fout << from_utf8(narrow) << L'\n';  // the same as the above line
        fout.close();                        // sets failbit if the final flush fails
        if (!fout) {
            std::cerr << "failed while writing '" << path << "'\n";
            return false;
        }
    }

    std::wifstream fin;
    fin.imbue(utf8_locale(fin.getloc()));
    fin.open(path, std::ios_base::in);
    if (!fin) {
        std::cerr << "cannot open '" << path << "' for reading\n";
        return false;
    }

    std::wstring hello;
    std::wstring world;
    std::wstring anyoung;
    std::wstring tline;
    fin >> hello >> world >> anyoung;  // UTF-8 stream is converted to UTF-16 strings
    std::getline(fin, tline);          // read out the end of the first line
    std::getline(fin, tline);          // read the next (second) line
    const bool read_ok = !fin.fail();
    fin.close();

    // The second line was written from the same text, so it must match.
    return read_ok && tline == wide;
}
Conversion between UTF-16 and UTF-8 in C++
Posted on Jun 5, 2014