#include <codecvt>
#include <fstream>
#include <iostream>
#include <locale>
#include <string>

// Conversion between UTF-16 (wide strings) and UTF-8 (narrow strings), both
// in memory and transparently during file I/O.
//
// NOTE: std::wstring_convert and the <codecvt> facets are deprecated since
// C++17. They are kept here because the standard library offers no direct
// replacement; expect deprecation warnings on recent toolchains.

// Unicode representation in MS Windows uses the 2-byte wchar_t type.
// With std::codecvt_utf8_utf16 every wchar_t element carries one UTF-16 code
// unit, even on platforms where wchar_t itself is wider than 16 bits.
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> utfconv;

// Sample text used by the demo below.
std::wstring wide = L"Hello, World! 안녕하세요?";  // wide string of UTF-16 code units
std::string narrow = utfconv.to_bytes(wide);       // conversion from UTF-16 to UTF-8

// Runs the conversion demo: converts `narrow` back to UTF-16, writes the text
// twice to `path` as UTF-8 through a wide output stream, then reads the file
// back through a wide input stream.
//
// path: file to create/overwrite and then read (defaults to "test.txt").
// Returns true when the file could be written and read and the second line
// read back equals `wide`; false otherwise (a message goes to std::cerr when
// the file cannot be opened or written).
// May throw std::range_error from std::wstring_convert if `narrow` does not
// hold valid UTF-8.
bool run_utf_conversion_demo(const char* path = "test.txt")
{
    // string conversion: back from UTF-8 to UTF-16
    wide = utfconv.from_bytes(narrow);

    // conversion during file I/O
    std::wofstream fout;  // wide output stream
    // Install the conversion facet before opening so it is in place before
    // any I/O can occur. The locale takes ownership of the facet allocated
    // with `new` (facets are reference counted), so there is no leak here.
    fout.imbue(std::locale(fout.getloc(), new std::codecvt_utf8_utf16<wchar_t>));
    fout.open(path, std::ios::out);
    if (!fout) {
        std::cerr << "cannot open '" << path << "' for writing\n";
        return false;
    }
    fout << wide << L'\n';                      // this stream is stored as UTF-8
    fout << utfconv.from_bytes(narrow) << L'\n';  // the same as the above line
    fout.close();  // flushes; sets failbit on the stream if that fails
    if (fout.fail()) {
        std::cerr << "cannot write '" << path << "'\n";
        return false;
    }

    std::wifstream fin;
    fin.imbue(std::locale(fin.getloc(), new std::codecvt_utf8_utf16<wchar_t>));
    fin.open(path, std::ios::in);
    if (!fin) {
        std::cerr << "cannot open '" << path << "' for reading\n";
        return false;
    }

    std::wstring hello, world, anyoung, tline;
    fin >> hello >> world >> anyoung;  // UTF-8 stream is converted to UTF-16 strings
    std::getline(fin, tline);          // read out the end of the first line
    std::getline(fin, tline);          // read the next line

    // The stream must still be good and the second line must match the text.
    const bool ok = !fin.fail() && tline == wide;
    fin.close();
    return ok;
}
Conversion between UTF-16 and UTF-8 in C++
Posted on Jun 5, 2014