Conversion between UTF-16 and UTF-8 in C++

Posted on Jun 5, 2014

#include <fstream>
#include <iostream>
#include <string>
#include <locale>
#include <codecvt>

// On MS Windows wchar_t is a 2-byte type, so wide strings there carry UTF-16 code units.
// utfconv translates between such wide strings and UTF-8 encoded narrow strings.
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> utfconv;

// Round-trip one string through both encodings.
std::wstring wide{L"Hello, World! 안녕하세요?"};   // UTF-16 text held in a wide string
std::string narrow{utfconv.to_bytes(wide)};       // UTF-16 -> UTF-8 bytes
wide = utfconv.from_bytes(narrow);                // UTF-8 bytes -> UTF-16 again

// conversion during file I/O
std::wofstream fout;                             // wide output stream
fout.open("test.txt", fout.out);
fout.imbue(std::locale(fout.getloc(), new std::codecvt_utf8_utf16<wchar_t>));
fout << wide << std::endl;                       // this stream is stored as utf-8
fout << utfconv.from_bytes(narrow) << std::endl; // the same as the above line
fout.close();

std::wifstream fin;
fin.open("test.txt", fin.in);
fin.imbue(std::locale(fin.getloc(), new std::codecvt_utf8_utf16<wchar_t>));
std::wstring hello, world, anyoung, tline;
fin >> hello >> world >> anyoung;      // utf-8 stream is converted to utf-16 string
std::getline(fin, tline);              // read out the end of the line
std::getline(fin, tline);              // read the next line
fin.close();