#include <fstream>
#include <iostream>
#include <string>
#include <locale>
#include <codecvt>
// Conversion between UTF-16 (std::wstring) and UTF-8 (std::string), both in
// memory and while writing/reading a file.
//
// Unicode representation in MS Windows uses the 2-byte wchar_t type, so a
// std::wstring there is a sequence of UTF-16 code units.
// std::codecvt_utf8_utf16<wchar_t> converts between such code units and UTF-8
// bytes; where wchar_t is wider than 16 bits, each element still holds a
// single UTF-16 code unit.
//
// NOTE: std::wstring_convert and the <codecvt> facets are deprecated since
// C++17, so compilers may emit deprecation warnings for this code.

namespace {

// Returns a copy of `base` whose codecvt facet translates between UTF-16 code
// units (in memory) and UTF-8 bytes (in the file).
std::locale utf8_utf16_locale(const std::locale& base) {
    // The locale takes ownership of the facet and deletes it when the last
    // locale referring to it goes away, so there is no matching delete here.
    return std::locale(base, new std::codecvt_utf8_utf16<wchar_t>);
}

}  // namespace

// Converts a UTF-16 wide string to UTF-8 bytes.
// Throws std::range_error if the conversion fails.
std::string utf16_to_utf8(const std::wstring& wide) {
    // A converter carries conversion state, so every call gets its own
    // instance instead of sharing one mutable global object.
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> utfconv;
    return utfconv.to_bytes(wide);
}

// Converts UTF-8 bytes to a UTF-16 wide string.
// Throws std::range_error if the conversion fails (e.g. malformed UTF-8).
std::wstring utf8_to_utf16(const std::string& narrow) {
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> utfconv;
    return utfconv.from_bytes(narrow);
}

// Runs the whole demonstration:
//   1. converts a wide string to UTF-8 and back in memory,
//   2. writes it twice to `path` through a wide stream that stores UTF-8,
//   3. reads the file back through a wide stream that yields UTF-16.
// Returns true only if every stream operation succeeded and everything read
// back matches what was written. The file at `path` is created or overwritten.
bool utf_conversion_demo(const std::string& path = "test.txt") {
    // string conversion
    const std::wstring wide = L"Hello, World! 안녕하세요?";  // wide string with utf-16 encoding
    const std::string narrow = utf16_to_utf8(wide);        // conversion from utf-16 to utf-8
    if (utf8_to_utf16(narrow) != wide) {                   // back from utf-8 to utf-16
        return false;
    }

    // conversion during file I/O
    {
        std::wofstream fout;  // wide output stream
        // Install the facet before opening so that no I/O ever happens
        // under the default conversion.
        fout.imbue(utf8_utf16_locale(fout.getloc()));
        fout.open(path, std::ios_base::out);
        if (!fout) {
            return false;
        }
        // '\n' instead of std::endl: same file content, no flush per line.
        fout << wide << L'\n';                   // this stream is stored as utf-8
        fout << utf8_to_utf16(narrow) << L'\n';  // the same as the above line
        fout.close();
        if (!fout) {  // close() sets failbit when the final flush/close fails
            return false;
        }
    }

    std::wifstream fin;
    fin.imbue(utf8_utf16_locale(fin.getloc()));
    fin.open(path, std::ios_base::in);
    if (!fin) {
        return false;
    }

    std::wstring hello;
    std::wstring world;
    std::wstring anyoung;
    fin >> hello >> world >> anyoung;  // utf-8 stream is converted to utf-16 string

    std::wstring rest;
    std::getline(fin, rest);   // read out the end of the line
    std::wstring tline;
    std::getline(fin, tline);  // read the next line
    if (!fin) {
        return false;
    }
    fin.close();

    // The three words are the whitespace-separated pieces of the first line;
    // the second line must come back unchanged.
    return rest.empty() &&
           hello + L" " + world + L" " + anyoung == wide &&
           tline == wide;
}
// Conversion between UTF-16 and UTF-8 in C++
// Posted on Jun 5, 2014
// - Categories: Computer
// - Tags: Unicode, Visual Studio, Windows