コード
ヘッダ
#ifndef string_encoder_hpp
#define string_encoder_hpp
#include <string>
namespace encoding {
// Collection of static helpers that re-encode Japanese text between
// EUC-JP, Shift_JIS and UTF-8. The class has no data members; each function
// takes the encoded bytes in `value` and returns the converted bytes as a
// new std::string.
class Encoder{
public:
// Converts EUC-JP encoded `value` to Shift_JIS.
static std::string EucToSjis(const std::string &value);
// Converts EUC-JP encoded `value` to UTF-8.
static std::string EucToUtf8(const std::string &value);
// Converts Shift_JIS encoded `value` to EUC-JP.
static std::string SjisToEuc(const std::string &value);
// Converts Shift_JIS encoded `value` to UTF-8.
static std::string SjisToUtf8(const std::string &value);
// Converts UTF-8 encoded `value` to EUC-JP.
static std::string Utf8ToEuc(const std::string &value);
// Converts UTF-8 encoded `value` to Shift_JIS.
static std::string Utf8ToSjis(const std::string &value);
};
}
#endif
実装
#include <vector>
#include <unicode/unistr.h>
#include "string_encoder.hpp"
namespace encoding {
namespace internal {
namespace encode_name {
// Code page names passed to the converter as the source / target encoding.
static const std::string kEucJp("euc-jp");
static const std::string kShiftJis("shift-jis");
static const std::string kUtf8("utf8");
}
std::string encode(const std::string &value, const std::string &from, const std::string &to){
icu::UnicodeString src(value.c_str(), from.c_str());
const int length = src.extract(0, src.length(), nullptr, to.c_str());
std::vector<char> result(length + 1);
src.extract(0, src.length(), &result[0], to.c_str());
return std::move(std::string(result.begin(), result.end() - 1));
}
}
// Converts EUC-JP encoded `value` to Shift_JIS and returns the converted bytes.
std::string Encoder::EucToSjis(const std::string &value){
    // Return the temporary directly; std::move here would block copy elision.
    return internal::encode(value,
                            internal::encode_name::kEucJp,
                            internal::encode_name::kShiftJis);
}
// Converts EUC-JP encoded `value` to UTF-8 and returns the converted bytes.
std::string Encoder::EucToUtf8(const std::string &value){
    // Return the temporary directly; std::move here would block copy elision.
    return internal::encode(value,
                            internal::encode_name::kEucJp,
                            internal::encode_name::kUtf8);
}
// Converts Shift_JIS encoded `value` to EUC-JP and returns the converted bytes.
std::string Encoder::SjisToEuc(const std::string &value){
    // Return the temporary directly; std::move here would block copy elision.
    return internal::encode(value,
                            internal::encode_name::kShiftJis,
                            internal::encode_name::kEucJp);
}
// Converts Shift_JIS encoded `value` to UTF-8 and returns the converted bytes.
std::string Encoder::SjisToUtf8(const std::string &value){
    // Return the temporary directly; std::move here would block copy elision.
    return internal::encode(value,
                            internal::encode_name::kShiftJis,
                            internal::encode_name::kUtf8);
}
// Converts UTF-8 encoded `value` to EUC-JP and returns the converted bytes.
std::string Encoder::Utf8ToEuc(const std::string &value){
    // Return the temporary directly; std::move here would block copy elision.
    return internal::encode(value,
                            internal::encode_name::kUtf8,
                            internal::encode_name::kEucJp);
}
// Converts UTF-8 encoded `value` to Shift_JIS and returns the converted bytes.
std::string Encoder::Utf8ToSjis(const std::string &value){
    // Return the temporary directly; std::move here would block copy elision.
    return internal::encode(value,
                            internal::encode_name::kUtf8,
                            internal::encode_name::kShiftJis);
}
}
動作確認
#include <iostream>
#include <string>
#include <vector>
#include "string_encoder.hpp"
// Compares `expected` and `actual` with operator== and reports the outcome
// as text: "Match" when they are equal, "Unmatch!" otherwise.
template <typename T>
std::string test(const T &expected, const T &actual){
    if (expected == actual) {
        return "Match";
    }
    return "Unmatch!";
}
// Manual check of the Encoder conversions.
//
// Converts a UTF-8 sample to Shift_JIS, on to EUC-JP and back to UTF-8,
// comparing each intermediate result against a hand-written byte dump, then
// round-trips an EUC-JP string containing a vendor-specific character and
// finally tries a Shift_JIS input against an emoji. Each comparison prints
// "Match" or "Unmatch!" to standard output.
int main(int argc, const char * argv[]) {
    // The fifth character is HALFWIDTH KATAKANA LETTER U (U+FF73): the expected
    // dumps below encode it as the single byte 0xB3 in Shift_JIS and as
    // 0x8E 0xB3 in EUC-JP. It is written as an escape so it cannot be silently
    // replaced by the full-width form.
    const std::string utf8_string = "aこれは\uFF73にこーど";
    std::cout << utf8_string << " is utf8 string, length:" << utf8_string.length() << std::endl;
    std::cout << std::endl;

    // UTF-8 -> Shift_JIS
    const std::string sjis_string = encoding::Encoder::Utf8ToSjis(utf8_string);
    const std::string sjis_dump = {
        'a',
        '\x82', '\xb1',
        '\x82', '\xea',
        '\x82', '\xcd',
        '\xb3',
        '\x82', '\xc9',
        '\x82', '\xb1',
        '\x81', '\x5b',
        '\x82', '\xc7'
    };
    std::cout << "test sjis is " << test(sjis_dump, sjis_string) << std::endl;

    // Shift_JIS -> EUC-JP
    const std::string euc_string = encoding::Encoder::SjisToEuc(sjis_string);
    const std::string euc_dump = {
        'a',
        '\xa4', '\xb3',
        '\xa4', '\xec',
        '\xa4', '\xcf',
        '\x8e', '\xb3',
        '\xa4', '\xcb',
        '\xa4', '\xb3',
        '\xa1', '\xbc',
        '\xa4', '\xc9'
    };
    std::cout << "test euc is " << test(euc_dump, euc_string) << std::endl;

    // EUC-JP -> UTF-8 must give back the original sample.
    const std::string return_utf8 = encoding::Encoder::EucToUtf8(euc_string);
    std::cout << "test utf8 is " << test(utf8_string, return_utf8) << std::endl;

    // EUC-JP input containing a machine-dependent character; it must survive
    // an EUC-JP -> UTF-8 -> EUC-JP round trip unchanged.
    const std::string euc_contains_machine_dependent_char_string = {
        'a',
        '\xa4', '\xb3',
        '\xa4', '\xec',
        '\xa4', '\xcf',
        '\xad', '\xc0', '\x0a',
        '\xa4', '\xcb',
        '\xa4', '\xb3',
        '\xa1', '\xbc',
        '\xa4', '\xc9'
    };
    // ASCII identifier: symbol characters are not valid identifier characters
    // for compilers that follow the Unicode identifier rules.
    const std::string milli_utf8 = encoding::Encoder::EucToUtf8(euc_contains_machine_dependent_char_string);
    std::cout << "test ㍉ is " << test(euc_contains_machine_dependent_char_string, encoding::Encoder::Utf8ToEuc(milli_utf8)) << std::endl;

    // Shift_JIS bytes '?' followed by a line feed, compared with the emoji in
    // UTF-8. The recorded run reports "Unmatch!" for this case.
    const std::string sushi_sjis = {
        '\x3f', '\x0a'
    };
    const std::string sushi_string = "🍣";
    std::cout << "test 🍣 is " << test(sushi_string, encoding::Encoder::SjisToUtf8(sushi_sjis)) << std::endl;
}
結果
aこれはｳにこーど is utf8 string, length:25
test sjis is Match
test euc is Match
test utf8 is Match
test ㍉ is Match
test 🍣 is Unmatch!
参考文献