#ifndef JSONNET_UNICODE_H
#define JSONNET_UNICODE_H
/** Code point (U+FFFD) returned by the decoder for malformed input and substituted by the
 *  encoder for values that are not below JSONNET_CODEPOINT_MAX. */
#define JSONNET_CODEPOINT_ERROR 0xfffd
/** Exclusive upper bound on encodable code points: the encoder treats any value greater than
 *  or equal to this as an error. */
#define JSONNET_CODEPOINT_MAX 0x110000
/**
 * Append the UTF-8 encoding of one code point to a string.
 *
 * Values greater than or equal to JSONNET_CODEPOINT_MAX are replaced by
 * JSONNET_CODEPOINT_ERROR before encoding.  No other validation is performed; in particular
 * values in the surrogate range are encoded like any other three-byte code point.
 *
 * \param x The code point to encode.
 * \param s The string that receives the encoded bytes (appended at the end).
 * \returns The number of bytes appended to s (1 to 4).
 */
static inline int encode_utf8(char32_t x, std::string &s)
{
    if (x >= JSONNET_CODEPOINT_MAX)
        x = JSONNET_CODEPOINT_ERROR;

    // All arithmetic below is done on the unsigned char32_t value, so no step depends on the
    // width or signedness of a wider scratch integer.
    if (x < 0x80) {
        // 0xxxxxxx
        s.push_back(static_cast<char>(x));
        return 1;
    }
    if (x < 0x800) {
        // 110xxxxx 10xxxxxx
        s.push_back(static_cast<char>(0xC0 | (x >> 6)));
        s.push_back(static_cast<char>(0x80 | (x & 0x3F)));
        return 2;
    }
    if (x < 0x10000) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        s.push_back(static_cast<char>(0xE0 | (x >> 12)));
        s.push_back(static_cast<char>(0x80 | ((x >> 6) & 0x3F)));
        s.push_back(static_cast<char>(0x80 | (x & 0x3F)));
        return 3;
    }
    // The clamp at the top guarantees x < JSONNET_CODEPOINT_MAX here, so four bytes always
    // suffice: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    s.push_back(static_cast<char>(0xF0 | (x >> 18)));
    s.push_back(static_cast<char>(0x80 | ((x >> 12) & 0x3F)));
    s.push_back(static_cast<char>(0x80 | ((x >> 6) & 0x3F)));
    s.push_back(static_cast<char>(0x80 | (x & 0x3F)));
    return 4;
}
/**
 * Decode the UTF-8 sequence that starts at str[i].
 *
 * On success i is left on the LAST byte of the decoded sequence, so the caller advances by
 * one to reach the next character.
 *
 * JSONNET_CODEPOINT_ERROR is returned when:
 *  - the byte at str[i] cannot start a sequence (i is left unchanged);
 *  - the string ends before the sequence is complete (i is left unchanged);
 *  - a following byte is not of the form 10xxxxxx (i is left on that offending byte, so a
 *    caller that then increments i skips it).
 *
 * Overlong encodings, surrogates and results above U+10FFFF are not rejected.
 *
 * \param str The UTF-8 encoded string.
 * \param i In: index of the first byte of the sequence (expected to be a valid index into
 *          str).  Out: see above.
 * \returns The decoded code point, or JSONNET_CODEPOINT_ERROR.
 */
static inline char32_t decode_utf8(const std::string &str, size_t &i)
{
    const unsigned char lead = static_cast<unsigned char>(str[i]);

    size_t trailing;  // Number of continuation bytes announced by the lead byte.
    char32_t cp;      // Payload bits accumulated so far.
    if ((lead & 0x80) == 0) {
        // 0xxxxxxx: plain ASCII, nothing else to read.
        return lead;
    } else if ((lead & 0xE0) == 0xC0) {
        // 110xxxxx
        trailing = 1;
        cp = lead & 0x1F;
    } else if ((lead & 0xF0) == 0xE0) {
        // 1110xxxx
        trailing = 2;
        cp = lead & 0x0F;
    } else if ((lead & 0xF8) == 0xF0) {
        // 11110xxx
        trailing = 3;
        cp = lead & 0x07;
    } else {
        // Stray continuation byte or a lead byte announcing more than four bytes.
        return JSONNET_CODEPOINT_ERROR;
    }

    // Truncated sequence: report it without consuming anything beyond the lead byte.
    if (i + trailing >= str.length()) {
        return JSONNET_CODEPOINT_ERROR;
    }

    // Each continuation byte contributes six payload bits.  Accumulating by shifting keeps
    // every field at its correct position; in particular the lead-byte payload of a
    // four-byte sequence ends up shifted by 18 bits.
    for (size_t k = 0; k < trailing; ++k) {
        const unsigned char cont = static_cast<unsigned char>(str[++i]);
        if ((cont & 0xC0) != 0x80) {
            return JSONNET_CODEPOINT_ERROR;
        }
        cp = (cp << 6) | (cont & 0x3F);
    }
    return cp;
}
/** A string of UTF-32 code points. */
using UString = std::basic_string<char32_t>;
static inline void encode_utf8(const UString &s, std::string &r)
{
for (char32_t cp : s)
encode_utf8(cp, r);
}
static inline std::string encode_utf8(const UString &s)
{
std::string r;
encode_utf8(s, r);
return r;
}
/**
 * Decode a whole UTF-8 string into UTF-32.
 *
 * Every call to the single-character decoder contributes exactly one element to the result,
 * including the error code point it returns for malformed input.
 *
 * \param s The UTF-8 encoded string.
 * \returns The decoded code points.
 */
static inline UString decode_utf8(const std::string &s)
{
    UString decoded;
    size_t pos = 0;
    while (pos < s.length()) {
        // The decoder leaves pos on the last byte it consumed...
        decoded.push_back(decode_utf8(s, pos));
        // ...so step past it to reach the start of the next character.
        ++pos;
    }
    return decoded;
}
/**
 * A small append-only builder for UString values with a stream-like << interface.
 *
 * UTF-32 strings and characters are appended as they are; any other value is formatted
 * through a std::stringstream first.
 */
class UStringStream {
    UString buf;

   public:
    /** Append a UTF-32 string. */
    UStringStream &operator<<(const UString &s)
    {
        buf.append(s);
        return *this;
    }

    /** Append a null-terminated UTF-32 string. */
    UStringStream &operator<<(const char32_t *s)
    {
        buf.append(s);
        return *this;
    }

    /** Append a single code point. */
    UStringStream &operator<<(char32_t c)
    {
        buf.push_back(c);
        return *this;
    }

    /**
     * Append any other streamable value: it is formatted with a std::stringstream and each
     * resulting char is converted to char32_t individually.
     *
     * NOTE(review): the per-char conversion only yields the intended code points for ASCII
     * output; where char is signed, bytes >= 0x80 convert to very large char32_t values.
     * Confirm callers only stream ASCII-producing values (e.g. numbers) through this overload.
     */
    template <class T>
    UStringStream &operator<<(T c)
    {
        std::stringstream ss;
        ss << c;
        for (char ch : ss.str())
            buf.push_back(char32_t(ch));
        return *this;
    }

    /** Returns a copy of everything appended so far. */
    UString str() const
    {
        return buf;
    }
};
#endif