#include "utf.h"
// badrune is the rune substituted whenever a value cannot be encoded
// or a sequence cannot be decoded.
#define badrune 0xFFFD

// uiprivUTF8EncodeRune() stores the UTF-8 encoding of rune in encoded
// and returns the number of bytes stored (1 to 4).
// encoded must have room for that many bytes; no terminator is written.
// Runes above 0x10FFFF and runes in the surrogate range (0xD800 to 0xDFFF)
// are encoded as badrune instead.
size_t uiprivUTF8EncodeRune(uint32_t rune, char *encoded)
{
	uint8_t lead;
	size_t n, i;

	if (rune > 0x10FFFF || (rune >= 0xD800 && rune <= 0xDFFF))
		rune = badrune;

	// values below 0x80 are stored as a single byte, unchanged
	if (rune < 0x80) {
		encoded[0] = (char) (uint8_t) rune;
		return 1;
	}

	// pick the sequence length and the marker bits of the lead byte
	if (rune < 0x800) {
		n = 2;
		lead = 0xC0;
	} else if (rune < 0x10000) {
		n = 3;
		lead = 0xE0;
	} else {
		n = 4;
		lead = 0xF0;
	}

	// fill the continuation bytes from last to first, six bits at a time
	for (i = n - 1; i > 0; i--) {
		encoded[i] = (char) (uint8_t) (0x80 | (rune & 0x3F));
		rune >>= 6;
	}
	// the range checks above guarantee that what is left of rune
	// fits in the payload bits of the lead byte
	encoded[0] = (char) (uint8_t) (lead | rune);
	return n;
}
// uiprivUTF8DecodeRune() decodes one rune from the UTF-8 bytes at s,
// stores it in *rune, and returns a pointer to the first byte after
// what was consumed.
// If nElem is nonzero it is the number of bytes available at s, and a
// sequence that would need more bytes than that is rejected.
// If nElem is 0 no length check is made; a zero byte never passes the
// continuation-byte check, so decoding stops short of reading past one.
// Any rejected sequence stores badrune in *rune and consumes exactly
// one byte.
const char *uiprivUTF8DecodeRune(const char *s, size_t nElem, uint32_t *rune)
{
	const uint8_t lead = (uint8_t) s[0];
	uint8_t lo = 0x80;
	uint8_t hi = 0xBF;
	uint32_t value;
	size_t ncont, k;

	// bytes below 0x80 stand for themselves
	if (lead < 0x80) {
		*rune = lead;
		return s + 1;
	}
	// lead bytes outside 0xC2..0xF4 are never accepted
	if (lead < 0xC2 || lead > 0xF4) {
		*rune = badrune;
		return s + 1;
	}

	// four lead bytes narrow the range allowed for the first
	// continuation byte; all others use the full 0x80..0xBF
	if (lead == 0xE0)
		lo = 0xA0;
	else if (lead == 0xED)
		hi = 0x9F;
	else if (lead == 0xF0)
		lo = 0x90;
	else if (lead == 0xF4)
		hi = 0x8F;

	// the lead byte fixes both the number of continuation bytes
	// and the topmost bits of the result
	if (lead < 0xE0) {
		ncont = 1;
		value = lead & 0x1F;
	} else if (lead < 0xF0) {
		ncont = 2;
		value = lead & 0x0F;
	} else {
		ncont = 3;
		value = lead & 0x07;
	}

	// with a known length, make sure the whole sequence is available
	if (nElem != 0 && nElem - 1 < ncont) {
		*rune = badrune;
		return s + 1;
	}

	for (k = 1; k <= ncont; k++) {
		const uint8_t cont = (uint8_t) s[k];

		if (cont < lo || cont > hi) {
			// only the lead byte is consumed on failure
			*rune = badrune;
			return s + 1;
		}
		value = (value << 6) | (uint32_t) (cont & 0x3F);
		// the narrowed range only applies to the first continuation byte
		lo = 0x80;
		hi = 0xBF;
	}

	*rune = value;
	return s + 1 + ncont;
}
// uiprivUTF16EncodeRune() stores the UTF-16 encoding of rune in encoded
// and returns the number of uint16_t elements stored (1 or 2).
// encoded must have room for that many elements; no terminator is written.
// Runes above 0x10FFFF and runes in the surrogate range (0xD800 to 0xDFFF)
// are encoded as badrune instead.
size_t uiprivUTF16EncodeRune(uint32_t rune, uint16_t *encoded)
{
	uint32_t offset;

	if (rune > 0x10FFFF || (rune >= 0xD800 && rune <= 0xDFFF))
		rune = badrune;

	// anything that fits in 16 bits is stored directly
	if (rune <= 0xFFFF) {
		encoded[0] = (uint16_t) rune;
		return 1;
	}

	// everything else becomes a surrogate pair: the upper ten bits of
	// (rune - 0x10000) go in the first element, the lower ten in the second
	offset = rune - 0x10000;
	encoded[0] = (uint16_t) (0xD800 | ((offset >> 10) & 0x3FF));
	encoded[1] = (uint16_t) (0xDC00 | (offset & 0x3FF));
	return 2;
}
// uiprivUTF16DecodeRune() decodes one rune from the UTF-16 elements at s,
// stores it in *rune, and returns a pointer to the first element after
// what was consumed.
// An nElem of exactly 1 means no second element is available, so a
// leading surrogate there is rejected without reading s[1]. For any
// other nElem (including 0) s[1] is read after a leading surrogate.
// Any rejected sequence stores badrune in *rune and consumes exactly
// one element.
const uint16_t *uiprivUTF16DecodeRune(const uint16_t *s, size_t nElem, uint32_t *rune)
{
	const uint16_t first = s[0];
	uint16_t second;
	uint32_t upper, lower;

	// values outside the surrogate range stand for themselves
	if (first < 0xD800 || first > 0xDFFF) {
		*rune = first;
		return s + 1;
	}
	// a trailing surrogate cannot start a pair, and a leading
	// surrogate in the last available element has no partner
	if (first >= 0xDC00 || nElem == 1) {
		*rune = badrune;
		return s + 1;
	}

	second = s[1];
	if (second < 0xDC00 || second > 0xDFFF) {
		// the leading surrogate is not followed by a trailing one
		*rune = badrune;
		return s + 1;
	}

	// ten payload bits from each half, then re-add the 0x10000 offset
	upper = (uint32_t) (first & 0x3FF);
	lower = (uint32_t) (second & 0x3FF);
	*rune = ((upper << 10) | lower) + 0x10000;
	return s + 2;
}
// uiprivUTF8RuneCount() returns how many runes uiprivUTF8DecodeRune()
// produces when run over s.
// If nElem is nonzero, exactly nElem bytes of s are decoded.
// If nElem is 0, s is decoded up to (not including) the first zero byte.
size_t uiprivUTF8RuneCount(const char *s, size_t nElem)
{
	size_t count = 0;
	uint32_t ignored;
	const char *next;

	if (nElem == 0) {
		// terminated input: walk until the zero byte
		while (*s != '\0') {
			s = uiprivUTF8DecodeRune(s, 0, &ignored);
			count++;
		}
		return count;
	}

	// counted input: nElem is known to be nonzero on entry
	do {
		next = uiprivUTF8DecodeRune(s, nElem, &ignored);
		count++;
		nElem -= (size_t) (next - s);
		s = next;
	} while (nElem != 0);
	return count;
}
// uiprivUTF8UTF16Count() returns how many uint16_t elements are produced
// when each rune decoded from the UTF-8 input s is re-encoded with
// uiprivUTF16EncodeRune().
// If nElem is nonzero, exactly nElem bytes of s are decoded.
// If nElem is 0, s is decoded up to (not including) the first zero byte.
size_t uiprivUTF8UTF16Count(const char *s, size_t nElem)
{
	size_t total = 0;
	uint32_t r;
	uint16_t scratch[2];		// only the returned length is used
	const char *next;

	if (nElem == 0) {
		// terminated input: walk until the zero byte
		while (*s != '\0') {
			s = uiprivUTF8DecodeRune(s, 0, &r);
			total += uiprivUTF16EncodeRune(r, scratch);
		}
		return total;
	}

	// counted input: nElem is known to be nonzero on entry
	do {
		next = uiprivUTF8DecodeRune(s, nElem, &r);
		total += uiprivUTF16EncodeRune(r, scratch);
		nElem -= (size_t) (next - s);
		s = next;
	} while (nElem != 0);
	return total;
}
// uiprivUTF16RuneCount() returns how many runes uiprivUTF16DecodeRune()
// produces when run over s.
// If nElem is nonzero, exactly nElem elements of s are decoded.
// If nElem is 0, s is decoded up to (not including) the first zero element.
size_t uiprivUTF16RuneCount(const uint16_t *s, size_t nElem)
{
	size_t count = 0;
	uint32_t ignored;
	const uint16_t *next;

	if (nElem == 0) {
		// terminated input: walk until the zero element
		while (*s != 0) {
			s = uiprivUTF16DecodeRune(s, 0, &ignored);
			count++;
		}
		return count;
	}

	// counted input: nElem is known to be nonzero on entry
	do {
		next = uiprivUTF16DecodeRune(s, nElem, &ignored);
		count++;
		nElem -= (size_t) (next - s);
		s = next;
	} while (nElem != 0);
	return count;
}
// uiprivUTF16UTF8Count() returns how many bytes are produced when each
// rune decoded from the UTF-16 input s is re-encoded with
// uiprivUTF8EncodeRune().
// If nElem is nonzero, exactly nElem elements of s are decoded.
// If nElem is 0, s is decoded up to (not including) the first zero element.
size_t uiprivUTF16UTF8Count(const uint16_t *s, size_t nElem)
{
	size_t total = 0;
	uint32_t r;
	char scratch[4];		// only the returned length is used
	const uint16_t *next;

	if (nElem == 0) {
		// terminated input: walk until the zero element
		while (*s != 0) {
			s = uiprivUTF16DecodeRune(s, 0, &r);
			total += uiprivUTF8EncodeRune(r, scratch);
		}
		return total;
	}

	// counted input: nElem is known to be nonzero on entry
	do {
		next = uiprivUTF16DecodeRune(s, nElem, &r);
		total += uiprivUTF8EncodeRune(r, scratch);
		nElem -= (size_t) (next - s);
		s = next;
	} while (nElem != 0);
	return total;
}