#include "../Xutf8.h"
/*
 * Decode the UTF-8 sequence at the start of buf into a single value.
 *
 * buf - input bytes; at most the first len bytes are examined.
 * len - number of bytes available in buf.
 * ucs - receives the decoded value, or '?' when decoding fails.
 *
 * Returns the number of bytes consumed (1 to 5), or -1 when the buffer is
 * empty, the first byte cannot start a sequence, the sequence is truncated,
 * a trail byte is not of the form 10xxxxxx, or the decoded value is small
 * enough to fit a shorter encoding (overlong form).
 */
int
XConvertUtf8ToUcs(const unsigned char *buf,
                  int len,
                  unsigned int *ucs) {
  /* Test the length first so that an empty buffer is never dereferenced. */
  if (len > 0) {
    const unsigned int lead = buf[0];
    unsigned int cp;

    if ((lead & 0x80) == 0) {
      /* 0xxxxxxx: single byte. */
      *ucs = lead;
      return 1;
    }

    if ((lead & 0xE0) == 0xC0) {
      /* 110xxxxx 10xxxxxx */
      if (len > 1 && (buf[1] & 0xC0) == 0x80) {
        cp = ((lead & 0x1F) << 6) |
             (unsigned int) (buf[1] & 0x3F);
        if (cp > 0x0000007F) {
          *ucs = cp;
          return 2;
        }
      }
    } else if ((lead & 0xF0) == 0xE0) {
      /* 1110xxxx 10xxxxxx 10xxxxxx */
      if (len > 2
          && (buf[1] & 0xC0) == 0x80
          && (buf[2] & 0xC0) == 0x80) {
        cp = ((lead & 0x0F) << 12) |
             ((unsigned int) (buf[1] & 0x3F) << 6) |
             (unsigned int) (buf[2] & 0x3F);
        if (cp > 0x000007FF) {
          *ucs = cp;
          return 3;
        }
      }
    } else if ((lead & 0xF8) == 0xF0) {
      /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
      if (len > 3
          && (buf[1] & 0xC0) == 0x80
          && (buf[2] & 0xC0) == 0x80
          && (buf[3] & 0xC0) == 0x80) {
        cp = ((lead & 0x07) << 18) |
             ((unsigned int) (buf[1] & 0x3F) << 12) |
             ((unsigned int) (buf[2] & 0x3F) << 6) |
             (unsigned int) (buf[3] & 0x3F);
        if (cp > 0x0000FFFF) {
          *ucs = cp;
          return 4;
        }
      }
    } else if ((lead & 0xFC) == 0xF8) {
      /* 111110xx followed by four trail bytes. */
      if (len > 4
          && (buf[1] & 0xC0) == 0x80
          && (buf[2] & 0xC0) == 0x80
          && (buf[3] & 0xC0) == 0x80
          && (buf[4] & 0xC0) == 0x80) {
        cp = ((lead & 0x03) << 24) |
             ((unsigned int) (buf[1] & 0x3F) << 18) |
             ((unsigned int) (buf[2] & 0x3F) << 12) |
             ((unsigned int) (buf[3] & 0x3F) << 6) |
             (unsigned int) (buf[4] & 0x3F);
        /* Only values in 0x200000..0xFFFFFF are accepted in this form. */
        if (cp > 0x001FFFFF && cp < 0x01000000) {
          *ucs = cp;
          return 5;
        }
      }
    }
    /*
     * Anything else is rejected: a stray trail byte (10xxxxxx) used as a
     * lead byte, and lead bytes 0xFC..0xFF, which are not decoded.
     */
  }

  *ucs = (unsigned int) '?';
  return -1;
}
/*
 * Encode ucs as a UTF-8 style byte sequence written to buf.
 *
 * ucs - value to encode.
 * buf - output buffer; up to 5 bytes are written, no terminator is added.
 *
 * Returns the number of bytes written (1 to 5). Values of 0x01000000 and
 * above are not encoded: buf[0] is set to '?' and -1 is returned.
 */
int
XConvertUcsToUtf8(unsigned int ucs,
                  char *buf) {
  /* Lead-byte marker indexed by total sequence length (2..5). */
  static const unsigned char lead_mark[6] = {
    0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8
  };
  int total;
  int pos;

  if (ucs < 0x000080) {
    buf[0] = (char) ucs;
    return 1;
  }

  if (ucs < 0x000800) {
    total = 2;
  } else if (ucs < 0x010000) {
    total = 3;
  } else if (ucs < 0x00200000) {
    total = 4;
  } else if (ucs < 0x01000000) {
    total = 5;
  } else {
    buf[0] = '?';
    return -1;
  }

  /* Emit trail bytes from last to first, six payload bits at a time. */
  for (pos = total - 1; pos > 0; pos--) {
    buf[pos] = (char) (0x80 | (ucs & 0x3F));
    ucs >>= 6;
  }
  /* Whatever bits remain belong in the lead byte. */
  buf[0] = (char) (lead_mark[total] | ucs);
  return total;
}
/*
 * Report how many bytes the UTF-8 sequence at the start of buf occupies.
 *
 * buf - input bytes.
 * len - number of bytes available in buf.
 *
 * Returns whatever XConvertUtf8ToUcs() returns for the same input; the
 * decoded value itself is discarded.
 */
int
XUtf8CharByteLen(const unsigned char *buf,
                 int len) {
  unsigned int discarded;
  int consumed;

  consumed = XConvertUtf8ToUcs(buf, len, &discarded);
  return consumed;
}
/*
 * Count the characters in the first len bytes of buf.
 *
 * buf - input bytes.
 * len - number of bytes in buf.
 *
 * Each step advances by the length reported by XUtf8CharByteLen(); when
 * that length is less than 1 the byte is counted as one character and
 * skipped.
 */
int
XCountUtf8Char(const unsigned char *buf,
               int len) {
  int count = 0;
  int pos;

  for (pos = 0; pos < len; count++) {
    const int step = XUtf8CharByteLen(buf + pos, len - pos);

    /* Always move forward by at least one byte. */
    pos += (step < 1) ? 1 : step;
  }
  return count;
}
/*
 * Decode the UTF-8 sequence at the start of buf without validating it.
 *
 * The sequence length is taken from the lead byte alone. Trail bytes are
 * not checked for the 10xxxxxx form and overlong encodings are not
 * rejected; the only checks are that enough bytes are available and that
 * the lead byte selects a 1- to 5-byte form.
 *
 * buf - input bytes; at most the first len bytes are examined.
 * len - number of bytes available in buf.
 * ucs - receives the decoded value, or '?' when decoding fails.
 *
 * Returns the number of bytes consumed (1 to 5), or -1 when the buffer is
 * empty or too short, or the lead byte is 10xxxxxx or 0xFC..0xFF.
 */
int
XFastConvertUtf8ToUcs(const unsigned char *buf,
                      int len,
                      unsigned int *ucs) {
  /* Test the length first so that an empty buffer is never dereferenced. */
  if (len > 0) {
    const unsigned int lead = buf[0];

    if ((lead & 0x80) == 0) {
      *ucs = lead;
      return 1;
    }

    /*
     * Trail bytes are unvalidated, so bit 6 may be set: keep the 0x7F mask
     * and combine with '+' so any carry into the next field is preserved.
     */
    if ((lead & 0xE0) == 0xC0) {
      if (len > 1) {
        *ucs = ((lead & 0x1F) << 6) +
               (unsigned int) (buf[1] & 0x7F);
        return 2;
      }
    } else if ((lead & 0xF0) == 0xE0) {
      if (len > 2) {
        *ucs = ((lead & 0x0F) << 12) +
               ((unsigned int) (buf[1] & 0x7F) << 6) +
               (unsigned int) (buf[2] & 0x7F);
        return 3;
      }
    } else if ((lead & 0xF8) == 0xF0) {
      if (len > 3) {
        *ucs = ((lead & 0x07) << 18) +
               ((unsigned int) (buf[1] & 0x7F) << 12) +
               ((unsigned int) (buf[2] & 0x7F) << 6) +
               (unsigned int) (buf[3] & 0x7F);
        return 4;
      }
    } else if ((lead & 0xFC) == 0xF8) {
      if (len > 4) {
        *ucs = ((lead & 0x03) << 24) +
               ((unsigned int) (buf[1] & 0x7F) << 18) +
               ((unsigned int) (buf[2] & 0x7F) << 12) +
               ((unsigned int) (buf[3] & 0x7F) << 6) +
               (unsigned int) (buf[4] & 0x7F);
        return 5;
      }
    }
    /* Remaining lead bytes (10xxxxxx, 0xFC..0xFF) are not decoded. */
  }

  *ucs = (unsigned int) '?';
  return -1;
}