#include "hb.hh"
#ifndef HB_NO_OT_TAG
static hb_tag_t
hb_ot_old_tag_from_script (hb_script_t script)
{
switch ((hb_tag_t) script)
{
case HB_SCRIPT_INVALID: return HB_OT_TAG_DEFAULT_SCRIPT;
case HB_SCRIPT_MATH: return HB_OT_TAG_MATH_SCRIPT;
case HB_SCRIPT_HIRAGANA: return HB_TAG('k','a','n','a');
case HB_SCRIPT_LAO: return HB_TAG('l','a','o',' ');
case HB_SCRIPT_YI: return HB_TAG('y','i',' ',' ');
case HB_SCRIPT_NKO: return HB_TAG('n','k','o',' ');
case HB_SCRIPT_VAI: return HB_TAG('v','a','i',' ');
}
return ((hb_tag_t) script) | 0x20000000u;
}
static hb_script_t
hb_ot_old_tag_to_script (hb_tag_t tag)
{
if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT))
return HB_SCRIPT_INVALID;
if (unlikely (tag == HB_OT_TAG_MATH_SCRIPT))
return HB_SCRIPT_MATH;
if (unlikely ((tag & 0x0000FF00u) == 0x00002000u))
tag |= (tag >> 8) & 0x0000FF00u;
if (unlikely ((tag & 0x000000FFu) == 0x00000020u))
tag |= (tag >> 8) & 0x000000FFu;
return (hb_script_t) (tag & ~0x20000000u);
}
static hb_tag_t
hb_ot_new_tag_from_script (hb_script_t script)
{
switch ((hb_tag_t) script) {
case HB_SCRIPT_BENGALI: return HB_TAG('b','n','g','2');
case HB_SCRIPT_DEVANAGARI: return HB_TAG('d','e','v','2');
case HB_SCRIPT_GUJARATI: return HB_TAG('g','j','r','2');
case HB_SCRIPT_GURMUKHI: return HB_TAG('g','u','r','2');
case HB_SCRIPT_KANNADA: return HB_TAG('k','n','d','2');
case HB_SCRIPT_MALAYALAM: return HB_TAG('m','l','m','2');
case HB_SCRIPT_ORIYA: return HB_TAG('o','r','y','2');
case HB_SCRIPT_TAMIL: return HB_TAG('t','m','l','2');
case HB_SCRIPT_TELUGU: return HB_TAG('t','e','l','2');
case HB_SCRIPT_MYANMAR: return HB_TAG('m','y','m','2');
}
return HB_OT_TAG_DEFAULT_SCRIPT;
}
static hb_script_t
hb_ot_new_tag_to_script (hb_tag_t tag)
{
switch (tag) {
case HB_TAG('b','n','g','2'): return HB_SCRIPT_BENGALI;
case HB_TAG('d','e','v','2'): return HB_SCRIPT_DEVANAGARI;
case HB_TAG('g','j','r','2'): return HB_SCRIPT_GUJARATI;
case HB_TAG('g','u','r','2'): return HB_SCRIPT_GURMUKHI;
case HB_TAG('k','n','d','2'): return HB_SCRIPT_KANNADA;
case HB_TAG('m','l','m','2'): return HB_SCRIPT_MALAYALAM;
case HB_TAG('o','r','y','2'): return HB_SCRIPT_ORIYA;
case HB_TAG('t','m','l','2'): return HB_SCRIPT_TAMIL;
case HB_TAG('t','e','l','2'): return HB_SCRIPT_TELUGU;
case HB_TAG('m','y','m','2'): return HB_SCRIPT_MYANMAR;
}
return HB_SCRIPT_UNKNOWN;
}
#ifndef HB_DISABLE_DEPRECATED
void
hb_ot_tags_from_script (hb_script_t script,
hb_tag_t *script_tag_1,
hb_tag_t *script_tag_2)
{
unsigned int count = 2;
hb_tag_t tags[2];
hb_ot_tags_from_script_and_language (script, HB_LANGUAGE_INVALID, &count, tags, nullptr, nullptr);
*script_tag_1 = count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_SCRIPT;
*script_tag_2 = count > 1 ? tags[1] : HB_OT_TAG_DEFAULT_SCRIPT;
}
#endif
static void
hb_ot_all_tags_from_script (hb_script_t script,
unsigned int *count ,
hb_tag_t *tags )
{
unsigned int i = 0;
hb_tag_t new_tag = hb_ot_new_tag_from_script (script);
if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT))
{
if (new_tag != HB_TAG('m','y','m','2'))
tags[i++] = new_tag | '3';
if (*count > i)
tags[i++] = new_tag;
}
if (*count > i)
{
hb_tag_t old_tag = hb_ot_old_tag_from_script (script);
if (old_tag != HB_OT_TAG_DEFAULT_SCRIPT)
tags[i++] = old_tag;
}
*count = i;
}
hb_script_t
hb_ot_tag_to_script (hb_tag_t tag)
{
unsigned char digit = tag & 0x000000FFu;
if (unlikely (digit == '2' || digit == '3'))
return hb_ot_new_tag_to_script (tag & 0xFFFFFF32);
return hb_ot_old_tag_to_script (tag);
}
static inline bool
subtag_matches (const char *lang_str,
const char *limit,
const char *subtag,
unsigned subtag_len)
{
if (likely ((unsigned) (limit - lang_str) < subtag_len))
return false;
do {
const char *s = strstr (lang_str, subtag);
if (!s || s >= limit)
return false;
if (!ISALNUM (s[subtag_len]))
return true;
lang_str = s + subtag_len;
} while (true);
}
static bool
lang_matches (const char *lang_str,
const char *limit,
const char *spec,
unsigned spec_len)
{
if (likely ((unsigned) (limit - lang_str) < spec_len))
return false;
return strncmp (lang_str, spec, spec_len) == 0 &&
(lang_str[spec_len] == '\0' || lang_str[spec_len] == '-');
}
struct LangTag
{
hb_tag_t language;
hb_tag_t tag;
int cmp (hb_tag_t a) const
{
return a < this->language ? -1 : a > this->language ? +1 : 0;
}
int cmp (const LangTag *that) const
{ return cmp (that->language); }
};
#include "hb-ot-tag-table.hh"
#ifndef HB_DISABLE_DEPRECATED
hb_tag_t
hb_ot_tag_from_language (hb_language_t language)
{
unsigned int count = 1;
hb_tag_t tags[1];
hb_ot_tags_from_script_and_language (HB_SCRIPT_UNKNOWN, language, nullptr, nullptr, &count, tags);
return count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_LANGUAGE;
}
#endif
static void
hb_ot_tags_from_language (const char *lang_str,
const char *limit,
unsigned int *count,
hb_tag_t *tags)
{
#ifndef HB_NO_LANGUAGE_LONG
if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags))
return;
#endif
#ifndef HB_NO_LANGUAGE_LONG
const char *s; s = strchr (lang_str, '-');
#endif
{
#ifndef HB_NO_LANGUAGE_LONG
if (s && limit - lang_str >= 6)
{
const char *extlang_end = strchr (s + 1, '-');
if (3 == (extlang_end ? extlang_end - s - 1 : strlen (s + 1)) &&
ISALPHA (s[1]))
lang_str = s + 1;
}
#endif
const LangTag *ot_languages = nullptr;
unsigned ot_languages_len = 0;
const char *dash = strchr (lang_str, '-');
unsigned first_len = dash ? dash - lang_str : limit - lang_str;
if (first_len == 2)
{
ot_languages = ot_languages2;
ot_languages_len = ARRAY_LENGTH (ot_languages2);
}
#ifndef HB_NO_LANGUAGE_LONG
else if (first_len == 3)
{
ot_languages = ot_languages3;
ot_languages_len = ARRAY_LENGTH (ot_languages3);
}
#endif
hb_tag_t lang_tag = hb_tag_from_string (lang_str, first_len);
static hb_atomic_int_t last_tag_idx;
unsigned tag_idx = last_tag_idx;
if (likely (tag_idx < ot_languages_len && ot_languages[tag_idx].language == lang_tag) ||
hb_sorted_array (ot_languages, ot_languages_len).bfind (lang_tag, &tag_idx))
{
last_tag_idx = tag_idx;
unsigned int i;
while (tag_idx != 0 &&
ot_languages[tag_idx].language == ot_languages[tag_idx - 1].language)
tag_idx--;
for (i = 0;
i < *count &&
tag_idx + i < ot_languages_len &&
ot_languages[tag_idx + i].tag != HB_TAG_NONE &&
ot_languages[tag_idx + i].language == ot_languages[tag_idx].language;
i++)
tags[i] = ot_languages[tag_idx + i].tag;
*count = i;
return;
}
}
#ifndef HB_NO_LANGUAGE_LONG
if (!s)
s = lang_str + strlen (lang_str);
if (s - lang_str == 3) {
tags[0] = hb_tag_from_string (lang_str, s - lang_str) & ~0x20202000u;
*count = 1;
return;
}
#endif
*count = 0;
}
static bool
parse_private_use_subtag (const char *private_use_subtag,
unsigned int *count,
hb_tag_t *tags,
const char *prefix,
unsigned char (*normalize) (unsigned char))
{
#ifdef HB_NO_LANGUAGE_PRIVATE_SUBTAG
return false;
#endif
if (!(private_use_subtag && count && tags && *count)) return false;
const char *s = strstr (private_use_subtag, prefix);
if (!s) return false;
char tag[4];
int i;
s += strlen (prefix);
if (s[0] == '-') {
s += 1;
char c;
for (i = 0; i < 8 && ISHEX (s[i]); i++)
{
c = FROMHEX (s[i]);
if (i % 2 == 0)
tag[i / 2] = c << 4;
else
tag[i / 2] += c;
}
if (i != 8) return false;
} else {
for (i = 0; i < 4 && ISALNUM (s[i]); i++)
tag[i] = normalize (s[i]);
if (!i) return false;
for (; i < 4; i++)
tag[i] = ' ';
}
tags[0] = HB_TAG (tag[0], tag[1], tag[2], tag[3]);
if ((tags[0] & 0xDFDFDFDF) == HB_OT_TAG_DEFAULT_SCRIPT)
tags[0] ^= ~0xDFDFDFDF;
*count = 1;
return true;
}
void
hb_ot_tags_from_script_and_language (hb_script_t script,
hb_language_t language,
unsigned int *script_count ,
hb_tag_t *script_tags ,
unsigned int *language_count ,
hb_tag_t *language_tags )
{
bool needs_script = true;
if (language == HB_LANGUAGE_INVALID)
{
if (language_count && language_tags && *language_count)
*language_count = 0;
}
else
{
const char *lang_str, *s, *limit, *private_use_subtag;
bool needs_language;
lang_str = hb_language_to_string (language);
limit = nullptr;
private_use_subtag = nullptr;
if (lang_str[0] == 'x' && lang_str[1] == '-')
{
private_use_subtag = lang_str;
} else {
for (s = lang_str + 1; *s; s++)
{
if (s[-1] == '-' && s[1] == '-')
{
if (s[0] == 'x')
{
private_use_subtag = s;
if (!limit)
limit = s - 1;
break;
} else if (!limit)
{
limit = s - 1;
}
}
}
if (!limit)
limit = s;
}
needs_script = !parse_private_use_subtag (private_use_subtag, script_count, script_tags, "-hbsc", TOLOWER);
needs_language = !parse_private_use_subtag (private_use_subtag, language_count, language_tags, "-hbot", TOUPPER);
if (needs_language && language_count && language_tags && *language_count)
hb_ot_tags_from_language (lang_str, limit, language_count, language_tags);
}
if (needs_script && script_count && script_tags && *script_count)
hb_ot_all_tags_from_script (script, script_count, script_tags);
}
hb_language_t
hb_ot_tag_to_language (hb_tag_t tag)
{
unsigned int i;
if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
return nullptr;
#ifndef HB_NO_LANGUAGE_LONG
{
hb_language_t disambiguated_tag = hb_ot_ambiguous_tag_to_language (tag);
if (disambiguated_tag != HB_LANGUAGE_INVALID)
return disambiguated_tag;
}
#endif
char buf[4];
for (i = 0; i < ARRAY_LENGTH (ot_languages2); i++)
if (ot_languages2[i].tag == tag)
{
hb_tag_to_string (ot_languages2[i].language, buf);
return hb_language_from_string (buf, 2);
}
#ifndef HB_NO_LANGUAGE_LONG
for (i = 0; i < ARRAY_LENGTH (ot_languages3); i++)
if (ot_languages3[i].tag == tag)
{
hb_tag_to_string (ot_languages3[i].language, buf);
return hb_language_from_string (buf, 3);
}
#endif
{
char buf[20];
char *str = buf;
if (ISALPHA (tag >> 24)
&& ISALPHA ((tag >> 16) & 0xFF)
&& ISALPHA ((tag >> 8) & 0xFF)
&& (tag & 0xFF) == ' ')
{
buf[0] = TOLOWER (tag >> 24);
buf[1] = TOLOWER ((tag >> 16) & 0xFF);
buf[2] = TOLOWER ((tag >> 8) & 0xFF);
buf[3] = '-';
str += 4;
}
snprintf (str, 16, "x-hbot-%08" PRIx32, tag);
return hb_language_from_string (&*buf, -1);
}
}
void
hb_ot_tags_to_script_and_language (hb_tag_t script_tag,
hb_tag_t language_tag,
hb_script_t *script ,
hb_language_t *language )
{
hb_script_t script_out = hb_ot_tag_to_script (script_tag);
if (script)
*script = script_out;
if (language)
{
unsigned int script_count = 1;
hb_tag_t primary_script_tag[1];
hb_ot_tags_from_script_and_language (script_out,
HB_LANGUAGE_INVALID,
&script_count,
primary_script_tag,
nullptr, nullptr);
*language = hb_ot_tag_to_language (language_tag);
if (script_count == 0 || primary_script_tag[0] != script_tag)
{
unsigned char *buf;
const char *lang_str = hb_language_to_string (*language);
size_t len = strlen (lang_str);
buf = (unsigned char *) hb_malloc (len + 16);
if (unlikely (!buf))
{
*language = nullptr;
}
else
{
int shift;
hb_memcpy (buf, lang_str, len);
if (lang_str[0] != 'x' || lang_str[1] != '-') {
buf[len++] = '-';
buf[len++] = 'x';
}
buf[len++] = '-';
buf[len++] = 'h';
buf[len++] = 'b';
buf[len++] = 's';
buf[len++] = 'c';
buf[len++] = '-';
for (shift = 28; shift >= 0; shift -= 4)
buf[len++] = TOHEX (script_tag >> shift);
*language = hb_language_from_string ((char *) buf, len);
hb_free (buf);
}
}
}
}
#ifdef MAIN
static inline void
test_langs_sorted ()
{
for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages2); i++)
{
int c = ot_languages2[i].cmp (&ot_languages2[i - 1]);
if (c > 0)
{
fprintf (stderr, "ot_languages2 not sorted at index %u: %08x %d %08x\n",
i, ot_languages2[i-1].language, c, ot_languages2[i].language);
abort();
}
}
#ifndef HB_NO_LANGUAGE_LONG
for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages3); i++)
{
int c = ot_languages3[i].cmp (&ot_languages3[i - 1]);
if (c > 0)
{
fprintf (stderr, "ot_languages3 not sorted at index %u: %08x %d %08x\n",
i, ot_languages3[i-1].language, c, ot_languages3[i].language);
abort();
}
}
#endif
}
int
main ()
{
test_langs_sorted ();
return 0;
}
#endif
#endif