/*
 * libsql-ffi 0.9.29
 *
 * Native bindings to libSQL
 * Documentation
 */
#ifndef UTF8_GROUPS_H
#define UTF8_GROUPS_H

/* The tables below are extracted from the RE2 library */
#include <stdint.h>
#include "rune.h"

/*
 * A closed interval [lo, hi] of 16-bit values.  A single value is stored
 * with lo == hi (for example {95, 95} in the tables of this header).
 */
typedef struct {
    uint16_t lo, hi; /* first and last value covered by the range */
} URange16;

/*
 * One group of ranges: a pointer to an array of URange16 together with the
 * number of elements in that array.  UNI_ENTRY in this header produces an
 * initializer for this struct in exactly this member order.
 */
typedef struct {
    const URange16 *r16; /* first element of the range array */
    int nr16;            /* number of elements reachable through r16 */
} UGroup;

/* Cc (Control): two ranges, listed in ascending order. */
static const URange16 Cc_range16[] = {{0, 31}, {127, 159}};

/* Lt (Title case): ranges listed in ascending order. */
static const URange16 Lt_range16[] = {
    {453, 453},
    {456, 456},
    {459, 459},
    {498, 498},
    {8072, 8079},
    {8088, 8095},
    {8104, 8111},
    {8124, 8124},
    {8140, 8140},
    {8188, 8188},
};

/*
 * Nd (Decimal number): ranges listed in ascending order.  Every range in
 * this table covers exactly ten consecutive values.
 */
static const URange16 Nd_range16[] = {
    {48, 57}, {1632, 1641}, {1776, 1785}, {1984, 1993},
    {2406, 2415}, {2534, 2543}, {2662, 2671}, {2790, 2799},
    {2918, 2927}, {3046, 3055}, {3174, 3183}, {3302, 3311},
    {3430, 3439}, {3558, 3567}, {3664, 3673}, {3792, 3801},
    {3872, 3881}, {4160, 4169}, {4240, 4249}, {6112, 6121},
    {6160, 6169}, {6470, 6479}, {6608, 6617}, {6784, 6793},
    {6800, 6809}, {6992, 7001}, {7088, 7097}, {7232, 7241},
    {7248, 7257}, {42528, 42537}, {43216, 43225}, {43264, 43273},
    {43472, 43481}, {43504, 43513}, {43600, 43609}, {44016, 44025},
    {65296, 65305},
};

/* Nl (Number letter): ranges listed in ascending order. */
static const URange16 Nl_range16[] = {
    {5870, 5872},
    {8544, 8578},
    {8581, 8584},
    {12295, 12295},
    {12321, 12329},
    {12344, 12346},
    {42726, 42735},
};

/* Pc (Connector punctuation): ranges listed in ascending order. */
static const URange16 Pc_range16[] = {
    {95, 95},
    {8255, 8256},
    {8276, 8276},
    {65075, 65076},
    {65101, 65103},
    {65343, 65343},
};

/* Pd (Dash punctuation): ranges listed in ascending order. */
static const URange16 Pd_range16[] = {
    {45, 45}, {1418, 1418}, {1470, 1470},
    {5120, 5120}, {6150, 6150}, {8208, 8213},
    {11799, 11799}, {11802, 11802}, {11834, 11835},
    {11840, 11840}, {11869, 11869}, {12316, 12316},
    {12336, 12336}, {12448, 12448}, {65073, 65074},
    {65112, 65112}, {65123, 65123}, {65293, 65293},
};

/* Pf (Final punctuation): ranges listed in ascending order. */
static const URange16 Pf_range16[] = {
    {187, 187}, {8217, 8217},
    {8221, 8221}, {8250, 8250},
    {11779, 11779}, {11781, 11781},
    {11786, 11786}, {11789, 11789},
    {11805, 11805}, {11809, 11809},
};

/* Pi (Initial punctuation): ranges listed in ascending order. */
static const URange16 Pi_range16[] = {
    {171, 171}, {8216, 8216}, {8219, 8220}, {8223, 8223},
    {8249, 8249}, {11778, 11778}, {11780, 11780}, {11785, 11785},
    {11788, 11788}, {11804, 11804}, {11808, 11808},
};

/* Sc (Currency symbol): ranges listed in ascending order. */
static const URange16 Sc_range16[] = {
    {36, 36}, {162, 165}, {1423, 1423},
    {1547, 1547}, {2046, 2047}, {2546, 2547},
    {2555, 2555}, {2801, 2801}, {3065, 3065},
    {3647, 3647}, {6107, 6107}, {8352, 8384},
    {43064, 43064}, {65020, 65020}, {65129, 65129},
    {65284, 65284}, {65504, 65505}, {65509, 65510},
};

/* Zl (Line separator): a single one-value range. */
static const URange16 Zl_range16[] = {{8232, 8232}};

/* Zp (Paragraph separator): a single one-value range. */
static const URange16 Zp_range16[] = {{8233, 8233}};

/* Zs (Space separator): ranges listed in ascending order. */
static const URange16 Zs_range16[] = {
    {32, 32},
    {160, 160},
    {5760, 5760},
    {8192, 8202},
    {8239, 8239},
    {8287, 8287},
    {12288, 12288},
};

/*
 * Ranges for the Arabic group, listed in ascending order.  This array backs
 * the U8G_Arabic slot of _utf8_unicode_groups.
 */
static const URange16 Arabic_range16[] = {
    {1536, 1540}, {1542, 1547}, {1549, 1562}, {1564, 1566},
    {1568, 1599}, {1601, 1610}, {1622, 1647}, {1649, 1756},
    {1758, 1791}, {1872, 1919}, {2160, 2190}, {2192, 2193},
    {2200, 2273}, {2275, 2303}, {64336, 64450}, {64467, 64829},
    {64832, 64911}, {64914, 64967}, {64975, 64975}, {65008, 65023},
    {65136, 65140}, {65142, 65276},
};

/*
 * Ranges for the Cyrillic group, listed in ascending order.  This array
 * backs the U8G_Cyrillic slot of _utf8_unicode_groups.
 */
static const URange16 Cyrillic_range16[] = {
    {1024, 1156}, {1159, 1327},
    {7296, 7304}, {7467, 7467},
    {7544, 7544}, {11744, 11775},
    {42560, 42655}, {65070, 65071},
};

/*
 * Ranges for the Devanagari group, listed in ascending order.  This array
 * backs the U8G_Devanagari slot of _utf8_unicode_groups.
 */
static const URange16 Devanagari_range16[] = {
    {2304, 2384}, {2389, 2403}, {2406, 2431}, {43232, 43263},
};

/*
 * Ranges for the Greek group, listed in ascending order.  This array backs
 * the U8G_Greek slot of _utf8_unicode_groups.
 */
static const URange16 Greek_range16[] = {
    {880, 883}, {885, 887}, {890, 893}, {895, 895},
    {900, 900}, {902, 902}, {904, 906}, {908, 908},
    {910, 929}, {931, 993}, {1008, 1023}, {7462, 7466},
    {7517, 7521}, {7526, 7530}, {7615, 7615}, {7936, 7957},
    {7960, 7965}, {7968, 8005}, {8008, 8013}, {8016, 8023},
    {8025, 8025}, {8027, 8027}, {8029, 8029}, {8031, 8061},
    {8064, 8116}, {8118, 8132}, {8134, 8147}, {8150, 8155},
    {8157, 8175}, {8178, 8180}, {8182, 8190}, {8486, 8486},
    {43877, 43877},
};

/*
 * Ranges for the Han group, listed in ascending order.  This array backs
 * the U8G_Han slot of _utf8_unicode_groups.
 */
static const URange16 Han_range16[] = {
    {11904, 11929}, {11931, 12019}, {12032, 12245},
    {12293, 12293}, {12295, 12295}, {12321, 12329},
    {12344, 12347}, {13312, 19903}, {19968, 40959},
    {63744, 64109}, {64112, 64217},
};

/*
 * Ranges for the Latin group, listed in ascending order.  This array backs
 * the U8G_Latin slot of _utf8_unicode_groups.
 */
static const URange16 Latin_range16[] = {
    {65, 90}, {97, 122}, {170, 170}, {186, 186},
    {192, 214}, {216, 246}, {248, 696}, {736, 740},
    {7424, 7461}, {7468, 7516}, {7522, 7525}, {7531, 7543},
    {7545, 7614}, {7680, 7935}, {8305, 8305}, {8319, 8319},
    {8336, 8348}, {8490, 8491}, {8498, 8498}, {8526, 8526},
    {8544, 8584}, {11360, 11391}, {42786, 42887}, {42891, 42954},
    {42960, 42961}, {42963, 42963}, {42965, 42969}, {42994, 43007},
    {43824, 43866}, {43868, 43876}, {43878, 43881}, {64256, 64262},
    {65313, 65338}, {65345, 65370},
};

/*
 * UNI_ENTRY(Code) expands to a UGroup initializer for the array named
 * Code##_range16: a pointer to the array followed by its element count.
 *
 * The count is computed from the array's own element size rather than a
 * hard-coded type, and is converted to int explicitly because UGroup.nr16
 * is an int while sizeof yields size_t.  Code##_range16 must be a true
 * array (not a pointer) visible at the point of expansion.
 */
#define UNI_ENTRY(Code) \
    {Code##_range16, (int)(sizeof(Code##_range16) / sizeof(Code##_range16[0]))}

/* _e_arg(k, v) expands to the designated array initializer `[k] = v`. */
#define _e_arg(k, v) [k] = v

/*
 * Table mapping a U8G_* group index to its range array and element count.
 * U8G_SIZE and the U8G_* indices are not defined in this header
 * (presumably they come from "rune.h" -- verify).  Any slot in
 * [0, U8G_SIZE) that has no initializer below is zero-initialized,
 * i.e. a null r16 and nr16 == 0.
 */
static const UGroup _utf8_unicode_groups[U8G_SIZE] = {
    /* categories */
    [U8G_Cc]         = UNI_ENTRY(Cc),
    [U8G_Lt]         = UNI_ENTRY(Lt),
    [U8G_Nd]         = UNI_ENTRY(Nd),
    [U8G_Nl]         = UNI_ENTRY(Nl),
    [U8G_Pc]         = UNI_ENTRY(Pc),
    [U8G_Pd]         = UNI_ENTRY(Pd),
    [U8G_Pf]         = UNI_ENTRY(Pf),
    [U8G_Pi]         = UNI_ENTRY(Pi),
    [U8G_Sc]         = UNI_ENTRY(Sc),
    [U8G_Zl]         = UNI_ENTRY(Zl),
    [U8G_Zp]         = UNI_ENTRY(Zp),
    [U8G_Zs]         = UNI_ENTRY(Zs),
    /* scripts */
    [U8G_Arabic]     = UNI_ENTRY(Arabic),
    [U8G_Cyrillic]   = UNI_ENTRY(Cyrillic),
    [U8G_Devanagari] = UNI_ENTRY(Devanagari),
    [U8G_Greek]      = UNI_ENTRY(Greek),
    [U8G_Han]        = UNI_ENTRY(Han),
    [U8G_Latin]      = UNI_ENTRY(Latin),
};

#endif // UTF8_GROUPS_H