#include <limits.h>
#include <string.h>
#include <mruby.h>
#include <mruby/khash.h>
#include <mruby/string.h>
#include <mruby/dump.h>
#include <mruby/class.h>
/* One entry of the symbol table (mrb->symtbl); entry 0 is never used as a symbol. */
typedef struct symbol_name {
mrb_bool lit : 1; /* TRUE when `name` is borrowed rather than copied; mrb_free_symtbl() skips such names */
uint8_t prev; /* distance back to the previous entry in the same hash bucket; 0 ends the chain, 0xff makes find_symbol() fall back to a linear scan */
uint16_t len; /* length of `name` in bytes */
const char *name; /* the symbol's text */
} symbol_name;
/*
 * Check that a symbol name of `len` bytes is acceptable.
 * Raises E_ARGUMENT_ERROR ("symbol length too long") once `len`
 * reaches RITE_LV_NULL_MARK; otherwise does nothing.
 */
static void
sym_validate_len(mrb_state *mrb, size_t len)
{
  if (len < RITE_LV_NULL_MARK) return;
  mrb_raise(mrb, E_ARGUMENT_ERROR, "symbol length too long");
}
#ifndef MRB_ENABLE_ALL_SYMBOLS
/*
 * Alphabet of inline-packed symbols.  A character is encoded as its index
 * in this table plus one, which keeps code 0 free to mark the end of the
 * name.  Codes 1..27 ('_' and 'a'..'z') are the ones that fit the 5-bit
 * encoding; everything after them needs the 6-bit encoding.
 */
static const char pack_table[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
/*
 * Try to encode a short name directly inside an mrb_sym value.
 *
 * Bit 0 of the result marks an inline symbol.  Bit 1 selects the
 * encoding: set for 5 bits per character (only '_' and lower case
 * letters), clear for 6 bits per character (any pack_table character).
 * Character codes start at bit 2.
 *
 * Returns 0 when the name cannot be packed: it is too long, contains a
 * NUL, or contains a character that is not in pack_table.
 */
static mrb_sym
sym_inline_pack(const char *name, uint16_t len)
{
  const int max_lower = (MRB_SYMBOL_BITSIZE - 2) / 5;
  const int max_mixed = (MRB_SYMBOL_BITSIZE - 2) / 6;
  mrb_sym packed = 0;
  int only_lower = 1;
  int pos;

  if (len > max_lower) return 0;

  /* Validate every character while building the 6-bit form. */
  for (pos = 0; pos < len; pos++) {
    const char ch = name[pos];
    const char *hit;
    uint32_t code;

    if (ch == 0) return 0;
    hit = strchr(pack_table, (int)ch);
    if (hit == 0) return 0;
    code = (uint32_t)(hit - pack_table) + 1;
    if (code > 27) only_lower = 0;
    /* past the 6-bit capacity: only the 5-bit form can still apply */
    if (pos >= max_mixed) break;
    packed |= code << (pos*6 + 2);
  }

  if (!only_lower) {
    if (len > max_mixed) return 0;
    return packed | 1;
  }

  /* All characters have 5-bit codes: re-encode in the denser form. */
  packed = 0;
  for (pos = 0; pos < len; pos++) {
    const char *hit = strchr(pack_table, (int)name[pos]);
    uint32_t code = (uint32_t)(hit - pack_table) + 1;

    packed |= code << (pos*5 + 2);
  }
  return packed | 3;
}
/*
 * Decode an inline-packed symbol into `buf`.
 *
 * `sym` must have bit 0 set (asserted).  Bit 1 selects 5-bit or 6-bit
 * character codes.  Decoding stops at the first zero code or once
 * 30 / bits-per-character characters have been produced.  `buf` is
 * always NUL terminated; when `lenp` is non-NULL it receives the number
 * of characters written (terminator excluded).  Returns `buf`.
 */
static const char*
sym_inline_unpack(mrb_sym sym, char *buf, mrb_int *lenp)
{
  const int width = (sym & 2) ? 5 : 6;
  const int limit = 30 / width;
  const uint32_t mask = (1 << width) - 1;
  int n = 0;

  mrb_assert(sym&1);

  while (n < limit) {
    const uint32_t code = (sym >> (n*width + 2)) & mask;

    if (code == 0) break;
    buf[n] = pack_table[code - 1];
    n++;
  }
  buf[n] = '\0';
  if (lenp) *lenp = n;
  return buf;
}
#endif
/*
 * Hash the first `len` bytes of `key` into an 8-bit bucket number.
 *
 * The mixing steps are those of Jenkins' one-at-a-time hash; only the
 * low 8 bits of the 32-bit result are returned.
 */
uint8_t
symhash(const char *key, size_t len)
{
  const char *cur = key;
  size_t remaining;
  uint32_t h = 0;

  for (remaining = len; remaining > 0; remaining--) {
    h += *cur++;
    h += h << 10;
    h ^= h >> 6;
  }

  /* final avalanche */
  h += h << 3;
  h ^= h >> 11;
  h += h << 15;

  return h & 0xff;
}
/*
 * Look up an existing symbol for `name` (`len` bytes) without creating it.
 *
 * Unless MRB_ENABLE_ALL_SYMBOLS is defined, names that can be packed
 * inline are answered directly by sym_inline_pack().  Otherwise the
 * bucket chain starting at mrb->symhash[hash] is followed through the
 * `prev` distances.  A `prev` of 0xff means the distance did not fit in
 * the field, so every entry from 0xff slots back down to entry 1 is
 * scanned instead.
 *
 * Returns the symbol value (table index shifted left by one, or the
 * inline encoding), or 0 when the name has not been interned.
 */
static mrb_sym
find_symbol(mrb_state *mrb, const char *name, uint16_t len, uint8_t hash)
{
  mrb_sym idx;
  symbol_name *entry;

#ifndef MRB_ENABLE_ALL_SYMBOLS
  idx = sym_inline_pack(name, len);
  if (idx > 0) return idx;
#endif

  idx = mrb->symhash[hash];
  if (idx == 0) return 0;

  /* follow the bucket chain */
  for (;;) {
    entry = &mrb->symtbl[idx];
    if (entry->len == len && memcmp(entry->name, name, len) == 0) {
      return idx<<1;
    }
    if (entry->prev == 0xff) break;     /* link too far: scan below */
    if (entry->prev == 0) return 0;     /* end of chain */
    idx -= entry->prev;
  }

  /* linear scan of everything at least 0xff entries back */
  idx -= 0xff;
  for (entry = &mrb->symtbl[idx]; mrb->symtbl < entry; entry--) {
    if (entry->len == len && memcmp(entry->name, name, len) == 0) {
      return (mrb_sym)(entry - mrb->symtbl)<<1;
    }
  }
  return 0;
}
/*
 * Return the symbol for `name` (`len` bytes), registering it if needed.
 *
 * mrb   - interpreter state owning the symbol table
 * name  - symbol text; need not be NUL terminated
 * len   - length of `name`; validated by sym_validate_len(), which raises
 *         E_ARGUMENT_ERROR for over-long names
 * lit   - TRUE when `name` may be referenced without copying
 *
 * The name is stored by reference when `lit` is set or mrb_ro_data_p()
 * accepts it; otherwise a NUL-terminated private copy is made.
 *
 * mrb_realloc() and mrb_malloc() do not return on allocation failure
 * (they raise, per the mruby API).  The table bookkeeping (symcapa,
 * symidx, the hash bucket head) is therefore committed only after the
 * allocation it describes has succeeded.  A failed allocation then
 * cannot leave the table claiming an uninitialised entry, which
 * mrb_free_symtbl() would later try to free, or a capacity it does not
 * have.
 */
static mrb_sym
sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit)
{
  mrb_sym sym;
  symbol_name *sname;
  uint8_t hash;

  sym_validate_len(mrb, len);
  hash = symhash(name, len);
  sym = find_symbol(mrb, name, (uint16_t)len, hash);
  if (sym > 0) return sym;

  /* registering a new symbol; mrb->symidx is bumped only at the end */
  sym = mrb->symidx + 1;
  if (mrb->symcapa < sym) {
    size_t symcapa = mrb->symcapa;

    if (symcapa == 0) symcapa = 100;
    else symcapa = (size_t)(symcapa * 6 / 5);
    /* +1 because entry 0 is never used as a symbol */
    mrb->symtbl = (symbol_name*)mrb_realloc(mrb, mrb->symtbl, sizeof(symbol_name)*(symcapa+1));
    mrb->symcapa = symcapa;
  }
  sname = &mrb->symtbl[sym];
  sname->len = (uint16_t)len;
  if (lit || mrb_ro_data_p(name)) {
    sname->name = name;
    sname->lit = TRUE;
  }
  else {
    char *p = (char *)mrb_malloc(mrb, len+1);
    memcpy(p, name, len);
    p[len] = 0;
    sname->name = (const char*)p;
    sname->lit = FALSE;
  }

  /* link to the previous head of this hash bucket */
  if (mrb->symhash[hash]) {
    mrb_sym i = sym - mrb->symhash[hash];
    if (i > 0xff)
      sname->prev = 0xff;       /* too far: find_symbol() scans linearly */
    else
      sname->prev = (uint8_t)i;
  }
  else {
    sname->prev = 0;
  }

  /* entry is fully initialised: publish it */
  mrb->symhash[hash] = mrb->symidx = sym;
  return sym<<1;
}
/*
 * Intern `len` bytes of `name`.  The caller's buffer is not marked as a
 * literal, so sym_intern() decides whether it has to be copied.
 */
MRB_API mrb_sym
mrb_intern(mrb_state *mrb, const char *name, size_t len)
{
  const mrb_bool is_literal = FALSE;

  return sym_intern(mrb, name, len, is_literal);
}
/*
 * Intern `len` bytes of `name`, marking the buffer as a literal so that
 * sym_intern() stores the pointer itself instead of a copy.
 */
MRB_API mrb_sym
mrb_intern_static(mrb_state *mrb, const char *name, size_t len)
{
  const mrb_bool is_literal = TRUE;

  return sym_intern(mrb, name, len, is_literal);
}
/* Intern a NUL-terminated C string; its length is taken with strlen(). */
MRB_API mrb_sym
mrb_intern_cstr(mrb_state *mrb, const char *name)
{
  const size_t len = strlen(name);

  return mrb_intern(mrb, name, len);
}
/* Intern the bytes held by the string value `str`. */
MRB_API mrb_sym
mrb_intern_str(mrb_state *mrb, mrb_value str)
{
  const char *ptr = RSTRING_PTR(str);
  const size_t len = (size_t)RSTRING_LEN(str);

  return mrb_intern(mrb, ptr, len);
}
/*
 * Look up `name` (`len` bytes) without interning it.
 * Returns the symbol as a value when it already exists, nil otherwise.
 * Over-long names raise through sym_validate_len().
 */
MRB_API mrb_value
mrb_check_intern(mrb_state *mrb, const char *name, size_t len)
{
  mrb_sym found;
  uint8_t hash;

  sym_validate_len(mrb, len);
  hash = symhash(name, len);
  found = find_symbol(mrb, name, len, hash);
  if (found == 0) return mrb_nil_value();
  return mrb_symbol_value(found);
}
/* mrb_check_intern() for a NUL-terminated C string. */
MRB_API mrb_value
mrb_check_intern_cstr(mrb_state *mrb, const char *name)
{
  const size_t len = strlen(name);

  return mrb_check_intern(mrb, name, len);
}
/* mrb_check_intern() for the bytes held by the string value `str`. */
MRB_API mrb_value
mrb_check_intern_str(mrb_state *mrb, mrb_value str)
{
  const char *ptr = RSTRING_PTR(str);
  const size_t len = (size_t)RSTRING_LEN(str);

  return mrb_check_intern(mrb, ptr, len);
}
/*
 * Return the text of `sym` and, when `lenp` is non-NULL, its length.
 *
 * Inline symbols (bit 0 set, unless MRB_ENABLE_ALL_SYMBOLS is defined)
 * are decoded into `buf`, which is then returned.  Table symbols return
 * the pointer stored in the symbol table.  An index of 0 or one beyond
 * mrb->symidx yields NULL with a length of 0.
 */
static const char*
sym2name_len(mrb_state *mrb, mrb_sym sym, char *buf, mrb_int *lenp)
{
  mrb_sym idx;

#ifndef MRB_ENABLE_ALL_SYMBOLS
  if (sym & 1) {
    return sym_inline_unpack(sym, buf, lenp);
  }
#endif

  idx = sym >> 1;
  if (idx != 0 && idx <= mrb->symidx) {
    const symbol_name *entry = &mrb->symtbl[idx];

    if (lenp) *lenp = entry->len;
    return entry->name;
  }

  /* not a registered symbol */
  if (lenp) *lenp = 0;
  return NULL;
}
/*
 * Public form of sym2name_len().  Inline symbols are decoded into
 * mrb->symbuf, so for them the returned pointer refers to that buffer.
 */
MRB_API const char*
mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp)
{
  char *scratch = mrb->symbuf;

  return sym2name_len(mrb, sym, scratch, lenp);
}
/*
 * Release the symbol table: every name that was copied on interning
 * (entries whose `lit` flag is clear) and then the table itself.
 */
void
mrb_free_symtbl(mrb_state *mrb)
{
  const mrb_sym end = mrb->symidx + 1;
  mrb_sym idx;

  /* entry 0 is never a symbol, so start at 1 */
  for (idx = 1; idx < end; ++idx) {
    symbol_name *entry = &mrb->symtbl[idx];

    if (entry->lit) continue;   /* borrowed name: not ours to free */
    mrb_free(mrb, (char*)entry->name);
  }
  mrb_free(mrb, mrb->symtbl);
}
/*
 * Symbol table initialisation hook.  Intentionally empty: sym_intern()
 * allocates the table the first time the capacity is found to be too small.
 */
void
mrb_init_symtbl(mrb_state *mrb)
{
}
/* Symbol#to_s / Symbol#id2name: the symbol's name as a string value. */
static mrb_value
sym_to_s(mrb_state *mrb, mrb_value sym)
{
  const mrb_sym id = mrb_symbol(sym);

  return mrb_sym2str(mrb, id);
}
/* Symbol#to_sym: returns the receiver unchanged. */
static mrb_value
sym_to_sym(mrb_state *mrb, mrb_value sym)
{
return sym;
}
/* SIGN_EXTEND_CHAR(c): value of `c` interpreted as a signed 8-bit character. */
#if __STDC__
# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#else
/* fallback without the `signed` keyword: map 0..255 onto -128..127 arithmetically */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif
/*
 * is_identchar(c): true for alphanumerics and '_', except for a byte that
 * sign-extends to -1.  Note that `c` is evaluated more than once.
 */
#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_'))
/*
 * Check the part of a global variable name that follows '$'.
 *
 * Accepted forms, each of which must span the whole string:
 *   - one of the punctuation characters listed below, or a single '0'
 *   - '-' optionally followed by one identifier character
 *   - a run of digits that does not start with '0'
 */
static mrb_bool
is_special_global_name(const char* m)
{
  const char *p = m;

  if (*p == '-') {
    p++;
    if (is_identchar(*p)) p++;
  }
  else if (*p != '\0' && strchr("~*$?!@/\\;,.=:<>\"&`'+0", *p) != NULL) {
    /* single special character (a leading '0' counts as one, too) */
    p++;
  }
  else if (ISDIGIT(*p)) {
    p++;
    while (ISDIGIT(*p)) p++;
  }
  else {
    return FALSE;
  }
  return *p == '\0';
}
/*
 * Decide whether `name` is spelled like a plain symbol: a global,
 * instance or class variable name, one of the operator names handled
 * below, or an identifier.  Callers in this file (sym_inspect,
 * mrb_sym2name) dump/quote the name when this returns FALSE.
 *
 * The switch consumes the recognised prefix; the name is accepted only
 * if nothing is left over afterwards.  NULL and "" are rejected.
 */
static mrb_bool
symname_p(const char *name)
{
const char *m = name;
/* TRUE when the identifier does not start with an upper case letter; only then may it end in '!', '?' or '=' */
mrb_bool localid = FALSE;
if (!m) return FALSE;
switch (*m) {
case '\0':
return FALSE;
case '$':
/* special globals such as $1 or $-w, otherwise '$' + identifier */
if (is_special_global_name(++m)) return TRUE;
goto id;
case '@':
/* '@' or '@@' followed by an identifier */
if (*++m == '@') ++m;
goto id;
case '<':
/* < << <= <=> */
switch (*++m) {
case '<': ++m; break;
case '=': if (*++m == '>') ++m; break;
default: break;
}
break;
case '>':
/* > >> >= */
switch (*++m) {
case '>': case '=': ++m; break;
default: break;
}
break;
case '=':
/* =~ == === ; a lone '=' is rejected */
switch (*++m) {
case '~': ++m; break;
case '=': if (*++m == '=') ++m; break;
default: return FALSE;
}
break;
case '*':
/* * ** */
if (*++m == '*') ++m;
break;
case '!':
/* ! != !~ */
switch (*++m) {
case '=': case '~': ++m;
}
break;
case '+': case '-':
/* + - +@ -@ */
if (*++m == '@') ++m;
break;
case '|':
/* | || */
if (*++m == '|') ++m;
break;
case '&':
/* & && */
if (*++m == '&') ++m;
break;
case '^': case '/': case '%': case '~': case '`':
++m;
break;
case '[':
/* [] []= */
if (*++m != ']') return FALSE;
if (*++m == '=') ++m;
break;
default:
localid = !ISUPPER(*m);
/* also entered by goto from the '$' and '@' cases, with localid still FALSE */
id:
if (*m != '_' && !ISALPHA(*m)) return FALSE;
while (is_identchar(*m)) m += 1;
if (localid) {
switch (*m) {
case '!': case '?': case '=': ++m;
default: break;
}
}
break;
}
/* anything left over means the name is not a plain symbol */
return *m ? FALSE : TRUE;
}
/*
 * Symbol#inspect.
 *
 * Builds ":" + name.  When the name is not a plain symbol name (per
 * symname_p()) or contains an embedded NUL, the string is passed through
 * mrb_str_dump() and its first two bytes are then overwritten with `:"`,
 * giving the :"..." form.
 */
static mrb_value
sym_inspect(mrb_state *mrb, mrb_value sym)
{
  mrb_int name_len;
  const char *name = mrb_sym2name_len(mrb, mrb_symbol(sym), &name_len);
  mrb_value result = mrb_str_new(mrb, 0, name_len+1);
  char *out = RSTRING_PTR(result);
  mrb_bool plain;

  out[0] = ':';
  memcpy(out+1, name, name_len);
  mrb_assert_int_fit(mrb_int, name_len, size_t, SIZE_MAX);

  plain = symname_p(name) && strlen(name) == (size_t)name_len;
  if (!plain) {
    result = mrb_str_dump(mrb, result);
    /* the dump starts with `":`; swap those two bytes to get `:"` */
    out = RSTRING_PTR(result);
    out[0] = ':';
    out[1] = '"';
  }
  return result;
}
/*
 * Return the name of `sym` as a string value, or the undef value when
 * `sym` has no name.
 *
 * Names of inline symbols (bit 0 set) are decoded into mrb->symbuf by
 * mrb_sym2name_len(), so they are copied into a new string.  Names
 * stored in the symbol table are wrapped with mrb_str_new_static().
 */
MRB_API mrb_value
mrb_sym2str(mrb_state *mrb, mrb_sym sym)
{
  mrb_int len;
  const char *name = mrb_sym2name_len(mrb, sym, &len);

  if (name == NULL) return mrb_undef_value();
  return (sym & 1) ? mrb_str_new(mrb, name, len)
                   : mrb_str_new_static(mrb, name, len);
}
/*
 * Return a C string naming `sym`, or NULL when `sym` has no name.
 *
 * A name that is a plain symbol name without embedded NULs is returned
 * as is.  Any other name is dumped with mrb_str_dump() and the buffer of
 * the resulting string object is returned.
 */
MRB_API const char*
mrb_sym2name(mrb_state *mrb, mrb_sym sym)
{
  mrb_int len;
  mrb_value str;
  const char *name = mrb_sym2name_len(mrb, sym, &len);

  if (name == NULL) return NULL;
  if (symname_p(name) && strlen(name) == (size_t)len) return name;

  /* inline symbols live in a scratch buffer and must be copied */
  str = (sym & 1) ? mrb_str_new(mrb, name, len)
                  : mrb_str_new_static(mrb, name, len);
  str = mrb_str_dump(mrb, str);
  return RSTRING_PTR(str);
}
/* Smaller of two values.  Each argument may be evaluated twice, so pass side-effect-free expressions only. */
#define lesser(a,b) (((a)>(b))?(b):(a))
/*
 * Symbol#<=>.
 *
 * Returns nil when the argument is not a symbol.  Otherwise the names
 * are compared byte-wise with memcmp() over their common length, and a
 * shorter name sorts before a longer one with the same prefix.  The
 * result is the fixnum -1, 0 or 1.
 */
static mrb_value
sym_cmp(mrb_state *mrb, mrb_value s1)
{
  mrb_value s2;
  mrb_sym lhs, rhs;
  const char *lname, *rname;
  mrb_int llen, rlen, common;
  char lbuf[8], rbuf[8];      /* scratch space for inline symbols */
  int order;

  mrb_get_args(mrb, "o", &s2);
  if (mrb_type(s2) != MRB_TT_SYMBOL) return mrb_nil_value();

  lhs = mrb_symbol(s1);
  rhs = mrb_symbol(s2);
  if (lhs == rhs) return mrb_fixnum_value(0);

  lname = sym2name_len(mrb, lhs, lbuf, &llen);
  rname = sym2name_len(mrb, rhs, rbuf, &rlen);
  common = lesser(llen, rlen);
  order = memcmp(lname, rname, common);
  if (order == 0) {
    /* equal prefix: the length decides */
    if (llen == rlen) return mrb_fixnum_value(0);
    order = (llen > rlen) ? 1 : -1;
  }
  return (order > 0) ? mrb_fixnum_value(1) : mrb_fixnum_value(-1);
}
/*
 * Define the Symbol class under Object, store it in mrb->symbol_class,
 * remove Symbol.new and register the instance methods implemented in
 * this file.
 */
void
mrb_init_symbol(mrb_state *mrb)
{
  struct RClass *symbol_class;

  symbol_class = mrb_define_class(mrb, "Symbol", mrb->object_class);
  mrb->symbol_class = symbol_class;
  MRB_SET_INSTANCE_TT(symbol_class, MRB_TT_SYMBOL);
  mrb_undef_class_method(mrb, symbol_class, "new");

  /* conversions */
  mrb_define_method(mrb, symbol_class, "id2name", sym_to_s,    MRB_ARGS_NONE());
  mrb_define_method(mrb, symbol_class, "to_s",    sym_to_s,    MRB_ARGS_NONE());
  mrb_define_method(mrb, symbol_class, "to_sym",  sym_to_sym,  MRB_ARGS_NONE());
  mrb_define_method(mrb, symbol_class, "inspect", sym_inspect, MRB_ARGS_NONE());

  /* comparison */
  mrb_define_method(mrb, symbol_class, "<=>",     sym_cmp,     MRB_ARGS_REQ(1));
}