#include "sym.h"
#include "core/platform.h"
#include "store/col.h"
#include "store/fileio.h"
#include "mem/heap.h"
#include "mem/sys.h"
#include "mem/arena.h"
#include <string.h>
#include <stdio.h>
#include <stdatomic.h>
#include <errno.h>
#include "ops/hash.h"
/* Initial slot count used by ray_sym_init() for both the hash-bucket array
 * and the id -> string pointer array. */
#define SYM_INIT_CAP 256
/* Load factor used by ray_sym_ensure_cap() when sizing the bucket array.
 * The incremental growth paths use the equivalent integer test (count * 100 >= cap * 70). */
#define SYM_LOAD_FACTOR 0.7
/* Cached segment list for one symbol: the symbol ids of the dot-separated
 * parts of a dotted name. */
typedef struct {
/* Number of entries in segs; 0 means no segment list is cached. */
uint8_t nsegs;
/* Array of nsegs symbol ids, allocated from the symbol arena. */
int64_t* segs;
} sym_segs_t;
/* State of the process-wide symbol (interned string) table. */
typedef struct {
/* Open-addressing hash table with linear probing. Each non-empty slot packs
 * (hash << 32) | (id + 1); a value of 0 marks an empty slot. */
uint64_t* buckets;
/* Number of slots in buckets; used as (cap - 1) mask, so it is kept a power of two. */
uint32_t bucket_cap;
/* id -> interned string object. */
ray_t** strings;
/* Number of ids currently in use (next id to be assigned). */
uint32_t str_count;
/* Allocated capacity (in entries) of strings and segments. */
uint32_t str_cap;
/* Bitmap, one bit per id: set when a segment list is cached in segments[id]. */
uint64_t* dotted;
/* Bitmap, one bit per id: set once the symbol has been examined for segments. */
uint64_t* scanned;
/* Per-id cached segment lists (see sym_segs_t). */
sym_segs_t* segments;
/* Symbol count recorded by the last successful ray_sym_save()/ray_sym_load(). */
uint32_t persisted_count;
/* Arena that backs the interned string objects and segment arrays. */
ray_arena_t* arena;
} sym_table_t;
/* The single symbol table instance; zero-initialized until ray_sym_init(). */
static sym_table_t g_sym;
/* True between a successful ray_sym_init() and ray_sym_destroy(). */
static _Atomic(bool) g_sym_inited = false;
/* Spinlock word used by sym_lock()/sym_unlock(): 0 = free, 1 = held. */
static _Atomic(int) g_sym_lock = 0;
/*
 * Acquire the symbol-table spinlock, busy-waiting until it becomes free.
 * The lock is not recursive: a thread that already holds it must not call
 * this again.
 */
static inline void sym_lock(void) {
    for (;;) {
        int was_held = atomic_exchange_explicit(&g_sym_lock, 1, memory_order_acquire);
        if (!was_held) {
            return;
        }
#if defined(__x86_64__) || defined(__i386__)
        /* Spin-wait hint; only emitted on x86 targets. */
        __builtin_ia32_pause();
#endif
    }
}
/* Release the symbol-table spinlock previously taken with sym_lock(). */
static inline void sym_unlock(void) {
    const int lock_free = 0;
    atomic_store_explicit(&g_sym_lock, lock_free, memory_order_release);
}
/*
 * Create a string object for s[0..len) inside `arena`.
 *
 * Strings shorter than 7 bytes are stored inline in the object (slen/sdata).
 * Longer strings get a RAY_U8 byte vector holding the NUL-terminated text,
 * followed in the same allocation by a 32-byte string header whose `obj`
 * points back at that vector.
 *
 * Returns the string object, or NULL when the arena allocation fails.
 */
static ray_t* sym_str_arena(ray_arena_t* arena, const char* s, size_t len) {
    if (len >= 7) {
        const size_t text_bytes = len + 1; /* text plus NUL terminator */
        /* 32-byte header + text, rounded up to a multiple of 32. */
        const size_t vec_span = (((size_t)32 + text_bytes) + 31) & ~(size_t)31;

        /* NOTE(review): presumably ray_arena_alloc() adds its own 32-byte
         * header to the requested size, which leaves room for the trailing
         * string header at offset vec_span — confirm against mem/arena.h. */
        ray_t* bytes = ray_arena_alloc(arena, vec_span);
        if (bytes == NULL) {
            return NULL;
        }
        bytes->type = RAY_U8;
        bytes->len = (int64_t)len;

        char* text = (char*)ray_data(bytes);
        memcpy(text, s, len);
        text[len] = '\0';

        /* The string header lives directly after the byte vector. */
        ray_t* str = (ray_t*)((char*)bytes + vec_span);
        memset(str, 0, 32);
        str->attrs = RAY_ATTR_ARENA;
        ray_atomic_store(&str->rc, 1);
        str->type = -RAY_STR;
        str->obj = bytes;
        return str;
    }

    /* Short string: the characters fit inline in the object itself. */
    ray_t* small = ray_arena_alloc(arena, 0);
    if (small == NULL) {
        return NULL;
    }
    small->type = -RAY_STR;
    small->slen = (uint8_t)len;
    if (len != 0) {
        memcpy(small->sdata, s, len);
    }
    small->sdata[len] = '\0';
    return small;
}
ray_err_t ray_sym_init(void) {
bool expected = false;
if (!atomic_compare_exchange_strong_explicit(&g_sym_inited, &expected, true,
memory_order_acq_rel, memory_order_acquire))
return RAY_OK;
g_sym.bucket_cap = SYM_INIT_CAP;
g_sym.buckets = (uint64_t*)ray_sys_alloc(g_sym.bucket_cap * sizeof(uint64_t));
if (!g_sym.buckets) {
atomic_store_explicit(&g_sym_inited, false, memory_order_release);
return RAY_ERR_OOM;
}
g_sym.str_cap = SYM_INIT_CAP;
g_sym.str_count = 0;
g_sym.strings = (ray_t**)ray_sys_alloc(g_sym.str_cap * sizeof(ray_t*));
if (!g_sym.strings) {
ray_sys_free(g_sym.buckets);
g_sym.buckets = NULL;
atomic_store_explicit(&g_sym_inited, false, memory_order_release);
return RAY_ERR_OOM;
}
g_sym.arena = ray_arena_new(1024 * 1024);
if (!g_sym.arena) {
ray_sys_free(g_sym.strings);
ray_sys_free(g_sym.buckets);
g_sym.strings = NULL;
g_sym.buckets = NULL;
atomic_store_explicit(&g_sym_inited, false, memory_order_release);
return RAY_ERR_OOM;
}
uint32_t bm_words = (g_sym.str_cap + 63) / 64;
g_sym.dotted = (uint64_t*)ray_sys_alloc((size_t)bm_words * sizeof(uint64_t));
g_sym.scanned = (uint64_t*)ray_sys_alloc((size_t)bm_words * sizeof(uint64_t));
g_sym.segments = (sym_segs_t*)ray_sys_alloc((size_t)g_sym.str_cap * sizeof(sym_segs_t));
if (!g_sym.dotted || !g_sym.scanned || !g_sym.segments) {
if (g_sym.dotted) ray_sys_free(g_sym.dotted);
if (g_sym.scanned) ray_sys_free(g_sym.scanned);
if (g_sym.segments) ray_sys_free(g_sym.segments);
g_sym.dotted = NULL;
g_sym.scanned = NULL;
g_sym.segments = NULL;
ray_arena_destroy(g_sym.arena);
g_sym.arena = NULL;
ray_sys_free(g_sym.strings);
ray_sys_free(g_sym.buckets);
g_sym.strings = NULL;
g_sym.buckets = NULL;
atomic_store_explicit(&g_sym_inited, false, memory_order_release);
return RAY_ERR_OOM;
}
return RAY_OK;
}
/*
 * Tear down the global symbol table: destroy the arena, free every side
 * array, zero the table state and mark it uninitialized. Does nothing when
 * the table is not initialized.
 */
void ray_sym_destroy(void) {
    const bool live = atomic_load_explicit(&g_sym_inited, memory_order_acquire);
    if (!live) {
        return;
    }

    if (g_sym.arena != NULL) {
        ray_arena_destroy(g_sym.arena);
        g_sym.arena = NULL;
    }

    if (g_sym.segments != NULL) {
        ray_sys_free(g_sym.segments);
    }
    if (g_sym.scanned != NULL) {
        ray_sys_free(g_sym.scanned);
    }
    if (g_sym.dotted != NULL) {
        ray_sys_free(g_sym.dotted);
    }
    ray_sys_free(g_sym.strings);
    ray_sys_free(g_sym.buckets);

    /* Leave no dangling pointers or stale counters behind. */
    memset(&g_sym, 0, sizeof g_sym);
    atomic_store_explicit(&g_sym_inited, false, memory_order_release);
}
/*
 * Insert (hash, id) into an open-addressing table using linear probing.
 *
 * buckets  table of `cap` slots; a slot value of 0 means "empty"
 * cap      slot count; must be a power of two (used as a mask)
 * hash     32-bit hash, stored in the upper half of the slot
 * id       symbol id, stored as id + 1 in the lower half so a used slot is
 *          never 0
 *
 * The table must contain at least one empty slot, otherwise this loops
 * forever. No duplicate check is performed.
 */
static void ht_insert(uint64_t* buckets, uint32_t cap, uint32_t hash, uint32_t id) {
    const uint32_t mask = cap - 1;
    const uint64_t packed = ((uint64_t)hash << 32) | ((uint64_t)(id + 1));

    uint32_t pos = hash & mask;
    while (buckets[pos] != 0) {
        pos = (pos + 1) & mask; /* step to the next slot, wrapping at cap */
    }
    buckets[pos] = packed;
}
/*
 * Replace the bucket array with a new one of `new_cap` slots and re-insert
 * every existing entry. Returns false (table untouched) when the new array
 * cannot be allocated.
 *
 * NOTE(review): the new array is not cleared here, so this presumably
 * depends on ray_sys_alloc() returning zero-filled memory — confirm.
 */
static bool ht_grow_to(uint32_t new_cap) {
    uint64_t* fresh = (uint64_t*)ray_sys_alloc((size_t)new_cap * sizeof(uint64_t));
    if (fresh == NULL) {
        return false;
    }

    const uint32_t old_cap = g_sym.bucket_cap;
    for (uint32_t slot = 0; slot < old_cap; slot++) {
        const uint64_t entry = g_sym.buckets[slot];
        if (entry != 0) {
            /* Unpack: hash in the high half, id + 1 in the low half. */
            const uint32_t entry_hash = (uint32_t)(entry >> 32);
            const uint32_t entry_id = (uint32_t)(entry & 0xFFFFFFFF) - 1;
            ht_insert(fresh, new_cap, entry_hash, entry_id);
        }
    }

    ray_sys_free(g_sym.buckets);
    g_sym.buckets = fresh;
    g_sym.bucket_cap = new_cap;
    return true;
}
/*
 * Double the bucket array. Fails when doubling would not fit in 32 bits or
 * when the larger array cannot be allocated.
 */
static bool ht_grow(void) {
    const uint32_t max_doublable = UINT32_MAX / 2 + 1;
    return (g_sym.bucket_cap < max_doublable) && ht_grow_to(g_sym.bucket_cap * 2);
}
/*
 * Grow one id bitmap from old_words to new_words 64-bit words, zeroing the
 * added words. On failure the bitmap pointer is left unchanged.
 */
static bool sym_bitmap_extend(uint64_t** bitmap, uint32_t old_words, uint32_t new_words) {
    uint64_t* grown = (uint64_t*)ray_sys_realloc(*bitmap,
                                                 (size_t)new_words * sizeof(uint64_t));
    if (grown == NULL) {
        return false;
    }
    memset(grown + old_words, 0, (size_t)(new_words - old_words) * sizeof(uint64_t));
    *bitmap = grown;
    return true;
}

/*
 * Grow the per-id arrays (strings, dotted, scanned, segments) to hold
 * `new_cap` ids. New bitmap words and segment entries are zeroed.
 *
 * Returns true on success or when new_cap does not exceed the current
 * capacity. Returns false on allocation failure; arrays that were already
 * reallocated stay enlarged, but str_cap is only updated once every array
 * has grown.
 */
static bool sym_grow_str_cap(uint32_t new_cap) {
    const uint32_t prev_cap = g_sym.str_cap;
    if (new_cap <= prev_cap) {
        return true;
    }

    ray_t** strings = (ray_t**)ray_sys_realloc(g_sym.strings,
                                               (size_t)new_cap * sizeof(ray_t*));
    if (strings == NULL) {
        return false;
    }
    g_sym.strings = strings;

    const uint32_t prev_words = (prev_cap + 63) / 64;
    const uint32_t want_words = (new_cap + 63) / 64;
    if (want_words > prev_words) {
        if (!sym_bitmap_extend(&g_sym.dotted, prev_words, want_words)) {
            return false;
        }
        if (!sym_bitmap_extend(&g_sym.scanned, prev_words, want_words)) {
            return false;
        }
    }

    sym_segs_t* segments = (sym_segs_t*)ray_sys_realloc(g_sym.segments,
                                                        (size_t)new_cap * sizeof(sym_segs_t));
    if (segments == NULL) {
        return false;
    }
    memset(segments + prev_cap, 0, (size_t)(new_cap - prev_cap) * sizeof(sym_segs_t));
    g_sym.segments = segments;

    g_sym.str_cap = new_cap;
    return true;
}
/* Forward declarations for helpers that are defined further down but used
 * by sym_cache_segments(). All of them expect the caller to hold sym_lock(). */
/* Intern str (with segment caching); returns the id or -1 on failure. */
static int64_t sym_intern_nolock(uint32_t hash, const char* str, size_t len);
/* Look up str; returns the id or -1 when it is not interned. */
static int64_t sym_probe(uint32_t hash, const char* str, size_t len);
/* Append str as a new symbol without checking for an existing entry. */
static int64_t sym_commit_new(uint32_t hash, const char* str, size_t len);
/* Pre-grow the table for new_sym_count more symbols and arena_bytes of arena space. */
static bool sym_reserve_capacity(uint32_t new_sym_count, size_t arena_bytes);
/*
 * Compute and cache the segment list of symbol `new_id`, whose text is
 * str[0..len). Caller must hold sym_lock().
 *
 * A name is treated as dotted when it contains at least one separator dot,
 * does not end in a dot, has no empty segment and has at most 255 segments.
 * A leading dot belongs to the first segment and is not a separator. Every
 * segment is interned (without further splitting) and the list of segment
 * ids is stored in g_sym.segments[new_id]; the `dotted` bit is set.
 * Names that do not qualify are only marked in the `scanned` bitmap.
 *
 * Returns true when the symbol has been classified (dotted or not), false
 * when capacity or arena space could not be obtained; in that case the
 * symbol stays unscanned so the work can be retried.
 */
static bool sym_cache_segments(uint32_t new_id, const char* str, size_t len) {
    const uint64_t bit = (uint64_t)1 << (new_id & 63);
    const uint32_t word = new_id >> 6;

    if (g_sym.scanned[word] & bit) {
        return true;
    }

    /* No dot at all (also covers len == 0), or a trailing dot: not dotted. */
    if (!memchr(str, '.', len) || str[len - 1] == '.') {
        g_sym.scanned[word] |= bit;
        return true;
    }

    /* A leading dot only counts if another dot follows it. */
    const bool leading_dot = (str[0] == '.');
    if (leading_dot && !memchr(str + 1, '.', len - 1)) {
        g_sym.scanned[word] |= bit;
        return true;
    }

    size_t sep_dots = 0;
    for (size_t i = (leading_dot ? 1 : 0); i < len; i++) {
        if (str[i] == '.') {
            sep_dots++;
        }
    }
    if (sep_dots + 1 > 255) {
        /* nsegs is stored in a uint8_t. */
        g_sym.scanned[word] |= bit;
        return true;
    }
    const uint8_t nsegs = (uint8_t)(sep_dots + 1);

    struct { const char* p; size_t len; uint32_t hash; int64_t id; } descs[256];
    uint32_t new_seg_count = 0;
    size_t new_seg_bytes = 0;

    /* Pass 1: split into segments and look each one up. Nothing is modified
     * yet, so an invalid name can still be rejected without side effects. */
    {
        const char* p = str;
        size_t remaining = len;
        uint8_t i = 0;
        while (remaining && i < nsegs) {
            const size_t skip = (i == 0 && leading_dot) ? 1 : 0;
            const char* dot = remaining > skip
                ? (const char*)memchr(p + skip, '.', remaining - skip)
                : NULL;
            const size_t seg_len = dot ? (size_t)(dot - p) : remaining;
            if (seg_len == 0) {
                /* Empty segment ("a..b"): not a dotted name. */
                g_sym.scanned[word] |= bit;
                return true;
            }
            const uint32_t h = (uint32_t)ray_hash_bytes(p, seg_len);
            descs[i].p = p;
            descs[i].len = seg_len;
            descs[i].hash = h;
            descs[i].id = sym_probe(h, p, seg_len);
            if (descs[i].id < 0) {
                /* Repeated segments are counted more than once here; that
                 * only makes the reservation below an upper bound. */
                new_seg_count++;
                new_seg_bytes += ray_sym_bytes_upper(seg_len);
            }
            i++;
            if (!dot) {
                break;
            }
            remaining -= (seg_len + 1);
            p = dot + 1;
        }
    }

    /* Reserve everything up front so the commits below do not have to grow
     * the table or the arena. */
    const size_t segs_payload = (size_t)nsegs * sizeof(int64_t);
    const size_t arena_bytes = new_seg_bytes +
        (((size_t)32 + segs_payload + 31) & ~(size_t)31);
    if (!sym_reserve_capacity(new_seg_count, arena_bytes)) {
        return false;
    }

    /* Pass 2: intern the segments that were missing in pass 1. */
    for (uint8_t i = 0; i < nsegs; i++) {
        if (descs[i].id >= 0) {
            continue;
        }
        /* Probe again: the same text may occur several times in one name
         * ("a.a") and may have been committed by an earlier iteration.
         * Committing it again would create a duplicate symbol. */
        int64_t sid = sym_probe(descs[i].hash, descs[i].p, descs[i].len);
        if (sid < 0) {
            sid = sym_commit_new(descs[i].hash, descs[i].p, descs[i].len);
            if (sid < 0) {
                return false;
            }
            /* Segments are never split further; mark them as scanned. */
            g_sym.scanned[sid >> 6] |= ((uint64_t)1 << (sid & 63));
        }
        descs[i].id = sid;
    }

    int64_t* segs = (int64_t*)ray_arena_alloc(g_sym.arena, segs_payload);
    if (!segs) {
        return false;
    }
    for (uint8_t i = 0; i < nsegs; i++) {
        segs[i] = descs[i].id;
    }
    g_sym.segments[new_id].nsegs = nsegs;
    g_sym.segments[new_id].segs = segs;
    g_sym.dotted[word] |= bit;
    g_sym.scanned[word] |= bit;
    return true;
}
/*
 * Look up str[0..len) in the hash table without modifying anything.
 * Caller must hold sym_lock(). Returns the symbol id, or -1 when the string
 * has not been interned.
 */
static int64_t sym_probe(uint32_t hash, const char* str, size_t len) {
    const uint32_t mask = g_sym.bucket_cap - 1;

    for (uint32_t pos = hash & mask;; pos = (pos + 1) & mask) {
        const uint64_t entry = g_sym.buckets[pos];
        if (entry == 0) {
            return -1; /* reached an empty slot: not present */
        }
        if ((uint32_t)(entry >> 32) != hash) {
            continue; /* different hash, cannot be our string */
        }
        const uint32_t cand_id = (uint32_t)(entry & 0xFFFFFFFF) - 1;
        ray_t* cand = g_sym.strings[cand_id];
        if (ray_str_len(cand) == len && memcmp(ray_str_ptr(cand), str, len) == 0) {
            return (int64_t)cand_id;
        }
    }
}
/*
 * Append str[0..len) as a brand-new symbol and index it in the hash table.
 * Caller must hold sym_lock() and must already know the string is not
 * interned; no duplicate check happens here.
 *
 * Returns the new id, or -1 when the table cannot grow or the string cannot
 * be allocated.
 */
static int64_t sym_commit_new(uint32_t hash, const char* str, size_t len) {
    const uint64_t scaled_count = (uint64_t)g_sym.str_count * 100;

    /* Grow at 70% load. If growing fails, keep going until the table is 95%
     * full; only then give up. */
    if (scaled_count >= (uint64_t)g_sym.bucket_cap * 70 && !ht_grow()) {
        if (scaled_count >= (uint64_t)g_sym.bucket_cap * 95) {
            return -1;
        }
    }

    const uint32_t id = g_sym.str_count;
    if (id >= g_sym.str_cap) {
        if (g_sym.str_cap >= UINT32_MAX / 2) {
            return -1;
        }
        if (!sym_grow_str_cap(g_sym.str_cap * 2)) {
            return -1;
        }
    }

    ray_t* interned = sym_str_arena(g_sym.arena, str, len);
    if (interned == NULL) {
        return -1;
    }

    g_sym.strings[id] = interned;
    g_sym.str_count = id + 1;
    ht_insert(g_sym.buckets, g_sym.bucket_cap, hash, id);
    return (int64_t)id;
}
/*
 * Intern str[0..len) without any segment analysis: return the existing id
 * when present, otherwise commit a new symbol. Caller must hold sym_lock().
 * Returns -1 when a new symbol cannot be created.
 */
static int64_t sym_intern_nolock_noseg(uint32_t hash, const char* str, size_t len) {
    int64_t id = sym_probe(hash, str, len);
    if (id < 0) {
        id = sym_commit_new(hash, str, len);
    }
    return id;
}
/*
 * Make room for `new_sym_count` additional symbols and `arena_bytes` of
 * arena space before any of them is committed. Caller must hold sym_lock().
 *
 * Grows the bucket array until the projected load is below 70%, grows the
 * per-id arrays by doubling until they can hold the projected count, and
 * reserves the arena bytes (skipped when arena_bytes is 0).
 *
 * Returns false when a size limit is hit or an allocation fails.
 */
static bool sym_reserve_capacity(uint32_t new_sym_count, size_t arena_bytes) {
    const uint64_t projected = (uint64_t)g_sym.str_count + new_sym_count;

    uint32_t bucket_goal = g_sym.bucket_cap;
    for (; projected * 100 >= (uint64_t)bucket_goal * 70; bucket_goal *= 2) {
        if (bucket_goal >= (UINT32_MAX / 2 + 1)) {
            return false; /* doubling would overflow 32 bits */
        }
    }
    if (bucket_goal > g_sym.bucket_cap && !ht_grow_to(bucket_goal)) {
        return false;
    }

    if (projected > g_sym.str_cap) {
        uint32_t cap_goal = g_sym.str_cap;
        do {
            if (cap_goal >= UINT32_MAX / 2) {
                return false;
            }
            cap_goal *= 2;
        } while (cap_goal < projected);
        if (!sym_grow_str_cap(cap_goal)) {
            return false;
        }
    }

    if (arena_bytes == 0) {
        return true;
    }
    return ray_arena_reserve(g_sym.arena, arena_bytes) ? true : false;
}
/*
 * Intern str[0..len) and cache its dotted-name segments.
 * Caller must hold sym_lock().
 *
 * If the string is already interned its id is returned, after a best-effort
 * attempt to cache its segments. Otherwise the string is analysed: when it
 * is a valid dotted name (at least one separator dot, no trailing dot, no
 * empty segment, at most 255 segments; a leading dot belongs to the first
 * segment) every segment is interned first, then the full name, and the
 * segment id list is stored for the new symbol.
 *
 * Returns the symbol id, or -1 when capacity, arena space or a symbol could
 * not be obtained. If only the segment array allocation fails, the id is
 * still returned and the symbol is left unscanned.
 */
static int64_t sym_intern_nolock(uint32_t hash, const char* str, size_t len) {
    const int64_t existing = sym_probe(hash, str, len);
    if (existing >= 0) {
        /* Best effort: a failure here leaves the symbol unscanned. */
        (void)sym_cache_segments((uint32_t)existing, str, len);
        return existing;
    }

    struct { const char* p; size_t len; uint32_t hash; int64_t id; } descs[256];
    uint8_t nsegs = 0;
    uint32_t new_seg_count = 0;
    size_t new_seg_bytes = 0;
    bool is_dotted = false;

    /* Pass 1: decide whether this is a dotted name and look up each segment.
     * Nothing is modified yet. */
    const char* first_dot = (const char*)memchr(str, '.', len);
    if (first_dot) {
        bool valid = str[len - 1] != '.';
        const bool leading_dot = (str[0] == '.');
        if (valid && leading_dot) {
            /* A leading dot only counts if another dot follows it. */
            const char* second = (const char*)memchr(str + 1, '.', len - 1);
            if (!second) {
                valid = false;
            }
        }
        size_t sep_dots = 0;
        if (valid) {
            for (size_t i = (leading_dot ? 1 : 0); i < len; i++) {
                if (str[i] == '.') {
                    sep_dots++;
                }
            }
            if (sep_dots + 1 > 255) {
                valid = false; /* nsegs is stored in a uint8_t */
            }
        }
        if (valid) {
            nsegs = (uint8_t)(sep_dots + 1);
            const char* p = str;
            size_t remaining = len;
            uint8_t i = 0;
            while (remaining && i < nsegs) {
                const size_t skip = (i == 0 && leading_dot) ? 1 : 0;
                const char* dot = remaining > skip
                    ? (const char*)memchr(p + skip, '.', remaining - skip)
                    : NULL;
                const size_t seg_len = dot ? (size_t)(dot - p) : remaining;
                if (seg_len == 0) {
                    valid = false; /* empty segment ("a..b") */
                    break;
                }
                const uint32_t seg_hash = (uint32_t)ray_hash_bytes(p, seg_len);
                descs[i].p = p;
                descs[i].len = seg_len;
                descs[i].hash = seg_hash;
                descs[i].id = sym_probe(seg_hash, p, seg_len);
                if (descs[i].id < 0) {
                    /* Repeated segments are counted more than once; this
                     * only makes the reservation an upper bound. */
                    new_seg_count++;
                    new_seg_bytes += ray_sym_bytes_upper(seg_len);
                }
                i++;
                if (!dot) {
                    break;
                }
                remaining -= (seg_len + 1);
                p = dot + 1;
            }
            if (valid) {
                is_dotted = true;
            }
        }
    }
    if (!is_dotted) {
        /* Segments seen before the name was rejected are not interned, so
         * they must not inflate the reservation. */
        new_seg_count = 0;
        new_seg_bytes = 0;
    }

    /* Reserve everything up front so the commits below do not have to grow
     * the table or the arena. */
    size_t arena_bytes = ray_sym_bytes_upper(len);
    if (is_dotted) {
        const size_t segs_payload = (size_t)nsegs * sizeof(int64_t);
        arena_bytes += new_seg_bytes;
        arena_bytes += ((size_t)32 + segs_payload + 31) & ~(size_t)31;
    }
    if (!sym_reserve_capacity(1 + new_seg_count, arena_bytes)) {
        return -1;
    }

    /* Pass 2: intern the missing segments. */
    if (is_dotted) {
        for (uint8_t i = 0; i < nsegs; i++) {
            if (descs[i].id >= 0) {
                continue;
            }
            /* Probe-then-commit: the same text may occur several times in
             * one name ("a.a") and may have been committed by an earlier
             * iteration. Committing it blindly would create a duplicate
             * symbol. */
            const int64_t sid = sym_intern_nolock_noseg(descs[i].hash, descs[i].p,
                                                        descs[i].len);
            if (sid < 0) {
                return -1;
            }
            descs[i].id = sid;
            /* Segments are never split further; mark them as scanned. */
            g_sym.scanned[sid >> 6] |= ((uint64_t)1 << (sid & 63));
        }
    }

    const int64_t main_id = sym_commit_new(hash, str, len);
    if (main_id < 0) {
        return -1;
    }

    if (is_dotted) {
        int64_t* segs = (int64_t*)ray_arena_alloc(g_sym.arena,
                                                  (size_t)nsegs * sizeof(int64_t));
        if (!segs) {
            /* The symbol exists; it stays unscanned so segments can be
             * cached later. */
            return main_id;
        }
        for (uint8_t i = 0; i < nsegs; i++) {
            segs[i] = descs[i].id;
        }
        g_sym.segments[main_id].nsegs = nsegs;
        g_sym.segments[main_id].segs = segs;
        g_sym.dotted[main_id >> 6] |= ((uint64_t)1 << (main_id & 63));
    }
    g_sym.scanned[main_id >> 6] |= ((uint64_t)1 << (main_id & 63));
    return main_id;
}
/*
 * Intern str[0..len), caching dotted-name segments. Takes the symbol-table
 * lock. Returns the symbol id, or -1 when the table is not initialized or
 * interning fails.
 */
int64_t ray_sym_intern(const char* str, size_t len) {
    int64_t id = -1;
    if (atomic_load_explicit(&g_sym_inited, memory_order_acquire)) {
        /* Hash outside the lock to keep the critical section short. */
        const uint32_t hash = (uint32_t)ray_hash_bytes(str, len);
        sym_lock();
        id = sym_intern_nolock(hash, str, len);
        sym_unlock();
    }
    return id;
}
/*
 * Intern str[0..len) using a hash the caller has already computed.
 *
 * hash  must be the value ray_sym_intern() would compute for the same bytes,
 *       i.e. (uint32_t)ray_hash_bytes(str, len); a different value makes the
 *       lookup miss existing entries.
 *
 * Takes the symbol-table lock, like ray_sym_intern(): the table is mutated
 * here, and the lock is private to this file, so no caller can already be
 * holding it.
 *
 * Returns the symbol id, or -1 when the table is not initialized or
 * interning fails.
 */
int64_t ray_sym_intern_prehashed(uint32_t hash, const char* str, size_t len) {
    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) {
        return -1;
    }
    sym_lock();
    const int64_t id = sym_intern_nolock(hash, str, len);
    sym_unlock();
    return id;
}
/*
 * Intern str[0..len) without analysing it for dotted segments. Takes the
 * symbol-table lock. Returns the symbol id, or -1 when the table is not
 * initialized or interning fails.
 */
int64_t ray_sym_intern_no_split(const char* str, size_t len) {
    int64_t id = -1;
    if (atomic_load_explicit(&g_sym_inited, memory_order_acquire)) {
        const uint32_t hash = (uint32_t)ray_hash_bytes(str, len);
        sym_lock();
        id = sym_intern_nolock_noseg(hash, str, len);
        sym_unlock();
    }
    return id;
}
/*
 * Cache segments for every symbol that has not been scanned yet (for
 * example symbols added through ray_sym_intern_no_split()).
 *
 * Only ids that exist when the call starts are visited. Returns RAY_ERR_IO
 * when the table is not initialized, RAY_ERR_OOM when segment caching fails
 * for a symbol, RAY_OK otherwise.
 */
ray_err_t ray_sym_rebuild_segments(void) {
    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) {
        return RAY_ERR_IO;
    }

    ray_err_t status = RAY_OK;
    sym_lock();
    const uint32_t total = g_sym.str_count;
    for (uint32_t id = 0; id < total; id++) {
        const uint64_t id_bit = (uint64_t)1 << (id & 63);
        if (g_sym.scanned[id >> 6] & id_bit) {
            continue;
        }
        ray_t* text = g_sym.strings[id];
        if (!text) {
            continue;
        }
        if (!sym_cache_segments(id, ray_str_ptr(text), ray_str_len(text))) {
            status = RAY_ERR_OOM;
            break;
        }
    }
    sym_unlock();
    return status;
}
/*
 * Report whether symbol `sym_id` has a cached segment list (its `dotted`
 * bit is set). Returns false for out-of-range ids and when the table is not
 * initialized.
 *
 * NOTE(review): this reads the table without taking sym_lock(); presumably
 * callers do not run it concurrently with interning — confirm.
 */
bool ray_sym_is_dotted(int64_t sym_id) {
    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) {
        return false;
    }
    /* Compare at 64-bit width: truncating to 32 bits first would let ids
     * >= 2^32 alias onto valid ids. */
    if (sym_id < 0 || (uint64_t)sym_id >= (uint64_t)g_sym.str_count) {
        return false;
    }
    const uint32_t id = (uint32_t)sym_id;
    const uint64_t word = g_sym.dotted[id >> 6];
    return (word >> (id & 63)) & 1;
}
/*
 * Fetch the cached segment ids of symbol `sym_id`.
 *
 * out_segs  when non-NULL and segments exist, receives a pointer to the
 *           cached id array; it is left untouched otherwise.
 *
 * Returns the number of segments, or 0 when the id is out of range, the
 * table is not initialized, or no segment list is cached.
 *
 * NOTE(review): this reads the table without taking sym_lock(); presumably
 * callers do not run it concurrently with interning — confirm.
 */
int ray_sym_segs(int64_t sym_id, const int64_t** out_segs) {
    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) {
        return 0;
    }
    /* Compare at 64-bit width: a check on the truncated id would accept
     * values >= 2^32 and the array access below would read out of bounds. */
    if (sym_id < 0 || (uint64_t)sym_id >= (uint64_t)g_sym.str_count) {
        return 0;
    }
    const sym_segs_t cached = g_sym.segments[(uint32_t)sym_id];
    if (cached.nsegs == 0 || !cached.segs) {
        return 0;
    }
    if (out_segs) {
        *out_segs = cached.segs;
    }
    return (int)cached.nsegs;
}
/*
 * Look up str[0..len) without interning it. Takes the symbol-table lock.
 * Returns the symbol id, or -1 when the string is not interned or the table
 * is not initialized.
 */
int64_t ray_sym_find(const char* str, size_t len) {
    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) {
        return -1;
    }
    sym_lock();
    /* sym_probe() performs the same linear-probe lookup used when interning. */
    const int64_t found = sym_probe((uint32_t)ray_hash_bytes(str, len), str, len);
    sym_unlock();
    return found;
}
/*
 * Return the interned string object for symbol `id`. Takes the symbol-table
 * lock for the lookup. Returns NULL for out-of-range ids and when the table
 * is not initialized. The table keeps ownership of the returned object.
 */
ray_t* ray_sym_str(int64_t id) {
    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) {
        return NULL;
    }
    ray_t* str = NULL;
    sym_lock();
    /* Compare at 64-bit width: a check on the truncated id would accept
     * values >= 2^32 and the array access would read out of bounds. */
    if (id >= 0 && (uint64_t)id < (uint64_t)g_sym.str_count) {
        str = g_sym.strings[(uint32_t)id];
    }
    sym_unlock();
    return str;
}
/*
 * Return the number of interned symbols, read under the symbol-table lock.
 * Returns 0 when the table is not initialized.
 */
uint32_t ray_sym_count(void) {
    uint32_t total = 0;
    if (atomic_load_explicit(&g_sym_inited, memory_order_acquire)) {
        sym_lock();
        total = g_sym.str_count;
        sym_unlock();
    }
    return total;
}
/*
 * Round v up to the next power of two. Returns 0 when v is 0 or when the
 * result would not fit in 32 bits.
 */
static uint32_t sym_ceil_pow2_u32(uint32_t v) {
    v--;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    v++;
    return v;
}

/*
 * Pre-size the table so that `needed` symbols fit without further growth:
 * the per-id arrays are grown to at least `needed` entries and the bucket
 * array to a power of two that keeps the load at or below SYM_LOAD_FACTOR.
 * Takes the symbol-table lock.
 *
 * Returns false when the table is not initialized, when a required size
 * cannot be represented as a 32-bit power of two, or when an allocation
 * fails. Arrays that were already grown stay grown on failure.
 */
bool ray_sym_ensure_cap(uint32_t needed) {
    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) {
        return false;
    }

    bool ok = false;
    sym_lock();

    while (g_sym.str_cap < needed) {
        if (g_sym.str_cap >= UINT32_MAX / 2) {
            goto done;
        }
        uint32_t new_str_cap = g_sym.str_cap * 2;
        if (new_str_cap < needed) {
            /* Doubling is not enough: jump straight to the next power of two. */
            new_str_cap = sym_ceil_pow2_u32(needed);
            if (new_str_cap == 0) {
                goto done;
            }
        }
        if (!sym_grow_str_cap(new_str_cap)) {
            goto done;
        }
    }

    {
        const double raw_buckets = (double)needed / SYM_LOAD_FACTOR + 1.0;
        if (raw_buckets > (double)UINT32_MAX) {
            goto done;
        }
        const uint32_t needed_buckets = sym_ceil_pow2_u32((uint32_t)raw_buckets);
        if (needed_buckets == 0) {
            /* More than 2^31 buckets would be required; the rounding wrapped
             * to 0. Report failure instead of silently skipping the growth. */
            goto done;
        }
        if (needed_buckets > g_sym.bucket_cap && !ht_grow_to(needed_buckets)) {
            goto done;
        }
    }
    ok = true;

done:
    sym_unlock();
    return ok;
}
ray_err_t ray_sym_save(const char* path) {
if (!path) return RAY_ERR_IO;
if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return RAY_ERR_IO;
sym_lock();
if (g_sym.persisted_count == g_sym.str_count) {
sym_unlock();
return RAY_OK;
}
sym_unlock();
char lock_path[1024];
char tmp_path[1024];
if (snprintf(lock_path, sizeof(lock_path), "%s.lk", path) >= (int)sizeof(lock_path))
return RAY_ERR_IO;
if (snprintf(tmp_path, sizeof(tmp_path), "%s.tmp", path) >= (int)sizeof(tmp_path))
return RAY_ERR_IO;
ray_fd_t lock_fd = ray_file_open(lock_path, RAY_OPEN_READ | RAY_OPEN_WRITE | RAY_OPEN_CREATE);
if (lock_fd == RAY_FD_INVALID) return RAY_ERR_IO;
ray_err_t err = ray_file_lock_ex(lock_fd);
if (err != RAY_OK) { ray_file_close(lock_fd); return err; }
{
ray_t* existing = ray_col_load(path);
if (existing && !RAY_IS_ERR(existing)) {
if (existing->type != RAY_LIST) {
ray_release(existing);
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return RAY_ERR_CORRUPT;
}
ray_t** slots = (ray_t**)ray_data(existing);
for (int64_t i = 0; i < existing->len; i++) {
ray_t* s = slots[i];
if (!s || RAY_IS_ERR(s) || s->type != -RAY_STR) {
ray_release(existing);
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return RAY_ERR_CORRUPT;
}
int64_t id = ray_sym_intern_no_split(ray_str_ptr(s), ray_str_len(s));
if (id < 0) {
ray_release(existing);
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return RAY_ERR_OOM;
}
if (id != i) {
ray_release(existing);
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return RAY_ERR_CORRUPT;
}
}
ray_release(existing);
ray_err_t rebuild_err = ray_sym_rebuild_segments();
if (rebuild_err != RAY_OK) {
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return rebuild_err;
}
} else {
ray_fd_t probe_fd = ray_file_open(path, RAY_OPEN_READ);
if (probe_fd != RAY_FD_INVALID) {
ray_file_close(probe_fd);
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return RAY_IS_ERR(existing) ? ray_err_from_obj(existing) : RAY_ERR_IO;
}
if (errno != ENOENT) {
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return RAY_ERR_IO;
}
}
}
sym_lock();
uint32_t count = g_sym.str_count;
size_t snap_sz = count * sizeof(ray_t*);
ray_t* snap_block = ray_alloc(snap_sz);
if (!snap_block || RAY_IS_ERR(snap_block)) {
sym_unlock();
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return RAY_ERR_OOM;
}
ray_t** snap = (ray_t**)ray_data(snap_block);
memcpy(snap, g_sym.strings, snap_sz);
sym_unlock();
ray_t* list = ray_list_new((int64_t)count);
if (!list || RAY_IS_ERR(list)) {
ray_free(snap_block);
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return RAY_ERR_OOM;
}
for (uint32_t i = 0; i < count; i++) {
list = ray_list_append(list, snap[i]);
if (!list || RAY_IS_ERR(list)) {
ray_free(snap_block);
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return RAY_ERR_OOM;
}
}
ray_free(snap_block);
err = ray_col_save(list, tmp_path);
ray_release(list);
if (err != RAY_OK) {
remove(tmp_path);
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return err;
}
ray_fd_t tmp_fd = ray_file_open(tmp_path, RAY_OPEN_READ | RAY_OPEN_WRITE);
if (tmp_fd == RAY_FD_INVALID) {
remove(tmp_path);
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return RAY_ERR_IO;
}
err = ray_file_sync(tmp_fd);
ray_file_close(tmp_fd);
if (err != RAY_OK) {
remove(tmp_path);
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return err;
}
err = ray_file_rename(tmp_path, path);
if (err != RAY_OK) {
remove(tmp_path);
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return err;
}
err = ray_file_sync_dir(path);
if (err != RAY_OK) {
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return err;
}
sym_lock();
g_sym.persisted_count = count;
sym_unlock();
ray_file_unlock(lock_fd);
ray_file_close(lock_fd);
return RAY_OK;
}
/* True when `s` is a usable string slot of a loaded symbol list. */
static bool sym_load_slot_is_str(const ray_t* s) {
    return s && !RAY_IS_ERR(s) && s->type == -RAY_STR;
}

/*
 * Apply a loaded symbol list to the in-memory table.
 *
 * The first `persisted_count` entries must match the strings already in the
 * table; the remaining entries are interned (without splitting) and must
 * receive ids equal to their list position. Segments are rebuilt afterwards
 * and persisted_count is set to the list length.
 */
static ray_err_t sym_load_apply_list(ray_t* list) {
    if (list->type != RAY_LIST || list->len > UINT32_MAX) {
        return RAY_ERR_CORRUPT;
    }

    sym_lock();
    const uint32_t already = g_sym.persisted_count;
    sym_unlock();

    ray_t** slots = (ray_t**)ray_data(list);

    /* The file may only grow relative to what was loaded before. */
    if (already > 0 && list->len < (int64_t)already) {
        return RAY_ERR_CORRUPT;
    }

    /* Verify the prefix that is already in memory. */
    for (int64_t i = 0; i < (int64_t)already && i < list->len; i++) {
        ray_t* s = slots[i];
        if (!sym_load_slot_is_str(s)) {
            return RAY_ERR_CORRUPT;
        }
        ray_t* mem_s = ray_sym_str(i);
        if (!mem_s || ray_str_len(mem_s) != ray_str_len(s) ||
            memcmp(ray_str_ptr(mem_s), ray_str_ptr(s), ray_str_len(s)) != 0) {
            return RAY_ERR_CORRUPT;
        }
    }

    /* Intern the new tail; ids must line up with file positions. */
    for (int64_t i = (int64_t)already; i < list->len; i++) {
        ray_t* s = slots[i];
        if (!sym_load_slot_is_str(s)) {
            return RAY_ERR_CORRUPT;
        }
        const int64_t id = ray_sym_intern_no_split(ray_str_ptr(s), ray_str_len(s));
        if (id < 0) {
            return RAY_ERR_OOM;
        }
        if (id != i) {
            return RAY_ERR_CORRUPT;
        }
    }

    const ray_err_t rebuild_err = ray_sym_rebuild_segments();
    if (rebuild_err != RAY_OK) {
        return rebuild_err;
    }

    sym_lock();
    g_sym.persisted_count = (uint32_t)list->len;
    sym_unlock();
    return RAY_OK;
}

/*
 * Load the symbol file at `path` into the in-memory table.
 *
 * A shared lock is taken on "<path>.lk" while reading. If the lock file can
 * neither be opened nor created because the filesystem is read-only (EROFS),
 * loading proceeds without a lock.
 *
 * Returns RAY_OK on success; RAY_ERR_IO for a NULL or over-long path, an
 * uninitialized table or file errors; RAY_ERR_CORRUPT when the file is
 * malformed or disagrees with the in-memory table; RAY_ERR_OOM on
 * allocation failure; or the error reported by the lock/load call.
 */
ray_err_t ray_sym_load(const char* path) {
    if (!path) {
        return RAY_ERR_IO;
    }
    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) {
        return RAY_ERR_IO;
    }

    char lock_path[1024];
    const int n = snprintf(lock_path, sizeof(lock_path), "%s.lk", path);
    if (n < 0 || n >= (int)sizeof(lock_path)) {
        return RAY_ERR_IO;
    }

    /* Prefer opening an existing lock file read-only; create it if missing. */
    ray_fd_t lock_fd = ray_file_open(lock_path, RAY_OPEN_READ);
    if (lock_fd == RAY_FD_INVALID) {
        const int saved_errno = errno;
        lock_fd = ray_file_open(lock_path,
                                RAY_OPEN_READ | RAY_OPEN_WRITE | RAY_OPEN_CREATE);
        if (lock_fd == RAY_FD_INVALID && saved_errno != EROFS && errno != EROFS) {
            return RAY_ERR_IO;
        }
    }
    if (lock_fd != RAY_FD_INVALID) {
        const ray_err_t lock_err = ray_file_lock_sh(lock_fd);
        if (lock_err != RAY_OK) {
            ray_file_close(lock_fd);
            return lock_err;
        }
    }

    ray_err_t rc;
    ray_t* list = ray_col_load(path);
    if (!list || RAY_IS_ERR(list)) {
        rc = RAY_IS_ERR(list) ? ray_err_from_obj(list) : RAY_ERR_IO;
    } else {
        rc = sym_load_apply_list(list);
        ray_release(list);
    }

    /* On the read-only-filesystem path there is no lock file descriptor, so
     * it must not be unlocked or closed. */
    if (lock_fd != RAY_FD_INVALID) {
        ray_file_unlock(lock_fd);
        ray_file_close(lock_fd);
    }
    return rc;
}