#ifndef MOLECULE2_READER_H
#define MOLECULE2_READER_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Diagnostic output hook. Defaults to printf; define mol2_printf before
 * including this header to redirect the messages. */
#ifndef mol2_printf
#define mol2_printf printf
#endif
/* Termination hook used by MOL2_PANIC. Defaults to exit. */
#ifndef MOL2_EXIT
#define MOL2_EXIT exit
#endif
/* Reports the current file/line through mol2_printf and then calls
 * MOL2_EXIT(err). Wrapped in do/while(0) so it behaves as one statement. */
#ifndef MOL2_PANIC
#define MOL2_PANIC(err) \
do { \
mol2_printf("Error at %s: %d\n", __FILE__, __LINE__); \
MOL2_EXIT(err); \
} while (0)
#endif
/* ASSERT expands to a no-op unless the includer defined it beforehand, so
 * conditions checked only through ASSERT are NOT enforced by default. */
#ifndef ASSERT
#define ASSERT(s) ((void)0)
#endif
/* Smaller / larger of two values.
 * Every argument use is parenthesized so that operands containing
 * lower-precedence operators (e.g. `x | y`) compare correctly.
 * Arguments are still evaluated twice: do not pass expressions with side
 * effects. */
#ifndef MIN
#define MIN(a, b) (((a) > (b)) ? (b) : (a))
#endif
#ifndef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
/* Width of every header/length/offset number handled by this reader. */
typedef uint32_t mol2_num_t;
/* Status code type; values are the MOL2_OK / MOL2_ERR_* constants. */
typedef uint8_t mol2_errno;
/* Size in bytes of a mol2_num_t. Must sit on its own line: a `#define`
 * is only recognised as a directive at the start of a line. */
#define MOL2_NUM_T_SIZE 4

/* Fixed-width scalar aliases returned by the convert_to_* functions. */
typedef uint64_t Uint64;
typedef int64_t Int64;
typedef uint32_t Uint32;
typedef int32_t Int32;
typedef uint16_t Uint16;
typedef int16_t Int16;
typedef uint8_t Uint8;
typedef int8_t Int8;
/* Status codes. They are returned as mol2_errno values by the verify/slice
 * functions and passed to MOL2_PANIC on fatal conditions. */
#define MOL2_OK 0x00
#define MOL2_ERR 0xff
/* Actual size differs from the expected size. */
#define MOL2_ERR_TOTAL_SIZE 0x01
/* Input is too short to hold the leading number. */
#define MOL2_ERR_HEADER 0x02
#define MOL2_ERR_OFFSET 0x03
#define MOL2_ERR_UNKNOWN_ITEM 0x04
/* Requested index, or offset + size, lies outside the data. */
#define MOL2_ERR_INDEX_OUT_OF_BOUNDS 0x05
#define MOL2_ERR_FIELD_COUNT 0x06
/* A read returned fewer bytes than required. */
#define MOL2_ERR_DATA 0x07
/* 32-bit arithmetic on offsets/sizes overflowed or underflowed. */
#define MOL2_ERR_OVERFLOW 0x08
/* Exchange the values of `a` and `b`, using `t` as scratch storage.
 * Wrapped in do { } while (0) so that `SWAP(x, y, t);` is a single
 * statement and stays valid as the unbraced body of an if/else. */
#define SWAP(a, b, t) \
  do {                \
    (t) = (a);        \
    (a) = (b);        \
    (b) = (t);        \
  } while (0)
/* Host byte-order probe: builds a union compound literal whose 16-bit member
 * is 1 and reads its first byte. The result is nonzero exactly when the least
 * significant byte is stored first (little-endian host). The macro is
 * #undef'd at the end of this header. */
#define is_le2() \
((union { \
uint16_t i; \
unsigned char c; \
}){.i = 1} \
.c)
/* Reverses the `size` bytes at `ptr` in place on non-little-endian hosts;
 * does nothing on little-endian hosts. */
void change_endian(uint8_t *ptr, int size);
/* Read callback of a data source: copy up to `len` bytes starting at byte
 * `offset` of the underlying data into `ptr` and return the number of bytes
 * actually copied. `arg` is the data source's `args` array. */
typedef uint32_t (*mol2_source_t)(uintptr_t arg[], uint8_t *ptr, uint32_t len,
uint32_t offset);
/* Cache capacity used by DEFAULT_DATA_SOURCE_LENGTH and by
 * mol2_make_cursor_from_memory. */
#define MAX_CACHE_SIZE 2048
/* Not referenced in the visible part of this file. */
#define MIN_CACHE_SIZE 64
/* A readable byte source with a single read-through cache window.
 * The struct ends in a flexible array member, so an instance must be placed
 * in storage of at least MOL2_DATA_SOURCE_LEN(max_cache_size) bytes. */
typedef struct mol2_data_source_t {
/* Opaque arguments forwarded to `read` on every call. */
uintptr_t args[4];
/* Total number of bytes available; cursors are validated against it. */
uint32_t total_size;
/* Callback that fetches bytes from the underlying data. */
mol2_source_t read;
/* Source offset of the first byte currently held in `cache`. */
uint32_t start_point;
/* Number of valid bytes currently held in `cache`. */
uint32_t cache_size;
/* Capacity of `cache`; larger reads bypass the cache. */
uint32_t max_cache_size;
/* Cache storage (flexible array member). */
uint8_t cache[];
} mol2_data_source_t;
/* Bytes needed for a data source with a cache of `cache_size` bytes. */
#define MOL2_DATA_SOURCE_LEN(cache_size) \
(sizeof(mol2_data_source_t) + (cache_size))
/* Bytes needed for a data source with a MAX_CACHE_SIZE cache. */
#define DEFAULT_DATA_SOURCE_LENGTH (sizeof(mol2_data_source_t) + MAX_CACHE_SIZE)
/* A view of `size` bytes starting at byte `offset` of `data_source`.
 * Cursors are small values that are copied freely; they do not own the
 * data source they point to. */
typedef struct mol2_cursor_t {
uint32_t offset; uint32_t size; mol2_data_source_t *data_source;
} mol2_cursor_t;
/* mol2_source_t implementation backed by a memory buffer: args[0] is the
 * buffer address and args[1] its length in bytes. */
uint32_t mol2_source_memory(uintptr_t args[], uint8_t *ptr, uint32_t len,
uint32_t offset);
/* Builds a cursor covering `size` bytes of `memory`. */
mol2_cursor_t mol2_make_cursor_from_memory(const void *memory, uint32_t size);
/* Copies up to min(cur->size, buff_len) bytes from the start of the cursor
 * into `buff` and returns the number of bytes copied. */
uint32_t mol2_read_at(const mol2_cursor_t *cur, uint8_t *buff,
uint32_t buff_len);
/* Pointer/length pair. Not referenced elsewhere in the visible part of this
 * file. */
typedef struct {
uint8_t *ptr; mol2_num_t size; } mol2_seg_t;
/* Result of mol2_union_unpack: the item id read from the leading number and
 * a cursor over the bytes that follow it. */
typedef struct {
mol2_num_t item_id; mol2_cursor_t cursor; } mol2_union_t;
/* Cursor plus status code; `cur` is only meaningful when `errno` is MOL2_OK.
 * NOTE(review): the member is named `errno`, which the C library defines as a
 * macro in <errno.h>; including that header before this one may break the
 * member access — confirm with the build configuration. */
typedef struct {
mol2_errno errno; mol2_cursor_t cur; } mol2_cursor_res_t;
/* Cursor arithmetic. Both panic with MOL2_ERR_OVERFLOW when the 32-bit
 * result would wrap. */
void mol2_add_offset(mol2_cursor_t *cur, uint32_t offset);
void mol2_sub_size(mol2_cursor_t *cur, uint32_t shrinked_size);
/* Panics unless offset + size fits inside the data source's total_size. */
void mol2_validate(const mol2_cursor_t *cur);
/* Reads the 4-byte number at the start of the cursor. */
mol2_num_t mol2_unpack_number(const mol2_cursor_t *cursor);
/* Size checks; return MOL2_OK or an MOL2_ERR_* code instead of panicking. */
mol2_errno mol2_verify_fixed_size(const mol2_cursor_t *input,
mol2_num_t total_size);
mol2_errno mol2_fixvec_verify(const mol2_cursor_t *input, mol2_num_t item_size);
/* True when the cursor is empty (size 0). */
bool mol2_option_is_none(const mol2_cursor_t *input);
/* Splits the cursor into a leading item id and the remaining bytes. */
mol2_union_t mol2_union_unpack(const mol2_cursor_t *input);
/* Element counts. */
mol2_num_t mol2_fixvec_length(const mol2_cursor_t *input);
mol2_num_t mol2_dynvec_length(const mol2_cursor_t *input);
mol2_num_t mol2_table_actual_field_count(const mol2_cursor_t *input);
bool mol2_table_has_extra_fields(const mol2_cursor_t *input,
mol2_num_t field_count);
/* Slicing: each returns a cursor over part of `input`. */
mol2_cursor_t mol2_slice_by_offset(const mol2_cursor_t *input,
mol2_num_t offset, mol2_num_t size);
mol2_cursor_res_t mol2_fixvec_slice_by_index(const mol2_cursor_t *input,
mol2_num_t item_size,
mol2_num_t item_index);
mol2_cursor_res_t mol2_dynvec_slice_by_index(const mol2_cursor_t *input,
mol2_num_t item_index);
mol2_cursor_t mol2_table_slice_by_index(const mol2_cursor_t *input,
mol2_num_t field_index);
mol2_cursor_t mol2_fixvec_slice_raw_bytes(const mol2_cursor_t *input);
/* Scalar readers: read sizeof(result) bytes from the start of the cursor and
 * panic with MOL2_ERR_DATA if fewer bytes are available. */
Uint64 convert_to_Uint64(mol2_cursor_t *cur);
Int64 convert_to_Int64(mol2_cursor_t *cur);
Uint32 convert_to_Uint32(mol2_cursor_t *cur);
Int32 convert_to_Int32(mol2_cursor_t *cur);
Uint16 convert_to_Uint16(mol2_cursor_t *cur);
Int16 convert_to_Int16(mol2_cursor_t *cur);
Uint8 convert_to_Uint8(mol2_cursor_t *cur);
Int8 convert_to_Int8(mol2_cursor_t *cur);
/* convert_to_array returns the cursor unchanged; convert_to_rawbytes returns
 * the payload of a fixvec (the bytes after its leading count). */
mol2_cursor_t convert_to_array(mol2_cursor_t *cur);
mol2_cursor_t convert_to_rawbytes(mol2_cursor_t *cur);
#ifndef MOLECULEC_C2_DECLARATION_ONLY
/* Move the start of `cur` forward by `offset` bytes; the size is left as is.
 * Panics with MOL2_ERR_OVERFLOW if the new offset does not fit in 32 bits. */
void mol2_add_offset(mol2_cursor_t *cur, uint32_t offset) {
  uint32_t advanced;
  const bool wrapped = __builtin_add_overflow(cur->offset, offset, &advanced);
  if (wrapped) {
    MOL2_PANIC(MOL2_ERR_OVERFLOW);
  }
  cur->offset = advanced;
}
/* Shrink `cur` by `shrinked_size` bytes; the offset is left as is.
 * Panics with MOL2_ERR_OVERFLOW if more bytes are removed than the cursor
 * holds. */
void mol2_sub_size(mol2_cursor_t *cur, uint32_t shrinked_size) {
  uint32_t remaining;
  const bool underflowed =
      __builtin_sub_overflow(cur->size, shrinked_size, &remaining);
  if (underflowed) {
    MOL2_PANIC(MOL2_ERR_OVERFLOW);
  }
  cur->size = remaining;
}
/* Derive an item count from the number at the start of `cur`: the number is
 * divided by 4 and one is subtracted from the quotient.
 * Panics with MOL2_ERR_OVERFLOW when the quotient is zero. */
uint32_t mol2_get_item_count(mol2_cursor_t *cur) {
  const uint32_t first_number = mol2_unpack_number(cur);
  const uint32_t quotient = first_number / 4;
  if (quotient == 0) {
    MOL2_PANIC(MOL2_ERR_OVERFLOW);
  }
  return quotient - 1;
}
/* Compute item_size * item_count + offset in 32-bit arithmetic.
 * Panics with MOL2_ERR_OVERFLOW if either the product or the sum wraps. */
uint32_t mol2_calculate_offset(uint32_t item_size, uint32_t item_count,
                               uint32_t offset) {
  uint32_t product;
  uint32_t total;

  if (__builtin_mul_overflow(item_size, item_count, &product)) {
    MOL2_PANIC(MOL2_ERR_OVERFLOW);
  }
  if (__builtin_add_overflow(product, offset, &total)) {
    MOL2_PANIC(MOL2_ERR_OVERFLOW);
  }
  return total;
}
/* Check that the byte range described by `cur` lies inside its data source.
 * Panics with MOL2_ERR_OVERFLOW if offset + size wraps, and with
 * MOL2_ERR_INDEX_OUT_OF_BOUNDS (after printing the three values) if the range
 * ends past the data source's total_size. */
void mol2_validate(const mol2_cursor_t *cur) {
  uint32_t end;
  if (__builtin_add_overflow(cur->offset, cur->size, &end)) {
    MOL2_PANIC(MOL2_ERR_OVERFLOW);
  }
  if (end > cur->data_source->total_size) {
    /* The values are unsigned, so print them with %u; the message states the
     * condition that actually failed. */
    mol2_printf("offset(%u) + size(%u) > total_size(%u)\n",
                (unsigned int)cur->offset, (unsigned int)cur->size,
                (unsigned int)cur->data_source->total_size);
    MOL2_PANIC(MOL2_ERR_INDEX_OUT_OF_BOUNDS);
  }
}
/* Return MOL2_OK when the cursor is exactly `total_size` bytes long,
 * MOL2_ERR_TOTAL_SIZE otherwise. */
mol2_errno mol2_verify_fixed_size(const mol2_cursor_t *input,
                                  mol2_num_t total_size) {
  if (input->size != total_size) {
    return MOL2_ERR_TOTAL_SIZE;
  }
  return MOL2_OK;
}
/* Check the size of a fixvec: a leading item count followed by that many
 * items of `item_size` bytes each.
 * Returns MOL2_ERR_HEADER if the cursor cannot hold the count,
 * MOL2_ERR_TOTAL_SIZE if the cursor size does not equal
 * MOL2_NUM_T_SIZE + item_count * item_size, and MOL2_OK otherwise. */
mol2_errno mol2_fixvec_verify(const mol2_cursor_t *input,
                              mol2_num_t item_size) {
  if (input->size < MOL2_NUM_T_SIZE) {
    return MOL2_ERR_HEADER;
  }

  const mol2_num_t item_count = mol2_unpack_number(input);

  /* An empty vector consists of the count alone. */
  mol2_num_t expected_size = MOL2_NUM_T_SIZE;
  if (item_count != 0) {
    expected_size =
        mol2_calculate_offset(item_size, item_count, MOL2_NUM_T_SIZE);
  }

  if (input->size != expected_size) {
    return MOL2_ERR_TOTAL_SIZE;
  }
  return MOL2_OK;
}
/* An option is "none" when its cursor holds no bytes at all. */
bool mol2_option_is_none(const mol2_cursor_t *input) {
  const bool has_no_bytes = (input->size == 0);
  return has_no_bytes;
}
mol2_union_t mol2_union_unpack(const mol2_cursor_t *input) {
mol2_union_t ret;
ret.item_id = mol2_unpack_number(input);
ret.cursor = *input; mol2_add_offset(&ret.cursor, MOL2_NUM_T_SIZE);
mol2_sub_size(&ret.cursor, MOL2_NUM_T_SIZE);
mol2_validate(&ret.cursor);
return ret;
}
/* Number of items in a fixvec: the number stored at its very beginning. */
mol2_num_t mol2_fixvec_length(const mol2_cursor_t *input) {
  const mol2_num_t item_count = mol2_unpack_number(input);
  return item_count;
}
/* Number of items in a dynvec.
 * A cursor of exactly MOL2_NUM_T_SIZE bytes is an empty vector. Otherwise the
 * count is derived by mol2_get_item_count from the number that follows the
 * leading one. */
mol2_num_t mol2_dynvec_length(const mol2_cursor_t *input) {
  if (input->size == MOL2_NUM_T_SIZE) {
    return 0;
  }

  /* Step over the leading number to reach the one that follows it. */
  mol2_cursor_t after_header = *input;
  mol2_add_offset(&after_header, MOL2_NUM_T_SIZE);
  mol2_sub_size(&after_header, MOL2_NUM_T_SIZE);
  mol2_validate(&after_header);
  return mol2_get_item_count(&after_header);
}
/* Number of fields present in a table; counted exactly like the items of a
 * dynvec. */
mol2_num_t mol2_table_actual_field_count(const mol2_cursor_t *input) {
  const mol2_num_t fields = mol2_dynvec_length(input);
  return fields;
}
/* True when the table holds more fields than the `field_count` the caller
 * expects. */
bool mol2_table_has_extra_fields(const mol2_cursor_t *input,
                                 mol2_num_t field_count) {
  const mol2_num_t present = mol2_table_actual_field_count(input);
  return field_count < present;
}
/* Return a cursor of `size` bytes that starts `offset` bytes into `input`.
 * Panics if the new offset overflows or the slice does not fit in the data
 * source. */
mol2_cursor_t mol2_slice_by_offset(const mol2_cursor_t *input,
                                   mol2_num_t offset, mol2_num_t size) {
  mol2_cursor_t slice = *input;

  mol2_add_offset(&slice, offset);
  slice.size = size;
  mol2_validate(&slice);

  return slice;
}
/* Same slice as mol2_slice_by_offset, wrapped in a result struct whose status
 * is always MOL2_OK (invalid slices panic instead of returning an error). */
mol2_cursor_res_t mol2_slice_by_offset2(const mol2_cursor_t *input,
                                        mol2_num_t offset, mol2_num_t size) {
  mol2_cursor_res_t res;
  res.cur = mol2_slice_by_offset(input, offset, size);
  res.errno = MOL2_OK;
  return res;
}
/* Locate item `item_index` of a fixvec whose items are `item_size` bytes.
 * Returns MOL2_ERR_INDEX_OUT_OF_BOUNDS (with `cur` equal to the input) when
 * the index is not below the stored item count; otherwise MOL2_OK and a
 * validated cursor over the item. */
mol2_cursor_res_t mol2_fixvec_slice_by_index(const mol2_cursor_t *input,
                                             mol2_num_t item_size,
                                             mol2_num_t item_index) {
  mol2_cursor_res_t res;
  res.cur = *input;

  const mol2_num_t item_count = mol2_unpack_number(input);
  if (item_index >= item_count) {
    res.errno = MOL2_ERR_INDEX_OUT_OF_BOUNDS;
    return res;
  }

  /* Items are packed back to back right after the leading count. */
  const uint32_t item_offset =
      mol2_calculate_offset(item_size, item_index, MOL2_NUM_T_SIZE);
  res.errno = MOL2_OK;
  mol2_add_offset(&res.cur, item_offset);
  res.cur.size = item_size;
  mol2_validate(&res.cur);
  return res;
}
/* Locate item `item_index` of a dynvec.
 * Layout as read here: a total size, then one 4-byte start offset per item
 * (relative to the beginning of the vector), then the items. An item ends
 * where the next one starts; the last item ends at the total size.
 * Returns MOL2_ERR_INDEX_OUT_OF_BOUNDS (with `cur` equal to the input) for an
 * empty vector or an index past the item count; otherwise MOL2_OK and a
 * validated cursor over the item. */
mol2_cursor_res_t mol2_dynvec_slice_by_index(const mol2_cursor_t *input,
                                             mol2_num_t item_index) {
  mol2_cursor_res_t res;
  res.cur = *input;
  mol2_cursor_t probe = *input;

  const mol2_num_t total_size = mol2_unpack_number(input);
  if (total_size == MOL2_NUM_T_SIZE) {
    /* Only the total size is present: the vector has no items. */
    res.errno = MOL2_ERR_INDEX_OUT_OF_BOUNDS;
    return res;
  }

  /* The first offset entry sits right after the total size. */
  mol2_add_offset(&probe, MOL2_NUM_T_SIZE);
  const mol2_num_t item_count = mol2_get_item_count(&probe);
  if (item_index >= item_count) {
    res.errno = MOL2_ERR_INDEX_OUT_OF_BOUNDS;
    return res;
  }

  /* Offset entry of the requested item: slot item_index + 1. */
  probe.offset = input->offset;
  const uint32_t start_slot =
      mol2_calculate_offset(MOL2_NUM_T_SIZE, item_index + 1, 0);
  mol2_add_offset(&probe, start_slot);
  const mol2_num_t item_start = mol2_unpack_number(&probe);

  /* The end is the next item's start, or the total size for the last item. */
  mol2_num_t item_end = total_size;
  if (item_index + 1 != item_count) {
    probe.offset = input->offset;
    const uint32_t end_slot =
        mol2_calculate_offset(MOL2_NUM_T_SIZE, item_index + 2, 0);
    mol2_add_offset(&probe, end_slot);
    item_end = mol2_unpack_number(&probe);
  }

  res.errno = MOL2_OK;
  res.cur.offset = input->offset;
  mol2_add_offset(&res.cur, item_start);
  res.cur.size = item_end;
  mol2_sub_size(&res.cur, item_start);
  mol2_validate(&res.cur);
  return res;
}
/* Return a cursor over field `field_index` of a table (fields are located
 * exactly like dynvec items).
 * This function has no way to report an error to the caller, so a failed
 * lookup panics with the lookup's error code. ASSERT alone is not enough: it
 * is a no-op by default, which would hand back the cursor of the whole table
 * as if it were the requested field. */
mol2_cursor_t mol2_table_slice_by_index(const mol2_cursor_t *input,
                                        mol2_num_t field_index) {
  mol2_cursor_res_t res = mol2_dynvec_slice_by_index(input, field_index);
  ASSERT(res.errno == 0);
  if (res.errno != MOL2_OK) {
    MOL2_PANIC(res.errno);
  }
  return res.cur;
}
mol2_cursor_t mol2_fixvec_slice_raw_bytes(const mol2_cursor_t *input) {
mol2_cursor_t cur = *input;
mol2_add_offset(&cur, MOL2_NUM_T_SIZE);
cur.size = mol2_unpack_number(input);
mol2_validate(&cur);
return cur;
}
/* Read a uint64_t from the start of the cursor.
 * The bytes are reversed on non-little-endian hosts (see change_endian).
 * Panics with MOL2_ERR_DATA if fewer than 8 bytes could be read. */
Uint64 convert_to_Uint64(mol2_cursor_t *cur) {
  uint64_t value;
  uint8_t *bytes = (uint8_t *)&value;
  const uint32_t width = (uint32_t)sizeof(value);

  const uint32_t got = mol2_read_at(cur, bytes, width);
  if (got != width) {
    MOL2_PANIC(MOL2_ERR_DATA);
  }
  change_endian(bytes, (int)width);
  return value;
}
/* Read an int64_t from the start of the cursor.
 * The bytes are reversed on non-little-endian hosts (see change_endian).
 * Panics with MOL2_ERR_DATA if fewer than 8 bytes could be read. */
Int64 convert_to_Int64(mol2_cursor_t *cur) {
  int64_t value;
  uint8_t *bytes = (uint8_t *)&value;
  const uint32_t width = (uint32_t)sizeof(value);

  const uint32_t got = mol2_read_at(cur, bytes, width);
  if (got != width) {
    MOL2_PANIC(MOL2_ERR_DATA);
  }
  change_endian(bytes, (int)width);
  return value;
}
/* Read a uint32_t from the start of the cursor.
 * The bytes are reversed on non-little-endian hosts (see change_endian).
 * Panics with MOL2_ERR_DATA if fewer than 4 bytes could be read. */
Uint32 convert_to_Uint32(mol2_cursor_t *cur) {
  uint32_t value;
  uint8_t *bytes = (uint8_t *)&value;
  const uint32_t width = (uint32_t)sizeof(value);

  const uint32_t got = mol2_read_at(cur, bytes, width);
  if (got != width) {
    MOL2_PANIC(MOL2_ERR_DATA);
  }
  change_endian(bytes, (int)width);
  return value;
}
/* Read an int32_t from the start of the cursor.
 * The bytes are reversed on non-little-endian hosts (see change_endian).
 * Panics with MOL2_ERR_DATA if fewer than 4 bytes could be read. */
Int32 convert_to_Int32(mol2_cursor_t *cur) {
  int32_t value;
  uint8_t *bytes = (uint8_t *)&value;
  const uint32_t width = (uint32_t)sizeof(value);

  const uint32_t got = mol2_read_at(cur, bytes, width);
  if (got != width) {
    MOL2_PANIC(MOL2_ERR_DATA);
  }
  change_endian(bytes, (int)width);
  return value;
}
/* Read a uint16_t from the start of the cursor.
 * The bytes are reversed on non-little-endian hosts (see change_endian).
 * Panics with MOL2_ERR_DATA if fewer than 2 bytes could be read. */
Uint16 convert_to_Uint16(mol2_cursor_t *cur) {
  uint16_t value;
  uint8_t *bytes = (uint8_t *)&value;
  const uint32_t width = (uint32_t)sizeof(value);

  const uint32_t got = mol2_read_at(cur, bytes, width);
  if (got != width) {
    MOL2_PANIC(MOL2_ERR_DATA);
  }
  change_endian(bytes, (int)width);
  return value;
}
/* Read an int16_t from the start of the cursor.
 * Panics with MOL2_ERR_DATA if fewer than 2 bytes could be read.
 * Like every other multi-byte convert_to_* reader, the value goes through
 * change_endian so that it is also decoded correctly on big-endian hosts
 * (the call is a no-op on little-endian hosts). */
Int16 convert_to_Int16(mol2_cursor_t *cur) {
  int16_t ret;
  uint32_t len = mol2_read_at(cur, (uint8_t *)&ret, sizeof(ret));
  if (len != sizeof(ret)) {
    MOL2_PANIC(MOL2_ERR_DATA);
  }
  change_endian((uint8_t *)&ret, sizeof(ret));
  return ret;
}
/* Read a uint8_t from the start of the cursor.
 * Panics with MOL2_ERR_DATA if the byte could not be read.
 * A single byte has no byte order, so change_endian is deliberately not
 * called: with an odd size it would panic on big-endian hosts. */
Uint8 convert_to_Uint8(mol2_cursor_t *cur) {
  uint8_t ret;
  uint32_t len = mol2_read_at(cur, &ret, sizeof(ret));
  if (len != sizeof(ret)) {
    MOL2_PANIC(MOL2_ERR_DATA);
  }
  return ret;
}
/* Read an int8_t from the start of the cursor.
 * Panics with MOL2_ERR_DATA if the byte could not be read.
 * A single byte has no byte order, so change_endian is deliberately not
 * called: with an odd size it would panic on big-endian hosts. */
Int8 convert_to_Int8(mol2_cursor_t *cur) {
  int8_t ret;
  uint32_t len = mol2_read_at(cur, (uint8_t *)&ret, sizeof(ret));
  if (len != sizeof(ret)) {
    MOL2_PANIC(MOL2_ERR_DATA);
  }
  return ret;
}
/* An array is represented by its cursor as is: return a copy of `*cur`. */
mol2_cursor_t convert_to_array(mol2_cursor_t *cur) {
  mol2_cursor_t same = *cur;
  return same;
}
/* Raw bytes are the payload of a fixvec: delegate to
 * mol2_fixvec_slice_raw_bytes. */
mol2_cursor_t convert_to_rawbytes(mol2_cursor_t *cur) {
  const mol2_cursor_t payload = mol2_fixvec_slice_raw_bytes(cur);
  return payload;
}
/* Convert `size` bytes at `ptr` between little-endian and host byte order,
 * in place.
 * On a little-endian host this is a no-op. On any other host the bytes are
 * reversed. Sizes 0 and 1 need no work and return immediately (a one-byte
 * value must not hit the odd-size panic below). Any other odd size panics
 * with MOL2_ERR_DATA. */
void change_endian(uint8_t *ptr, int size) {
  if (is_le2()) return;
  if (size == 0 || size == 1) return;
  if (size % 2 != 0) {
    MOL2_PANIC(MOL2_ERR_DATA);
  }
  uint8_t t = 0;
  /* Swap mirrored byte pairs, walking in from the front. */
  for (int i = 0; i < size / 2; i++) {
    SWAP(ptr[i], ptr[size - 1 - i], t);
  }
}
/* Data-source read callback backed by a plain memory buffer.
 *
 * args[0] is the address of the buffer and args[1] its length in bytes.
 * Copies up to `len` bytes starting at byte `offset` of the buffer into `ptr`
 * and returns the number of bytes copied.
 *
 * An `offset` at or past the end of the buffer copies nothing and returns 0.
 * This is enforced with a real check rather than ASSERT (a no-op by default):
 * otherwise `mem_len - offset` would wrap around and memcpy would read far
 * outside the buffer. After the check, offset + copied length can never
 * exceed the buffer length. */
uint32_t mol2_source_memory(uintptr_t args[], uint8_t *ptr, uint32_t len,
                            uint32_t offset) {
  const uint8_t *start_mem = (const uint8_t *)args[0];
  const uint32_t mem_len = (uint32_t)args[1];

  if (offset >= mem_len) {
    return 0;
  }

  const uint32_t remaining_len = mem_len - offset;
  const uint32_t copy_len = remaining_len < len ? remaining_len : len;
  memcpy(ptr, start_mem + offset, copy_len);
  return copy_len;
}
mol2_cursor_t mol2_make_cursor_from_memory(const void *memory, uint32_t size) {
mol2_cursor_t cur;
cur.offset = 0;
cur.size = size;
static mol2_data_source_t s_data_source = {0};
s_data_source.read = mol2_source_memory;
s_data_source.total_size = size;
s_data_source.args[0] = (uintptr_t)memory;
s_data_source.args[1] = (uintptr_t)size;
s_data_source.cache_size = 0;
s_data_source.start_point = 0;
s_data_source.max_cache_size = MAX_CACHE_SIZE;
cur.data_source = &s_data_source;
return cur;
}
/* Copy bytes from the start of `cur` into `buff`.
 * At most min(cur->size, buff_len) bytes are requested. Requests larger than
 * the cache capacity go straight to the data source's read callback and
 * return whatever it returns. Smaller requests are served from the data
 * source's cache, which is refilled starting at cur->offset whenever the
 * requested range is not fully inside the cached window.
 * Panics with MOL2_ERR_DATA if a refill yields fewer bytes than requested and
 * with MOL2_ERR_OVERFLOW if a cache consistency check fails. The `return 0`
 * statements after MOL2_PANIC are only reached if MOL2_EXIT returns. */
uint32_t mol2_read_at(const mol2_cursor_t *cur, uint8_t *buff,
uint32_t buff_len) {
uint32_t read_len = MIN(cur->size, buff_len);
mol2_data_source_t *ds = cur->data_source;
/* Too large for the cache: read directly into the caller's buffer. */
if (read_len > ds->max_cache_size) {
return ds->read(ds->args, buff, read_len, cur->offset);
}
/* Cache miss: the range starts before or ends after the cached window.
 * NOTE(review): cur->offset + read_len is 32-bit arithmetic and could wrap
 * for offsets near UINT32_MAX; the checks further down appear to catch that
 * case, but confirm. */
if (cur->offset < ds->start_point ||
((cur->offset + read_len) > ds->start_point + ds->cache_size)) {
/* Refill the whole cache starting at the requested offset. */
uint32_t size =
ds->read(ds->args, ds->cache, ds->max_cache_size, cur->offset);
if (size < read_len) {
MOL2_PANIC(MOL2_ERR_DATA);
return 0;
}
ds->cache_size = size;
ds->start_point = cur->offset;
/* The callback must not report more bytes than the cache can hold. */
if (ds->cache_size > ds->max_cache_size) {
MOL2_PANIC(MOL2_ERR_OVERFLOW);
return 0;
}
}
/* Sanity check: the offset must now fall inside the cached window. */
if (cur->offset < ds->start_point ||
(cur->offset - ds->start_point) > ds->max_cache_size) {
MOL2_PANIC(MOL2_ERR_OVERFLOW);
return 0;
}
uint8_t *read_point = ds->cache + cur->offset - ds->start_point;
/* Sanity check: the copy must end inside the valid part of the cache. */
if ((read_point + read_len) > (ds->cache + ds->cache_size)) {
MOL2_PANIC(MOL2_ERR_OVERFLOW);
return 0;
}
memcpy(buff, read_point, read_len);
return read_len;
}
mol2_num_t mol2_unpack_number(const mol2_cursor_t *cursor) {
uint8_t src[4];
uint32_t len = mol2_read_at(cursor, src, 4);
if (len != 4) {
MOL2_PANIC(MOL2_ERR_DATA);
}
if (is_le2()) {
return *(const uint32_t *)src;
} else {
uint32_t output = 0;
uint8_t *dst = (uint8_t *)&output;
dst[3] = src[0];
dst[2] = src[1];
dst[1] = src[2];
dst[0] = src[3];
return output;
}
}
#endif
#undef is_le2
#ifdef __cplusplus
}
#endif
#endif