#include "prism/util/pm_strpbrk.h"
/**
 * Append a PM_ERR_INVALID_MULTIBYTE_CHARACTER diagnostic to the parser's error
 * list covering the byte range [start, end). The first byte of the range is
 * supplied as the format argument for the diagnostic message.
 */
static inline void
pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
    const uint8_t leading_byte = *start;

    pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, leading_byte);
}
/**
 * Record the parser's current encoding as its explicit encoding.
 *
 * When an explicit encoding has already been recorded and it is not the
 * current encoding, the only value handled is the UTF-8 entry: in that case a
 * PM_ERR_MIXED_ENCODING diagnostic naming the current encoding is appended
 * over [source, source + width). Any other previously recorded value trips an
 * assertion. Regardless of which path is taken, the explicit encoding is set
 * to parser->encoding before returning.
 */
static inline void
pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
    // Nothing to report if no explicit encoding was recorded yet, or if the
    // recorded one is the encoding we are about to lock to anyway.
    if (parser->explicit_encoding != NULL && parser->explicit_encoding != parser->encoding) {
        if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
            // The recorded explicit encoding is UTF-8 but the current
            // encoding differs, so the two conflict.
            pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
        } else {
            // No other previously recorded value is expected here.
            assert(false && "unreachable");
        }
    }

    parser->explicit_encoding = parser->encoding;
}
/**
 * Scan up to `maximum` bytes of `source` for a byte contained in `charset`,
 * stepping over non-ASCII characters using the UTF-8 width function.
 *
 * Bytes below 0x80 advance the scan by one. For other bytes the UTF-8 width is
 * used to skip the whole character; a width of 0 is treated as an invalid
 * sequence. With `validate` false an invalid byte is skipped silently. With
 * `validate` true the scan advances until the width function reports a
 * non-zero width again (or the input ends) and a single invalid multibyte
 * character diagnostic is recorded for the whole run.
 *
 * Returns a pointer to the matching byte, or NULL if the scan reaches
 * `maximum` without a match. Because the membership test uses strchr, a 0
 * byte in `source` matches the terminator of `charset`.
 */
static inline const uint8_t *
pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
    size_t offset = 0;

    while (offset < maximum) {
        const uint8_t byte = source[offset];

        if (strchr((const char *) charset, byte) != NULL) {
            return source + offset;
        }

        // Plain ASCII byte: always a single-byte character.
        if (byte < 0x80) {
            offset++;
            continue;
        }

        const size_t width = pm_encoding_utf_8_char_width(source + offset, (ptrdiff_t) (maximum - offset));

        // Valid multi-byte character: skip all of its bytes at once.
        if (width > 0) {
            offset += width;
            continue;
        }

        // Invalid byte, but the caller did not ask for validation.
        if (!validate) {
            offset++;
            continue;
        }

        // Invalid byte with validation on: consume the whole run of bytes
        // that do not begin a valid character so that only one diagnostic is
        // emitted for the run.
        const size_t invalid_start = offset;

        do {
            offset++;
        } while (offset < maximum && pm_encoding_utf_8_char_width(source + offset, (ptrdiff_t) (maximum - offset)) == 0);

        pm_strpbrk_invalid_multibyte_character(parser, source + invalid_start, source + offset);
    }

    return NULL;
}
/**
 * Scan up to `maximum` bytes of `source`, one byte at a time, for a byte
 * contained in `charset`.
 *
 * When `validate` is true, every non-matching byte at or above 0x80 causes the
 * parser's explicit encoding to be set to its current encoding.
 *
 * Returns a pointer to the matching byte, or NULL if none is found. Because
 * the membership test uses strchr, a 0 byte in `source` matches the terminator
 * of `charset`.
 */
static inline const uint8_t *
pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
    for (size_t offset = 0; offset < maximum; offset++) {
        const uint8_t byte = source[offset];

        if (strchr((const char *) charset, byte) != NULL) {
            return source + offset;
        }

        if (validate && byte >= 0x80) {
            // NOTE(review): the range handed to the helper starts at `source`
            // rather than at the high byte itself - confirm this is the
            // intended diagnostic location.
            pm_strpbrk_explicit_encoding_set(parser, source, 1);
        }
    }

    return NULL;
}
/**
 * Scan up to `maximum` bytes of `source` for a byte contained in `charset`,
 * stepping over non-ASCII characters using the char_width callback of the
 * parser's current encoding (read once when the scan begins).
 *
 * Bytes below 0x80 advance the scan by one. For any other byte the encoding's
 * width is computed and, when `validate` is true, the parser's explicit
 * encoding is set to its current encoding. A non-zero width skips the whole
 * character. A zero width is treated as an invalid sequence: with `validate`
 * false the byte is skipped silently; with `validate` true the scan advances
 * until the width callback reports a non-zero width again (or the input ends)
 * and one invalid multibyte character diagnostic is recorded for the run.
 *
 * Returns a pointer to the matching byte, or NULL if none is found. Because
 * the membership test uses strchr, a 0 byte in `source` matches the terminator
 * of `charset`.
 */
static inline const uint8_t *
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
    const pm_encoding_t *encoding = parser->encoding;
    size_t offset = 0;

    while (offset < maximum) {
        const uint8_t byte = source[offset];

        if (strchr((const char *) charset, byte) != NULL) {
            return source + offset;
        }

        // Plain ASCII byte: always a single-byte character.
        if (byte < 0x80) {
            offset++;
            continue;
        }

        const size_t width = encoding->char_width(source + offset, (ptrdiff_t) (maximum - offset));

        // NOTE(review): the range handed to the helper starts at `source`
        // rather than at the current offset - confirm this is intended.
        if (validate) {
            pm_strpbrk_explicit_encoding_set(parser, source, width);
        }

        // Valid multi-byte character: skip all of its bytes at once.
        if (width > 0) {
            offset += width;
            continue;
        }

        // Invalid byte, but the caller did not ask for validation.
        if (!validate) {
            offset++;
            continue;
        }

        // Invalid byte with validation on: consume the whole run of bytes
        // that do not begin a valid character so that only one diagnostic is
        // emitted for the run.
        const size_t invalid_start = offset;

        do {
            offset++;
        } while (offset < maximum && encoding->char_width(source + offset, (ptrdiff_t) (maximum - offset)) == 0);

        pm_strpbrk_invalid_multibyte_character(parser, source + invalid_start, source + offset);
    }

    return NULL;
}
/**
 * Scan up to `maximum` bytes of `source` for a byte contained in `charset`,
 * consulting the char_width callback of the parser's current encoding (read
 * once when the scan begins) only when `validate` is true.
 *
 * With `validate` false, or for bytes below 0x80, the scan advances one byte
 * at a time. Otherwise the encoding's width is computed for the byte and the
 * parser's explicit encoding is set to its current encoding. A non-zero width
 * advances by that width. A zero width is treated as an invalid sequence: the
 * scan advances until the callback reports a non-zero width again (or the
 * input ends) and one invalid multibyte character diagnostic is recorded for
 * the run.
 *
 * Returns a pointer to the matching byte, or NULL if none is found. Because
 * the membership test uses strchr, a 0 byte in `source` matches the terminator
 * of `charset`.
 */
static inline const uint8_t *
pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
    const pm_encoding_t *encoding = parser->encoding;
    size_t offset = 0;

    while (offset < maximum) {
        const uint8_t byte = source[offset];

        if (strchr((const char *) charset, byte) != NULL) {
            return source + offset;
        }

        // Without validation, or for ASCII bytes, there is nothing to check.
        if (!validate || byte < 0x80) {
            offset++;
            continue;
        }

        const size_t width = encoding->char_width(source + offset, (ptrdiff_t) (maximum - offset));

        // NOTE(review): the range handed to the helper starts at `source`
        // rather than at the current offset - confirm this is intended.
        pm_strpbrk_explicit_encoding_set(parser, source, width);

        if (width > 0) {
            offset += width;
            continue;
        }

        // The byte is not valid in this encoding: consume the whole run of
        // such bytes so that only one diagnostic is emitted for the run.
        const size_t invalid_start = offset;

        do {
            offset++;
        } while (offset < maximum && encoding->char_width(source + offset, (ptrdiff_t) (maximum - offset)) == 0);

        pm_strpbrk_invalid_multibyte_character(parser, source + invalid_start, source + offset);
    }

    return NULL;
}
/**
 * Find the first byte in `source` that is contained in `charset`, examining
 * at most `length` bytes and stepping over characters according to the
 * parser's encoding. `source` does not need to be NUL-terminated since the
 * scan is bounded by `length`; `charset` is used as a C string.
 *
 * The scanning strategy is chosen from the parser state:
 *   - parser->encoding_changed is false: the UTF-8 scanner;
 *   - the encoding is the ASCII-8BIT entry: the byte-wise scanner;
 *   - the encoding is flagged multibyte: the multi-byte scanner;
 *   - otherwise: the single-byte scanner.
 *
 * @param parser   The parser whose encoding drives the scan and which
 *                 receives any diagnostics.
 * @param source   The bytes to search.
 * @param charset  NUL-terminated set of bytes to search for.
 * @param length   Maximum number of bytes to examine; values <= 0 yield NULL.
 * @param validate Forwarded to the scanners; when true they may record
 *                 diagnostics and set the parser's explicit encoding.
 * @return A pointer to the first matching byte, or NULL if there is none.
 *         Because the scanners test membership with strchr, a 0 byte in
 *         `source` matches the terminator of `charset`.
 */
const uint8_t *
pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
    if (length <= 0) {
        return NULL;
    }

    // Safe to convert: length is known to be strictly positive here.
    const size_t maximum = (size_t) length;

    if (!parser->encoding_changed) {
        return pm_strpbrk_utf8(parser, source, charset, maximum, validate);
    }

    if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
        return pm_strpbrk_ascii_8bit(parser, source, charset, maximum, validate);
    }

    if (parser->encoding->multibyte) {
        return pm_strpbrk_multi_byte(parser, source, charset, maximum, validate);
    }

    return pm_strpbrk_single_byte(parser, source, charset, maximum, validate);
}