use memchr::memchr3;
use crate::error::{Error, ErrorKind, Span};
use crate::value::Scalar;
use super::inline;
use super::value_start::ValueStart;
pub(super) fn classify_value_start(
text: &str,
line_num: usize,
trimmed_span: Span,
) -> Result<ValueStart, Error> {
let trimmed = text.trim_start();
if trimmed == "{" {
return Ok(ValueStart::OpenObject);
}
if trimmed == "[" {
return Ok(ValueStart::OpenArray);
}
if trimmed.starts_with('{') {
if trimmed.ends_with('}') && trimmed[1..trimmed.len() - 1].trim().is_empty() {
return Ok(ValueStart::EmptyObject);
}
if trimmed.ends_with('}') {
let value = inline::parse_inline_object(trimmed, line_num, trimmed_span)?;
return Ok(ValueStart::InlineValue(value));
}
check_trailing_backslash(trimmed, line_num, trimmed_span)?;
return Err(Error::Structured(ErrorKind::UnterminatedInlineCompound {
line: line_num as u32,
span: trimmed_span,
}));
}
if trimmed.starts_with('[') {
if trimmed.ends_with(']') && trimmed[1..trimmed.len() - 1].trim().is_empty() {
return Ok(ValueStart::EmptyArray);
}
if trimmed.ends_with(']') {
let value = inline::parse_inline_array(trimmed, line_num, trimmed_span)?;
return Ok(ValueStart::InlineValue(value));
}
check_trailing_backslash(trimmed, line_num, trimmed_span)?;
return Err(Error::Structured(ErrorKind::UnterminatedInlineCompound {
line: line_num as u32,
span: trimmed_span,
}));
}
match trimmed {
"(" => return Ok(ValueStart::OpenMultilineStripped),
"((" => return Ok(ValueStart::OpenMultilineVerbatim),
"()" | "(())" => return Ok(ValueStart::Scalar(Scalar::new(""))),
_ => {}
}
if trimmed.starts_with('(') {
return Err(Error::Structured(ErrorKind::InlineNonEmptyCompound {
line: line_num as u32,
span: trimmed_span,
body: "paren-string".to_string(),
}));
}
match trimmed {
"null" => return Ok(ValueStart::Null),
"true" => return Ok(ValueStart::Bool(true)),
"false" => return Ok(ValueStart::Bool(false)),
_ => {}
}
if let Some(_val) = fast_plain_decimal_i64(trimmed) {
return Ok(ValueStart::Integer(trimmed.into()));
}
if let Some(val) = try_parse_integer(trimmed) {
let mut buf = itoa::Buffer::new();
let canonical = buf.format(val);
return Ok(ValueStart::Integer(canonical.into()));
}
if is_float_literal(trimmed) {
if let Some(val) = parse_float_value(trimmed) {
let mut buf = ryu::Buffer::new();
let canonical = buf.format(val);
if canonical == trimmed {
return Ok(ValueStart::Float(trimmed.into()));
}
return Ok(ValueStart::Float(canonical.into()));
}
}
Ok(ValueStart::Scalar(trimmed.into()))
}
/// Parses an integer literal into an `i64`.
///
/// Accepts an optional leading `+` or `-`, followed by either a `0x`/`0X`,
/// `0o`/`0O` or `0b`/`0B` prefixed number (delegated to
/// `parse_prefixed_int`) or a decimal number (delegated to
/// `parse_decimal_int`).
///
/// Returns `None` for empty input, a sign with nothing after it, or whenever
/// the delegated parser rejects the digits.
pub(crate) fn try_parse_integer(s: &str) -> Option<i64> {
    let bytes = s.as_bytes();

    // Peel off at most one sign character.
    let (negative, unsigned) = match bytes.first()? {
        b'-' => (true, &bytes[1..]),
        b'+' => (false, &bytes[1..]),
        _ => (false, bytes),
    };

    match unsigned {
        [] => None,
        [b'0', b'x', rest @ ..] | [b'0', b'X', rest @ ..] => {
            parse_prefixed_int(rest, 16, negative)
        }
        [b'0', b'o', rest @ ..] | [b'0', b'O', rest @ ..] => {
            parse_prefixed_int(rest, 8, negative)
        }
        [b'0', b'b', rest @ ..] | [b'0', b'B', rest @ ..] => {
            parse_prefixed_int(rest, 2, negative)
        }
        _ => parse_decimal_int(unsigned, negative),
    }
}
/// Parses the digits that follow a `0x` / `0o` / `0b` prefix into an `i64`.
///
/// * `digits` — the bytes after the prefix.
/// * `radix` — the numeric base. It must not exceed 36, the limit of
///   [`char::to_digit`]; the callers in this file pass 2, 8 or 16.
/// * `negative` — whether the literal carried a leading `-`.
///
/// The first byte must be a digit valid for `radix`. After that, single `_`
/// separators are allowed, but not two in a row and not as the last byte.
///
/// Returns `None` when the input is empty or malformed, when the magnitude
/// does not fit in a `u64`, or when the signed result is outside the `i64`
/// range. A negative literal whose magnitude is exactly 2^63 yields
/// `i64::MIN`.
fn parse_prefixed_int(digits: &[u8], radix: u32, negative: bool) -> Option<i64> {
    // Reject empty input and a leading separator / foreign character.
    match digits.first() {
        Some(&first) if char::from(first).to_digit(radix).is_some() => {}
        _ => return None,
    }

    let mut magnitude: u64 = 0;
    let mut prev_was_underscore = false;
    for &b in digits {
        if b == b'_' {
            if prev_was_underscore {
                return None;
            }
            prev_was_underscore = true;
            continue;
        }
        prev_was_underscore = false;
        let digit = char::from(b).to_digit(radix)?;
        magnitude = magnitude
            .checked_mul(u64::from(radix))?
            .checked_add(u64::from(digit))?;
    }
    if prev_was_underscore {
        return None;
    }

    if negative {
        // The negative range reaches one further than the positive one.
        if magnitude > (i64::MAX as u64) + 1 {
            return None;
        }
        // A magnitude of 2^63 casts to `i64::MIN`, whose plain negation
        // overflows; `wrapping_neg` maps it back to `i64::MIN`, which is the
        // intended value. Every smaller magnitude negates normally.
        Some((magnitude as i64).wrapping_neg())
    } else if magnitude > i64::MAX as u64 {
        None
    } else {
        Some(magnitude as i64)
    }
}
/// Parses unsigned decimal digits into an `i64`, applying `negative` as the
/// sign.
///
/// The first byte must be an ASCII digit. After that, single `_` separators
/// are allowed, but not two in a row and not as the last byte. Any other
/// byte makes the input invalid.
///
/// Returns `None` for empty or malformed input, when the magnitude does not
/// fit in a `u64`, or when the signed result is outside the `i64` range.
/// A negative magnitude of exactly 2^63 yields `i64::MIN`.
fn parse_decimal_int(digits: &[u8], negative: bool) -> Option<i64> {
    if !digits.first()?.is_ascii_digit() {
        return None;
    }

    let mut magnitude: u64 = 0;
    let mut after_separator = false;
    for &byte in digits {
        match byte {
            b'_' if after_separator => return None,
            b'_' => after_separator = true,
            b'0'..=b'9' => {
                after_separator = false;
                magnitude = magnitude
                    .checked_mul(10)?
                    .checked_add(u64::from(byte - b'0'))?;
            }
            _ => return None,
        }
    }
    if after_separator {
        return None;
    }

    let positive_limit = i64::MAX as u64;
    if negative {
        if magnitude > positive_limit + 1 {
            return None;
        }
        // 2^63 casts to `i64::MIN` and `wrapping_neg` leaves it there; all
        // smaller magnitudes (including zero) negate normally.
        Some((magnitude as i64).wrapping_neg())
    } else if magnitude > positive_limit {
        None
    } else {
        Some(magnitude as i64)
    }
}
/// Returns `true` when `digit_value` yields a value for `b` under `radix`,
/// i.e. `b` is a digit character whose value is below `radix`.
fn is_digit_for_radix(b: u8, radix: u32) -> bool {
digit_value(b, radix).is_some()
}
/// Returns the numeric value of the ASCII digit `b` in base `radix`.
///
/// Recognises `0`-`9`, `a`-`f` and `A`-`F` (values 0 through 15). Returns
/// `None` for any other byte, or when the digit's value is not below
/// `radix`.
fn digit_value(b: u8, radix: u32) -> Option<u32> {
    let raw = match b {
        b'0'..=b'9' => b - b'0',
        b'a'..=b'f' => b - b'a' + 10,
        b'A'..=b'F' => b - b'A' + 10,
        _ => return None,
    };
    Some(u32::from(raw)).filter(|&value| value < radix)
}
/// Reports whether `s` has the shape of a float literal.
///
/// Accepted shape: an optional `+`/`-` sign, an integer part, and then
/// either a `.` with a fractional part and an optional exponent, or an
/// exponent alone. Digit runs are validated by `scan_dec_part` and exponents
/// by `scan_exponent`. The whole string must be consumed.
///
/// Input that is empty, does not start with a digit or sign, or contains
/// none of `.`, `e`, `E` is rejected up front.
pub(crate) fn is_float_literal(s: &str) -> bool {
    let bytes = s.as_bytes();
    let lead = match bytes.first() {
        Some(&byte) => byte,
        None => return false,
    };
    let signed = matches!(lead, b'+' | b'-');
    if !(signed || lead.is_ascii_digit()) {
        return false;
    }
    // Cheap rejection: without a point or exponent marker it cannot be a float.
    if memchr3(b'.', b'e', b'E', bytes).is_none() {
        return false;
    }

    let exponent_starts_at = |pos: usize| matches!(bytes.get(pos), Some(b'e') | Some(b'E'));

    // Integer part, starting after the sign if there is one.
    let (after_int, int_ok) = scan_dec_part(bytes, usize::from(signed));
    if !int_ok {
        return false;
    }

    if bytes.get(after_int) == Some(&b'.') {
        let (after_frac, frac_ok) = scan_dec_part(bytes, after_int + 1);
        if !frac_ok {
            return false;
        }
        let mut end = after_frac;
        if exponent_starts_at(end) {
            let (after_exp, exp_ok) = scan_exponent(bytes, end);
            if !exp_ok {
                return false;
            }
            end = after_exp;
        }
        return end == bytes.len();
    }

    if exponent_starts_at(after_int) {
        let (after_exp, exp_ok) = scan_exponent(bytes, after_int);
        return exp_ok && after_exp == bytes.len();
    }

    false
}
/// Scans a run of decimal digits with optional `_` separators, starting at
/// index `i` of `bytes`.
///
/// The run must begin with an ASCII digit. Scanning continues over digits
/// and underscores and stops at the first other byte or at the end of
/// input. Two underscores in a row, or a run ending on an underscore, make
/// the run invalid.
///
/// Returns `(index, ok)`: `ok` says whether a valid run was found and
/// `index` is where scanning stopped (for a valid run, the first byte after
/// it).
fn scan_dec_part(bytes: &[u8], mut i: usize) -> (usize, bool) {
    match bytes.get(i) {
        Some(first) if first.is_ascii_digit() => i += 1,
        _ => return (i, false),
    }

    let mut after_separator = false;
    while let Some(&byte) = bytes.get(i) {
        match byte {
            b'_' if after_separator => return (i, false),
            b'_' => after_separator = true,
            b'0'..=b'9' => after_separator = false,
            _ => break,
        }
        i += 1;
    }

    (i, !after_separator)
}
/// Scans an exponent starting at index `i` of `bytes`: an `e` or `E`, an
/// optional `+`/`-` sign, then a digit run validated by `scan_dec_part`.
///
/// Returns `(i, false)` when `bytes[i]` is not an exponent marker; otherwise
/// returns whatever `scan_dec_part` reports for the digits.
fn scan_exponent(bytes: &[u8], i: usize) -> (usize, bool) {
    if !matches!(bytes.get(i), Some(b'e') | Some(b'E')) {
        return (i, false);
    }
    let has_sign = matches!(bytes.get(i + 1), Some(b'+') | Some(b'-'));
    scan_dec_part(bytes, i + 1 + usize::from(has_sign))
}
/// Fast path for plain decimal integers: digits only, with no sign, prefix
/// or separators.
///
/// `"0"` parses to zero; any other text with a leading `0` is rejected.
/// Otherwise the text must start with `1`-`9` and contain only ASCII digits.
/// Returns `None` for anything else, including values that overflow `i64`.
#[inline]
pub(super) fn fast_plain_decimal_i64(s: &str) -> Option<i64> {
    let (lead, rest) = s.as_bytes().split_first()?;
    match *lead {
        // A leading zero is only valid as the entire literal.
        b'0' => {
            if rest.is_empty() {
                Some(0)
            } else {
                None
            }
        }
        b'1'..=b'9' => rest
            .iter()
            .try_fold(i64::from(*lead - b'0'), |total, &byte| {
                if !byte.is_ascii_digit() {
                    return None;
                }
                total.checked_mul(10)?.checked_add(i64::from(byte - b'0'))
            }),
        _ => None,
    }
}
/// Parses `s` as an `f64`, ignoring any `_` separators.
///
/// Returns `None` when the text (with underscores removed) does not parse,
/// or when the parsed value is NaN or infinite.
fn parse_float_value(s: &str) -> Option<f64> {
    let parsed = if s.contains('_') {
        // Separators are not understood by `str::parse`, so drop them first.
        s.replace('_', "").parse::<f64>()
    } else {
        s.parse::<f64>()
    };
    parsed.ok().filter(|value| value.is_finite())
}
/// Reports whether `s` is written like an integer literal.
///
/// True when `try_parse_integer` accepts `s`, and otherwise when the
/// syntax-only check `matches_integer_grammar_syntax` accepts it. The latter
/// validates characters only and never computes a value.
pub fn matches_integer_grammar(s: &str) -> bool {
    try_parse_integer(s).is_some() || matches_integer_grammar_syntax(s)
}
/// Syntax-only integer check: validates the characters of `s` without
/// computing a value.
///
/// Accepts an optional `+`/`-` sign followed by either a `0x`/`0o`/`0b`
/// prefixed digit run (either letter case, checked by
/// `check_prefixed_digits`) or a decimal digit run (checked by
/// `check_decimal_digits`). Empty input and a bare sign are rejected.
fn matches_integer_grammar_syntax(s: &str) -> bool {
    let bytes = s.as_bytes();

    // Peel off at most one sign character.
    let unsigned = match bytes.first() {
        None => return false,
        Some(b'+') | Some(b'-') => &bytes[1..],
        Some(_) => bytes,
    };

    match unsigned {
        [] => false,
        [b'0', b'x', rest @ ..] | [b'0', b'X', rest @ ..] => check_prefixed_digits(rest, 16),
        [b'0', b'o', rest @ ..] | [b'0', b'O', rest @ ..] => check_prefixed_digits(rest, 8),
        [b'0', b'b', rest @ ..] | [b'0', b'B', rest @ ..] => check_prefixed_digits(rest, 2),
        _ => check_decimal_digits(unsigned),
    }
}
/// Validates the digit run that follows a radix prefix, without computing
/// its value.
///
/// The first byte must be a digit valid for `radix`. Every later byte must
/// be such a digit or a `_` separator; two separators in a row or a
/// trailing separator are rejected. Empty input is rejected.
fn check_prefixed_digits(digits: &[u8], radix: u32) -> bool {
    let (head, tail) = match digits.split_first() {
        Some(parts) => parts,
        None => return false,
    };
    if !is_digit_for_radix(*head, radix) {
        return false;
    }

    let mut after_separator = false;
    for &byte in tail {
        let is_separator = byte == b'_';
        if is_separator {
            if after_separator {
                return false;
            }
        } else if !is_digit_for_radix(byte, radix) {
            return false;
        }
        after_separator = is_separator;
    }
    !after_separator
}
/// Validates a decimal digit run without computing its value.
///
/// The first byte must be an ASCII digit. Every later byte must be an ASCII
/// digit or a `_` separator; two separators in a row or a trailing separator
/// are rejected. Empty input is rejected.
fn check_decimal_digits(digits: &[u8]) -> bool {
    let (head, tail) = match digits.split_first() {
        Some(parts) => parts,
        None => return false,
    };
    if !head.is_ascii_digit() {
        return false;
    }

    // The fold state is "was the previous byte a separator?"; `None` aborts
    // on the first invalid byte or doubled separator.
    let ended_on_separator = tail.iter().try_fold(false, |after_separator, &byte| match byte {
        b'_' if after_separator => None,
        b'_' => Some(true),
        b'0'..=b'9' => Some(false),
        _ => None,
    });
    ended_on_separator == Some(false)
}
/// Reports whether `s` has the shape of a float literal.
///
/// Public wrapper that forwards directly to `is_float_literal`.
pub fn matches_float_grammar(s: &str) -> bool {
is_float_literal(s)
}
fn check_trailing_backslash(s: &str, line_num: usize, span: Span) -> Result<(), Error> {
let bytes = s.as_bytes();
if bytes.is_empty() {
return Ok(());
}
let mut n = 0;
for &b in bytes.iter().rev() {
if b == b'\\' {
n += 1;
} else {
break;
}
}
if n % 2 == 1 {
return Err(Error::Structured(ErrorKind::BadEscapeSequence {
line: line_num as u32,
span,
sequence: "\\<end-of-line>".to_string(),
}));
}
Ok(())
}