sciforge 0.0.3

A comprehensive scientific computing library in pure Rust with zero dependencies
Documentation
use core::char;
use core::str;

use super::error::{JsonError, JsonErrorKind};
use super::lexer::Cursor;

/// A JSON string literal located in the input, with escape sequences left undecoded.
///
/// Built by `parse_string`; `raw` is exactly the input bytes in `start..end`.
/// The common traits are derived so values can be debug-printed, copied and
/// compared (the struct only holds a borrowed slice and two offsets).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct ParsedString<'a> {
    /// Text between the quotes as it appears in the input; escapes are validated but not decoded.
    pub raw: &'a str,
    /// Index into the cursor's byte buffer of the first byte after the opening quote.
    pub start: usize,
    /// Index into the cursor's byte buffer of the closing quote (exclusive end of `raw`).
    pub end: usize,
}

/// Scans one JSON string literal beginning at the cursor's current byte.
///
/// The opening `"` is consumed first. The body is then walked byte by byte until the
/// closing `"`, which is also consumed. Escape sequences are checked by `parse_escape`
/// but not decoded: the returned [`ParsedString::raw`] borrows the input bytes between
/// the quotes unchanged.
///
/// # Errors
///
/// * whatever `cursor.consume(b'"')` reports when the opening quote is missing;
/// * `JsonErrorKind::Eof` when the input ends before the closing quote;
/// * `JsonErrorKind::ControlCharacterInString` for an unescaped byte below `0x20`;
/// * `JsonErrorKind::InvalidUtf8` when a run of unescaped bytes is not valid UTF-8
///   (reported at the start of that run);
/// * any error returned by `parse_escape`.
pub(crate) fn parse_string<'a>(cursor: &mut Cursor<'a>) -> Result<ParsedString<'a>, JsonError> {
    cursor.consume(b'"')?;

    let start = cursor.position();
    // Start of the current run of unescaped bytes that has not been UTF-8 checked yet.
    let mut seg_start = start;

    loop {
        // `ok_or_else` keeps the error construction off the happy path: this line runs
        // once per input byte, and the error is only needed when the input is exhausted.
        let b = cursor
            .peek()
            .ok_or_else(|| JsonError::new(JsonErrorKind::Eof, cursor.position()))?;

        match b {
            b'"' => {
                let end = cursor.position();
                validate_utf8_segment(cursor, seg_start, end)?;
                // Step over the closing quote so the caller resumes after the literal.
                cursor.advance(1);
                let raw = str::from_utf8(&cursor.bytes()[start..end])
                    .map_err(|_| JsonError::new(JsonErrorKind::InvalidUtf8, start))?;
                return Ok(ParsedString { raw, start, end });
            }
            b'\\' => {
                // Check the unescaped run that ends here, then skip the backslash and
                // let `parse_escape` validate (and consume) the rest of the sequence.
                validate_utf8_segment(cursor, seg_start, cursor.position())?;
                cursor.advance(1);
                parse_escape(cursor)?;
                seg_start = cursor.position();
            }
            0x00..=0x1F => {
                return Err(JsonError::new(
                    JsonErrorKind::ControlCharacterInString,
                    cursor.position(),
                ));
            }
            _ => {
                cursor.advance(1);
            }
        }
    }
}

/// Validates the part of an escape sequence that follows the backslash.
///
/// The caller has already stepped over the `\`. This function consumes the escape
/// selector byte and, for `\u`, the four hex digits (plus a following `\uXXXX` low
/// surrogate when the first unit is a high surrogate). Nothing is decoded or returned;
/// only well-formedness is checked.
///
/// # Errors
///
/// * `JsonErrorKind::Eof` when no byte follows the backslash;
/// * `JsonErrorKind::InvalidEscape` (at the selector byte) for an unknown selector;
/// * `JsonErrorKind::InvalidUnicodeSurrogate` for a low surrogate with no preceding
///   high surrogate, or a high surrogate whose partner is not a low surrogate;
/// * `JsonErrorKind::InvalidUnicodeEscape` when the resulting value is not a valid `char`;
/// * any error from `cursor.consume` or `parse_u16_hex`.
fn parse_escape(cursor: &mut Cursor<'_>) -> Result<(), JsonError> {
    let selector = match cursor.peek() {
        Some(byte) => byte,
        None => return Err(JsonError::new(JsonErrorKind::Eof, cursor.position())),
    };
    cursor.advance(1);

    // Single-character escapes are complete once the selector is consumed; only `\u`
    // continues below.
    match selector {
        b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => return Ok(()),
        b'u' => {}
        _ => {
            // The selector was already consumed, so it sits one byte behind the cursor.
            return Err(JsonError::new(
                JsonErrorKind::InvalidEscape,
                cursor.position() - 1,
            ));
        }
    }

    let lead = parse_u16_hex(cursor)?;
    match lead {
        // High surrogate: must be followed immediately by `\u` + a low surrogate.
        0xD800..=0xDBFF => {
            cursor.consume(b'\\')?;
            cursor.consume(b'u')?;
            let trail = parse_u16_hex(cursor)?;
            if !matches!(trail, 0xDC00..=0xDFFF) {
                return Err(JsonError::new(
                    JsonErrorKind::InvalidUnicodeSurrogate,
                    cursor.position(),
                ));
            }
            // Combine the two UTF-16 code units into one scalar value.
            let high_bits = (u32::from(lead) - 0xD800) << 10;
            let low_bits = u32::from(trail) - 0xDC00;
            let scalar = 0x10000u32 + high_bits + low_bits;
            if char::from_u32(scalar).is_none() {
                return Err(JsonError::new(
                    JsonErrorKind::InvalidUnicodeEscape,
                    cursor.position(),
                ));
            }
        }
        // Low surrogate without a preceding high surrogate.
        0xDC00..=0xDFFF => {
            return Err(JsonError::new(
                JsonErrorKind::InvalidUnicodeSurrogate,
                cursor.position(),
            ));
        }
        _ => {
            if char::from_u32(u32::from(lead)).is_none() {
                return Err(JsonError::new(
                    JsonErrorKind::InvalidUnicodeEscape,
                    cursor.position(),
                ));
            }
        }
    }

    Ok(())
}

/// Reads exactly four hexadecimal digits at the cursor and returns them as a `u16`.
///
/// Digits may be upper- or lower-case. The cursor is advanced past every byte that
/// was examined, including an offending non-hex byte.
///
/// # Errors
///
/// * `JsonErrorKind::Eof` (nothing consumed) when `position() + 4` exceeds `len()`;
/// * `JsonErrorKind::InvalidUnicodeEscape`, positioned at the offending byte, when a
///   byte is not a hex digit.
fn parse_u16_hex(cursor: &mut Cursor<'_>) -> Result<u16, JsonError> {
    let needed_end = cursor.position() + 4;
    if needed_end > cursor.len() {
        return Err(JsonError::new(JsonErrorKind::Eof, cursor.position()));
    }

    let mut value: u16 = 0;
    for _ in 0..4 {
        let byte = match cursor.peek() {
            Some(byte) => byte,
            None => return Err(JsonError::new(JsonErrorKind::Eof, cursor.position())),
        };
        cursor.advance(1);

        match (byte as char).to_digit(16) {
            // `to_digit(16)` yields 0..=15, so narrowing to `u16` is lossless.
            Some(nibble) => value = (value << 4) | nibble as u16,
            None => {
                // The bad byte was just consumed, so it is one behind the cursor.
                return Err(JsonError::new(
                    JsonErrorKind::InvalidUnicodeEscape,
                    cursor.position() - 1,
                ));
            }
        }
    }

    Ok(value)
}

/// Checks that the cursor's bytes in `start..end` form valid UTF-8.
///
/// An empty range (`start == end`) succeeds without touching the buffer.
///
/// # Errors
///
/// Returns `JsonErrorKind::InvalidUtf8` positioned at `start` (the beginning of the
/// range, not the offending byte) when the bytes are not valid UTF-8.
fn validate_utf8_segment(cursor: &Cursor<'_>, start: usize, end: usize) -> Result<(), JsonError> {
    if start == end {
        return Ok(());
    }

    let segment = &cursor.bytes()[start..end];
    match str::from_utf8(segment) {
        Ok(_) => Ok(()),
        Err(_) => Err(JsonError::new(JsonErrorKind::InvalidUtf8, start)),
    }
}