aver-lang 0.19.0

VM and transpiler for Aver, a statically typed language designed for AI-assisted development
Documentation
use std::collections::BTreeMap;

use super::JsonValue;

/// Parses `input` as a single JSON document.
///
/// Builds a fresh `JsonParser` over `input` and runs it to completion.
///
/// # Errors
/// Returns the parser's error message (a `String`) when `input` is not
/// accepted by `JsonParser::parse`.
pub(super) fn parse_json(input: &str) -> Result<JsonValue, String> {
    let parser = JsonParser::new(input);
    parser.parse()
}

/// Cursor-based JSON parser over a borrowed input string.
struct JsonParser<'a> {
    /// The full input text; used when a `str` slice is needed (e.g. the text of a number).
    src: &'a str,
    /// Byte view of the input (initialised from `src.as_bytes()`), used for byte-wise scanning.
    bytes: &'a [u8],
    /// Byte offset of the next unread byte in `bytes`.
    pos: usize,
}

impl<'a> JsonParser<'a> {
    fn new(src: &'a str) -> Self {
        Self {
            src,
            bytes: src.as_bytes(),
            pos: 0,
        }
    }

    fn parse(mut self) -> Result<JsonValue, String> {
        self.skip_ws();
        let value = self.parse_value()?;
        self.skip_ws();
        if self.pos != self.bytes.len() {
            return Err(self.error("trailing characters after JSON value"));
        }
        Ok(value)
    }

    fn parse_value(&mut self) -> Result<JsonValue, String> {
        self.skip_ws();
        let Some(byte) = self.peek() else {
            return Err(self.error("unexpected end of input"));
        };

        match byte {
            b'n' => {
                self.expect_keyword("null")?;
                Ok(JsonValue::Null)
            }
            b't' => {
                self.expect_keyword("true")?;
                Ok(JsonValue::Bool(true))
            }
            b'f' => {
                self.expect_keyword("false")?;
                Ok(JsonValue::Bool(false))
            }
            b'"' => Ok(JsonValue::String(self.parse_string()?)),
            b'[' => self.parse_array(),
            b'{' => self.parse_object(),
            b'-' | b'0'..=b'9' => self.parse_number(),
            _ => Err(self.error("unexpected token")),
        }
    }

    fn parse_array(&mut self) -> Result<JsonValue, String> {
        self.expect_byte(b'[')?;
        self.skip_ws();

        let mut items = Vec::new();
        if self.peek() == Some(b']') {
            self.pos += 1;
            return Ok(JsonValue::Array(items));
        }

        loop {
            items.push(self.parse_value()?);
            self.skip_ws();
            match self.peek() {
                Some(b',') => {
                    self.pos += 1;
                    self.skip_ws();
                }
                Some(b']') => {
                    self.pos += 1;
                    break;
                }
                _ => return Err(self.error("expected ',' or ']' in array")),
            }
        }

        Ok(JsonValue::Array(items))
    }

    fn parse_object(&mut self) -> Result<JsonValue, String> {
        self.expect_byte(b'{')?;
        self.skip_ws();

        let mut fields = BTreeMap::new();
        if self.peek() == Some(b'}') {
            self.pos += 1;
            return Ok(JsonValue::Object(fields));
        }

        loop {
            let key = self.parse_string()?;
            self.skip_ws();
            self.expect_byte(b':')?;
            self.skip_ws();
            let value = self.parse_value()?;
            fields.insert(key, value);
            self.skip_ws();

            match self.peek() {
                Some(b',') => {
                    self.pos += 1;
                    self.skip_ws();
                }
                Some(b'}') => {
                    self.pos += 1;
                    break;
                }
                _ => return Err(self.error("expected ',' or '}' in object")),
            }
        }

        Ok(JsonValue::Object(fields))
    }

    fn parse_string(&mut self) -> Result<String, String> {
        self.expect_byte(b'"')?;
        let mut out = String::new();
        let mut chunk_start = self.pos;

        while self.pos < self.bytes.len() {
            let b = self.bytes[self.pos];
            match b {
                b'"' => {
                    if chunk_start < self.pos {
                        out.push_str(
                            std::str::from_utf8(&self.bytes[chunk_start..self.pos])
                                .map_err(|_| self.error("invalid UTF-8 in string"))?,
                        );
                    }
                    self.pos += 1;
                    return Ok(out);
                }
                b'\\' => {
                    if chunk_start < self.pos {
                        out.push_str(
                            std::str::from_utf8(&self.bytes[chunk_start..self.pos])
                                .map_err(|_| self.error("invalid UTF-8 in string"))?,
                        );
                    }
                    self.pos += 1;
                    out.push(self.parse_escape_sequence()?);
                    chunk_start = self.pos;
                }
                0x00..=0x1F => {
                    return Err(self.error("control character in string literal"));
                }
                _ => {
                    self.pos += 1;
                }
            }
        }

        Err(self.error("unterminated string literal"))
    }

    fn parse_escape_sequence(&mut self) -> Result<char, String> {
        let Some(ch) = self.next_byte() else {
            return Err(self.error("unterminated escape sequence"));
        };

        match ch {
            b'"' => Ok('"'),
            b'\\' => Ok('\\'),
            b'/' => Ok('/'),
            b'b' => Ok('\u{08}'),
            b'f' => Ok('\u{0C}'),
            b'n' => Ok('\n'),
            b'r' => Ok('\r'),
            b't' => Ok('\t'),
            b'u' => self.parse_unicode_escape(),
            _ => Err(self.error("invalid escape sequence")),
        }
    }

    fn parse_unicode_escape(&mut self) -> Result<char, String> {
        let first = self.parse_hex_u16()?;

        if (0xD800..=0xDBFF).contains(&first) {
            self.expect_byte(b'\\')?;
            self.expect_byte(b'u')?;
            let second = self.parse_hex_u16()?;
            if !(0xDC00..=0xDFFF).contains(&second) {
                return Err(self.error("invalid low surrogate in unicode escape"));
            }
            let high = (first as u32) - 0xD800;
            let low = (second as u32) - 0xDC00;
            let codepoint = 0x10000 + ((high << 10) | low);
            return char::from_u32(codepoint)
                .ok_or_else(|| self.error("invalid unicode codepoint"));
        }

        if (0xDC00..=0xDFFF).contains(&first) {
            return Err(self.error("unexpected low surrogate in unicode escape"));
        }

        char::from_u32(first as u32).ok_or_else(|| self.error("invalid unicode codepoint"))
    }

    fn parse_hex_u16(&mut self) -> Result<u16, String> {
        let mut value: u16 = 0;
        for _ in 0..4 {
            let Some(b) = self.next_byte() else {
                return Err(self.error("incomplete unicode escape"));
            };
            value = value
                .checked_mul(16)
                .ok_or_else(|| self.error("unicode escape overflow"))?;
            value = value
                .checked_add(hex_digit(b).ok_or_else(|| self.error("invalid hex digit"))? as u16)
                .ok_or_else(|| self.error("unicode escape overflow"))?;
        }
        Ok(value)
    }

    fn parse_number(&mut self) -> Result<JsonValue, String> {
        let start = self.pos;

        if self.peek() == Some(b'-') {
            self.pos += 1;
        }

        match self.peek() {
            Some(b'0') => {
                self.pos += 1;
                if let Some(b'0'..=b'9') = self.peek() {
                    return Err(self.error("leading zero in number"));
                }
            }
            Some(b'1'..=b'9') => {
                self.pos += 1;
                while let Some(b'0'..=b'9') = self.peek() {
                    self.pos += 1;
                }
            }
            _ => return Err(self.error("invalid number")),
        }

        let mut is_float = false;

        if self.peek() == Some(b'.') {
            is_float = true;
            self.pos += 1;
            let frac_start = self.pos;
            while let Some(b'0'..=b'9') = self.peek() {
                self.pos += 1;
            }
            if self.pos == frac_start {
                return Err(self.error("missing digits after decimal point"));
            }
        }

        if matches!(self.peek(), Some(b'e' | b'E')) {
            is_float = true;
            self.pos += 1;
            if matches!(self.peek(), Some(b'+' | b'-')) {
                self.pos += 1;
            }
            let exp_start = self.pos;
            while let Some(b'0'..=b'9') = self.peek() {
                self.pos += 1;
            }
            if self.pos == exp_start {
                return Err(self.error("missing exponent digits"));
            }
        }

        let number_text = &self.src[start..self.pos];
        if is_float {
            let value = number_text
                .parse::<f64>()
                .map_err(|_| self.error("invalid floating-point number"))?;
            if !value.is_finite() {
                return Err(self.error("non-finite number is not allowed"));
            }
            Ok(JsonValue::Float(value))
        } else {
            let value = number_text
                .parse::<i64>()
                .map_err(|_| self.error("integer out of i64 range"))?;
            Ok(JsonValue::Int(value))
        }
    }

    fn expect_keyword(&mut self, keyword: &str) -> Result<(), String> {
        let end = self.pos + keyword.len();
        if end > self.bytes.len() || &self.src[self.pos..end] != keyword {
            return Err(self.error(&format!("expected '{}'", keyword)));
        }
        self.pos = end;
        Ok(())
    }

    fn expect_byte(&mut self, expected: u8) -> Result<(), String> {
        match self.next_byte() {
            Some(b) if b == expected => Ok(()),
            _ => Err(self.error(&format!("expected '{}'", expected as char))),
        }
    }

    fn peek(&self) -> Option<u8> {
        self.bytes.get(self.pos).copied()
    }

    fn next_byte(&mut self) -> Option<u8> {
        let b = self.peek()?;
        self.pos += 1;
        Some(b)
    }

    fn skip_ws(&mut self) {
        while let Some(b) = self.peek() {
            if matches!(b, b' ' | b'\n' | b'\r' | b'\t') {
                self.pos += 1;
            } else {
                break;
            }
        }
    }

    fn error(&self, msg: &str) -> String {
        format!("JSON parse error at byte {}: {}", self.pos, msg)
    }
}

/// Returns the numeric value (0..=15) of an ASCII hex digit, or `None` for any other byte.
///
/// Both lowercase and uppercase letters `a`-`f` are accepted.
fn hex_digit(byte: u8) -> Option<u8> {
    // `char::to_digit(16)` accepts exactly 0-9, a-f and A-F; bytes >= 0x80 map to
    // Latin-1 code points, none of which are hex digits. The result is always
    // below 16, so narrowing back to `u8` is lossless.
    char::from(byte).to_digit(16).map(|digit| digit as u8)
}