// sciforge 0.0.3
//
// A comprehensive scientific computing library in pure Rust with zero dependencies.
// Documentation
use super::error::{YamlError, YamlErrorKind};
use super::lexer::{LineCursor, YamlLine};
use super::scalar::parse_scalar;
use super::value::YamlValue;

/// Nesting depth limit used by [`DEFAULT_YAML_LIMITS`].
pub const DEFAULT_MAX_YAML_DEPTH: usize = 64;

/// Resource limits enforced by [`YamlParser`] while it walks a document.
///
/// Exceeding a limit makes parsing fail with the matching `YamlErrorKind::Max*Exceeded`
/// error instead of continuing.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct YamlLimits {
    /// Deepest nesting level a node may have; a node deeper than this is rejected.
    pub max_depth: usize,
    /// Longest scalar text accepted, measured in bytes (`str::len`).
    pub max_scalar_len: usize,
    /// Largest number of items a single sequence may hold.
    pub max_sequence_len: usize,
    /// Largest number of entries a single block mapping may hold.
    pub max_mapping_len: usize,
    /// Largest number of nodes one parser may visit in total.
    pub max_node_count: usize,
}

/// Limits installed by [`YamlParser::new`].
pub const DEFAULT_YAML_LIMITS: YamlLimits = YamlLimits {
    max_depth: DEFAULT_MAX_YAML_DEPTH,
    // 64 KiB of scalar text.
    max_scalar_len: 64 * 1024,
    // 16 384 items per sequence.
    max_sequence_len: 16 * 1024,
    // 16 384 entries per mapping.
    max_mapping_len: 16 * 1024,
    // 131 072 nodes per document.
    max_node_count: 128 * 1024,
};

/// Line-oriented YAML parser over a borrowed byte slice.
///
/// The parser is consumed by [`YamlParser::parse`] or [`YamlParser::validate`]; the
/// `with_*` methods adjust its limits beforehand.
pub struct YamlParser<'a> {
    /// Line cursor over the input bytes.
    cursor: LineCursor<'a>,
    /// Limits checked while parsing.
    limits: YamlLimits,
    /// Number of nodes visited so far, compared against `limits.max_node_count`.
    nodes_seen: usize,
}

impl<'a> YamlParser<'a> {
    /// Creates a parser over `bytes` with [`DEFAULT_YAML_LIMITS`] and a zeroed node counter.
    pub const fn new(bytes: &'a [u8]) -> Self {
        let cursor = LineCursor::new(bytes);
        Self {
            cursor,
            limits: DEFAULT_YAML_LIMITS,
            nodes_seen: 0,
        }
    }

    pub const fn with_max_depth(mut self, max_depth: usize) -> Self {
        self.limits.max_depth = max_depth;
        self
    }

    /// Returns the parser with all of its limits replaced by `limits`.
    pub const fn with_limits(mut self, limits: YamlLimits) -> Self {
        self.limits = limits;
        self
    }

    /// Parses the whole input as a single YAML node and returns it.
    ///
    /// The first line's indentation becomes the base indentation of the root node.
    ///
    /// # Errors
    /// * `Eof` when the input contains no line to parse.
    /// * `UnexpectedToken` when lines remain after the root node. Returning the root and
    ///   silently dropping the rest would hide data loss, and `validate` rejects the same
    ///   input.
    /// * Any error reported by the cursor or by node parsing.
    pub fn parse(mut self) -> Result<YamlValue<'a>, YamlError> {
        let Some(first) = self.cursor.peek()? else {
            return Err(YamlError::new(YamlErrorKind::Eof, self.cursor.position()));
        };
        let root = self.parse_node(first.indent, 0)?;

        // Anything still pending was not consumed by the root node.
        if let Some(extra) = self.cursor.peek()? {
            return Err(YamlError::new(YamlErrorKind::UnexpectedToken, extra.offset));
        }
        Ok(root)
    }

    /// Checks that the input parses as exactly one YAML node, discarding the parsed value.
    ///
    /// # Errors
    /// * `Eof` when the input contains no line to parse.
    /// * `UnexpectedToken`, positioned at the first leftover line, when content remains
    ///   after the root node.
    /// * Any error reported by the cursor or by node parsing.
    pub fn validate(mut self) -> Result<(), YamlError> {
        let Some(first) = self.cursor.peek()? else {
            return Err(YamlError::new(YamlErrorKind::Eof, self.cursor.position()));
        };
        self.parse_node(first.indent, 0)?;

        // A single peek both detects and locates trailing content; no second lookup or
        // `expect` is needed.
        match self.cursor.peek()? {
            Some(extra) => Err(YamlError::new(YamlErrorKind::UnexpectedToken, extra.offset)),
            None => Ok(()),
        }
    }

    /// Parses the node starting on the current line, which must sit exactly at `base_indent`.
    ///
    /// The depth and node-count limits are checked first. The line's content then selects
    /// the parser: a `-` entry starts a sequence, a mapping separator starts a mapping, and
    /// anything else is a single-line scalar. Containers are parsed at `depth + 1`.
    ///
    /// # Errors
    /// `MaxDepthExceeded`, `MaxNodeCountExceeded`, `Eof` when no line is left,
    /// `InvalidIndentation` when the line is not at `base_indent`, plus anything the cursor
    /// or the delegated parsers report.
    fn parse_node(&mut self, base_indent: usize, depth: usize) -> Result<YamlValue<'a>, YamlError> {
        if depth > self.limits.max_depth {
            let at = self.cursor.position();
            return Err(YamlError::new(YamlErrorKind::MaxDepthExceeded, at));
        }

        // Saturating so a pathological count can never wrap back under the limit.
        self.nodes_seen = self.nodes_seen.saturating_add(1);
        if self.nodes_seen > self.limits.max_node_count {
            let at = self.cursor.position();
            return Err(YamlError::new(YamlErrorKind::MaxNodeCountExceeded, at));
        }

        let Some(line) = self.cursor.peek()? else {
            return Err(YamlError::new(YamlErrorKind::Eof, self.cursor.position()));
        };

        // Both a shallower and a deeper line are the same error here.
        if line.indent != base_indent {
            return Err(YamlError::new(
                YamlErrorKind::InvalidIndentation,
                line.offset,
            ));
        }

        if is_sequence_entry(line.content) {
            return self.parse_sequence(base_indent, depth + 1);
        }
        if has_mapping_separator(line.content) {
            return self.parse_mapping(base_indent, depth + 1);
        }
        self.parse_scalar_line(line)
    }

    /// Consumes `line` from the cursor and parses its content as one scalar.
    ///
    /// # Errors
    /// `MaxScalarLengthExceeded` when the content is longer than `limits.max_scalar_len`
    /// bytes (the line is then left unconsumed), plus any cursor or `parse_scalar` error.
    fn parse_scalar_line(&mut self, line: YamlLine<'a>) -> Result<YamlValue<'a>, YamlError> {
        let text = line.content;
        let at = line.offset;

        if text.len() <= self.limits.max_scalar_len {
            self.cursor.next()?;
            return parse_scalar(text, at);
        }

        Err(YamlError::new(YamlErrorKind::MaxScalarLengthExceeded, at))
    }

    fn parse_sequence(
        &mut self,
        base_indent: usize,
        depth: usize,
    ) -> Result<YamlValue<'a>, YamlError> {
        let mut items = Vec::new();

        loop {
            let Some(line) = self.cursor.peek()? else {
                break;
            };
            if line.indent < base_indent {
                break;
            }
            if line.indent > base_indent {
                return Err(YamlError::new(
                    YamlErrorKind::InvalidIndentation,
                    line.offset,
                ));
            }
            if !is_sequence_entry(line.content) {
                break;
            }

            let item_text = line.content[1..].trim_start();
            self.cursor.next()?;

            if !item_text.is_empty() {
                if item_text.len() > self.limits.max_scalar_len {
                    return Err(YamlError::new(
                        YamlErrorKind::MaxScalarLengthExceeded,
                        line.offset,
                    ));
                }
                if has_mapping_separator(item_text) {
                    let (key, value_part) = split_mapping_entry(item_text).unwrap();
                    let mut entries = Vec::new();
                    let val = if value_part.is_empty() {
                        let nested = self.cursor.peek()?;
                        if let Some(nl) = nested
                            && nl.indent > base_indent
                        {
                            self.parse_node(nl.indent, depth)?
                        } else {
                            YamlValue::Null
                        }
                    } else if value_part == "[]" {
                        YamlValue::Sequence(Vec::new())
                    } else {
                        parse_scalar(value_part, line.offset)?
                    };
                    entries.push((key, val));
                    loop {
                        let Some(next) = self.cursor.peek()? else {
                            break;
                        };
                        if next.indent <= base_indent || is_sequence_entry(next.content) {
                            break;
                        }
                        if !has_mapping_separator(next.content) {
                            break;
                        }
                        let (nk, nv) = split_mapping_entry(next.content).unwrap();
                        self.cursor.next()?;
                        let val = if nv.is_empty() {
                            let nested = self.cursor.peek()?;
                            if let Some(nl) = nested
                                && nl.indent > next.indent
                            {
                                self.parse_node(nl.indent, depth)?
                            } else {
                                YamlValue::Null
                            }
                        } else if nv == "[]" {
                            YamlValue::Sequence(Vec::new())
                        } else {
                            parse_scalar(nv, next.offset)?
                        };
                        entries.push((nk, val));
                    }
                    items.push(YamlValue::Mapping(entries));
                } else {
                    items.push(parse_scalar(item_text, line.offset)?);
                }
            } else {
                let nested = self
                    .cursor
                    .peek()?
                    .ok_or(YamlError::new(YamlErrorKind::Eof, line.offset))?;
                if nested.indent <= base_indent {
                    return Err(YamlError::new(
                        YamlErrorKind::InvalidIndentation,
                        nested.offset,
                    ));
                }
                items.push(self.parse_node(nested.indent, depth)?);
            }

            if items.len() > self.limits.max_sequence_len {
                return Err(YamlError::new(
                    YamlErrorKind::MaxSequenceLengthExceeded,
                    line.offset,
                ));
            }
        }

        if items.is_empty() {
            return Err(YamlError::new(
                YamlErrorKind::UnexpectedToken,
                self.cursor.position(),
            ));
        }

        Ok(YamlValue::Sequence(items))
    }

    /// Parses a block mapping whose `key: value` lines all sit at `base_indent`.
    ///
    /// For each entry the value is:
    /// * an empty sequence when the inline text is `[]`;
    /// * a scalar when any other inline text follows the separator;
    /// * the node on the next line when no inline text is present and that line is indented
    ///   deeper than `base_indent`;
    /// * null otherwise.
    ///
    /// The loop stops at end of input, at a shallower line, or at a line on `base_indent`
    /// that is not a mapping entry.
    ///
    /// # Errors
    /// * `InvalidIndentation` for a line deeper than `base_indent` between entries.
    /// * `InvalidMappingKey` for a line whose first non-blank character is `:`, i.e. an
    ///   entry with an empty key.
    /// * `MaxScalarLengthExceeded` / `MaxMappingLengthExceeded` when a limit is exceeded.
    /// * `UnexpectedToken` when no entry was parsed.
    /// * Any error from the cursor, `parse_scalar`, or nested nodes.
    fn parse_mapping(
        &mut self,
        base_indent: usize,
        depth: usize,
    ) -> Result<YamlValue<'a>, YamlError> {
        let mut entries = Vec::new();

        loop {
            let Some(line) = self.cursor.peek()? else {
                break;
            };
            if line.indent < base_indent {
                break;
            }
            if line.indent > base_indent {
                return Err(YamlError::new(
                    YamlErrorKind::InvalidIndentation,
                    line.offset,
                ));
            }

            let Some((key, value_part)) = split_mapping_entry(line.content) else {
                // `split_mapping_entry` reports an empty key as "no entry", so a check on
                // `key` after a successful split can never fire. Detect the `: value` shape
                // here so it is reported instead of silently ending the mapping.
                if line.content.trim_start().starts_with(':') {
                    return Err(YamlError::new(
                        YamlErrorKind::InvalidMappingKey,
                        line.offset,
                    ));
                }
                break;
            };

            if key.len() > self.limits.max_scalar_len {
                return Err(YamlError::new(
                    YamlErrorKind::MaxScalarLengthExceeded,
                    line.offset,
                ));
            }

            self.cursor.next()?;

            let val = if !value_part.is_empty() {
                if value_part.len() > self.limits.max_scalar_len {
                    return Err(YamlError::new(
                        YamlErrorKind::MaxScalarLengthExceeded,
                        line.offset,
                    ));
                }
                if value_part == "[]" {
                    YamlValue::Sequence(Vec::new())
                } else {
                    parse_scalar(value_part, line.offset)?
                }
            } else {
                // No inline value: a deeper-indented next line holds the nested node.
                match self.cursor.peek()? {
                    Some(next_line) if next_line.indent > base_indent => {
                        self.parse_node(next_line.indent, depth)?
                    }
                    _ => YamlValue::Null,
                }
            };

            entries.push((key, val));

            if entries.len() > self.limits.max_mapping_len {
                return Err(YamlError::new(
                    YamlErrorKind::MaxMappingLengthExceeded,
                    line.offset,
                ));
            }
        }

        if entries.is_empty() {
            return Err(YamlError::new(
                YamlErrorKind::UnexpectedToken,
                self.cursor.position(),
            ));
        }

        Ok(YamlValue::Mapping(entries))
    }
}

/// Returns `true` when `content` is a lone `-` or starts with `-` followed by a space.
fn is_sequence_entry(content: &str) -> bool {
    matches!(content.as_bytes(), [b'-'] | [b'-', b' ', ..])
}

/// Returns `true` when `split_mapping_entry` can split `content` into a key and a value.
fn has_mapping_separator(content: &str) -> bool {
    matches!(split_mapping_entry(content), Some(_))
}

/// Splits `content` into a `(key, value)` pair at its first mapping separator.
///
/// A separator is a `:` followed by a space, a tab, or the end of the line, as in YAML
/// block mappings. A colon followed by any other character (`http://host`, `12:30`) is
/// part of the scalar and is skipped. Quoted text is not treated specially.
///
/// The key is trimmed on both sides and the value has its leading whitespace removed.
/// Returns `None` when there is no separator or when the key before it is empty.
fn split_mapping_entry(content: &str) -> Option<(&str, &str)> {
    let bytes = content.as_bytes();

    let sep = (0..bytes.len()).find(|&idx| {
        bytes[idx] == b':'
            && matches!(bytes.get(idx + 1).copied(), None | Some(b' ' | b'\t'))
    })?;

    // `:` is ASCII and is followed by ASCII whitespace or nothing, so both slice points
    // fall on char boundaries.
    let key = content[..sep].trim();
    if key.is_empty() {
        return None;
    }
    Some((key, content[sep + 1..].trim_start()))
}

/// Parses `bytes` as YAML with [`DEFAULT_YAML_LIMITS`].
///
/// # Errors
/// Returns whatever [`YamlParser::parse`] reports.
pub fn parse_yaml(bytes: &[u8]) -> Result<YamlValue<'_>, YamlError> {
    let parser = YamlParser::new(bytes);
    parser.parse()
}

/// Parses `bytes` as YAML with the default limits, except that nesting is capped at
/// `max_depth`.
///
/// # Errors
/// Returns whatever [`YamlParser::parse`] reports.
pub fn parse_yaml_with_max_depth(
    bytes: &[u8],
    max_depth: usize,
) -> Result<YamlValue<'_>, YamlError> {
    let parser = YamlParser::new(bytes).with_max_depth(max_depth);
    parser.parse()
}

/// Parses `bytes` as YAML while enforcing the caller-supplied `limits`.
///
/// # Errors
/// Returns whatever [`YamlParser::parse`] reports.
pub fn parse_yaml_with_limits(
    bytes: &[u8],
    limits: YamlLimits,
) -> Result<YamlValue<'_>, YamlError> {
    let parser = YamlParser::new(bytes).with_limits(limits);
    parser.parse()
}

/// Checks that `bytes` is a single well-formed YAML node under [`DEFAULT_YAML_LIMITS`].
///
/// # Errors
/// Returns whatever [`YamlParser::validate`] reports.
pub fn validate_yaml(bytes: &[u8]) -> Result<(), YamlError> {
    let parser = YamlParser::new(bytes);
    parser.validate()
}