granit-parser 0.0.3

A YAML parser in pure Rust with comment and style support
Documentation
#![allow(clippy::bool_assert_comparison)]
#![allow(clippy::float_cmp)]
use granit_parser::{Event, Marker, Parser, ScanError};

fn char_index_to_byte_index(s: &str, char_index: usize) -> usize {
    if char_index == 0 {
        return 0;
    }
    s.char_indices()
        .nth(char_index)
        .map_or_else(|| s.len(), |(byte, _)| byte)
}

fn span_offsets(input: &str, start: Marker, end: Marker) -> (usize, usize) {
    let start_b = start
        .byte_offset()
        .unwrap_or_else(|| char_index_to_byte_index(input, start.index()));
    let end_b = end
        .byte_offset()
        .unwrap_or_else(|| char_index_to_byte_index(input, end.index()));
    (start_b, end_b)
}

/// Run the parser through the string, returning all the scalars, and collecting their spans to strings.
fn run_parser_and_deref_scalar_spans(input: &str) -> Result<Vec<(String, String)>, ScanError> {
    let mut events = vec![];
    for x in Parser::new_from_str(input) {
        let x = x?;
        if let Event::Scalar(s, ..) = x.0 {
            let (start, end) = span_offsets(input, x.1.start, x.1.end);
            let input_s = &input[start..end];
            events.push((s.into(), input_s.to_string()));
        }
    }
    Ok(events)
}

/// Run the parser through the string, returning all the scalars, and collecting their spans to strings.
fn run_parser_and_deref_seq_spans(input: &str) -> Result<Vec<String>, ScanError> {
    let mut events = vec![];
    let mut start_stack = vec![];
    for x in Parser::new_from_str(input) {
        let x = x?;
        match x.0 {
            Event::SequenceStart(..) => start_stack.push(x.1.start),
            Event::SequenceEnd => {
                let start = start_stack.pop().unwrap();
                let (start, end) = span_offsets(input, start, x.1.end);
                let input_s = &input[start..end];
                events.push(input_s.to_string());
            }
            _ => {}
        }
    }
    Ok(events)
}

fn deref_pairs(pairs: &[(String, String)]) -> Vec<(&str, &str)> {
    pairs
        .iter()
        .map(|(a, b)| (a.as_str(), b.as_str()))
        .collect()
}

#[test]
fn span_helpers_report_length_empty_and_byte_range() {
    let span = granit_parser::Span::new(
        Marker::new(2, 1, 2).with_byte_offset(Some(5)),
        Marker::new(6, 1, 6).with_byte_offset(Some(13)),
    );

    assert_eq!(span.len(), 4);
    assert!(!span.is_empty());
    assert_eq!(span.byte_range(), Some(5..13));

    let empty = granit_parser::Span::empty(Marker::new(6, 1, 6).with_byte_offset(Some(13)));
    assert!(empty.is_empty());
    assert_eq!(empty.byte_range(), Some(13..13));

    let without_byte_offsets = granit_parser::Span::new(Marker::new(0, 1, 0), Marker::new(1, 1, 1));
    assert_eq!(without_byte_offsets.byte_range(), None);
}

#[test]
fn span_slice_returns_source_text_for_valid_byte_ranges() {
    let source = "key: value";
    let span = granit_parser::Span::new(
        Marker::new(5, 1, 5).with_byte_offset(Some(5)),
        Marker::new(10, 1, 10).with_byte_offset(Some(10)),
    );

    assert_eq!(span.slice(source), Some("value"));
}

#[test]
fn span_slice_handles_non_ascii_byte_ranges() {
    let source = "a: 你好";
    let span = granit_parser::Span::new(
        Marker::new(3, 1, 3).with_byte_offset(Some(3)),
        Marker::new(5, 1, 5).with_byte_offset(Some(source.len())),
    );
    let invalid_boundary = granit_parser::Span::new(
        Marker::new(3, 1, 3).with_byte_offset(Some(4)),
        Marker::new(5, 1, 5).with_byte_offset(Some(source.len())),
    );

    assert_eq!(span.slice(source), Some("你好"));
    assert_eq!(invalid_boundary.slice(source), None);
}

#[test]
fn parser_spans_use_byte_offsets_for_non_ascii_input() {
    let source = "a: 你好\nb: c\n";
    let scalars: Vec<_> = Parser::new_from_str(source)
        .filter_map(|parsed| {
            let (event, span) = parsed.unwrap();
            if let Event::Scalar(value, ..) = event {
                Some((
                    value.into_owned(),
                    span.byte_range(),
                    span.slice(source).map(std::borrow::ToOwned::to_owned),
                ))
            } else {
                None
            }
        })
        .collect();

    assert_eq!(
        scalars,
        vec![
            ("a".to_string(), Some(0..1), Some("a".to_string())),
            ("你好".to_string(), Some(3..9), Some("你好".to_string())),
            ("b".to_string(), Some(10..11), Some("b".to_string())),
            ("c".to_string(), Some(13..14), Some("c".to_string())),
        ]
    );
}

#[test]
fn span_slice_handles_empty_spans() {
    let source = "key: value";
    let empty = granit_parser::Span::empty(Marker::new(4, 1, 4).with_byte_offset(Some(4)));

    assert_eq!(empty.slice(source), Some(""));
}

#[test]
fn span_slice_returns_none_for_buffered_input_spans_without_byte_offsets() {
    let source = "foo: bar";
    let mut scalar_slices = Vec::new();

    for parsed in Parser::new_from_iter(source.chars()) {
        let (event, span) = parsed.unwrap();
        if matches!(event, Event::Scalar(..)) {
            scalar_slices.push(span.slice(source));
        }
    }

    assert_eq!(scalar_slices, [None, None]);
}

#[test]
fn test_plain() {
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans("foo: bar").unwrap()),
        [("foo", "foo"), ("bar", "bar"),]
    );
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans("foo: bar ").unwrap()),
        [("foo", "foo"), ("bar", "bar"),]
    );
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans("foo :  \t  bar\t ").unwrap()),
        [("foo", "foo"), ("bar", "bar"),]
    );

    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans("foo :  \n  - bar\n  - baz\n ").unwrap()),
        [("foo", "foo"), ("bar", "bar"), ("baz", "baz")]
    );
}

#[test]
fn test_plain_utf8() {
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans("a: \u{4F60}\u{5273}").unwrap()),
        [("a", "a"), ("\u{4F60}\u{5273}", "\u{4F60}\u{5273}")]
    );
}

#[test]
fn test_quoted() {
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans(r#"foo: "bar""#).unwrap()),
        [("foo", "foo"), ("bar", r#""bar""#),]
    );
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans(r"foo: 'bar'").unwrap()),
        [("foo", "foo"), ("bar", r"'bar'"),]
    );

    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans(r#"foo: "bar ""#).unwrap()),
        [("foo", "foo"), ("bar ", r#""bar ""#),]
    );
}

#[test]
fn test_literal() {
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans("foo: |\n  bar").unwrap()),
        [("foo", "foo"), ("bar\n", "bar"),]
    );
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans("foo: |\n  bar\n  more").unwrap()),
        [("foo", "foo"), ("bar\nmore\n", "bar\n  more"),]
    );
}

#[test]
fn test_block() {
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans("foo: >\n  bar").unwrap()),
        [("foo", "foo"), ("bar\n", "bar"),]
    );
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans("foo: >\n  bar\n  more").unwrap()),
        [("foo", "foo"), ("bar more\n", "bar\n  more"),]
    );
}

#[test]
fn test_seq() {
    assert_eq!(
        run_parser_and_deref_seq_spans("[a, b]").unwrap(),
        ["[a, b]"]
    );
    assert_eq!(
        run_parser_and_deref_seq_spans("- a\n- b").unwrap(),
        ["- a\n- b"]
    );
    assert_eq!(
        run_parser_and_deref_seq_spans("foo:\n  - a\n  - b").unwrap(),
        ["- a\n  - b"]
    );
    assert_eq!(
        run_parser_and_deref_seq_spans("foo:\n  - a\n  - bar:\n    - b\n    - c").unwrap(),
        ["b\n    - c", "- a\n  - bar:\n    - b\n    - c"]
    );
}

#[test]
fn test_literal_utf8() {
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans("foo: |\n  \u{4F60}\u{5273}").unwrap()),
        [("foo", "foo"), ("\u{4F60}\u{5273}\n", "\u{4F60}\u{5273}"),]
    );
    assert_eq!(
        deref_pairs(
            &run_parser_and_deref_scalar_spans(
                "foo: |\n  one:\u{4F60}\u{5273}\n  two:\u{4F60}\u{5273}"
            )
            .unwrap()
        ),
        [
            ("foo", "foo"),
            (
                "one:\u{4F60}\u{5273}\ntwo:\u{4F60}\u{5273}\n",
                "one:\u{4F60}\u{5273}\n  two:\u{4F60}\u{5273}"
            ),
        ]
    );
}

#[test]
fn test_block_utf8() {
    assert_eq!(
        deref_pairs(&run_parser_and_deref_scalar_spans("foo: >\n  \u{4F60}\u{5273}").unwrap()),
        [("foo", "foo"), ("\u{4F60}\u{5273}\n", "\u{4F60}\u{5273}")],
    );
    assert_eq!(
        deref_pairs(
            &run_parser_and_deref_scalar_spans(
                "foo: >\n  one:\u{4F60}\u{5273}\n  two:\u{4F60}\u{5273}"
            )
            .unwrap()
        ),
        [
            ("foo", "foo"),
            (
                "one:\u{4F60}\u{5273} two:\u{4F60}\u{5273}\n",
                "one:\u{4F60}\u{5273}\n  two:\u{4F60}\u{5273}"
            )
        ],
    );
}

#[test]
fn span_slice_for_quoted_scalar_excludes_trailing_comment() {
    let yaml = "key: \"value\" # comment\n";
    let slices: Vec<_> = Parser::new_from_str(yaml)
        .filter_map(|parsed| {
            let (event, span) = parsed.unwrap();
            matches!(event, Event::Scalar(..)).then(|| span.slice(yaml).unwrap().to_string())
        })
        .collect();

    assert_eq!(slices, vec!["key".to_string(), "\"value\"".to_string()]);
}

#[test]
fn span_slice_for_single_quoted_scalar_excludes_trailing_comment() {
    let yaml = "key: 'value' # comment\n";
    let slices: Vec<_> = Parser::new_from_str(yaml)
        .filter_map(|parsed| {
            let (event, span) = parsed.unwrap();
            matches!(event, Event::Scalar(..)).then(|| span.slice(yaml).unwrap().to_string())
        })
        .collect();

    assert_eq!(slices, vec!["key".to_string(), "'value'".to_string()]);
}

#[test]
fn test_flow_sequence_explicit_mapping_end_span_order() {
    let input = "[? a: [b], ? c: &x d, ? e: !t f]";
    let mut last_end = 0usize;

    for parsed in Parser::new_from_str(input) {
        let (_event, span) = parsed.unwrap();
        let (_, end) = span_offsets(input, span.start, span.end);
        assert!(
            end >= last_end,
            "event end span regressed: current end {end} < previous end {last_end}"
        );
        last_end = end;
    }
}

#[test]
fn test_flow_sequence_explicit_empty_mapping_value_end_span_order() {
    let input = "[? a:, ? b: c]";
    let mut last_end = 0usize;

    for parsed in Parser::new_from_str(input) {
        let (_event, span) = parsed.unwrap();
        let (_, end) = span_offsets(input, span.start, span.end);
        assert!(
            end >= last_end,
            "event end span regressed: current end {end} < previous end {last_end}"
        );
        last_end = end;
    }
}