// Source: rpm-spec 0.3.4 (docs.rs)

//! Parser input type and span helpers.
//!
//! The parser consumes [`Input<'a>`], a `LocatedSpan<&'a str>` from
//! `nom_locate`. Spans are produced by [`span_between`] from a "before"
//! and an "after" input cursor — `before` is the position at the start of
//! the matched fragment, `after` is what nom returned as remaining input.

use nom_locate::LocatedSpan;

use crate::ast::Span;

/// Input fed to every parser combinator.
///
/// This is a `nom_locate` [`LocatedSpan`] over a borrowed `&str`, so each
/// cursor carries its byte offset plus 1-based line and column
/// information alongside the remaining text — no separate position
/// bookkeeping is needed in the parsers.
pub type Input<'a> = LocatedSpan<&'a str>;

/// Build an [`Input`] over an entire source string.
///
/// The returned cursor borrows `source` and is positioned at its very
/// beginning; it is the value to hand to the top-level parser.
pub fn input(source: &str) -> Input<'_> {
    Input::new(source)
}

/// Build the [`Span`] covering everything consumed between two cursors.
///
/// Both arguments must be nom-locate cursors into the same source:
///
/// * `before` — the input the combinator was invoked on (start of the
///   matched fragment);
/// * `after` — the remaining, still-unconsumed input the combinator
///   returned.
///
/// The result is the half-open range `[before, after)`: the start
/// offset/line/column come from `before`, the end offset/line/column
/// from `after`.
pub fn span_between(before: &Input<'_>, after: &Input<'_>) -> Span {
    // Gather each endpoint's (line, column) pair together so the start
    // and end halves of the span read symmetrically.
    let (first_line, first_col) = (before.location_line(), before.get_column() as u32);
    let (last_line, last_col) = (after.location_line(), after.get_column() as u32);

    Span::new(
        before.location_offset(),
        after.location_offset(),
        first_line,
        first_col,
        last_line,
        last_col,
    )
}

/// Build an empty (zero-length) [`Span`] located at `cursor`.
///
/// Start and end are the same offset, line and column, which makes this
/// suitable for diagnostics that point at a single position rather than
/// at a range of text.
pub fn span_at(cursor: &Input<'_>) -> Span {
    let offset = cursor.location_offset();
    let (row, column) = (cursor.location_line(), cursor.get_column() as u32);

    // Start and end coincide: same offset, same line, same column.
    Span::new(offset, offset, row, column, row, column)
}

/// Build a [`Span`] for the text of one physical line, **without** its
/// trailing line-ending bytes.
///
/// * `start` — cursor at the beginning of the line.
/// * `text` — the `not_line_ending` capture for that line, i.e. its
///   content with no `\n` / `\r\n`.
///
/// `span_between(start, after_line)` is not a substitute: `after_line`
/// already sits on the *next* line, so `codespan` would render a
/// multi-line caret spilling onto the unrelated line below. Here both
/// ends of the span stay on `start`'s line.
///
/// Columns are **byte offsets** within the line, in line with the
/// [`Span`] documented convention and `nom_locate::get_column()`. The
/// end column is therefore the start column plus the *byte* length of
/// `text`; counting `chars()` instead would misalign the underline on
/// lines containing multibyte UTF-8 (e.g. a Cyrillic `Summary:` value).
pub fn span_for_line(start: &Input<'_>, text: &Input<'_>) -> Span {
    // Byte length of the line content; drives both the end offset and
    // the end column.
    let content_len = text.fragment().len();
    let row = start.location_line();
    let first_col = start.get_column() as u32;

    Span::new(
        start.location_offset(),
        text.location_offset() + content_len,
        row,
        first_col,
        row,
        first_col + content_len as u32,
    )
}

#[cfg(test)]
mod tests {
    use super::super::util::physical_line;
    use super::*;

    /// Parse the first physical line of `source` and return the span
    /// that [`span_for_line`] builds for it.
    fn first_line_span(source: &str) -> Span {
        let cursor = Input::new(source);
        let (_remaining, line_text) = physical_line(cursor).unwrap();
        span_for_line(&cursor, &line_text)
    }

    #[test]
    fn span_for_line_ascii_byte_columns() {
        let span = first_line_span("hello world\nrest");

        // Byte range of "hello world".
        assert_eq!(span.start_byte, 0);
        assert_eq!(span.end_byte, 11);
        // Both ends stay on the first line.
        assert_eq!(span.start_line, 1);
        assert_eq!(span.end_line, 1);
        // Columns start at 1 and advance by the 11 bytes of content.
        assert_eq!(span.start_column, 1);
        assert_eq!(span.end_column, 12);
    }

    #[test]
    fn span_for_line_utf8_uses_byte_columns() {
        // "αβγ" is 3 characters but 6 bytes in UTF-8 (2 bytes per Greek
        // letter). Columns are documented as byte offsets, so the column
        // width must be 6; a `chars().count()`-based width would be 3 and
        // leave the codespan underline 3 cells short.
        let span = first_line_span("αβγ\nrest");

        let byte_width = span.end_byte - span.start_byte;
        let column_width = span.end_column - span.start_column;
        assert_eq!(byte_width, 6);
        assert_eq!(column_width, 6);
    }

    #[test]
    fn span_for_line_stops_before_newline() {
        // The trailing `\n` must stay outside the span; including it
        // would make the codespan caret overlap the next physical line.
        let span = first_line_span("first\nsecond\n");

        assert_eq!(span.end_byte, 5);
        assert_eq!(span.start_line, span.end_line);
    }
}