// rest-sql 0.1.0 - Docs.rs

/// A half-open byte range (`start..end`) into a source string.
///
/// `start` is inclusive and `end` is exclusive (Rust slice convention). Both
/// are **byte** offsets, not character indices.
///
/// Every position helper on this type is byte-based: lines are delimited by
/// `\n` bytes, and the column reported by [`Span::line_col`] is the 1-based
/// byte offset from the start of the line. For ASCII input that equals the
/// visual column. When multi-byte UTF-8 characters precede the position on
/// the same line, the column is larger than the number of characters, so
/// convert it before showing it to users of non-ASCII input.
///
/// The helpers never panic: offsets past the end of the source are clamped,
/// and offsets that fall inside a multi-byte character are tolerated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Inclusive start byte offset.
    pub start: usize,
    /// Exclusive end byte offset.
    pub end: usize,
}

impl Span {
    /// Creates a span covering the bytes `start..end`.
    ///
    /// No validation is performed; an inverted span (`start > end`) is
    /// treated as empty by [`Span::len`], [`Span::is_empty`] and
    /// [`Span::slice`].
    pub const fn new(start: usize, end: usize) -> Self {
        Span { start, end }
    }

    /// Returns the (line, col) of `self.start`, both 1-based.
    ///
    /// `col` is a byte offset within the line, not a character count.
    /// A `start` beyond the end of `source` is clamped to `source.len()`, so
    /// the result always describes a position inside (or just past the end
    /// of) `source`.
    pub fn line_col(&self, source: &str) -> (usize, usize) {
        // Work on raw bytes: `\n` is a single ASCII byte that never occurs
        // inside a multi-byte UTF-8 sequence, and byte slicing cannot panic
        // on a non-char-boundary offset the way `&str` slicing does.
        let bytes = source.as_bytes();
        let offset = self.start.min(bytes.len());
        let before = &bytes[..offset];

        let line = before.iter().filter(|&&b| b == b'\n').count() + 1;
        // Use the clamped offset (not `self.start`) so the column stays
        // consistent with the clamped line computation above.
        let col = match before.iter().rposition(|&b| b == b'\n') {
            Some(nl) => offset - nl,
            None => offset + 1,
        };
        (line, col)
    }

    /// Returns the full source line that contains `self.start`.
    ///
    /// The returned slice excludes the terminating `\n`. A `start` beyond
    /// the end of `source` is clamped, yielding the last line.
    pub fn source_line<'a>(&self, source: &'a str) -> &'a str {
        let bytes = source.as_bytes();
        let offset = self.start.min(bytes.len());

        // Search bytes rather than slicing the `&str` at `offset`, which
        // would panic if `offset` fell inside a multi-byte character.
        let line_start = bytes[..offset]
            .iter()
            .rposition(|&b| b == b'\n')
            .map_or(0, |nl| nl + 1);
        // `line_start` is 0 or directly follows a `\n` byte, so it is always
        // a valid char boundary and the `&str` slicing below cannot panic.
        let line_end = source[line_start..]
            .find('\n')
            .map_or(source.len(), |i| line_start + i);
        &source[line_start..line_end]
    }

    /// The source slice covered by this span.
    ///
    /// Both ends are clamped to `source.len()`. Returns `""` when the span
    /// is empty or inverted, lies entirely past the end of `source`, or when
    /// either end falls inside a multi-byte character.
    pub fn slice<'a>(&self, source: &'a str) -> &'a str {
        let end = self.end.min(source.len());
        let start = self.start.min(end);
        // `get` returns `None` instead of panicking on a non-char-boundary.
        source.get(start..end).unwrap_or("")
    }

    /// Number of bytes in this span (0 for an inverted span).
    pub const fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the span covers no bytes (`start >= end`).
    pub const fn is_empty(&self) -> bool {
        self.start >= self.end
    }
}

/// Unit tests for `Span`: 1-based byte-oriented line/column lookup, line
/// extraction, slicing, and length/emptiness, including multi-byte input.
#[cfg(test)]
mod tests {
    use super::*;

    // ── line_col ──────────────────────────────────────────────────────────────

    #[test]
    fn line_col_single_line() {
        let src = "name==Alice";
        assert_eq!(Span::new(4, 6).line_col(src), (1, 5)); // "==" starts at byte 4
    }

    #[test]
    fn line_col_multi_line() {
        let src = "a==1\nb>2";
        assert_eq!(Span::new(5, 6).line_col(src), (2, 1)); // 'b' is first char on line 2
    }

    #[test]
    fn line_col_second_line_past_first_column() {
        // Exercises the column arithmetic on a later line with col > 1.
        let src = "a==1\nb>2";
        assert_eq!(Span::new(6, 7).line_col(src), (2, 2)); // '>' is second char on line 2
    }

    #[test]
    fn line_col_at_newline_boundary() {
        let src = "a\nb";
        assert_eq!(Span::new(2, 3).line_col(src), (2, 1));
    }

    // ── source_line ───────────────────────────────────────────────────────────

    #[test]
    fn source_line_single() {
        assert_eq!(Span::new(0, 4).source_line("name==Alice"), "name==Alice");
    }

    #[test]
    fn source_line_first_of_many() {
        // The terminating newline must not be part of the returned line.
        let src = "first\nsecond";
        assert_eq!(Span::new(2, 3).source_line(src), "first");
    }

    #[test]
    fn source_line_second_line() {
        let src = "first\nsecond";
        assert_eq!(Span::new(6, 10).source_line(src), "second");
    }

    #[test]
    fn source_line_start_on_newline() {
        // A span starting on the '\n' itself belongs to the line it terminates.
        let src = "first\nsecond";
        assert_eq!(Span::new(5, 6).source_line(src), "first");
    }

    // ── slice / len / is_empty ────────────────────────────────────────────────

    #[test]
    fn slice_basic() {
        assert_eq!(Span::new(0, 4).slice("name==Alice"), "name");
        assert_eq!(Span::new(4, 6).slice("name==Alice"), "==");
    }

    #[test]
    fn slice_end_is_clamped_to_source_len() {
        assert_eq!(Span::new(6, 100).slice("name==Alice"), "Alice");
    }

    #[test]
    fn len() {
        assert_eq!(Span::new(2, 5).len(), 3);
        assert_eq!(Span::new(5, 5).len(), 0);
    }

    #[test]
    fn len_of_inverted_span_is_zero() {
        // `len` saturates instead of underflowing when start > end.
        assert_eq!(Span::new(5, 2).len(), 0);
    }

    #[test]
    fn is_empty() {
        assert!(Span::new(5, 5).is_empty());
        assert!(Span::new(5, 2).is_empty());
        assert!(!Span::new(2, 5).is_empty());
    }

    // ── Unicode / multi-byte ──────────────────────────────────────────────────

    #[test]
    fn line_col_is_byte_based_with_multibyte_char() {
        // "é" = 2 bytes; "t" starts at byte 2 → col = 3 (byte-based, 1-indexed).
        // A char-based implementation would give col = 2, so this documents the behavior.
        let src = "étoile==1";
        assert_eq!(Span::new(2, 3).line_col(src), (1, 3));
    }

    #[test]
    fn source_line_with_accented_chars() {
        // source_line is a byte-range slice — it must return the full UTF-8 line intact.
        let src = "prénom==André\nville==Lyon";
        assert_eq!(Span::new(0, 6).source_line(src), "prénom==André");
    }

    #[test]
    fn source_line_second_line_with_cjk() {
        // Each CJK character is 3 bytes; line boundary detection must use byte positions.
        let src = "a==1\n日本語==test";
        assert_eq!(Span::new(5, 8).source_line(src), "日本語==test");
    }

    #[test]
    fn slice_multibyte_word() {
        // "café" = c(1)+a(1)+f(1)+é(2) = 5 bytes
        let src = "café==1";
        assert_eq!(Span::new(0, 5).slice(src), "café");
    }

    #[test]
    fn slice_emoji() {
        // "🚀" = 4 bytes (U+1F680)
        let src = r#"tag=="🚀""#;
        assert_eq!(Span::new(6, 10).slice(src), "🚀");
    }
}