Skip to main content

lex_types/
position.rs

1//! Source positions attached to `TypeError`s (#306 slice 1).
2//!
3//! LLM-driven repair flows need errors that point at a concrete
4//! file:line:col, not just a NodeId. `Position` carries that triple;
5//! `TypeError` variants gain an `Option<Position>` that the type
6//! checker fills in via [`check_program_with_positions`](crate::check_program_with_positions).
7//!
8//! Slice 1 ships function-level granularity: every error from a
9//! given `fn` is stamped with that function's start position. Slice
10//! 1.5 will plumb per-expression spans through the canonicalizer
11//! so deep-body errors land on the offending sub-expression.
12
13use serde::{Deserialize, Serialize};
14
/// A source position: an optional file path plus a line/column pair.
///
/// Serializable via serde; `file` is omitted from the serialized form
/// when it is `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Position {
    /// Source file path, when known. `lex check`'s CLI fills this
    /// from the path argument; programmatic callers may leave it
    /// `None` and still benefit from line:col.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file: Option<String>,
    /// 1-based line number.
    pub line: u32,
    /// 1-based column number, in chars (not bytes).
    pub col: u32,
}
27
28impl Position {
29    pub fn new(file: Option<String>, line: u32, col: u32) -> Self {
30        Self { file, line, col }
31    }
32
33    /// Render as `file:line:col` (or `line:col` if no file).
34    pub fn render(&self) -> String {
35        match &self.file {
36            Some(f) => format!("{f}:{}:{}", self.line, self.col),
37            None => format!("{}:{}", self.line, self.col),
38        }
39    }
40}
41
/// Translate a byte offset into the source string into a 1-based
/// `(line, col)`.
///
/// Lines split on `\n`; columns count chars (not bytes), so multi-byte
/// UTF-8 doesn't double-count. Out-of-range offsets clamp to
/// end-of-source. An offset that falls inside a multi-byte character is
/// snapped back to the start of that character, so the result is the
/// position of the character containing that byte (slicing at such an
/// offset would otherwise panic). Counts larger than `u32::MAX`
/// saturate instead of wrapping.
pub fn byte_to_line_col(src: &str, byte_offset: usize) -> (u32, u32) {
    let mut end = byte_offset.min(src.len());
    // Walk back to a char boundary; index 0 is always one, so this terminates.
    while !src.is_char_boundary(end) {
        end -= 1;
    }
    let prefix = &src[..end];

    // The current line starts just past the last newline before the offset.
    let line_start = prefix.rfind('\n').map_or(0, |nl| nl + 1);
    let newlines = prefix.bytes().filter(|&b| b == b'\n').count();
    let chars_before = prefix[line_start..].chars().count();

    // Clamp before narrowing so the `as` cast below can never truncate.
    let to_u32 = |n: usize| n.min(u32::MAX as usize) as u32;
    (
        to_u32(newlines).saturating_add(1),
        to_u32(chars_before).saturating_add(1),
    )
}
59
#[cfg(test)]
mod tests {
    use super::*;

    /// Offset zero is the first column of the first line.
    #[test]
    fn line_col_at_start_of_file() {
        let expected = (1, 1);
        assert_eq!(byte_to_line_col("hello", 0), expected);
    }

    /// The byte right after a `\n` starts the next line at column 1.
    #[test]
    fn line_col_after_newline() {
        // In "ab\ncd", byte 3 is the 'c' that opens line 2.
        let expected = (2, 1);
        assert_eq!(byte_to_line_col("ab\ncd", 3), expected);
    }

    /// Columns keep counting from the start of the current line.
    #[test]
    fn line_col_mid_second_line() {
        // In "ab\ncde", byte 5 is 'e': 'c' is col 1, 'd' col 2, 'e' col 3.
        let expected = (2, 3);
        assert_eq!(byte_to_line_col("ab\ncde", 5), expected);
    }

    /// Columns are measured in chars, so a two-byte 'é' advances by one.
    #[test]
    fn line_col_with_multibyte_chars() {
        let src = "héllo";
        // Byte offset of the first 'l', which sits after the 2-byte 'é'.
        let first_l = src.find('l').unwrap();
        assert_eq!(byte_to_line_col(src, first_l), (1, 3));
    }

    /// Offsets past the end behave like an offset at end-of-source.
    #[test]
    fn out_of_range_offset_clamps() {
        let expected = (1, 4);
        assert_eq!(byte_to_line_col("abc", 999), expected);
    }

    /// `render` prefixes the file path only when one is present.
    #[test]
    fn position_renders_with_and_without_file() {
        let with_file = Position::new(Some(String::from("hello.lex")), 12, 3);
        assert_eq!(with_file.render(), "hello.lex:12:3");

        let without_file = Position::new(None, 5, 7);
        assert_eq!(without_file.render(), "5:7");
    }
}