Skip to main content

marco_core/parser/
position.rs

1// Position tracking for editor intelligence integration (line/column mapping)
2
3use serde::{Deserialize, Serialize};
4
5/// Position in a source document using multiple coordinate systems.
6///
7/// This struct tracks a position using three different representations:
8/// - **Line/Column**: CommonMark-style 1-based coordinates
9/// - **Absolute Offset**: Byte offset from document start
10///
11/// # Coordinate Systems
12///
13/// ## Line/Column (Primary for GTK Integration)
14/// - `line`: 1-based line number (CommonMark convention)
15/// - `column`: 1-based byte offset from the start of the line
16///
17/// **Important**: `column` is a BYTE offset, not a character offset!
18/// - For ASCII: byte offset == character offset
19/// - For UTF-8: Multi-byte characters cause divergence
20///   - Example: "Tëst" has 'ë' at byte columns 3-4, but char column 2
21///   - Example: "🎨" (emoji) occupies 4 bytes but is 1 character
22///
23/// ## Absolute Offset (For Debugging Only)
24/// - `offset`: Absolute byte offset from document start
25/// - **Do NOT use** for GTK TextIter positioning!
26/// - Use `line` and `column` instead for robust conversion
27///
28/// # Usage with GTK
29///
30/// When converting to GTK TextIter:
31/// 1. Convert line: `parser_line (1-based)` → `gtk_line (0-based)`
32/// 2. Get line text from GTK buffer
33/// 3. Convert column: `byte_offset → char_offset` using `char_indices()`
34/// 4. Set position: `iter_at_line(gtk_line).set_line_offset(char_offset)`
35///
36/// See the host editor's cursor-conversion bridge for a reference implementation
37/// of byte-to-character offset mapping.
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
39pub struct Position {
40    /// Line number (1-based, CommonMark convention)
41    pub line: usize,
42
43    /// Column as byte offset from line start (1-based, CommonMark convention)
44    ///
45    /// **Note**: This is NOT a character offset!
46    /// Multi-byte UTF-8 characters cause byte offsets to differ from character positions.
47    pub column: usize,
48
49    /// Absolute byte offset from document start
50    ///
51    /// **For debugging/logging only** - do not use for GTK positioning!
52    pub offset: usize,
53}
54
55/// A span representing a range in the source document.
56///
57/// Spans are inclusive of the start position and exclusive of the end position.
58/// This matches CommonMark and most parser conventions.
59///
60/// # Example
61///
62/// For the text "**bold**":
63/// - `start`: Position at the first '*'
64/// - `end`: Position after the last '*' (one past the last character)
65///
66/// # Multi-line Spans
67///
68/// For multi-line content like code blocks, for example a fenced Rust code block,
69/// the inner code might look like:
70///
71/// ```text
72/// fn main() {
73/// }
74/// ```
75///
76/// - `start.line`: Line of opening backticks
77/// - `end.line`: Line after closing backticks
78/// - Columns are byte offsets within their respective lines
79#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
80pub struct Span {
81    /// Start position (inclusive)
82    pub start: Position,
83
84    /// End position (exclusive)
85    pub end: Position,
86}
87
88impl Position {
89    /// Create a new source position from 1-based line/column and absolute offset.
90    pub fn new(line: usize, column: usize, offset: usize) -> Self {
91        Self {
92            line,
93            column,
94            offset,
95        }
96    }
97
98    /// Compute the absolute byte offset of the start of this position's line.
99    ///
100    /// Uses the invariant that `column` is a 1-based byte offset from the
101    /// start of the line, and `offset` is the absolute byte offset from the
102    /// start of the document. The formula is:
103    ///
104    /// line_start_offset = offset - (column - 1)
105    ///
106    /// This function uses saturating math to avoid underflow in case of
107    /// malformed positions.
108    pub fn line_start_offset(&self) -> usize {
109        self.offset.saturating_sub(self.column.saturating_sub(1))
110    }
111}
112
113impl Span {
114    /// Create a new span from inclusive start and exclusive end positions.
115    pub fn new(start: Position, end: Position) -> Self {
116        Self { start, end }
117    }
118
119    /// Return the absolute byte offset of the start of the span's first line.
120    ///
121    /// This is a convenience wrapper around `Position::line_start_offset` for
122    /// the span's `start` position.
123    pub fn start_line_offset(&self) -> usize {
124        self.start.line_start_offset()
125    }
126
127    /// Return the absolute byte offset of the start of the span's end line.
128    /// Useful when expanding a span to include the whole end line.
129    pub fn end_line_offset(&self) -> usize {
130        self.end.line_start_offset()
131    }
132}
133
134/// Convenience helper: compute the absolute byte offset of the start of the
135/// given span's starting line.
136///
137/// This is exposed as a free function to simplify callers that don't have a
138/// `Span` method in scope or prefer a function name matching the refactor plan.
139pub fn compute_line_start_offset(span: &Span) -> usize {
140    span.start_line_offset()
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed positions: the line start is `offset - (column - 1)`.
    #[test]
    fn test_line_start_offset_simple() {
        let pos = Position::new(1, 1, 0);
        assert_eq!(pos.line_start_offset(), 0);

        let pos2 = Position::new(1, 5, 4);
        // offset 4, column 5 -> line start = 4 - (5 - 1) = 0
        assert_eq!(pos2.line_start_offset(), 0);

        let pos3 = Position::new(3, 4, 25);
        // offset 25, column 4 -> line start = 25 - (4 - 1) = 22
        assert_eq!(pos3.line_start_offset(), 22);
    }

    /// Malformed positions must clamp instead of underflowing.
    #[test]
    fn test_line_start_offset_saturates_on_malformed_positions() {
        // column - 1 (= 9) exceeds offset (= 3): the outer subtraction clamps to 0.
        assert_eq!(Position::new(1, 10, 3).line_start_offset(), 0);

        // column 0 is invalid for a 1-based column: the inner subtraction
        // clamps to 0, so the offset comes back unchanged.
        assert_eq!(Position::new(1, 0, 5).line_start_offset(), 5);
    }

    /// Each span accessor reports the line start of its own endpoint.
    #[test]
    fn test_span_line_offsets() {
        let start = Position::new(2, 3, 10);
        let end = Position::new(4, 1, 40);
        let span = Span::new(start, end);

        assert_eq!(span.start_line_offset(), 10 - (3 - 1));
        // end is at column 1, so it already sits at the start of its line.
        assert_eq!(span.end_line_offset(), 40);
    }

    /// The free function must agree with `Span::start_line_offset`.
    #[test]
    fn test_compute_line_start_offset_matches_span_method() {
        let span = Span::new(Position::new(2, 3, 10), Position::new(4, 1, 40));

        assert_eq!(compute_line_start_offset(&span), 8);
        assert_eq!(compute_line_start_offset(&span), span.start_line_offset());
    }
}