Skip to main content

marco_core/parser/
position.rs

1// Position tracking for editor intelligence integration (line/column mapping)
2
3use serde::{Deserialize, Serialize};
4
/// Position in a source document using multiple coordinate systems.
///
/// This struct tracks a position using two different representations:
/// - **Line/Column**: CommonMark-style 1-based coordinates
/// - **Absolute Offset**: Byte offset from document start
///
/// # Coordinate Systems
///
/// ## Line/Column (Primary for GTK Integration)
/// - `line`: 1-based line number (CommonMark convention)
/// - `column`: 1-based byte offset from the start of the line
///
/// **Important**: `column` is a BYTE offset, not a character offset!
/// - For ASCII: byte offset == character offset
/// - For UTF-8: Multi-byte characters cause divergence
///   - Example: in "Tëst" (precomposed 'ë', 2 bytes in UTF-8), 'ë' occupies
///     byte columns 2-3 but is char column 2, so 's' sits at byte column 4
///     while being char column 3
///   - Example: "🎨" (emoji) occupies 4 bytes but is 1 character
///
/// ## Absolute Offset (For Debugging Only)
/// - `offset`: Absolute byte offset from document start
/// - **Do NOT use** for GTK TextIter positioning!
/// - Use `line` and `column` instead for robust conversion
///
/// # Usage with GTK
///
/// When converting to GTK TextIter:
/// 1. Convert line: `parser_line (1-based)` → `gtk_line (0-based)`
/// 2. Get line text from GTK buffer
/// 3. Convert column: `byte_offset → char_offset` using `char_indices()`
/// 4. Set position: `iter_at_line(gtk_line).set_line_offset(char_offset)`
///
/// See `marco/src/components/editor/intelligence_integration.rs::position_to_iter()`
/// for the reference implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct Position {
    /// Line number (1-based, CommonMark convention)
    pub line: usize,

    /// Column as byte offset from line start (1-based, CommonMark convention)
    ///
    /// **Note**: This is NOT a character offset!
    /// Multi-byte UTF-8 characters cause byte offsets to differ from character positions.
    pub column: usize,

    /// Absolute byte offset from document start
    ///
    /// **For debugging/logging only** - do not use for GTK positioning!
    pub offset: usize,
}
54
/// A span representing a range in the source document.
///
/// Spans are inclusive of the start position and exclusive of the end position.
/// This matches CommonMark and most parser conventions.
///
/// # Example
///
/// For the text "**bold**":
/// - `start`: Position at the first '*'
/// - `end`: Position after the last '*' (one past the last character)
///
/// # Multi-line Spans
///
/// A span may cover several lines. Take a fenced Rust code block whose inner
/// code looks like this:
///
/// ```text
/// fn main() {
/// }
/// ```
///
/// For the span of that fenced block:
/// - `start.line`: Line of opening backticks
/// - `end.line`: Line after closing backticks
/// - Columns are byte offsets within their respective lines
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct Span {
    /// Start position (inclusive)
    pub start: Position,

    /// End position (exclusive)
    pub end: Position,
}
87
88impl Position {
89    pub fn new(line: usize, column: usize, offset: usize) -> Self {
90        Self {
91            line,
92            column,
93            offset,
94        }
95    }
96
97    /// Compute the absolute byte offset of the start of this position's line.
98    ///
99    /// Uses the invariant that `column` is a 1-based byte offset from the
100    /// start of the line, and `offset` is the absolute byte offset from the
101    /// start of the document. The formula is:
102    ///
103    /// line_start_offset = offset - (column - 1)
104    ///
105    /// This function uses saturating math to avoid underflow in case of
106    /// malformed positions.
107    pub fn line_start_offset(&self) -> usize {
108        self.offset.saturating_sub(self.column.saturating_sub(1))
109    }
110}
111
112impl Span {
113    pub fn new(start: Position, end: Position) -> Self {
114        Self { start, end }
115    }
116
117    /// Return the absolute byte offset of the start of the span's first line.
118    ///
119    /// This is a convenience wrapper around `Position::line_start_offset` for
120    /// the span's `start` position.
121    pub fn start_line_offset(&self) -> usize {
122        self.start.line_start_offset()
123    }
124
125    /// Return the absolute byte offset of the start of the span's end line.
126    /// Useful when expanding a span to include the whole end line.
127    pub fn end_line_offset(&self) -> usize {
128        self.end.line_start_offset()
129    }
130}
131
132/// Convenience helper: compute the absolute byte offset of the start of the
133/// given span's starting line.
134///
135/// This is exposed as a free function to simplify callers that don't have a
136/// `Span` method in scope or prefer a function name matching the refactor plan.
137pub fn compute_line_start_offset(span: &Span) -> usize {
138    span.start_line_offset()
139}
140
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed positions: line start is `offset - (column - 1)`.
    #[test]
    fn test_line_start_offset_simple() {
        let pos = Position::new(1, 1, 0);
        assert_eq!(pos.line_start_offset(), 0);

        let pos2 = Position::new(1, 5, 4);
        // offset 4, column 5 -> line start = 4 - (5 - 1) = 0
        assert_eq!(pos2.line_start_offset(), 0);

        let pos3 = Position::new(3, 4, 25);
        // offset 25, column 4 -> line start = 25 - (4 - 1) = 22
        assert_eq!(pos3.line_start_offset(), 22);
    }

    /// Malformed positions must clamp instead of underflowing.
    #[test]
    fn test_line_start_offset_saturates_on_malformed_positions() {
        // column - 1 (9) exceeds offset (2): outer subtraction saturates to 0.
        assert_eq!(Position::new(1, 10, 2).line_start_offset(), 0);

        // column 0 is invalid for a 1-based column: inner subtraction
        // saturates to 0, so the offset is returned unchanged.
        assert_eq!(Position::new(1, 0, 7).line_start_offset(), 7);
    }

    /// Span helpers delegate to the respective endpoint's line start.
    #[test]
    fn test_span_line_offsets() {
        let start = Position::new(2, 3, 10);
        let end = Position::new(4, 1, 40);
        let span = Span::new(start, end);

        assert_eq!(span.start_line_offset(), 10 - (3 - 1));
        assert_eq!(span.end_line_offset(), 40);
    }

    /// The free function must agree with `Span::start_line_offset`.
    #[test]
    fn test_compute_line_start_offset_matches_span_method() {
        let span = Span::new(Position::new(2, 3, 10), Position::new(4, 1, 40));

        assert_eq!(compute_line_start_offset(&span), 8);
        assert_eq!(compute_line_start_offset(&span), span.start_line_offset());
    }
}