marco_core/parser/position.rs
1// Position tracking for editor intelligence integration (line/column mapping)
2
3use serde::{Deserialize, Serialize};
4
/// Position in a source document using multiple coordinate systems.
///
/// This struct tracks a position using two different representations:
/// - **Line/Column**: CommonMark-style 1-based coordinates
/// - **Absolute Offset**: Byte offset from document start
///
/// # Coordinate Systems
///
/// ## Line/Column (Primary for GTK Integration)
/// - `line`: 1-based line number (CommonMark convention)
/// - `column`: 1-based byte offset from the start of the line
///
/// **Important**: `column` is a BYTE offset, not a character offset!
/// - For ASCII: byte offset == character offset
/// - For UTF-8: Multi-byte characters cause divergence
/// - Example: in "Tëst" the 'ë' (precomposed, 2 bytes in UTF-8) occupies byte
///   columns 2-3, so the following 's' is at byte column 4 but char column 3
/// - Example: "🎨" (emoji) occupies 4 bytes but is 1 character
///
/// ## Absolute Offset (For Debugging Only)
/// - `offset`: Absolute byte offset from document start
/// - **Do NOT use** for GTK TextIter positioning!
/// - Use `line` and `column` instead for robust conversion
///
/// # Usage with GTK
///
/// When converting to GTK TextIter:
/// 1. Convert line: `parser_line (1-based)` → `gtk_line (0-based)`
/// 2. Get line text from GTK buffer
/// 3. Convert column: `byte_offset → char_offset` using `char_indices()`
///    (`column` is 1-based, so subtract 1 to get the byte offset within the line)
/// 4. Set position: `iter_at_line(gtk_line).set_line_offset(char_offset)`
///
/// See the host editor's cursor-conversion bridge for a reference implementation
/// of byte-to-character offset mapping.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct Position {
    /// Line number (1-based, CommonMark convention)
    pub line: usize,

    /// Column as byte offset from line start (1-based, CommonMark convention)
    ///
    /// **Note**: This is NOT a character offset!
    /// Multi-byte UTF-8 characters cause byte offsets to differ from character positions.
    pub column: usize,

    /// Absolute byte offset from document start
    ///
    /// **For debugging/logging only** - do not use for GTK positioning!
    pub offset: usize,
}
54
/// A span representing a range in the source document.
///
/// Spans are half-open: they include the `start` position and exclude the
/// `end` position. This matches CommonMark and most parser conventions.
///
/// # Example
///
/// For the text "**bold**":
/// - `start`: Position at the first '*'
/// - `end`: Position after the last '*' (one past the last character)
///
/// # Multi-line Spans
///
/// Spans may cover several lines. For example, for a fenced Rust code block
/// whose inner code looks like:
///
/// ```text
/// fn main() {
/// }
/// ```
///
/// - `start.line`: Line of opening backticks
/// - `end.line`: Line after closing backticks
/// - Columns are byte offsets within their respective lines
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct Span {
    /// Start position (inclusive)
    pub start: Position,

    /// End position (exclusive)
    pub end: Position,
}
87
88impl Position {
89 /// Create a new source position from 1-based line/column and absolute offset.
90 pub fn new(line: usize, column: usize, offset: usize) -> Self {
91 Self {
92 line,
93 column,
94 offset,
95 }
96 }
97
98 /// Compute the absolute byte offset of the start of this position's line.
99 ///
100 /// Uses the invariant that `column` is a 1-based byte offset from the
101 /// start of the line, and `offset` is the absolute byte offset from the
102 /// start of the document. The formula is:
103 ///
104 /// line_start_offset = offset - (column - 1)
105 ///
106 /// This function uses saturating math to avoid underflow in case of
107 /// malformed positions.
108 pub fn line_start_offset(&self) -> usize {
109 self.offset.saturating_sub(self.column.saturating_sub(1))
110 }
111}
112
113impl Span {
114 /// Create a new span from inclusive start and exclusive end positions.
115 pub fn new(start: Position, end: Position) -> Self {
116 Self { start, end }
117 }
118
119 /// Return the absolute byte offset of the start of the span's first line.
120 ///
121 /// This is a convenience wrapper around `Position::line_start_offset` for
122 /// the span's `start` position.
123 pub fn start_line_offset(&self) -> usize {
124 self.start.line_start_offset()
125 }
126
127 /// Return the absolute byte offset of the start of the span's end line.
128 /// Useful when expanding a span to include the whole end line.
129 pub fn end_line_offset(&self) -> usize {
130 self.end.line_start_offset()
131 }
132}
133
134/// Convenience helper: compute the absolute byte offset of the start of the
135/// given span's starting line.
136///
137/// This is exposed as a free function to simplify callers that don't have a
138/// `Span` method in scope or prefer a function name matching the refactor plan.
139pub fn compute_line_start_offset(span: &Span) -> usize {
140 span.start_line_offset()
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// `line_start_offset` subtracts the zero-based column from the offset.
    #[test]
    fn test_line_start_offset_simple() {
        // (line, column, offset, expected line-start offset)
        let cases = [
            (1, 1, 0, 0),
            // offset 4, column 5 -> line start = 4 - (5 - 1) = 0
            (1, 5, 4, 0),
            // offset 25, column 4 -> line start = 25 - (4 - 1) = 22
            (3, 4, 25, 22),
        ];

        for &(line, column, offset, expected) in &cases {
            let position = Position::new(line, column, offset);
            assert_eq!(position.line_start_offset(), expected);
        }
    }

    /// Both span helpers report the line start of their respective endpoint.
    #[test]
    fn test_span_line_offsets() {
        let span = Span::new(Position::new(2, 3, 10), Position::new(4, 1, 40));

        // Start sits at column 3, i.e. two bytes into its line: 10 - (3 - 1).
        assert_eq!(span.start_line_offset(), 8);
        // End sits at column 1, so it is already at the start of its line.
        assert_eq!(span.end_line_offset(), 40);
    }
}