Skip to main content

tsz_common/
position.rs

//! Position and location utilities for LSP.
//!
//! LSP addresses text by line/column positions, while our AST uses byte offsets.
//! This module provides utilities for converting between the two.
5
6/// A position in a source file (0-indexed line and column).
7#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
8pub struct Position {
9    /// 0-indexed line number
10    pub line: u32,
11    /// 0-indexed column (UTF-16 code units for LSP compatibility)
12    pub character: u32,
13}
14
15impl Position {
16    #[must_use]
17    pub const fn new(line: u32, character: u32) -> Self {
18        Self { line, character }
19    }
20}
21
22/// A range in a source file.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
24pub struct Range {
25    pub start: Position,
26    pub end: Position,
27}
28
29impl Range {
30    #[must_use]
31    pub const fn new(start: Position, end: Position) -> Self {
32        Self { start, end }
33    }
34}
35
36/// A location in a source file (file path + range).
37#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
38pub struct Location {
39    #[serde(rename = "uri")]
40    pub file_path: String,
41    pub range: Range,
42}
43
44impl Location {
45    #[must_use]
46    pub const fn new(file_path: String, range: Range) -> Self {
47        Self { file_path, range }
48    }
49}
50
51/// Source location with both offset and line/column info.
52#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
53pub struct SourceLocation {
54    /// Byte offset from start of file
55    pub offset: u32,
56    /// 0-indexed line number
57    pub line: u32,
58    /// 0-indexed column
59    pub character: u32,
60}
61
62impl SourceLocation {
63    #[must_use]
64    pub const fn new(offset: u32, line: u32, character: u32) -> Self {
65        Self {
66            offset,
67            line,
68            character,
69        }
70    }
71}
72
/// Lookup table for fast offset <-> position conversion.
///
/// It records the offset at which every line of a source text begins.
#[derive(Clone, Debug)]
pub struct LineMap {
    /// Starting offset of each line (`line_starts[0]` is always 0).
    line_starts: Vec<u32>,
}
80
81impl LineMap {
82    /// Build a line map from source text.
83    #[must_use]
84    pub fn build(source: &str) -> Self {
85        let mut line_starts = vec![0u32];
86
87        for (i, ch) in source.char_indices() {
88            if ch == '\n' {
89                // Next line starts after the newline
90                line_starts.push(u32::try_from(i + 1).unwrap_or(u32::MAX));
91            } else if ch == '\r' {
92                // Handle \r\n (Windows) and \r (old Mac)
93                let next_idx = i + 1;
94                if source.as_bytes().get(next_idx) != Some(&b'\n') {
95                    // \r not followed by \n - treat as line ending
96                    line_starts.push(u32::try_from(next_idx).unwrap_or(u32::MAX));
97                }
98                // \r followed by \n - the \n will create the line start
99            }
100        }
101
102        Self { line_starts }
103    }
104
105    /// Convert a byte offset to a Position (line, character).
106    /// Character is counted in UTF-16 code units for LSP compatibility.
107    #[must_use]
108    pub fn offset_to_position(&self, offset: u32, source: &str) -> Position {
109        // Binary search for the line containing this offset
110        let line = match self.line_starts.binary_search(&offset) {
111            Ok(exact) => exact,
112            Err(insert_point) => insert_point.saturating_sub(1),
113        };
114
115        let line_start = usize::try_from(self.line_starts.get(line).copied().unwrap_or(0))
116            .unwrap_or(usize::MAX)
117            .min(source.len());
118        let clamped_end = usize::try_from(offset)
119            .unwrap_or(source.len())
120            .min(source.len());
121        let start = line_start.min(clamped_end);
122        let slice = source.get(start..clamped_end).unwrap_or("");
123        let character = slice
124            .chars()
125            .map(|ch| u32::try_from(ch.len_utf16()).unwrap_or(u32::MAX))
126            .sum();
127
128        Position {
129            line: u32::try_from(line).unwrap_or(u32::MAX),
130            character,
131        }
132    }
133
134    /// Convert a Position (line, character) to a byte offset.
135    #[must_use]
136    pub fn position_to_offset(&self, position: Position, source: &str) -> Option<u32> {
137        let line_idx = usize::try_from(position.line).ok()?;
138        let line_start = *self.line_starts.get(line_idx)?;
139        let line_start = usize::try_from(line_start).ok()?;
140        let line_limit = if line_idx + 1 < self.line_starts.len() {
141            usize::try_from(self.line_starts[line_idx + 1]).ok()?
142        } else {
143            source.len()
144        };
145        let slice = source.get(line_start..line_limit).unwrap_or("");
146        let mut utf16_count = 0u32;
147        let mut byte_count = 0usize;
148
149        for ch in slice.chars() {
150            if ch == '\n' || ch == '\r' {
151                break;
152            }
153            let ch_utf16 = u32::try_from(ch.len_utf16()).ok()?;
154            if utf16_count + ch_utf16 > position.character {
155                break;
156            }
157            utf16_count += ch_utf16;
158            byte_count += ch.len_utf8();
159            if utf16_count == position.character {
160                break;
161            }
162        }
163
164        u32::try_from(line_start + byte_count).ok()
165    }
166
167    /// Get the number of lines.
168    #[must_use]
169    pub const fn line_count(&self) -> usize {
170        self.line_starts.len()
171    }
172
173    /// Get the starting offset of a line.
174    #[must_use]
175    pub fn line_start(&self, line: usize) -> Option<u32> {
176        self.line_starts.get(line).copied()
177    }
178}
179
180#[cfg(test)]
181#[path = "../tests/position_tests.rs"]
182mod tests;