// gdeye/document.rs

//! Document representation with versioning and position mapping.
//!
//! This module provides the [`Document`] type for representing versioned source files,
//! along with utilities for converting between different position representations
//! (byte offsets, line/column pairs, and LSP-style UTF-16 positions).
6
7use std::path::{Path, PathBuf};
8use std::sync::Arc;
9
10/// A versioned document representing a source file.
11///
12/// Documents are designed to be cheaply cloneable (source is reference-counted)
13/// and provide efficient position mapping between different representations.
14#[derive(Debug, Clone)]
15pub struct Document {
16    /// The file path (or URI for unsaved files).
17    pub path: PathBuf,
18    /// Version number, incremented on each edit.
19    pub version: i32,
20    /// The source content (shared for cheap cloning).
21    content: Arc<str>,
22    /// Line start byte offsets for position mapping.
23    line_starts: Arc<[usize]>,
24}
25
26impl Document {
27    /// Create a new document with version 0.
28    pub fn new(path: impl Into<PathBuf>, content: impl Into<String>) -> Self {
29        let content: Arc<str> = content.into().into();
30        let line_starts = compute_line_starts(&content);
31        Self {
32            path: path.into(),
33            version: 0,
34            content,
35            line_starts: line_starts.into(),
36        }
37    }
38
39    /// Create a new document with a specific version.
40    pub fn with_version(
41        path: impl Into<PathBuf>,
42        version: i32,
43        content: impl Into<String>,
44    ) -> Self {
45        let content: Arc<str> = content.into().into();
46        let line_starts = compute_line_starts(&content);
47        Self {
48            path: path.into(),
49            version,
50            content,
51            line_starts: line_starts.into(),
52        }
53    }
54
55    /// Get the document path.
56    pub fn path(&self) -> &Path {
57        &self.path
58    }
59
60    /// Get the document version.
61    pub fn version(&self) -> i32 {
62        self.version
63    }
64
65    /// Get the source content.
66    pub fn content(&self) -> &str {
67        &self.content
68    }
69
70    /// Get the number of lines in the document.
71    pub fn line_count(&self) -> usize {
72        self.line_starts.len()
73    }
74
75    /// Convert a (line, column) position to a byte offset.
76    ///
77    /// Lines and columns are 0-indexed. Returns `None` if the position is out of bounds.
78    pub fn offset_at(&self, line: usize, column: usize) -> Option<usize> {
79        let line_start = *self.line_starts.get(line)?;
80        let line_end = self
81            .line_starts
82            .get(line + 1)
83            .copied()
84            .unwrap_or(self.content.len());
85
86        // Get the line content and find the byte offset for the column
87        let line_content = &self.content[line_start..line_end];
88        let offset = utf16_offset_to_byte(line_content, column)?;
89
90        Some(line_start + offset)
91    }
92
93    /// Convert a byte offset to a (line, column) position.
94    ///
95    /// Returns 0-indexed line and column. The column is in UTF-16 code units
96    /// for LSP compatibility.
97    pub fn position_at(&self, offset: usize) -> (usize, usize) {
98        let offset = offset.min(self.content.len());
99
100        // Binary search for the line containing this offset
101        let line = match self.line_starts.binary_search(&offset) {
102            Ok(line) => line,
103            Err(line) => line.saturating_sub(1),
104        };
105
106        let line_start = self.line_starts[line];
107        let line_content = &self.content[line_start..offset];
108        let column = byte_offset_to_utf16(line_content);
109
110        (line, column)
111    }
112
113    /// Get the text for a specific line (0-indexed).
114    ///
115    /// The returned string does not include the line ending.
116    pub fn line(&self, line: usize) -> Option<&str> {
117        let start = *self.line_starts.get(line)?;
118        let end = self
119            .line_starts
120            .get(line + 1)
121            .copied()
122            .unwrap_or(self.content.len());
123
124        let text = &self.content[start..end];
125        // Strip trailing newline characters
126        Some(text.trim_end_matches(['\r', '\n']))
127    }
128
129    /// Get a slice of the content by byte range.
130    pub fn slice(&self, start: usize, end: usize) -> Option<&str> {
131        if start <= end && end <= self.content.len() {
132            Some(&self.content[start..end])
133        } else {
134            None
135        }
136    }
137
138    /// Get the byte offset of the start of a line.
139    pub fn line_start(&self, line: usize) -> Option<usize> {
140        self.line_starts.get(line).copied()
141    }
142
143    /// Get the byte offset of the end of a line (before the newline).
144    pub fn line_end(&self, line: usize) -> Option<usize> {
145        let start = *self.line_starts.get(line)?;
146        let end = self
147            .line_starts
148            .get(line + 1)
149            .copied()
150            .unwrap_or(self.content.len());
151
152        // Find position before newline
153        let line_content = &self.content[start..end];
154        let trimmed_len = line_content.trim_end_matches(['\r', '\n']).len();
155        Some(start + trimmed_len)
156    }
157
158    /// Create a new document with updated content, incrementing the version.
159    pub fn update(&self, content: impl Into<String>) -> Self {
160        Self::with_version(&self.path, self.version + 1, content)
161    }
162
163    /// Apply an incremental edit to the document.
164    ///
165    /// The range is specified as (start_line, start_col, end_line, end_col) with
166    /// 0-indexed positions and UTF-16 columns.
167    pub fn apply_edit(
168        &self,
169        start_line: usize,
170        start_col: usize,
171        end_line: usize,
172        end_col: usize,
173        new_text: &str,
174    ) -> Option<Self> {
175        let start_offset = self.offset_at(start_line, start_col)?;
176        let end_offset = self.offset_at(end_line, end_col)?;
177
178        if start_offset > end_offset {
179            return None;
180        }
181
182        let mut new_content = String::with_capacity(
183            self.content.len() - (end_offset - start_offset) + new_text.len(),
184        );
185        new_content.push_str(&self.content[..start_offset]);
186        new_content.push_str(new_text);
187        new_content.push_str(&self.content[end_offset..]);
188
189        Some(Self::with_version(
190            &self.path,
191            self.version + 1,
192            new_content,
193        ))
194    }
195}
196
/// Build the line index for `content`: the byte offset at which each line begins.
///
/// The first line always starts at offset 0; every `'\n'` opens a new line at
/// the byte immediately after it. The result therefore has one more entry
/// than `content` has newlines and is never empty.
fn compute_line_starts(content: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(content.match_indices('\n').map(|(newline_at, _)| newline_at + 1))
        .collect()
}
207
/// Convert a UTF-16 column offset to a byte offset within a line.
///
/// `utf16_col` counts UTF-16 code units from the start of `line`. The result
/// is always a valid UTF-8 character boundary in `line`:
///
/// * a column at the start of a character maps to that character's byte offset;
/// * a column equal to the line's UTF-16 length maps to `line.len()`;
/// * a column that lands inside a surrogate pair is rounded forward to the
///   byte offset just after that character, including when the character is
///   the last one on the line;
/// * any column beyond the end of the line yields `None`.
fn utf16_offset_to_byte(line: &str, utf16_col: usize) -> Option<usize> {
    let mut units_seen = 0;
    for (byte_offset, ch) in line.char_indices() {
        if units_seen >= utf16_col {
            return Some(byte_offset);
        }
        units_seen += ch.len_utf16();
    }
    // Reaching here with `units_seen > utf16_col` means the column pointed into
    // the second half of a trailing surrogate pair; round forward to the end of
    // the line, as the loop above does for pairs in the middle of a line.
    (units_seen >= utf16_col).then_some(line.len())
}
224
/// Measure a line prefix in UTF-16 code units.
///
/// `line_prefix` is the text from the start of a line up to the position of
/// interest; its UTF-16 length is that position's column.
fn byte_offset_to_utf16(line_prefix: &str) -> usize {
    line_prefix.encode_utf16().count()
}
229
/// A location inside a document.
///
/// Both components are 0-indexed; `column` counts UTF-16 code units.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Position {
    /// 0-indexed line number.
    pub line: usize,
    /// 0-indexed column, in UTF-16 code units.
    pub column: usize,
}

impl Position {
    /// Build a position from a line and a column.
    pub fn new(line: usize, column: usize) -> Self {
        Position { line, column }
    }
}
242
243/// A range in a document.
244#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
245pub struct Range {
246    pub start: Position,
247    pub end: Position,
248}
249
250impl Range {
251    pub fn new(start: Position, end: Position) -> Self {
252        Self { start, end }
253    }
254
255    pub fn from_positions(
256        start_line: usize,
257        start_col: usize,
258        end_line: usize,
259        end_col: usize,
260    ) -> Self {
261        Self {
262            start: Position::new(start_line, start_col),
263            end: Position::new(end_line, end_col),
264        }
265    }
266
267    /// Check if a position is within this range.
268    pub fn contains(&self, pos: Position) -> bool {
269        if pos.line < self.start.line || pos.line > self.end.line {
270            return false;
271        }
272        if pos.line == self.start.line && pos.column < self.start.column {
273            return false;
274        }
275        if pos.line == self.end.line && pos.column > self.end.column {
276            return false;
277        }
278        true
279    }
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a document from `text`; the path is irrelevant to every test here.
    fn doc_of(text: &str) -> Document {
        Document::new("test.gd", text)
    }

    #[test]
    fn new_document() {
        let source = "hello\nworld";
        let doc = doc_of(source);
        assert_eq!(doc.version(), 0);
        assert_eq!(doc.content(), source);
        assert_eq!(doc.line_count(), 2);
    }

    #[test]
    fn line_access() {
        let doc = doc_of("line1\nline2\nline3");
        let expected_lines = ["line1", "line2", "line3"];
        for (index, expected) in expected_lines.iter().enumerate() {
            assert_eq!(doc.line(index), Some(*expected));
        }
        // One past the last line.
        assert_eq!(doc.line(expected_lines.len()), None);
    }

    #[test]
    fn line_with_crlf() {
        let doc = doc_of("line1\r\nline2\r\n");
        let expected_lines = ["line1", "line2"];
        for (index, expected) in expected_lines.iter().enumerate() {
            assert_eq!(doc.line(index), Some(*expected));
        }
    }

    #[test]
    fn offset_at_simple() {
        let doc = doc_of("hello\nworld");
        // ((line, column), expected byte offset)
        let cases = [((0, 0), 0), ((0, 5), 5), ((1, 0), 6), ((1, 5), 11)];
        for &((line, column), offset) in cases.iter() {
            assert_eq!(doc.offset_at(line, column), Some(offset));
        }
    }

    #[test]
    fn position_at_simple() {
        let doc = doc_of("hello\nworld");
        // (byte offset, expected (line, column))
        let cases = [(0, (0, 0)), (5, (0, 5)), (6, (1, 0)), (11, (1, 5))];
        for &(offset, position) in cases.iter() {
            assert_eq!(doc.position_at(offset), position);
        }
    }

    #[test]
    fn utf16_handling() {
        // "𝄞" is 4 bytes in UTF-8 and 2 code units in UTF-16, so in "a𝄞b":
        //   'a' -> byte 0, column 0
        //   '𝄞' -> byte 1, column 1
        //   'b' -> byte 5, column 3
        let doc = doc_of("a𝄞b");
        let cases = [(0, 0), (1, 1), (5, 3)];
        for &(byte, column) in cases.iter() {
            // Column -> byte offset.
            assert_eq!(doc.offset_at(0, column), Some(byte));
        }
        for &(byte, column) in cases.iter() {
            // Byte offset -> column.
            assert_eq!(doc.position_at(byte), (0, column));
        }
    }

    #[test]
    fn update_increments_version() {
        let original = doc_of("v1");
        let updated = original.update("v2");
        assert_eq!(original.version(), 0);
        assert_eq!(updated.version(), 1);
        assert_eq!(updated.content(), "v2");
    }

    #[test]
    fn apply_edit_insert() {
        let edited = doc_of("hello world")
            .apply_edit(0, 5, 0, 5, " beautiful")
            .unwrap();
        assert_eq!(edited.content(), "hello beautiful world");
    }

    #[test]
    fn apply_edit_replace() {
        let edited = doc_of("hello world")
            .apply_edit(0, 6, 0, 11, "rust")
            .unwrap();
        assert_eq!(edited.content(), "hello rust");
    }

    #[test]
    fn apply_edit_multiline() {
        let edited = doc_of("line1\nline2\nline3")
            .apply_edit(0, 5, 2, 0, "\nnew\n")
            .unwrap();
        assert_eq!(edited.content(), "line1\nnew\nline3");
    }

    #[test]
    fn range_contains() {
        let range = Range::from_positions(1, 5, 3, 10);
        // ((line, column), expected membership)
        let cases = [
            ((0, 5), false), // before start line
            ((1, 4), false), // before start column
            ((1, 5), true),  // at start
            ((2, 0), true),  // middle line
            ((3, 10), true), // at end
            ((3, 11), false), // after end column
            ((4, 0), false), // after end line
        ];
        for &((line, column), expected) in cases.iter() {
            assert_eq!(range.contains(Position::new(line, column)), expected);
        }
    }

    #[test]
    fn line_start_end() {
        let doc = doc_of("hello\nworld\n");
        // (line, start offset, end offset before the newline)
        let cases = [(0, 0, 5), (1, 6, 11)];
        for &(line, start, end) in cases.iter() {
            assert_eq!(doc.line_start(line), Some(start));
            assert_eq!(doc.line_end(line), Some(end));
        }
    }
}