auto_lsp_core/document/
mod.rs

1/*
2This file is part of auto-lsp.
3Copyright (C) 2025 CLAUZEL Adrien
4
5auto-lsp is free software: you can redistribute it and/or modify
6it under the terms of the GNU General Public License as published by
7the Free Software Foundation, either version 3 of the License, or
8(at your option) any later version.
9
10This program is distributed in the hope that it will be useful,
11but WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program.  If not, see <http://www.gnu.org/licenses/>
17*/
18
19use std::{
20    ops::Range,
21    sync::atomic::{AtomicUsize, Ordering},
22};
23use texter::core::text::Text;
24use texter_impl::{change::WrapChange, updateable::WrapTree};
25use tree_sitter::{Point, Tree};
26
27use crate::errors::{DocumentError, PositionError, TexterError, TreeSitterError};
28
29pub(crate) mod texter_impl;
30
/// Represents a text document that combines plain text [`texter`] with its parsed syntax tree [`tree_sitter::Tree`].
///
/// This struct allows for incremental updates of both the text content and the syntax tree,
/// ensuring they stay synchronized after each change. It provides utility functions for querying
/// the syntax tree, such as finding nodes by position or range.
#[derive(Debug, Clone)]
pub struct Document {
    /// Text buffer; exposes the raw `text` and the `br_indexes` line-break table used by the
    /// offset/position conversions.
    pub texter: Text,
    /// Syntax tree for the text; replaced by [`Document::update`] after each successful reparse.
    pub tree: Tree,
}
41
thread_local! {
    /// Thread-local cache of the line index most recently resolved by [`Document::position_at`].
    ///
    /// It is initialized to 0, indicating that no lines have been accessed yet.
    /// This is a performance optimization to avoid searching from the beginning of the document
    /// every time we need to find a position.
    /// The value is updated whenever a position is found while scanning the line breaks, so that
    /// subsequent calls can start from the last accessed line.
    /// If the requested offset is not past the line break stored at the cached index, the cache
    /// is ignored for that call and the scan restarts from the first line.
    ///
    /// The cache is per thread, not per document: every `Document` used on the same thread
    /// reads and writes the same value.

    pub static LAST_LINE: AtomicUsize = const { AtomicUsize::new(0) };
}
54
55impl Document {
56    pub fn new(texter: Text, tree: Tree) -> Self {
57        Self { texter, tree }
58    }
59
60    pub fn as_str(&self) -> &str {
61        &self.texter.text
62    }
63
64    pub fn as_bytes(&self) -> &[u8] {
65        self.texter.text.as_bytes()
66    }
67
68    pub fn is_empty(&self) -> bool {
69        self.texter.text.is_empty()
70    }
71
72    /// Updates the document based on the provided list of text changes.
73    ///
74    /// This method applies the changes to both the text [`texter`] and the syntax tree [`Tree`].
75    /// It uses incremental parsing to minimize the cost of updating the syntax tree.
76    ///
77    /// # Parameters
78    /// - `parser`: A mutable reference to the Tree-sitter parser used to reparse the document.
79    /// - `changes`: A vector of `TextDocumentContentChangeEvent` objects representing text changes.
80    ///
81    /// # Errors
82    /// Returns an error if Tree-sitter fails to reparse the updated text
83    pub fn update(
84        &mut self,
85        parser: &mut tree_sitter::Parser,
86        changes: &[lsp_types::TextDocumentContentChangeEvent],
87    ) -> Result<(), DocumentError> {
88        let mut new_tree = WrapTree::from(&mut self.tree);
89
90        for change in changes {
91            self.texter
92                .update(WrapChange::from(change).change, &mut new_tree)
93                .map_err(|e| DocumentError::from(TexterError::from(e)))?;
94        }
95
96        self.tree = parser
97            .parse(self.texter.text.as_bytes(), Some(&self.tree))
98            .ok_or_else(|| DocumentError::from(TreeSitterError::TreeSitterParser))?;
99
100        Ok(())
101    }
102
103    /// Retrieves the smallest syntax node that spans the given position in the document.
104    pub fn node_at_position(&self, position: lsp_types::Position) -> Option<tree_sitter::Node<'_>> {
105        let position = Point {
106            row: position.line as usize,
107            column: position.character as usize,
108        };
109
110        self.tree
111            .root_node()
112            .named_descendant_for_point_range(position, position)
113    }
114
115    /// Retrieves the range (start and end positions) of the smallest syntax node that spans the given byte offset.
116    pub fn node_range_at(&self, offset: usize) -> Option<lsp_types::Range> {
117        self.tree
118            .root_node()
119            .named_descendant_for_byte_range(offset, offset)
120            .map(|pos| lsp_types::Range {
121                start: lsp_types::Position {
122                    line: pos.start_position().row as u32,
123                    character: pos.start_position().column as u32,
124                },
125                end: lsp_types::Position {
126                    line: pos.end_position().row as u32,
127                    character: pos.end_position().column as u32,
128                },
129            })
130    }
131
132    /// Converts a byte offset in the document to its corresponding position (line and character).
133    pub fn position_at(&self, offset: usize) -> Result<lsp_types::Position, PositionError> {
134        let mut last_br_index = 0;
135        let last_line = LAST_LINE.with(|a| a.load(Ordering::SeqCst));
136
137        // If the document is a single line, we can avoid the loop
138        if self.texter.br_indexes.0.len() == 1 {
139            return if offset > self.texter.text.len() {
140                Err(PositionError::LineOutOfBound {
141                    offset,
142                    length: self.texter.text.len(),
143                })
144            } else {
145                Ok(lsp_types::Position {
146                    line: 0,
147                    character: offset as u32,
148                })
149            };
150        }
151
152        // Determine the starting line for the search
153        let start = match self.texter.br_indexes.0.get(last_line) {
154            Some(&br_index) if offset > br_index && last_line >= 1 => last_line, // Start from cached line if offset is beyond it
155            _ => 1, // Start from at least index 1 to avoid incorrect 0 offset issues
156        };
157
158        for (i, &br_index) in self.texter.br_indexes.0.iter().skip(start).enumerate() {
159            if offset <= br_index {
160                // Cache this line for future calls
161                LAST_LINE.with(|a| a.store(i + (start - 1), Ordering::Release));
162
163                // Compute column by subtracting the last break index
164                let col = offset.saturating_sub(last_br_index);
165
166                return Ok(lsp_types::Position {
167                    line: (i + (start - 1)) as u32,
168                    character: col as u32,
169                });
170            }
171
172            last_br_index = br_index + 1; // Move past the EOL character
173        }
174
175        if offset <= self.texter.text.len() {
176            let last_known_col = self.texter.br_indexes.0.iter().len();
177            let last_br = *self.texter.br_indexes.0.last().unwrap();
178            Ok(lsp_types::Position {
179                line: last_known_col.saturating_sub(1) as u32,
180                character: offset.saturating_sub(last_br) as u32,
181            })
182        } else {
183            Err(PositionError::WrongPosition { offset })
184        }
185    }
186
187    /// Converts a byte offset in the document to its corresponding range (start and end positions).
188    pub fn range_at(&self, range: Range<usize>) -> Result<lsp_types::Range, PositionError> {
189        let start = self
190            .position_at(range.start)
191            .map_err(|err| PositionError::WrongRange {
192                range: range.clone(),
193                position_error: Box::new(err),
194            })?;
195        let end = self
196            .position_at(range.end)
197            .map_err(|err| PositionError::WrongRange {
198                range: range.clone(),
199                position_error: Box::new(err),
200            })?;
201        Ok(lsp_types::Range { start, end })
202    }
203
204    /// Converts a position (line and character) in the document to its corresponding byte offset.
205    pub fn offset_at(&self, position: lsp_types::Position) -> Option<usize> {
206        let line_index = self.texter.br_indexes.row_start(position.line as usize)?;
207        let line_str = self.texter.get_row(position.line as usize)?;
208        let col = position.character as usize;
209        if col > line_str.len() {
210            None
211        } else {
212            Some(line_index + col)
213        }
214    }
215}
216
217#[cfg(test)]
218mod test {
219    use super::*;
220    use lsp_types::Position;
221    use rstest::{fixture, rstest};
222    use tree_sitter::Parser;
223
224    #[fixture]
225    fn parser() -> Parser {
226        let mut p = Parser::new();
227        p.set_language(&tree_sitter_html::LANGUAGE.into()).unwrap();
228        p
229    }
230
231    fn get_last_line() -> usize {
232        use crate::document::LAST_LINE; // adjust path if needed
233        use std::sync::atomic::Ordering;
234
235        LAST_LINE.with(|val| val.load(Ordering::Acquire))
236    }
237
238    #[rstest]
239    fn position_at(mut parser: Parser) {
240        let source = "<div>こんにちは\nGoodbye\r\nSee you!\n</div>";
241        let text = Text::new(source.into());
242        let document = Document::new(text, parser.parse(source, None).unwrap());
243
244        assert_eq!(&document.texter.br_indexes.0, &[0, 20, 29, 38]);
245
246        assert_eq!(
247            document.position_at(0).unwrap(),
248            Position {
249                line: 0,
250                character: 0
251            }
252        );
253
254        // Offset 11 is inside the Japanese text "こんにちは"
255        assert_eq!(
256            document.position_at(11).unwrap(),
257            Position {
258                line: 0,
259                character: 11
260            }
261        );
262
263        // Offset 21 is at the beginning of "Goodbye" (after '\n')
264        assert_eq!(
265            document.position_at(21).unwrap(),
266            Position {
267                line: 1,
268                character: 0
269            }
270        );
271
272        // Offset 28 is in "Goodbye" (before '\r')
273        assert_eq!(
274            document.position_at(28).unwrap(),
275            Position {
276                line: 1,
277                character: 7
278            }
279        );
280
281        // Offset 30 is the last byte of "\r\n", meaning we move to the next line
282        assert_eq!(
283            document.position_at(30).unwrap(),
284            Position {
285                line: 2,
286                character: 0
287            }
288        );
289
290        // Offset 40 is at the last line at pos 2
291        assert_eq!(
292            document.position_at(40).unwrap(),
293            Position {
294                line: 3,
295                character: 2
296            }
297        );
298    }
299
300    #[rstest]
301    fn position_at_single_line(mut parser: Parser) {
302        let source = "<div>AREALLYREALLYREALLYLONGTEXT<div>";
303        let text = Text::new(source.into());
304        let document = Document::new(text, parser.parse(source, None).unwrap());
305
306        assert_eq!(&document.texter.br_indexes.0, &[0]);
307
308        assert_eq!(
309            document.position_at(0).unwrap(),
310            Position {
311                line: 0,
312                character: 0
313            }
314        );
315
316        assert_eq!(
317            document.position_at(5).unwrap(),
318            Position {
319                line: 0,
320                character: 5
321            }
322        );
323
324        assert_eq!(
325            document.position_at(30).unwrap(),
326            Position {
327                line: 0,
328                character: 30
329            }
330        );
331    }
332
333    #[rstest]
334    fn range_at(mut parser: Parser) {
335        let source = "<div>こんにちは\nGoodbye\r\nSee you!\n</div>";
336        let text = Text::new(source.into());
337        let document = Document::new(text, parser.parse(source, None).unwrap());
338
339        assert_eq!(&document.texter.br_indexes.0, &[0, 20, 29, 38]);
340
341        // Test range covering part of first line
342        assert_eq!(
343            document.range_at(0..11).unwrap(),
344            lsp_types::Range {
345                start: Position {
346                    line: 0,
347                    character: 0
348                },
349                end: Position {
350                    line: 0,
351                    character: 11
352                },
353            }
354        );
355
356        // Test range spanning multiple lines
357        assert_eq!(
358            document.range_at(15..28).unwrap(),
359            lsp_types::Range {
360                start: Position {
361                    line: 0,
362                    character: 15
363                },
364                end: Position {
365                    line: 1,
366                    character: 7
367                },
368            }
369        );
370
371        // Test range from start of a line to another
372        assert_eq!(
373            document.range_at(21..30).unwrap(),
374            lsp_types::Range {
375                start: Position {
376                    line: 1,
377                    character: 0
378                },
379                end: Position {
380                    line: 2,
381                    character: 0
382                },
383            }
384        );
385
386        // Test range entirely in one line
387        assert_eq!(
388            document.range_at(30..35).unwrap(),
389            lsp_types::Range {
390                start: Position {
391                    line: 2,
392                    character: 0
393                },
394                end: Position {
395                    line: 2,
396                    character: 5
397                },
398            }
399        );
400
401        // Test out-of-bounds range
402        assert_eq!(
403            document.range_at(35..50),
404            Err(PositionError::WrongRange {
405                range: 35..50,
406                position_error: Box::new(PositionError::WrongPosition { offset: 50 })
407            })
408        );
409    }
410
411    #[rstest]
412    fn range_at_single_line(mut parser: Parser) {
413        let source = "<div>AREALLYREALLYREALLYLONGTEXT<div>";
414        let text = Text::new(source.into());
415        let document = Document::new(text, parser.parse(source, None).unwrap());
416
417        assert_eq!(&document.texter.br_indexes.0, &[0]);
418
419        // Ensure the line break indexes are correct
420        assert_eq!(&document.texter.br_indexes.0, &[0]);
421
422        // Check range from start to some offset
423        assert_eq!(
424            document.range_at(0..5).unwrap(),
425            lsp_types::Range {
426                start: Position {
427                    line: 0,
428                    character: 0
429                },
430                end: Position {
431                    line: 0,
432                    character: 5
433                }
434            }
435        );
436
437        // Check range covering the entire line
438        let length = source.len();
439        assert_eq!(
440            document.range_at(0..length).unwrap(),
441            lsp_types::Range {
442                start: Position {
443                    line: 0,
444                    character: 0
445                },
446                end: Position {
447                    line: 0,
448                    character: length as u32
449                }
450            }
451        );
452
453        // Out-of-bounds check
454        assert_eq!(
455            document.range_at(0..(length + 5)),
456            Err(PositionError::WrongRange {
457                range: 0..(length + 5),
458                position_error: Box::new(PositionError::LineOutOfBound {
459                    offset: 42,
460                    length: 37
461                })
462            })
463        );
464    }
465
466    #[rstest]
467    fn offset_at(mut parser: Parser) {
468        let source = "Apples\nBashdjad\nashdkasdh\nasdsad";
469        let text = Text::new(source.into());
470        let document = Document::new(text, parser.parse(source, None).unwrap());
471
472        assert_eq!(&document.texter.br_indexes.0, &[0, 6, 15, 25]);
473
474        // Test for start of first line
475        assert_eq!(
476            document.offset_at(Position {
477                line: 0,
478                character: 0
479            }),
480            Some(0)
481        );
482
483        // Test for char at first line
484        assert_eq!(
485            document.offset_at(Position {
486                line: 0,
487                character: 5
488            }),
489            Some(5)
490        );
491
492        // Test for middle of second line (after "Bash")
493        assert_eq!(
494            document.offset_at(Position {
495                line: 1,
496                character: 3
497            }),
498            Some(10)
499        );
500
501        // Test for end of last line
502        assert_eq!(
503            document.offset_at(Position {
504                line: 3,
505                character: 5
506            }),
507            Some(31)
508        );
509
510        // Test for out of bounds position (line too high)
511        assert_eq!(
512            document.offset_at(Position {
513                line: 10,
514                character: 0
515            }),
516            None
517        );
518
519        // Test for out of bounds position (column too high)
520        assert_eq!(
521            document.offset_at(Position {
522                line: 1,
523                character: 100
524            }),
525            None
526        );
527    }
528
529    #[rstest]
530    fn line_tracking(mut parser: Parser) {
531        let source = "one\nline two\nline three\n";
532        let text = Text::new(source.into());
533        let document = Document::new(text, parser.parse(source, None).unwrap());
534
535        // Offset in line 0
536        let pos1 = document.position_at(2).unwrap();
537        assert_eq!(pos1.line, 0);
538        assert_eq!(get_last_line(), 0);
539
540        // Offset in line 1
541        let pos2 = document.position_at(6).unwrap();
542        assert_eq!(pos2.line, 1);
543        assert_eq!(get_last_line(), 1);
544
545        // Offset in line 2
546        let pos3 = document.position_at(18).unwrap();
547        assert_eq!(pos3.line, 2);
548        assert_eq!(get_last_line(), 2);
549
550        // Offset is ine line 0
551        // This should reset the last line index
552        let pos3 = document.position_at(0).unwrap();
553        assert_eq!(pos3.line, 0);
554        assert_eq!(get_last_line(), 0);
555    }
556}