auto_lsp_core/document/
mod.rs

1/*
2This file is part of auto-lsp.
3Copyright (C) 2025 CLAUZEL Adrien
4
5auto-lsp is free software: you can redistribute it and/or modify
6it under the terms of the GNU General Public License as published by
7the Free Software Foundation, either version 3 of the License, or
8(at your option) any later version.
9
10This program is distributed in the hope that it will be useful,
11but WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program.  If not, see <http://www.gnu.org/licenses/>
17*/
18
19use std::{
20    ops::Range,
21    sync::atomic::{AtomicUsize, Ordering},
22};
23use texter::core::text::Text;
24use texter_impl::{change::WrapChange, updateable::WrapTree};
25use tree_sitter::{Point, Tree};
26
27use crate::errors::{DocumentError, PositionError, TexterError, TreeSitterError};
28
29pub(crate) mod texter_impl;
30
31/// Represents a text document that combines plain text [`texter`] with its parsed syntax tree [`tree_sitter::Tree`].
32///
33/// This struct allows for incremental updates of both the text content and the syntax tree,
34/// ensuring they stay synchronized after each change. It provides utility functions for querying
35/// the syntax tree, such as finding nodes by position or range.
36#[derive(Debug, Clone)]
37pub struct Document {
38    pub texter: Text,
39    pub tree: Tree,
40}
41
thread_local! {
    /// Per-thread cache of the line most recently resolved by [`Document::position_at`].
    ///
    /// Starts at 0, meaning nothing has been cached yet. `position_at` stores the line it
    /// found here so that a later lookup can begin scanning from that line instead of from the
    /// top of the document. The cached line is only reused when the requested offset lies past
    /// that line's break index; otherwise the scan restarts from the first line.
    pub static LAST_LINE: AtomicUsize = const { AtomicUsize::new(0) };
}
54
55impl Document {
56    pub fn new(texter: Text, tree: Tree) -> Self {
57        Self { texter, tree }
58    }
59
60    /// Updates the document based on the provided list of text changes.
61    ///
62    /// This method applies the changes to both the text [`texter`] and the syntax tree [`Tree`].
63    /// It uses incremental parsing to minimize the cost of updating the syntax tree.
64    ///
65    /// # Parameters
66    /// - `parser`: A mutable reference to the Tree-sitter parser used to reparse the document.
67    /// - `changes`: A vector of `TextDocumentContentChangeEvent` objects representing text changes.
68    ///
69    /// # Errors
70    /// Returns an error if Tree-sitter fails to reparse the updated text
71    pub fn update(
72        &mut self,
73        parser: &mut tree_sitter::Parser,
74        changes: &[lsp_types::TextDocumentContentChangeEvent],
75    ) -> Result<(), DocumentError> {
76        let mut new_tree = WrapTree::from(&mut self.tree);
77
78        for change in changes {
79            self.texter
80                .update(WrapChange::from(change).change, &mut new_tree)
81                .map_err(|e| DocumentError::from(TexterError::from(e)))?;
82        }
83
84        self.tree = parser
85            .parse(self.texter.text.as_bytes(), Some(&self.tree))
86            .ok_or_else(|| DocumentError::from(TreeSitterError::TreeSitterParser))?;
87
88        Ok(())
89    }
90
91    /// Retrieves the smallest syntax node that spans the given position in the document.
92    pub fn node_at_position(&self, position: lsp_types::Position) -> Option<tree_sitter::Node<'_>> {
93        let position = Point {
94            row: position.line as usize,
95            column: position.character as usize,
96        };
97
98        self.tree
99            .root_node()
100            .named_descendant_for_point_range(position, position)
101    }
102
103    /// Retrieves the range (start and end positions) of the smallest syntax node that spans the given byte offset.
104    pub fn node_range_at(&self, offset: usize) -> Option<lsp_types::Range> {
105        self.tree
106            .root_node()
107            .named_descendant_for_byte_range(offset, offset)
108            .map(|pos| lsp_types::Range {
109                start: lsp_types::Position {
110                    line: pos.start_position().row as u32,
111                    character: pos.start_position().column as u32,
112                },
113                end: lsp_types::Position {
114                    line: pos.end_position().row as u32,
115                    character: pos.end_position().column as u32,
116                },
117            })
118    }
119
120    /// Converts a byte offset in the document to its corresponding position (line and character).
121    pub fn position_at(&self, offset: usize) -> Result<lsp_types::Position, PositionError> {
122        let mut last_br_index = 0;
123        let last_line = LAST_LINE.with(|a| a.load(Ordering::SeqCst));
124
125        // If the document is a single line, we can avoid the loop
126        if self.texter.br_indexes.0.len() == 1 {
127            return if offset > self.texter.text.len() {
128                Err(PositionError::LineOutOfBound {
129                    offset,
130                    length: self.texter.text.len(),
131                })
132            } else {
133                Ok(lsp_types::Position {
134                    line: 0,
135                    character: offset as u32,
136                })
137            };
138        }
139
140        // Determine the starting line for the search
141        let start = match self.texter.br_indexes.0.get(last_line) {
142            Some(&br_index) if offset > br_index && last_line >= 1 => last_line, // Start from cached line if offset is beyond it
143            _ => 1, // Start from at least index 1 to avoid incorrect 0 offset issues
144        };
145
146        for (i, &br_index) in self.texter.br_indexes.0.iter().skip(start).enumerate() {
147            if offset <= br_index {
148                // Cache this line for future calls
149                LAST_LINE.with(|a| a.store(i + (start - 1), Ordering::Release));
150
151                // Compute column by subtracting the last break index
152                let col = offset.saturating_sub(last_br_index);
153
154                return Ok(lsp_types::Position {
155                    line: (i + (start - 1)) as u32,
156                    character: col as u32,
157                });
158            }
159
160            last_br_index = br_index + 1; // Move past the EOL character
161        }
162
163        if offset <= self.texter.text.len() {
164            let last_known_col = self.texter.br_indexes.0.iter().len();
165            let last_br = *self.texter.br_indexes.0.last().unwrap();
166            Ok(lsp_types::Position {
167                line: last_known_col.saturating_sub(1) as u32,
168                character: offset.saturating_sub(last_br) as u32,
169            })
170        } else {
171            Err(PositionError::WrongPosition { offset })
172        }
173    }
174
175    /// Converts a byte offset in the document to its corresponding range (start and end positions).
176    pub fn range_at(&self, range: Range<usize>) -> Result<lsp_types::Range, PositionError> {
177        let start = self
178            .position_at(range.start)
179            .map_err(|err| PositionError::WrongRange {
180                range: range.clone(),
181                position_error: Box::new(err),
182            })?;
183        let end = self
184            .position_at(range.end)
185            .map_err(|err| PositionError::WrongRange {
186                range: range.clone(),
187                position_error: Box::new(err),
188            })?;
189        Ok(lsp_types::Range { start, end })
190    }
191
192    /// Converts a position (line and character) in the document to its corresponding byte offset.
193    pub fn offset_at(&self, position: lsp_types::Position) -> Option<usize> {
194        let line_index = self.texter.br_indexes.row_start(position.line as usize)?;
195        let line_str = self.texter.get_row(position.line as usize)?;
196        let col = position.character as usize;
197        if col > line_str.len() {
198            None
199        } else {
200            Some(line_index + col)
201        }
202    }
203}
204
205#[cfg(test)]
206mod test {
207    use super::*;
208    use lsp_types::Position;
209    use rstest::{fixture, rstest};
210    use tree_sitter::Parser;
211
212    #[fixture]
213    fn parser() -> Parser {
214        let mut p = Parser::new();
215        p.set_language(&tree_sitter_html::LANGUAGE.into()).unwrap();
216        p
217    }
218
219    fn get_last_line() -> usize {
220        use crate::document::LAST_LINE; // adjust path if needed
221        use std::sync::atomic::Ordering;
222
223        LAST_LINE.with(|val| val.load(Ordering::Acquire))
224    }
225
226    #[rstest]
227    fn position_at(mut parser: Parser) {
228        let source = "<div>こんにちは\nGoodbye\r\nSee you!\n</div>";
229        let text = Text::new(source.into());
230        let document = Document::new(text, parser.parse(source, None).unwrap());
231
232        assert_eq!(&document.texter.br_indexes.0, &[0, 20, 29, 38]);
233
234        assert_eq!(
235            document.position_at(0).unwrap(),
236            Position {
237                line: 0,
238                character: 0
239            }
240        );
241
242        // Offset 11 is inside the Japanese text "こんにちは"
243        assert_eq!(
244            document.position_at(11).unwrap(),
245            Position {
246                line: 0,
247                character: 11
248            }
249        );
250
251        // Offset 21 is at the beginning of "Goodbye" (after '\n')
252        assert_eq!(
253            document.position_at(21).unwrap(),
254            Position {
255                line: 1,
256                character: 0
257            }
258        );
259
260        // Offset 28 is in "Goodbye" (before '\r')
261        assert_eq!(
262            document.position_at(28).unwrap(),
263            Position {
264                line: 1,
265                character: 7
266            }
267        );
268
269        // Offset 30 is the last byte of "\r\n", meaning we move to the next line
270        assert_eq!(
271            document.position_at(30).unwrap(),
272            Position {
273                line: 2,
274                character: 0
275            }
276        );
277
278        // Offset 40 is at the last line at pos 2
279        assert_eq!(
280            document.position_at(40).unwrap(),
281            Position {
282                line: 3,
283                character: 2
284            }
285        );
286    }
287
288    #[rstest]
289    fn position_at_single_line(mut parser: Parser) {
290        let source = "<div>AREALLYREALLYREALLYLONGTEXT<div>";
291        let text = Text::new(source.into());
292        let document = Document::new(text, parser.parse(source, None).unwrap());
293
294        assert_eq!(&document.texter.br_indexes.0, &[0]);
295
296        assert_eq!(
297            document.position_at(0).unwrap(),
298            Position {
299                line: 0,
300                character: 0
301            }
302        );
303
304        assert_eq!(
305            document.position_at(5).unwrap(),
306            Position {
307                line: 0,
308                character: 5
309            }
310        );
311
312        assert_eq!(
313            document.position_at(30).unwrap(),
314            Position {
315                line: 0,
316                character: 30
317            }
318        );
319    }
320
321    #[rstest]
322    fn range_at(mut parser: Parser) {
323        let source = "<div>こんにちは\nGoodbye\r\nSee you!\n</div>";
324        let text = Text::new(source.into());
325        let document = Document::new(text, parser.parse(source, None).unwrap());
326
327        assert_eq!(&document.texter.br_indexes.0, &[0, 20, 29, 38]);
328
329        // Test range covering part of first line
330        assert_eq!(
331            document.range_at(0..11).unwrap(),
332            lsp_types::Range {
333                start: Position {
334                    line: 0,
335                    character: 0
336                },
337                end: Position {
338                    line: 0,
339                    character: 11
340                },
341            }
342        );
343
344        // Test range spanning multiple lines
345        assert_eq!(
346            document.range_at(15..28).unwrap(),
347            lsp_types::Range {
348                start: Position {
349                    line: 0,
350                    character: 15
351                },
352                end: Position {
353                    line: 1,
354                    character: 7
355                },
356            }
357        );
358
359        // Test range from start of a line to another
360        assert_eq!(
361            document.range_at(21..30).unwrap(),
362            lsp_types::Range {
363                start: Position {
364                    line: 1,
365                    character: 0
366                },
367                end: Position {
368                    line: 2,
369                    character: 0
370                },
371            }
372        );
373
374        // Test range entirely in one line
375        assert_eq!(
376            document.range_at(30..35).unwrap(),
377            lsp_types::Range {
378                start: Position {
379                    line: 2,
380                    character: 0
381                },
382                end: Position {
383                    line: 2,
384                    character: 5
385                },
386            }
387        );
388
389        // Test out-of-bounds range
390        assert_eq!(
391            document.range_at(35..50),
392            Err(PositionError::WrongRange {
393                range: 35..50,
394                position_error: Box::new(PositionError::WrongPosition { offset: 50 })
395            })
396        );
397    }
398
399    #[rstest]
400    fn range_at_single_line(mut parser: Parser) {
401        let source = "<div>AREALLYREALLYREALLYLONGTEXT<div>";
402        let text = Text::new(source.into());
403        let document = Document::new(text, parser.parse(source, None).unwrap());
404
405        assert_eq!(&document.texter.br_indexes.0, &[0]);
406
407        // Ensure the line break indexes are correct
408        assert_eq!(&document.texter.br_indexes.0, &[0]);
409
410        // Check range from start to some offset
411        assert_eq!(
412            document.range_at(0..5).unwrap(),
413            lsp_types::Range {
414                start: Position {
415                    line: 0,
416                    character: 0
417                },
418                end: Position {
419                    line: 0,
420                    character: 5
421                }
422            }
423        );
424
425        // Check range covering the entire line
426        let length = source.len();
427        assert_eq!(
428            document.range_at(0..length).unwrap(),
429            lsp_types::Range {
430                start: Position {
431                    line: 0,
432                    character: 0
433                },
434                end: Position {
435                    line: 0,
436                    character: length as u32
437                }
438            }
439        );
440
441        // Out-of-bounds check
442        assert_eq!(
443            document.range_at(0..(length + 5)),
444            Err(PositionError::WrongRange {
445                range: 0..(length + 5),
446                position_error: Box::new(PositionError::LineOutOfBound {
447                    offset: 42,
448                    length: 37
449                })
450            })
451        );
452    }
453
454    #[rstest]
455    fn offset_at(mut parser: Parser) {
456        let source = "Apples\nBashdjad\nashdkasdh\nasdsad";
457        let text = Text::new(source.into());
458        let document = Document::new(text, parser.parse(source, None).unwrap());
459
460        assert_eq!(&document.texter.br_indexes.0, &[0, 6, 15, 25]);
461
462        // Test for start of first line
463        assert_eq!(
464            document.offset_at(Position {
465                line: 0,
466                character: 0
467            }),
468            Some(0)
469        );
470
471        // Test for char at first line
472        assert_eq!(
473            document.offset_at(Position {
474                line: 0,
475                character: 5
476            }),
477            Some(5)
478        );
479
480        // Test for middle of second line (after "Bash")
481        assert_eq!(
482            document.offset_at(Position {
483                line: 1,
484                character: 3
485            }),
486            Some(10)
487        );
488
489        // Test for end of last line
490        assert_eq!(
491            document.offset_at(Position {
492                line: 3,
493                character: 5
494            }),
495            Some(31)
496        );
497
498        // Test for out of bounds position (line too high)
499        assert_eq!(
500            document.offset_at(Position {
501                line: 10,
502                character: 0
503            }),
504            None
505        );
506
507        // Test for out of bounds position (column too high)
508        assert_eq!(
509            document.offset_at(Position {
510                line: 1,
511                character: 100
512            }),
513            None
514        );
515    }
516
517    #[rstest]
518    fn line_tracking(mut parser: Parser) {
519        let source = "one\nline two\nline three\n";
520        let text = Text::new(source.into());
521        let document = Document::new(text, parser.parse(source, None).unwrap());
522
523        // Offset in line 0
524        let pos1 = document.position_at(2).unwrap();
525        assert_eq!(pos1.line, 0);
526        assert_eq!(get_last_line(), 0);
527
528        // Offset in line 1
529        let pos2 = document.position_at(6).unwrap();
530        assert_eq!(pos2.line, 1);
531        assert_eq!(get_last_line(), 1);
532
533        // Offset in line 2
534        let pos3 = document.position_at(18).unwrap();
535        assert_eq!(pos3.line, 2);
536        assert_eq!(get_last_line(), 2);
537
538        // Offset is ine line 0
539        // This should reset the last line index
540        let pos3 = document.position_at(0).unwrap();
541        assert_eq!(pos3.line, 0);
542        assert_eq!(get_last_line(), 0);
543    }
544}