Skip to main content

deb822_lossless/
parse.rs

1//! Parse wrapper type following rust-analyzer's pattern for thread-safe storage in Salsa.
2
3use crate::lossless::{Deb822, ParseError, PositionedParseError};
4use rowan::ast::AstNode;
5use rowan::{GreenNode, SyntaxNode};
6use std::marker::PhantomData;
7
8/// The result of parsing: a syntax tree and a collection of errors.
9///
10/// This type is designed to be stored in Salsa databases as it contains
11/// the thread-safe `GreenNode` instead of the non-thread-safe `SyntaxNode`.
12#[derive(Debug, Clone, PartialEq, Eq)]
13pub struct Parse<T> {
14    green: GreenNode,
15    errors: Vec<String>,
16    positioned_errors: Vec<PositionedParseError>,
17    _ty: PhantomData<T>,
18}
19
20impl<T> Parse<T> {
21    /// Create a new Parse result from a GreenNode and errors
22    pub fn new(green: GreenNode, errors: Vec<String>) -> Self {
23        Parse {
24            green,
25            errors,
26            positioned_errors: Vec::new(),
27            _ty: PhantomData,
28        }
29    }
30
31    /// Create a new Parse result from a GreenNode, errors, and positioned errors
32    pub fn new_with_positioned_errors(
33        green: GreenNode,
34        errors: Vec<String>,
35        positioned_errors: Vec<PositionedParseError>,
36    ) -> Self {
37        Parse {
38            green,
39            errors,
40            positioned_errors,
41            _ty: PhantomData,
42        }
43    }
44
45    /// Get the green node (thread-safe representation)
46    pub fn green(&self) -> &GreenNode {
47        &self.green
48    }
49
50    /// Get the syntax errors
51    pub fn errors(&self) -> &[String] {
52        &self.errors
53    }
54
55    /// Get parse errors with position information
56    pub fn positioned_errors(&self) -> &[PositionedParseError] {
57        &self.positioned_errors
58    }
59
60    /// Get parse errors as strings (for backward compatibility if needed)
61    pub fn error_messages(&self) -> Vec<String> {
62        self.positioned_errors
63            .iter()
64            .map(|e| e.message.clone())
65            .collect()
66    }
67
68    /// Check if there are any errors
69    pub fn ok(&self) -> bool {
70        self.errors.is_empty()
71    }
72
73    /// Convert to a Result, returning the tree if there are no errors
74    pub fn to_result(self) -> Result<T, ParseError>
75    where
76        T: AstNode<Language = crate::lossless::Lang>,
77    {
78        if self.errors.is_empty() {
79            let node = SyntaxNode::new_root_mut(self.green);
80            Ok(T::cast(node).expect("root node has wrong type"))
81        } else {
82            Err(ParseError(self.errors))
83        }
84    }
85
86    /// Get the parsed syntax tree
87    ///
88    /// Returns the syntax tree even if there are parse errors.
89    /// Use `errors()` or `positioned_errors()` to check for errors if needed.
90    /// This allows for error-resilient tooling that can work with partial/invalid input.
91    pub fn tree(&self) -> T
92    where
93        T: AstNode<Language = crate::lossless::Lang>,
94    {
95        let node = SyntaxNode::new_root_mut(self.green.clone());
96        T::cast(node).expect("root node has wrong type")
97    }
98
99    /// Get the syntax node
100    pub fn syntax_node(&self) -> SyntaxNode<crate::lossless::Lang> {
101        SyntaxNode::new_root(self.green.clone())
102    }
103}
104
105// Implement Send + Sync since GreenNode is thread-safe
106unsafe impl<T> Send for Parse<T> {}
107unsafe impl<T> Sync for Parse<T> {}
108
109impl Parse<Deb822> {
110    /// Parse deb822 text, returning a Parse result
111    pub fn parse_deb822(text: &str) -> Self {
112        let parsed = crate::lossless::parse(text);
113        Parse::new_with_positioned_errors(
114            parsed.green_node,
115            parsed.errors,
116            parsed.positioned_errors,
117        )
118    }
119
120    /// Incrementally reparse after a text edit.
121    ///
122    /// Given the new full text and the range that was edited (in the *new* text
123    /// coordinates after the edit has been applied), this tries to reuse
124    /// unchanged paragraphs from the previous parse and only reparse the
125    /// affected region.
126    ///
127    /// Falls back to a full reparse if the edit spans the entire file or if
128    /// incremental reparsing is not beneficial.
129    pub fn reparse(&self, new_text: &str, edit: rowan::TextRange) -> Self {
130        use rowan::TextSize;
131
132        let root = &self.green;
133
134        // Collect children with their text ranges
135        let mut children: Vec<(
136            rowan::NodeOrToken<&rowan::GreenNodeData, &rowan::GreenTokenData>,
137            TextSize,
138            TextSize,
139        )> = Vec::new();
140        let mut offset = TextSize::from(0);
141        for child in root.children() {
142            let len = match &child {
143                rowan::NodeOrToken::Node(n) => n.text_len(),
144                rowan::NodeOrToken::Token(t) => t.text_len(),
145            };
146            children.push((child, offset, offset + len));
147            offset += len;
148        }
149
150        let old_len = offset;
151
152        // If there are very few children, just do a full reparse
153        if children.len() <= 2 {
154            return Self::parse_deb822(new_text);
155        }
156
157        // Find the range of children affected by the edit.
158        // The edit range is in terms of the *new* text. We need to figure out
159        // which old children overlap. The edit replaces old text
160        // [edit.start .. edit.start + old_len - new_len + edit.len()] with
161        // new text [edit.start .. edit.end].
162        let new_len = TextSize::of(new_text);
163        let len_delta: i64 = i64::from(u32::from(new_len)) - i64::from(u32::from(old_len));
164
165        // In old-text coordinates, the edit covered:
166        let old_edit_start = edit.start();
167        let old_edit_end = TextSize::from((i64::from(u32::from(edit.end())) - len_delta) as u32);
168
169        // Find first and last affected child indices.
170        // Use >= for the first child to catch inserts at child boundaries.
171        let first_affected = children
172            .iter()
173            .position(|(_, _, end)| *end >= old_edit_start);
174        let last_affected = children
175            .iter()
176            .rposition(|(_, start, _)| *start <= old_edit_end);
177
178        let (first_affected, last_affected) = match (first_affected, last_affected) {
179            (Some(f), Some(l)) => (f, l),
180            _ => return Self::parse_deb822(new_text),
181        };
182
183        // Expand to paragraph boundaries: find the text region in the *new*
184        // text that covers all affected children.  We use the old children
185        // offsets for the prefix (unchanged) and derive the suffix.
186        let reparse_start = children[first_affected].1;
187        let reparse_old_end = children[last_affected].2;
188
189        // In new-text coordinates, the end of the affected region is shifted
190        // by the length delta.
191        let reparse_new_end =
192            TextSize::from((i64::from(u32::from(reparse_old_end)) + len_delta) as u32);
193
194        // Bounds check
195        if u32::from(reparse_start) > u32::from(new_len)
196            || u32::from(reparse_new_end) > u32::from(new_len)
197        {
198            return Self::parse_deb822(new_text);
199        }
200
201        let reparse_slice = &new_text[usize::from(reparse_start)..usize::from(reparse_new_end)];
202
203        // Parse just the affected region
204        let reparsed = crate::lossless::parse(reparse_slice);
205        let reparsed_root = reparsed.green_node;
206
207        // Build the new root by splicing: prefix children + reparsed children + suffix children
208        let to_owned = |c: &rowan::NodeOrToken<&rowan::GreenNodeData, &rowan::GreenTokenData>| -> rowan::NodeOrToken<GreenNode, rowan::GreenToken> {
209            match c {
210                rowan::NodeOrToken::Node(n) => rowan::NodeOrToken::Node((*n).to_owned()),
211                rowan::NodeOrToken::Token(t) => rowan::NodeOrToken::Token((*t).to_owned()),
212            }
213        };
214        let mut new_root_children = Vec::new();
215        for (c, _, _) in &children[..first_affected] {
216            new_root_children.push(to_owned(c));
217        }
218        for c in reparsed_root.children() {
219            new_root_children.push(c.to_owned());
220        }
221        for (c, _, _) in &children[last_affected + 1..] {
222            new_root_children.push(to_owned(c));
223        }
224
225        let new_green = GreenNode::new(
226            rowan::SyntaxKind(crate::lex::SyntaxKind::ROOT as u16),
227            new_root_children,
228        );
229
230        // Collect errors: errors from unchanged prefix/suffix are tricky to
231        // preserve with correct offsets, so we re-collect from scratch.
232        // For a full solution we'd need to offset-shift the old errors, but
233        // since most files are error-free, a full error re-scan is acceptable.
234        // We use the reparsed errors (offset-shifted) plus any errors from
235        // children outside the reparsed region.
236        // For simplicity, combine positioned errors from the reparse
237        // (shifted to absolute positions) with a note that prefix/suffix
238        // errors are dropped. TODO: preserve prefix/suffix errors with shifted offsets.
239        let positioned_errors: Vec<_> = reparsed
240            .positioned_errors
241            .iter()
242            .map(|e| PositionedParseError {
243                message: e.message.clone(),
244                range: rowan::TextRange::new(
245                    e.range.start() + reparse_start,
246                    e.range.end() + reparse_start,
247                ),
248                code: e.code.clone(),
249            })
250            .collect();
251        let errors: Vec<_> = positioned_errors
252            .iter()
253            .map(|e| e.message.clone())
254            .collect();
255
256        Parse::new_with_positioned_errors(new_green, errors, positioned_errors)
257    }
258}
259
260#[cfg(test)]
261mod tests {
262    use super::*;
263
264    #[test]
265    fn test_positioned_errors_api() {
266        let input = "Invalid field without colon\nBroken: field: extra colon\n";
267        let parsed = Parse::<Deb822>::parse_deb822(input);
268
269        // Should have positioned errors
270        let positioned_errors = parsed.positioned_errors();
271        assert!(!positioned_errors.is_empty());
272
273        // Should still have string errors for backward compatibility
274        let string_errors = parsed.errors();
275        assert!(!string_errors.is_empty());
276
277        // Should be able to get error messages
278        let error_messages = parsed.error_messages();
279        assert_eq!(error_messages.len(), positioned_errors.len());
280
281        for (i, positioned_error) in positioned_errors.iter().enumerate() {
282            assert_eq!(positioned_error.message, error_messages[i]);
283            assert!(positioned_error.range.start() <= positioned_error.range.end());
284        }
285    }
286
287    #[test]
288    fn test_positioned_errors_example() {
289        // Example from the requirements document
290        let input = "Invalid: field\nBroken field without colon";
291        let parsed = Parse::<Deb822>::parse_deb822(input);
292
293        let positioned_errors = parsed.positioned_errors();
294        assert!(!positioned_errors.is_empty());
295
296        // Example usage like in a language server
297        for error in positioned_errors {
298            let start_offset: u32 = error.range.start().into();
299            let end_offset: u32 = error.range.end().into();
300            println!(
301                "Error at {:?} ({}..{}): {}",
302                error.range, start_offset, end_offset, error.message
303            );
304
305            // Verify we can extract the problematic text
306            if end_offset <= input.len() as u32 {
307                let error_text = &input[start_offset as usize..end_offset as usize];
308                assert!(!error_text.is_empty());
309            }
310        }
311    }
312
313    /// Helper: parse old text, apply edit to get new text, reparse incrementally,
314    /// and verify the result matches a full parse of the new text.
315    fn assert_incremental_matches_full(
316        old_text: &str,
317        edit_start: u32,
318        edit_end: u32,
319        replacement: &str,
320    ) {
321        let parsed = Parse::<Deb822>::parse_deb822(old_text);
322        let mut new_text = old_text.to_string();
323        new_text.replace_range(edit_start as usize..edit_end as usize, replacement);
324
325        // The edit range in new-text coordinates
326        let new_edit_end = edit_start + replacement.len() as u32;
327        let edit_range = rowan::TextRange::new(
328            rowan::TextSize::from(edit_start),
329            rowan::TextSize::from(new_edit_end),
330        );
331
332        let incremental = parsed.reparse(&new_text, edit_range);
333        let full = Parse::<Deb822>::parse_deb822(&new_text);
334
335        // The trees must produce identical text
336        let inc_tree = incremental.tree();
337        let full_tree = full.tree();
338        assert_eq!(
339            inc_tree.syntax().text().to_string(),
340            full_tree.syntax().text().to_string(),
341            "tree text mismatch for edit [{edit_start}..{edit_end}] -> {replacement:?}"
342        );
343
344        // Green nodes should be structurally equal
345        assert_eq!(
346            incremental.green(),
347            full.green(),
348            "green node mismatch for edit [{edit_start}..{edit_end}] -> {replacement:?}"
349        );
350    }
351
352    #[test]
353    fn test_reparse_edit_within_first_paragraph() {
354        let old = "Source: foo\nMaintainer: Alice\n\nPackage: bar\nArchitecture: all\n";
355        // Change "foo" to "baz" (bytes 8..11)
356        assert_incremental_matches_full(old, 8, 11, "baz");
357    }
358
359    #[test]
360    fn test_reparse_edit_within_second_paragraph() {
361        let old = "Source: foo\nMaintainer: Alice\n\nPackage: bar\nArchitecture: all\n";
362        // Change "bar" to "qux" (bytes 38..41)
363        assert_incremental_matches_full(old, 38, 41, "qux");
364    }
365
366    #[test]
367    fn test_reparse_insert_field_in_paragraph() {
368        let old = "Source: foo\n\nPackage: bar\nArchitecture: all\n";
369        // Insert "Section: net\n" after "Source: foo\n" (at byte 12)
370        assert_incremental_matches_full(old, 12, 12, "Section: net\n");
371    }
372
373    #[test]
374    fn test_reparse_delete_field() {
375        let old = "Source: foo\nSection: net\nMaintainer: Alice\n\nPackage: bar\n";
376        // Delete "Section: net\n" (bytes 12..25)
377        assert_incremental_matches_full(old, 12, 25, "");
378    }
379
380    #[test]
381    fn test_reparse_edit_spanning_paragraph_boundary() {
382        let old = "Source: foo\n\nPackage: bar\n\nPackage: baz\n";
383        // Delete the blank line and merge paragraphs (bytes 11..13)
384        assert_incremental_matches_full(old, 11, 13, "\n");
385    }
386
387    #[test]
388    fn test_reparse_single_paragraph() {
389        // Only one paragraph — should fall back to full reparse
390        let old = "Source: foo\nMaintainer: Alice\n";
391        assert_incremental_matches_full(old, 8, 11, "bar");
392    }
393
394    #[test]
395    fn test_reparse_add_new_paragraph() {
396        let old = "Source: foo\n\nPackage: bar\n";
397        // Add a new paragraph at the end
398        assert_incremental_matches_full(old, 25, 25, "\nPackage: baz\nArchitecture: any\n");
399    }
400
401    #[test]
402    fn test_tree_with_errors_does_not_panic() {
403        // Verify that tree() returns a tree even when there are parse errors,
404        // enabling error-resilient tooling
405        let input = "Invalid field without colon\nBroken: field: extra colon\n";
406        let parsed = Parse::<Deb822>::parse_deb822(input);
407
408        // Verify there are errors
409        assert!(!parsed.errors().is_empty());
410
411        // tree() should not panic despite errors
412        let tree = parsed.tree();
413
414        // Verify we got a valid syntax tree
415        assert!(tree.syntax().text().to_string() == input);
416    }
417}