Skip to main content

svelte_syntax/
cst.rs

1use tree_sitter::{InputEdit, Node, Parser, Point, Tree};
2
3use crate::error::CompileError;
4use crate::primitives::{BytePos, Span};
5use crate::source::SourceText;
6
/// Source languages the CST parser can be configured for.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Language {
    /// Svelte component source.
    Svelte,
}
13
14/// A parsed tree-sitter document holding the source text, language, and
15/// concrete syntax tree.
16///
17/// Use [`parse_svelte`] to create a `Document` from source text, or
18/// [`CstParser`] for more control over parser reuse.
19#[derive(Debug)]
20pub struct Document<'src> {
21    /// The language this document was parsed as.
22    pub language: Language,
23    /// The original source text.
24    pub source: SourceText<'src>,
25    /// The tree-sitter syntax tree.
26    pub tree: Tree,
27}
28
29impl<'src> Document<'src> {
30    /// Return the root tree-sitter node.
31    pub fn root_node(&self) -> Node<'_> {
32        self.tree.root_node()
33    }
34
35    /// Return the root node kind.
36    pub fn root_kind(&self) -> &str {
37        self.root_node().kind()
38    }
39
40    /// Return `true` if the CST contains parse errors.
41    pub fn has_error(&self) -> bool {
42        self.root_node().has_error()
43    }
44
45    /// Return the root node span in byte offsets.
46    pub fn root_span(&self) -> Span {
47        node_span(self.root_node())
48    }
49
50    /// Apply an edit to the stored tree so it can be reused for incremental reparsing.
51    pub fn apply_edit(&mut self, edit: CstEdit) {
52        self.tree.edit(&edit.into_input_edit());
53    }
54
55    /// Clone the tree for incremental parsing. The source text reference is
56    /// preserved but the tree is cloned so `apply_edit` can be called on the
57    /// copy without mutating the original.
58    pub fn clone_for_incremental(&self) -> Document<'src> {
59        Document {
60            language: self.language,
61            source: self.source,
62            tree: self.tree.clone(),
63        }
64    }
65
66    /// Return byte ranges that differ structurally between this document and a
67    /// previously parsed document. Wraps [`Tree::changed_ranges`].
68    pub fn changed_ranges(&self, old: &Document<'_>) -> Vec<std::ops::Range<usize>> {
69        old.tree
70            .changed_ranges(&self.tree)
71            .map(|r| r.start_byte..r.end_byte)
72            .collect()
73    }
74}
75
/// Row/column location inside source text, as carried by [`CstEdit`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct CstPoint {
    /// Line index, starting at zero.
    pub row: usize,
    /// Byte offset from the start of the line, starting at zero.
    pub column: usize,
}
84
85/// Describes a text edit for incremental reparsing.
86///
87/// Records the byte range that was replaced and the resulting positions after
88/// the edit. Use the convenience constructors [`CstEdit::replace`],
89/// [`CstEdit::insert`], and [`CstEdit::delete`] to build edits from the old
90/// source text.
91///
92/// # Example
93///
94/// ```
95/// use svelte_syntax::CstEdit;
96///
97/// let old = "<div>Hello</div>";
98/// let edit = CstEdit::replace(old, 5, 10, "World");
99///
100/// assert_eq!(edit.start_byte, 5);
101/// assert_eq!(edit.new_end_byte, 10); // 5 + "World".len()
102/// ```
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub struct CstEdit {
105    /// Byte offset where the edit begins.
106    pub start_byte: usize,
107    /// Byte offset where the old text ended (before the edit).
108    pub old_end_byte: usize,
109    /// Byte offset where the new text ends (after the edit).
110    pub new_end_byte: usize,
111    /// Row/column position where the edit begins.
112    pub start_position: CstPoint,
113    /// Row/column position where the old text ended.
114    pub old_end_position: CstPoint,
115    /// Row/column position where the new text ends.
116    pub new_end_position: CstPoint,
117}
118
119impl CstEdit {
120    /// Create an edit that replaces `old_source[start_byte..old_end_byte]` with
121    /// `new_text`. Positions are computed automatically from the old source.
122    pub fn replace(
123        old_source: &str,
124        start_byte: usize,
125        old_end_byte: usize,
126        new_text: &str,
127    ) -> Self {
128        let start_position = byte_point_at_offset(old_source, start_byte);
129        let old_end_position = byte_point_at_offset(old_source, old_end_byte);
130        let new_end_byte = start_byte.saturating_add(new_text.len());
131        let new_end_position = advance_point(start_position, new_text);
132
133        Self {
134            start_byte,
135            old_end_byte,
136            new_end_byte,
137            start_position,
138            old_end_position,
139            new_end_position,
140        }
141    }
142
143    /// Create an edit that inserts `new_text` at `start_byte` without removing
144    /// any existing text.
145    pub fn insert(old_source: &str, start_byte: usize, new_text: &str) -> Self {
146        Self::replace(old_source, start_byte, start_byte, new_text)
147    }
148
149    /// Create an edit that deletes `old_source[start_byte..old_end_byte]`.
150    pub fn delete(old_source: &str, start_byte: usize, old_end_byte: usize) -> Self {
151        Self::replace(old_source, start_byte, old_end_byte, "")
152    }
153
154    fn into_input_edit(self) -> InputEdit {
155        InputEdit {
156            start_byte: self.start_byte,
157            old_end_byte: self.old_end_byte,
158            new_end_byte: self.new_end_byte,
159            start_position: self.start_position.into_point(),
160            old_end_position: self.old_end_position.into_point(),
161            new_end_position: self.new_end_position.into_point(),
162        }
163    }
164}
165
166impl CstPoint {
167    fn into_point(self) -> Point {
168        Point {
169            row: self.row,
170            column: self.column,
171        }
172    }
173}
174
/// Typestate marker for a parser before a language has been selected.
///
/// Carries no data; it only exists at the type level.
#[derive(Debug, Clone, Copy)]
pub struct Unconfigured;
177/// Typestate marker for a parser after a language has been selected.
178pub struct Configured {
179    language: Language,
180}
181
182/// Tree-sitter-backed CST parser with typestate for language selection.
183///
184/// Create a parser with [`CstParser::new`], configure it with
185/// [`CstParser::configure`], then call [`parse`](CstParser::parse) or
186/// [`parse_incremental`](CstParser::parse_incremental).
187///
188/// For a simpler one-shot API, use the free function [`parse_svelte`].
189///
190/// # Example
191///
192/// ```
193/// use svelte_syntax::cst::{CstParser, Language};
194/// use svelte_syntax::{SourceId, SourceText};
195///
196/// let mut parser = CstParser::new().configure(Language::Svelte)?;
197/// let source = SourceText::new(SourceId::new(0), "<p>hi</p>", None);
198/// let doc = parser.parse(source)?;
199///
200/// assert_eq!(doc.root_kind(), "document");
201/// # Ok::<(), svelte_syntax::CompileError>(())
202/// ```
203pub struct CstParser<State> {
204    parser: Parser,
205    state: State,
206}
207
208impl CstParser<Unconfigured> {
209    /// Create a parser with no configured language.
210    pub fn new() -> Self {
211        Self {
212            parser: Parser::new(),
213            state: Unconfigured,
214        }
215    }
216
217    /// Configure the parser for a supported language.
218    pub fn configure(mut self, language: Language) -> Result<CstParser<Configured>, CompileError> {
219        let ts_lang = match language {
220            Language::Svelte => tree_sitter_svelte::language(),
221        };
222
223        self.parser
224            .set_language(&ts_lang)
225            .map_err(|_| CompileError::internal("failed to configure tree-sitter language"))?;
226
227        Ok(CstParser {
228            parser: self.parser,
229            state: Configured { language },
230        })
231    }
232}
233
234impl Default for CstParser<Unconfigured> {
235    fn default() -> Self {
236        Self::new()
237    }
238}
239
240impl CstParser<Configured> {
241    /// Parse source text into a CST document.
242    pub fn parse<'src>(
243        &mut self,
244        source: SourceText<'src>,
245    ) -> Result<Document<'src>, CompileError> {
246        let tree = self
247            .parser
248            .parse(source.text, None)
249            .ok_or_else(|| CompileError::internal("tree-sitter parser returned no syntax tree"))?;
250
251        Ok(Document {
252            language: self.state.language,
253            source,
254            tree,
255        })
256    }
257
258    /// Parse source text using a previous tree plus edit information for incremental reparsing.
259    pub fn parse_incremental<'src>(
260        &mut self,
261        source: SourceText<'src>,
262        previous: &Document<'_>,
263        edit: CstEdit,
264    ) -> Result<Document<'src>, CompileError> {
265        let mut previous_tree = previous.tree.clone();
266        previous_tree.edit(&edit.into_input_edit());
267
268        let tree = self
269            .parser
270            .parse(source.text, Some(&previous_tree))
271            .ok_or_else(|| CompileError::internal("tree-sitter parser returned no syntax tree"))?;
272
273        Ok(Document {
274            language: self.state.language,
275            source,
276            tree,
277        })
278    }
279}
280
281/// Parse Svelte source into a tree-sitter CST document.
282///
283/// This is the simplest way to obtain a concrete syntax tree. For parser
284/// reuse across multiple files, use [`CstParser`] directly.
285///
286/// # Example
287///
288/// ```
289/// use svelte_syntax::{SourceId, SourceText, parse_svelte};
290///
291/// let source = SourceText::new(SourceId::new(0), "<div>hello</div>", None);
292/// let cst = parse_svelte(source)?;
293///
294/// assert_eq!(cst.root_kind(), "document");
295/// # Ok::<(), svelte_syntax::CompileError>(())
296/// ```
297pub fn parse_svelte<'src>(source: SourceText<'src>) -> Result<Document<'src>, CompileError> {
298    let mut parser = CstParser::new().configure(Language::Svelte)?;
299    parser.parse(source)
300}
301
302/// Parse Svelte source using an already-edited old tree for incremental reparsing.
303/// Unlike `parse_svelte_incremental`, this expects the caller to have already
304/// called `apply_edit` on the old document.
305pub fn parse_svelte_with_old_tree<'src>(
306    source: SourceText<'src>,
307    edited_old: &Document<'_>,
308) -> Result<Document<'src>, CompileError> {
309    let ts_lang = match edited_old.language {
310        Language::Svelte => tree_sitter_svelte::language(),
311    };
312    let mut parser = Parser::new();
313    parser
314        .set_language(&ts_lang)
315        .map_err(|_| CompileError::internal("failed to configure tree-sitter language"))?;
316    let tree = parser
317        .parse(source.text, Some(&edited_old.tree))
318        .ok_or_else(|| CompileError::internal("tree-sitter parser returned no syntax tree"))?;
319    Ok(Document {
320        language: edited_old.language,
321        source,
322        tree,
323    })
324}
325
326/// Parse Svelte source into a tree-sitter CST using a previous CST and edit for incremental reparsing.
327pub fn parse_svelte_incremental<'src>(
328    source: SourceText<'src>,
329    previous: &Document<'_>,
330    edit: CstEdit,
331) -> Result<Document<'src>, CompileError> {
332    let mut parser = CstParser::new().configure(Language::Svelte)?;
333    parser.parse_incremental(source, previous, edit)
334}
335
336fn node_span(node: Node<'_>) -> Span {
337    let start = byte_pos_saturating(node.start_byte());
338    let end = byte_pos_saturating(node.end_byte());
339    Span::new(start, end)
340}
341
342fn byte_pos_saturating(offset: usize) -> BytePos {
343    u32::try_from(offset)
344        .map(BytePos::from)
345        .unwrap_or_else(|_| BytePos::from(u32::MAX))
346}
347
348fn byte_point_at_offset(source: &str, offset: usize) -> CstPoint {
349    let bounded = offset.min(source.len());
350    let mut row = 0usize;
351    let mut column = 0usize;
352
353    for byte in source.as_bytes().iter().take(bounded) {
354        if *byte == b'\n' {
355            row += 1;
356            column = 0;
357        } else {
358            column += 1;
359        }
360    }
361
362    CstPoint { row, column }
363}
364
365fn advance_point(start: CstPoint, inserted_text: &str) -> CstPoint {
366    let mut point = start;
367
368    for byte in inserted_text.as_bytes() {
369        if *byte == b'\n' {
370            point.row += 1;
371            point.column = 0;
372        } else {
373            point.column += 1;
374        }
375    }
376
377    point
378}
379
#[cfg(test)]
mod tests {
    use super::*;
    use crate::primitives::SourceId;

    /// A simple element parses and the root span reaches the end of the source.
    #[test]
    fn parses_svelte_cst_document() {
        let source = SourceText::new(SourceId::new(1), "<div>Hello</div>", None);
        let cst = parse_svelte(source).expect("expected tree-sitter CST parse to succeed");

        assert!(!cst.root_kind().is_empty());
        assert!(cst.root_span().end.as_usize() >= cst.source.len());
    }

    /// Plain attributes show up as `attribute` / `attribute_name` nodes.
    #[test]
    fn cst_contains_attribute_nodes() {
        let source = SourceText::new(SourceId::new(2), "<div class='foo'></div>", None);
        let cst = parse_svelte(source).expect("expected cst parse to succeed");
        let sexp = cst.root_node().to_sexp();

        assert!(sexp.contains("(attribute"));
        assert!(sexp.contains("(attribute_name"));
    }

    /// `style:` directives produce directive and identifier nodes.
    #[test]
    fn cst_style_directive_shape() {
        let source = SourceText::new(SourceId::new(3), "<div style:color={myColor}></div>", None);
        let cst = parse_svelte(source).expect("expected cst parse to succeed");
        let sexp = cst.root_node().to_sexp();

        assert!(sexp.contains("attribute_directive"));
        assert!(sexp.contains("attribute_identifier"));
    }

    /// `{#if}` blocks produce an `if_block` with a `block_end`.
    #[test]
    fn cst_if_block_shape() {
        let source = SourceText::new(SourceId::new(4), "{#if foo}bar{/if}", None);
        let cst = parse_svelte(source).expect("expected cst parse to succeed");
        let sexp = cst.root_node().to_sexp();

        assert!(sexp.contains("if_block"));
        assert!(sexp.contains("block_end"));
    }

    /// Unterminated tags inside a block must not swallow the following
    /// `{:else}` / `{:then}` branches.
    #[test]
    fn cst_breaks_unterminated_tags_before_block_branches() {
        let source = SourceText::new(
            SourceId::new(5),
            "{#if true}\n\t<input>\n{:else}\n{/if}\n\n{#await true}\n\t<input>\n{:then f}\n{/await}",
            None,
        );
        let cst = parse_svelte(source).expect("expected cst parse to succeed");
        let sexp = cst.root_node().to_sexp();

        assert!(sexp.matches("(else_clause").count() + sexp.matches("(await_branch").count() >= 2);
    }

    /// Directives and `{@debug}` tags produce the expected node kinds.
    #[test]
    fn cst_directive_and_debug_tag_shapes() {
        let source = SourceText::new(
            SourceId::new(6),
            "<div let:x style:color={c} transition:fade={t} animate:flip={a} use:act={u}></div>{@debug x, y}",
            None,
        );
        let cst = parse_svelte(source).expect("expected cst parse to succeed");
        let sexp = cst.root_node().to_sexp();

        assert!(sexp.contains("attribute_name"));
        assert!(sexp.contains("debug_tag"));
        assert!(sexp.contains("expression_value"));
    }

    /// Malformed snippet headers still yield snippet nodes in the tree.
    #[test]
    fn cst_malformed_snippet_headers_report_error_shape() {
        let source = SourceText::new(SourceId::new(7), "{#snippet children()hi{/snippet}", None);
        let cst = parse_svelte(source).expect("expected cst parse to succeed");
        let sexp = cst.root_node().to_sexp();
        assert!(
            cst.has_error(),
            "expected malformed snippet header CST error"
        );
        assert!(sexp.contains("(snippet_name"));

        let source = SourceText::new(SourceId::new(8), "{#snippet children(hi{/snippet}", None);
        let cst = parse_svelte(source).expect("expected cst parse to succeed");
        let sexp = cst.root_node().to_sexp();
        assert!(sexp.contains("(snippet_name"));
        assert!(sexp.contains("(snippet_parameters"));
    }

    /// An incremental reparse after an insertion matches a fresh parse.
    #[test]
    fn incremental_parse_matches_fresh_parse_after_insert() {
        let before_text = "<div>Hello</div>";
        let after_text = "<div>Hello {name}</div>";
        let before = SourceText::new(SourceId::new(9), before_text, None);
        let after = SourceText::new(SourceId::new(10), after_text, None);

        let mut parser = CstParser::new()
            .configure(Language::Svelte)
            .expect("parser");
        let previous = parser.parse(before).expect("initial parse");
        let edit = CstEdit::insert(before_text, "<div>Hello".len(), " {name}");

        let incremental = parser
            .parse_incremental(after, &previous, edit)
            .expect("incremental parse");
        let fresh = parse_svelte(after).expect("fresh parse");

        assert_eq!(
            incremental.root_node().to_sexp(),
            fresh.root_node().to_sexp()
        );
    }

    /// A document edited through `apply_edit` can be reused as the old tree
    /// of an incremental reparse.
    #[test]
    fn document_apply_edit_keeps_tree_reusable() {
        let before_text = "<div>Hello</div>";
        let after_text = "<div>Hi</div>";
        let before = SourceText::new(SourceId::new(11), before_text, None);
        let after = SourceText::new(SourceId::new(12), after_text, None);

        let mut parser = CstParser::new()
            .configure(Language::Svelte)
            .expect("parser");
        let mut previous = parser.parse(before).expect("initial parse");
        let edit = CstEdit::replace(before_text, "<div>".len(), "<div>Hello".len(), "Hi");
        previous.apply_edit(edit);

        // `previous` already has the edit recorded, so reparse through the
        // entry point that does not apply it a second time.
        let incremental =
            parse_svelte_with_old_tree(after, &previous).expect("incremental parse");
        let fresh = parse_svelte(after).expect("fresh parse");

        assert_eq!(
            incremental.root_node().to_sexp(),
            fresh.root_node().to_sexp()
        );
    }
}