Skip to main content

tree_sitter_language_pack/
parsing.rs

1//! Universal parser/tree/node surface over `tree_sitter`.
2//!
3//! These wrappers own the relationship between a [`Tree`] and the [`Node`]s
4//! and [`TreeCursor`]s derived from it. Upstream `tree_sitter::Node<'a>` and
5//! `tree_sitter::TreeCursor<'a>` borrow from `tree_sitter::Tree`; that
6//! borrow can't be propagated across FFI boundaries, so each [`Node`] and
7//! [`TreeCursor`] here owns an `Arc<tree_sitter::Tree>` and stores the raw
8//! upstream value with an extended `'static` lifetime. The `Arc<>` keeps
9//! the underlying tree alive for as long as any derived node or cursor
10//! exists.
11//!
12//! Cloning a [`Tree`] (and therefore any [`Node`] or [`TreeCursor`]) is
13//! cheap — one atomic refcount bump.
14
15use std::sync::Arc;
16
17use crate::error::Error;
18
/// A source position — row + column, both zero-indexed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Point {
    /// Zero-indexed row number.
    pub row: usize,
    /// Zero-indexed column number, measured in bytes from the start of the row.
    ///
    /// The value is copied unchanged from `tree_sitter::Point`, which counts
    /// bytes — not characters and not UTF-16 code units — so a multi-byte
    /// character advances the column by more than one.
    pub column: usize,
}
28
29impl Point {
30    /// Construct a [`Point`] from row and column.
31    #[must_use]
32    pub fn new(row: usize, column: usize) -> Self {
33        Self { row, column }
34    }
35}
36
37impl From<tree_sitter::Point> for Point {
38    fn from(p: tree_sitter::Point) -> Self {
39        Self {
40            row: p.row,
41            column: p.column,
42        }
43    }
44}
45
/// A half-open span of byte offsets: `start` is included, `end` is not.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ByteRange {
    /// Byte offset of the first byte covered by the range (inclusive).
    pub start: usize,
    /// Byte offset one past the last covered byte (exclusive).
    pub end: usize,
}
55
56/// A tree-sitter parser configured for one language at a time.
57///
58/// # Example
59///
60/// ```no_run
61/// use tree_sitter_language_pack::Parser;
62///
63/// let mut parser = Parser::new();
64/// parser.set_language("python")?;
65/// let tree = parser.parse("def hello(): pass").expect("parse failed");
66/// assert_eq!(tree.root_node().kind(), "module");
67/// # Ok::<(), tree_sitter_language_pack::Error>(())
68/// ```
69pub struct Parser {
70    inner: tree_sitter::Parser,
71}
72
73impl Parser {
74    /// Construct a new parser with no language set.
75    ///
76    /// Call [`Parser::set_language`] before parsing.
77    #[must_use]
78    pub fn new() -> Self {
79        Self {
80            inner: tree_sitter::Parser::new(),
81        }
82    }
83
84    /// Configure the parser to use the language identified by name (e.g. `"python"`).
85    ///
86    /// Resolves the language through the global registry — auto-downloading
87    /// if necessary, when the `download` feature is enabled.
88    ///
89    /// # Errors
90    ///
91    /// Returns [`Error::LanguageNotFound`] if the language is not recognized,
92    /// or [`Error::ParserSetup`] if the language ABI is incompatible.
93    pub fn set_language(&mut self, name: &str) -> Result<(), Error> {
94        let language = crate::get_language(name)?;
95        self.inner
96            .set_language(&language)
97            .map_err(|e| Error::ParserSetup(format!("{e}")))
98    }
99
100    /// Parse a UTF-8 source string. Returns `None` if parsing was cancelled
101    /// or no language is set.
102    #[must_use]
103    pub fn parse(&mut self, source: &str) -> Option<Tree> {
104        self.inner.parse(source, None).map(|t| Tree(Arc::new(t)))
105    }
106
107    /// Parse a raw byte slice. Returns `None` if parsing was cancelled or
108    /// no language is set.
109    #[must_use]
110    pub fn parse_bytes(&mut self, source: &[u8]) -> Option<Tree> {
111        self.inner.parse(source, None).map(|t| Tree(Arc::new(t)))
112    }
113
114    /// Reset internal state. The next call to [`parse`](Self::parse) will
115    /// not be incremental.
116    pub fn reset(&mut self) {
117        self.inner.reset();
118    }
119}
120
121impl Default for Parser {
122    fn default() -> Self {
123        Self::new()
124    }
125}
126
127/// A parsed syntax tree. Cheap to clone (refcount bump).
128#[derive(Clone)]
129pub struct Tree(Arc<tree_sitter::Tree>);
130
131impl Tree {
132    /// Return the root [`Node`] of this tree.
133    #[must_use]
134    pub fn root_node(&self) -> Node {
135        // SAFETY: Node holds an `Arc<tree_sitter::Tree>` that keeps the
136        // upstream tree alive for the entire lifetime of `raw`. Extending
137        // the borrow to 'static is valid because the Arc owns the
138        // backing storage; no aliasing rules are violated because every
139        // Node clone increments the Arc refcount.
140        let raw: tree_sitter::Node<'static> = unsafe { std::mem::transmute(self.0.root_node()) };
141        Node {
142            tree: Arc::clone(&self.0),
143            raw,
144        }
145    }
146
147    /// Return a [`TreeCursor`] positioned at the root.
148    #[must_use]
149    pub fn walk(&self) -> TreeCursor {
150        // SAFETY: same justification as `root_node` — TreeCursor owns the
151        // Arc<Tree> that keeps the backing tree alive.
152        let raw: tree_sitter::TreeCursor<'static> = unsafe { std::mem::transmute(self.0.walk()) };
153        TreeCursor {
154            tree: Arc::clone(&self.0),
155            raw,
156        }
157    }
158}
159
160/// A single syntax node within a [`Tree`].
161///
162/// Nodes hold a strong reference to their parent tree so they remain valid
163/// regardless of how the tree is moved or stored at the FFI boundary.
164pub struct Node {
165    tree: Arc<tree_sitter::Tree>,
166    raw: tree_sitter::Node<'static>,
167}
168
169impl Clone for Node {
170    fn clone(&self) -> Self {
171        Self {
172            tree: Arc::clone(&self.tree),
173            raw: self.raw,
174        }
175    }
176}
177
178impl Node {
179    /// Return the node's kind name (e.g. `"function_definition"`).
180    #[must_use]
181    pub fn kind(&self) -> String {
182        self.raw.kind().to_string()
183    }
184
185    /// Return the node's numeric kind ID.
186    #[must_use]
187    pub fn kind_id(&self) -> u16 {
188        self.raw.kind_id()
189    }
190
191    /// Return the inclusive start byte offset of this node.
192    #[must_use]
193    pub fn start_byte(&self) -> usize {
194        self.raw.start_byte()
195    }
196
197    /// Return the exclusive end byte offset of this node.
198    #[must_use]
199    pub fn end_byte(&self) -> usize {
200        self.raw.end_byte()
201    }
202
203    /// Return the node's byte range as a [`ByteRange`].
204    ///
205    /// Callers should slice their own source bytes — this is a zero-copy
206    /// text accessor.
207    #[must_use]
208    pub fn byte_range(&self) -> ByteRange {
209        let r = self.raw.byte_range();
210        ByteRange {
211            start: r.start,
212            end: r.end,
213        }
214    }
215
216    /// Return the start [`Point`] (row, column).
217    #[must_use]
218    pub fn start_position(&self) -> Point {
219        self.raw.start_position().into()
220    }
221
222    /// Return the end [`Point`] (row, column).
223    #[must_use]
224    pub fn end_position(&self) -> Point {
225        self.raw.end_position().into()
226    }
227
228    /// True when this node is named (not punctuation/whitespace).
229    #[must_use]
230    pub fn is_named(&self) -> bool {
231        self.raw.is_named()
232    }
233
234    /// True when this is an error node.
235    #[must_use]
236    pub fn is_error(&self) -> bool {
237        self.raw.is_error()
238    }
239
240    /// True when this is a missing-token node.
241    #[must_use]
242    pub fn is_missing(&self) -> bool {
243        self.raw.is_missing()
244    }
245
246    /// True when this is an "extra" node (e.g. a comment).
247    #[must_use]
248    pub fn is_extra(&self) -> bool {
249        self.raw.is_extra()
250    }
251
252    /// True when this node or any descendant is an error.
253    #[must_use]
254    pub fn has_error(&self) -> bool {
255        self.raw.has_error()
256    }
257
258    /// Return this node's parent, if any.
259    #[must_use]
260    pub fn parent(&self) -> Option<Node> {
261        // SAFETY: the returned Node holds Arc<Tree>, keeping the parent
262        // tree alive while the lifetime-extended raw is used.
263        self.raw.parent().map(|raw| Node {
264            tree: Arc::clone(&self.tree),
265            raw: unsafe { std::mem::transmute::<tree_sitter::Node<'_>, tree_sitter::Node<'static>>(raw) },
266        })
267    }
268
269    /// Return the i-th child of this node, if any.
270    #[must_use]
271    pub fn child(&self, index: u32) -> Option<Node> {
272        // SAFETY: see `parent`.
273        self.raw.child(index).map(|raw| Node {
274            tree: Arc::clone(&self.tree),
275            raw: unsafe { std::mem::transmute::<tree_sitter::Node<'_>, tree_sitter::Node<'static>>(raw) },
276        })
277    }
278
279    /// Total number of children (including unnamed).
280    #[must_use]
281    pub fn child_count(&self) -> usize {
282        self.raw.child_count()
283    }
284
285    /// Return the i-th named child of this node, if any.
286    #[must_use]
287    pub fn named_child(&self, index: u32) -> Option<Node> {
288        // SAFETY: see `parent`.
289        self.raw.named_child(index).map(|raw| Node {
290            tree: Arc::clone(&self.tree),
291            raw: unsafe { std::mem::transmute::<tree_sitter::Node<'_>, tree_sitter::Node<'static>>(raw) },
292        })
293    }
294
295    /// Number of named children of this node.
296    #[must_use]
297    pub fn named_child_count(&self) -> usize {
298        self.raw.named_child_count()
299    }
300
301    /// Look up a child by its grammar-defined field name.
302    #[must_use]
303    pub fn child_by_field_name(&self, name: &str) -> Option<Node> {
304        // SAFETY: see `parent`.
305        self.raw.child_by_field_name(name).map(|raw| Node {
306            tree: Arc::clone(&self.tree),
307            raw: unsafe { std::mem::transmute::<tree_sitter::Node<'_>, tree_sitter::Node<'static>>(raw) },
308        })
309    }
310
311    /// Return the S-expression form of this node's subtree.
312    #[must_use]
313    pub fn to_sexp(&self) -> String {
314        self.raw.to_sexp()
315    }
316
317    /// Return a [`TreeCursor`] positioned at this node.
318    #[must_use]
319    pub fn walk(&self) -> TreeCursor {
320        // SAFETY: see `Tree::walk`. The cursor holds Arc<Tree>.
321        let raw: tree_sitter::TreeCursor<'static> = unsafe { std::mem::transmute(self.raw.walk()) };
322        TreeCursor {
323            tree: Arc::clone(&self.tree),
324            raw,
325        }
326    }
327}
328
329/// A cursor for traversing a [`Tree`].
330pub struct TreeCursor {
331    tree: Arc<tree_sitter::Tree>,
332    raw: tree_sitter::TreeCursor<'static>,
333}
334
335impl TreeCursor {
336    /// Return the [`Node`] at the cursor's current position.
337    #[must_use]
338    pub fn node(&self) -> Node {
339        // SAFETY: see `Tree::root_node`.
340        let raw: tree_sitter::Node<'static> = unsafe { std::mem::transmute(self.raw.node()) };
341        Node {
342            tree: Arc::clone(&self.tree),
343            raw,
344        }
345    }
346
347    /// Move the cursor to the first child of the current node.
348    /// Returns `true` if a child existed.
349    pub fn goto_first_child(&mut self) -> bool {
350        self.raw.goto_first_child()
351    }
352
353    /// Move the cursor to the parent of the current node.
354    /// Returns `true` if a parent existed.
355    pub fn goto_parent(&mut self) -> bool {
356        self.raw.goto_parent()
357    }
358
359    /// Move the cursor to the next sibling of the current node.
360    /// Returns `true` if a sibling existed.
361    pub fn goto_next_sibling(&mut self) -> bool {
362        self.raw.goto_next_sibling()
363    }
364
365    /// Return the field name for the current node, if any.
366    #[must_use]
367    pub fn field_name(&self) -> Option<String> {
368        self.raw.field_name().map(str::to_string)
369    }
370}