tree_sitter_language_pack/parsing.rs
1//! Universal parser/tree/node surface over `tree_sitter`.
2//!
3//! These wrappers own the relationship between a [`Tree`] and the [`Node`]s
4//! and [`TreeCursor`]s derived from it. Upstream `tree_sitter::Node<'a>` and
5//! `tree_sitter::TreeCursor<'a>` borrow from `tree_sitter::Tree`; that
6//! borrow can't be propagated across FFI boundaries, so each [`Node`] and
7//! [`TreeCursor`] here owns an `Arc<tree_sitter::Tree>` and stores the raw
8//! upstream value with an extended `'static` lifetime. The `Arc<>` keeps
9//! the underlying tree alive for as long as any derived node or cursor
10//! exists.
11//!
12//! Cloning a [`Tree`] (and therefore any [`Node`] or [`TreeCursor`]) is
13//! cheap — one atomic refcount bump.
14
15use std::sync::Arc;
16
17use crate::error::Error;
18
/// A position in source text, expressed as a zero-indexed row and column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Point {
    /// Zero-indexed row (line) number.
    pub row: usize,
    /// Zero-indexed column, counted in bytes from the start of the row.
    ///
    /// The value is taken unchanged from `tree_sitter::Point`, whose columns
    /// are byte counts — not characters and not UTF-16 code units.
    pub column: usize,
}

impl Point {
    /// Build a [`Point`] from a zero-indexed `row` and `column`.
    #[must_use]
    pub fn new(row: usize, column: usize) -> Self {
        Point { row, column }
    }
}
36
37impl From<tree_sitter::Point> for Point {
38 fn from(p: tree_sitter::Point) -> Self {
39 Self {
40 row: p.row,
41 column: p.column,
42 }
43 }
44}
45
/// A half-open span of byte offsets: `start` is included, `end` is not.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ByteRange {
    /// Offset of the first byte inside the range.
    pub start: usize,
    /// Offset one past the last byte inside the range.
    pub end: usize,
}
55
56/// A tree-sitter parser configured for one language at a time.
57///
58/// # Example
59///
60/// ```no_run
61/// use tree_sitter_language_pack::Parser;
62///
63/// let mut parser = Parser::new();
64/// parser.set_language("python")?;
65/// let tree = parser.parse("def hello(): pass").expect("parse failed");
66/// assert_eq!(tree.root_node().kind(), "module");
67/// # Ok::<(), tree_sitter_language_pack::Error>(())
68/// ```
69pub struct Parser {
70 inner: tree_sitter::Parser,
71}
72
73impl Parser {
74 /// Construct a new parser with no language set.
75 ///
76 /// Call [`Parser::set_language`] before parsing.
77 #[must_use]
78 pub fn new() -> Self {
79 Self {
80 inner: tree_sitter::Parser::new(),
81 }
82 }
83
84 /// Configure the parser to use the language identified by name (e.g. `"python"`).
85 ///
86 /// Resolves the language through the global registry — auto-downloading
87 /// if necessary, when the `download` feature is enabled.
88 ///
89 /// # Errors
90 ///
91 /// Returns [`Error::LanguageNotFound`] if the language is not recognized,
92 /// or [`Error::ParserSetup`] if the language ABI is incompatible.
93 pub fn set_language(&mut self, name: &str) -> Result<(), Error> {
94 let language = crate::get_language(name)?;
95 self.inner
96 .set_language(&language)
97 .map_err(|e| Error::ParserSetup(format!("{e}")))
98 }
99
100 /// Parse a UTF-8 source string. Returns `None` if parsing was cancelled
101 /// or no language is set.
102 #[must_use]
103 pub fn parse(&mut self, source: &str) -> Option<Tree> {
104 self.inner.parse(source, None).map(|t| Tree(Arc::new(t)))
105 }
106
107 /// Parse a raw byte slice. Returns `None` if parsing was cancelled or
108 /// no language is set.
109 #[must_use]
110 pub fn parse_bytes(&mut self, source: &[u8]) -> Option<Tree> {
111 self.inner.parse(source, None).map(|t| Tree(Arc::new(t)))
112 }
113
114 /// Reset internal state. The next call to [`parse`](Self::parse) will
115 /// not be incremental.
116 pub fn reset(&mut self) {
117 self.inner.reset();
118 }
119}
120
121impl Default for Parser {
122 fn default() -> Self {
123 Self::new()
124 }
125}
126
127/// A parsed syntax tree. Cheap to clone (refcount bump).
128#[derive(Clone)]
129pub struct Tree(Arc<tree_sitter::Tree>);
130
131impl Tree {
132 /// Return the root [`Node`] of this tree.
133 #[must_use]
134 pub fn root_node(&self) -> Node {
135 // SAFETY: Node holds an `Arc<tree_sitter::Tree>` that keeps the
136 // upstream tree alive for the entire lifetime of `raw`. Extending
137 // the borrow to 'static is valid because the Arc owns the
138 // backing storage; no aliasing rules are violated because every
139 // Node clone increments the Arc refcount.
140 let raw: tree_sitter::Node<'static> = unsafe { std::mem::transmute(self.0.root_node()) };
141 Node {
142 tree: Arc::clone(&self.0),
143 raw,
144 }
145 }
146
147 /// Return a [`TreeCursor`] positioned at the root.
148 #[must_use]
149 pub fn walk(&self) -> TreeCursor {
150 // SAFETY: same justification as `root_node` — TreeCursor owns the
151 // Arc<Tree> that keeps the backing tree alive.
152 let raw: tree_sitter::TreeCursor<'static> = unsafe { std::mem::transmute(self.0.walk()) };
153 TreeCursor {
154 tree: Arc::clone(&self.0),
155 raw,
156 }
157 }
158}
159
160/// A single syntax node within a [`Tree`].
161///
162/// Nodes hold a strong reference to their parent tree so they remain valid
163/// regardless of how the tree is moved or stored at the FFI boundary.
164pub struct Node {
165 tree: Arc<tree_sitter::Tree>,
166 raw: tree_sitter::Node<'static>,
167}
168
169impl Clone for Node {
170 fn clone(&self) -> Self {
171 Self {
172 tree: Arc::clone(&self.tree),
173 raw: self.raw,
174 }
175 }
176}
177
178impl Node {
179 /// Return the node's kind name (e.g. `"function_definition"`).
180 #[must_use]
181 pub fn kind(&self) -> String {
182 self.raw.kind().to_string()
183 }
184
185 /// Return the node's numeric kind ID.
186 #[must_use]
187 pub fn kind_id(&self) -> u16 {
188 self.raw.kind_id()
189 }
190
191 /// Return the inclusive start byte offset of this node.
192 #[must_use]
193 pub fn start_byte(&self) -> usize {
194 self.raw.start_byte()
195 }
196
197 /// Return the exclusive end byte offset of this node.
198 #[must_use]
199 pub fn end_byte(&self) -> usize {
200 self.raw.end_byte()
201 }
202
203 /// Return the node's byte range as a [`ByteRange`].
204 ///
205 /// Callers should slice their own source bytes — this is a zero-copy
206 /// text accessor.
207 #[must_use]
208 pub fn byte_range(&self) -> ByteRange {
209 let r = self.raw.byte_range();
210 ByteRange {
211 start: r.start,
212 end: r.end,
213 }
214 }
215
216 /// Return the start [`Point`] (row, column).
217 #[must_use]
218 pub fn start_position(&self) -> Point {
219 self.raw.start_position().into()
220 }
221
222 /// Return the end [`Point`] (row, column).
223 #[must_use]
224 pub fn end_position(&self) -> Point {
225 self.raw.end_position().into()
226 }
227
228 /// True when this node is named (not punctuation/whitespace).
229 #[must_use]
230 pub fn is_named(&self) -> bool {
231 self.raw.is_named()
232 }
233
234 /// True when this is an error node.
235 #[must_use]
236 pub fn is_error(&self) -> bool {
237 self.raw.is_error()
238 }
239
240 /// True when this is a missing-token node.
241 #[must_use]
242 pub fn is_missing(&self) -> bool {
243 self.raw.is_missing()
244 }
245
246 /// True when this is an "extra" node (e.g. a comment).
247 #[must_use]
248 pub fn is_extra(&self) -> bool {
249 self.raw.is_extra()
250 }
251
252 /// True when this node or any descendant is an error.
253 #[must_use]
254 pub fn has_error(&self) -> bool {
255 self.raw.has_error()
256 }
257
258 /// Return this node's parent, if any.
259 #[must_use]
260 pub fn parent(&self) -> Option<Node> {
261 // SAFETY: the returned Node holds Arc<Tree>, keeping the parent
262 // tree alive while the lifetime-extended raw is used.
263 self.raw.parent().map(|raw| Node {
264 tree: Arc::clone(&self.tree),
265 raw: unsafe { std::mem::transmute::<tree_sitter::Node<'_>, tree_sitter::Node<'static>>(raw) },
266 })
267 }
268
269 /// Return the i-th child of this node, if any.
270 #[must_use]
271 pub fn child(&self, index: u32) -> Option<Node> {
272 // SAFETY: see `parent`.
273 self.raw.child(index).map(|raw| Node {
274 tree: Arc::clone(&self.tree),
275 raw: unsafe { std::mem::transmute::<tree_sitter::Node<'_>, tree_sitter::Node<'static>>(raw) },
276 })
277 }
278
279 /// Total number of children (including unnamed).
280 #[must_use]
281 pub fn child_count(&self) -> usize {
282 self.raw.child_count()
283 }
284
285 /// Return the i-th named child of this node, if any.
286 #[must_use]
287 pub fn named_child(&self, index: u32) -> Option<Node> {
288 // SAFETY: see `parent`.
289 self.raw.named_child(index).map(|raw| Node {
290 tree: Arc::clone(&self.tree),
291 raw: unsafe { std::mem::transmute::<tree_sitter::Node<'_>, tree_sitter::Node<'static>>(raw) },
292 })
293 }
294
295 /// Number of named children of this node.
296 #[must_use]
297 pub fn named_child_count(&self) -> usize {
298 self.raw.named_child_count()
299 }
300
301 /// Look up a child by its grammar-defined field name.
302 #[must_use]
303 pub fn child_by_field_name(&self, name: &str) -> Option<Node> {
304 // SAFETY: see `parent`.
305 self.raw.child_by_field_name(name).map(|raw| Node {
306 tree: Arc::clone(&self.tree),
307 raw: unsafe { std::mem::transmute::<tree_sitter::Node<'_>, tree_sitter::Node<'static>>(raw) },
308 })
309 }
310
311 /// Return the S-expression form of this node's subtree.
312 #[must_use]
313 pub fn to_sexp(&self) -> String {
314 self.raw.to_sexp()
315 }
316
317 /// Return a [`TreeCursor`] positioned at this node.
318 #[must_use]
319 pub fn walk(&self) -> TreeCursor {
320 // SAFETY: see `Tree::walk`. The cursor holds Arc<Tree>.
321 let raw: tree_sitter::TreeCursor<'static> = unsafe { std::mem::transmute(self.raw.walk()) };
322 TreeCursor {
323 tree: Arc::clone(&self.tree),
324 raw,
325 }
326 }
327}
328
329/// A cursor for traversing a [`Tree`].
330pub struct TreeCursor {
331 tree: Arc<tree_sitter::Tree>,
332 raw: tree_sitter::TreeCursor<'static>,
333}
334
335impl TreeCursor {
336 /// Return the [`Node`] at the cursor's current position.
337 #[must_use]
338 pub fn node(&self) -> Node {
339 // SAFETY: see `Tree::root_node`.
340 let raw: tree_sitter::Node<'static> = unsafe { std::mem::transmute(self.raw.node()) };
341 Node {
342 tree: Arc::clone(&self.tree),
343 raw,
344 }
345 }
346
347 /// Move the cursor to the first child of the current node.
348 /// Returns `true` if a child existed.
349 pub fn goto_first_child(&mut self) -> bool {
350 self.raw.goto_first_child()
351 }
352
353 /// Move the cursor to the parent of the current node.
354 /// Returns `true` if a parent existed.
355 pub fn goto_parent(&mut self) -> bool {
356 self.raw.goto_parent()
357 }
358
359 /// Move the cursor to the next sibling of the current node.
360 /// Returns `true` if a sibling existed.
361 pub fn goto_next_sibling(&mut self) -> bool {
362 self.raw.goto_next_sibling()
363 }
364
365 /// Return the field name for the current node, if any.
366 #[must_use]
367 pub fn field_name(&self) -> Option<String> {
368 self.raw.field_name().map(str::to_string)
369 }
370}