tree_house_bindings/
node.rs

1use std::ffi::{c_char, c_void, CStr};
2use std::fmt;
3use std::marker::PhantomData;
4use std::ops::Range;
5use std::ptr::NonNull;
6
7use crate::tree::Tree;
8use crate::tree_cursor::TreeCursor;
9use crate::Grammar;
10
11#[repr(C)]
12#[derive(Debug, Clone, Copy)]
13pub(super) struct NodeRaw {
14    context: [u32; 4],
15    id: *const c_void,
16    tree: *const c_void,
17}
18
19impl From<Node<'_>> for NodeRaw {
20    fn from(node: Node) -> NodeRaw {
21        NodeRaw {
22            context: node.context,
23            id: node.id.as_ptr(),
24            tree: node.tree.as_ptr(),
25        }
26    }
27}
28
29#[derive(Clone)]
30#[repr(C)]
31pub struct Node<'tree> {
32    context: [u32; 4],
33    id: NonNull<c_void>,
34    tree: NonNull<c_void>,
35    _phantom: PhantomData<&'tree Tree>,
36}
37
38impl fmt::Debug for Node<'_> {
39    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40        let range = self.byte_range();
41        write!(f, "{{Node {} {range:?}}}", self.kind())
42    }
43}
44
45impl<'tree> Node<'tree> {
46    #[inline]
47    pub(super) unsafe fn from_raw(raw: NodeRaw) -> Option<Self> {
48        Some(Node {
49            context: raw.context,
50            id: NonNull::new(raw.id as *mut _)?,
51            tree: unsafe { NonNull::new_unchecked(raw.tree as *mut _) },
52            _phantom: PhantomData,
53        })
54    }
55
56    #[inline]
57    pub(crate) fn as_raw(&self) -> NodeRaw {
58        NodeRaw {
59            context: self.context,
60            id: self.id.as_ptr(),
61            tree: self.tree.as_ptr(),
62        }
63    }
64
65    pub fn id(&self) -> usize {
66        self.id.as_ptr() as usize
67    }
68
69    /// Get this node's type as a string
70    #[inline]
71    pub fn kind(&self) -> &'tree str {
72        unsafe { CStr::from_ptr(ts_node_type(self.as_raw())) }
73            .to_str()
74            .unwrap()
75    }
76
77    /// Get this node's type as a numerical id.
78    #[inline]
79    pub fn kind_id(&self) -> u16 {
80        unsafe { ts_node_symbol(self.as_raw()) }
81    }
82
83    /// Get the [`Grammar`] that was used to parse this node's syntax tree.
84    #[inline]
85    pub fn grammar(&self) -> Grammar {
86        unsafe { ts_node_language(self.as_raw()) }
87    }
88
89    /// Check if this node is *named*.
90    ///
91    /// Named nodes correspond to named rules in the grammar, whereas
92    /// *anonymous* nodes correspond to string literals in the grammar.
93    #[inline]
94    pub fn is_named(&self) -> bool {
95        unsafe { ts_node_is_named(self.as_raw()) }
96    }
97
98    /// Returns true if and only if this node is contained "inside" the given
99    /// input range, i.e. either start_new > start_old and end_new <= end_old OR
100    /// start_new >= start_old and end_new < end_old
101    pub fn is_contained_within(&self, range: Range<u32>) -> bool {
102        (self.start_byte() > range.start && self.end_byte() <= range.end)
103            || (self.start_byte() >= range.start && self.end_byte() < range.end)
104    }
105
106    /// Check if this node is *missing*.
107    ///
108    /// Missing nodes are inserted by the parser in order to recover from
109    /// certain kinds of syntax errors.
110    #[inline]
111    pub fn is_missing(&self) -> bool {
112        unsafe { ts_node_is_missing(self.as_raw()) }
113    }
114
115    /// Check if this node is *extra*.
116    ///
117    /// Extra nodes represent things like comments, which are not required by the
118    /// grammar, but can appear anywhere.
119    #[inline]
120    pub fn is_extra(&self) -> bool {
121        unsafe { ts_node_is_extra(self.as_raw()) }
122    }
123
124    /// Get the byte offsets where this node starts.
125    #[inline(always)]
126    pub fn start_byte(&self) -> u32 {
127        // Normally we would implement this method like so:
128        //
129        //     extern "C" {
130        //         /// Get the node's start byte.
131        //         fn ts_node_start_byte(self_: NodeRaw) -> u32;
132        //     }
133        //     unsafe { ts_node_start_byte(self.as_raw()) }
134        //
135        // However this method has a trivial implementation which is unlikely to change (though
136        // there is no guarantee) and this method can be called often, in tight loops, on a hot
137        // code path (for example the highlighter's `next_event_offset` method). So we inline the
138        // implementation directly from `node.c` in the C library to minimize overhead:
139        self.context[0]
140    }
141
142    /// Get the byte offsets where this node end.
143    #[inline]
144    pub fn end_byte(&self) -> u32 {
145        unsafe { ts_node_end_byte(self.as_raw()) }
146    }
147
148    /// Get the byte range of source code that this node represents.
149    #[inline]
150    pub fn byte_range(&self) -> Range<u32> {
151        self.start_byte()..self.end_byte()
152    }
153
154    /// Get the node's child at the given index, where zero represents the first
155    /// child.
156    ///
157    /// This method is fairly fast, but its cost is technically log(i), so if
158    /// you might be iterating over a long list of children, you should use
159    /// [`Node::children`] instead.
160    #[inline]
161    pub fn child(&self, i: u32) -> Option<Node<'tree>> {
162        unsafe { Node::from_raw(ts_node_child(self.as_raw(), i)) }
163    }
164
165    /// Get this node's number of children.
166    #[inline]
167    pub fn child_count(&self) -> u32 {
168        unsafe { ts_node_child_count(self.as_raw()) }
169    }
170
171    /// Get this node's *named* child at the given index.
172    ///
173    /// See also [`Node::is_named`].
174    /// This method is fairly fast, but its cost is technically log(i), so if
175    /// you might be iterating over a long list of children, you should use
176    /// `Node::named_children` instead.
177    #[inline]
178    pub fn named_child(&self, i: u32) -> Option<Node<'tree>> {
179        unsafe { Node::from_raw(ts_node_named_child(self.as_raw(), i)) }
180    }
181
182    /// Get this node's number of *named* children.
183    ///
184    /// See also [`Node::is_named`].
185    #[inline]
186    pub fn named_child_count(&self) -> u32 {
187        unsafe { ts_node_named_child_count(self.as_raw()) }
188    }
189
190    #[inline]
191    unsafe fn map(&self, f: unsafe extern "C" fn(NodeRaw) -> NodeRaw) -> Option<Node<'tree>> {
192        Node::from_raw(f(self.as_raw()))
193    }
194
195    /// Get this node's immediate parent.
196    #[inline]
197    pub fn parent(&self) -> Option<Self> {
198        unsafe { self.map(ts_node_parent) }
199    }
200
201    /// Get this node's next sibling.
202    #[inline]
203    pub fn next_sibling(&self) -> Option<Self> {
204        unsafe { self.map(ts_node_next_sibling) }
205    }
206
207    /// Get this node's previous sibling.
208    #[inline]
209    pub fn prev_sibling(&self) -> Option<Self> {
210        unsafe { self.map(ts_node_prev_sibling) }
211    }
212
213    /// Get this node's next named sibling.
214    #[inline]
215    pub fn next_named_sibling(&self) -> Option<Self> {
216        unsafe { self.map(ts_node_next_named_sibling) }
217    }
218
219    /// Get this node's previous named sibling.
220    #[inline]
221    pub fn prev_named_sibling(&self) -> Option<Self> {
222        unsafe { self.map(ts_node_prev_named_sibling) }
223    }
224
225    /// Get the smallest node within this node that spans the given range.
226    #[inline]
227    pub fn descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Self> {
228        unsafe { Self::from_raw(ts_node_descendant_for_byte_range(self.as_raw(), start, end)) }
229    }
230
231    /// Get the smallest named node within this node that spans the given range.
232    #[inline]
233    pub fn named_descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Self> {
234        unsafe {
235            Self::from_raw(ts_node_named_descendant_for_byte_range(
236                self.as_raw(),
237                start,
238                end,
239            ))
240        }
241    }
242
243    /// Iterate over this node's children.
244    ///
245    /// A [`TreeCursor`] is used to retrieve the children efficiently. Obtain
246    /// a [`TreeCursor`] by calling [`Tree::walk`] or [`Node::walk`]. To avoid
247    /// unnecessary allocations, you should reuse the same cursor for
248    /// subsequent calls to this method.
249    ///
250    /// If you're walking the tree recursively, you may want to use the
251    /// [`TreeCursor`] APIs directly instead.
252    pub fn children(&self) -> impl ExactSizeIterator<Item = Node<'tree>> {
253        let mut cursor = TreeCursor::new(self);
254        cursor.goto_first_child();
255        (0..self.child_count()).map(move |_| {
256            let result = cursor.node();
257            cursor.goto_next_sibling();
258            result
259        })
260    }
261
262    pub fn walk(&self) -> TreeCursor<'tree> {
263        TreeCursor::new(self)
264    }
265}
266
267impl PartialEq for Node<'_> {
268    fn eq(&self, other: &Self) -> bool {
269        self.id == other.id
270    }
271}
272
273impl Eq for Node<'_> {}
274
275unsafe impl Send for Node<'_> {}
276unsafe impl Sync for Node<'_> {}
277
278extern "C" {
279    /// Get the node's type as a null-terminated string.
280    fn ts_node_type(node: NodeRaw) -> *const c_char;
281
282    /// Get the node's type as a numerical id.
283    fn ts_node_symbol(node: NodeRaw) -> u16;
284
285    /// Get the node's language.
286    fn ts_node_language(node: NodeRaw) -> Grammar;
287
288    /// Check if the node is *named*. Named nodes correspond to named rules in
289    /// the grammar, whereas *anonymous* nodes correspond to string literals in
290    /// the grammar
291    fn ts_node_is_named(node: NodeRaw) -> bool;
292
293    /// Check if the node is *missing*. Missing nodes are inserted by the parser
294    /// in order to recover from certain kinds of syntax errors
295    fn ts_node_is_missing(node: NodeRaw) -> bool;
296
297    /// Check if this node is *extra*.
298    ///
299    /// Extra nodes represent things like comments, which are not required by the
300    /// grammar, but can appear anywhere.
301    fn ts_node_is_extra(node: NodeRaw) -> bool;
302
303    /// Get the node's immediate parent
304    fn ts_node_parent(node: NodeRaw) -> NodeRaw;
305
306    /// Get the node's child at the given index, where zero represents the first
307    /// child
308    fn ts_node_child(node: NodeRaw, child_index: u32) -> NodeRaw;
309
310    /// Get the node's number of children
311    fn ts_node_child_count(node: NodeRaw) -> u32;
312
313    /// Get the node's *named* child at the given index. See also
314    /// [`ts_node_is_named`]
315    fn ts_node_named_child(node: NodeRaw, child_index: u32) -> NodeRaw;
316
317    /// Get the node's number of *named* children. See also [`ts_node_is_named`]
318    fn ts_node_named_child_count(node: NodeRaw) -> u32;
319
320    /// Get the node's next sibling
321    fn ts_node_next_sibling(node: NodeRaw) -> NodeRaw;
322
323    fn ts_node_prev_sibling(node: NodeRaw) -> NodeRaw;
324
325    /// Get the node's next *named* sibling
326    fn ts_node_next_named_sibling(node: NodeRaw) -> NodeRaw;
327
328    fn ts_node_prev_named_sibling(node: NodeRaw) -> NodeRaw;
329
330    /// Get the smallest node within this node that spans the given range of
331    /// bytes or (row, column) positions
332    fn ts_node_descendant_for_byte_range(node: NodeRaw, start: u32, end: u32) -> NodeRaw;
333
334    /// Get the smallest named node within this node that spans the given range
335    /// of bytes or (row, column) positions
336    fn ts_node_named_descendant_for_byte_range(node: NodeRaw, start: u32, end: u32) -> NodeRaw;
337
338    /// Get the node's end byte.
339    fn ts_node_end_byte(node: NodeRaw) -> u32;
340}