tree_house_bindings/
node.rs

1use std::ffi::{c_char, c_void, CStr};
2use std::fmt;
3use std::marker::PhantomData;
4use std::ops::Range;
5use std::ptr::NonNull;
6
7use crate::tree::Tree;
8use crate::tree_cursor::TreeCursor;
9use crate::Grammar;
10
11#[repr(C)]
12#[derive(Debug, Clone, Copy)]
13pub(super) struct NodeRaw {
14    context: [u32; 4],
15    id: *const c_void,
16    tree: *const c_void,
17}
18
19impl From<Node<'_>> for NodeRaw {
20    fn from(node: Node) -> NodeRaw {
21        NodeRaw {
22            context: node.context,
23            id: node.id.as_ptr(),
24            tree: node.tree.as_ptr(),
25        }
26    }
27}
28
29#[derive(Clone)]
30#[repr(C)]
31pub struct Node<'tree> {
32    context: [u32; 4],
33    id: NonNull<c_void>,
34    tree: NonNull<c_void>,
35    _phantom: PhantomData<&'tree Tree>,
36}
37
38impl fmt::Debug for Node<'_> {
39    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40        let range = self.byte_range();
41        write!(f, "{{Node {} {range:?}}}", self.kind())
42    }
43}
44
45impl<'tree> Node<'tree> {
46    #[inline]
47    pub(super) unsafe fn from_raw(raw: NodeRaw) -> Option<Self> {
48        Some(Node {
49            context: raw.context,
50            id: NonNull::new(raw.id as *mut _)?,
51            tree: unsafe { NonNull::new_unchecked(raw.tree as *mut _) },
52            _phantom: PhantomData,
53        })
54    }
55
56    #[inline]
57    pub(crate) fn as_raw(&self) -> NodeRaw {
58        NodeRaw {
59            context: self.context,
60            id: self.id.as_ptr(),
61            tree: self.tree.as_ptr(),
62        }
63    }
64
65    pub fn id(&self) -> usize {
66        self.id.as_ptr() as usize
67    }
68
69    /// Get this node's type as a string
70    #[inline]
71    pub fn kind(&self) -> &'tree str {
72        unsafe { CStr::from_ptr(ts_node_type(self.as_raw())) }
73            .to_str()
74            .unwrap()
75    }
76
77    /// Get this node's type as a numerical id.
78    #[inline]
79    pub fn kind_id(&self) -> u16 {
80        unsafe { ts_node_symbol(self.as_raw()) }
81    }
82
83    /// Get the [`Grammar`] that was used to parse this node's syntax tree.
84    #[inline]
85    pub fn grammar(&self) -> Grammar {
86        unsafe { ts_node_language(self.as_raw()) }
87    }
88
89    /// Check if this node is *named*.
90    ///
91    /// Named nodes correspond to named rules in the grammar, whereas
92    /// *anonymous* nodes correspond to string literals in the grammar.
93    #[inline]
94    pub fn is_named(&self) -> bool {
95        unsafe { ts_node_is_named(self.as_raw()) }
96    }
97
98    /// Returns true if and only if this node is contained "inside" the given
99    /// input range, i.e. either start_new > start_old and end_new <= end_old OR
100    /// start_new >= start_old and end_new < end_old
101    pub fn is_contained_within(&self, range: Range<u32>) -> bool {
102        (self.start_byte() > range.start && self.end_byte() <= range.end)
103            || (self.start_byte() >= range.start && self.end_byte() < range.end)
104    }
105
106    /// Check if this node is *missing*.
107    ///
108    /// Missing nodes are inserted by the parser in order to recover from
109    /// certain kinds of syntax errors.
110    #[inline]
111    pub fn is_missing(&self) -> bool {
112        unsafe { ts_node_is_missing(self.as_raw()) }
113    }
114
115    /// Get the byte offsets where this node starts.
116    #[inline(always)]
117    pub fn start_byte(&self) -> u32 {
118        // Normally we would implement this method like so:
119        //
120        //     extern "C" {
121        //         /// Get the node's start byte.
122        //         fn ts_node_start_byte(self_: NodeRaw) -> u32;
123        //     }
124        //     unsafe { ts_node_start_byte(self.as_raw()) }
125        //
126        // However this method has a trivial implementation which is unlikely to change (though
127        // there is no guarantee) and this method can be called often, in tight loops, on a hot
128        // code path (for example the highlighter's `next_event_offset` method). So we inline the
129        // implementation directly from `node.c` in the C library to minimize overhead:
130        self.context[0]
131    }
132
133    /// Get the byte offsets where this node end.
134    #[inline]
135    pub fn end_byte(&self) -> u32 {
136        unsafe { ts_node_end_byte(self.as_raw()) }
137    }
138
139    /// Get the byte range of source code that this node represents.
140    #[inline]
141    pub fn byte_range(&self) -> Range<u32> {
142        self.start_byte()..self.end_byte()
143    }
144
145    /// Get the node's child at the given index, where zero represents the first
146    /// child.
147    ///
148    /// This method is fairly fast, but its cost is technically log(i), so if
149    /// you might be iterating over a long list of children, you should use
150    /// [`Node::children`] instead.
151    #[inline]
152    pub fn child(&self, i: u32) -> Option<Node<'tree>> {
153        unsafe { Node::from_raw(ts_node_child(self.as_raw(), i)) }
154    }
155
156    /// Get this node's number of children.
157    #[inline]
158    pub fn child_count(&self) -> u32 {
159        unsafe { ts_node_child_count(self.as_raw()) }
160    }
161
162    /// Get this node's *named* child at the given index.
163    ///
164    /// See also [`Node::is_named`].
165    /// This method is fairly fast, but its cost is technically log(i), so if
166    /// you might be iterating over a long list of children, you should use
167    /// `Node::named_children` instead.
168    #[inline]
169    pub fn named_child(&self, i: u32) -> Option<Node<'tree>> {
170        unsafe { Node::from_raw(ts_node_named_child(self.as_raw(), i)) }
171    }
172
173    /// Get this node's number of *named* children.
174    ///
175    /// See also [`Node::is_named`].
176    #[inline]
177    pub fn named_child_count(&self) -> u32 {
178        unsafe { ts_node_named_child_count(self.as_raw()) }
179    }
180
181    #[inline]
182    unsafe fn map(&self, f: unsafe extern "C" fn(NodeRaw) -> NodeRaw) -> Option<Node<'tree>> {
183        Node::from_raw(f(self.as_raw()))
184    }
185
186    /// Get this node's immediate parent.
187    #[inline]
188    pub fn parent(&self) -> Option<Self> {
189        unsafe { self.map(ts_node_parent) }
190    }
191
192    /// Get this node's next sibling.
193    #[inline]
194    pub fn next_sibling(&self) -> Option<Self> {
195        unsafe { self.map(ts_node_next_sibling) }
196    }
197
198    /// Get this node's previous sibling.
199    #[inline]
200    pub fn prev_sibling(&self) -> Option<Self> {
201        unsafe { self.map(ts_node_prev_sibling) }
202    }
203
204    /// Get this node's next named sibling.
205    #[inline]
206    pub fn next_named_sibling(&self) -> Option<Self> {
207        unsafe { self.map(ts_node_next_named_sibling) }
208    }
209
210    /// Get this node's previous named sibling.
211    #[inline]
212    pub fn prev_named_sibling(&self) -> Option<Self> {
213        unsafe { self.map(ts_node_prev_named_sibling) }
214    }
215
216    /// Get the smallest node within this node that spans the given range.
217    #[inline]
218    pub fn descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Self> {
219        unsafe { Self::from_raw(ts_node_descendant_for_byte_range(self.as_raw(), start, end)) }
220    }
221
222    /// Get the smallest named node within this node that spans the given range.
223    #[inline]
224    pub fn named_descendant_for_byte_range(&self, start: u32, end: u32) -> Option<Self> {
225        unsafe {
226            Self::from_raw(ts_node_named_descendant_for_byte_range(
227                self.as_raw(),
228                start,
229                end,
230            ))
231        }
232    }
233
234    /// Iterate over this node's children.
235    ///
236    /// A [`TreeCursor`] is used to retrieve the children efficiently. Obtain
237    /// a [`TreeCursor`] by calling [`Tree::walk`] or [`Node::walk`]. To avoid
238    /// unnecessary allocations, you should reuse the same cursor for
239    /// subsequent calls to this method.
240    ///
241    /// If you're walking the tree recursively, you may want to use the
242    /// [`TreeCursor`] APIs directly instead.
243    pub fn children(&self) -> impl ExactSizeIterator<Item = Node<'tree>> {
244        let mut cursor = TreeCursor::new(self);
245        cursor.goto_first_child();
246        (0..self.child_count()).map(move |_| {
247            let result = cursor.node();
248            cursor.goto_next_sibling();
249            result
250        })
251    }
252
253    pub fn walk(&self) -> TreeCursor<'tree> {
254        TreeCursor::new(self)
255    }
256}
257
258impl PartialEq for Node<'_> {
259    fn eq(&self, other: &Self) -> bool {
260        self.id == other.id
261    }
262}
263
264impl Eq for Node<'_> {}
265
266unsafe impl Send for Node<'_> {}
267unsafe impl Sync for Node<'_> {}
268
269extern "C" {
270    /// Get the node's type as a null-terminated string.
271    fn ts_node_type(node: NodeRaw) -> *const c_char;
272
273    /// Get the node's type as a numerical id.
274    fn ts_node_symbol(node: NodeRaw) -> u16;
275
276    /// Get the node's language.
277    fn ts_node_language(node: NodeRaw) -> Grammar;
278
279    /// Check if the node is *named*. Named nodes correspond to named rules in
280    /// the grammar, whereas *anonymous* nodes correspond to string literals in
281    /// the grammar
282    fn ts_node_is_named(node: NodeRaw) -> bool;
283
284    /// Check if the node is *missing*. Missing nodes are inserted by the parser
285    /// in order to recover from certain kinds of syntax errors
286    fn ts_node_is_missing(node: NodeRaw) -> bool;
287
288    /// Get the node's immediate parent
289    fn ts_node_parent(node: NodeRaw) -> NodeRaw;
290
291    /// Get the node's child at the given index, where zero represents the first
292    /// child
293    fn ts_node_child(node: NodeRaw, child_index: u32) -> NodeRaw;
294
295    /// Get the node's number of children
296    fn ts_node_child_count(node: NodeRaw) -> u32;
297
298    /// Get the node's *named* child at the given index. See also
299    /// [`ts_node_is_named`]
300    fn ts_node_named_child(node: NodeRaw, child_index: u32) -> NodeRaw;
301
302    /// Get the node's number of *named* children. See also [`ts_node_is_named`]
303    fn ts_node_named_child_count(node: NodeRaw) -> u32;
304
305    /// Get the node's next sibling
306    fn ts_node_next_sibling(node: NodeRaw) -> NodeRaw;
307
308    fn ts_node_prev_sibling(node: NodeRaw) -> NodeRaw;
309
310    /// Get the node's next *named* sibling
311    fn ts_node_next_named_sibling(node: NodeRaw) -> NodeRaw;
312
313    fn ts_node_prev_named_sibling(node: NodeRaw) -> NodeRaw;
314
315    /// Get the smallest node within this node that spans the given range of
316    /// bytes or (row, column) positions
317    fn ts_node_descendant_for_byte_range(node: NodeRaw, start: u32, end: u32) -> NodeRaw;
318
319    /// Get the smallest named node within this node that spans the given range
320    /// of bytes or (row, column) positions
321    fn ts_node_named_descendant_for_byte_range(node: NodeRaw, start: u32, end: u32) -> NodeRaw;
322
323    /// Get the node's end byte.
324    fn ts_node_end_byte(node: NodeRaw) -> u32;
325}