1use std::cell::Cell;
2use std::ops::ControlFlow;
3use std::os::raw::c_void;
4use std::panic::{catch_unwind, AssertUnwindSafe};
5use std::ptr::NonNull;
6use std::time::{Duration, Instant};
7use std::{fmt, mem, ptr};
8
9use regex_cursor::Cursor;
10
11use crate::grammar::IncompatibleGrammarError;
12use crate::tree::{SyntaxTreeData, Tree};
13use crate::{Grammar, Input, IntoInput, Point, Range};
14
// Opaque stand-in for the parser object owned by the C library. The enum has no
// variants, so it can never be constructed from Rust; it is only used behind
// `NonNull<ParserData>` pointers.
enum ParserData {}
17
// Per-thread slot holding at most one idle parser. `Parser::new` takes the parser
// out of this slot when one is present, and dropping a `Parser` puts its pointer
// back, so a thread that repeatedly creates and drops parsers keeps reusing the
// same allocation.
#[clippy::msrv = "1.76.0"]
thread_local! {
    static PARSER_CACHE: Cell<Option<RawParser>> = const { Cell::new(None) };
}
22
/// Owning handle to a parser that is sitting in [`PARSER_CACHE`].
///
/// Dropping a `RawParser` deletes the underlying parser via `ts_parser_delete`.
struct RawParser {
    /// Pointer to the parser allocated by the C library.
    ptr: NonNull<ParserData>,
}
26
27impl Drop for RawParser {
28 fn drop(&mut self) {
29 unsafe { ts_parser_delete(self.ptr) }
30 }
31}
32
/// A parser that turns source text into a [`Tree`].
///
/// Create one with [`Parser::new`], select a grammar with [`Parser::set_grammar`],
/// then call one of the `parse*` methods. When a `Parser` is dropped its underlying
/// parser object is stored in a thread-local cache for reuse by the next
/// [`Parser::new`] on the same thread.
pub struct Parser {
    /// Pointer to the parser object owned by the C library.
    ptr: NonNull<ParserData>,
}
38
39impl Parser {
40 #[must_use]
42 pub fn new() -> Parser {
43 let ptr = match PARSER_CACHE.take() {
44 Some(cached) => {
45 let ptr = cached.ptr;
46 mem::forget(cached);
47 ptr
48 }
49 None => unsafe { ts_parser_new() },
50 };
51 Parser { ptr }
52 }
53
54 pub fn set_grammar(&mut self, grammar: Grammar) -> Result<(), IncompatibleGrammarError> {
56 if unsafe { ts_parser_set_language(self.ptr, grammar) } {
57 Ok(())
58 } else {
59 Err(IncompatibleGrammarError {
60 abi_version: grammar.abi_version(),
61 })
62 }
63 }
64
65 pub fn set_included_ranges(&mut self, ranges: &[Range]) -> Result<(), InvalidRangesError> {
72 let success = unsafe {
77 ts_parser_set_included_ranges(self.ptr, ranges.as_ptr(), ranges.len() as u32)
78 };
79 if success {
80 Ok(())
81 } else {
82 Err(InvalidRangesError)
83 }
84 }
85
86 #[must_use]
87 pub fn parse<I: Input>(
88 &mut self,
89 input: impl IntoInput<Input = I>,
90 old_tree: Option<&Tree>,
91 ) -> Option<Tree> {
92 let mut input = input.into_input();
93 unsafe extern "C" fn read<C: Input>(
94 payload: NonNull<c_void>,
95 byte_index: u32,
96 _position: Point,
97 bytes_read: *mut u32,
98 ) -> *const u8 {
99 let cursor = catch_unwind(AssertUnwindSafe(move || {
100 let input: &mut C = payload.cast().as_mut();
101 let cursor = input.cursor_at(byte_index);
102 let slice = cursor.chunk();
103 let offset: u32 = cursor.offset().try_into().unwrap();
104 let len: u32 = slice.len().try_into().unwrap();
105 (byte_index - offset, slice.as_ptr(), len)
106 }));
107 match cursor {
108 Ok((chunk_offset, ptr, len)) if chunk_offset < len => {
109 *bytes_read = len - chunk_offset;
110 ptr.add(chunk_offset as usize)
111 }
112 _ => {
113 *bytes_read = 0;
114 ptr::null()
115 }
116 }
117 }
118 let raw_input = ParserInputRaw {
119 payload: NonNull::from(&mut input).cast(),
120 read: read::<I>,
121 encoding: InputEncoding::Utf8,
122 decode: None,
123 };
124
125 unsafe {
126 let old_tree = old_tree.map(|tree| tree.as_raw());
127 ts_parser_parse(self.ptr, old_tree, raw_input).map(|raw| Tree::from_raw(raw))
128 }
129 }
130
131 #[must_use]
134 pub fn parse_with_options<I: Input>(
135 &mut self,
136 input: impl IntoInput<Input = I>,
137 old_tree: Option<&Tree>,
138 mut options: ParseOptions<'_>,
139 ) -> Option<Tree> {
140 let mut input = input.into_input();
141 unsafe extern "C" fn read<C: Input>(
142 payload: NonNull<c_void>,
143 byte_index: u32,
144 _position: Point,
145 bytes_read: *mut u32,
146 ) -> *const u8 {
147 let cursor = catch_unwind(AssertUnwindSafe(move || {
148 let input: &mut C = payload.cast().as_mut();
149 let cursor = input.cursor_at(byte_index);
150 let slice = cursor.chunk();
151 let offset: u32 = cursor.offset().try_into().unwrap();
152 let len: u32 = slice.len().try_into().unwrap();
153 (byte_index - offset, slice.as_ptr(), len)
154 }));
155 match cursor {
156 Ok((chunk_offset, ptr, len)) if chunk_offset < len => {
157 *bytes_read = len - chunk_offset;
158 ptr.add(chunk_offset as usize)
159 }
160 _ => {
161 *bytes_read = 0;
162 ptr::null()
163 }
164 }
165 }
166 let raw_input = ParserInputRaw {
167 payload: NonNull::from(&mut input).cast(),
168 read: read::<I>,
169 encoding: InputEncoding::Utf8,
170 decode: None,
171 };
172
173 unsafe extern "C" fn progress_cb(raw_state: NonNull<RawParseState>) -> bool {
176 let raw_ref = raw_state.as_ref();
177 let cb: *mut &mut dyn FnMut(&ParseState) -> ControlFlow<()> =
178 raw_ref.payload.as_ptr().cast();
179 let public_state = ParseState {
180 current_byte_offset: raw_ref.current_byte_offset,
181 has_error: raw_ref.has_error,
182 };
183 (*cb)(&public_state).is_break()
184 }
185
186 let raw_options = RawParseOptions {
187 payload: unsafe {
188 Some(NonNull::new_unchecked(
189 ptr::addr_of_mut!(options.callback).cast(),
190 ))
191 },
192 progress_callback: Some(progress_cb),
193 };
194
195 unsafe {
196 let old_tree = old_tree.map(|tree| tree.as_raw());
197 ts_parser_parse_with_options(self.ptr, old_tree, raw_input, raw_options)
198 .map(|raw| Tree::from_raw(raw))
199 }
200 }
201
202 #[must_use]
204 pub fn parse_with_timeout<I: Input>(
205 &mut self,
206 input: impl IntoInput<Input = I>,
207 old_tree: Option<&Tree>,
208 timeout: Duration,
209 ) -> Option<Tree> {
210 let deadline = Instant::now() + timeout;
211 let mut check = |_: &ParseState| {
212 if Instant::now() >= deadline {
213 ControlFlow::Break(())
214 } else {
215 ControlFlow::Continue(())
216 }
217 };
218 self.parse_with_options(input, old_tree, ParseOptions::new(&mut check))
219 }
220}
221
222impl Default for Parser {
223 fn default() -> Self {
224 Self::new()
225 }
226}
227
// SAFETY (Sync): every method on `Parser` visible in this file takes `&mut self`,
// so a shared `&Parser` gives no access to the underlying parser.
unsafe impl Sync for Parser {}
// SAFETY (Send): NOTE(review) — assumes the C parser object is not tied to the
// thread that created it; confirm against the tree-sitter documentation. A parser
// dropped on another thread is simply placed in that thread's cache.
unsafe impl Send for Parser {}
230
231impl Drop for Parser {
232 fn drop(&mut self) {
233 PARSER_CACHE.set(Some(RawParser { ptr: self.ptr }));
234 }
235}
236
/// Snapshot of parsing progress handed to the callback of a [`ParseOptions`].
#[derive(Debug, Clone, Copy)]
pub struct ParseState {
    /// Byte offset reported by the parser at the time of the callback.
    pub current_byte_offset: u32,
    /// Error flag reported by the parser at the time of the callback
    /// (presumably: an error has been encountered so far — confirm against the
    /// tree-sitter documentation).
    pub has_error: bool,
}
243
/// Options for [`Parser::parse_with_options`].
///
/// Currently this only carries the progress callback.
pub struct ParseOptions<'a> {
    /// Called with the current [`ParseState`]; returning `ControlFlow::Break(())`
    /// requests that the parse be cancelled.
    callback: &'a mut dyn FnMut(&ParseState) -> ControlFlow<()>,
}
250
251impl<'a> ParseOptions<'a> {
252 pub fn new(callback: &'a mut impl FnMut(&ParseState) -> ControlFlow<()>) -> ParseOptions<'a> {
253 ParseOptions { callback }
254 }
255}
256
/// Error returned when a set of included ranges is rejected by the parser.
#[derive(Debug, PartialEq, Eq)]
pub struct InvalidRangesError;

impl fmt::Display for InvalidRangesError {
    /// Writes the fixed, human-readable description of the error.
    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
        out.write_str("include ranges overlap or are not sorted")
    }
}

impl std::error::Error for InvalidRangesError {}
268
/// Signature of the read callback stored in [`ParserInputRaw::read`].
///
/// The callback receives the `payload` pointer from the same struct, the byte index
/// (and position) to read from, and an out-pointer `bytes_read`. The
/// implementations in this file return a pointer to the available bytes and store
/// their count in `*bytes_read`, or return null with a count of zero when there is
/// nothing to read.
type TreeSitterReadFn = unsafe extern "C" fn(
    payload: NonNull<c_void>,
    byte_index: u32,
    position: Point,
    bytes_read: *mut u32,
) -> *const u8;
275
/// Signature of the optional custom decode function in [`ParserInputRaw::decode`].
///
/// This file never supplies one (`decode` is always `None`).
// NOTE(review): tree-sitter's C `DecodeFunction` appears to take `int32_t *code_point`
// as an out-parameter; `*const i32` is ABI-compatible but `*mut i32` would describe
// the contract more accurately — confirm against the tree-sitter header.
type DecodeInputFn =
    unsafe extern "C" fn(string: *const u8, length: u32, code_point: *const i32) -> u32;
280
/// Input descriptor passed by value to the C parse functions.
///
/// `repr(C)`: the field order and types are part of the FFI contract and must not
/// be changed independently of the C definition.
#[repr(C)]
#[derive(Debug)]
pub struct ParserInputRaw {
    /// Opaque pointer handed back to `read` on every call.
    pub payload: NonNull<c_void>,
    /// Callback that supplies the text to parse.
    pub read: TreeSitterReadFn,
    /// Encoding of the bytes returned by `read`.
    pub encoding: InputEncoding,
    /// Optional custom decoder (presumably only consulted for
    /// [`InputEncoding::Custom`] — confirm against the tree-sitter documentation).
    pub decode: Option<DecodeInputFn>,
}
292
/// Text encoding of the bytes produced by a [`ParserInputRaw`].
///
/// `repr(u32)` with implicit discriminants 0..=3: the variant order is part of the
/// FFI contract (presumably mirroring tree-sitter's `TSInputEncoding` — confirm
/// against the header before reordering or inserting variants).
#[repr(u32)]
#[derive(Debug, Clone, Copy)]
pub enum InputEncoding {
    Utf8,
    Utf16LE,
    Utf16BE,
    Custom,
}
302
/// C-layout parse state that the parser passes to a [`ProgressCallback`].
///
/// `repr(C)`: the field order and types are part of the FFI contract.
#[repr(C)]
#[derive(Debug)]
struct RawParseState {
    /// The `payload` pointer supplied in [`RawParseOptions`]; in this file it is the
    /// address of the user's `&mut dyn FnMut` callback.
    payload: NonNull<c_void>,
    /// Copied into [`ParseState::current_byte_offset`].
    current_byte_offset: u32,
    /// Copied into [`ParseState::has_error`].
    has_error: bool,
}
311
/// Signature of the progress callback stored in [`RawParseOptions`]. The
/// implementation in this file returns `true` when the user callback asks to stop.
type ProgressCallback = unsafe extern "C" fn(state: NonNull<RawParseState>) -> bool;
315
/// C-layout options passed by value to `ts_parser_parse_with_options`.
///
/// `repr(C)`: the field order and types are part of the FFI contract. The derived
/// `Default` leaves both fields `None`.
#[repr(C)]
#[derive(Debug, Default)]
struct RawParseOptions {
    /// Opaque pointer made available to the callback through
    /// [`RawParseState::payload`].
    payload: Option<NonNull<c_void>>,
    /// Callback invoked by the parser to report progress.
    progress_callback: Option<ProgressCallback>,
}
322
// Raw bindings to the tree-sitter parser API. The signatures must stay in sync with
// the C declarations; see the tree-sitter header for the authoritative contracts.
extern "C" {
    /// Allocates a new parser.
    fn ts_parser_new() -> NonNull<ParserData>;
    /// Deletes a parser and frees its memory.
    fn ts_parser_delete(parser: NonNull<ParserData>);
    /// Selects the grammar to parse with; the wrapper treats a `false` return as an
    /// incompatible grammar.
    fn ts_parser_set_language(parser: NonNull<ParserData>, language: Grammar) -> bool;
    /// Sets the ranges of the document to parse, given as a pointer to `count`
    /// contiguous `Range` values; the wrapper treats a `false` return as invalid
    /// ranges.
    fn ts_parser_set_included_ranges(
        parser: NonNull<ParserData>,
        ranges: *const Range,
        count: u32,
    ) -> bool;

    /// Parses the text supplied by `input`, optionally given a previous tree.
    /// Returns `None` (a null pointer) when no tree is produced.
    fn ts_parser_parse(
        parser: NonNull<ParserData>,
        old_tree: Option<NonNull<SyntaxTreeData>>,
        input: ParserInputRaw,
    ) -> Option<NonNull<SyntaxTreeData>>;

    /// Like `ts_parser_parse`, additionally taking options that carry a progress
    /// callback. Returns `None` (a null pointer) when no tree is produced.
    fn ts_parser_parse_with_options(
        parser: NonNull<ParserData>,
        old_tree: Option<NonNull<SyntaxTreeData>>,
        input: ParserInputRaw,
        parse_options: RawParseOptions,
    ) -> Option<NonNull<SyntaxTreeData>>;
}