Skip to main content

rustidy_parse/
lib.rs

1//! Parsing
2
3// Features
4#![feature(
5	never_type,
6	try_trait_v2,
7	try_trait_v2_residual,
8	pattern,
9	coverage_attribute,
10	decl_macro,
11	macro_metavar_expr_concat,
12	unwrap_infallible,
13	substr_range
14)]
15
16// Lints
17#![cfg_attr(
18	not(feature = "flamegraph-traces"),
19	expect(unused_crate_dependencies, reason = "It's only used with that feature flag")
20)]
21
22// Modules
23mod error;
24mod recursive;
25mod tag;
26mod whitespace;
27
28// Exports
29pub use {
30	self::{
31		error::{ParseError, ParserError},
32		recursive::{
33			FromRecursiveRoot,
34			IntoRecursiveRoot,
35			ParsableRecursive,
36			ParseRecursive,
37			RecursiveWrapper,
38			TryFromRecursiveRoot,
39		},
40		tag::ParserTag,
41	},
42	rustidy_macros::Parse,
43};
44
45// Imports
46use {
47	app_error::AppError,
48	arcstr::ArcStr,
49	core::{marker::PhantomData, ops::{Residual, Try}},
50	rustidy_util::{ArenaData, ArenaIdx, AstPos, AstRange, AstStr},
51	std::fmt,
52};
53#[cfg(feature = "flamegraph-traces")]
54use {
55	app_error::Context,
56	flate2::write::GzEncoder,
57	std::{env, fs, io::BufWriter, io::Write},
58};
59
60/// Parsable types
61pub trait Parse: Sized {
62	/// Error type
63	type Error: ParseError;
64
65	/// A name for this type.
66	///
67	/// This is mostly used in error reporting and should be a lower case name,
68	/// with `a` / `an` prefixed. Used for example in the following way:
69	/// `expected {name}, found {other_name}`.
70	///
71	/// If this returns `None`, no extra error is displayed when parsing the type.
72	/// This is useful for containers that usually don't want to expose themselves
73	#[must_use]
74	#[coverage(off)]
75	fn name() -> Option<impl fmt::Display> {
76		None::<!>
77	}
78
79	/// Parses this type from `input`, mutating it in-place.
80	fn parse_from(parser: &mut Parser) -> Result<Self, Self::Error>;
81}
82
83#[derive(Debug)]
84pub struct NeverError;
85
86impl ParseError for NeverError {
87	fn is_fatal(&self) -> bool {
88		false
89	}
90
91	fn pos(&self) -> Option<AstPos> {
92		None
93	}
94
95	fn to_app_error(&self, _parser: &Parser) -> AppError {
96		AppError::from_multiple([])
97	}
98}
99
100impl Parse for ! {
101	type Error = NeverError;
102
103	fn parse_from(_parser: &mut Parser) -> Result<Self, Self::Error> {
104		Err(NeverError)
105	}
106}
107
108impl<T> Parse for PhantomData<T> {
109	type Error = !;
110
111	fn parse_from(_parser: &mut Parser) -> Result<Self, Self::Error> {
112		Ok(Self)
113	}
114}
115
116impl<T> Parse for Box<T>
117where
118	T: Parse, {
119	type Error = T::Error;
120
121	fn name() -> Option<impl fmt::Display> {
122		T::name()
123	}
124
125	fn parse_from(parser: &mut Parser) -> Result<Self, Self::Error> {
126		T::parse_from(parser).map(Self::new)
127	}
128}
129
130impl<T> Parse for Option<T>
131where
132	T: Parse, {
133	type Error = T::Error;
134
135	fn name() -> Option<impl fmt::Display> {
136		T::name()
137	}
138
139	fn parse_from(parser: &mut Parser) -> Result<Self, Self::Error> {
140		parser
141			.try_parse_with(T::parse_from)
142			.map(Result::ok)
143	}
144}
145
146impl<T> Parse for Vec<T>
147where
148	T: Parse, {
149	type Error = T::Error;
150
151	fn name() -> Option<impl fmt::Display> {
152		T::name()
153	}
154
155	fn parse_from(parser: &mut Parser) -> Result<Self, Self::Error> {
156		let mut values = vec![];
157		loop {
158			let start_pos = parser.cur_pos;
159			match parser.try_parse_with(T::parse_from)? {
160				Ok(value) if parser.cur_pos != start_pos => values.push(value),
161				_ => break,
162			}
163		}
164
165		Ok(values)
166	}
167}
168
169impl Parse for () {
170	type Error = !;
171
172	fn parse_from(_parser: &mut Parser) -> Result<Self, Self::Error> {
173		Ok(())
174	}
175}
176
177macro tuple_impl(
178	$N:literal, $($T:ident),* $(,)?
179) {
180	#[automatically_derived]
181	impl< $($T: Parse,)* > Parse for ( $($T,)* ) {
182		type Error = ${concat( Tuple, $N, Error )}< $($T,)* >;
183
184		#[expect(non_snake_case)]
185		fn parse_from(parser: &mut Parser) -> Result<Self, Self::Error> {
186			$(
187				let $T = parser.parse().map_err(Self::Error::$T)?;
188			)*
189			Ok(( $( $T, )* ))
190		}
191	}
192
193	#[derive(derive_more::Debug)]
194	pub enum ${concat( Tuple, $N, Error )}< $($T: Parse,)* > {
195		$(
196			$T(ParserError<$T>),
197		)*
198	}
199
200	#[automatically_derived]
201	impl< $($T: Parse,)* > ParseError for ${concat( Tuple, $N, Error )}< $($T,)* > {
202		fn is_fatal(&self) -> bool {
203			match *self {
204				$(
205					Self::$T(ref err, ..) => err.is_fatal(),
206				)*
207			}
208		}
209
210		fn pos(&self) -> Option<AstPos> {
211			match *self {
212				$(
213					Self::$T(ref err, ..) => err.pos(),
214				)*
215			}
216		}
217
218		fn to_app_error(&self, parser: &Parser) -> AppError {
219			match *self {
220				$(
221					Self::$T(ref err, ..) => err.to_app_error(parser),
222				)*
223			}
224		}
225	}
226}
227
228tuple_impl! { 1, T0 }
229tuple_impl! { 2, T0, T1 }
230tuple_impl! { 3, T0, T1, T2 }
231tuple_impl! { 4, T0, T1, T2, T3 }
232
233impl<T: ArenaData + Parse> Parse for ArenaIdx<T> {
234	type Error = T::Error;
235
236	fn name() -> Option<impl fmt::Display> {
237		T::name()
238	}
239
240	fn parse_from(parser: &mut Parser) -> Result<Self, Self::Error> {
241		let value = T::parse_from(parser)?;
242		let idx = Self::new(value);
243		Ok(idx)
244	}
245}
246
247/// Parser
248#[derive(Debug)]
249pub struct Parser {
250	/// Input
251	input:           ArcStr,
252
253	/// Current position
254	cur_pos:         AstPos,
255
256	/// Tags
257	// Note: Always sorted by ast position.
258	tags:            Vec<(AstPos, ParserTag)>,
259
260	/// Tags offset
261	tags_offset:     usize,
262
263	#[cfg(feature = "flamegraph-traces")]
264	stack:           Vec<&'static str>,
265
266	#[cfg(feature = "flamegraph-traces")]
267	trace_max_depth: usize,
268
269	#[cfg(feature = "flamegraph-traces")]
270	trace_file:      BufWriter<GzEncoder<fs::File>>,
271}
272
273impl Parser {
274	/// Creates a new parser
275	#[must_use]
276	pub fn new(input: impl Into<ArcStr>) -> Self {
277		Self {
278			input: input.into(),
279			cur_pos: AstPos::from_usize(0),
280			tags: vec![],
281			tags_offset: 0,
282			#[cfg(feature = "flamegraph-traces")]
283			stack: {
284				let mut stack = Vec::with_capacity(128);
285				stack.push("parse");
286				stack
287			},
288			#[cfg(feature = "flamegraph-traces")]
289			trace_max_depth: {
290				let var = "RUSTIDY_FLAMEGRAPH_TRACE_MAX_DEPTH";
291				let default = 50;
292
293				match self::get_flamegraph_trace_max_depth(var, default) {
294					Ok(max_depth) => max_depth,
295					Err(err) => {
296						tracing::warn!("Unable to parse {var:?}: {err:?}");
297						default
298					},
299				}
300			},
301			#[cfg(feature = "flamegraph-traces")]
302			trace_file: {
303				let var = "RUSTIDY_FLAMEGRAPH_TRACE_FILE";
304				let default = "output.gz";
305
306				self::open_flamegraph_trace_file(var, default).unwrap_or_else(
307					|err| panic!("Unable to create {var:?}: {err:?}")
308				)
309			},
310		}
311	}
312
313	/// Returns the whole input of the parser
314	#[must_use]
315	pub const fn input(&self) -> &ArcStr {
316		&self.input
317	}
318
319	/// Returns the remaining string for the parser
320	#[must_use]
321	pub fn remaining(&self) -> &str {
322		&self.input[self.cur_pos.0..]
323	}
324
325	/// Returns the current position of the parser
326	pub const fn cur_pos(&mut self) -> AstPos {
327		self.cur_pos
328	}
329
330	/// Sets the position of this parser
331	pub const fn set_pos(&mut self, pos: AstPos) {
332		self.cur_pos = pos;
333	}
334
335	/// Reverses all whitespace (except the last) in the current position
336	pub fn reverse_whitespace(&mut self) {
337		self.cur_pos.0 = self
338			.input[..self
339			.cur_pos.0]
340			.rfind(|ch: char| !ch.is_whitespace())
341			.map_or(0, |idx| idx + 1);
342	}
343
344	/// Reverses to the start of the current line
345	pub fn reverse_line(&mut self) {
346		self.cur_pos.0 = self
347			.input[..self
348			.cur_pos.0]
349			.rfind('\n')
350			.map_or(0, |idx| idx + 1);
351	}
352
353	/// Returns the current line of the parser, not including the end
354	#[must_use]
355	pub fn cur_line(&self) -> &str {
356		let start = self
357			.input[..self
358			.cur_pos.0]
359			.rfind('\n')
360			.map_or(0, |idx| idx + 1);
361		let end = self.cur_pos.0 + self
362			.input[self
363			.cur_pos.0..]
364			.find('\n')
365			.unwrap_or(self.input.len() - self.cur_pos.0);
366
367		&self.input[start..end]
368	}
369
370	/// Gets the position (0-indexed) of the parser at a position
371	#[must_use]
372	pub fn loc(&self, pos: AstPos) -> ParserLoc {
373		let line = self
374			.input[..pos.0]
375			.chars()
376			.filter(|&ch| ch == '\n').count();
377		let column = match self.input[..pos.0].rfind('\n') {
378			Some(newline_pos) => pos.0 - newline_pos - 1,
379			None => pos.0,
380		};
381
382		ParserLoc { line, column }
383	}
384
385	/// Gets the current position (0-indexed) of the parser
386	#[must_use]
387	pub fn cur_loc(&self) -> ParserLoc {
388		self.loc(self.cur_pos)
389	}
390
391	/// Returns if the parser is finished
392	#[must_use]
393	pub fn is_finished(&self) -> bool {
394		self.remaining().is_empty()
395	}
396
397	/// Updates this parser from a string.
398	///
399	/// See [`Self::try_update_with`] for more details.
400	pub fn update_with<F, O>(&mut self, f: F) -> (AstStr, O)
401	where
402		F: FnOnce(&mut &str) -> O,
403	{
404		self
405			.try_update_with(|remaining| Ok::<_, !>(f(remaining)))
406			.into_ok()
407	}
408
409	/// Updates this parser from a string.
410	///
411	/// The function `f` receives a string to update.
412	/// The value it is updated with *must* be a substring of the
413	/// received function.
414	///
415	/// # Success
416	/// When `f` returns successfully, the parser is updated from
417	/// the state of the string.
418	///
419	/// # Failure
420	/// If `f` returns unsuccessfully, an error will be returned
421	/// with the latest change to the string as it's position.
422	pub fn try_update_with<F, T>(&mut self, f: F) -> <T::Residual as Residual<(AstStr, T::Output)>>::TryType
423	where
424		F: FnOnce(&mut &str) -> T,
425		T: Try<Residual: Residual<(AstStr, T::Output)>>,
426	{
427		let mut remaining = self.remaining();
428		let res = f(&mut remaining);
429
430		let remaining_range = self
431			.remaining()
432			.substr_range(remaining)
433			.expect("Result was not a substring of the input");
434		assert_eq!(self.cur_pos.0 + remaining_range.end, self.input.len(), "Updated string truncated input");
435
436		let output_range = self.cur_pos.0..self.cur_pos.0 + remaining_range.start;
437		self.cur_pos.0 += remaining_range.start;
438
439		// After updating the remaining, quit if an error occurred
440		let value = res?;
441
442		<_>::from_output(
443			(AstStr::from_input(self.input.substr(output_range)), value)
444		)
445	}
446
447	/// Parses `T` from this parser
448	pub fn parse<T: Parse>(&mut self) -> Result<T, ParserError<T>> {
449		#[cfg(feature = "flamegraph-traces")]
450		let emit_traces = self.stack.len() < self.trace_max_depth;
451
452		#[cfg(feature = "flamegraph-traces")]
453		if emit_traces {
454			self.stack.push(std::any::type_name::<T>());
455		}
456
457		#[cfg(feature = "flamegraph-traces")]
458		let start = self::flamegraph_trace_get_timestamp();
459
460		let start_pos = self.cur_pos;
461		let res = T::parse_from(self).map_err(
462			|source| ParserError::new(source, AstRange::new(start_pos, self.cur_pos))
463		);
464
465		#[cfg(feature = "flamegraph-traces")]
466		let end = self::flamegraph_trace_get_timestamp();
467
468		// TODO: Do this in another thread?
469		#[cfg(feature = "flamegraph-traces")]
470		if emit_traces {
471			self.stack.pop();
472			for &ty in &self.stack {
473				write!(self.trace_file, "{ty};")
474					.expect("Unable to write to trace file");
475			}
476			writeln!(self.trace_file, "{} {}", std::any::type_name::<T>(), end - start)
477				.expect("Unable to write to trace file");
478		}
479
480		res
481	}
482
483	/// Parses `T` from this parser with a peeked value
484	pub fn parse_with_peeked<T: ParsePeeked<U>, U>(&mut self, parsed: U) -> Result<T, ParserError<T>> {
485		let start_pos = self.cur_pos;
486		T::parse_from_with_peeked(self, parsed).map_err(
487			|source| ParserError::new(source, AstRange::new(start_pos, self.cur_pos))
488		)
489	}
490
491	/// Tries to parses `T` from this parser using `parser` for parsing.
492	///
493	/// On error, nothing is modified.
494	pub fn try_parse_with<T, E: ParseError>(&mut self, parser: impl FnOnce(&mut Self) -> Result<T, E>,) -> Result<Result<T, E>, E> {
495		let prev_pos = self.cur_pos;
496		match parser(self) {
497			Ok(value) => Ok(Ok(value)),
498			Err(err) if err.is_fatal() => Err(err),
499			Err(err) => {
500				self.cur_pos = prev_pos;
501				Ok(Err(err))
502			},
503		}
504	}
505
506	/// Tries to parses `T` from this parser.
507	///
508	/// On error, nothing is modified.
509	pub fn try_parse<T: Parse>(&mut self) -> Result<Result<T, ParserError<T>>, ParserError<T>> {
510		self.try_parse_with(Self::parse::<T>)
511	}
512
513	/// Peeks a `T` from this parser using `parser` for parsing.
514	///
515	/// Parser is only advanced is a fatal error occurs
516	pub fn peek_with<T, E: ParseError>(&mut self, parse: impl FnOnce(&mut Self) -> Result<T, E>,) -> Result<Result<(T, PeekState), E>, E> {
517		let start_pos = self.cur_pos;
518		let output = match parse(self) {
519			Ok(value) => Ok(value),
520			Err(err) if err.is_fatal() => return Err(err),
521			Err(err) => Err(err),
522		};
523
524		let peek_state = PeekState { cur_pos: self.cur_pos };
525		self.cur_pos = start_pos;
526
527		let output = output.map(|value| (value, peek_state));
528		Ok(output)
529	}
530
531	/// Peeks a `T` from this parser.
532	///
533	/// Parser is only advanced is a fatal error occurs
534	#[expect(clippy::type_complexity, reason = "TODO")]
535	pub fn peek<T: Parse>(&mut self) -> Result<Result<(T, PeekState), ParserError<T>>, ParserError<T>> {
536		self.peek_with(Self::parse::<T>)
537	}
538
539	/// Accepts a peeked state.
540	#[expect(
541		clippy::needless_pass_by_value,
542		reason = "It's to ensure the user doesn't use the same peek state multiple times"
543	)]
544	pub const fn set_peeked(&mut self, peek_state: PeekState) {
545		self.cur_pos = peek_state.cur_pos;
546	}
547
548	/// Returns all current tags
549	pub fn tags(&self) -> impl Iterator<Item = ParserTag> {
550		self
551			.tags[self
552			.tags_offset..]
553			.iter()
554			.rev()
555			.take_while(|&&(pos, _)| pos == self.cur_pos)
556			.map(|&(_, tag)| tag)
557	}
558
559	/// Returns if this parser has a tag
560	#[must_use]
561	pub fn has_tag(&self, tag: impl Into<ParserTag>) -> bool {
562		let tag = tag.into();
563		self.tags().any(|cur_tag| cur_tag == tag)
564	}
565
566	/// Calls `f` with tags `tags` added to this parser
567	pub fn with_tags<O>(
568		&mut self,
569		tags: impl IntoIterator<Item = ParserTag>,
570		f: impl FnOnce(&mut Self) -> O
571	) -> O {
572		let tags_len = self.tags.len();
573
574		for tag in tags {
575			self.tags.push((self.cur_pos, tag));
576		}
577		let output = f(self);
578		self.tags.truncate(tags_len);
579
580		output
581	}
582
583	/// Calls `f` with tag `tag` added to this parser
584	pub fn with_tag<O>(
585		&mut self,
586		tag: impl Into<ParserTag>,
587		f: impl FnOnce(&mut Self) -> O
588	) -> O {
589		self.with_tags([tag.into()], f)
590	}
591
592	/// Calls `f` with all tags removed.
593	pub fn without_tags<O>(&mut self, f: impl FnOnce(&mut Self) -> O) -> O {
594		let prev_offset = self.tags_offset;
595		self.tags_offset = self.tags.len();
596		let output = f(self);
597		self.tags_offset = prev_offset;
598
599		output
600	}
601}
602
603/// Peek state
604#[derive(Debug)]
605pub struct PeekState {
606	cur_pos: AstPos,
607}
608
609impl PeekState {
610	/// Returns if this peek state is further ahead than another
611	#[must_use]
612	pub fn ahead_of(&self, other: &Self) -> bool {
613		self.cur_pos > other.cur_pos
614	}
615
616	/// Returns if this peek state is further ahead or equal to another
617	#[must_use]
618	pub fn ahead_of_or_equal(&self, other: &Self) -> bool {
619		self.cur_pos >= other.cur_pos
620	}
621}
622
623/// Parser location (0-indexed).
624///
625/// # Display
626/// The display impl transforms this into a 1-indexed format of `{line}:{column}`
627#[derive(Debug)]
628#[derive(derive_more::Display)]
629#[display("{}:{}", line + 1, column + 1)]
630pub struct ParserLoc {
631	pub line:   usize,
632	pub column: usize,
633}
634
635/// Types that may be parsed using a peek into itself
636pub trait ParsePeeked<T>: Parse {
637	fn parse_from_with_peeked(parser: &mut Parser, parsed: T) -> Result<Self, Self::Error>;
638}
639
640impl<T, U> ParsePeeked<U> for T
641where
642	T: Parse + From<U>, {
643	fn parse_from_with_peeked(_parser: &mut Parser, parsed: U) -> Result<Self, Self::Error> {
644		Ok(parsed.into())
645	}
646}
647
/// Types that may be created from another parsed type `T`
pub trait ParsableFrom<T> {
	/// Creates this type from the parsed `value`
	fn from_parsable(value: T) -> Self;
}
652
653/// `[Parser::parse]` for strings
654pub fn parse_from_str<'a, F, E>(s: &mut &'a str, parse: F) -> Result<&'a str, E>
655where
656	F: FnOnce(&mut &'a str) -> Result<(), E>,
657	E: ParseError,
658{
659	let start = *s;
660	parse(s)?;
661	let range = start
662		.substr_range(s)
663		.expect("Output was not a substring of the input");
664	let parsed = &start[..range.start];
665	Ok(parsed)
666}
667
668/// `[Parser::try_parse]` for strings
669pub fn try_parse_from_str<'a, F, E>(s: &mut &'a str, parse: F) -> Result<Result<&'a str, E>, E>
670where
671	F: FnOnce(&mut &'a str) -> Result<(), E>,
672	E: ParseError,
673{
674	match self::parse_from_str(s, parse) {
675		Ok(value) => Ok(Ok(value)),
676		Err(err) if err.is_fatal() => Err(err),
677		Err(err) => Ok(Err(err)),
678	}
679}
680
/// Reads the maximum flamegraph trace depth from the environment variable `var`.
///
/// Returns `default` if the variable can't be read, and an error if its
/// value isn't a valid `usize`.
#[cfg(feature = "flamegraph-traces")]
fn get_flamegraph_trace_max_depth(var: &str, default: usize) -> Result<usize, AppError> {
	match env::var(var) {
		Ok(max_depth) => max_depth
			.parse::<usize>()
			.context("Unable to parse value"),
		Err(_) => Ok(default),
	}
}
691
/// Creates the flamegraph trace file.
///
/// The path is read from the environment variable `var`, falling back to
/// `default` if it can't be read. The file is wrapped in a fast gzip
/// encoder and a buffered writer.
#[cfg(feature = "flamegraph-traces")]
fn open_flamegraph_trace_file(var: &str, default: &str) -> Result<BufWriter<GzEncoder<fs::File>>, AppError> {
	let env_path = env::var(var);
	let path = env_path.as_deref().unwrap_or(default);

	let file = fs::File::create(path)
		.context("Unable to create file")?;
	let encoder = GzEncoder::new(file, flate2::Compression::fast());

	Ok(BufWriter::new(encoder))
}
705
/// Returns a timestamp used to time parsing for the flamegraph traces.
///
/// On `x86_64` this is the raw value of the time-stamp counter. On all other
/// architectures, where that instruction doesn't exist, it is the number of
/// nanoseconds elapsed since the first call to this function.
///
/// Only the difference between two timestamps is meaningful.
#[cfg(feature = "flamegraph-traces")]
fn flamegraph_trace_get_timestamp() -> u64 {
	#[cfg(target_arch = "x86_64")]
	{
		// SAFETY: `rdtsc` is always safe to call
		unsafe {
			std::arch::x86_64::_rdtsc()
		}
	}

	#[cfg(not(target_arch = "x86_64"))]
	{
		// Note: `Instant` has no absolute value, so we measure against
		//       the first time we were called.
		static START: std::sync::OnceLock<std::time::Instant> = std::sync::OnceLock::new();

		let elapsed = START.get_or_init(std::time::Instant::now).elapsed();
		u64::try_from(elapsed.as_nanos()).unwrap_or(u64::MAX)
	}
}