static_regular_grammar/
lib.rs

1//! This library provides the handy `RegularGrammar` derive macro that helps you
2//! create unsized type wrapping byte or char strings validated by a regular
3//! grammar. It works by parsing a grammar specified in a file or the
4//! documentation of your type, statically compiling it into a deterministic,
5//! minimal, regular automaton then translated into a Rust validation function.
6//!
7//! For now, only the [ABNF] grammar format is supported.
8//!
9//! [ABNF]: <https://datatracker.ietf.org/doc/html/rfc5234>
10//!
11//! # Basic Usage
12//!
13//! The grammar is specified by code blocks in the type documentation.
//! The type itself must be a simple tuple struct with a single unnamed field
15//! specifying the grammar "token string type". This token string type can be:
16//! - `[u8]`: the grammar is defined on bytes.
17//! - `str`: the grammar is defined on unicode characters.
18//!
19//! ## Example
20//!
21//! ```
22//! use static_regular_grammar::RegularGrammar;
23//!
24//! /// Example grammar.
25//! ///
26//! /// ```abnf
27//! /// foo = "f" 1*("oo") ; the first non-terminal is used as entry point.
28//! /// ```
29//! #[derive(RegularGrammar)]
30//! pub struct Foo([u8]);
31//!
32//! let foo = Foo::new(b"foooooo").unwrap();
33//! ```
34//!
35//! The derive macro also provides a `grammar` attribute to configure the
36//! grammar and the generated code. With this attribute, instead of using the
37//! documentation, you can specify a path to a file containing the grammar:
38//!
39//! ```
40//! # use static_regular_grammar::RegularGrammar;
41//! /// Example grammar.
42//! #[derive(RegularGrammar)]
43//! #[grammar(file = "examples/test.abnf")]
44//! pub struct Foo([u8]);
45//!
46//! let foo = Foo::new(b"foooooo").unwrap();
47//! ```
48//!
49//! # Grammar Entry Point
50//!
51//! By default the first non-terminal defined in the grammar is used as entry
52//! point. You can specify a different entry point using the `entry_point`
53//! sub-attribute of the `grammar` attribute:
54//!
55//! ```
56//! # use static_regular_grammar::RegularGrammar;
57//! /// Example grammar.
58//! #[derive(RegularGrammar)]
59//! #[grammar(file = "examples/test.abnf", entry_point = "bar")]
60//! pub struct Bar([u8]);
61//!
62//! let bar = Bar::new(b"baaaar").unwrap();
63//! ```
64//!
65//! # ASCII
66//!
67//! Using the `[u8]` token string type, it is possible to specify that the
68//! value can be interpreted as an ASCII text string. Then the resulting type
//! will implement `Display`, `Deref<Target=str>`, `AsRef<str>`, etc.
70//! ```
71//! # use static_regular_grammar::RegularGrammar;
72//! #[derive(RegularGrammar)]
73//! #[grammar(file = "examples/test.abnf", ascii)]
74//! pub struct Bar([u8]);
75//!
76//! let bar = Bar::new(b"baaaar").unwrap();
77//! println!("{bar}");
78//! ```
79//!
80//! # Sized Type
81//!
//! The `RegularGrammar` macro works on unsized types, but it is often useful
//! to have a sized equivalent that can own the data while still guaranteeing
84//! the validity of the data. The derive macro can do that for you using the
85//! `sized` sub-attribute of the `grammar` attribute.
86//!
87//! ```
88//! # use static_regular_grammar::RegularGrammar;
89//! /// Example grammar, with sized variant.
90//! ///
91//! /// ```abnf
92//! /// foo = "f" 1*("oo")
93//! /// ```
94//! #[derive(RegularGrammar)]
95//! #[grammar(sized(FooBuf))] // this will generate a `FooBuf` type.
96//! pub struct Foo([u8]);
97//!
98//! let foo = FooBuf::new(b"foooooo".to_vec()).unwrap();
99//! ```
100//!
101//! The sized type will implement `Deref`, `Borrow` and `AsRef` to the unsized
102//! type. It will also include a method named `as_unsized_type_name` (e.g.
103//! `as_foo` in the example above) returning a reference to the unsized type.
104//!
105//! ## Common trait implementations
106//!
107//! You can specify what common trait to automatically implement for the sized
108//! type using the `derive` sub-attribute.
109//!
110//! ```ignore
111//! #[grammar(sized(FooBuf, derive(PartialEq, Eq)))]
112//! ```
113//!
114//! The supported traits are:
115//! - `Debug`
116//! - `Display`
117//! - `PartialEq`
118//! - `Eq`
119//! - `PartialOrd`
120//! - `Ord`
121//! - `Hash`
122//!
123//! All will rely on an equivalent implementation for the unsized type.
124//!
125//! # Caching
126//!
//! When compiled, the input grammar is determinized and minimized. Those are
//! expensive operations that can take several seconds on large grammars.
129//! To avoid unnecessary work, the resulting automaton is stored on disk until
130//! changes are made to the grammar. By default, the automaton will be stored
131//! in the `target` folder, as `regular-grammar/TypeName.automaton.cbor`. For
132//! instance, in the example above the path will be
133//! `target/regular-grammar/Foo.automaton.cbor`.
134//! You can specify the file path yourself using the `cache` sub-attribute:
135//!
136//! ```ignore
137//! #[grammar(cache = "path/to/cache.automaton.cbor")]
138//! ```
139//!
140//! The path must be relative, and must not include `..` segments.
141//! If you have multiple grammar types having the same name, use this attribute
142//! to avoid conflicts, otherwise caching will not work.
143//! For large grammars, it might be a good idea to cache the automaton directly
144//! with the sources, and ship it with your library/application to reduce
145//! compilation time on the user machine.
146//!
147//! # Disable automaton generation
148//!
149//! When using a linter such as [`rust-analyzer`], it may be too expensive to
150//! regenerate the grammar automaton continually, even with caching. On large
151//! grammars the generated automaton code can span hundreds or even thousands
//! of lines. In that case it is possible to disable the automaton generation
//! altogether using the `disable` option:
154//! ```ignore
155//! #[grammar(disable)]
156//! ```
157//!
158//! Of course it is best to use this option behind a feature used only by the
159//! linter:
160//! ```ignore
161//! #[cfg_attr(feature = "disable-grammars", grammar(disable))]
162//! ```
163//!
//! [`rust-analyzer`]: <https://rust-analyzer.github.io/>
165use std::env;
166
167use indoc::formatdoc;
168use proc_macro2::{Ident, Span, TokenStream};
169use proc_macro_error::{abort, proc_macro_error};
170use quote::{format_ident, quote};
171use syn::{parse_macro_input, spanned::Spanned, Data, DeriveInput};
172
173mod attribute;
174mod byteset;
175mod charset;
176mod grammar;
177mod options;
178mod token;
179mod utils;
180
181use attribute::Attribute;
182use byteset::ByteSet;
183use charset::CharSet;
184use grammar::{extract_grammar, Grammar, GrammarError};
185use options::*;
186use token::{Token, TokenSet};
187use utils::{automaton::DetAutomaton, SnakeCase};
188
189#[proc_macro_derive(RegularGrammar, attributes(grammar))]
190#[proc_macro_error]
191pub fn derive_regular_grammar(input_tokens: proc_macro::TokenStream) -> proc_macro::TokenStream {
192	let input = parse_macro_input!(input_tokens as DeriveInput);
193	match generate_regular_grammar(input) {
194		Ok(tokens) => tokens.into(),
195		Err((e, span)) => abort!(span, e),
196	}
197}
198
199#[derive(Debug, thiserror::Error)]
200pub(crate) enum Error {
201	#[error(transparent)]
202	Var(env::VarError),
203
204	#[error("unexpected type parameter")]
205	UnexpectedTypeParameter,
206
207	#[error("unexpected union type")]
208	UnexpectedUnion,
209
210	#[error("unexpected enum type")]
211	UnexpectedEnum,
212
213	#[error("unexpected unit structure")]
214	UnexpectedUnitStruct,
215
216	#[error("unexpected nammed fields")]
217	UnexpectedNamedFields,
218
219	#[error("missing unnamed field")]
220	MissingField,
221
222	#[error("unexpected unnamed field")]
223	UnexpectedUnnamedField,
224
225	#[error("unsafe visibility")]
226	UnsafeVisibility,
227
228	#[error("invalid inner type")]
229	InvalidInnerType,
230
231	#[error("`grammar` attribute error: {0}")]
232	Attribute(#[from] attribute::Error),
233
234	#[error(transparent)]
235	Grammar(GrammarError),
236
237	#[error("target directory not found: {0}")]
238	TargetDirNotFound(std::env::VarError),
239
240	#[error("missing sized type identifier")]
241	MissingSizedTypeIdent,
242
243	#[error("grammar is not ASCII")]
244	NotAscii,
245}
246
/// Kind of token string wrapped by the annotated type.
enum TokenType {
	/// The inner type is `[u8]`: the grammar is defined on bytes.
	Bytes,
	/// The inner type is `str`: the grammar is defined on characters.
	String,
}
251
252fn is_str_path(path: &syn::Path) -> bool {
253	path.is_ident("str")
254}
255
256fn is_u8_path(path: &syn::Path) -> bool {
257	path.is_ident("u8")
258}
259
260impl TokenType {
261	fn from_type(ty: syn::Type) -> Result<Self, (Error, Span)> {
262		match ty {
263			syn::Type::Slice(ty) => Self::from_array_element_type(*ty.elem),
264			syn::Type::Path(ty) if is_str_path(&ty.path) => Ok(Self::String),
265			ty => Err((Error::InvalidInnerType, ty.span())),
266		}
267	}
268
269	fn from_array_element_type(ty: syn::Type) -> Result<Self, (Error, Span)> {
270		match ty {
271			syn::Type::Path(ty) if is_u8_path(&ty.path) => Ok(Self::Bytes),
272			ty => Err((Error::InvalidInnerType, ty.span())),
273		}
274	}
275}
276
/// Information extracted from the item annotated with
/// `#[derive(RegularGrammar)]`.
struct GrammarData {
	/// Visibility of the annotated type.
	vis: syn::Visibility,
	/// Identifier of the annotated type.
	ident: Ident,
	/// Token string type of the single unnamed field.
	token: TokenType,
	/// Options built from the `grammar` attributes.
	options: Options,
}
283
284fn extract_grammar_data(
285	input: DeriveInput,
286) -> Result<(GrammarData, Vec<syn::Attribute>), (Error, Span)> {
287	match input.data {
288		Data::Struct(s) => match s.fields {
289			syn::Fields::Unit => Err((Error::UnexpectedUnitStruct, s.fields.span())),
290			syn::Fields::Named(_) => Err((Error::UnexpectedNamedFields, s.fields.span())),
291			syn::Fields::Unnamed(fields) => {
292				let fields_span = fields.span();
293				let mut iter = fields.unnamed.into_iter();
294
295				match iter.next() {
296					Some(field) => match iter.next() {
297						Some(f) => Err((Error::UnexpectedUnnamedField, f.span())),
298						None => match field.vis {
299							syn::Visibility::Inherited => {
300								let mut grammar_attr = Attribute::default();
301								let mut attrs_rest = Vec::with_capacity(input.attrs.len());
302								for attr in input.attrs {
303									if attr.meta.path().is_ident("grammar") {
304										grammar_attr
305											.append(Attribute::parse(attr).map_err(
306												|(e, span)| (Error::Attribute(e), span),
307											)?);
308									} else if attr.meta.path().is_ident("cfg_attr") {
309										// This only happens inside `rustanalyzer`.
310										// See: https://github.com/rust-lang/rust-analyzer/issues/13360
311										// See: https://github.com/rust-lang/rust-analyzer/issues/13436
312										// Just to be sure, we disable the grammar.
313										grammar_attr.disable = true
314									} else {
315										attrs_rest.push(attr);
316									}
317								}
318
319								let options = Options::from_attribute(&input.ident, grammar_attr)?;
320
321								Ok((
322									GrammarData {
323										vis: input.vis,
324										ident: input.ident,
325										token: TokenType::from_type(field.ty)?,
326										options,
327									},
328									attrs_rest,
329								))
330							}
331							vis => Err((Error::UnsafeVisibility, vis.span())),
332						},
333					},
334					None => Err((Error::MissingField, fields_span)),
335				}
336			}
337		},
338		Data::Union(u) => Err((Error::UnexpectedUnion, u.union_token.span())),
339		Data::Enum(e) => Err((Error::UnexpectedEnum, e.enum_token.span())),
340	}
341}
342
343fn generate_regular_grammar(input: DeriveInput) -> Result<TokenStream, (Error, Span)> {
344	match input.generics.params.iter().next() {
345		Some(param) => Err((Error::UnexpectedTypeParameter, param.span())),
346		None => {
347			let (data, attrs) = extract_grammar_data(input)?;
348			match data.token {
349				TokenType::Bytes => generate_typed::<u8>(data, attrs),
350				TokenType::String => generate_typed::<char>(data, attrs),
351			}
352		}
353	}
354}
355
356fn generate_typed<T: Token>(
357	data: GrammarData,
358	attrs: Vec<syn::Attribute>,
359) -> Result<TokenStream, (Error, Span)> {
360	let automaton = if data.options.disable {
361		let mut aut = DetAutomaton::new(0);
362		aut.declare_state(0);
363		aut.add_final_state(0);
364		aut
365	} else {
366		let (grammar, hash) = extract_grammar::<T>(
367			&data.options.cache_path,
368			data.options.file.as_deref(),
369			data.options.entry_point.as_deref(),
370			attrs,
371		)
372		.map_err(|(e, span)| (Error::Grammar(e), span))?;
373		let cached = grammar.is_cached();
374		let automaton = grammar.build_automaton();
375
376		if !cached {
377			if let Err(e) = Grammar::<T>::save_to_file(&data.options.cache_path, hash, &automaton) {
378				eprintln!("unable to cache regular automaton: {e}")
379			}
380		}
381
382		automaton
383	};
384
385	let contains_empty = automaton.contains_empty();
386
387	let vis = data.vis;
388	let ident = data.ident;
389	let ascii = data.options.ascii && T::is_ascii(&automaton);
390	let name = data
391		.options
392		.name
393		.or(data.options.entry_point)
394		.unwrap_or_else(|| ident.to_string());
395
396	if data.options.ascii && !ascii {
397		return Err((Error::NotAscii, Span::call_site()));
398	}
399
400	let as_ref = format_ident!("as_{}", SnakeCase(&ident.to_string()));
401
402	let token_type = T::rust_type();
403	let string_type = T::rust_string_type();
404	let iterator_method = T::rust_iterator_method();
405
406	let as_inner = T::rust_as_inner_method();
407
408	let error = format_ident!("Invalid{}", ident);
409	let error_msg = format!("Invalid {name} `{{0}}`");
410
411	let new_doc = format!("Creates a new {name} by parsing the `input` value");
412	let new_unchecked_doc = formatdoc!(
413		r#"
414        Creates a new {name} from the `input` value without validation.
415        
416        # Safety
417        
418        The input data *must* be a valid {name}."#
419	);
420
421	let validate_doc = format!("Checks that the input iterator produces a valid {name}");
422	let validate_body = if data.options.disable {
423		quote! {
424			panic!("automaton not generated")
425		}
426	} else {
427		generate_validation_function::<T>(&automaton)
428	};
429
430	let mut tokens = quote! {
431		#[derive(Debug)]
432		#vis struct #error<T>(pub T);
433
434		impl<T: ::core::fmt::Display> ::core::fmt::Display for #error<T> {
435			fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
436				write!(f, #error_msg, self.0)
437			}
438		}
439
440		impl<T: ::core::fmt::Debug + ::core::fmt::Display> ::std::error::Error for #error<T> {}
441
442		impl #ident {
443			#[doc = #new_doc]
444			pub fn new<T: ?Sized + AsRef<#string_type>>(input: &T) -> Result<&Self, #error<&T>> {
445				let input_ref = input.as_ref();
446				if Self::validate(input_ref.#iterator_method) {
447					Ok(unsafe {
448						Self::new_unchecked(input_ref)
449					})
450				} else {
451					Err(#error(input))
452				}
453			}
454
455			#[doc = #new_unchecked_doc]
456			pub const unsafe fn new_unchecked(input: &#string_type) -> &Self {
457				::core::mem::transmute(input)
458			}
459
460			#[doc = #validate_doc]
461			#[allow(unreachable_code)]
462			pub fn validate(mut input: impl Iterator<Item = #token_type>) -> bool {
463				#validate_body
464			}
465		}
466
467		impl AsRef<#ident> for #ident {
468			fn as_ref(&self) -> &#ident {
469				self
470			}
471		}
472
473		impl<'a> TryFrom<&'a #string_type> for &'a #ident {
474			type Error = #error<&'a #string_type>;
475
476			fn try_from(input: &'a #string_type) -> Result<&'a #ident, #error<&'a #string_type>> {
477				#ident::new(input)
478			}
479		}
480
481		impl<'a> From<&'a #ident> for &'a #string_type {
482			fn from(value: &'a #ident) -> &'a #string_type {
483				&value.0
484			}
485		}
486	};
487
488	if contains_empty {
489		let empty_string = T::rust_empty_string();
490		tokens.extend(quote! {
491			impl #ident {
492				pub const EMPTY: &'static Self = unsafe {
493					Self::new_unchecked(#empty_string)
494				};
495			}
496		})
497	}
498
499	if !data.options.no_borrow {
500		let as_bytes = T::rust_inner_as_bytes_method().map(|as_bytes| {
501			quote! {
502				pub fn as_bytes(&self) -> &[u8] {
503					self.0.#as_bytes()
504				}
505			}
506		});
507
508		let borrow_bytes = T::rust_inner_as_bytes_method().map(|as_bytes| {
509			quote! {
510				impl ::core::convert::AsRef<[u8]> for #ident {
511					fn as_ref(&self) -> &[u8] {
512						self.0.#as_bytes()
513					}
514				}
515			}
516		});
517
518		tokens.extend(quote! {
519			impl #ident {
520				pub fn #as_inner(&self) -> &#string_type {
521					&self.0
522				}
523
524				#as_bytes
525			}
526
527			impl ::core::borrow::Borrow<#string_type> for #ident {
528				fn borrow(&self) -> &#string_type {
529					&self.0
530				}
531			}
532
533			impl ::core::convert::AsRef<#string_type> for #ident {
534				fn as_ref(&self) -> &#string_type {
535					&self.0
536				}
537			}
538
539			#borrow_bytes
540		});
541
542		if ascii {
543			if let Some(as_ascii) = T::rust_inner_as_ascii_method_body() {
544				tokens.extend(quote! {
545					impl #ident {
546						pub fn as_str(&self) -> &str {
547							#as_ascii
548						}
549					}
550
551					impl ::core::convert::AsRef<str> for #ident {
552						fn as_ref(&self) -> &str {
553							#as_ascii
554						}
555					}
556
557					impl<'a> TryFrom<&'a str> for &'a #ident {
558						type Error = #error<&'a str>;
559
560						fn try_from(input: &'a str) -> Result<&'a #ident, #error<&'a str>> {
561							#ident::new(input.as_bytes()).map_err(|_| #error(input))
562						}
563					}
564
565					impl<'a> From<&'a #ident> for &'a str {
566						fn from(value: &'a #ident) -> &'a str {
567							value.as_str()
568						}
569					}
570				})
571			}
572		}
573	}
574
575	if !data.options.no_deref {
576		match T::rust_inner_as_ascii_method_body() {
577			Some(as_ascii) if ascii => tokens.extend(quote! {
578				impl ::core::ops::Deref for #ident {
579					type Target = str;
580
581					fn deref(&self) -> &str {
582						#as_ascii
583					}
584				}
585			}),
586			_ => tokens.extend(quote! {
587				impl ::core::ops::Deref for #ident {
588					type Target = #string_type;
589
590					fn deref(&self) -> &#string_type {
591						&self.0
592					}
593				}
594			}),
595		}
596	}
597
598	if T::UNICODE || ascii {
599		tokens.extend(quote! {
600			impl ::core::fmt::Display for #ident {
601				fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
602					::core::fmt::Display::fmt(self.as_str(), f)
603				}
604			}
605
606			impl ::core::fmt::Debug for #ident {
607				fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
608					::core::fmt::Debug::fmt(self.as_str(), f)
609				}
610			}
611		});
612	}
613
614	if data.options.serde {
615		let serialize = if T::UNICODE || ascii {
616			quote! {
617				serializer.serialize_str(self.as_str())
618			}
619		} else {
620			quote! {
621				serializer.serialize_bytes(self.as_bytes())
622			}
623		};
624
625		let visit_bytes = if T::UNICODE {
626			quote! {
627				match std::str::from_utf8(v) {
628					Ok(s) => #ident::new(s).map_err(|_| ()),
629					Err(e) => Err(())
630				}
631			}
632		} else {
633			quote! {
634				#ident::new(v)
635			}
636		};
637
638		let expected = format!("some {ident}");
639
640		tokens.extend(quote! {
641			impl ::serde::Serialize for #ident {
642				fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
643				where
644					S: ::serde::Serializer
645				{
646					#serialize
647				}
648			}
649
650			impl<'a, 'de: 'a> ::serde::Deserialize<'de> for &'a #ident {
651				fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
652				where
653					D: ::serde::Deserializer<'de>
654				{
655					struct Visitor;
656
657					impl<'de> ::serde::de::Visitor<'de> for Visitor {
658						type Value = &'de #ident;
659
660						fn expecting(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
661							write!(f, #expected)
662						}
663
664						fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
665						where
666							E: ::serde::de::Error
667						{
668							#ident::new(v).map_err(|_| E::invalid_value(::serde::de::Unexpected::Str(v), &self))
669						}
670
671						fn visit_borrowed_bytes<E>(self, v: &'de [u8]) -> Result<Self::Value, E>
672						where
673							E: ::serde::de::Error
674						{
675							#visit_bytes.map_err(|_| E::invalid_value(::serde::de::Unexpected::Bytes(v), &self))
676						}
677					}
678
679					deserializer.deserialize_str(Visitor)
680				}
681			}
682		})
683	}
684
685	if let Some(buffer) = data.options.sized {
686		let buffer_ident = buffer.ident;
687		let owned_string_type = T::rust_owned_string_type();
688
689		let owned_doc = format!("Owned {name}.");
690		let owned_new_doc = format!("Creates a new owned {name} by parsing the `input` value");
691		let owned_new_unchecked_doc = formatdoc!(
692			r#"
693			Creates a new owned {name} from the `input` value without validation.
694			
695			# Safety
696			
697			The input data *must* be a valid {name}."#
698		);
699
700		tokens.extend(quote! {
701			#[doc = #owned_doc]
702			#[derive(Clone)]
703			#vis struct #buffer_ident(#owned_string_type);
704
705			impl #buffer_ident {
706				#[doc = #owned_new_doc]
707				pub fn new(input: #owned_string_type) -> Result<Self, #error<#owned_string_type>> {
708					if #ident::validate(input.#iterator_method) {
709						Ok(Self(input))
710					} else {
711						Err(#error(input))
712					}
713				}
714
715				#[doc = #owned_new_unchecked_doc]
716				pub const unsafe fn new_unchecked(input: #owned_string_type) -> Self {
717					Self(input)
718				}
719
720				pub fn #as_ref(&self) -> &#ident {
721					unsafe {
722						#ident::new_unchecked(&self.0)
723					}
724				}
725			}
726
727			impl ::core::ops::Deref for #buffer_ident {
728				type Target = #ident;
729
730				fn deref(&self) -> &Self::Target {
731					self.#as_ref()
732				}
733			}
734
735			impl ::core::borrow::Borrow<#ident> for #buffer_ident {
736				fn borrow(&self) -> &#ident {
737					self.#as_ref()
738				}
739			}
740
741			impl ::core::convert::AsRef<#ident> for #buffer_ident {
742				fn as_ref(&self) -> &#ident {
743					self.#as_ref()
744				}
745			}
746
747			impl ::std::borrow::ToOwned for #ident {
748				type Owned = #buffer_ident;
749
750				fn to_owned(&self) -> #buffer_ident {
751					unsafe {
752						#buffer_ident::new_unchecked(
753							self.0.to_owned()
754						)
755					}
756				}
757			}
758
759			impl TryFrom<#owned_string_type> for #buffer_ident {
760				type Error = #error<#owned_string_type>;
761
762				fn try_from(input: #owned_string_type) -> Result<#buffer_ident, #error<#owned_string_type>> {
763					#buffer_ident::new(input)
764				}
765			}
766
767			impl From<#buffer_ident> for #owned_string_type {
768				fn from(value: #buffer_ident) -> #owned_string_type {
769					value.0
770				}
771			}
772		});
773
774		if !T::UNICODE && ascii {
775			tokens.extend(quote! {
776				impl #buffer_ident {
777					pub fn into_string(self) -> ::std::string::String {
778						unsafe {
779							::std::string::String::from_utf8_unchecked(self.0)
780						}
781					}
782				}
783
784				impl TryFrom<::std::string::String> for #buffer_ident {
785					type Error = #error<::std::string::String>;
786
787					fn try_from(input: ::std::string::String) -> Result<#buffer_ident, #error<::std::string::String>> {
788						let bytes = input.into_bytes();
789						#buffer_ident::new(bytes).map_err(|#error(bytes)| unsafe {
790							#error(::std::string::String::from_utf8_unchecked(bytes))
791						})
792					}
793				}
794
795				impl From<#buffer_ident> for String {
796					fn from(value: #buffer_ident) -> String {
797						value.into_string()
798					}
799				}
800			})
801		}
802
803		if T::UNICODE || ascii {
804			tokens.extend(quote! {
805				impl ::std::str::FromStr for #buffer_ident {
806					type Err = #error<::std::string::String>;
807
808					fn from_str(s: &str) -> Result<Self, #error<::std::string::String>> {
809						let buffer = s.to_string();
810						buffer.try_into()
811					}
812				}
813			})
814		}
815
816		if contains_empty {
817			tokens.extend(quote! {
818				impl ::core::default::Default for #buffer_ident {
819					fn default() -> Self {
820						unsafe {
821							Self::new_unchecked(::core::default::Default::default())
822						}
823					}
824				}
825			})
826		}
827
828		if !data.options.no_borrow {
829			let as_bytes = T::rust_inner_as_bytes_method().map(|as_bytes| {
830				quote! {
831					pub fn as_bytes(&self) -> &[u8] {
832						self.0.#as_bytes()
833					}
834				}
835			});
836
837			let into_bytes = T::rust_inner_into_bytes_method().map(|into_bytes| {
838				quote! {
839					pub fn into_bytes(self) -> Vec<u8> {
840						self.0.#into_bytes()
841					}
842				}
843			});
844
845			let borrow_bytes = T::rust_inner_as_bytes_method().map(|as_bytes| {
846				quote! {
847					impl ::core::convert::AsRef<[u8]> for #buffer_ident {
848						fn as_ref(&self) -> &[u8] {
849							self.0.#as_bytes()
850						}
851					}
852				}
853			});
854
855			let borrow_ascii = T::rust_inner_as_ascii_method_body().map(|as_ascii| {
856				quote! {
857					impl ::core::convert::AsRef<str> for #buffer_ident {
858						fn as_ref(&self) -> &str {
859							#as_ascii
860						}
861					}
862				}
863			});
864
865			let into_inner = T::rust_into_inner_method();
866
867			tokens.extend(quote! {
868				impl #buffer_ident {
869					pub fn #as_inner(&self) -> &#string_type {
870						&self.0
871					}
872
873					pub fn #into_inner(self) -> #owned_string_type {
874						self.0
875					}
876
877					#as_bytes
878					#into_bytes
879				}
880
881				impl ::core::borrow::Borrow<#string_type> for #buffer_ident {
882					fn borrow(&self) -> &#string_type {
883						&self.0
884					}
885				}
886
887				impl ::core::convert::AsRef<#string_type> for #buffer_ident {
888					fn as_ref(&self) -> &#string_type {
889						&self.0
890					}
891				}
892
893				#borrow_bytes
894				#borrow_ascii
895			});
896		}
897
898		if buffer.derives.debug {
899			tokens.extend(quote! {
900				impl ::core::fmt::Debug for #buffer_ident {
901					fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
902						<#ident as ::core::fmt::Debug>::fmt(self.#as_ref(), f)
903					}
904				}
905			});
906		}
907
908		if buffer.derives.display {
909			tokens.extend(quote! {
910				impl ::core::fmt::Display for #buffer_ident {
911					fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
912						<#ident as ::core::fmt::Display>::fmt(self.#as_ref(), f)
913					}
914				}
915			});
916		}
917
918		if buffer.derives.partial_eq {
919			tokens.extend(quote! {
920				impl ::core::cmp::PartialEq for #buffer_ident {
921					fn eq(&self, other: &Self) -> bool {
922						<#ident as ::core::cmp::PartialEq>::eq(self.#as_ref(), other.#as_ref())
923					}
924				}
925
926				impl ::core::cmp::PartialEq<#ident> for #buffer_ident {
927					fn eq(&self, other: &#ident) -> bool {
928						<#ident as ::core::cmp::PartialEq>::eq(self.#as_ref(), other)
929					}
930				}
931
932				impl<'a> ::core::cmp::PartialEq<&'a #ident> for #buffer_ident {
933					fn eq(&self, other: &&'a #ident) -> bool {
934						<#ident as ::core::cmp::PartialEq>::eq(self.#as_ref(), *other)
935					}
936				}
937			});
938		}
939
940		if buffer.derives.eq {
941			tokens.extend(quote! {
942				impl ::core::cmp::Eq for #buffer_ident {}
943			});
944		}
945
946		if buffer.derives.partial_ord {
947			if buffer.derives.ord {
948				tokens.extend(quote! {
949					impl ::core::cmp::Ord for #buffer_ident {
950						fn cmp(&self, other: &Self) -> ::core::cmp::Ordering {
951							<#ident as ::core::cmp::Ord>::cmp(self.#as_ref(), other.#as_ref())
952						}
953					}
954
955					impl ::core::cmp::PartialOrd for #buffer_ident {
956						fn partial_cmp(&self, other: &Self) -> Option<::core::cmp::Ordering> {
957							Some(<#buffer_ident as ::core::cmp::Ord>::cmp(self, other))
958						}
959					}
960				});
961			} else {
962				tokens.extend(quote! {
963					impl ::core::cmp::PartialOrd for #buffer_ident {
964						fn partial_cmp(&self, other: &Self) -> Option<::core::cmp::Ordering> {
965							<#ident as ::core::cmp::PartialOrd>::partial_cmp(self.#as_ref(), other.#as_ref())
966						}
967					}
968				});
969			}
970
971			tokens.extend(quote! {
972				impl ::core::cmp::PartialOrd<#ident> for #buffer_ident {
973					fn partial_cmp(&self, other: &#ident) -> Option<::core::cmp::Ordering> {
974						<#ident as ::core::cmp::PartialOrd>::partial_cmp(self.#as_ref(), other)
975					}
976				}
977
978				impl<'a> ::core::cmp::PartialOrd<&'a #ident> for #buffer_ident {
979					fn partial_cmp(&self, other: &&'a #ident) -> Option<::core::cmp::Ordering> {
980						<#ident as ::core::cmp::PartialOrd>::partial_cmp(self.#as_ref(), *other)
981					}
982				}
983			});
984
985			if buffer.derives.hash {
986				tokens.extend(quote! {
987					impl ::core::hash::Hash for #buffer_ident {
988						fn hash<H: ::core::hash::Hasher>(&self, state: &mut H) {
989							<#ident as ::core::hash::Hash>::hash(self.#as_ref(), state)
990						}
991					}
992				});
993			}
994		}
995
996		if data.options.serde {
997			let serialize = if T::UNICODE || ascii {
998				quote! {
999					serializer.serialize_str(self.as_str())
1000				}
1001			} else {
1002				quote! {
1003					serializer.serialize_bytes(self.as_bytes())
1004				}
1005			};
1006
1007			let (visit_str, visit_bytes) = if T::UNICODE {
1008				(
1009					quote! {
1010						#buffer_ident::new(v)
1011					},
1012					quote! {
1013						match ::std::string::String::from_utf8(v) {
1014							Ok(s) => #buffer_ident::new(s).map_err(|#error(s)| #error(s.into_bytes())),
1015							Err(e) => Err(#error(e.into_bytes()))
1016						}
1017					},
1018				)
1019			} else {
1020				(
1021					quote! {
1022						#buffer_ident::new(v.into_bytes()).map_err(|#error(bytes)| unsafe {
1023							#error(::std::string::String::from_utf8_unchecked(bytes))
1024						})
1025					},
1026					quote! {
1027						#buffer_ident::new(v)
1028					},
1029				)
1030			};
1031
1032			let expected = format!("some {ident}");
1033
1034			tokens.extend(quote! {
1035				impl ::serde::Serialize for #buffer_ident {
1036					fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1037					where
1038						S: ::serde::Serializer
1039					{
1040						#serialize
1041					}
1042				}
1043
1044				impl<'de> ::serde::Deserialize<'de> for #buffer_ident {
1045					fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1046					where
1047						D: ::serde::Deserializer<'de>
1048					{
1049						struct Visitor;
1050
1051						impl<'de> ::serde::de::Visitor<'de> for Visitor {
1052							type Value = #buffer_ident;
1053
1054							fn expecting(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
1055								write!(f, #expected)
1056							}
1057
1058							fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1059							where
1060								E: ::serde::de::Error
1061							{
1062								self.visit_string(v.to_string())
1063							}
1064
1065							fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
1066							where
1067								E: ::serde::de::Error
1068							{
1069								self.visit_byte_buf(v.to_vec())
1070							}
1071
1072							fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
1073							where
1074								E: ::serde::de::Error
1075							{
1076								#visit_str.map_err(|#error(v)| E::invalid_value(::serde::de::Unexpected::Str(&v), &self))
1077							}
1078
1079							fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
1080							where
1081								E: ::serde::de::Error
1082							{
1083								#visit_bytes.map_err(|#error(v)| E::invalid_value(::serde::de::Unexpected::Bytes(&v), &self))
1084							}
1085						}
1086
1087						deserializer.deserialize_str(Visitor)
1088					}
1089				}
1090			})
1091		}
1092	}
1093
1094	Ok(tokens)
1095}
1096
1097fn generate_validation_function<T: Token>(automaton: &DetAutomaton<u32, T::Set>) -> TokenStream {
1098	let initial_state = *automaton.initial_state();
1099
1100	let states = automaton.transitions().iter().map(|(q, transitions)| {
1101		let transitions = transitions.iter().map(|(set, target)| {
1102			let set = T::Set::rust_set(set);
1103			quote! {
1104				Some(#set) => #target
1105			}
1106		});
1107
1108		let is_final = automaton.is_final_state(q);
1109
1110		quote! {
1111			#q => match input.next() {
1112				#(#transitions,)*
1113				Some(_) => break false,
1114				None => break #is_final
1115			}
1116		}
1117	});
1118
1119	quote! {
1120		let mut state = #initial_state;
1121		loop {
1122			state = match state {
1123				#(#states,)*
1124				_ => unreachable!()
1125			}
1126		}
1127	}
1128}
static_regular_grammar/lib.rs

static_regular_grammar/
lib.rs