Skip to main content

sscanf_macro/
lib.rs

1#![deny(
2    missing_debug_implementations,
3    trivial_casts,
4    trivial_numeric_casts,
5    unsafe_code,
6    unstable_features,
7    unused_import_braces,
8    unused_qualifications
9)]
10//
11// set of clippy pedantic lints that I disagree with
12#![allow(
13    clippy::wildcard_imports,
14    clippy::enum_glob_use,
15    clippy::manual_assert, // I don't want the "assertion failed" text in the panic message
16    clippy::items_after_statements // if an item is only used locally, define it where it is needed
17)]
18//
19//! Procedural macros for the [`sscanf`](https://crates.io/crates/sscanf) crate. Not usable as a standalone crate.
20
21use proc_macro::TokenStream as TokenStream1;
22pub(crate) use proc_macro2::{Span, TokenStream};
23pub(crate) use quote::{ToTokens, quote, quote_spanned};
24pub(crate) use syn::{
25    Error, Result, Token,
26    parse::{Parse, ParseStream},
27    spanned::Spanned,
28};
29
30mod attribute;
31mod error;
32mod format_string;
33mod sequence_matcher;
34mod str_lit;
35mod ty;
36mod utils;
37
38pub(crate) use attribute::*;
39pub(crate) use error::*;
40pub(crate) use format_string::*;
41pub(crate) use sequence_matcher::*;
42pub(crate) use str_lit::*;
43pub(crate) use ty::*;
44pub(crate) use utils::*;
45
46mod derive;
47
48/// Input string, format string, and types for `sscanf` and `sscanf_with_regex`.
49struct Sscanf {
50    /// input to run the `sscanf` on
51    input: syn::Expr,
52    /// format string and types
53    parser: SscanfParser,
54}
55
56struct SscanfParser {
57    /// the format string
58    fmt: StrLit,
59    /// Types after the format string
60    type_tokens: Vec<Type<'static>>,
61}
62
63impl Parse for Sscanf {
64    fn parse(tokens: ParseStream) -> Result<Self> {
65        // All of these special cases have to be handled separately, because syn's default
66        // behavior when something is missing is to point at the entire macro invocation with
67        // an error message that says "expected <missing thing>". But if a user sees the entire
68        // thing underlined with the message "expected a comma", they will assume that they
69        // should replace that macro call with a comma or something similar. They would not
70        // guess that the actual meaning is:
71        // "this macro requires more parameters than I have given it, and the next
72        // parameter should be separated with a comma from the current ones which is why the
73        // macro expected a comma, and it would point to the end of the input where the comma
74        // was expected, but since there is nothing there it has no span to point to so it
75        // just points at the entire thing."
76        assert_or_bail!(!tokens.is_empty(), Span::call_site() => "sscanf: at least 2 Parameters required: Input and format string");
77
78        let input: syn::Expr = tokens.parse()?;
79        assert_or_bail!(!tokens.is_empty(), input.end_span() => "sscanf: at least 2 Parameters required: Missing format string");
80
81        let comma = tokens.parse::<Token![,]>()?;
82        // Addition to the comment above: here we actually have a comma to point to to say:
83        // "Hey, you put a comma here, put something after it". syn doesn't do this
84        // because it cannot rewind the input stream to check this.
85        assert_or_bail!(!tokens.is_empty(), comma.end_span() => "at least 2 Parameters required: Missing format string");
86
87        let parser = tokens.parse::<SscanfParser>()?;
88
89        Ok(Sscanf { input, parser })
90    }
91}
92
93impl Parse for SscanfParser {
94    fn parse(tokens: ParseStream) -> Result<Self> {
95        assert_or_bail!(!tokens.is_empty(), Span::call_site() => "sscanf_parser requires at least a format string");
96
97        let fmt = tokens.parse::<StrLit>()?;
98
99        let type_tokens = if tokens.is_empty() {
100            vec![]
101        } else {
102            tokens.parse::<Token![,]>()?; // the comma after the format string
103
104            tokens
105                .parse_terminated(Type::parse, Token![,])?
106                .into_iter()
107                .collect()
108        };
109
110        Ok(SscanfParser { fmt, type_tokens })
111    }
112}
113
114#[proc_macro]
115pub fn sscanf(input: TokenStream1) -> TokenStream1 {
116    let input = syn::parse_macro_input!(input as Sscanf);
117    sscanf_internal(input, true).into_token_stream_1()
118}
119
120#[proc_macro]
121pub fn sscanf_with_regex(input: TokenStream1) -> TokenStream1 {
122    let input = syn::parse_macro_input!(input as Sscanf);
123    sscanf_internal(input, false).into_token_stream_1()
124}
125
126#[proc_macro]
127pub fn sscanf_parser(input: TokenStream1) -> TokenStream1 {
128    let input = syn::parse_macro_input!(input as SscanfParser);
129    sscanf_parser_internal(&input, true).into_token_stream_1()
130}
131
132#[proc_macro]
133pub fn sscanf_parser_with_regex(input: TokenStream1) -> TokenStream1 {
134    let input = syn::parse_macro_input!(input as SscanfParser);
135    sscanf_parser_internal(&input, false).into_token_stream_1()
136}
137
138#[proc_macro_derive(FromScanf, attributes(sscanf))]
139pub fn derive_from_sscanf(input: TokenStream1) -> TokenStream1 {
140    let syn::DeriveInput {
141        ident,
142        generics,
143        data,
144        attrs,
145        ..
146    } = syn::parse_macro_input!(input as syn::DeriveInput);
147
148    let res = match data {
149        syn::Data::Struct(data) => derive::parse_struct(&ident, &generics, attrs, data),
150        syn::Data::Enum(data) => derive::parse_enum(&ident, &generics, attrs, data),
151        syn::Data::Union(data) => derive::parse_union(&ident, &generics, attrs, data),
152    };
153    match res {
154        Ok(res) => res.into(),
155        Err(err) => err.into_compile_error().into(),
156    }
157}
158
159/// Internal function implementing the `sscanf` and `sscanf_with_regex` macros.
160fn sscanf_internal(input: Sscanf, escape_input: bool) -> Result<TokenStream> {
161    let parser = sscanf_parser_internal(&input.parser, escape_input)?;
162
163    let src_str = {
164        let start_span = input.input.span().stable_start();
165        let mut src_str = quote_spanned! {start_span=> &};
166        input.input.to_tokens(&mut src_str);
167        src_str
168    };
169
170    let ret = quote! { #parser.parse(#src_str) };
171    Ok(ret)
172}
173
174/// Internal function to generate a `Parser` from `SscanfParser`.
175fn sscanf_parser_internal(input: &SscanfParser, escape_input: bool) -> Result<TokenStream> {
176    let format = FormatString::new(input.fmt.to_slice(), escape_input)?;
177
178    // inner function to use early return. This should be a closure, but those can't have lifetimes
179    fn find_ph_type<'a>(
180        ph: &Placeholder<'a>,
181        visited: &mut [bool],
182        ph_index: &mut usize,
183        external_types: &[Type<'a>],
184    ) -> Result<Type<'a>> {
185        let n = if let Some(name) = ph.ident.as_ref() {
186            if let Ok(n) = name.text().parse::<usize>() {
187                assert_or_bail!(n < visited.len(), name => "type index {} out of range of {} types", n, visited.len());
188                n
189            } else {
190                return Type::from_str(*name).map_err(|err| {
191                    let hint =  "The syntax for placeholders is {<type>} or {<type>:<config>}. Make sure <type> is a valid type or index.";
192                    let hint2 = "If you want syntax highlighting and better errors, place the type in the arguments after the format string while debugging";
193                    let msg = format!("invalid type in placeholder: {err}.\nHint: {hint}\n{hint2}");
194                    name.error(msg)
195                });
196            }
197        } else {
198            let n = *ph_index;
199            *ph_index += 1;
200            assert_or_bail!(n < visited.len(), ph => "more placeholders than types provided");
201            n
202        };
203        visited[n] = true;
204        Ok(external_types[n].clone())
205    }
206
207    let mut ph_index = 0;
208    let mut visited = vec![false; input.type_tokens.len()];
209    let mut types = vec![];
210    let mut error = ErrorBuilder::new();
211
212    for ph in &format.placeholders {
213        match find_ph_type(ph, &mut visited, &mut ph_index, &input.type_tokens) {
214            Ok(ty) => types.push(ty),
215            Err(e) => error.push(e),
216        }
217    }
218
219    for (visited, ty) in visited.iter().zip(&input.type_tokens) {
220        if !*visited {
221            error.with_spanned(ty, "unused type");
222        }
223    }
224
225    error.ok_or_build()?;
226
227    let sequence_matcher = SequenceMatcher::new(&format, &types, escape_input);
228
229    let matcher = sequence_matcher.get_matcher();
230    let expected_parts = sequence_matcher.num_parts();
231    let parsers = sequence_matcher.parsers;
232    let ret = quote! {
233        ::sscanf::Parser::from_matcher(
234            #matcher,
235            |src| {
236                let src = src.as_seq();
237                assert_eq!(src.num_children(), #expected_parts, "sscanf: internal error: unexpected number of parts");
238
239                #[allow(unused_parens, reason = "The code is autogenerated, so it can't check if it could be simplified")]
240                #[allow(clippy::needless_question_mark, reason = "The code is autogenerated, so it can't check if it could be simplified")]
241                #[allow(clippy::double_parens, reason = "The code is autogenerated, so it can't check if it could be simplified")]
242                ::std::option::Option::Some(( #(#parsers),* ))
243            }
244        )
245    };
246    Ok(ret)
247}