sscanf_macro/
lib.rs

1//! Procedural macros for the [`sscanf`](https://crates.io/crates/sscanf) crate. Not usable as a standalone crate.
2
3use proc_macro::TokenStream as TokenStream1;
4pub(crate) use proc_macro2::{Span, TokenStream};
5pub(crate) use quote::{quote, ToTokens};
6pub(crate) use syn::{
7    parse::{Parse, ParseStream},
8    spanned::Spanned,
9    Token,
10};
11
12mod attribute;
13mod error;
14mod format_option;
15mod format_string;
16mod placeholder;
17mod regex_parts;
18mod str_lit;
19mod ty;
20mod utils;
21
22pub(crate) use attribute::*;
23pub(crate) use error::*;
24pub(crate) use format_option::*;
25pub(crate) use format_string::*;
26pub(crate) use placeholder::*;
27pub(crate) use regex_parts::*;
28pub(crate) use str_lit::*;
29pub(crate) use ty::*;
30pub(crate) use utils::*;
31
32mod derive;
33
34/// Format string and types for `sscanf_get_regex`. Shared by `sscanf` and `sscanf_unescaped`
35struct ScanfInner {
36    /// the format string
37    fmt: StrLit,
38    /// Types after the format string
39    type_tokens: Vec<Type<'static>>,
40}
41/// Input string, format string and types for `sscanf` and `sscanf_unescaped`
42struct Scanf {
43    /// input to run the `sscanf` on
44    src_str: syn::Expr,
45    /// format string and types
46    inner: ScanfInner,
47}
48
49impl Parse for ScanfInner {
50    fn parse(input: ParseStream) -> syn::Result<Self> {
51        if input.is_empty() {
52            let msg = "missing parameter: format string";
53            return Err(syn::Error::new(Span::call_site(), msg)); // checked in tests/fail/missing_params.rs
54        }
55
56        let fmt = input.parse::<StrLit>()?;
57
58        let type_tokens = if input.is_empty() {
59            vec![]
60        } else {
61            input.parse::<Token![,]>()?; // the comma after the format string
62
63            input
64                .parse_terminated(Type::parse, Token![,])?
65                .into_iter()
66                .collect()
67        };
68
69        Ok(ScanfInner { fmt, type_tokens })
70    }
71}
72impl Parse for Scanf {
73    fn parse(input: ParseStream) -> syn::Result<Self> {
74        if input.is_empty() {
75            // All of these special cases have to be handled separately, because syn's default
76            // behavior when something is missing is to point at the entire macro invocation with
77            // an error message that says "expected <missing thing>". But if a user sees the entire
78            // thing underlined with the message "expected a comma", they will assume that they
79            // should replace that macro call with a comma or something similar. They would not
80            // guess that the actual meaning is:
81            // "this macro requires more parameters than I have given it, and the next
82            // parameter should be separated with a comma from the current ones which is why the
83            // macro expected a comma, and it would point to the end of the input where the comma
84            // was expected, but since there is nothing there it has no span to point to so it
85            // just points at the entire thing."
86            // I love writing error messages in proc macros :D (not)
87            let msg = "at least 2 Parameters required: Input and format string";
88            return Err(syn::Error::new(Span::call_site(), msg)); // checked in tests/fail/missing_params.rs
89        }
90        let src_str = input.parse()?;
91        if input.is_empty() {
92            let msg = "at least 2 Parameters required: Missing format string";
93            return Err(syn::Error::new_spanned(src_str, msg)); // checked in tests/fail/missing_params.rs
94        }
95        let comma = input.parse::<Token![,]>()?;
96        if input.is_empty() {
97            // Addition to the comment above: here we actually have a comma to point to to say:
98            // "Hey, you put a comma here, put something after it". syn doesn't do this
99            // because it cannot rewind the input stream to check this.
100            let msg = "at least 2 Parameters required: Missing format string";
101            return Err(syn::Error::new_spanned(comma, msg)); // checked in tests/fail/missing_params.rs
102        }
103        let inner = input.parse()?;
104
105        Ok(Scanf { src_str, inner })
106    }
107}
108
109#[proc_macro]
110pub fn sscanf(input: TokenStream1) -> TokenStream1 {
111    let input = syn::parse_macro_input!(input as Scanf);
112    sscanf_internal(input, true)
113}
114
115#[proc_macro]
116pub fn sscanf_unescaped(input: TokenStream1) -> TokenStream1 {
117    let input = syn::parse_macro_input!(input as Scanf);
118    sscanf_internal(input, false)
119}
120
121#[proc_macro]
122pub fn sscanf_get_regex(input: TokenStream1) -> TokenStream1 {
123    let input = syn::parse_macro_input!(input as ScanfInner);
124    let (regex, _) = match generate_regex(&input, true) {
125        Ok(v) => v,
126        Err(e) => return e.into(),
127    };
128    let ret = quote! {{
129        #regex
130        &REGEX
131    }};
132    ret.into()
133}
134
135#[proc_macro_derive(FromScanf, attributes(sscanf))]
136pub fn derive_from_sscanf(input: TokenStream1) -> TokenStream1 {
137    let syn::DeriveInput {
138        ident,
139        generics,
140        data,
141        attrs,
142        ..
143    } = syn::parse_macro_input!(input as syn::DeriveInput);
144
145    let res = match data {
146        syn::Data::Struct(data) => derive::parse_struct(&ident, &generics, attrs, data),
147        syn::Data::Enum(data) => derive::parse_enum(&ident, &generics, attrs, data),
148        syn::Data::Union(data) => derive::parse_union(&ident, &generics, attrs, data),
149    };
150    match res {
151        Ok(res) => res.into(),
152        Err(err) => err.into(),
153    }
154}
155
156fn sscanf_internal(input: Scanf, escape_input: bool) -> TokenStream1 {
157    let (regex, matcher) = match generate_regex(&input.inner, escape_input) {
158        Ok(v) => v,
159        Err(e) => return e.into(),
160    };
161    let src_str = {
162        let src_str = input.src_str;
163        let span = FullSpan::from_spanned(&src_str);
164        let param = span.apply(quote! { & }, quote! { (#src_str) });
165
166        // wrapping the input in a manual call to str::get ensures that the user
167        // gets an appropriate error message if they try to use a non-string input
168        quote! { ::std::primitive::str::get(#param, ..).unwrap() }
169    };
170    let ret = quote! {{
171        #regex
172        #[allow(clippy::needless_borrow)]
173        let input: &str = #src_str;
174        #[allow(clippy::needless_question_mark)]
175        REGEX.captures(input)
176            .ok_or_else(|| ::sscanf::errors::Error::MatchFailed)
177            .and_then(|cap| {
178                let mut src = cap.iter();
179                let src = &mut src;
180                src.next().unwrap(); // skip the whole match
181
182                let mut matcher = || -> ::std::result::Result<_, ::std::boxed::Box<dyn ::std::error::Error>> {
183                    ::std::result::Result::Ok( ( #(#matcher),* ) )
184                };
185                let res = matcher().map_err(|e| ::sscanf::errors::Error::ParsingFailed(e));
186
187                if res.is_ok() && src.len() != 0 {
188                    panic!("sscanf: {} captures generated, but {} were taken",
189                        REGEX.captures_len(), REGEX.captures_len() - src.len()
190                    );
191                }
192                res
193            })
194    }};
195    ret.into()
196}
197
198fn generate_regex(input: &ScanfInner, escape_input: bool) -> Result<(TokenStream, Vec<Matcher>)> {
199    let mut format = FormatString::new(input.fmt.to_slice(), escape_input)?;
200    format.parts[0].insert(0, '^');
201    format.parts.last_mut().unwrap().push('$');
202
203    // inner function to use ?-operator. This should be a closure, but those can't have lifetimes
204    fn find_ph_type<'a>(
205        ph: &Placeholder<'a>,
206        visited: &mut [bool],
207        ph_index: &mut usize,
208        external_types: &[Type<'a>],
209    ) -> Result<Type<'a>> {
210        let n = if let Some(name) = ph.ident.as_ref() {
211            if let Ok(n) = name.text().parse::<usize>() {
212                if n >= visited.len() {
213                    let msg = format!("type index {} out of range of {} types", n, visited.len());
214                    return name.err(&msg); // checked in tests/fail/<channel>/invalid_type_in_placeholder.rs
215                }
216                n
217            } else {
218                return Type::from_str(name.clone()).map_err(|err| {
219                    let hint =  "The syntax for placeholders is {<type>} or {<type>:<config>}. Make sure <type> is a valid type or index.";
220                    let hint2 = "If you want syntax highlighting and better errors, place the type in the arguments after the format string while debugging";
221                    let msg = format!("invalid type in placeholder: {}.\nHint: {}\n{}", err, hint, hint2);
222                    name.error(msg) // checked in tests/fail/<channel>/invalid_type_in_placeholder.rs
223                });
224            }
225        } else {
226            let n = *ph_index;
227            *ph_index += 1;
228            if n >= visited.len() {
229                let msg = "more placeholders than types provided";
230                return ph.src.err(msg); // checked in tests/fail/<channel>/missing_type.rs
231            }
232            n
233        };
234        visited[n] = true;
235        Ok(external_types[n].clone())
236    }
237
238    let mut ph_index = 0;
239    let mut visited = vec![false; input.type_tokens.len()];
240    let mut types = vec![];
241    let mut error = Error::builder();
242
243    for ph in &format.placeholders {
244        match find_ph_type(ph, &mut visited, &mut ph_index, &input.type_tokens) {
245            Ok(ty) => types.push(ty),
246            Err(e) => error.push(e),
247        }
248    }
249
250    for (visited, ty) in visited.iter().zip(&input.type_tokens) {
251        if !*visited {
252            error.with_spanned(ty, "unused type"); // checked in tests/fail/missing_placeholder.rs
253        }
254    }
255
256    error.ok_or_build()?;
257
258    let regex_parts = RegexParts::new(&format, &types)?;
259
260    let regex = regex_parts.regex();
261    let num_captures = regex_parts.num_captures();
262    let regex = quote! { ::sscanf::lazy_static::lazy_static! {
263        static ref REGEX: ::sscanf::regex::Regex = {
264            let regex_str = #regex;
265            let regex = ::sscanf::regex::Regex::new(regex_str)
266                .expect("sscanf: Cannot generate Regex");
267
268            const NUM_CAPTURES: ::std::primitive::usize = #num_captures;
269
270            if regex.captures_len() != NUM_CAPTURES {
271                panic!(
272                    "sscanf: Regex has {} capture groups, but {} were expected.{}",
273                    regex.captures_len(), NUM_CAPTURES, ::sscanf::errors::WRONG_CAPTURES_HINT
274                );
275            }
276            regex
277        };
278    }};
279
280    Ok((regex, regex_parts.matchers))
281}