scrapelect_filter_proc_macro/
lib.rs

1use proc_macro::{Span, TokenStream};
2use quote::{quote, ToTokens};
3use syn::{
4    punctuated::Punctuated, Data, DeriveInput, GenericParam, Lifetime, LifetimeParam, Pat, PatIdent,
5};
6
7/// Procedural macro to derive [`scrapelect::interpreter::filter::Args`] on a structure.
8///
9/// If you need to use the value lifetime, use `'doc`, otherwise the generator will get confused.
10///
11/// # Panics
12/// `#[derive(Args)]` must be called on a *valid* structure with named fields (not a tuple struct).
13/// If not, it will panic and fail.
14#[proc_macro_derive(Args)]
15pub fn derive_args(input: TokenStream) -> TokenStream {
16    let ast = syn::parse(input).expect("token stream should be valid");
17
18    derive_args_impl(&ast)
19}
20
21fn derive_args_impl(ast: &DeriveInput) -> TokenStream {
22    let name = &ast.ident;
23
24    // add 'doc if it is not already present
25    let generics = &ast.generics;
26    let (_, ty_generics, where_clause) = generics.split_for_impl();
27    let mut added_generics = generics.clone();
28    if !generics.params.iter().any(|x| match x {
29        GenericParam::Lifetime(lt) => lt.lifetime.ident == "doc",
30        _ => false,
31    }) {
32        added_generics
33            .params
34            .push(GenericParam::Lifetime(LifetimeParam {
35                attrs: vec![],
36                bounds: Punctuated::new(),
37                colon_token: None,
38                lifetime: Lifetime::new("'doc", Span::call_site().into()),
39            }));
40    }
41
42    let (impl_generics, _, _) = added_generics.split_for_impl();
43
44    let Data::Struct(s) = &ast.data else {
45        return quote! {
46            compile_error!("#[derive(Args)] on a non-struct is not supported.");
47        }
48        .into();
49    };
50
51    let field = s.fields.iter().map(|x| {
52        if let Some(id) = &x.ident {
53            id
54        } else {
55            panic!("#[derive(Args)] not supported on a tuple struct")
56        }
57    });
58
59    let field_extract = field
60        .clone()
61        .filter(|x| !x.to_string().starts_with("_marker"));
62
63    let field_assign = field.clone().map(|x| {
64        if x.to_string().starts_with("_marker") {
65            quote! { #x: Default::default() }
66        } else {
67            quote! { #x }
68        }
69    });
70
71    quote! {
72        impl #impl_generics crate::interpreter::filter::Args<'doc> for #name #ty_generics #where_clause {
73            fn try_deserialize<'ast>(
74                mut args: ::std::collections::BTreeMap<&'ast str, crate::interpreter::value::EValue<'doc>>
75            ) -> anyhow::Result<Self> {
76                #(
77                    let #field_extract = crate::interpreter::value::TryFromValue::try_from_option(args.remove(stringify!(#field_extract)))?;
78                )*
79
80                if !args.is_empty() {
81                    anyhow::bail!("Found unexpected arguments {args:?}");
82                }
83
84                Ok(Self {
85                    #(#field_assign),*
86                })
87            }
88        }
89    }
90    .into()
91}
92
93/// Procedural macro that makes a function that generates a stateless `impl Filter`
94/// filter using the function body as the method for `apply`.
95///
96/// Parameter conversion: We generate an `impl Args<'doc>` struct using the parameters
97/// to the function.  The value/ctx lifetime must be `'doc` for compatibility with
98/// `#[derive(Args)]`.  Here is how the arguments are converted
99///
100/// - `value: T` - this must be present, and `T` must be `TryFromValue<'doc>`
101/// - `ctx: ElementContext<'_, 'doc>`: this is optionally present.  If it is present, it must have the given type.
102/// - `...other_arg: T`: For all other args, they will be put in `Self::Args`, and `T` must be `TryFromValue<'doc>`.
103///
104/// Note that patterns are not supported beyond `(mut)? x: T`
105///
106/// The return type must be `crate::interpreter::Result<Value<'doc>>`
107///
108/// # Panics
109/// Panics if the token stream is not valid or the function signature is not as specified.
110#[proc_macro_attribute]
111pub fn filter_fn(_attr: TokenStream, item: TokenStream) -> TokenStream {
112    let func: syn::ItemFn = syn::parse(item).expect("token stream should be valid");
113    let inner = func.clone();
114    let name = func.sig.ident;
115    let vis = func.vis;
116
117    let (value, args) = func
118        .sig
119        .inputs
120        .into_iter()
121        .map(|arg| match arg {
122            syn::FnArg::Receiver(_) => panic!("Calling #[filter_fn] on a method"),
123            syn::FnArg::Typed(x) => match *x.pat {
124                Pat::Ident(PatIdent {
125                    ident,
126                    subpat: None,
127                    ..
128                }) => (ident, x.ty),
129                other => panic!("I don't know what to do with pattern {other:?}"),
130            },
131        })
132        .partition::<Vec<_>, _>(|(ident, _)| ident == "value");
133    let (ctx, args) = args
134        .into_iter()
135        .partition::<Vec<_>, _>(|(ident, _)| ident == "ctx");
136
137    let [(value, vty)]: [_; 1] = value.try_into().expect("expected exactly 1 value arg");
138
139    let arg = args.iter().map(|(id, _)| id);
140    let ty = args.iter().map(|(_, ty)| ty);
141
142    let (ctx, _cty) = if let Some(x) = ctx.into_iter().next() {
143        (Some(x.0), Some(x.1))
144    } else {
145        (None, None)
146    };
147
148    let call_args = std::iter::once(value.clone().into_token_stream())
149        .chain(arg.clone().map(|arg| quote! {args.#arg}))
150        .chain(ctx.clone().into_iter().map(|x| quote! {#x }));
151
152    quote! {
153        #[doc(cfg(feature = "filter_doc"))]
154        #[cfg(any(doc, feature = "filter_doc"))]
155        #inner
156
157        #[cfg(not(any(doc, feature = "filter_doc")))]
158        #vis fn #name() -> impl crate::interpreter::filter::Filter {
159            #[derive(Debug, crate::interpreter::filter::Args)]
160            pub struct Args<'doc> {
161                _marker: core::marker::PhantomData<&'doc ()>,
162                #(#arg: #ty),*
163            }
164
165            #[derive(Debug)]
166            pub struct Filter;
167
168            impl crate::interpreter::filter::Filter for Filter {
169                type Args<'doc> = Args<'doc>;
170                type Value<'doc> = #vty;
171
172                fn apply<'ctx>(
173                    #value: Self::Value<'ctx>,
174                    args: Self::Args<'ctx>,
175                    #[allow(unused)]
176                    ctx: &mut crate::interpreter::ElementContext<'_, 'ctx>
177                ) -> anyhow::Result<crate::interpreter::value::PValue<'ctx>> {
178                    // we can't elide the 'doc lifetime here because it needs to
179                    // also be in the struct, unless we make a smarter macro
180                    // (i.e., lifetime-aware)
181                    #[allow(clippy::needless_lifetimes, clippy::needless_pass_by_value, clippy::unnecessary_wraps)]
182                    #inner
183
184                    #name (#(#call_args),*)
185                }
186            }
187
188            Filter
189        }
190    }
191    .into()
192}