crabler_tokio_derive/
lib.rs1extern crate proc_macro;
2
3use proc_macro::TokenStream;
4use proc_macro2;
5use proc_macro_error::*;
6use quote::quote;
7use syn::{parse_macro_input, DeriveInput};
8
9#[proc_macro_derive(WebScraper, attributes(on_html, on_page, on_response))]
10#[proc_macro_error]
11pub fn web_scraper_derive(input: TokenStream) -> TokenStream {
19 let ast: syn::DeriveInput = parse_macro_input!(input as DeriveInput);
20
21 match ast.data {
22 syn::Data::Struct(syn::DataStruct { .. }) => impl_web_scraper(&ast),
23 _ => abort_call_site!("#[WebScraper] only supports structs"),
24 }
25}
26
27fn impl_web_scraper(ast: &syn::DeriveInput) -> TokenStream {
28 use syn::*;
29
30 let name = &ast.ident;
31
32 let mut pages = vec![];
33 let mut selectors = vec![];
34 let mut matches = vec![];
35 let mut responses = vec![];
36
37 for attr in &ast.attrs {
38 let meta = attr.parse_meta();
39
40 match meta {
41 Ok(Meta::List(MetaList { path, nested, .. }))
42 if path.segments[0].ident == "on_page" =>
43 {
44 let page = handle_on_page_attr(nested);
45 pages.push(page);
46 }
47 Ok(Meta::List(MetaList { path, nested, .. }))
48 if path.segments[0].ident == "on_html" =>
49 {
50 let (selector, match_clause) = handle_on_html_attr(nested);
51 selectors.push(selector);
52 matches.push(match_clause);
53 }
54 Ok(Meta::List(MetaList { path, nested, .. }))
55 if path.segments[0].ident == "on_response" =>
56 {
57 let response = handle_on_response_attr(nested);
58 responses.push(response);
59 }
60 Err(err) => {
61 abort_call_site!("Failed to parse attribute: {}", err);
62 }
63 _ => {
64 abort_call_site!("Unsupported arguments on attribute");
65 }
66 }
67 }
68
69 let gen = quote! {
70 #[async_trait(?Send)]
71 impl WebScraper for #name {
72 async fn dispatch_on_page(
73 &mut self,
74 page: String,
75 ) -> std::result::Result<(), CrablerError> {
76 #( #pages; )*
77
78 Ok(())
79 }
80
81 async fn dispatch_on_html(
82 &mut self,
83 selector: &str,
84 request: Response,
85 element: Element,
86 ) -> std::result::Result<(), CrablerError> {
87
88 match selector {
89 #( #matches, )*
90 _ => panic!("Failed to dispatch {}", selector),
91 };
92
93 Ok(())
94 }
95
96 fn all_html_selectors(&self) -> Vec<&str> {
97 vec![#( #selectors ),*]
98 }
99
100 async fn dispatch_on_response(
101 &mut self,
102 request: Response,
103 ) -> std::result::Result<(), CrablerError> {
104 #( #responses; )*
105
106 Ok(())
107 }
108
109 async fn run(
110 self,
111 opts: Opts,
112 ) -> std::result::Result<(), CrablerError> {
113 use crabler_tokio::Crabler;
114
115 let mut crabler = Crabler::new(self, &opts);
116
117 for url in &opts.urls {
118 crabler.navigate(url).await?;
119 }
120
121 for _ in 0..opts.threads {
122 crabler.start_worker();
123 }
124
125 crabler.run().await
126 }
127 }
128 };
129
130 gen.into()
131}
132
133fn handle_on_page_attr(
134 nested: syn::punctuated::Punctuated<syn::NestedMeta, syn::token::Comma>,
135) -> proc_macro2::TokenStream {
136 use syn::*;
137
138 let l = nested.len();
139 if l < 1 {
140 abort_call_site!("Not enough arguments provided to on_page attribute: {}", l);
141 }
142
143 let f = match &nested[0] {
144 NestedMeta::Meta(Meta::Path(Path { segments, .. })) => &segments[0].ident,
145 _ => abort_call_site!("Can't find on_page method"),
146 };
147
148 quote! { self.#f(page).await? }
149}
150
151fn handle_on_html_attr(
152 nested: syn::punctuated::Punctuated<syn::NestedMeta, syn::token::Comma>,
153) -> (proc_macro2::TokenStream, proc_macro2::TokenStream) {
154 use syn::*;
155
156 let l = nested.len();
157 if l < 2 {
158 abort_call_site!("Not enough arguments provided to on_html attribute: {}", l);
159 }
160
161 let token = match &nested[0] {
162 NestedMeta::Lit(Lit::Str(lit_str)) => lit_str,
163 _ => abort_call_site!("Can't find on_html selector"),
164 };
165
166 let f = match &nested[1] {
167 NestedMeta::Meta(Meta::Path(Path { segments, .. })) => &segments[0].ident,
168 _ => abort_call_site!("Can't find on_html method"),
169 };
170
171 let selector = quote! { #token };
172 let match_clause = quote! { #token => self.#f(request, element).await? };
173
174 (selector, match_clause)
175}
176
177fn handle_on_response_attr(
178 nested: syn::punctuated::Punctuated<syn::NestedMeta, syn::token::Comma>,
179) -> proc_macro2::TokenStream {
180 use syn::*;
181
182 let l = nested.len();
183 if l < 1 {
184 abort_call_site!(
185 "Not enough arguments provided to on_response attribute: {}",
186 l
187 );
188 }
189
190 let f = match &nested[0] {
191 NestedMeta::Meta(Meta::Path(Path { segments, .. })) => &segments[0].ident,
192 _ => abort_call_site!("Can't find on_response method"),
193 };
194
195 quote! { self.#f(request).await? }
196}