scraper_macros/
lib.rs

1//! The macro which converts a struct or tuple into one which is able to be scraped easily.
2//!
3//! An example of this would be here:
4//! ```rust
5//! #[derive(Scraper)]
6//! pub struct RedditListItem {
7//!     #[scrape(xpath = r#"//a[@data-click-id="body"]/@href"#)]
8//!     pub urls: Vec<String>
9//! }
10//! ```
11
12#[macro_use] extern crate syn;
13#[macro_use] extern crate quote;
14
15use proc_macro::TokenStream;
16use quote::__private::Span;
17use symbol::Symbol;
18use syn::{Attribute, Data, DeriveInput, ExprAssign, Fields, Meta, NestedMeta, spanned::Spanned, Path, __private::TokenStream2, Result};
19
20mod symbol;
21
22
23// https://doc.rust-lang.org/reference/procedural-macros.html
24
25/// The macro which converts a struct or tuple into one which is able to be scraped easily.
26///
27/// An example of this would be here:
28/// ```rust
29/// #[derive(Scraper)]
30/// pub struct RedditListItem {
31///     #[scrape(xpath = r#"//a[@data-click-id="body"]/@href"#)]
32///     pub urls: Vec<String>
33/// }
34/// ```
35#[proc_macro_derive(Scraper, attributes(scrape))]
36pub fn derive_scraper(input: TokenStream) -> TokenStream {
37	let mut input = parse_macro_input!(input as DeriveInput);
38
39	let body = define_body(&mut input.data);
40
41	let name = input.ident;
42
43	TokenStream::from(quote! {
44		impl ::scraper_main::ScraperMain for #name {
45			fn scrape(doc: &::scraper_main::Document, container: Option<&::scraper_main::Node>) -> ::scraper_main::Result<Self> {
46				fn wrap_values<T>(name: &'static str, value: ::scraper_main::Result<T>) -> ::scraper_main::Result<T> {
47					match value {
48						Ok(v) => Ok(v),
49						Err(v) => {
50							Err(::scraper_main::Error::FieldValueError(name, Box::new(v)))
51						}
52					}
53				}
54
55				Ok(#body)
56			}
57		}
58	})
59}
60
61fn define_body(data: &mut Data) -> TokenStream2 {
62	match data {
63		Data::Struct(s) => {
64			define_fields(&mut s.fields)
65		}
66
67		Data::Enum(_) => unimplemented!("Enum"),
68		Data::Union(_) => unimplemented!("Union"),
69	}
70}
71
72fn define_fields(field_types: &mut Fields) -> TokenStream2 {
73	match field_types {
74		Fields::Named(fields) => {
75			let recurse = fields.named.iter().map(|field| {
76				let name = field.ident.as_ref().unwrap();
77
78				let scrape = ScrapeField::new(field.span(), &field.attrs);
79
80				let eval = scrape.generate_evaluation(name.to_string())
81					.unwrap_or_else(syn::Error::into_compile_error);
82
83				quote! {
84					#name: #eval
85				}
86			}).collect::<Vec<_>>();
87
88			quote! {
89				Self {
90					#(#recurse),*
91				}
92			}
93		}
94
95		Fields::Unnamed(fields) => {
96			let recurse = fields.unnamed.iter()
97				.enumerate()
98				.map(|(index, field)|
99					ScrapeField::new(field.span(), &field.attrs)
100					.generate_evaluation(index.to_string())
101					.unwrap_or_else(syn::Error::into_compile_error)
102				)
103				.collect::<Vec<_>>();
104
105			quote! {
106				Self(
107					#(#recurse),*
108				)
109			}
110		}
111
112		Fields::Unit => unimplemented!("Unimplemented Field")
113	}
114}
115
116
117struct ScrapeField {
118	span: Span,
119
120	is_default: bool,
121
122	xpath: Option<String>,
123	transform_fn: Option<String>
124}
125
126impl ScrapeField {
127	pub fn new(span: Span, attributes: &[Attribute]) -> Self {
128		Self {
129			span,
130			is_default: does_attribute_exist(symbol::DEFAULT, attributes),
131			xpath: get_scrape_attr_value(symbol::XPATH, attributes),
132			transform_fn: get_scrape_attr_value(symbol::TRANSFORM, attributes)
133		}
134	}
135
136	pub fn generate_evaluation(self, field_name: String) -> Result<TokenStream2> {
137		if self.is_default {
138			Ok(quote! {
139				Default::default()
140			})
141		} else {
142			let span = self.span;
143
144			let Some(xpath) = self.xpath else {
145				return Err(syn::Error::new(span, "Expected #[scrape(TYPE_OF)] eg. xpath = \"\""));
146			};
147
148			if let Some(transform_fn) = self.transform_fn {
149				let transform_ident = format_ident!("{}", transform_fn);
150				// TODO: I don't even know if I'm using span correctly.
151				Ok(quote_spanned! {span=>
152					#transform_ident(wrap_values(#field_name, ::scraper_main::evaluate(#xpath, doc, container).convert_from(doc))?)
153				})
154			} else {
155				Ok(quote_spanned! {span=>
156					wrap_values(#field_name, ::scraper_main::evaluate(#xpath, doc, container).convert_from(doc))?
157				})
158			}
159		}
160	}
161}
162
163
164fn get_scrape_attr_value(attr_name: Symbol, attributes: &[Attribute]) -> Option<String> {
165	for attr in attributes {
166		if attr.path == symbol::BASE_SCRAPE {
167			let parsed = parse_attr(attr)?;
168
169			if parsed.0 == attr_name {
170				return Some(parsed.1);
171			}
172		}
173	}
174
175	None
176}
177
178fn does_attribute_exist(name: Symbol, attributes: &[Attribute]) -> bool {
179	for attr in attributes {
180		if attr.path == symbol::BASE_SCRAPE {
181			if let Some(parsed) = parse_attr_name(attr) {
182				if parsed == name {
183					return true;
184				}
185			}
186		}
187	}
188
189	false
190}
191
192
193fn parse_attr(attr: &Attribute) -> Option<(Path, String)> {
194	let stream = attr.parse_args::<ExprAssign>().ok()?;
195
196	let left = if let syn::Expr::Path(value) = *stream.left {
197		value
198	} else {
199		return None;
200	};
201
202	let right = if let syn::Expr::Lit(value) = *stream.right {
203		value
204	} else {
205		return None;
206	};
207
208	let right_value = if let syn::Lit::Str(value) = right.lit {
209		value.value()
210	} else {
211		return None;
212	};
213
214	Some((left.path, right_value))
215}
216
217fn parse_attr_name(attr: &Attribute) -> Option<Path> {
218	// TODO: Actually use parse_meta() for all attributes instead of just this one.
219
220	let parse = attr.parse_meta().expect("--------------------------------------------");
221
222	if let Meta::List(val) = parse {
223		let ret = val.nested.into_iter().next();
224
225		if let NestedMeta::Meta(Meta::Path(path)) = ret? {
226			return Some(path);
227		}
228	}
229
230	None
231}