1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
// NOTE(review): the `name!` / `struct_name!` macros used below are not defined in this
// file; presumably they come in through this glob import — confirm before removing it.
#[allow(unused_imports)]
use crate::internal_prelude::*;
use crate::types as rt;

/// Return type of [`Expander::expand`]: unit on success, otherwise whatever error
/// `rt::ExtResult` carries.
pub type Result = rt::ExtResult<()>;

/// A step that inspects a parsed HTML document and may feed new work into the job
/// context.
pub trait Expander<JS: rt::JobStateValues, TS: rt::TaskStateValues> {
    /// Human-readable name of this expander; `"no name"` unless overridden.
    fn name(&self) -> String {
        "no name".to_owned()
    }

    /// Processes `document`, which belongs to `task`, with mutable access to the job
    /// context `ctx`. `status` is passed alongside for implementations that need it.
    fn expand(
        &self,
        ctx: &mut rt::JobCtx<JS, TS>,
        task: &rt::Task,
        status: &rt::HttpStatus,
        document: &select::document::Document,
    ) -> Result;
}

/// Expander that turns every `<a>` element of a document into a link.
pub struct FollowLinks {
    /// Target assigned to every link this expander produces.
    link_target: rt::LinkTarget,
}

impl FollowLinks {
    struct_name! {}

    /// Creates an expander whose links are all tagged with `link_target`.
    pub fn new(link_target: rt::LinkTarget) -> Self {
        FollowLinks { link_target }
    }
}

impl<JS: rt::JobStateValues, TS: rt::TaskStateValues> Expander<JS, TS> for FollowLinks {
    name! {}

    /// Builds one link per `<a>` element from its `href`, `rel` and `alt` attributes and
    /// its text, then pushes the collected links into `ctx`.
    ///
    /// Missing attributes are passed as the empty string. Elements for which
    /// `rt::Link::new` returns an error are silently skipped, so this method itself
    /// always returns `Ok(())`.
    fn expand(
        &self,
        ctx: &mut rt::JobCtx<JS, TS>,
        task: &rt::Task,
        _status: &rt::HttpStatus,
        document: &select::document::Document,
    ) -> Result {
        let anchors = document.find(select::predicate::Name("a"));

        let links: Vec<rt::Link> = anchors
            .filter_map(|node| {
                let href = node.attr("href").unwrap_or("");
                let rel = node.attr("rel").unwrap_or("");
                let alt = node.attr("alt").unwrap_or("");
                let text = node.text();

                // NOTE(review): the literal `0` is presumably an initial redirect count
                // and `task.link` the parent link — confirm against `rt::Link::new`.
                rt::Link::new(href, rel, alt, &text, 0, self.link_target, &task.link).ok()
            })
            .collect();

        ctx.push_links(links);
        Ok(())
    }
}

/// Expander that turns every `<img>` element of a document into a link.
pub struct LoadImages {
    /// Target assigned to every link this expander produces.
    link_target: rt::LinkTarget,
}

impl LoadImages {
    struct_name! {}

    /// Creates an expander whose links are all tagged with `link_target`.
    pub fn new(link_target: rt::LinkTarget) -> Self {
        LoadImages { link_target }
    }
}

impl<JS: rt::JobStateValues, TS: rt::TaskStateValues> Expander<JS, TS> for LoadImages {
    name! {}

    /// Builds one link per `<img>` element from its `src`, `rel` and `alt` attributes and
    /// its text, then pushes the collected links into `ctx`.
    ///
    /// Missing attributes are passed as the empty string. Elements for which
    /// `rt::Link::new` returns an error are silently skipped, so this method itself
    /// always returns `Ok(())`.
    fn expand(
        &self,
        ctx: &mut rt::JobCtx<JS, TS>,
        task: &rt::Task,
        _status: &rt::HttpStatus,
        document: &select::document::Document,
    ) -> Result {
        let images = document.find(select::predicate::Name("img"));

        let links: Vec<rt::Link> = images
            .filter_map(|node| {
                let src = node.attr("src").unwrap_or("");
                let rel = node.attr("rel").unwrap_or("");
                let alt = node.attr("alt").unwrap_or("");
                let text = node.text();

                // NOTE(review): the literal `0` is presumably an initial redirect count
                // and `task.link` the parent link — confirm against `rt::Link::new`.
                rt::Link::new(src, rel, alt, &text, 0, self.link_target, &task.link).ok()
            })
            .collect();

        ctx.push_links(links);
        Ok(())
    }
}