// progscrape_scrapers/backends/def.rs
use crate::ScrapeId;

use super::*;

5pub trait ScrapeSourceDef {
7 type Config: ScrapeConfigSource;
8 type Scrape: ScrapeStory;
9 type Scraper: Scraper<Config = Self::Config, Output = Self::Scrape>;
10
11 fn comments_url(id: &str, subsource: Option<&str>) -> String;
12 fn id_from_comments_url(url: &str) -> Option<(&str, Option<&str>)>;
13 fn is_comments_host(host: &str) -> bool;
14}
15
16pub trait ScrapeStory {
17 const TYPE: ScrapeSource;
18
19 fn merge(&mut self, other: Self);
20}
21
22pub trait Scraper: Default {
23 type Config: ScrapeConfigSource;
24 type Output: ScrapeStory;
25
26 fn scrape(
28 &self,
29 args: &Self::Config,
30 input: &str,
31 ) -> Result<(Vec<GenericScrape<Self::Output>>, Vec<String>), ScrapeError>;
32
33 fn extract_core<'a>(
35 &self,
36 args: &Self::Config,
37 input: &'a GenericScrape<Self::Output>,
38 ) -> ScrapeCore<'a>;
39}
40
/// Configuration for a scrape source: which subsources it has and which URLs
/// should be fetched for a given set of them.
pub trait ScrapeConfigSource {
    /// Returns the names of the subsources this configuration declares.
    fn subsources(&self) -> Vec<String>;

    /// Returns the URLs for the given `subsources`.
    ///
    /// NOTE(review): presumably `subsources` is a subset of what
    /// [`Self::subsources`] returns -- confirm against the callers.
    fn provide_urls(&self, subsources: Vec<String>) -> Vec<String>;
}

46#[derive(Clone, Debug)]
47pub struct ScrapeCore<'a> {
48 pub source: &'a ScrapeId,
50
51 pub title: Cow<'a, str>,
53
54 pub url: &'a StoryUrl,
56
57 pub date: StoryDate,
59
60 pub tags: Vec<Cow<'a, str>>,
62
63 pub rank: Option<usize>,
65}
66
67#[derive(Clone, Debug, Serialize, Deserialize)]
68pub struct ScrapeShared {
69 pub id: ScrapeId,
70 pub url: StoryUrl,
71 pub raw_title: String,
72 pub date: StoryDate,
73}
74
75#[derive(Clone, Debug, Serialize, Deserialize)]
76pub struct GenericScrape<T: ScrapeStory> {
77 #[serde(flatten)]
78 pub shared: ScrapeShared,
79 #[serde(flatten)]
80 pub data: T,
81}
82
83impl<T: ScrapeStory> std::ops::Deref for GenericScrape<T> {
84 type Target = ScrapeShared;
85 fn deref(&self) -> &Self::Target {
86 &self.shared
87 }
88}
89
90impl<T: ScrapeStory> std::ops::DerefMut for GenericScrape<T> {
91 fn deref_mut(&mut self) -> &mut Self::Target {
92 &mut self.shared
93 }
94}
95
96impl<T: ScrapeStory> GenericScrape<T> {
97 pub fn merge_generic(&mut self, _other: Self) {}
98}
99
/// Declares a source-specific story struct and its constructors.
///
/// `scrape_story! { Name { field_a: TypeA, field_b: TypeB } }` expands to:
///
/// * `pub struct Name` with one `pub` field per listed `ident: type` pair,
///   deriving `Serialize`, `Deserialize`, `Clone`, `Debug` and `Default`;
/// * four associated functions on `Name`, each returning a
///   `GenericScrape<Name>` whose id is built from
///   `<Name as ScrapeStory>::TYPE`:
///   `new`, `new_subsource`, `new_with_defaults` and
///   `new_subsource_with_defaults`.
///
/// The expansion refers to `Serialize`, `Deserialize`, `Cow`, `ScrapeId`,
/// `ScrapeShared`, `ScrapeStory`, `GenericScrape`, `StoryDate` and `StoryUrl`
/// by bare name, so they must be in scope where the macro is invoked. Every
/// field type must implement `Default` (required by the derive and by the
/// `*_with_defaults` constructors).
macro_rules! scrape_story {
    ( $name:ident { $( $id:ident : $type:ty ),* $(,)? } ) => {
        #[derive(Serialize, Deserialize, Clone, Debug, Default)]
        pub struct $name {
            $( pub $id : $type ),*
        }

        impl $name {
            /// Builds a scrape with no subsource from the shared fields and
            /// an explicit value for every source-specific field.
            // The argument count grows with the number of declared fields.
            #[allow(clippy::too_many_arguments)]
            pub fn new<'a, S: Clone + Into<Cow<'a, str>>>(
                id: S,
                date: StoryDate,
                raw_title: S,
                url: StoryUrl,
                $( $id: $type ),*
            ) -> GenericScrape<$name> {
                GenericScrape {
                    shared: ScrapeShared {
                        id: ScrapeId::new(<$name as ScrapeStory>::TYPE, None, id.into().into()),
                        date,
                        raw_title: raw_title.into().into(),
                        url
                    },
                    data: $name {
                        $($id),*
                    }
                }
            }

            /// Same as `new`, but the scrape id also records `subsource`.
            // The argument count grows with the number of declared fields.
            #[allow(clippy::too_many_arguments)]
            pub fn new_subsource<'a, S: Clone + Into<Cow<'a, str>>>(
                id: S,
                subsource: S,
                date: StoryDate,
                raw_title: S,
                url: StoryUrl,
                $( $id: $type ),*
            ) -> GenericScrape<$name> {
                GenericScrape {
                    shared: ScrapeShared {
                        id: ScrapeId::new(<$name as ScrapeStory>::TYPE, Some(subsource.into().into()), id.into().into()),
                        date,
                        raw_title: raw_title.into().into(),
                        url
                    },
                    data: $name {
                        $($id),*
                    }
                }
            }

            /// Builds a scrape with no subsource; every source-specific
            /// field is set to its `Default` value.
            #[allow(clippy::too_many_arguments)]
            pub fn new_with_defaults<'a, S: Clone + Into<Cow<'a, str>>>(
                id: S,
                date: StoryDate,
                raw_title: S,
                url: StoryUrl
            ) -> GenericScrape<$name> {
                GenericScrape {
                    shared: ScrapeShared {
                        id: ScrapeId::new(<$name as ScrapeStory>::TYPE, None, id.into().into()),
                        date,
                        raw_title: raw_title.into().into(),
                        url
                    },
                    data: $name {
                        $($id : Default::default() ),*
                    }
                }
            }

            /// Same as `new_with_defaults`, but the scrape id also records
            /// `subsource`.
            #[allow(clippy::too_many_arguments)]
            pub fn new_subsource_with_defaults<'a, S: Clone + Into<Cow<'a, str>>>(
                id: S,
                subsource: S,
                date: StoryDate,
                raw_title: S,
                url: StoryUrl
            ) -> GenericScrape<$name> {
                GenericScrape {
                    shared: ScrapeShared {
                        id: ScrapeId::new(<$name as ScrapeStory>::TYPE, Some(subsource.into().into()), id.into().into()),
                        date,
                        raw_title: raw_title.into().into(),
                        url
                    },
                    data: $name {
                        $($id : Default::default() ),*
                    }
                }
            }
        }
    };
}

// Re-export so sibling modules inside this crate can invoke the macro by path.
pub(crate) use scrape_story;