progscrape_scrapers/scrapers.rs

use std::collections::HashMap;

use serde::Serialize;

use crate::{ScrapeConfig, ScrapeSource, TypedScrape, backends::scrape};
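
/// The subsources available for scraping, keyed by source.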
#[derive(Serialize)]
pub struct ScraperPossibilities {
    pub scrapes: HashMap<ScrapeSource, Vec<String>>,
}
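
/// The raw outcome of an HTTP fetch: either the response body, or an HTTP
/// status code plus error text.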
#[derive(Serialize)]
pub enum ScraperHttpResponseInput {
    HTTPError(u16, String),
    Ok(String),
}
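
/// The result of parsing a fetched response: the typed scrapes together with
/// the original body, or the failing input plus an error description.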
#[derive(Serialize)]
pub enum ScraperHttpResult {
    Err(ScraperHttpResponseInput, String),
    Ok(String, Vec<TypedScrape>),
}
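
/// Wraps a `ScrapeConfig` and answers what can be scraped, which URLs to
/// fetch, and how to parse the responses.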
pub struct Scrapers {
    config: ScrapeConfig,
}

impl Scrapers {
    pub fn new(config: &ScrapeConfig) -> Self {
        Self {
            config: config.clone(),
        }
    }
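
    /// Computes the subsources available for every configured source.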
    pub fn compute_scrape_possibilities(&self) -> ScraperPossibilities {
        let mut scrapes = HashMap::new();
        for source in ScrapeSource::all() {
            if let Some(config) = self.config.get(*source) {
                let subsources = config.subsources();
                scrapes.insert(*source, subsources);
            }
        }
        ScraperPossibilities { scrapes }
    }
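
    /// Returns the subsources for a single source, or an empty list if the
    /// source has no configuration.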
    pub fn compute_scrape_subsources(&self, source: ScrapeSource) -> Vec<String> {
        if let Some(config) = self.config.get(source) {
            return config.subsources();
        }
        vec![]
    }
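
    /// Expands a source and a set of subsources into the URLs that need to
    /// be fetched.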
    pub fn compute_scrape_url_demands(
        &self,
        source: ScrapeSource,
        subsources: Vec<String>,
    ) -> Vec<String> {
        if let Some(scrape) = self.config.get(source) {
            scrape.provide_urls(subsources)
        } else {
            vec![]
        }
    }
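
    /// Turns a fetched HTTP response into typed scrapes. HTTP-level errors
    /// pass through unchanged; parse failures are reported alongside the
    /// original body.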
    pub fn scrape_http_result(
        &self,
        source: ScrapeSource,
        input: ScraperHttpResponseInput,
    ) -> ScraperHttpResult {
        match input {
            // A fetched body: hand it to the backend parser for this source.
            ScraperHttpResponseInput::Ok(s) => match scrape(&self.config, source, &s) {
                Ok((scrapes, _warnings)) => ScraperHttpResult::Ok(s, scrapes),
                Err(e) => ScraperHttpResult::Err(ScraperHttpResponseInput::Ok(s), format!("{e:?}")),
            },
            // An HTTP-level failure: propagate the input unchanged.
            error @ ScraperHttpResponseInput::HTTPError(..) => {
                ScraperHttpResult::Err(error, "HTTP Error".to_string())
            }
        }
    }
}
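
// Usage sketch, added for illustration (not part of the original file): a
// caller fetches each URL from `compute_scrape_url_demands` itself, wraps the
// outcome in a `ScraperHttpResponseInput`, and hands it to
// `scrape_http_result`. Only that final parsing step is shown here; the fetch
// is assumed to happen elsewhere, and this function name is hypothetical.
#[allow(dead_code)]
fn handle_fetched_response(
    scrapers: &Scrapers,
    source: ScrapeSource,
    response: ScraperHttpResponseInput,
) -> Vec<TypedScrape> {
    match scrapers.scrape_http_result(source, response) {
        // Parse succeeded: keep the typed scrapes, drop the raw body.
        ScraperHttpResult::Ok(_body, scrapes) => scrapes,
        // HTTP or parse failure: log the error and return nothing.
        ScraperHttpResult::Err(_input, error) => {
            eprintln!("scrape failed: {error}");
            Vec::new()
        }
    }
}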