progscrape_scrapers/types/
id.rs

1use serde::{Deserialize, Serialize};
2use std::{
3    fmt::{Debug, Display},
4    marker::PhantomData,
5};
6
7use crate::{StoryUrl, backends::ScrapeSource};
8
9/// Identify a scrape by source an ID.
10#[derive(Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
11pub struct ScrapeId {
12    pub source: ScrapeSource,
13    pub subsource: Option<String>,
14    pub id: String,
15    _noinit: PhantomData<()>,
16}
17
18impl ScrapeId {
19    pub fn new(source: ScrapeSource, subsource: Option<String>, id: String) -> Self {
20        Self {
21            source,
22            subsource,
23            id,
24            _noinit: Default::default(),
25        }
26    }
27
28    /// Given a URL, determines if that URL would make this story a self-post. The current heuristic for
29    /// this is whether the url's host looks like a comments host, and the url itself contains the scrape's
30    /// ID. The latter heuristic isn't perfect, but the failure modes are pretty harmless.
31    pub fn is_likely_self_post(&self, url: &StoryUrl) -> bool {
32        self.source.is_comments_host(url.host()) && url.raw().contains(&self.id)
33    }
34
35    /// Generate a comments URL for this scrape.
36    pub fn comments_url(&self) -> String {
37        self.source
38            .comments_url(&self.id, self.subsource.as_deref())
39    }
40
41    pub fn from_string<S: AsRef<str>>(s: S) -> Option<Self> {
42        if let Some((head, rest)) = s.as_ref().split_once('-') {
43            if let Some(source) = ScrapeSource::try_from_str(head) {
44                if let Some((subsource, id)) = rest.split_once('-') {
45                    Some(source.subsource_id(subsource, id))
46                } else {
47                    Some(source.id(rest))
48                }
49            } else {
50                None
51            }
52        } else {
53            None
54        }
55    }
56}
57
58impl Display for ScrapeId {
59    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60        if let Some(subsource) = &self.subsource {
61            f.write_fmt(format_args!(
62                "{}-{}-{}",
63                self.source.into_str(),
64                subsource,
65                self.id
66            ))
67        } else {
68            f.write_fmt(format_args!("{}-{}", self.source.into_str(), self.id))
69        }
70    }
71}
72
73impl Debug for ScrapeId {
74    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
75        <Self as Display>::fmt(self, f)
76    }
77}
78
79impl Serialize for ScrapeId {
80    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
81    where
82        S: serde::Serializer,
83    {
84        if let Some(subsource) = &self.subsource {
85            format!("{}-{}-{}", self.source.into_str(), subsource, self.id)
86        } else {
87            format!("{}-{}", self.source.into_str(), self.id)
88        }
89        .serialize(serializer)
90    }
91}
92
93impl<'de> Deserialize<'de> for ScrapeId {
94    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
95    where
96        D: serde::Deserializer<'de>,
97    {
98        let s = String::deserialize(deserializer)?;
99        if let Some((head, rest)) = s.split_once('-') {
100            let source = ScrapeSource::try_from_str(head)
101                .ok_or(serde::de::Error::custom("Invalid source"))?;
102            if let Some((subsource, id)) = rest.split_once('-') {
103                Ok(source.subsource_id(subsource, id))
104            } else {
105                Ok(source.id(rest))
106            }
107        } else {
108            Err(serde::de::Error::custom("Invalid format"))
109        }
110    }
111}