preview_rs/
lib.rs

1use reqwest::blocking;
2use scraper::{ElementRef, Html, Selector};
3
4use std::{fmt, future::Future};
5
6#[derive(Debug)]
7pub struct Preview {
8    pub url: String,
9    pub document: Html,
10}
11
/// Link-preview metadata collected for a page.
///
/// Every field is optional: a field is `None` when no value is available
/// for it.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PreviewResponse {
    /// Description of the page, if one was found.
    pub description: Option<String>,
    /// Title of the page, if one was found.
    pub title: Option<String>,
    /// URL associated with the page, if one was found.
    pub url: Option<String>,
    /// Name of the site, if one was found.
    pub name: Option<String>,
    /// Preview image reference, if one was found.
    pub image: Option<String>,
}
20
21impl fmt::Display for PreviewResponse {
22    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
23        writeln!(
24            f,
25            "\nUrl >> {}\nName >> {}\nTitle >> {}\nDescription >> {}\nImage >> {}",
26            self.url
27                .as_ref()
28                .unwrap_or(&"Url not Avaliable".to_string()),
29            self.name
30                .as_ref()
31                .unwrap_or(&"Name not Avaliable".to_string()),
32            self.title
33                .as_ref()
34                .unwrap_or(&"Title not Avaliable".to_string()),
35            self.description
36                .as_ref()
37                .unwrap_or(&"Description not Avaliable".to_string()),
38            self.image
39                .as_ref()
40                .unwrap_or(&"Image not Avaliable".to_string())
41        )
42    }
43}
44
// TODO: implement `Future` for struct `PreviewResponse`.
46impl Preview {
47    pub fn new(url: &str) -> Preview {
48        let document = Html::parse_document(&blocking::get(url).unwrap().text().unwrap());
49
50        Preview {
51            url: url.to_owned(),
52            document,
53        }
54    }
55
56    pub async fn async_new(url: &str) -> Preview {
57        let document =
58            Html::parse_document(&reqwest::get(url).await.unwrap().text().await.unwrap());
59
60        Preview {
61            url: url.to_owned(),
62            document,
63        }
64    }
65
66    pub async fn async_fetch_preview(&self) -> Result<PreviewResponse, ()> {
67        Ok(self.fetch_preview())
68    }
69
70    /// Fetch preview fetches all the supported properties
71    pub fn fetch_preview(&self) -> PreviewResponse {
72        let site_description = self.extract_description();
73        let site_title = self.extract_title();
74        let site_name = self.extract_site_name();
75        let site_image = self.extract_image();
76        let site_url = self.extract_site_url(&self.url);
77
78        PreviewResponse {
79            description: site_description,
80            image: site_image,
81            name: site_name,
82            url: site_url,
83            title: site_title,
84        }
85    }
86
87    pub(crate) fn extract_description(&self) -> Option<String> {
88        let og_description =
89            self.extract_from_tag(&self.document, "meta", "property", "og:description");
90
91        if og_description.is_none() {
92            let meta_description =
93                self.extract_from_tag(&self.document, "meta", "name", "description");
94            if meta_description.is_none() {
95                return None;
96            }
97            return Some(
98                meta_description
99                    .unwrap()
100                    .value()
101                    .attr("content")
102                    .unwrap()
103                    .to_owned(),
104            );
105        }
106        return Some(
107            og_description
108                .unwrap()
109                .value()
110                .attr("content")
111                .unwrap()
112                .to_owned(),
113        );
114    }
115
116    pub(crate) fn extract_title(&self) -> Option<String> {
117        let og_title = match self.extract_from_tag(&self.document, "meta", "property", "og:title") {
118            Some(title) => title.value().attr("content").unwrap(),
119            None => {
120                let meta_title = self.extract_from_tag(&self.document, "meta", "name", "title");
121                if meta_title.is_none() {
122                    let tag_title = self.extract_from_element(&self.document, "title");
123                    if tag_title.is_none() {
124                        return None;
125                    }
126                    return Some(tag_title.unwrap().inner_html());
127                }
128                return Some(
129                    meta_title
130                        .unwrap()
131                        .value()
132                        .attr("content")
133                        .unwrap()
134                        .to_owned(),
135                );
136            }
137        };
138        Some(og_title.to_owned())
139    }
140
141    pub(crate) fn extract_site_name(&self) -> Option<String> {
142        let og_site_name =
143            match self.extract_from_tag(&self.document, "meta", "property", "og:site_name") {
144                Some(site_name) => site_name.value().attr("content").unwrap(),
145                None => {
146                    let meta_site_name =
147                        self.extract_from_tag(&self.document, "meta", "name", "title");
148                    if meta_site_name.is_none() {
149                        let tag_title = self.extract_from_element(&self.document, "title");
150                        if tag_title.is_none() {
151                            return None;
152                        }
153                        return Some(tag_title.unwrap().inner_html());
154                    };
155                    return Some(
156                        meta_site_name
157                            .unwrap()
158                            .value()
159                            .attr("content")
160                            .unwrap()
161                            .to_owned(),
162                    );
163                }
164            };
165        Some(og_site_name.to_owned())
166    }
167
168    pub(crate) fn extract_image(&self) -> Option<String> {
169        let og_image = match self.extract_from_tag(&self.document, "meta", "property", "og:image") {
170            Some(img) => img.value().attr("content"),
171            None => {
172                let meta_image = self.extract_from_tag(&self.document, "link", "rel", "image_src");
173                if meta_image.is_none() {
174                    return None;
175                }
176                return Some(
177                    meta_image
178                        .unwrap()
179                        .value()
180                        .attr("content")
181                        .unwrap()
182                        .to_owned(),
183                );
184            }
185        };
186        Some(og_image.unwrap().to_owned())
187    }
188
189    pub(crate) fn extract_site_url(&self, link: &str) -> Option<String> {
190        let og_site_url = match self.extract_from_tag(&self.document, "meta", "property", "og:url")
191        {
192            Some(og_url) => og_url.value().attr("content"),
193            None => {
194                let meta_site_url =
195                    match self.extract_from_tag(&self.document, "link", "rel", "canonical") {
196                        Some(meta_url) => meta_url.value().attr("content"),
197                        None => {
198                            return Some(link.to_owned());
199                        }
200                    };
201                return Some(meta_site_url.unwrap().to_owned());
202            }
203        };
204        Some(og_site_url.unwrap().to_owned())
205    }
206
207    pub(crate) fn extract_from_tag<'a>(
208        &self,
209        document: &'a Html,
210        element_name: &'a str,
211        attribute: &'a str,
212        attribute_name: &'a str,
213    ) -> Option<ElementRef<'a>> {
214        let formtted_attr = format!("{}[{}='{}']", element_name, attribute, attribute_name);
215        let selector = Selector::parse(&&formtted_attr).unwrap();
216        let result = document.select(&selector).next();
217        return result;
218    }
219
220    pub(crate) fn extract_from_element<'a>(
221        &self,
222        document: &'a Html,
223        element: &'a str,
224    ) -> Option<ElementRef<'a>> {
225        let selector = Selector::parse(element).unwrap();
226        let val = document.select(&selector).next();
227        return val;
228    }
229}