1use reqwest::blocking;
2use scraper::{ElementRef, Html, Selector};
3
4use std::{fmt, future::Future};
5
6#[derive(Debug)]
7pub struct Preview {
8 pub url: String,
9 pub document: Html,
10}
11
/// The metadata collected for a page preview.
///
/// Every field is optional; `None` means no value was found for it.
#[derive(Debug)]
pub struct PreviewResponse {
    /// Description text of the page.
    pub description: Option<String>,
    /// Title of the page.
    pub title: Option<String>,
    /// URL reported for the page.
    pub url: Option<String>,
    /// Name of the site the page belongs to.
    pub name: Option<String>,
    /// Address of a preview image for the page.
    pub image: Option<String>,
}
20
21impl fmt::Display for PreviewResponse {
22 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
23 writeln!(
24 f,
25 "\nUrl >> {}\nName >> {}\nTitle >> {}\nDescription >> {}\nImage >> {}",
26 self.url
27 .as_ref()
28 .unwrap_or(&"Url not Avaliable".to_string()),
29 self.name
30 .as_ref()
31 .unwrap_or(&"Name not Avaliable".to_string()),
32 self.title
33 .as_ref()
34 .unwrap_or(&"Title not Avaliable".to_string()),
35 self.description
36 .as_ref()
37 .unwrap_or(&"Description not Avaliable".to_string()),
38 self.image
39 .as_ref()
40 .unwrap_or(&"Image not Avaliable".to_string())
41 )
42 }
43}
44
45impl Preview {
47 pub fn new(url: &str) -> Preview {
48 let document = Html::parse_document(&blocking::get(url).unwrap().text().unwrap());
49
50 Preview {
51 url: url.to_owned(),
52 document,
53 }
54 }
55
56 pub async fn async_new(url: &str) -> Preview {
57 let document =
58 Html::parse_document(&reqwest::get(url).await.unwrap().text().await.unwrap());
59
60 Preview {
61 url: url.to_owned(),
62 document,
63 }
64 }
65
66 pub async fn async_fetch_preview(&self) -> Result<PreviewResponse, ()> {
67 Ok(self.fetch_preview())
68 }
69
70 pub fn fetch_preview(&self) -> PreviewResponse {
72 let site_description = self.extract_description();
73 let site_title = self.extract_title();
74 let site_name = self.extract_site_name();
75 let site_image = self.extract_image();
76 let site_url = self.extract_site_url(&self.url);
77
78 PreviewResponse {
79 description: site_description,
80 image: site_image,
81 name: site_name,
82 url: site_url,
83 title: site_title,
84 }
85 }
86
87 pub(crate) fn extract_description(&self) -> Option<String> {
88 let og_description =
89 self.extract_from_tag(&self.document, "meta", "property", "og:description");
90
91 if og_description.is_none() {
92 let meta_description =
93 self.extract_from_tag(&self.document, "meta", "name", "description");
94 if meta_description.is_none() {
95 return None;
96 }
97 return Some(
98 meta_description
99 .unwrap()
100 .value()
101 .attr("content")
102 .unwrap()
103 .to_owned(),
104 );
105 }
106 return Some(
107 og_description
108 .unwrap()
109 .value()
110 .attr("content")
111 .unwrap()
112 .to_owned(),
113 );
114 }
115
116 pub(crate) fn extract_title(&self) -> Option<String> {
117 let og_title = match self.extract_from_tag(&self.document, "meta", "property", "og:title") {
118 Some(title) => title.value().attr("content").unwrap(),
119 None => {
120 let meta_title = self.extract_from_tag(&self.document, "meta", "name", "title");
121 if meta_title.is_none() {
122 let tag_title = self.extract_from_element(&self.document, "title");
123 if tag_title.is_none() {
124 return None;
125 }
126 return Some(tag_title.unwrap().inner_html());
127 }
128 return Some(
129 meta_title
130 .unwrap()
131 .value()
132 .attr("content")
133 .unwrap()
134 .to_owned(),
135 );
136 }
137 };
138 Some(og_title.to_owned())
139 }
140
141 pub(crate) fn extract_site_name(&self) -> Option<String> {
142 let og_site_name =
143 match self.extract_from_tag(&self.document, "meta", "property", "og:site_name") {
144 Some(site_name) => site_name.value().attr("content").unwrap(),
145 None => {
146 let meta_site_name =
147 self.extract_from_tag(&self.document, "meta", "name", "title");
148 if meta_site_name.is_none() {
149 let tag_title = self.extract_from_element(&self.document, "title");
150 if tag_title.is_none() {
151 return None;
152 }
153 return Some(tag_title.unwrap().inner_html());
154 };
155 return Some(
156 meta_site_name
157 .unwrap()
158 .value()
159 .attr("content")
160 .unwrap()
161 .to_owned(),
162 );
163 }
164 };
165 Some(og_site_name.to_owned())
166 }
167
168 pub(crate) fn extract_image(&self) -> Option<String> {
169 let og_image = match self.extract_from_tag(&self.document, "meta", "property", "og:image") {
170 Some(img) => img.value().attr("content"),
171 None => {
172 let meta_image = self.extract_from_tag(&self.document, "link", "rel", "image_src");
173 if meta_image.is_none() {
174 return None;
175 }
176 return Some(
177 meta_image
178 .unwrap()
179 .value()
180 .attr("content")
181 .unwrap()
182 .to_owned(),
183 );
184 }
185 };
186 Some(og_image.unwrap().to_owned())
187 }
188
189 pub(crate) fn extract_site_url(&self, link: &str) -> Option<String> {
190 let og_site_url = match self.extract_from_tag(&self.document, "meta", "property", "og:url")
191 {
192 Some(og_url) => og_url.value().attr("content"),
193 None => {
194 let meta_site_url =
195 match self.extract_from_tag(&self.document, "link", "rel", "canonical") {
196 Some(meta_url) => meta_url.value().attr("content"),
197 None => {
198 return Some(link.to_owned());
199 }
200 };
201 return Some(meta_site_url.unwrap().to_owned());
202 }
203 };
204 Some(og_site_url.unwrap().to_owned())
205 }
206
207 pub(crate) fn extract_from_tag<'a>(
208 &self,
209 document: &'a Html,
210 element_name: &'a str,
211 attribute: &'a str,
212 attribute_name: &'a str,
213 ) -> Option<ElementRef<'a>> {
214 let formtted_attr = format!("{}[{}='{}']", element_name, attribute, attribute_name);
215 let selector = Selector::parse(&&formtted_attr).unwrap();
216 let result = document.select(&selector).next();
217 return result;
218 }
219
220 pub(crate) fn extract_from_element<'a>(
221 &self,
222 document: &'a Html,
223 element: &'a str,
224 ) -> Option<ElementRef<'a>> {
225 let selector = Selector::parse(element).unwrap();
226 let val = document.select(&selector).next();
227 return val;
228 }
229}