1use std::str::FromStr;
2use std::string::FromUtf8Error;
3
4use scraper::Html;
5use thiserror::Error;
6use url::Url;
7
8#[cfg(feature = "serde")]
9use serde::{Deserialize, Serialize};
10
11use crate::html::{find_link, find_meta_tag, first_inner_html};
12use crate::providers::og::{find_og_tag, OpenGraphTag};
13use crate::providers::schema::{find_schema_tag, SchemaMetaTag};
14use crate::providers::twitter::{find_twitter_tag, TwitterMetaTag};
15
16#[derive(Error, Debug)]
17pub enum Error {
18 #[error("The provided byte slice contains invalid UTF-8 characters")]
19 InvalidUtf8(FromUtf8Error),
20}
21
22#[derive(Clone, Debug)]
24#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
25pub struct LinkPreview {
26 pub title: Option<String>,
27 pub description: Option<String>,
28 pub domain: Option<String>,
29 pub image_url: Option<Url>,
30}
31
32impl LinkPreview {
33 pub fn image_url_str(&self) -> Option<String> {
35 if let Some(image_url) = self.image_url.clone() {
36 return Some(image_url.to_string());
37 }
38
39 None
40 }
41
42 pub fn find_first_domain(html: &Html) -> Option<String> {
47 if let Some(domain) = find_link(html, "canonical") {
48 return LinkPreview::domain_from_string(domain);
49 }
50
51 if let Some(domain) = find_og_tag(html, OpenGraphTag::Url) {
52 return LinkPreview::domain_from_string(domain);
53 }
54
55 None
56 }
57
58 fn domain_from_string(value: String) -> Option<String> {
65 let url = Url::parse(&value).ok()?;
66
67 url.domain().map(|domain| domain.to_string())
68 }
69
70 pub fn find_first_image_url(html: &Html) -> Option<Url> {
77 if let Some(image_url) = find_og_tag(html, OpenGraphTag::Image) {
78 return Url::parse(&image_url).ok();
79 }
80
81 if let Some(image_url) = find_link(html, "image_src") {
82 return Url::parse(&image_url).ok();
83 }
84
85 if let Some(image_url) = find_schema_tag(html, SchemaMetaTag::Image) {
86 return Url::parse(&image_url).ok();
87 }
88
89 if let Some(image_url) = find_twitter_tag(html, TwitterMetaTag::Image) {
90 return Url::parse(&image_url).ok();
91 }
92
93 None
94 }
95
96 pub fn find_first_description(html: &Html) -> Option<String> {
104 if let Some(description) = find_og_tag(html, OpenGraphTag::Description) {
105 return Some(description);
106 }
107
108 if let Some(description) = find_twitter_tag(html, TwitterMetaTag::Description) {
109 return Some(description);
110 }
111
112 if let Some(description) = find_schema_tag(html, SchemaMetaTag::Description) {
113 return Some(description);
114 }
115
116 if let Some(description) = find_meta_tag(html, "description") {
117 return Some(description);
118 }
119
120 if let Some(description) = first_inner_html(html, "p") {
121 return Some(description);
122 }
123
124 None
125 }
126
127 pub fn find_first_title(html: &Html) -> Option<String> {
136 if let Some(title) = find_og_tag(html, OpenGraphTag::Title) {
137 return Some(title);
138 }
139
140 if let Some(title) = find_twitter_tag(html, TwitterMetaTag::Title) {
141 return Some(title);
142 }
143
144 if let Some(title) = find_schema_tag(html, SchemaMetaTag::Name) {
145 return Some(title);
146 }
147
148 if let Some(title) = first_inner_html(html, "title") {
149 return Some(title);
150 }
151
152 if let Some(title) = first_inner_html(html, "h1") {
153 return Some(title);
154 }
155
156 if let Some(title) = first_inner_html(html, "h2") {
157 return Some(title);
158 }
159
160 None
161 }
162}
163
164impl From<Html> for LinkPreview {
165 fn from(html: Html) -> Self {
166 let image_url: Option<Url> = LinkPreview::find_first_image_url(&html);
167 let domain = LinkPreview::find_first_domain(&html);
168
169 LinkPreview {
170 title: LinkPreview::find_first_title(&html),
171 description: LinkPreview::find_first_description(&html),
172 domain,
173 image_url,
174 }
175 }
176}
177
178impl From<&Html> for LinkPreview {
179 fn from(html: &Html) -> Self {
180 let image_url: Option<Url> = LinkPreview::find_first_image_url(html);
181 let domain: Option<String> = LinkPreview::find_first_domain(html);
182
183 LinkPreview {
184 title: LinkPreview::find_first_title(html),
185 description: LinkPreview::find_first_description(html),
186 domain,
187 image_url,
188 }
189 }
190}
191
192impl FromStr for LinkPreview {
193 type Err = Error;
194
195 fn from_str(html: &str) -> Result<Self, Self::Err> {
196 let html = Html::parse_document(html);
197 let image_url: Option<Url> = LinkPreview::find_first_image_url(&html);
198 let domain: Option<String> = LinkPreview::find_first_domain(&html);
199
200 Ok(LinkPreview {
201 title: LinkPreview::find_first_title(&html),
202 description: LinkPreview::find_first_description(&html),
203 domain,
204 image_url,
205 })
206 }
207}
208
209pub fn html_from_bytes(value: &[u8]) -> Result<Html, Error> {
212 let utf8 = String::from_utf8(value.to_vec()).map_err(Error::InvalidUtf8)?;
213
214 Ok(Html::parse_document(utf8.as_str()))
215}
216
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use crate::html_from_bytes;
    use crate::tests::FULL_FEATURED_HTML;

    use super::LinkPreview;

    // Values the tests expect to be extracted from `FULL_FEATURED_HTML`.
    const EXPECTED_TITLE: &str = "SEO Strategies for a better web";
    const EXPECTED_DESCRIPTION: &str = "John Appleseed tells you his secrets on SEO for a better web experience by taking advantage of OpenGraph\'s Tags!";
    const EXPECTED_IMAGE_URL: &str =
        "https://www.apple.com/ac/structured-data/images/open_graph_logo.png?201809210816";
    const EXPECTED_DOMAIN: &str = "en.wikipedia.com";

    /// Asserts that every field of `preview` holds the value expected for
    /// `FULL_FEATURED_HTML`.
    fn assert_full_featured_preview(preview: LinkPreview) {
        assert_eq!(preview.title.unwrap(), EXPECTED_TITLE);
        assert_eq!(preview.description.unwrap(), EXPECTED_DESCRIPTION);
        assert_eq!(preview.image_url.unwrap().to_string(), EXPECTED_IMAGE_URL);
        assert_eq!(preview.domain.unwrap(), EXPECTED_DOMAIN);
    }

    #[test]
    fn creates_instance_of_link_preview_from_html_instance() {
        let document = html_from_bytes(FULL_FEATURED_HTML).unwrap();

        assert_full_featured_preview(LinkPreview::from(&document));
    }

    #[test]
    fn creates_instance_of_link_preview_from_str_instance() {
        let markup = std::str::from_utf8(FULL_FEATURED_HTML).unwrap();

        assert_full_featured_preview(LinkPreview::from_str(markup).unwrap());
    }

    #[test]
    fn finds_first_title() {
        let document = html_from_bytes(FULL_FEATURED_HTML).unwrap();

        assert_eq!(
            LinkPreview::find_first_title(&document).unwrap(),
            EXPECTED_TITLE
        );
    }

    #[test]
    fn finds_first_description() {
        let document = html_from_bytes(FULL_FEATURED_HTML).unwrap();

        assert_eq!(
            LinkPreview::find_first_description(&document).unwrap(),
            EXPECTED_DESCRIPTION
        );
    }

    #[test]
    fn finds_first_image_url() {
        let document = html_from_bytes(FULL_FEATURED_HTML).unwrap();
        let found = LinkPreview::find_first_image_url(&document).unwrap();

        assert_eq!(found.to_string(), EXPECTED_IMAGE_URL);
    }

    #[test]
    fn finds_first_domain() {
        let document = html_from_bytes(FULL_FEATURED_HTML).unwrap();

        assert_eq!(
            LinkPreview::find_first_domain(&document).unwrap(),
            EXPECTED_DOMAIN
        );
    }
}