1use crate::{request::Request, utils};
2use bytes::Bytes;
3use dashmap::{DashMap, DashSet};
4use linkify::{LinkFinder, LinkKind};
5use reqwest::StatusCode;
6use reqwest::header::HeaderMap;
7use scraper::{Html, Selector};
8use serde::de::DeserializeOwned;
9use serde_json;
10use std::borrow::Cow;
11use url::Url;
12
/// Category assigned to a link discovered in a response body.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum LinkType {
    /// A navigational link: an `<a>` element or a URL found in plain text.
    Page,
    /// The source of a `<script>` element.
    Script,
    /// A `<link>` element whose `rel` marks it as a stylesheet.
    Stylesheet,
    /// The source of an `<img>` element.
    Image,
    /// The source of an `<audio>`, `<video>` or `<source>` element.
    Media,
    /// Anything else; the payload is the `rel` value or the tag name.
    Other(String),
}
29
30#[derive(Debug, Clone, PartialEq, Eq, Hash)]
32pub struct Link {
33 pub url: Url,
35 pub link_type: LinkType,
37}
38
39#[derive(Debug, Clone)]
41pub struct Response {
42 pub url: Url,
44 pub status: StatusCode,
46 pub headers: HeaderMap,
48 pub body: Bytes,
50 pub request_url: Url,
52 pub meta: DashMap<Cow<'static, str>, serde_json::Value>,
54}
55
56impl Response {
57 pub fn request_from_response(&self) -> Request {
59 let mut request = Request::new(self.request_url.clone());
60 request.meta = self.meta.clone();
61 request
62 }
63
64 pub fn json<T: DeserializeOwned>(&self) -> Result<T, serde_json::Error> {
66 serde_json::from_slice(&self.body)
67 }
68
69 pub fn to_html(&self) -> Result<Html, std::str::Utf8Error> {
71 let body_str = std::str::from_utf8(&self.body)?;
72 Ok(Html::parse_document(body_str))
73 }
74
75 pub fn links(&self) -> DashSet<Link> {
77 let links = DashSet::new();
78
79 if let Ok(html) = self.to_html() {
80 let selectors = vec![
81 ("a[href]", "href"),
82 ("link[href]", "href"),
83 ("script[src]", "src"),
84 ("img[src]", "src"),
85 ("audio[src]", "src"),
86 ("video[src]", "src"),
87 ("source[src]", "src"),
88 ];
89
90 for (selector_str, attr_name) in selectors {
91 if let Ok(selector) = Selector::parse(selector_str) {
92 for element in html.select(&selector) {
93 if let Some(attr_value) = element.value().attr(attr_name)
94 && let Ok(url) = self.url.join(attr_value)
95 && utils::is_same_site(&url, &self.url)
96 {
97 let link_type = match element.value().name() {
98 "a" => LinkType::Page,
99 "link" => {
100 if let Some(rel) = element.value().attr("rel") {
101 if rel.eq_ignore_ascii_case("stylesheet") {
102 LinkType::Stylesheet
103 } else {
104 LinkType::Other(rel.to_string())
105 }
106 } else {
107 LinkType::Other("link".to_string())
108 }
109 }
110 "script" => LinkType::Script,
111 "img" => LinkType::Image,
112 "audio" | "video" | "source" => LinkType::Media,
113 _ => LinkType::Other(element.value().name().to_string()),
114 };
115 links.insert(Link { url, link_type });
116 }
117 }
118 }
119 }
120
121 let finder = LinkFinder::new();
122 for text_node in html.tree.values().filter_map(|node| node.as_text()) {
123 for link in finder.links(text_node) {
124 if link.kind() == &LinkKind::Url
125 && let Ok(url) = self.url.join(link.as_str())
126 && utils::is_same_site(&url, &self.url)
127 {
128 links.insert(Link {
129 url,
130 link_type: LinkType::Page,
131 });
132 }
133 }
134 }
135 }
136
137 links
138 }
139}