1use crate::request::Request;
15use crate::selector_cache::get_cached_selector;
16use crate::utils;
17use bytes::Bytes;
18use dashmap::{DashMap, DashSet};
19use linkify::{LinkFinder, LinkKind};
20use reqwest::StatusCode;
21use reqwest::header::HeaderMap;
22use scraper::Html;
23use serde::de::DeserializeOwned;
24use serde_json::{self, Value};
25use std::{borrow::Cow, str::Utf8Error, str::from_utf8};
26use url::Url;
27
/// Category assigned to a link discovered in a response body.
///
/// The category is derived from the HTML element the link was found on
/// (see `Response::links`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum LinkType {
    /// A navigable page: an `<a href>` target or a URL found in plain text.
    Page,
    /// A `<script src>` resource.
    Script,
    /// A `<link href>` whose `rel` marks it as a stylesheet.
    Stylesheet,
    /// An `<img src>` resource.
    Image,
    /// An `<audio>`, `<video>` or `<source>` resource.
    Media,
    /// Anything else; the payload names the kind (for `<link>` elements this
    /// is the raw `rel` value, or `"link"` when `rel` is absent).
    Other(String),
}
44
45#[derive(Debug, Clone, PartialEq, Eq, Hash)]
47pub struct Link {
48 pub url: Url,
50 pub link_type: LinkType,
52}
53
54#[derive(Debug)]
56pub struct Response {
57 pub url: Url,
59 pub status: StatusCode,
61 pub headers: HeaderMap,
63 pub body: Bytes,
65 pub request_url: Url,
67 pub meta: DashMap<Cow<'static, str>, Value>,
69 pub cached: bool,
71}
72
73
74impl Response {
75 pub fn request_from_response(&self) -> Request {
77 let mut request = Request::new(self.request_url.clone());
78 request.meta = self.meta.clone();
79 request
80 }
81
82 pub fn json<T: DeserializeOwned>(&self) -> Result<T, serde_json::Error> {
84 serde_json::from_slice(&self.body)
85 }
86
87 pub fn to_html(&self) -> Result<Html, Utf8Error> {
89 let body_str = from_utf8(&self.body)?;
90 Ok(Html::parse_document(body_str))
91 }
92
93 pub fn lazy_html(&self) -> Result<impl Fn() -> Result<Html, Utf8Error> + '_, Utf8Error> {
95 let body_bytes = &self.body;
96 Ok(move || {
97 let body_str = from_utf8(body_bytes)?;
98 Ok(Html::parse_document(body_str))
99 })
100 }
101
102 pub fn links(&self) -> DashSet<Link> {
104 let links = DashSet::new();
105
106 if let Ok(html_fn) = self.lazy_html()
107 && let Ok(html) = html_fn()
108 {
109 let selectors = vec![
110 ("a[href]", "href"),
111 ("link[href]", "href"),
112 ("script[src]", "src"),
113 ("img[src]", "src"),
114 ("audio[src]", "src"),
115 ("video[src]", "src"),
116 ("source[src]", "src"),
117 ];
118
119 for (selector_str, attr_name) in selectors {
120 if let Some(selector) = get_cached_selector(selector_str) {
121 for element in html.select(&selector) {
122 if let Some(attr_value) = element.value().attr(attr_name)
123 && let Ok(url) = self.url.join(attr_value)
124 && utils::is_same_site(&url, &self.url)
125 {
126 let link_type = match element.value().name() {
127 "a" => LinkType::Page,
128 "link" => {
129 if let Some(rel) = element.value().attr("rel") {
130 if rel.eq_ignore_ascii_case("stylesheet") {
131 LinkType::Stylesheet
132 } else {
133 LinkType::Other(rel.to_string())
134 }
135 } else {
136 LinkType::Other("link".to_string())
137 }
138 }
139 "script" => LinkType::Script,
140 "img" => LinkType::Image,
141 "audio" | "video" | "source" => LinkType::Media,
142 _ => LinkType::Other(element.value().name().to_string()),
143 };
144 links.insert(Link { url, link_type });
145 }
146 }
147 }
148 }
149
150 let finder = LinkFinder::new();
151 for text_node in html.tree.values().filter_map(|node| node.as_text()) {
152 for link in finder.links(text_node) {
153 if link.kind() == &LinkKind::Url
154 && let Ok(url) = self.url.join(link.as_str())
155 && utils::is_same_site(&url, &self.url)
156 {
157 links.insert(Link {
158 url,
159 link_type: LinkType::Page,
160 });
161 }
162 }
163 }
164 }
165
166 links
167 }
168
169}
170
171impl Clone for Response {
172 fn clone(&self) -> Self {
173 Response {
174 url: self.url.clone(),
175 status: self.status,
176 headers: self.headers.clone(),
177 body: self.body.clone(),
178 request_url: self.request_url.clone(),
179 meta: self.meta.clone(),
180 cached: self.cached,
181 }
182 }
183}