1use thiserror::Error;
4
5use crate::url_parser;
6use crate::utils;
7
/// The kind of resource a network request is fetching.
///
/// Values are normally obtained from a raw content-type string via
/// `cpt_match_type`; `Request::from_detailed_parameters` additionally forces
/// [`RequestType::Websocket`] for `ws`/`wss` URLs.
///
/// The enum carries no data, so equality is total and `Eq` is derived alongside
/// `PartialEq`.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum RequestType {
    /// Beacon request. `cpt_match_type` does not currently yield this variant
    /// (the raw type `"beacon"` is mapped to [`RequestType::Ping`]).
    Beacon,
    /// CSP report (raw type `"csp_report"`).
    Csp,
    /// Top-level document (raw types `"document"`, `"main_frame"`).
    Document,
    /// DTD resource. `cpt_match_type` does not currently yield this variant.
    Dtd,
    /// Fetch request. `cpt_match_type` does not currently yield this variant.
    Fetch,
    /// Font resource (raw type `"font"`).
    Font,
    /// Image resource (raw types `"image"`, `"imageset"`).
    Image,
    /// Media resource (raw type `"media"`).
    Media,
    /// Object resource (raw types `"object"`, `"object_subrequest"`).
    Object,
    /// Fallback for every raw type without a dedicated mapping.
    Other,
    /// Ping request (raw types `"ping"` and `"beacon"`).
    Ping,
    /// Script resource (raw type `"script"`).
    Script,
    /// Stylesheet resource (raw type `"stylesheet"`).
    Stylesheet,
    /// Nested document (raw types `"sub_frame"`, `"subdocument"`).
    Subdocument,
    /// WebSocket connection (raw type `"websocket"`, or any `ws`/`wss` URL).
    Websocket,
    /// NOTE(review): presumably XSLT; the spelling is kept because it is part of
    /// the public API. `cpt_match_type` does not currently yield this variant.
    Xlst,
    /// XMLHttpRequest (raw types `"xhr"`, `"xmlhttprequest"`).
    Xmlhttprequest,
}
29
30#[derive(Debug, Error, PartialEq)]
32pub enum RequestError {
33 #[error("hostname parsing failed")]
34 HostnameParseError,
35 #[error("source hostname parsing failed")]
36 SourceHostnameParseError,
37 #[error("invalid Unicode provided")]
38 UnicodeDecodingError,
39}
40
41impl From<idna::Errors> for RequestError {
42 fn from(_err: idna::Errors) -> RequestError {
43 RequestError::UnicodeDecodingError
44 }
45}
46
47impl From<url::ParseError> for RequestError {
48 fn from(_err: url::ParseError) -> RequestError {
49 RequestError::HostnameParseError
50 }
51}
52
53fn cpt_match_type(cpt: &str) -> RequestType {
54 match cpt {
55 "beacon" => RequestType::Ping,
56 "csp_report" => RequestType::Csp,
57 "document" | "main_frame" => RequestType::Document,
58 "font" => RequestType::Font,
59 "image" | "imageset" => RequestType::Image,
60 "media" => RequestType::Media,
61 "object" | "object_subrequest" => RequestType::Object,
62 "ping" => RequestType::Ping,
63 "script" => RequestType::Script,
64 "stylesheet" => RequestType::Stylesheet,
65 "sub_frame" | "subdocument" => RequestType::Subdocument,
66 "websocket" => RequestType::Websocket,
67 "xhr" | "xmlhttprequest" => RequestType::Xmlhttprequest,
68 "other" => RequestType::Other,
69 "speculative" => RequestType::Other,
70 "web_manifest" => RequestType::Other,
71 "xbl" => RequestType::Other,
72 "xml_dtd" => RequestType::Other,
73 "xslt" => RequestType::Other,
74 _ => RequestType::Other,
75 }
76}
77
78#[derive(Clone, Debug)]
80pub struct Request {
81 pub request_type: RequestType,
82
83 pub is_http: bool,
84 pub is_https: bool,
85 pub is_supported: bool,
86 pub is_third_party: bool,
87 pub url: String,
88 pub hostname: String,
89 pub source_hostname_hashes: Option<Vec<utils::Hash>>,
90
91 pub(crate) url_lower_cased: String,
92 pub(crate) request_tokens: Vec<utils::Hash>,
93 pub(crate) original_url: String,
94}
95
96impl Request {
97 pub(crate) fn get_url(&self, case_sensitive: bool) -> &str {
98 if case_sensitive {
99 &self.url
100 } else {
101 &self.url_lower_cased
102 }
103 }
104
105 pub fn get_tokens_for_match(&self) -> impl Iterator<Item = &utils::Hash> {
106 self.source_hostname_hashes
109 .as_ref()
110 .into_iter()
111 .flatten()
112 .chain(self.get_tokens())
113 }
114
115 pub fn get_tokens(&self) -> &Vec<utils::Hash> {
116 &self.request_tokens
117 }
118
119 #[allow(clippy::too_many_arguments)]
120 fn from_detailed_parameters(
121 raw_type: &str,
122 url: &str,
123 schema: &str,
124 hostname: &str,
125 source_hostname: &str,
126 third_party: bool,
127 original_url: String,
128 ) -> Request {
129 let is_http: bool;
130 let is_https: bool;
131 let is_supported: bool;
132 let request_type: RequestType;
133
134 if schema.is_empty() {
135 is_https = true;
137 is_http = false;
138 is_supported = true;
139 request_type = cpt_match_type(raw_type);
140 } else {
141 is_http = schema == "http";
142 is_https = !is_http && schema == "https";
143
144 let is_websocket = !is_http && !is_https && (schema == "ws" || schema == "wss");
145 is_supported = is_http || is_https || is_websocket;
146 if is_websocket {
147 request_type = RequestType::Websocket;
148 } else {
149 request_type = cpt_match_type(raw_type);
150 }
151 }
152
153 let source_hostname_hashes = if !source_hostname.is_empty() {
154 let mut hashes = Vec::with_capacity(4);
155 hashes.push(utils::fast_hash(source_hostname));
156 for (i, c) in source_hostname.char_indices() {
157 if c == '.' && i + 1 < source_hostname.len() {
158 hashes.push(utils::fast_hash(&source_hostname[i + 1..]));
159 }
160 }
161 Some(hashes)
162 } else {
163 None
164 };
165
166 let url_lower_cased = url.to_ascii_lowercase();
167
168 Request {
169 request_type,
170 url: url.to_owned(),
171 url_lower_cased: url_lower_cased.to_owned(),
172 hostname: hostname.to_owned(),
173 request_tokens: calculate_tokens(&url_lower_cased),
174 source_hostname_hashes,
175 is_third_party: third_party,
176 is_http,
177 is_https,
178 is_supported,
179 original_url,
180 }
181 }
182
183 pub fn new(url: &str, source_url: &str, request_type: &str) -> Result<Request, RequestError> {
185 if let Some(parsed_url) = url_parser::parse_url(url) {
186 if let Some(parsed_source) = url_parser::parse_url(source_url) {
187 let source_domain = parsed_source.domain();
188
189 let third_party = source_domain != parsed_url.domain();
190
191 Ok(Request::from_detailed_parameters(
192 request_type,
193 &parsed_url.url,
194 parsed_url.schema(),
195 parsed_url.hostname(),
196 parsed_source.hostname(),
197 third_party,
198 url.to_string(),
199 ))
200 } else {
201 Ok(Request::from_detailed_parameters(
202 request_type,
203 &parsed_url.url,
204 parsed_url.schema(),
205 parsed_url.hostname(),
206 "",
207 true,
208 url.to_string(),
209 ))
210 }
211 } else {
212 Err(RequestError::HostnameParseError)
213 }
214 }
215
216 pub fn preparsed(
220 url: &str,
221 hostname: &str,
222 source_hostname: &str,
223 request_type: &str,
224 third_party: bool,
225 ) -> Request {
226 let splitter = memchr::memchr(b':', url.as_bytes()).unwrap_or(0);
227 let schema: &str = &url[..splitter];
228
229 Request::from_detailed_parameters(
230 request_type,
231 url,
232 schema,
233 hostname,
234 source_hostname,
235 third_party,
236 url.to_string(),
237 )
238 }
239}
240
241fn calculate_tokens(url_lower_cased: &str) -> Vec<utils::Hash> {
242 let mut tokens = utils::TokensBuffer::default();
243 utils::tokenize_pooled(url_lower_cased, &mut tokens);
244 tokens.push(0);
246 tokens.into_iter().collect()
247}
248
249#[cfg(test)]
250#[path = "../tests/unit/request.rs"]
251mod unit_tests;