adblock/
request.rs

1//! Contains structures needed to describe network requests.
2
3use thiserror::Error;
4
5use crate::url_parser;
6use crate::utils;
7
/// The kind of resource a network request is fetching.
///
/// Values are normally obtained from a request type string supplied by the
/// caller of [`Request::new`] or [`Request::preparsed`]; the per-variant notes
/// list the strings this module maps to each variant.
#[derive(Debug, Clone, PartialEq)]
pub enum RequestType {
    /// No request type string in this module maps to this variant
    /// (`"beacon"` is mapped to [`RequestType::Ping`]).
    Beacon,
    /// Mapped from `"csp_report"`.
    Csp,
    /// Mapped from `"document"` and `"main_frame"`.
    Document,
    /// No request type string in this module maps to this variant
    /// (`"xml_dtd"` is mapped to [`RequestType::Other`]).
    Dtd,
    /// No request type string in this module maps to this variant.
    Fetch,
    /// Mapped from `"font"`.
    Font,
    /// Mapped from `"image"` and `"imageset"`.
    Image,
    /// Mapped from `"media"`.
    Media,
    /// Mapped from `"object"` and `"object_subrequest"`.
    Object,
    /// Fallback for `"other"`, `"speculative"`, `"web_manifest"`, `"xbl"`,
    /// `"xml_dtd"`, `"xslt"` and any unrecognised request type string.
    Other,
    /// Mapped from `"ping"` and `"beacon"`.
    Ping,
    /// Mapped from `"script"`.
    Script,
    /// Mapped from `"stylesheet"`.
    Stylesheet,
    /// Mapped from `"sub_frame"` and `"subdocument"`.
    Subdocument,
    /// Mapped from `"websocket"`; also forced for any URL whose scheme is
    /// `ws` or `wss`, regardless of the request type string.
    Websocket,
    /// No request type string in this module maps to this variant
    /// (`"xslt"` is mapped to [`RequestType::Other`]).
    // NOTE(review): the name looks like a transposition of "XSLT"; it is kept
    // because renaming a public variant would break downstream code.
    Xlst,
    /// Mapped from `"xhr"` and `"xmlhttprequest"`.
    Xmlhttprequest,
}
29
30/// Possible failure reasons when creating a [`Request`].
31#[derive(Debug, Error, PartialEq)]
32pub enum RequestError {
33    #[error("hostname parsing failed")]
34    HostnameParseError,
35    #[error("source hostname parsing failed")]
36    SourceHostnameParseError,
37    #[error("invalid Unicode provided")]
38    UnicodeDecodingError,
39}
40
41impl From<idna::Errors> for RequestError {
42    fn from(_err: idna::Errors) -> RequestError {
43        RequestError::UnicodeDecodingError
44    }
45}
46
47impl From<url::ParseError> for RequestError {
48    fn from(_err: url::ParseError) -> RequestError {
49        RequestError::HostnameParseError
50    }
51}
52
53fn cpt_match_type(cpt: &str) -> RequestType {
54    match cpt {
55        "beacon" => RequestType::Ping,
56        "csp_report" => RequestType::Csp,
57        "document" | "main_frame" => RequestType::Document,
58        "font" => RequestType::Font,
59        "image" | "imageset" => RequestType::Image,
60        "media" => RequestType::Media,
61        "object" | "object_subrequest" => RequestType::Object,
62        "ping" => RequestType::Ping,
63        "script" => RequestType::Script,
64        "stylesheet" => RequestType::Stylesheet,
65        "sub_frame" | "subdocument" => RequestType::Subdocument,
66        "websocket" => RequestType::Websocket,
67        "xhr" | "xmlhttprequest" => RequestType::Xmlhttprequest,
68        "other" => RequestType::Other,
69        "speculative" => RequestType::Other,
70        "web_manifest" => RequestType::Other,
71        "xbl" => RequestType::Other,
72        "xml_dtd" => RequestType::Other,
73        "xslt" => RequestType::Other,
74        _ => RequestType::Other,
75    }
76}
77
78/// A network [`Request`], used as an interface for network blocking in the [`crate::Engine`].
79#[derive(Clone, Debug)]
80pub struct Request {
81    pub request_type: RequestType,
82
83    pub is_http: bool,
84    pub is_https: bool,
85    pub is_supported: bool,
86    pub is_third_party: bool,
87    pub url: String,
88    pub hostname: String,
89    pub source_hostname_hashes: Option<Vec<utils::Hash>>,
90
91    pub(crate) url_lower_cased: String,
92    pub(crate) request_tokens: Vec<utils::Hash>,
93    pub(crate) original_url: String,
94}
95
96impl Request {
97    pub(crate) fn get_url(&self, case_sensitive: bool) -> &str {
98        if case_sensitive {
99            &self.url
100        } else {
101            &self.url_lower_cased
102        }
103    }
104
105    pub fn get_tokens_for_match(&self) -> impl Iterator<Item = &utils::Hash> {
106        // We start matching with source_hostname_hashes for optimization,
107        // as it contains far fewer elements.
108        self.source_hostname_hashes
109            .as_ref()
110            .into_iter()
111            .flatten()
112            .chain(self.get_tokens())
113    }
114
115    pub fn get_tokens(&self) -> &Vec<utils::Hash> {
116        &self.request_tokens
117    }
118
119    #[allow(clippy::too_many_arguments)]
120    fn from_detailed_parameters(
121        raw_type: &str,
122        url: &str,
123        schema: &str,
124        hostname: &str,
125        source_hostname: &str,
126        third_party: bool,
127        original_url: String,
128    ) -> Request {
129        let is_http: bool;
130        let is_https: bool;
131        let is_supported: bool;
132        let request_type: RequestType;
133
134        if schema.is_empty() {
135            // no ':' was found
136            is_https = true;
137            is_http = false;
138            is_supported = true;
139            request_type = cpt_match_type(raw_type);
140        } else {
141            is_http = schema == "http";
142            is_https = !is_http && schema == "https";
143
144            let is_websocket = !is_http && !is_https && (schema == "ws" || schema == "wss");
145            is_supported = is_http || is_https || is_websocket;
146            if is_websocket {
147                request_type = RequestType::Websocket;
148            } else {
149                request_type = cpt_match_type(raw_type);
150            }
151        }
152
153        let source_hostname_hashes = if !source_hostname.is_empty() {
154            let mut hashes = Vec::with_capacity(4);
155            hashes.push(utils::fast_hash(source_hostname));
156            for (i, c) in source_hostname.char_indices() {
157                if c == '.' && i + 1 < source_hostname.len() {
158                    hashes.push(utils::fast_hash(&source_hostname[i + 1..]));
159                }
160            }
161            Some(hashes)
162        } else {
163            None
164        };
165
166        let url_lower_cased = url.to_ascii_lowercase();
167
168        Request {
169            request_type,
170            url: url.to_owned(),
171            url_lower_cased: url_lower_cased.to_owned(),
172            hostname: hostname.to_owned(),
173            request_tokens: calculate_tokens(&url_lower_cased),
174            source_hostname_hashes,
175            is_third_party: third_party,
176            is_http,
177            is_https,
178            is_supported,
179            original_url,
180        }
181    }
182
183    /// Construct a new [`Request`].
184    pub fn new(url: &str, source_url: &str, request_type: &str) -> Result<Request, RequestError> {
185        if let Some(parsed_url) = url_parser::parse_url(url) {
186            if let Some(parsed_source) = url_parser::parse_url(source_url) {
187                let source_domain = parsed_source.domain();
188
189                let third_party = source_domain != parsed_url.domain();
190
191                Ok(Request::from_detailed_parameters(
192                    request_type,
193                    &parsed_url.url,
194                    parsed_url.schema(),
195                    parsed_url.hostname(),
196                    parsed_source.hostname(),
197                    third_party,
198                    url.to_string(),
199                ))
200            } else {
201                Ok(Request::from_detailed_parameters(
202                    request_type,
203                    &parsed_url.url,
204                    parsed_url.schema(),
205                    parsed_url.hostname(),
206                    "",
207                    true,
208                    url.to_string(),
209                ))
210            }
211        } else {
212            Err(RequestError::HostnameParseError)
213        }
214    }
215
216    /// If you're building a [`Request`] in a context that already has access to parsed
217    /// representations of the input URLs, you can use this constructor to avoid extra lookups from
218    /// the public suffix list. Take care to pass data correctly.
219    pub fn preparsed(
220        url: &str,
221        hostname: &str,
222        source_hostname: &str,
223        request_type: &str,
224        third_party: bool,
225    ) -> Request {
226        let splitter = memchr::memchr(b':', url.as_bytes()).unwrap_or(0);
227        let schema: &str = &url[..splitter];
228
229        Request::from_detailed_parameters(
230            request_type,
231            url,
232            schema,
233            hostname,
234            source_hostname,
235            third_party,
236            url.to_string(),
237        )
238    }
239}
240
241fn calculate_tokens(url_lower_cased: &str) -> Vec<utils::Hash> {
242    let mut tokens = utils::TokensBuffer::default();
243    utils::tokenize_pooled(url_lower_cased, &mut tokens);
244    // Add zero token as a fallback to wildcard rule bucket
245    tokens.push(0);
246    tokens.into_iter().collect()
247}
248
249#[cfg(test)]
250#[path = "../tests/unit/request.rs"]
251mod unit_tests;