scrapling_browser/
intercept.rs1use std::collections::HashSet;
20
21use crate::constants::EXTRA_RESOURCES;
22
23pub fn should_block_resource(resource_type: &str, disable_resources: bool) -> bool {
29 if !disable_resources {
30 return false;
31 }
32 EXTRA_RESOURCES.contains(&resource_type)
33}
34
/// Returns `true` when `hostname`, or any parent domain of it, is in `blocked_domains`.
///
/// The hostname itself is checked first, then every suffix that starts right
/// after a `.` (`sub.ad.example.com` -> `ad.example.com` -> `example.com` ->
/// `com`). Because suffixes always start on a label boundary, blocking
/// `doubleclick.net` matches `ad.doubleclick.net` but not `notdoubleclick.net`.
///
/// A rooted (fully-qualified) spelling with a trailing dot, such as
/// `ad.doubleclick.net.`, is treated the same as the spelling without it, so a
/// request cannot slip past the blocklist by appending `.` to the host.
///
/// The comparison is an exact string comparison; callers are expected to pass
/// an already lowercased `hostname` if the blocklist is stored in lowercase.
///
/// An empty `blocked_domains` set never blocks anything.
pub fn is_domain_blocked(hostname: &str, blocked_domains: &HashSet<String>) -> bool {
    if blocked_domains.is_empty() {
        return false;
    }

    // A candidate is listed either verbatim or, for the rooted spelling
    // (`example.com.`), once its single trailing dot is removed.
    let is_listed = |candidate: &str| {
        blocked_domains.contains(candidate)
            || matches!(
                candidate.strip_suffix('.'),
                Some(bare) if blocked_domains.contains(bare)
            )
    };

    if is_listed(hostname) {
        return true;
    }

    // Every `.` marks the start of a parent-domain suffix worth checking.
    // `.` is a single byte in UTF-8, so `dot + 1` is always a char boundary.
    hostname
        .match_indices('.')
        .any(|(dot, _)| is_listed(&hostname[dot + 1..]))
}
61
62pub fn should_block_request(
68 resource_type: &str,
69 url: &str,
70 disable_resources: bool,
71 blocked_domains: &HashSet<String>,
72) -> bool {
73 if should_block_resource(resource_type, disable_resources) {
74 return true;
75 }
76
77 if !blocked_domains.is_empty() {
78 if let Ok(parsed) = url::Url::parse(url) {
79 if let Some(host) = parsed.host_str() {
80 return is_domain_blocked(&host.to_lowercase(), blocked_domains);
81 }
82 }
83 }
84
85 false
86}
87
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned blocklist from string literals.
    fn blocklist(entries: &[&str]) -> HashSet<String> {
        entries.iter().map(|entry| (*entry).to_owned()).collect()
    }

    #[test]
    fn resource_blocking() {
        // With resource blocking enabled, these types are rejected.
        for kind in ["font", "image", "stylesheet"].iter() {
            assert!(
                should_block_resource(kind, true),
                "{} should be blocked when resources are disabled",
                kind
            );
        }

        // Documents are always let through; nothing is blocked when disabled.
        assert!(!should_block_resource("document", true));
        assert!(!should_block_resource("font", false));
    }

    #[test]
    fn domain_blocking_exact() {
        let domains = blocklist(&["ads.example.com"]);

        assert!(is_domain_blocked("ads.example.com", &domains));
        // Blocking a subdomain must not block its parent.
        assert!(!is_domain_blocked("example.com", &domains));
    }

    #[test]
    fn domain_blocking_suffix() {
        let domains = blocklist(&["doubleclick.net"]);

        for host in ["ad.doubleclick.net", "sub.ad.doubleclick.net", "doubleclick.net"].iter() {
            assert!(is_domain_blocked(host, &domains), "{} should be blocked", host);
        }

        // A shared textual suffix that is not on a label boundary is not a match.
        assert!(!is_domain_blocked("notdoubleclick.net", &domains));
    }

    #[test]
    fn domain_blocking_empty() {
        let domains: HashSet<String> = HashSet::new();

        assert!(!is_domain_blocked("anything.com", &domains));
    }

    #[test]
    fn should_block_request_combined() {
        let domains = blocklist(&["tracker.com"]);

        // (resource type, request URL, disable_resources, expected verdict)
        let cases = [
            ("document", "https://tracker.com/pixel", false, true),
            ("font", "https://cdn.com/font.woff", true, true),
            ("document", "https://example.com", false, false),
        ];

        for &(resource_type, target, disable_resources, expected) in cases.iter() {
            assert_eq!(
                should_block_request(resource_type, target, disable_resources, &domains),
                expected,
                "unexpected verdict for {} request to {}",
                resource_type,
                target
            );
        }
    }
}