1use std::convert::TryFrom;
4
5use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterError};
6use crate::filters::network::{NetworkFilter, NetworkFilterError};
7use crate::resources::PermissionMask;
8
9use itertools::{Either, Itertools};
10use memchr::memchr as find_char;
11use serde::{Deserialize, Serialize};
12use thiserror::Error;
13
/// Selects which categories of rules are kept when parsing filters.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub enum RuleTypes {
    /// Keep both network and cosmetic rules. This is the default.
    #[default]
    All,
    /// Keep network rules only.
    NetworkOnly,
    /// Keep cosmetic rules only.
    CosmeticOnly,
}
22
23impl RuleTypes {
24 pub fn loads_network_rules(&self) -> bool {
25 matches!(self, Self::All | Self::NetworkOnly)
26 }
27
28 pub fn loads_cosmetic_rules(&self) -> bool {
29 matches!(self, Self::All | Self::CosmeticOnly)
30 }
31}
32
/// Options controlling how filter text is parsed.
///
/// Every field is optional when deserializing and falls back to its type's `Default`.
#[derive(Copy, Clone, Deserialize)]
pub struct ParseOptions {
    /// Syntax the input lines are written in. Defaults to [`FilterFormat::Standard`].
    #[serde(default)]
    pub format: FilterFormat,
    /// Which rule categories to load. Defaults to [`RuleTypes::All`].
    #[serde(default)]
    pub rule_types: RuleTypes,
    /// Permission mask handed to the filter parsers.
    // NOTE(review): the meaning of individual permission bits is defined by `PermissionMask`,
    // which is declared outside this file — confirm there.
    #[serde(default)]
    pub permissions: PermissionMask,
}
61
62impl Default for ParseOptions {
63 fn default() -> Self {
64 ParseOptions {
65 format: FilterFormat::Standard,
66 rule_types: RuleTypes::All,
67 permissions: PermissionMask::default(),
68 }
69 }
70}
71
/// Accumulates parsed network and cosmetic filters from one or more filter lists.
#[derive(Clone)]
pub struct FilterSet {
    /// Forwarded to the filter parsers as their `debug` argument. Must be `true` for
    /// `into_content_blocking` (feature `content-blocking`) to succeed.
    debug: bool,
    /// Network filters added so far, in insertion order.
    pub(crate) network_filters: Vec<NetworkFilter>,
    /// Cosmetic filters added so far, in insertion order.
    pub(crate) cosmetic_filters: Vec<CosmeticFilter>,
}
83
84pub fn read_list_metadata(list: &str) -> FilterListMetadata {
86 let mut metadata = FilterListMetadata::default();
87
88 let mut cutoff = list.len().min(1024);
90
91 while !list.is_char_boundary(cutoff) {
92 cutoff -= 1;
93 }
94
95 for line in list[0..cutoff].lines() {
97 if line.starts_with('!') {
98 metadata.try_add(line);
99 } else if line.starts_with('[') {
100 continue;
101 } else {
102 break;
103 }
104 }
105
106 metadata
107}
108
109impl Default for FilterSet {
110 fn default() -> Self {
113 #[cfg(not(test))]
114 let debug = false;
115
116 #[cfg(test)]
117 let debug = true;
118
119 Self::new(debug)
120 }
121}
122
/// Update interval declared by a list's `Expires` metadata field.
#[derive(Debug, PartialEq, Serialize)]
pub enum ExpiresInterval {
    /// Interval in hours. Values parsed from text are limited to `1..=336`.
    Hours(u16),
    /// Interval in days. Values parsed from text are limited to `1..=14`.
    Days(u8),
}
129
130impl TryFrom<&str> for ExpiresInterval {
131 type Error = ();
132
133 fn try_from(v: &str) -> Result<Self, ()> {
134 const DAYS_MAX: u8 = 14;
135 const HOURS_MAX: u16 = DAYS_MAX as u16 * 24;
136
137 let mut v_split = v.split(' ');
139 let amount = v_split.next().ok_or(())?;
140 let unit = v_split.next().ok_or(())?;
141 if amount.starts_with('+') {
143 return Err(());
144 }
145 match unit {
147 "hour" | "hours" => {
148 let amount = amount.parse::<u16>().map_err(|_| ())?;
149 if (1..=HOURS_MAX).contains(&amount) {
150 return Ok(Self::Hours(amount));
151 }
152 }
153 "day" | "days" => {
154 let amount = amount.parse::<u8>().map_err(|_| ())?;
155 if (1..=DAYS_MAX).contains(&amount) {
156 return Ok(Self::Days(amount));
157 }
158 }
159 _ => (),
160 }
161 Err(())
162 }
163}
164
/// Metadata gathered from `! Key: value` comment lines of a filter list.
///
/// For each field, the first usable value encountered is the one that is kept.
#[derive(Default, Serialize)]
pub struct FilterListMetadata {
    /// Value of the `Homepage` field.
    pub homepage: Option<String>,
    /// Value of the `Title` field.
    pub title: Option<String>,
    /// Parsed value of the `Expires` field; values that fail to parse are not recorded.
    pub expires: Option<ExpiresInterval>,
    /// Value of the `Redirect` field.
    pub redirect: Option<String>,
}
189
190impl FilterListMetadata {
191 fn try_add(&mut self, line: &str) {
195 if let Some(kv) = line.strip_prefix("! ") {
196 if let Some((key, value)) = kv.split_once(": ") {
197 match key {
198 "Homepage" if self.homepage.is_none() => {
199 self.homepage = Some(value.to_string())
200 }
201 "Title" if self.title.is_none() => self.title = Some(value.to_string()),
202 "Expires" if self.expires.is_none() => {
203 if let Ok(expires) = ExpiresInterval::try_from(value) {
204 self.expires = Some(expires);
205 }
206 }
207 "Redirect" if self.redirect.is_none() => {
208 self.redirect = Some(value.to_string())
209 }
210 _ => (),
211 }
212 }
213 }
214 }
215}
216
217impl FilterSet {
218 pub fn new(debug: bool) -> Self {
222 Self {
223 debug,
224 network_filters: Vec::new(),
225 cosmetic_filters: Vec::new(),
226 }
227 }
228
229 #[doc(hidden)]
231 pub fn new_with_rules(
232 network_filters: Vec<NetworkFilter>,
233 cosmetic_filters: Vec<CosmeticFilter>,
234 debug: bool,
235 ) -> Self {
236 Self {
237 debug,
238 network_filters,
239 cosmetic_filters,
240 }
241 }
242
243 pub fn add_filter_list(&mut self, filter_list: &str, opts: ParseOptions) -> FilterListMetadata {
247 self.add_filters(filter_list.lines(), opts)
248 }
249
250 pub fn add_filters(
253 &mut self,
254 filters: impl IntoIterator<Item = impl AsRef<str>>,
255 opts: ParseOptions,
256 ) -> FilterListMetadata {
257 let (metadata, parsed_network_filters, parsed_cosmetic_filters) =
258 parse_filters_with_metadata(filters, self.debug, opts);
259 self.network_filters.extend(parsed_network_filters);
260 self.cosmetic_filters.extend(parsed_cosmetic_filters);
261 metadata
262 }
263
264 pub fn add_filter(&mut self, filter: &str, opts: ParseOptions) -> Result<(), FilterParseError> {
266 let filter_parsed = parse_filter(filter, self.debug, opts);
267 match filter_parsed? {
268 ParsedFilter::Network(filter) => self.network_filters.push(filter),
269 ParsedFilter::Cosmetic(filter) => self.cosmetic_filters.push(filter),
270 }
271 Ok(())
272 }
273
274 #[cfg(feature = "content-blocking")]
283 #[allow(clippy::result_unit_err)]
284 pub fn into_content_blocking(
285 self,
286 ) -> Result<(Vec<crate::content_blocking::CbRule>, Vec<String>), ()> {
287 use crate::content_blocking;
288 use crate::filters::network::NetworkFilterMaskHelper;
289 use std::collections::HashSet;
290
291 if !self.debug {
292 return Err(());
293 }
294
295 let mut bad_filter_ids = HashSet::new();
297 for filter in self.network_filters.iter() {
298 if filter.is_badfilter() {
299 bad_filter_ids.insert(filter.get_id_without_badfilter());
300 }
301 }
302
303 let mut ignore_previous_rules = vec![];
304 let mut other_rules = vec![];
305
306 let mut filters_used = vec![];
307
308 self.network_filters.into_iter().for_each(|filter| {
309 if bad_filter_ids.contains(&filter.get_id()) || filter.is_badfilter() {
311 return;
312 }
313 let original_rule = *filter
314 .raw_line
315 .clone()
316 .expect("All rules should be in debug mode");
317 if let Ok(equivalent) = TryInto::<content_blocking::CbRuleEquivalent>::try_into(filter)
318 {
319 filters_used.push(original_rule);
320 equivalent
321 .into_iter()
322 .for_each(|cb_rule| match &cb_rule.action.typ {
323 content_blocking::CbType::IgnorePreviousRules => {
324 ignore_previous_rules.push(cb_rule)
325 }
326 _ => other_rules.push(cb_rule),
327 });
328 }
329 });
330
331 let add_fp_document_exception = !filters_used.is_empty();
332
333 self.cosmetic_filters.into_iter().for_each(|filter| {
334 let original_rule = *filter
335 .raw_line
336 .clone()
337 .expect("All rules should be in debug mode");
338 if let Ok(cb_rule) = TryInto::<content_blocking::CbRule>::try_into(filter) {
339 filters_used.push(original_rule);
340 match &cb_rule.action.typ {
341 content_blocking::CbType::IgnorePreviousRules => {
342 ignore_previous_rules.push(cb_rule)
343 }
344 _ => other_rules.push(cb_rule),
345 }
346 }
347 });
348
349 other_rules.extend(ignore_previous_rules);
350
351 if add_fp_document_exception {
352 other_rules.push(content_blocking::ignore_previous_fp_documents());
353 }
354
355 Ok((other_rules, filters_used))
356 }
357}
358
359#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
361pub enum FilterFormat {
362 Standard,
364 Hosts,
375}
376
377impl Default for FilterFormat {
379 fn default() -> Self {
380 Self::Standard
381 }
382}
383
/// Coarse classification of a single filter line, as reported by `detect_filter_type`.
#[derive(Debug, PartialEq)]
pub enum FilterType {
    /// The line should be parsed as a network filter.
    Network,
    /// The line should be parsed as a cosmetic filter.
    Cosmetic,
    /// The line is a comment, a list header, or uses syntax that is not parsed.
    NotSupported,
}
394
/// A successfully parsed filter line, as returned by [`parse_filter`].
pub enum ParsedFilter {
    /// The line was parsed as a network filter.
    Network(NetworkFilter),
    /// The line was parsed as a cosmetic filter.
    Cosmetic(CosmeticFilter),
}
400
401impl From<NetworkFilter> for ParsedFilter {
402 fn from(v: NetworkFilter) -> Self {
403 ParsedFilter::Network(v)
404 }
405}
406
407impl From<CosmeticFilter> for ParsedFilter {
408 fn from(v: CosmeticFilter) -> Self {
409 ParsedFilter::Cosmetic(v)
410 }
411}
412
413#[derive(Debug, Error)]
415pub enum FilterParseError {
416 #[error("network filter error: {0}")]
417 Network(#[source] NetworkFilterError),
418 #[error("cosmetic filter error: {0}")]
419 Cosmetic(#[source] CosmeticFilterError),
420 #[error("unsupported")]
421 Unsupported,
422 #[error("empty")]
423 Empty,
424}
425
426impl From<NetworkFilterError> for FilterParseError {
427 fn from(v: NetworkFilterError) -> Self {
428 FilterParseError::Network(v)
429 }
430}
431
432impl From<CosmeticFilterError> for FilterParseError {
433 fn from(v: CosmeticFilterError) -> Self {
434 FilterParseError::Cosmetic(v)
435 }
436}
437
438pub fn parse_filter(
440 line: &str,
441 debug: bool,
442 opts: ParseOptions,
443) -> Result<ParsedFilter, FilterParseError> {
444 let filter = line.trim();
445
446 if filter.is_empty() {
447 return Err(FilterParseError::Empty);
448 }
449
450 match opts.format {
451 FilterFormat::Standard => match (detect_filter_type(filter), opts.rule_types) {
452 (FilterType::Network, RuleTypes::All | RuleTypes::NetworkOnly) => {
453 NetworkFilter::parse(filter, debug, opts)
454 .map(|f| f.into())
455 .map_err(|e| e.into())
456 }
457 (FilterType::Cosmetic, RuleTypes::All | RuleTypes::CosmeticOnly) => {
458 CosmeticFilter::parse(filter, debug, opts.permissions)
459 .map(|f| f.into())
460 .map_err(|e| e.into())
461 }
462 _ => Err(FilterParseError::Unsupported),
463 },
464 FilterFormat::Hosts => {
465 if !opts.rule_types.loads_network_rules() {
467 return Err(FilterParseError::Unsupported);
468 }
469 if filter.starts_with('!') {
470 return Err(FilterParseError::Unsupported);
471 }
472 let filter = if let Some(hash_loc) = find_char(b'#', filter.as_bytes()) {
474 let filter = &filter[..hash_loc];
475 let filter = filter.trim();
476
477 if filter.is_empty() {
478 return Err(FilterParseError::Unsupported);
479 }
480
481 filter
482 } else {
483 filter
484 };
485
486 let mut filter_parts = filter.split_whitespace();
488 let hostname = match (
489 filter_parts.next(),
490 filter_parts.next(),
491 filter_parts.next(),
492 ) {
493 (None, None, None) => return Err(FilterParseError::Unsupported),
494 (Some(hostname), None, None) => hostname,
495 (Some(_ip), Some(hostname), None) => hostname,
496 (Some(_), Some(_), Some(_)) => return Err(FilterParseError::Unsupported),
497 _ => unreachable!(),
498 };
499
500 if hostname == "localhost" {
504 return Err(FilterParseError::Unsupported);
505 }
506
507 NetworkFilter::parse_hosts_style(hostname, debug)
508 .map(|f| f.into())
509 .map_err(|e| e.into())
510 }
511 }
512}
513
514pub fn parse_filters(
516 list: impl IntoIterator<Item = impl AsRef<str>>,
517 debug: bool,
518 opts: ParseOptions,
519) -> (Vec<NetworkFilter>, Vec<CosmeticFilter>) {
520 let (_metadata, network_filters, cosmetic_filters) =
521 parse_filters_with_metadata(list, debug, opts);
522
523 (network_filters, cosmetic_filters)
524}
525
526pub fn parse_filters_with_metadata(
528 list: impl IntoIterator<Item = impl AsRef<str>>,
529 debug: bool,
530 opts: ParseOptions,
531) -> (FilterListMetadata, Vec<NetworkFilter>, Vec<CosmeticFilter>) {
532 let mut metadata = FilterListMetadata::default();
533
534 let list_iter = list.into_iter();
535
536 let (network_filters, cosmetic_filters): (Vec<_>, Vec<_>) = list_iter
537 .map(|line| {
538 metadata.try_add(line.as_ref());
539 parse_filter(line.as_ref(), debug, opts)
540 })
541 .filter_map(Result::ok)
542 .partition_map(|filter| match filter {
543 ParsedFilter::Network(f) => Either::Left(f),
544 ParsedFilter::Cosmetic(f) => Either::Right(f),
545 });
546
547 (metadata, network_filters, cosmetic_filters)
548}
549
550fn detect_filter_type(filter: &str) -> FilterType {
555 if filter.len() == 1
557 || filter.starts_with('!')
558 || (filter.starts_with('#') && filter[1..].starts_with(char::is_whitespace))
559 || filter.starts_with("[Adblock")
560 {
561 return FilterType::NotSupported;
562 }
563
564 if filter.starts_with('|') || filter.starts_with("@@|") {
565 return FilterType::Network;
566 }
567
568 if let Some(sharp_index) = find_char(b'#', filter.as_bytes()) {
570 let after_sharp_index = sharp_index + 1;
571
572 if find_char(
576 b'#',
577 &filter.as_bytes()[after_sharp_index..(after_sharp_index + 4).min(filter.len())],
578 )
579 .is_some()
580 {
581 return FilterType::Cosmetic;
582 }
583 }
584
585 if filter.contains("$$") {
587 return FilterType::NotSupported;
588 }
589
590 FilterType::Network
592}
593
594#[cfg(test)]
595#[path = "../tests/unit/lists.rs"]
596mod unit_tests;