use std::convert::TryFrom;
use crate::filters::network::{NetworkFilter, NetworkFilterError};
use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterError};
use itertools::{Either, Itertools};
use memchr::memchr as find_char;
use serde::{Deserialize, Serialize};
use thiserror::Error;
/// Selects which categories of rules are kept while parsing filter lines.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum RuleTypes {
/// Keep both network and cosmetic rules.
All,
/// Keep network rules only.
NetworkOnly,
/// Keep cosmetic rules only.
CosmeticOnly,
}
impl Default for RuleTypes {
fn default() -> Self {
Self::All
}
}
impl RuleTypes {
    /// Reports whether this selection includes network rules.
    fn loads_network_rules(&self) -> bool {
        match self {
            Self::All | Self::NetworkOnly => true,
            Self::CosmeticOnly => false,
        }
    }

    /// Reports whether this selection includes cosmetic rules.
    fn loads_cosmetic_rules(&self) -> bool {
        match self {
            Self::All | Self::CosmeticOnly => true,
            Self::NetworkOnly => false,
        }
    }
}
/// Options controlling how filter lines are interpreted by the parsing functions in this module.
#[derive(Copy, Clone, Deserialize)]
pub struct ParseOptions {
/// Syntax the input lines are written in. When missing from deserialized input, the
/// `FilterFormat` default is used.
#[serde(default)]
pub format: FilterFormat,
/// Which rule categories to keep. When missing from deserialized input, the `RuleTypes`
/// default is used.
#[serde(default)]
pub rule_types: RuleTypes,
}
// Standard syntax with every rule category enabled.
impl Default for ParseOptions {
    fn default() -> Self {
        Self {
            rule_types: RuleTypes::All,
            format: FilterFormat::Standard,
        }
    }
}
/// Accumulates the network and cosmetic filters parsed from lists or individual lines.
#[derive(Clone)]
pub struct FilterSet {
// Forwarded to the filter parsers as their `debug` argument; `into_content_blocking`
// refuses to run unless it is set.
debug: bool,
// Network filters collected so far, in the order they were added.
pub(crate) network_filters: Vec<NetworkFilter>,
// Cosmetic filters collected so far, in the order they were added.
pub(crate) cosmetic_filters: Vec<CosmeticFilter>,
}
/// Reads metadata from the comment header at the top of `list` without parsing any filters.
///
/// Only the first 1024 bytes are inspected (rounded down to a character boundary). Within
/// that window, lines starting with `!` are offered to the metadata collector, lines
/// starting with `[` are skipped, and the scan stops at the first line of any other kind.
pub fn read_list_metadata(list: &str) -> FilterListMetadata {
    const SCAN_LIMIT: usize = 1024;

    // Walk back from the limit until the slice end lands on a character boundary; index 0
    // always qualifies, so the search cannot come up empty.
    let cutoff = (0..=list.len().min(SCAN_LIMIT))
        .rev()
        .find(|&idx| list.is_char_boundary(idx))
        .unwrap_or(0);

    let mut metadata = FilterListMetadata::default();
    list[..cutoff]
        .lines()
        .take_while(|line| line.starts_with('!') || line.starts_with('['))
        .filter(|line| line.starts_with('!'))
        .for_each(|line| metadata.try_add(line));
    metadata
}
// The default set has `debug` switched on in test builds and off in all other builds.
impl Default for FilterSet {
    fn default() -> Self {
        Self::new(cfg!(test))
    }
}
/// Interval parsed from the value of a list's `Expires` metadata entry.
#[derive(Debug, PartialEq, Serialize)]
pub enum ExpiresInterval {
/// A number of hours; the parser only produces values from 1 to 336.
Hours(u16),
/// A number of days; the parser only produces values from 1 to 14.
Days(u8),
}
impl TryFrom<&str> for ExpiresInterval {
    type Error = ();

    /// Parses values such as `"4 days"` or `"12 hours (update frequency)"`.
    ///
    /// The first two whitespace-separated tokens are the amount and the unit; anything
    /// after them is ignored. Tokens may be separated by any run of whitespace, and
    /// leading whitespace is tolerated, so values like `"4  days"` or `" 4 days"` parse.
    ///
    /// Returns `Err(())` when a token is missing, the amount is not a plain unsigned
    /// integer, the unit is not `hour`/`hours`/`day`/`days`, or the amount falls outside
    /// `1..=336` hours or `1..=14` days.
    fn try_from(v: &str) -> Result<Self, ()> {
        const DAYS_MAX: u8 = 14;
        const HOURS_MAX: u16 = DAYS_MAX as u16 * 24;

        let mut tokens = v.split_whitespace();
        let (amount, unit) = match (tokens.next(), tokens.next()) {
            (Some(amount), Some(unit)) => (amount, unit),
            _ => return Err(()),
        };

        // Integer parsing accepts a leading plus sign, which is not wanted here.
        if amount.starts_with('+') {
            return Err(());
        }

        match unit {
            "hour" | "hours" => match amount.parse::<u16>() {
                Ok(hours) if (1..=HOURS_MAX).contains(&hours) => Ok(Self::Hours(hours)),
                _ => Err(()),
            },
            "day" | "days" => match amount.parse::<u8>() {
                Ok(days) if (1..=DAYS_MAX).contains(&days) => Ok(Self::Days(days)),
                _ => Err(()),
            },
            _ => Err(()),
        }
    }
}
/// Metadata collected from `! Key: value` comment lines of a filter list.
/// Each field keeps the first value seen for its key.
#[derive(Default, Serialize)]
pub struct FilterListMetadata {
/// Value of the `Homepage` entry, if one was seen.
pub homepage: Option<String>,
/// Value of the `Title` entry, if one was seen.
pub title: Option<String>,
/// Parsed value of the first `Expires` entry that held a valid interval.
pub expires: Option<ExpiresInterval>,
/// Value of the `Redirect` entry, if one was seen.
pub redirect: Option<String>,
}
impl FilterListMetadata {
    /// Records the metadata carried by `line`, if any.
    ///
    /// A line contributes only when it has the exact shape `! Key: value` with a
    /// recognized key whose field is still unset. An `Expires` value that does not parse
    /// is ignored, leaving the field open for a later line.
    fn try_add(&mut self, line: &str) {
        let entry = line
            .strip_prefix("! ")
            .and_then(|kv| kv.split_once(": "));
        let (key, value) = match entry {
            Some(pair) => pair,
            None => return,
        };

        match key {
            "Homepage" => {
                self.homepage.get_or_insert_with(|| value.to_string());
            }
            "Title" => {
                self.title.get_or_insert_with(|| value.to_string());
            }
            "Expires" => {
                if self.expires.is_none() {
                    self.expires = ExpiresInterval::try_from(value).ok();
                }
            }
            "Redirect" => {
                self.redirect.get_or_insert_with(|| value.to_string());
            }
            _ => {}
        }
    }
}
impl FilterSet {
pub fn new(debug: bool) -> Self {
Self {
debug,
network_filters: Vec::new(),
cosmetic_filters: Vec::new(),
}
}
pub fn add_filter_list(&mut self, filter_list: &str, opts: ParseOptions) -> FilterListMetadata {
let rules = filter_list.lines().map(str::to_string).collect::<Vec<_>>();
self.add_filters(&rules, opts)
}
pub fn add_filters(&mut self, filters: &[String], opts: ParseOptions) -> FilterListMetadata {
let (metadata, mut parsed_network_filters, mut parsed_cosmetic_filters) = parse_filters_with_metadata(filters, self.debug, opts);
self.network_filters.append(&mut parsed_network_filters);
self.cosmetic_filters.append(&mut parsed_cosmetic_filters);
metadata
}
pub fn add_filter(&mut self, filter: &str, opts: ParseOptions) -> Result<(), FilterParseError> {
let filter_parsed = parse_filter(filter, self.debug, opts);
match filter_parsed? {
ParsedFilter::Network(filter) => self.network_filters.push(filter),
ParsedFilter::Cosmetic(filter) => self.cosmetic_filters.push(filter),
}
Ok(())
}
#[cfg(feature = "content-blocking")]
pub fn into_content_blocking(self) -> Result<(Vec<crate::content_blocking::CbRule>, Vec<String>), ()> {
use std::convert::TryInto;
use crate::content_blocking;
if !self.debug {
return Err(())
}
let mut ignore_previous_rules = vec![];
let mut other_rules = vec![];
let mut filters_used = vec![];
self.network_filters.into_iter().for_each(|filter| {
let original_rule = *filter.raw_line.clone().expect("All rules should be in debug mode");
if let Ok(equivalent) = TryInto::<content_blocking::CbRuleEquivalent>::try_into(filter) {
filters_used.push(original_rule);
equivalent.into_iter().for_each(|cb_rule| {
match &cb_rule.action.typ {
content_blocking::CbType::IgnorePreviousRules => ignore_previous_rules.push(cb_rule),
_ => other_rules.push(cb_rule),
}
});
}
});
let add_fp_document_exception = !filters_used.is_empty();
self.cosmetic_filters.into_iter().for_each(|filter| {
let original_rule = *filter.raw_line.clone().expect("All rules should be in debug mode");
if let Ok(cb_rule) = TryInto::<content_blocking::CbRule>::try_into(filter) {
filters_used.push(original_rule);
match &cb_rule.action.typ {
content_blocking::CbType::IgnorePreviousRules => ignore_previous_rules.push(cb_rule),
_ => other_rules.push(cb_rule),
}
}
});
other_rules.append(&mut ignore_previous_rules);
if add_fp_document_exception {
other_rules.push(content_blocking::ignore_previous_fp_documents());
}
Ok((other_rules, filters_used))
}
}
/// Syntax in which the lines handed to the parser are written.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum FilterFormat {
/// Lines are classified as network or cosmetic filters and parsed accordingly.
Standard,
/// Hosts-file style lines: a hostname, optionally preceded by one other token such as an
/// address, with `#` starting a comment.
Hosts,
}
impl Default for FilterFormat {
fn default() -> Self {
Self::Standard
}
}
/// Classification of a line, as produced by `detect_filter_type`.
#[derive(Debug, PartialEq)]
pub enum FilterType {
/// The line should be handed to the network filter parser.
Network,
/// The line should be handed to the cosmetic filter parser.
Cosmetic,
/// The line is a comment, a header, or uses syntax that is skipped.
NotSupported,
}
/// Successful result of parsing a single line: exactly one of the two filter kinds.
pub enum ParsedFilter {
/// The line was parsed as a network filter.
Network(NetworkFilter),
/// The line was parsed as a cosmetic filter.
Cosmetic(CosmeticFilter),
}
impl From<NetworkFilter> for ParsedFilter {
fn from(v: NetworkFilter) -> Self {
ParsedFilter::Network(v)
}
}
impl From<CosmeticFilter> for ParsedFilter {
fn from(v: CosmeticFilter) -> Self {
ParsedFilter::Cosmetic(v)
}
}
/// Reasons a single line did not produce a filter.
#[derive(Debug, Error)]
pub enum FilterParseError {
/// The network filter parser rejected the line.
#[error("network filter error: {0}")]
Network(#[source] NetworkFilterError),
/// The cosmetic filter parser rejected the line.
#[error("cosmetic filter error: {0}")]
Cosmetic(#[source] CosmeticFilterError),
/// The line is a comment or header, uses skipped syntax, or belongs to a rule category
/// that the parse options exclude.
#[error("unsupported")]
Unsupported,
/// The line was empty after trimming whitespace.
#[error("empty")]
Empty,
}
impl From<NetworkFilterError> for FilterParseError {
fn from(v: NetworkFilterError) -> Self {
FilterParseError::Network(v)
}
}
impl From<CosmeticFilterError> for FilterParseError {
fn from(v: CosmeticFilterError) -> Self {
FilterParseError::Cosmetic(v)
}
}
/// Parses one line into a network or cosmetic filter.
///
/// The line is trimmed first. In `Standard` format it is classified and handed to the
/// matching parser, provided `opts.rule_types` includes that category. In `Hosts` format
/// the line is read as `[address] hostname [# comment]` and turned into a hosts-style
/// network filter.
///
/// # Errors
///
/// * `Empty` when the trimmed line is empty.
/// * `Unsupported` for comments, skipped syntax, excluded rule categories, malformed
///   hosts lines, and the hostname `localhost`.
/// * `Network` / `Cosmetic` when the respective parser rejects the line.
pub fn parse_filter(
    line: &str,
    debug: bool,
    opts: ParseOptions,
) -> Result<ParsedFilter, FilterParseError> {
    let filter = line.trim();
    if filter.is_empty() {
        return Err(FilterParseError::Empty);
    }

    match opts.format {
        FilterFormat::Standard => match detect_filter_type(filter) {
            FilterType::Network if opts.rule_types.loads_network_rules() => {
                Ok(NetworkFilter::parse(filter, debug, opts)?.into())
            }
            FilterType::Cosmetic if opts.rule_types.loads_cosmetic_rules() => {
                Ok(CosmeticFilter::parse(filter, debug)?.into())
            }
            _ => Err(FilterParseError::Unsupported),
        },
        FilterFormat::Hosts => {
            // Hosts entries only ever produce network filters; `!` lines are comments.
            if !opts.rule_types.loads_network_rules() || filter.starts_with('!') {
                return Err(FilterParseError::Unsupported);
            }

            // Cut off a trailing `#` comment. Without a `#` the line is already trimmed
            // and known to be non-empty, so the emptiness check only fires for comments.
            let entry = match find_char(b'#', filter.as_bytes()) {
                Some(hash_loc) => filter[..hash_loc].trim(),
                None => filter,
            };
            if entry.is_empty() {
                return Err(FilterParseError::Unsupported);
            }

            // Accept `hostname` or `<token> hostname`; anything longer is rejected.
            let mut tokens = entry.split_whitespace();
            let hostname = match (tokens.next(), tokens.next(), tokens.next()) {
                (Some(hostname), None, None) => hostname,
                (Some(_ip), Some(hostname), None) => hostname,
                (None, None, None) | (Some(_), Some(_), Some(_)) => {
                    return Err(FilterParseError::Unsupported)
                }
                _ => unreachable!(),
            };

            if hostname == "localhost" {
                return Err(FilterParseError::Unsupported);
            }

            Ok(NetworkFilter::parse_hosts_style(hostname, debug)?.into())
        }
    }
}
/// Parses every entry of `list`, returning the network and cosmetic filters that were
/// recognized. Entries that fail to parse are skipped and list metadata is discarded.
pub fn parse_filters(
    list: &[String],
    debug: bool,
    opts: ParseOptions,
) -> (Vec<NetworkFilter>, Vec<CosmeticFilter>) {
    let (_, network, cosmetic) = parse_filters_with_metadata(list, debug, opts);
    (network, cosmetic)
}
pub fn parse_filters_with_metadata(
list: &[String],
debug: bool,
opts: ParseOptions,
) -> (FilterListMetadata, Vec<NetworkFilter>, Vec<CosmeticFilter>) {
let mut metadata = FilterListMetadata::default();
let list_iter = list.iter();
let (network_filters, cosmetic_filters): (Vec<_>, Vec<_>) = list_iter
.map(|line| {
metadata.try_add(line);
parse_filter(line, debug, opts)
})
.filter_map(Result::ok)
.partition_map(|filter| match filter {
ParsedFilter::Network(f) => Either::Left(f),
ParsedFilter::Cosmetic(f) => Either::Right(f),
});
(metadata, network_filters, cosmetic_filters)
}
/// Classifies a trimmed line as a network filter, a cosmetic filter, or unsupported.
fn detect_filter_type(filter: &str) -> FilterType {
    // `#` followed by whitespace is treated as a comment.
    let is_hash_comment = filter
        .strip_prefix('#')
        .map_or(false, |rest| rest.starts_with(char::is_whitespace));

    let is_ignored = filter.len() == 1
        || filter.starts_with('!')
        || is_hash_comment
        || filter.starts_with("[Adblock");
    if is_ignored {
        return FilterType::NotSupported;
    }

    if filter.starts_with('|') || filter.starts_with("@@|") {
        return FilterType::Network;
    }

    // A cosmetic separator is a `#` with a second `#` among the (up to) four bytes after
    // it. Working on bytes keeps the window arithmetic free of char-boundary concerns.
    let bytes = filter.as_bytes();
    let has_cosmetic_separator = find_char(b'#', bytes).map_or(false, |first_hash| {
        let window_start = first_hash + 1;
        let window_end = (window_start + 4).min(bytes.len());
        find_char(b'#', &bytes[window_start..window_end]).is_some()
    });

    if has_cosmetic_separator {
        FilterType::Cosmetic
    } else if filter.contains("$$") {
        // `$$` rules are not supported.
        FilterType::NotSupported
    } else {
        FilterType::Network
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_hosts_style() {
    let hosts_opts = ParseOptions { format: FilterFormat::Hosts, ..Default::default() };

    // (input line, whether it is expected to parse)
    let cases = [
        ("www.malware.com", true),
        ("www.malware.com/virus.txt", false),
        ("127.0.0.1 www.malware.com", true),
        ("127.0.0.1\t\twww.malware.com", true),
        ("0.0.0.0 www.malware.com", true),
        ("0.0.0.0 www.malware.com # replace after issue #289336 is addressed", true),
        ("! Title: list.txt", false),
        ("127.0.0.1 localhost", false),
        ("127.0.0.1 com", false),
        (".com", false),
        ("*.com", false),
        ("www.", false),
    ];

    for &(input, should_parse) in cases.iter() {
        let result = parse_filter(input, true, hosts_opts);
        assert_eq!(result.is_ok(), should_parse, "unexpected outcome for {:?}", input);
    }
}
#[test]
fn adguard_cosmetic_detection() {
    // A `$$` rule outside of a cosmetic filter is rejected...
    let dollar_rule = r#"example.org$$script[data-src="banner"]"#;
    assert!(parse_filter(dollar_rule, true, Default::default()).is_err());

    // ...but `$$` inside the body of a `##` rule does not prevent parsing.
    let scriptlet_rule = "example.org##+js(set-local-storage-item, Test, $$remove$$)";
    assert!(parse_filter(scriptlet_rule, true, Default::default()).is_ok());
}
#[test]
fn parse_filter_failed_fuzz_1() {
    // A single multi-byte character must parse without error.
    assert!(parse_filter("Ѥ", true, Default::default()).is_ok());
}
#[test]
fn parse_filter_failed_fuzz_2() {
    // Both escape-heavy `###` selectors must be accepted.
    let escaped_code_points = r#"###\\\00DB \008D"#;
    let escaped_literal = r#"###\Û"#;
    assert!(parse_filter(escaped_code_points, true, Default::default()).is_ok());
    assert!(parse_filter(escaped_literal, true, Default::default()).is_ok());
}
#[test]
fn parse_filter_failed_fuzz_3() {
    // This fuzzer-found input must parse successfully.
    assert!(parse_filter("||$3p=/", true, Default::default()).is_ok());
}
#[test]
fn parse_filter_failed_fuzz_4() {
    let raw_bytes = vec![92, 35, 35, 43, 106, 115, 40, 44, 221, 141];
    let input = String::from_utf8(raw_bytes).unwrap();
    let parsed = parse_filter(&input, true, Default::default());

    // The input is rejected with the `css-validation` feature and accepted without it.
    let expect_ok = !cfg!(feature = "css-validation");
    assert_eq!(parsed.is_ok(), expect_ok);
}
#[test]
#[cfg(feature = "css-validation")]
fn parse_filter_opening_comment() {
    // A selector ending in an unterminated `/*` must be rejected.
    let result = parse_filter("##input,input/*", true, Default::default());
    assert!(result.is_err());
}
#[test]
fn test_parse_expires_interval() {
    // (input, expected parse result)
    let cases: &[(&str, Result<ExpiresInterval, ()>)] = &[
        ("0 hour", Err(())),
        ("0 hours", Err(())),
        ("1 hour", Ok(ExpiresInterval::Hours(1))),
        ("1 hours", Ok(ExpiresInterval::Hours(1))),
        ("2 hours", Ok(ExpiresInterval::Hours(2))),
        ("2 hour", Ok(ExpiresInterval::Hours(2))),
        ("3.5 hours", Err(())),
        ("336 hours", Ok(ExpiresInterval::Hours(336))),
        ("337 hours", Err(())),
        ("0 day", Err(())),
        ("0 days", Err(())),
        ("1 day", Ok(ExpiresInterval::Days(1))),
        ("1 days", Ok(ExpiresInterval::Days(1))),
        ("2 days", Ok(ExpiresInterval::Days(2))),
        ("2 day", Ok(ExpiresInterval::Days(2))),
        ("3.5 days", Err(())),
        ("14 days", Ok(ExpiresInterval::Days(14))),
        ("15 days", Err(())),
        ("-5 hours", Err(())),
        ("+5 hours", Err(())),
        ("2 days (update frequency)", Ok(ExpiresInterval::Days(2))),
        ("2 hours (update frequency)", Ok(ExpiresInterval::Hours(2))),
    ];

    for (input, expected) in cases {
        assert_eq!(&ExpiresInterval::try_from(*input), expected, "input: {:?}", input);
    }
}
#[test]
fn test_parsing_list_metadata() {
    let list: Vec<String> = [
        "[Adblock Plus 2.0]",
        "! Title: 0131 Block List",
        "! Homepage: https://austinhuang.me/0131-block-list",
        "! Licence: https://creativecommons.org/licenses/by-sa/4.0/",
        "! Expires: 7 days",
        "! Version: 20220411",
        "",
        "! => https://austinhuang.me/0131-block-list/list.txt",
    ]
    .iter()
    .map(|line| String::from(*line))
    .collect();

    let mut filter_set = FilterSet::new(false);
    let metadata = filter_set.add_filters(&list, ParseOptions::default());

    assert_eq!(metadata.title.as_deref(), Some("0131 Block List"));
    assert_eq!(metadata.homepage.as_deref(), Some("https://austinhuang.me/0131-block-list"));
    assert_eq!(metadata.expires, Some(ExpiresInterval::Days(7)));
    assert_eq!(metadata.redirect, None);
}
#[test]
fn test_parsing_list_best_effort() {
    // The header mixes decorative comment lines with metadata, and the `Expires` value
    // carries a trailing remark.
    let list: Vec<String> = [
        "[Adblock Plus 2]",
        "!-----------------------------------",
        "! ABOUT",
        "!-----------------------------------",
        "! Version: 1.2.0.0",
        "! Title: ABPVN Advanced",
        "! Last modified: 09/03/2021",
        "! Expires: 7 days (update frequency)",
        "! Homepage: https://www.haopro.net/",
    ]
    .iter()
    .map(|line| String::from(*line))
    .collect();

    let mut filter_set = FilterSet::new(false);
    let metadata = filter_set.add_filters(&list, ParseOptions::default());

    assert_eq!(metadata.title.as_deref(), Some("ABPVN Advanced"));
    assert_eq!(metadata.homepage.as_deref(), Some("https://www.haopro.net/"));
    assert_eq!(metadata.expires, Some(ExpiresInterval::Days(7)));
    assert_eq!(metadata.redirect, None);
}
// Checks `read_list_metadata` against two list headers containing multi-byte text.
#[test]
fn test_read_metadata() {
// Header with non-ASCII characters in the title.
{
let list =
r##"! Title: uBlock₀ filters – Annoyances
! Description: Filters optimized for uBlock Origin, to be used with Fanboy's
! and/or Adguard's "Annoyances" list(s)
! Expires: 4 days
! Last modified: %timestamp%
! License: https://github.com/uBlockOrigin/uAssets/blob/master/LICENSE
! Homepage: https://github.com/uBlockOrigin/uAssets
! Forums: https://github.com/uBlockOrigin/uAssets/issues"##;
let metadata = read_list_metadata(&list);
assert_eq!(metadata.title, Some("uBlock₀ filters – Annoyances".to_string()));
assert_eq!(metadata.homepage, Some("https://github.com/uBlockOrigin/uAssets".to_string()));
assert_eq!(metadata.expires, Some(ExpiresInterval::Days(4)));
assert_eq!(metadata.redirect, None);
}
// Header that opens with a `[...]` line and holds long runs of multi-byte text, followed
// by a filter line.
{
let list =
r##"[uBlock Origin]
! Title: PersianBlocker
! Description: سرانجام، یک لیست بهینه و گسترده برای مسدودسازی تبلیغ ها و ردیاب ها در سایت های پارسی زبان!
! Expires: 2 days
! Last modified: 2022-12-11
! Homepage: https://github.com/MasterKia/PersianBlocker
! License: AGPLv3 (https://github.com/MasterKia/PersianBlocker/blob/main/LICENSE)
! مشکل/پیشنهاد: https://github.com/MasterKia/PersianBlocker/issues
! مشارکت: https://github.com/MasterKia/PersianBlocker/pulls
! لیستی برای برگرداندن آزادی کاربران، چون هر کاربر این آزادی را دارد که چه چیزی وارد مرورگرش میشود و چه چیزی وارد نمیشود
!-------------------------v Experimental Generic Filters v-----------------------!
! applicationha.com, androidgozar.com, downloadkral.com, gold-team.org, iranecar.com, icoff.ee, koolakmag.ir,
!! mybia4music.com, my-film.pw, pedal.ir, vgdl.ir, sakhamusic.ir
/wp-admin/admin-ajax.php?postviews_id=$xhr
"##;
let metadata = read_list_metadata(&list);
assert_eq!(metadata.title, Some("PersianBlocker".to_string()));
assert_eq!(metadata.homepage, Some("https://github.com/MasterKia/PersianBlocker".to_string()));
assert_eq!(metadata.expires, Some(ExpiresInterval::Days(2)));
assert_eq!(metadata.redirect, None);
}
}
#[test]
fn parse_cosmetic_variants() {
    let parse = |input: &str| parse_filter(input, true, Default::default());

    // Separators that are accepted as cosmetic filters.
    let accepted = [
        "example.com##.selector",
        "9gag.com#?#article:-abp-has(.promoted)",
    ];
    for &input in accepted.iter() {
        assert!(matches!(parse(input), Ok(ParsedFilter::Cosmetic(..))));
    }

    #[cfg(feature = "css-validation")]
    {
        let input = "sportowefakty.wp.pl#@?#body > [class]:not([id]):matches-css(position: fixed):matches-css(top: 0px)";
        assert!(matches!(
            parse(input),
            Err(FilterParseError::Cosmetic(CosmeticFilterError::InvalidCssSelector))
        ));
    }

    // Separators that are recognized as cosmetic but reported as unsupported syntax.
    let unsupported = [
        r#"odkrywamyzakryte.com#%#//scriptlet("abort-on-property-read", "sc_adv_out")"#,
        "bikeradar.com,spiegel.de#@%#!function(){function b(){}function a(a){return{get:function(){return a},set:b}}function c(a)",
        "nczas.com#$#.adsbygoogle { position: absolute!important; left: -3000px!important; }",
        "kurnik.pl#@$#.adsbygoogle { height: 1px !important; width: 1px !important; }",
    ];
    for &input in unsupported.iter() {
        assert!(matches!(
            parse(input),
            Err(FilterParseError::Cosmetic(CosmeticFilterError::UnsupportedSyntax))
        ));
    }
}
}