html-filter 0.2.2

Crate to parse, filter, search and edit an HTML file.
Documentation
//! Keeps track of rules applied on attributes or tags. They can
//! either be blacklisted or whitelisted by the user. This module handles the
//! logic for the combination of these rules.

use std::collections::HashMap;

use crate::types::tag::Attribute;

/// Explicit keep/remove rules attached to names.
///
/// Only the rules given by the user at the definition of [`super::Filter`]
/// are stored here.
///
/// Whitelisted and blacklisted names share a single map; the boolean attached
/// to each name tells them apart.
#[derive(Debug)]
pub struct BlackWhiteList {
    /// Fallback for names that have no rule of their own.
    ///
    /// It is only read by [`Self::check`], and only while no name has been
    /// whitelisted.
    default: bool,
    /// Rule attached to each name.
    ///
    /// The boolean is `true` when the name is whitelisted and `false` when it
    /// is blacklisted.
    items: HashMap<String, bool>,
    /// `true` as long as no whitelisted name was pushed into the
    /// [`HashMap`].
    whitelist_empty: bool,
}

impl BlackWhiteList {
    /// Resolves the state of `name`.
    ///
    /// A name with an explicit rule gets the state of that rule. A name
    /// without one is [`ElementState::NotSpecified`] only when nothing was
    /// whitelisted and the default is `true`; otherwise it is
    /// [`ElementState::BlackListed`].
    pub fn check(&self, name: &str) -> ElementState {
        match self.items.get(name) {
            Some(true) => ElementState::WhiteListed,
            Some(false) => ElementState::BlackListed,
            None if self.is_empty() && self.default => ElementState::NotSpecified,
            None => ElementState::BlackListed,
        }
    }

    /// Checks that no name was whitelisted so far.
    ///
    /// Blacklisted names are not taken into account.
    pub const fn is_empty(&self) -> bool {
        self.whitelist_empty
    }

    /// Checks if a blacklisting rule was pushed for `name`.
    pub fn is_explicitly_blacklisted(&self, name: &str) -> bool {
        self.items.get(name) == Some(&false)
    }

    /// Records `name` as whitelisted (`keep == true`) or blacklisted
    /// (`keep == false`).
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when `name` already had the opposite rule. The new
    /// rule replaces the old one even in that case.
    pub fn push(&mut self, name: String, keep: bool) -> Result<(), ()> {
        // Pushing a whitelisted name clears the flag; a blacklisted one
        // leaves it untouched.
        self.whitelist_empty &= !keep;
        match self.items.insert(name, keep) {
            Some(previous) if previous != keep => Err(()),
            Some(_) | None => Ok(()),
        }
    }

    /// Sets the default rule
    ///
    /// See [`Self::check`] for the cases in which the default is applied.
    pub const fn set_default(&mut self, default: bool) {
        self.default = default;
    }
}

impl Default for BlackWhiteList {
    /// Builds a list without any rule, whose default is `true`.
    fn default() -> Self {
        Self { default: true, items: HashMap::new(), whitelist_empty: true }
    }
}

/// Outcome of checking an element against the user's rules
///
/// An element can be whitelisted or blacklisted by the user, or not be covered
/// by any rule.
#[derive(Debug)]
pub enum ElementState {
    /// Element ought to be removed
    BlackListed,
    /// No rule applies to this element
    NotSpecified,
    /// Element ought to be kept
    WhiteListed,
}

impl ElementState {
    /// Combines the outcomes of two checks
    ///
    /// A blacklisting on either side always wins. Otherwise, a whitelisting on
    /// either side is enough to whitelist the result. The result is
    /// [`Self::NotSpecified`] only when both sides are.
    pub const fn and(&self, other: &Self) -> Self {
        if matches!(self, Self::BlackListed) || matches!(other, Self::BlackListed) {
            Self::BlackListed
        } else if matches!(self, Self::WhiteListed) || matches!(other, Self::WhiteListed) {
            Self::WhiteListed
        } else {
            Self::NotSpecified
        }
    }

    /// Checks if an element is allowed, using `default` when no rule applies
    ///
    /// Whitelisted elements are always allowed and blacklisted elements never
    /// are; `default` is returned for [`Self::NotSpecified`].
    pub const fn is_allowed_or(&self, default: bool) -> bool {
        matches!(self, Self::WhiteListed) || (default && matches!(self, Self::NotSpecified))
    }
}

/// Patterns an attribute's value is compared against to decide whether to keep
/// the tag or not.
#[derive(Debug, PartialEq, Eq)]
pub enum AttributeMatch {
    /// One of the whitespace-separated words of the attribute's value must be
    /// equal to the given string.
    Contains(String),
    /// The attribute's value must be exactly the given string.
    Is(String),
    /// The attribute must not have a value.
    NoValue,
}

impl AttributeMatch {
    /// Checks if a [`AttributeMatch`] is satisfied by a given attribute value.
    ///
    /// `attribute_value` is `None` for an attribute without a value; only
    /// [`Self::NoValue`] is satisfied in that case, and it is never satisfied
    /// by an attribute that has a value, even an empty one.
    fn matches(&self, attribute_value: Option<&str>) -> bool {
        match (self, attribute_value) {
            (Self::NoValue, None) => true,
            (Self::Is(expected), Some(found)) => expected.as_str() == found,
            (Self::Contains(expected), Some(found)) =>
                found.split_whitespace().any(|word| word == expected.as_str()),
            (Self::NoValue, Some(_)) | (Self::Is(_) | Self::Contains(_), None) => false,
        }
    }
}

/// Rules associating attribute names to the values they may or may not have
// TODO: could add a default to create a method: exact_attributes
#[derive(Default, Debug)]
pub struct ValueAssociateHash {
    /// Names and attributes explicitly not wanted
    ///
    /// Several rules may target the same attribute name.
    blacklist: Vec<(String, AttributeMatch)>,
    /// Names and attributes explicitly wanted
    ///
    /// Several rules may target the same attribute name.
    whitelist: Vec<(String, AttributeMatch)>,
}

impl ValueAssociateHash {
    /// Checks if the attributes form a correct combination of rules
    ///
    /// The result is [`ElementState::BlackListed`] when a whitelist rule is
    /// not satisfied (the attribute is missing or its value does not match),
    /// or when a blacklist rule is satisfied. Otherwise, it is
    /// [`ElementState::NotSpecified`] if no rule was ever pushed and
    /// [`ElementState::WhiteListed`] if there is at least one rule.
    ///
    /// If several attributes of `attrs` share a name, only one of them is
    /// kept in the lookup table and checked.
    pub fn check(&self, attrs: &[Attribute]) -> ElementState {
        // Names and values are borrowed rather than cloned: the table does not
        // outlive `attrs`.
        let attrs_map: HashMap<&str, Option<&str>> = attrs
            .iter()
            .map(|attr| (attr.as_name().as_str(), attr.as_value().map(String::as_str)))
            .collect();
        // A rule is satisfied when an attribute of that name exists and its
        // value matches the pattern.
        let satisfied = |name: &str, rule: &AttributeMatch| {
            attrs_map.get(name).is_some_and(|found| rule.matches(*found))
        };
        let misses_wanted =
            self.whitelist.iter().any(|(name, rule)| !satisfied(name.as_str(), rule));
        if misses_wanted
            || self.blacklist.iter().any(|(name, rule)| satisfied(name.as_str(), rule))
        {
            ElementState::BlackListed
        } else if self.is_empty() {
            ElementState::NotSpecified
        } else {
            ElementState::WhiteListed
        }
    }

    /// Checks if the [`ValueAssociateHash`] wasn't given any rules.
    pub const fn is_empty(&self) -> bool {
        self.whitelist.is_empty() && self.blacklist.is_empty()
    }

    /// Checks if one of the attributes was explicitly blacklisted
    ///
    /// Every blacklist rule is tried against every attribute, so that several
    /// rules targeting the same attribute name are all honoured, as they are
    /// in [`Self::check`].
    pub fn is_explicitly_blacklisted(&self, attrs: &[Attribute]) -> bool {
        attrs.iter().any(|attr| {
            let name = attr.as_name().as_str();
            let value = attr.as_value().map(String::as_str);
            self.blacklist
                .iter()
                .any(|(rule_name, rule)| rule_name.as_str() == name && rule.matches(value))
        })
    }

    /// Adds a rule for the attribute `name`
    ///
    /// The rule goes to the whitelist when `keep` is `true` and to the
    /// blacklist otherwise. Existing rules for the same name are kept.
    pub fn push(&mut self, name: String, value: AttributeMatch, keep: bool) {
        if keep {
            self.whitelist.push((name, value));
        } else {
            self.blacklist.push((name, value));
        }
    }
}