1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#![doc(html_favicon_url = "https://ruma.dev/favicon.ico")]
#![doc(html_logo_url = "https://ruma.dev/images/logo.png")]
//! Opinionated HTML parsing and manipulating library.
//!
//! Like the rest of the Ruma crates, this crate is primarily meant to be used for
//! the Matrix protocol. It should be able to be used to interact with any HTML
//! document but will offer APIs focused on specificities of HTML in the Matrix
//! specification.
//!
//! # Features
//!
//! * `matrix` - Allows converting HTML element data into enums with variants for the elements
//!   and attributes [suggested by the Matrix Specification][spec].
//!
//! [spec]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes

#![warn(missing_docs)]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]

pub use html5ever::{tendril::StrTendril, Attribute, LocalName, Namespace, Prefix, QualName};

mod helpers;
mod html;
mod sanitizer_config;

pub use self::{helpers::*, html::*, sanitizer_config::*};

/// Selects which [HTML elements and attributes] the sanitizer keeps.
///
/// [HTML elements and attributes]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
// Silences clippy's `exhaustive_enums` lint for this public enum.
#[allow(clippy::exhaustive_enums)]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum HtmlSanitizerMode {
    /// Retain only what the Matrix specification suggests.
    ///
    /// Besides filtering down to the elements and attributes listed in the Matrix
    /// specification, this mode also removes elements nested more than 100 levels deep.
    ///
    /// Where applicable, deprecated elements and attributes are replaced as well.
    Strict,

    /// Same as `Strict` mode, plus extra elements and attributes that the spec does not
    /// include yet but that are reasonable to keep.
    ///
    /// What differs from `Strict` mode:
    ///
    /// * Links may use the `matrix` scheme.
    Compat,
}