use std::collections::BTreeSet;
use html5ever::{Attribute, QualName, ns, tendril::StrTendril};
use ruma_common::{
IdParseError, MatrixToError, MatrixToUri, MatrixUri, MatrixUriError, MxcUri, OwnedMxcUri,
};
use crate::sanitizer_config::clean::{compat, spec};
/// Prefix searched for inside the `class` attribute of a `<code>` element; the text that
/// follows it (up to the next ASCII whitespace) is taken as the language name.
const CLASS_LANGUAGE_PREFIX: &str = "language-";
/// The result of interpreting an element: a typed [`MatrixElement`] together with the
/// attributes that were not absorbed into it.
#[derive(Debug, Clone)]
#[allow(clippy::exhaustive_structs)]
pub struct MatrixElementData {
/// The typed element, holding any attribute values that were successfully interpreted.
pub element: MatrixElement,
/// The attributes that were not consumed while building `element`.
pub attrs: BTreeSet<Attribute>,
}
impl MatrixElementData {
    /// Builds the data for an element from its qualified name and its attribute set.
    ///
    /// All the work is delegated to [`MatrixElement::parse`]; this only stores the two
    /// halves of its result in the matching fields.
    // The lint is silenced because the set is keyed by `Attribute`.
    #[allow(clippy::mutable_key_type)]
    pub(super) fn parse(name: &QualName, attrs: &BTreeSet<Attribute>) -> Self {
        let parsed = MatrixElement::parse(name, attrs);
        Self { element: parsed.0, attrs: parsed.1 }
    }
}
/// A typed view of an element, selected from its local name when it is in the HTML
/// namespace.
///
/// Variants with a payload carry the attribute values that were interpreted for that
/// element.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum MatrixElement {
/// The `<del>` element.
Del,
/// A heading element, `<h1>` to `<h6>`, with its level.
H(HeadingData),
/// The `<blockquote>` element.
Blockquote,
/// The `<p>` element.
P,
/// The `<a>` element, with its interpreted `target` and `href` attributes.
A(AnchorData),
/// The `<ul>` element.
Ul,
/// The `<ol>` element, with its interpreted `start` attribute.
Ol(OrderedListData),
/// The `<sup>` element.
Sup,
/// The `<sub>` element.
Sub,
/// The `<li>` element.
Li,
/// The `<b>` element.
B,
/// The `<i>` element.
I,
/// The `<u>` element.
U,
/// The `<strong>` element.
Strong,
/// The `<em>` element.
Em,
/// The `<s>` element.
S,
/// The `<code>` element, with the language found in its `class` attribute.
Code(CodeData),
/// The `<hr>` element.
Hr,
/// The `<br>` element.
Br,
/// The `<div>` element, with its interpreted `data-mx-maths` attribute.
Div(DivData),
/// The `<table>` element.
Table,
/// The `<thead>` element.
Thead,
/// The `<tbody>` element.
Tbody,
/// The `<tr>` element.
Tr,
/// The `<th>` element.
Th,
/// The `<td>` element.
Td,
/// The `<caption>` element.
Caption,
/// The `<pre>` element.
Pre,
/// The `<span>` element, with its interpreted `data-mx-*` attributes.
Span(SpanData),
/// The `<img>` element, with its interpreted size, text and source attributes.
Img(ImageData),
/// The `<details>` element.
Details,
/// The `<summary>` element.
Summary,
/// The `<mx-reply>` element.
MatrixReply,
/// Any other element: one outside the HTML namespace, or one whose local name is not
/// listed above. Holds the element's qualified name.
Other(QualName),
}
impl MatrixElement {
    /// Interprets an element from its qualified name and attributes.
    ///
    /// Returns the typed element and the attributes that were not absorbed into it.
    /// Elements outside the HTML namespace, and HTML elements with an unlisted local name,
    /// become [`MatrixElement::Other`] and keep all their attributes.
    // The lint is silenced because the set is keyed by `Attribute`.
    #[allow(clippy::mutable_key_type)]
    fn parse(name: &QualName, attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
        if name.ns != ns!(html) {
            return (Self::Other(name.clone()), attrs.clone());
        }

        let tag = name.local.as_bytes();

        // Elements carrying typed data have their own attribute parser, which decides what
        // is left over.
        match tag {
            b"a" => {
                let (anchor, rest) = AnchorData::parse(attrs);
                return (Self::A(anchor), rest);
            }
            b"ol" => {
                let (list, rest) = OrderedListData::parse(attrs);
                return (Self::Ol(list), rest);
            }
            b"code" => {
                let (code, rest) = CodeData::parse(attrs);
                return (Self::Code(code), rest);
            }
            b"div" => {
                let (div, rest) = DivData::parse(attrs);
                return (Self::Div(div), rest);
            }
            b"span" => {
                let (span, rest) = SpanData::parse(attrs);
                return (Self::Span(span), rest);
            }
            b"img" => {
                let (image, rest) = ImageData::parse(attrs);
                return (Self::Img(image), rest);
            }
            _ => {}
        }

        // Every remaining element keeps its whole attribute set untouched.
        let heading = |level: u8| Self::H(HeadingData::new(level));
        let element = match tag {
            b"del" => Self::Del,
            b"h1" => heading(1),
            b"h2" => heading(2),
            b"h3" => heading(3),
            b"h4" => heading(4),
            b"h5" => heading(5),
            b"h6" => heading(6),
            b"blockquote" => Self::Blockquote,
            b"p" => Self::P,
            b"ul" => Self::Ul,
            b"sup" => Self::Sup,
            b"sub" => Self::Sub,
            b"li" => Self::Li,
            b"b" => Self::B,
            b"i" => Self::I,
            b"u" => Self::U,
            b"strong" => Self::Strong,
            b"em" => Self::Em,
            b"s" => Self::S,
            b"hr" => Self::Hr,
            b"br" => Self::Br,
            b"table" => Self::Table,
            b"thead" => Self::Thead,
            b"tbody" => Self::Tbody,
            b"tr" => Self::Tr,
            b"th" => Self::Th,
            b"td" => Self::Td,
            b"caption" => Self::Caption,
            b"pre" => Self::Pre,
            b"details" => Self::Details,
            b"summary" => Self::Summary,
            b"mx-reply" => Self::MatrixReply,
            _ => Self::Other(name.clone()),
        };

        (element, attrs.clone())
    }
}
/// The data attached to a heading element.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct HeadingData {
/// The level of the heading; `<h1>` to `<h6>` are parsed with levels 1 to 6.
pub level: HeadingLevel,
}
impl HeadingData {
    /// Creates heading data by wrapping `level` in a [`HeadingLevel`].
    ///
    /// No range check is performed on `level`.
    fn new(level: u8) -> Self {
        let level = HeadingLevel(level);
        Self { level }
    }
}
/// The level of a heading element.
///
/// The wrapped integer is private: read it with [`HeadingLevel::value`], or compare the
/// level directly against a `u8`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HeadingLevel(u8);

impl HeadingLevel {
    /// Returns the level as a plain integer.
    pub fn value(&self) -> u8 {
        let Self(level) = *self;
        level
    }
}

impl PartialEq<u8> for HeadingLevel {
    /// Compares the wrapped level with a bare integer.
    fn eq(&self, other: &u8) -> bool {
        self.value() == *other
    }
}
/// The attributes interpreted for an `<a>` element.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct AnchorData {
/// The value of the `target` attribute, if it was present.
pub target: Option<StrTendril>,
/// The `href` attribute parsed as an [`AnchorUri`].
///
/// `None` when the attribute is absent or when its value was rejected by the URI parser;
/// in the latter case the raw attribute is kept among the unparsed attributes.
pub href: Option<AnchorUri>,
}
impl AnchorData {
    /// Creates anchor data with no attribute set.
    fn new() -> Self {
        Self { href: None, target: None }
    }

    /// Extracts the `target` and `href` attributes from `attrs`.
    ///
    /// Returns the extracted data and the set of attributes that were not consumed:
    /// namespaced attributes, attributes with another name, and an `href` whose value
    /// [`AnchorUri::parse`] rejected.
    // The lint is silenced because the set is keyed by `Attribute`.
    #[allow(clippy::mutable_key_type)]
    fn parse(attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
        let mut parsed = Self::new();
        let mut leftover = BTreeSet::new();

        for attr in attrs {
            // `true` once the attribute's value has been stored in `parsed`.
            let consumed = attr.name.ns == ns!()
                && match attr.name.local.as_bytes() {
                    b"target" => {
                        parsed.target = Some(attr.value.clone());
                        true
                    }
                    b"href" => match AnchorUri::parse(&attr.value) {
                        Some(uri) => {
                            parsed.href = Some(uri);
                            true
                        }
                        None => false,
                    },
                    _ => false,
                };

            if !consumed {
                leftover.insert(attr.clone());
            }
        }

        (parsed, leftover)
    }
}
/// The interpreted value of an anchor's `href` attribute.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum AnchorUri {
/// A value that was successfully parsed as a [`MatrixUri`].
Matrix(MatrixUri),
/// A value that was successfully parsed as a [`MatrixToUri`].
MatrixTo(MatrixToUri),
/// A value that starts with an allowed scheme but is neither of the above; holds the
/// original attribute value.
Other(StrTendril),
}
impl AnchorUri {
    /// Interprets the value of an `href` attribute.
    ///
    /// Returns `None` when the value does not start with one of the schemes allowed for
    /// `a`/`href` (followed by `:`), or when it is recognized as a Matrix or matrix.to URI
    /// but fails to parse as one.
    fn parse(value: &StrTendril) -> Option<Self> {
        let uri_str: &str = value.as_ref();

        // The scheme must be in either the spec list or the compat list.
        let scheme_allowed = spec::allowed_schemes("a", "href")
            .into_iter()
            .chain(compat::allowed_schemes("a", "href"))
            .flatten()
            .any(|scheme| uri_str.starts_with(&format!("{scheme}:")));
        if !scheme_allowed {
            return None;
        }

        match MatrixUri::parse(uri_str) {
            Ok(uri) => return Some(Self::Matrix(uri)),
            Err(error) => {
                // Only a wrong scheme means "not this kind of URI, try the next kind";
                // any other outcome makes the value invalid.
                let wrong_scheme = matches!(
                    error,
                    IdParseError::InvalidMatrixUri(MatrixUriError::WrongScheme)
                );
                if !wrong_scheme {
                    return None;
                }
            }
        }

        match MatrixToUri::parse(uri_str) {
            Ok(uri) => return Some(Self::MatrixTo(uri)),
            Err(error) => {
                // Same reasoning as above, this time for the base URL.
                let wrong_base = matches!(
                    error,
                    IdParseError::InvalidMatrixToUri(MatrixToError::WrongBaseUrl)
                );
                if !wrong_base {
                    return None;
                }
            }
        }

        Some(Self::Other(value.clone()))
    }
}
/// The attributes interpreted for an `<ol>` element.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct OrderedListData {
/// The `start` attribute parsed as an integer.
///
/// `None` when the attribute is absent or when its value does not parse as an `i64`.
pub start: Option<i64>,
}
impl OrderedListData {
    /// Creates ordered list data with no attribute set.
    fn new() -> Self {
        Self { start: None }
    }

    /// Extracts the `start` attribute from `attrs`.
    ///
    /// Returns the extracted data and the set of attributes that were not consumed,
    /// which includes a `start` attribute whose value is not a valid `i64`.
    // The lint is silenced because the set is keyed by `Attribute`.
    #[allow(clippy::mutable_key_type)]
    fn parse(attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
        let mut parsed = Self::new();
        let mut leftover = BTreeSet::new();

        for attr in attrs {
            let is_start =
                attr.name.ns == ns!() && matches!(attr.name.local.as_bytes(), b"start");
            // Only an un-namespaced `start` attribute is a candidate for parsing.
            let start = if is_start { attr.value.parse::<i64>().ok() } else { None };

            match start {
                Some(number) => parsed.start = Some(number),
                None => {
                    leftover.insert(attr.clone());
                }
            }
        }

        (parsed, leftover)
    }
}
/// The attributes interpreted for a `<code>` element.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct CodeData {
/// The language name: the non-empty text following `language-` in the first matching
/// class of the `class` attribute, if any.
pub language: Option<StrTendril>,
}
impl CodeData {
    /// Creates code data with no language.
    fn new() -> Self {
        Self { language: None }
    }

    /// Extracts the language from the `class` attribute in `attrs`.
    ///
    /// Returns the extracted data and the set of attributes that were not consumed. The
    /// `class` attribute is only consumed when its whole value is the language class;
    /// when it holds other classes too, the language is extracted and the attribute is
    /// kept as well.
    // The lint is silenced because the set is keyed by `Attribute`.
    #[allow(clippy::mutable_key_type)]
    fn parse(attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
        let mut parsed = Self::new();
        let mut leftover = BTreeSet::new();

        for attr in attrs {
            let is_class =
                attr.name.ns == ns!() && matches!(attr.name.local.as_bytes(), b"class");
            if !is_class {
                leftover.insert(attr.clone());
                continue;
            }

            let classes: &str = attr.value.as_ref();
            let bytes = classes.as_bytes();

            // Find the first class made of the prefix followed by a non-empty name. Each
            // hit is `(prefix offset, name offset, name length)`, all in bytes.
            let found = classes
                .match_indices(CLASS_LANGUAGE_PREFIX)
                // A class starts at the beginning of the value or right after whitespace.
                .filter(|&(prefix_at, _)| {
                    prefix_at == 0 || bytes[prefix_at - 1].is_ascii_whitespace()
                })
                .map(|(prefix_at, prefix)| {
                    let name_at = prefix_at + prefix.len();
                    // The name runs up to the next whitespace, or to the end of the value.
                    let name_len = classes[name_at..]
                        .find(|c: char| c.is_ascii_whitespace())
                        .unwrap_or(classes.len() - name_at);
                    (prefix_at, name_at, name_len)
                })
                .find(|&(_, _, name_len)| name_len != 0);

            if let Some((prefix_at, name_at, name_len)) = found {
                parsed.language = Some(attr.value.subtendril(name_at as u32, name_len as u32));

                // The attribute holds more than the language class: keep it whole.
                if prefix_at != 0 || name_at + name_len != classes.len() {
                    leftover.insert(attr.clone());
                }
            }

            // Without any language found so far, the attribute is kept whole.
            if parsed.language.is_none() {
                leftover.insert(attr.clone());
            }
        }

        (parsed, leftover)
    }
}
/// The attributes interpreted for a `<span>` element.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct SpanData {
/// The value of the `data-mx-bg-color` attribute, if it was present.
pub bg_color: Option<StrTendril>,
/// The value of the `data-mx-color` attribute, if it was present.
pub color: Option<StrTendril>,
/// The value of the `data-mx-spoiler` attribute, if it was present.
pub spoiler: Option<StrTendril>,
/// The value of the `data-mx-maths` attribute, if it was present.
pub maths: Option<StrTendril>,
/// The value of the `data-msc4286-external-payment-details` attribute, if it was present.
///
/// Only available with the `unstable-msc4286` feature.
#[cfg(feature = "unstable-msc4286")]
pub external_payment_details: Option<StrTendril>,
}
impl SpanData {
    /// Creates span data with no attribute set.
    fn new() -> Self {
        Self {
            #[cfg(feature = "unstable-msc4286")]
            external_payment_details: None,
            maths: None,
            spoiler: None,
            color: None,
            bg_color: None,
        }
    }

    /// Extracts the supported `data-*` attributes from `attrs`.
    ///
    /// Returns the extracted data and the set of attributes that were not consumed.
    /// The values are stored as-is, without validation.
    // The lint is silenced because the set is keyed by `Attribute`.
    #[allow(clippy::mutable_key_type)]
    fn parse(attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
        let mut parsed = Self::new();
        let mut leftover = BTreeSet::new();

        for attr in attrs {
            // Pick the field this attribute belongs to, if any.
            let slot = if attr.name.ns == ns!() {
                match attr.name.local.as_bytes() {
                    b"data-mx-bg-color" => Some(&mut parsed.bg_color),
                    b"data-mx-color" => Some(&mut parsed.color),
                    b"data-mx-spoiler" => Some(&mut parsed.spoiler),
                    b"data-mx-maths" => Some(&mut parsed.maths),
                    #[cfg(feature = "unstable-msc4286")]
                    b"data-msc4286-external-payment-details" => {
                        Some(&mut parsed.external_payment_details)
                    }
                    _ => None,
                }
            } else {
                None
            };

            match slot {
                Some(field) => *field = Some(attr.value.clone()),
                None => {
                    leftover.insert(attr.clone());
                }
            }
        }

        (parsed, leftover)
    }
}
/// The attributes interpreted for an `<img>` element.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct ImageData {
/// The `width` attribute parsed as an integer; `None` if absent or not a valid `i64`.
pub width: Option<i64>,
/// The `height` attribute parsed as an integer; `None` if absent or not a valid `i64`.
pub height: Option<i64>,
/// The value of the `alt` attribute, if it was present.
pub alt: Option<StrTendril>,
/// The value of the `title` attribute, if it was present.
pub title: Option<StrTendril>,
/// The `src` attribute as an MXC URI; `None` if absent or if the value fails validation.
pub src: Option<OwnedMxcUri>,
}
impl ImageData {
    /// Creates image data with no attribute set.
    fn new() -> Self {
        Self { src: None, title: None, alt: None, height: None, width: None }
    }

    /// Extracts the `width`, `height`, `alt`, `title` and `src` attributes from `attrs`.
    ///
    /// Returns the extracted data and the set of attributes that were not consumed,
    /// which includes a `width` or `height` that is not a valid `i64` and a `src` that
    /// is not a valid MXC URI.
    // The lint is silenced because the set is keyed by `Attribute`.
    #[allow(clippy::mutable_key_type)]
    fn parse(attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
        let mut parsed = Self::new();
        let mut leftover = BTreeSet::new();

        for attr in attrs {
            // `true` once the attribute's value has been stored in `parsed`.
            let consumed = attr.name.ns == ns!()
                && match attr.name.local.as_bytes() {
                    b"width" => match attr.value.parse::<i64>() {
                        Ok(width) => {
                            parsed.width = Some(width);
                            true
                        }
                        Err(_) => false,
                    },
                    b"height" => match attr.value.parse::<i64>() {
                        Ok(height) => {
                            parsed.height = Some(height);
                            true
                        }
                        Err(_) => false,
                    },
                    b"alt" => {
                        parsed.alt = Some(attr.value.clone());
                        true
                    }
                    b"title" => {
                        parsed.title = Some(attr.value.clone());
                        true
                    }
                    b"src" => {
                        let uri = <&MxcUri>::from(attr.value.as_ref());
                        let valid = uri.validate().is_ok();
                        if valid {
                            parsed.src = Some(uri.to_owned());
                        }
                        valid
                    }
                    _ => false,
                };

            if !consumed {
                leftover.insert(attr.clone());
            }
        }

        (parsed, leftover)
    }
}
/// The attributes interpreted for a `<div>` element.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct DivData {
/// The value of the `data-mx-maths` attribute, if it was present.
pub maths: Option<StrTendril>,
}
impl DivData {
    /// Creates div data with no attribute set.
    fn new() -> Self {
        Self { maths: None }
    }

    /// Extracts the `data-mx-maths` attribute from `attrs`.
    ///
    /// Returns the extracted data and the set of attributes that were not consumed.
    /// The value is stored as-is, without validation.
    // The lint is silenced because the set is keyed by `Attribute`.
    #[allow(clippy::mutable_key_type)]
    fn parse(attrs: &BTreeSet<Attribute>) -> (Self, BTreeSet<Attribute>) {
        let mut parsed = Self::new();
        let mut leftover = BTreeSet::new();

        for attr in attrs {
            let is_maths = attr.name.ns == ns!()
                && matches!(attr.name.local.as_bytes(), b"data-mx-maths");

            if is_maths {
                parsed.maths = Some(attr.value.clone());
            } else {
                leftover.insert(attr.clone());
            }
        }

        (parsed, leftover)
    }
}