use core::ops::Range;
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use regex::{NoExpand, Regex};
use thiserror::Error;
use crate::escape::{escape_text, is_valid_name, push_escaped_attr};
use crate::selector::Selector;
use crate::types::ElementRef;
use crate::Markdown;
/// Reasons an attribute-update request is rejected before any splice is made.
///
/// Both variants are produced by `check_new_attrs`, which validates the
/// `(name, value)` slice handed to `try_update`.
#[derive(Debug, Clone, Error, PartialEq, Eq)]
#[non_exhaustive]
pub enum MutateError {
/// An attribute name did not pass the `is_valid_name` check.
#[error("invalid XML attribute name {name:?}")]
InvalidAttrName {
/// The rejected attribute name, copied from the update slice.
name: String,
},
/// The same attribute name appeared more than once in the update slice.
#[error("duplicate attribute name {name:?} in update slice")]
DuplicateAttrName {
/// The attribute name that was seen a second time.
name: String,
},
}
/// Result of a mutation pass: the rewritten text plus bookkeeping counters.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[non_exhaustive]
pub struct MutationReport {
/// The document text after every accepted splice has been written in.
pub output: String,
/// Number of splices that were actually written into `output`.
pub applied: usize,
/// Number of splices dropped while applying: those that start before the
/// end of an already-applied splice, and those whose range is invalid for
/// the source text.
pub skipped_overlaps: usize,
/// Number of matched elements skipped by the content-replacing helpers
/// because `is_self_closing()` returned true. Attribute updates leave
/// this at 0.
pub skipped_self_closing: usize,
}
/// Sets `new_attrs` on the opening tag of every element matched by `sel` and
/// returns the rewritten document text.
///
/// This is the infallible front end of `try_update`; only the `output` field
/// of its report is returned.
///
/// # Panics
///
/// Panics when `try_update` rejects `new_attrs` (an invalid or duplicated
/// attribute name).
pub(crate) fn update(doc: &Markdown, sel: &Selector, new_attrs: &[(&str, &str)]) -> String {
    match try_update(doc, sel, new_attrs) {
        Ok(report) => report.output,
        Err(e) => panic!("update() called with invalid attrs: {e}"),
    }
}
pub(crate) fn replace_content(doc: &Markdown, sel: &Selector, new_body: &str) -> String {
splice_content(doc, sel, new_body).output
}
pub(crate) fn replace_in(
doc: &Markdown,
sel: &Selector,
pattern: &Regex,
replacement: &str,
) -> String {
splice_regex(doc, sel, pattern, replacement).output
}
/// Replaces the content of every element matched by `sel` with `new_body`
/// after passing it through `escape_text`, and returns the rewritten
/// document text.
pub(crate) fn replace_text(doc: &Markdown, sel: &Selector, new_body: &str) -> String {
    // The escaped text is only ever borrowed by `splice_content`, so keep
    // whatever `escape_text` returned (presumably a `Cow<str>`, given that the
    // previous code called `.into_owned()` on it) instead of forcing an extra
    // owned copy.
    let escaped = escape_text(new_body);
    splice_content(doc, sel, &escaped).output
}
/// Runs `pattern` over the content of every element matched by `sel`,
/// substituting `replacement` (after passing it through `escape_text`) for
/// each match, and returns the rewritten document text.
pub(crate) fn replace_text_in(
    doc: &Markdown,
    sel: &Selector,
    pattern: &Regex,
    replacement: &str,
) -> String {
    // The escaped replacement is only borrowed downstream, so keep whatever
    // `escape_text` returned (presumably a `Cow<str>`, given that the previous
    // code called `.into_owned()` on it) rather than forcing an owned copy.
    let escaped = escape_text(replacement);
    splice_regex_with(doc, sel, pattern, &escaped).output
}
/// Sets `new_attrs` on the opening tag of every element matched by `sel`.
///
/// Each matched element contributes one splice that replaces its opening tag
/// (or the whole tag, for a self-closing element) with a rewritten one.
///
/// # Errors
///
/// Returns a `MutateError` when `check_new_attrs` rejects `new_attrs`; no
/// element is inspected in that case.
pub(crate) fn try_update(
    doc: &Markdown,
    sel: &Selector,
    new_attrs: &[(&str, &str)],
) -> Result<MutationReport, MutateError> {
    check_new_attrs(new_attrs)?;
    let source = doc.raw();
    let splices: Vec<(Range<usize>, Cow<'_, str>)> = doc
        .select(sel)
        .into_iter()
        .map(|el| {
            let span = open_tag_span(&el);
            let tag = rewrite_open_tag(&el, new_attrs, el.is_self_closing());
            (span, Cow::Owned(tag))
        })
        .collect();
    Ok(apply_splices(source, splices))
}
/// Report-returning counterpart of `replace_content`.
///
/// Forwards to `splice_content` and hands back its full `MutationReport`
/// (output text plus counters). Despite the `try_` prefix this function has
/// no error path: it returns the report directly, not a `Result`.
pub(crate) fn try_replace_content(
doc: &Markdown,
sel: &Selector,
new_body: &str,
) -> MutationReport {
splice_content(doc, sel, new_body)
}
/// Report-returning counterpart of `replace_in`.
///
/// Forwards to `splice_regex` and hands back its full `MutationReport`
/// (output text plus counters). Despite the `try_` prefix this function has
/// no error path: it returns the report directly, not a `Result`.
pub(crate) fn try_replace_in(
doc: &Markdown,
sel: &Selector,
pattern: &Regex,
replacement: &str,
) -> MutationReport {
splice_regex(doc, sel, pattern, replacement)
}
fn splice_content<'a>(doc: &'a Markdown, sel: &Selector, new_body: &'a str) -> MutationReport {
let raw = doc.raw();
let mut self_closing_skipped = 0usize;
let mut splices: Vec<(Range<usize>, Cow<'_, str>)> = Vec::new();
for el in doc.select(sel) {
if el.is_self_closing() {
self_closing_skipped += 1;
continue;
}
splices.push((el.content_range(), Cow::Borrowed(new_body)));
}
let mut report = apply_splices(raw, splices);
report.skipped_self_closing = self_closing_skipped;
report
}
/// Forwards every argument unchanged to `splice_regex_with` and returns its
/// report; this function adds no behavior of its own.
fn splice_regex(
doc: &Markdown,
sel: &Selector,
pattern: &Regex,
replacement: &str,
) -> MutationReport {
splice_regex_with(doc, sel, pattern, replacement)
}
fn splice_regex_with(
doc: &Markdown,
sel: &Selector,
pattern: &Regex,
replacement: &str,
) -> MutationReport {
let raw = doc.raw();
let mut self_closing_skipped = 0usize;
let mut splices: Vec<(Range<usize>, Cow<'_, str>)> = Vec::new();
for el in doc.select(sel) {
if el.is_self_closing() {
self_closing_skipped += 1;
continue;
}
let range = el.content_range();
let body = &raw[range.clone()];
let replaced = pattern.replace_all(body, NoExpand(replacement));
let payload: Cow<'_, str> = match replaced {
std::borrow::Cow::Borrowed(_) => Cow::Borrowed(body),
std::borrow::Cow::Owned(s) => Cow::Owned(s),
};
splices.push((range, payload));
}
let mut report = apply_splices(raw, splices);
report.skipped_self_closing = self_closing_skipped;
report
}
/// Validates the attribute names of an update slice.
///
/// Names are checked in slice order. For each one, validity is tested before
/// uniqueness, and the first failure is returned.
///
/// # Errors
///
/// * `MutateError::InvalidAttrName` when `is_valid_name` rejects a name.
/// * `MutateError::DuplicateAttrName` when a name was already seen earlier in
///   the slice.
fn check_new_attrs(new_attrs: &[(&str, &str)]) -> Result<(), MutateError> {
    let mut seen: HashSet<&str> = HashSet::with_capacity(new_attrs.len());
    for &(name, _) in new_attrs {
        if !is_valid_name(name) {
            return Err(MutateError::InvalidAttrName {
                name: name.to_string(),
            });
        }
        // `insert` reports whether the name was new to the set.
        let first_occurrence = seen.insert(name);
        if !first_occurrence {
            return Err(MutateError::DuplicateAttrName {
                name: name.to_string(),
            });
        }
    }
    Ok(())
}
/// Byte range of the element's opening tag within the raw document.
///
/// The range always starts at the element's location start. For a
/// self-closing element it runs to the element's location end; otherwise it
/// stops where the element's content range begins.
fn open_tag_span(el: &ElementRef<'_>) -> Range<usize> {
    let location = el.location();
    let begin = location.start.offset_usize();
    let finish = if el.is_self_closing() {
        location.end.offset_usize()
    } else {
        el.content_range().start
    };
    begin..finish
}
/// Renders a fresh opening tag for `el` with `new_attrs` merged in.
///
/// Existing attributes keep their original order; one whose name appears in
/// `new_attrs` takes the new value. Entries of `new_attrs` that matched no
/// existing attribute are appended afterwards in slice order. The tag is
/// terminated with `/>` when `self_close` is true and with `>` otherwise.
/// Values are written through `write_attr`.
fn rewrite_open_tag(el: &ElementRef<'_>, new_attrs: &[(&str, &str)], self_close: bool) -> String {
    // Build a name -> position index only when the update slice is large
    // enough to make per-attribute linear scans worth avoiding.
    let index: Option<HashMap<&str, usize>> =
        (new_attrs.len() >= ATTR_INDEX_THRESHOLD).then(|| {
            new_attrs
                .iter()
                .enumerate()
                .map(|(i, (k, _))| (*k, i))
                .collect()
        });
    // Tracks which requested attributes replaced an existing one.
    let mut applied = vec![false; new_attrs.len()];
    let mut out = String::new();
    out.push('<');
    out.push_str(el.tag());
    for (name, existing) in el.attrs() {
        out.push(' ');
        // Use exactly one lookup strategy. A miss in the index means the name
        // is absent from `new_attrs`, so falling back to a linear scan (as the
        // previous `.or_else` chain did) could never find anything and only
        // cost an extra pass over the slice.
        let lookup = match index.as_ref() {
            Some(m) => m.get(name).copied(),
            None => new_attrs.iter().position(|(k, _)| *k == name),
        };
        match lookup {
            Some(i) => {
                write_attr(&mut out, name, new_attrs[i].1);
                applied[i] = true;
            }
            None => write_attr(&mut out, name, existing),
        }
    }
    // Append the requested attributes that did not override an existing one.
    for ((k, v), done) in new_attrs.iter().zip(&applied) {
        if !*done {
            out.push(' ');
            write_attr(&mut out, k, v);
        }
    }
    out.push_str(if self_close { "/>" } else { ">" });
    out
}
/// Minimum length of the update slice at which `rewrite_open_tag` builds a
/// `HashMap` index of the requested attribute names.
const ATTR_INDEX_THRESHOLD: usize = 8;
/// Appends `name="value"` to `out`, with the value written through
/// `push_escaped_attr`. The name is written as-is.
fn write_attr(out: &mut String, name: &str, value: &str) {
    out.extend([name, "=\""]);
    push_escaped_attr(out, value);
    out.push('"');
}
/// Applies a set of `(byte range, replacement)` splices to `raw` and reports
/// what happened.
///
/// Splices are ordered by start offset; among splices with the same start the
/// one with the larger end comes first, so an enclosing range wins over a
/// nested one. A splice is skipped (and counted in `skipped_overlaps`) when:
///
/// * it starts before the end of a splice that was already applied, or
/// * its range is unusable for `raw`: inverted, past the end of the text, or
///   not aligned to UTF-8 character boundaries.
///
/// `skipped_self_closing` is always 0 in the returned report; callers that
/// track it fill it in afterwards.
fn apply_splices(raw: &str, mut splices: Vec<(Range<usize>, Cow<'_, str>)>) -> MutationReport {
    if splices.is_empty() {
        return MutationReport {
            output: raw.to_string(),
            applied: 0,
            skipped_overlaps: 0,
            skipped_self_closing: 0,
        };
    }
    // Stable sort: splices with identical ranges keep their incoming order,
    // so the first one submitted is the one that gets applied.
    splices.sort_by(|a, b| {
        a.0.start
            .cmp(&b.0.start)
            .then_with(|| b.0.end.cmp(&a.0.end))
    });
    let mut out = String::with_capacity(raw.len());
    let mut cursor = 0usize;
    let mut applied = 0usize;
    let mut skipped = 0usize;
    for (range, replacement) in splices {
        let overlaps_previous = range.start < cursor;
        // `is_char_boundary` is false for offsets past the end of `raw`, so
        // this also covers the out-of-bounds cases. Rejecting mid-character
        // offsets here keeps the slicing below from panicking.
        let usable = range.start <= range.end
            && raw.is_char_boundary(range.start)
            && raw.is_char_boundary(range.end);
        if overlaps_previous || !usable {
            skipped += 1;
            continue;
        }
        out.push_str(&raw[cursor..range.start]);
        out.push_str(&replacement);
        cursor = range.end;
        applied += 1;
    }
    // `cursor` is either 0 or the end of an accepted range, so it is always a
    // valid boundary within `raw`; the tail may be empty.
    out.push_str(&raw[cursor..]);
    MutationReport {
        output: out,
        applied,
        skipped_overlaps: skipped,
        skipped_self_closing: 0,
    }
}