use devup_editor_core::{Mark, TextSpan, normalize_spans};
use markup5ever::interface::Attribute;
use markup5ever_rcdom::{Handle, NodeData};
use serde_json::{Map, Value};
use super::{TABLE_STRUCTURE_TAGS, is_heading_tag};
use crate::clipboard::decode_props;
/// Returns the ASCII-lowercased local tag name of `node`, or `None` when the
/// node is not an element.
pub(super) fn element_tag(node: &Handle) -> Option<String> {
    match &node.data {
        NodeData::Element { name, .. } => Some(name.local.as_ref().to_ascii_lowercase()),
        _ => None,
    }
}
/// Returns an owned copy of the attribute list of `node`.
///
/// Non-element nodes yield an empty vector.
pub(super) fn element_attrs(node: &Handle) -> Vec<Attribute> {
    match &node.data {
        NodeData::Element { attrs, .. } => attrs.borrow().to_vec(),
        _ => Vec::new(),
    }
}
/// Looks up the value of the first attribute whose local name matches `name`
/// (ASCII case-insensitive).
///
/// Returns `None` when `node` is not an element or carries no such attribute.
pub(super) fn attr_value(node: &Handle, name: &str) -> Option<String> {
    let NodeData::Element { attrs, .. } = &node.data else {
        return None;
    };
    attrs
        .borrow()
        .iter()
        .find(|attr| attr.name.local.as_ref().eq_ignore_ascii_case(name))
        .map(|attr| attr.value.as_ref().to_string())
}
/// Reports whether `attrs` holds an attribute named `name` whose value equals
/// `value`; both comparisons ignore ASCII case.
pub(super) fn attrs_contains(attrs: &[Attribute], name: &str, value: &str) -> bool {
    for attr in attrs {
        let name_matches = attr.name.local.as_ref().eq_ignore_ascii_case(name);
        if name_matches && attr.value.as_ref().eq_ignore_ascii_case(value) {
            return true;
        }
    }
    false
}
/// Reports whether any `class` attribute in `attrs` lists `class` as one of
/// its whitespace-separated tokens.
///
/// The attribute name is matched ignoring ASCII case; the class token itself
/// is compared exactly.
pub(super) fn has_class(attrs: &[Attribute], class: &str) -> bool {
    attrs
        .iter()
        .filter(|attr| attr.name.local.as_ref().eq_ignore_ascii_case("class"))
        .any(|attr| {
            attr.value
                .split_ascii_whitespace()
                .any(|token| token == class)
        })
}
/// Depth-first search for the first `<body>` element at or below `node`.
///
/// The node itself is checked before its children; children are visited in
/// document order.
pub(super) fn find_body(node: &Handle) -> Option<Handle> {
    if matches!(element_tag(node).as_deref(), Some("body")) {
        return Some(node.clone());
    }
    node.children.borrow().iter().find_map(find_body)
}
/// Collects the direct element children of `node` whose lowercased tag name
/// appears in `tags`, preserving document order.
pub(super) fn direct_children_of_tag_any(node: &Handle, tags: &[&str]) -> Vec<Handle> {
    let mut matched = Vec::new();
    for child in node.children.borrow().iter() {
        if let Some(tag) = element_tag(child) {
            if tags.contains(&tag.as_str()) {
                matched.push(child.clone());
            }
        }
    }
    matched
}
/// Gathers the `<tr>` rows of a table element in document order.
///
/// Rows are taken either from the table's direct children or from the direct
/// children of a `<tbody>`, `<thead>` or `<tfoot>` section; anything nested
/// deeper is ignored.
pub(super) fn collect_table_rows(table_el: &Handle) -> Vec<Handle> {
    fn is_row(el: &Handle) -> bool {
        element_tag(el).as_deref() == Some("tr")
    }

    let mut rows = Vec::new();
    for child in table_el.children.borrow().iter() {
        if is_row(child) {
            rows.push(child.clone());
            continue;
        }
        let is_section = matches!(
            element_tag(child).as_deref(),
            Some("tbody" | "thead" | "tfoot")
        );
        if is_section {
            rows.extend(
                child
                    .children
                    .borrow()
                    .iter()
                    .filter(|el| is_row(el))
                    .cloned(),
            );
        }
    }
    rows
}
#[derive(Default, Debug, Clone)]
pub(super) struct MarkSet {
bold: bool,
italic: bool,
underline: bool,
strike: bool,
code: bool,
link: Option<String>,
color: Option<String>,
highlight: Option<String>,
}
impl MarkSet {
pub(super) fn empty() -> Self {
Self::default()
}
pub(super) fn to_marks(&self) -> Vec<Mark> {
let mut out = Vec::new();
if self.bold {
out.push(Mark::bold());
}
if self.italic {
out.push(Mark::italic());
}
if self.underline {
out.push(Mark::underline());
}
if self.strike {
out.push(Mark::strike());
}
if self.code {
out.push(Mark::code());
}
if let Some(href) = &self.link {
let mut a = Map::new();
a.insert("href".into(), Value::String(href.clone()));
out.push(Mark::with_attrs("link", a));
}
if let Some(color) = &self.color {
let mut style = Map::new();
style.insert("color".into(), Value::String(color.clone()));
let mut a = Map::new();
a.insert("style".into(), Value::Object(style));
out.push(Mark::with_attrs("color", a));
}
if let Some(bg) = &self.highlight {
let mut style = Map::new();
style.insert("backgroundColor".into(), Value::String(bg.clone()));
let mut a = Map::new();
a.insert("style".into(), Value::Object(style));
out.push(Mark::with_attrs("highlight", a));
}
out
}
}
/// Derives the mark set that applies inside an element, starting from the
/// marks inherited from its ancestors (`base`).
///
/// Two sources contribute:
/// 1. the tag itself (`<b>`, `<em>`, `<u>`, `<s>`, `<code>`, `<a href>` …);
/// 2. the element's inline `style` attribute (`font-weight`, `font-style`,
///    `text-decoration`, `color`, `background-color`).
///
/// Inline style is skipped for tags listed in `TABLE_STRUCTURE_TAGS`
/// (presumably because table styling is extracted separately as cell/row
/// props — confirm against the callers).
///
/// CSS keywords are matched ASCII case-insensitively. An explicit
/// `font-weight: normal` (or a numeric weight below 600) and an explicit
/// `font-style: normal` switch the corresponding mark *off*, mirroring how a
/// browser renders e.g. `<b style="font-weight:normal">` wrappers as regular
/// text. A `transparent`/`inherit`/`initial`/`unset` background keeps the
/// inherited highlight instead of overwriting it.
pub(super) fn extend_marks(base: &MarkSet, tag: &str, attrs: &[Attribute]) -> MarkSet {
    // First attribute with the given (case-insensitive) local name.
    let find_attr = |name: &str| {
        attrs
            .iter()
            .find(|a| a.name.local.as_ref().eq_ignore_ascii_case(name))
            .map(|a| a.value.as_ref().to_string())
    };

    let mut next = base.clone();
    match tag {
        "strong" | "b" => next.bold = true,
        "em" | "i" => next.italic = true,
        "u" | "ins" => next.underline = true,
        "s" | "strike" | "del" => next.strike = true,
        "code" | "kbd" | "samp" => next.code = true,
        "a" => {
            // NOTE(review): the href is stored verbatim; confirm that scheme
            // filtering (e.g. `javascript:`) happens where links are rendered.
            if let Some(href) = find_attr("href").filter(|v| !v.is_empty()) {
                next.link = Some(href);
            }
        }
        _ => {}
    }

    if TABLE_STRUCTURE_TAGS.contains(&tag) {
        return next;
    }
    let Some(style_attr) = find_attr("style") else {
        return next;
    };
    let decl = parse_inline_style(&style_attr);

    if let Some(fw) = decl.font_weight.as_deref() {
        if is_bold_weight(fw) {
            next.bold = true;
        } else if fw.eq_ignore_ascii_case("normal")
            || fw
                .parse::<f64>()
                .is_ok_and(|n| (1.0..600.0).contains(&n))
        {
            // An explicit regular weight overrides both the tag default and
            // anything inherited. Relative keywords such as `lighter` are
            // left alone because their result depends on the parent weight.
            next.bold = false;
        }
    }

    if let Some(fs) = decl.font_style.as_deref() {
        // Only the leading keyword matters; `oblique 10deg` is still oblique.
        let keyword = fs.split_ascii_whitespace().next().unwrap_or("");
        if keyword.eq_ignore_ascii_case("italic") || keyword.eq_ignore_ascii_case("oblique") {
            next.italic = true;
        } else if keyword.eq_ignore_ascii_case("normal") {
            next.italic = false;
        }
    }

    if let Some(td) = decl.text_decoration.as_deref() {
        let td = td.to_ascii_lowercase();
        if td.contains("underline") {
            next.underline = true;
        }
        if td.contains("line-through") {
            next.strike = true;
        }
    }

    if let Some(color) = decl.color {
        next.color = Some(color);
    }

    if let Some(bg) = decl.background_color {
        // These keywords paint nothing of their own, so whatever highlight
        // the ancestors established keeps showing through.
        let keeps_inherited = ["transparent", "inherit", "initial", "unset"]
            .iter()
            .any(|kw| bg.eq_ignore_ascii_case(kw));
        if !keeps_inherited {
            next.highlight = Some(bg);
        }
    }
    next
}
/// Reports whether a CSS `font-weight` value should be treated as bold.
///
/// `bold` and `bolder` (any ASCII case) count as bold, as does any finite
/// numeric weight of 600 or more. Surrounding whitespace is ignored and
/// fractional weights such as `650.5` are accepted, since CSS allows any
/// number for `font-weight`. Everything else — including `normal`, empty
/// strings and non-finite numbers — is not bold.
pub(super) fn is_bold_weight(v: &str) -> bool {
    let v = v.trim();
    if v.eq_ignore_ascii_case("bold") || v.eq_ignore_ascii_case("bolder") {
        return true;
    }
    // `f64::from_str` also accepts "inf"/"NaN", hence the finiteness guard.
    v.parse::<f64>()
        .is_ok_and(|n| n.is_finite() && n >= 600.0)
}
/// Flattens the inline content below `node` into normalized text spans.
///
/// When `node` is an element, its own tag and attributes seed the mark set
/// applied to every child; otherwise the walk starts with no marks. The
/// collected spans are passed through `normalize_spans` before returning.
pub(super) fn extract_spans(node: &Handle) -> Vec<TextSpan> {
    let mut root_marks = MarkSet::empty();
    if let NodeData::Element { name, attrs, .. } = &node.data {
        let tag = name.local.as_ref().to_ascii_lowercase();
        root_marks = extend_marks(&root_marks, &tag, &attrs.borrow());
    }

    let mut collected: Vec<TextSpan> = Vec::new();
    node.children
        .borrow()
        .iter()
        .for_each(|child| collect_inline_into(child, &mut collected, &root_marks));
    normalize_spans(&mut collected);
    collected
}
/// Appends the inline content of `node` to `out`, tagging each piece of text
/// with the marks in effect.
///
/// * Text nodes become one span (empty text is skipped).
/// * `<br>` becomes a `"\n"` span carrying the current marks.
/// * `<input>` elements contribute nothing.
/// * Any other element extends `marks` via `extend_marks` and recurses into
///   its children.
/// * Other node kinds (comments, doctype, …) are ignored.
pub(super) fn collect_inline_into(node: &Handle, out: &mut Vec<TextSpan>, marks: &MarkSet) {
    let (name, attrs) = match &node.data {
        NodeData::Text { contents } => {
            let text = contents.borrow().to_string();
            if !text.is_empty() {
                out.push(TextSpan::with_marks(text, marks.to_marks()));
            }
            return;
        }
        NodeData::Element { name, attrs, .. } => (name, attrs),
        _ => return,
    };

    let tag = name.local.as_ref().to_ascii_lowercase();
    match tag.as_str() {
        "br" => out.push(TextSpan::with_marks("\n", marks.to_marks())),
        "input" => {}
        _ => {
            let inherited = extend_marks(marks, &tag, &attrs.borrow());
            for child in node.children.borrow().iter() {
                collect_inline_into(child, out, &inherited);
            }
        }
    }
}
/// Extracts the inline spans of a list item while ignoring its direct
/// `<ul>`, `<ol>` and `<details>` children.
///
/// The filtering happens on a deep copy, so `li` itself is left untouched.
pub(super) fn extract_spans_from_li(li: &Handle) -> Vec<TextSpan> {
    const NESTED_CONTAINERS: [&str; 3] = ["ul", "ol", "details"];
    extract_spans(&clone_node_filter_direct(li, &NESTED_CONTAINERS))
}
/// Concatenates the text found at or below `node` into a single string, in
/// document order and without any formatting information.
pub(super) fn collect_raw_text(node: &Handle) -> String {
    let mut buffer = String::new();
    walk_text(node, &mut buffer);
    buffer
}
/// Appends the text content of `node` to `out`.
///
/// Text nodes are appended verbatim and elements are descended into; every
/// other node kind (including a document root) contributes nothing.
pub(super) fn walk_text(node: &Handle, out: &mut String) {
    if let NodeData::Text { contents } = &node.data {
        out.push_str(&contents.borrow());
        return;
    }
    if matches!(&node.data, NodeData::Element { .. }) {
        node.children
            .borrow()
            .iter()
            .for_each(|child| walk_text(child, out));
    }
}
/// The subset of inline CSS declarations this module cares about.
///
/// Each field holds the trimmed declaration value as written in the `style`
/// attribute (minus any `!important` flag), or `None` when the property was
/// absent or empty.
#[derive(Default, Debug, Clone)]
pub(super) struct InlineStyle {
    pub(super) background_color: Option<String>,
    pub(super) border_color: Option<String>,
    pub(super) border_width: Option<String>,
    pub(super) border_style: Option<String>,
    pub(super) vertical_align: Option<String>,
    pub(super) padding: Option<String>,
    pub(super) height: Option<String>,
    pub(super) width: Option<String>,
    pub(super) color: Option<String>,
    pub(super) font_weight: Option<String>,
    pub(super) font_style: Option<String>,
    /// Space-joined values of every `text-decoration` and
    /// `text-decoration-line` declaration, in source order.
    pub(super) text_decoration: Option<String>,
}

/// Parses the contents of a `style` attribute into an [`InlineStyle`].
///
/// Declarations are split on `;` and on the first `:`. Property names are
/// matched ASCII case-insensitively; values are trimmed, and a trailing
/// `!important` flag is removed so it never leaks into the stored value.
/// Declarations without a `:`, with an empty value, or for unknown
/// properties are skipped. For ordinary properties a later declaration
/// overwrites an earlier one; `text-decoration` and `text-decoration-line`
/// values are accumulated instead.
pub(super) fn parse_inline_style(style_attr: &str) -> InlineStyle {
    // Drops a trailing `!important` (any ASCII case, optional inner spaces).
    fn strip_important(value: &str) -> &str {
        match value.rfind('!') {
            Some(idx) if value[idx + 1..].trim().eq_ignore_ascii_case("important") => {
                value[..idx].trim_end()
            }
            _ => value,
        }
    }

    let mut out = InlineStyle::default();
    for decl in style_attr.split(';') {
        let Some((k, v)) = decl.split_once(':') else {
            continue;
        };
        let key = k.trim().to_ascii_lowercase();
        let value = strip_important(v.trim());
        if value.is_empty() {
            continue;
        }
        let slot = match key.as_str() {
            "background-color" => &mut out.background_color,
            "border-color" => &mut out.border_color,
            "border-width" => &mut out.border_width,
            "border-style" => &mut out.border_style,
            "vertical-align" => &mut out.vertical_align,
            "padding" => &mut out.padding,
            "height" => &mut out.height,
            "width" => &mut out.width,
            "color" => &mut out.color,
            "font-weight" => &mut out.font_weight,
            "font-style" => &mut out.font_style,
            "text-decoration" | "text-decoration-line" => {
                // Accumulate rather than overwrite so both spellings
                // contribute to the final decoration list.
                let combined = match out.text_decoration.take() {
                    Some(mut current) if !current.is_empty() => {
                        current.push(' ');
                        current.push_str(value);
                        current
                    }
                    _ => value.to_string(),
                };
                out.text_decoration = Some(combined);
                continue;
            }
            _ => continue,
        };
        *slot = Some(value.to_string());
    }
    out
}
/// Builds the props object for a table cell element.
///
/// Sources, in order:
/// 1. props decoded from the element's `data-devup-props` attribute;
/// 2. `colspan` / `rowspan` attributes greater than 1, which overwrite any
///    decoded value;
/// 3. inline `style` declarations (`background-color`, `border-color`,
///    `border-width`, `border-style`, `vertical-align`, `padding`), which
///    only fill keys that are still missing.
///
/// Span attributes are trimmed before parsing and clamped to the limits the
/// HTML Standard applies (`colspan` ≤ 1000, `rowspan` ≤ 65534), so
/// pathological pasted markup cannot request an absurd grid.
///
/// Returns `None` when no prop was found at all.
pub(super) fn extract_cell_props(cell_el: &Handle) -> Option<Map<String, Value>> {
    const MAX_COLSPAN: u64 = 1000;
    const MAX_ROWSPAN: u64 = 65534;

    // Parses a span attribute; values of 0 or 1 carry no information.
    let span_attr = |name: &str, max: u64| {
        attr_value(cell_el, name)
            .and_then(|raw| raw.trim().parse::<u64>().ok())
            .filter(|n| *n > 1)
            .map(|n| n.min(max))
    };

    let mut out = decode_props_from_element(cell_el).unwrap_or_default();

    if let Some(cs) = span_attr("colspan", MAX_COLSPAN) {
        out.insert("colspan".into(), Value::from(cs));
    }
    if let Some(rs) = span_attr("rowspan", MAX_ROWSPAN) {
        out.insert("rowspan".into(), Value::from(rs));
    }

    if let Some(style_attr) = attr_value(cell_el, "style") {
        let decl = parse_inline_style(&style_attr);
        let from_style = [
            ("backgroundColor", decl.background_color),
            ("borderColor", decl.border_color),
            ("borderWidth", decl.border_width),
            ("borderStyle", decl.border_style),
            ("verticalAlign", decl.vertical_align),
            ("padding", decl.padding),
        ];
        for (key, value) in from_style {
            if let Some(v) = value {
                // Decoded props win over inline style.
                if !out.contains_key(key) {
                    out.insert(key.into(), Value::String(v));
                }
            }
        }
    }

    if out.is_empty() { None } else { Some(out) }
}
/// Builds the props object for a table row element.
///
/// Props decoded from `data-devup-props` come first; when they contain no
/// `height`, the inline `style` height is used instead. A *string* height is
/// then normalised to a JSON number: surrounding whitespace and a trailing
/// `px` unit (any ASCII case) are removed and the rest must parse as a
/// finite, positive number. Whole values are stored as integers, fractional
/// ones as floats. A string height that cannot be normalised (other units,
/// percentages, garbage) is removed. Heights that are already non-string
/// values are left untouched.
///
/// Returns `None` when no prop remains.
pub(super) fn extract_row_props(row_el: &Handle) -> Option<Map<String, Value>> {
    /// Converts a textual pixel height into a JSON number, if possible.
    fn normalise_height(raw: &str) -> Option<Value> {
        // Largest magnitude at which every whole f64 is exactly representable;
        // beyond it the value stays a float instead of saturating in a cast.
        const MAX_EXACT_INT: f64 = 9_007_199_254_740_992.0;

        let lowered = raw.trim().to_ascii_lowercase();
        let number = lowered.trim_end_matches("px").trim_end();
        let v = number
            .parse::<f64>()
            .ok()
            .filter(|v| v.is_finite() && *v > 0.0)?;

        #[allow(clippy::float_cmp)]
        let is_whole = v == v.trunc();
        if is_whole && v <= MAX_EXACT_INT {
            // Lossless: `v` is whole, positive and within the exact range.
            #[allow(clippy::cast_possible_truncation)]
            let as_int = v as i64;
            Some(Value::from(as_int))
        } else {
            serde_json::Number::from_f64(v).map(Value::Number)
        }
    }

    let mut out = decode_props_from_element(row_el).unwrap_or_default();

    if !out.contains_key("height") {
        let from_style =
            attr_value(row_el, "style").and_then(|s| parse_inline_style(&s).height);
        if let Some(v) = from_style {
            out.insert("height".into(), Value::String(v));
        }
    }

    // Outer `None`: nothing to normalise. Inner `None`: unusable string.
    let normalised = match out.get("height") {
        Some(Value::String(raw)) => Some(normalise_height(raw)),
        _ => None,
    };
    if let Some(height) = normalised {
        match height {
            Some(v) => {
                out.insert("height".into(), v);
            }
            None => {
                out.remove("height");
            }
        }
    }

    if out.is_empty() { None } else { Some(out) }
}
/// Decodes the props stored in the element's `data-devup-props` attribute.
///
/// Returns `None` when the attribute is missing or `decode_props` rejects
/// its content.
pub(super) fn decode_props_from_element(el: &Handle) -> Option<Map<String, Value>> {
    match attr_value(el, "data-devup-props") {
        Some(raw) => decode_props(&raw),
        None => None,
    }
}
/// Reads per-column widths from the table's first direct `<colgroup>` child.
///
/// Each `<col>` contributes the width from its inline `style` (`width`, with
/// an optional `px` suffix) or, failing that, from its `width` attribute;
/// only finite, positive numbers are accepted. A `<col span="N">` applies
/// its width to `N` consecutive columns (capped at 1000, the HTML limit), so
/// spanned cols no longer shift the widths that follow them. Columns without
/// a usable width, and columns beyond the last `<col>`, fall back to `120.0`.
///
/// The result always has exactly `cols` entries. `None` is returned when
/// there is no `<colgroup>`, it has no `<col>` children, or none of the cols
/// covering the first `cols` columns declares a usable width.
pub(super) fn extract_colgroup_widths(table_el: &Handle, cols: usize) -> Option<Vec<f64>> {
    const DEFAULT_COL_WIDTH: f64 = 120.0;
    const MAX_COL_SPAN: usize = 1000;

    fn usable(n: &f64) -> bool {
        n.is_finite() && *n > 0.0
    }

    // Width declared by one `<col>`: inline style first, attribute second.
    fn declared_width(col: &Handle) -> Option<f64> {
        attr_value(col, "style")
            .and_then(|s| parse_inline_style(&s).width)
            .and_then(|w| {
                w.trim()
                    .trim_end_matches("px")
                    .parse::<f64>()
                    .ok()
                    .filter(usable)
            })
            .or_else(|| {
                attr_value(col, "width")
                    .and_then(|w| w.trim().parse::<f64>().ok())
                    .filter(usable)
            })
    }

    let colgroup = table_el
        .children
        .borrow()
        .iter()
        .find(|c| element_tag(c).as_deref() == Some("colgroup"))
        .cloned()?;
    let col_els: Vec<Handle> = colgroup
        .children
        .borrow()
        .iter()
        .filter(|c| element_tag(c).as_deref() == Some("col"))
        .cloned()
        .collect();
    if col_els.is_empty() {
        return None;
    }

    let mut widths: Vec<f64> = Vec::with_capacity(cols);
    let mut saw_any = false;
    for col in &col_els {
        if widths.len() >= cols {
            break;
        }
        let width = declared_width(col);
        saw_any |= width.is_some();

        let span = attr_value(col, "span")
            .and_then(|s| s.trim().parse::<usize>().ok())
            .filter(|n| *n > 0)
            .map_or(1, |n| n.min(MAX_COL_SPAN));
        // Never emit more entries than the caller asked for.
        let repeat = span.min(cols - widths.len());
        widths.extend(std::iter::repeat(width.unwrap_or(DEFAULT_COL_WIDTH)).take(repeat));
    }
    // Columns not described by any `<col>` get the default width.
    widths.resize(cols, DEFAULT_COL_WIDTH);

    if saw_any { Some(widths) } else { None }
}
use std::cell::RefCell;
use std::rc::Rc;
/// Creates a detached, attribute-less HTML `<div>` whose children are the
/// given handles, in order.
///
/// The handles are shared (`Rc` clones), not deep-copied, and their parent
/// pointers are not updated.
pub(super) fn build_synthetic_parent(handles: &[Handle]) -> Handle {
    let div_name = html5ever::QualName::new(
        None,
        markup5ever::ns!(html),
        markup5ever::local_name!("div"),
    );
    let wrapper = markup5ever_rcdom::Node::new(NodeData::Element {
        name: div_name,
        attrs: RefCell::new(Vec::new()),
        template_contents: RefCell::new(None),
        mathml_annotation_xml_integration_point: false,
    });
    wrapper
        .children
        .borrow_mut()
        .extend(handles.iter().cloned());
    wrapper
}
/// Deep-copies `node` and removes those direct children of the copy whose
/// tag name is listed in `strip_tags`.
///
/// Non-element children are always kept; deeper descendants are not
/// filtered.
pub(super) fn clone_node_filter_direct(node: &Handle, strip_tags: &[&str]) -> Handle {
    let copy = deep_clone_element(node);
    {
        let mut children = copy.children.borrow_mut();
        children.retain(|child| match element_tag(child) {
            Some(tag) => !strip_tags.contains(&tag.as_str()),
            None => true,
        });
    }
    copy
}
/// Returns a deep copy of `node` from which every checkbox `<input>` has
/// been removed, at any depth. The original tree is not modified.
pub(super) fn clone_node_without_checkboxes(node: &Handle) -> Handle {
    let copy = deep_clone_element(node);
    strip_checkboxes_in_place(&copy);
    copy
}
pub(super) fn strip_checkboxes_in_place(node: &Handle) {
node.children.borrow_mut().retain(|c| {
if let Some(t) = element_tag(c)
&& t == "input"
{
let is_cb = element_attrs(c).iter().any(|a| {
a.name.local.as_ref().eq_ignore_ascii_case("type")
&& a.value.as_ref().eq_ignore_ascii_case("checkbox")
});
if is_cb {
return false;
}
}
true
});
for c in node.children.borrow().iter() {
strip_checkboxes_in_place(c);
}
}
pub(super) fn strip_nested_blocks(node: &Handle) -> Handle {
let cloned = deep_clone_element(node);
cloned.children.borrow_mut().retain(|c| {
if let Some(t) = element_tag(c) {
!matches!(
t.as_str(),
"ul" | "ol" | "details" | "pre" | "blockquote" | "table"
)
} else {
true
}
});
cloned
}
/// Recursively copies `node` and its whole subtree into fresh nodes.
///
/// Notes on fidelity:
/// * every copied node has its `parent` pointer unset (`None`);
/// * for elements, `template_contents` is reset to `None` and
///   `mathml_annotation_xml_integration_point` to `false`;
/// * names, attributes, text and the payload of comment / doctype /
///   processing-instruction nodes are cloned as-is.
pub(super) fn deep_clone_element(node: &Handle) -> Handle {
    let data = match &node.data {
        NodeData::Document => NodeData::Document,
        NodeData::Doctype {
            name,
            public_id,
            system_id,
        } => NodeData::Doctype {
            name: name.clone(),
            public_id: public_id.clone(),
            system_id: system_id.clone(),
        },
        NodeData::Text { contents } => NodeData::Text {
            contents: RefCell::new(contents.borrow().clone()),
        },
        NodeData::Comment { contents } => NodeData::Comment {
            contents: contents.clone(),
        },
        NodeData::Element { name, attrs, .. } => NodeData::Element {
            name: name.clone(),
            attrs: RefCell::new(attrs.borrow().clone()),
            template_contents: RefCell::new(None),
            mathml_annotation_xml_integration_point: false,
        },
        NodeData::ProcessingInstruction { target, contents } => NodeData::ProcessingInstruction {
            target: target.clone(),
            contents: contents.clone(),
        },
    };

    // Copy the subtree first, then assemble the new node around it.
    let children: Vec<Handle> = node
        .children
        .borrow()
        .iter()
        .map(deep_clone_element)
        .collect();

    Rc::new(markup5ever_rcdom::Node {
        parent: std::cell::Cell::new(None),
        children: RefCell::new(children),
        data,
    })
}
/// Heuristic check on a list item's direct children.
///
/// Returns `true` when both conditions hold:
/// * at least two direct element children are block-level (`p`, `div`,
///   `ul`, `ol`, `blockquote`, `pre`, `table`, `details` or a heading tag);
/// * the first direct element child is a `<p>`.
///
/// NOTE(review): going by the name, this targets the toggle markup of one
/// particular Notion export format — confirm against the caller.
pub(super) fn is_notion_v3_toggle(li: &Handle) -> bool {
    let child_tags: Vec<String> = li
        .children
        .borrow()
        .iter()
        .filter_map(element_tag)
        .collect();

    let block_children = child_tags
        .iter()
        .filter(|tag| {
            matches!(
                tag.as_str(),
                "p" | "div" | "ul" | "ol" | "blockquote" | "pre" | "table" | "details"
            ) || is_heading_tag(tag)
        })
        .count();

    block_children >= 2 && child_tags.first().is_some_and(|tag| tag.as_str() == "p")
}
/// Looks for a checkbox `<input>` near the top of a list item.
///
/// Children of `li` are scanned in order. A child that is itself a checkbox
/// input decides the result; a `<div>` child is searched one level deep (its
/// direct children only). Returns `Some(checked)` for the first checkbox
/// found, or `None` when there is none at those positions.
pub(super) fn detect_direct_checkbox(li: &Handle) -> Option<bool> {
    // `Some(checked)` when `el` is a checkbox input, `None` otherwise.
    fn checkbox_state(el: &Handle) -> Option<bool> {
        let is_input = element_tag(el).as_deref() == Some("input");
        (is_input && is_checkbox(el)).then(|| checkbox_is_checked(el))
    }

    for child in li.children.borrow().iter() {
        if let Some(state) = checkbox_state(child) {
            return Some(state);
        }
        if element_tag(child).as_deref() == Some("div") {
            let nested = child.children.borrow().iter().find_map(checkbox_state);
            if nested.is_some() {
                return nested;
            }
        }
    }
    None
}
/// Finds the first checkbox `<input>` at or below `li` in document
/// (pre-order) order and reports whether it is checked.
///
/// Returns `None` when the subtree contains no checkbox.
pub(super) fn detect_any_checkbox(li: &Handle) -> Option<bool> {
    // Explicit stack instead of recursion; children are pushed in reverse so
    // they are popped in document order.
    let mut pending: Vec<Handle> = vec![li.clone()];
    while let Some(node) = pending.pop() {
        if element_tag(&node).as_deref() == Some("input") && is_checkbox(&node) {
            return Some(checkbox_is_checked(&node));
        }
        pending.extend(node.children.borrow().iter().rev().cloned());
    }
    None
}
/// Reports whether the node's `type` attribute equals `checkbox`, ignoring
/// ASCII case. A missing attribute counts as "not a checkbox".
pub(super) fn is_checkbox(input: &Handle) -> bool {
    matches!(
        attr_value(input, "type"),
        Some(kind) if kind.eq_ignore_ascii_case("checkbox")
    )
}
pub(super) fn checkbox_is_checked(input: &Handle) -> bool {
if let NodeData::Element { attrs, .. } = &input.data {
attrs
.borrow()
.iter()
.any(|a| a.name.local.as_ref().eq_ignore_ascii_case("checked"))
} else {
false
}
}
/// Reports whether `node` itself or any element below it has `class` among
/// its class tokens.
pub(super) fn has_descendant_with_class(node: &Handle, class: &str) -> bool {
    let own_match = match &node.data {
        NodeData::Element { attrs, .. } => has_class(&attrs.borrow(), class),
        _ => false,
    };
    own_match
        || node
            .children
            .borrow()
            .iter()
            .any(|child| has_descendant_with_class(child, class))
}
/// Returns the first element — `node` itself included — that carries at
/// least one of `classes`, searching depth-first in document order.
pub(super) fn find_descendant_with_any_class(node: &Handle, classes: &[&str]) -> Option<Handle> {
    if let NodeData::Element { attrs, .. } = &node.data {
        let own_attrs = attrs.borrow();
        if classes.iter().any(|class| has_class(&own_attrs, class)) {
            return Some(node.clone());
        }
    }
    node.children
        .borrow()
        .iter()
        .find_map(|child| find_descendant_with_any_class(child, classes))
}
/// Reports whether every span's text is empty or whitespace-only.
///
/// An empty slice counts as all-whitespace.
pub(super) fn is_all_whitespace(spans: &[TextSpan]) -> bool {
    !spans.iter().any(|span| !span.text.trim().is_empty())
}