use std::collections::{BTreeSet, HashMap};
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64;
use devup_editor_core::{Block, BlockId};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
/// Serializable bundle of blocks: a list of root blocks plus an id-keyed lookup table.
///
/// NOTE(review): presumably this is the payload produced when blocks are copied, with
/// `by_id` holding the blocks reachable from `roots` — confirm against the callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CopiedBlocks {
/// The root blocks of the bundle.
pub roots: Vec<Block>,
/// Blocks indexed by their `BlockId`; (de)serialized under the key `byId`.
#[serde(rename = "byId")]
pub by_id: HashMap<BlockId, Block>,
}
/// The attribute name `data-devup-props`.
///
/// NOTE(review): presumably the HTML attribute that carries the string produced by
/// `encode_props` and consumed by `decode_props` — nothing in this file references it,
/// so confirm against the callers.
pub const DEVUP_PROPS_ATTR: &str = "data-devup-props";
/// Returns the lazily-initialized set of prop keys that `encode_props` leaves out of
/// its encoded payload.
///
/// The set is built once on first use and shared for the lifetime of the process.
fn props_skip_keys() -> &'static BTreeSet<&'static str> {
    use std::sync::OnceLock;

    static SKIP_KEYS: OnceLock<BTreeSet<&'static str>> = OnceLock::new();
    SKIP_KEYS.get_or_init(|| BTreeSet::from(["colspan", "rowspan", "columns", "indent"]))
}
/// Encodes a props map as Base64-wrapped JSON.
///
/// Entries whose key is in [`props_skip_keys`] and entries whose value is JSON `null`
/// are dropped before encoding.
///
/// Returns an empty string when `props` is `None`, when nothing survives the filtering,
/// or when JSON serialization fails.
#[must_use]
pub fn encode_props(props: Option<&Map<String, Value>>) -> String {
    let Some(props) = props else {
        return String::new();
    };

    let excluded = props_skip_keys();
    let kept: Map<String, Value> = props
        .iter()
        .filter(|(key, value)| !excluded.contains(key.as_str()) && !value.is_null())
        .map(|(key, value)| (key.clone(), value.clone()))
        .collect();
    if kept.is_empty() {
        return String::new();
    }

    match serde_json::to_string(&Value::Object(kept)) {
        Ok(json) => BASE64.encode(json.as_bytes()),
        Err(_) => String::new(),
    }
}
/// Decodes a props payload back into a JSON object map.
///
/// The input is first treated as Base64-encoded UTF-8 JSON; if that does not yield a
/// JSON object, the raw input itself is parsed as JSON instead.
///
/// Returns `None` for an empty input or when neither interpretation produces a JSON
/// object.
#[must_use]
pub fn decode_props(raw: &str) -> Option<Map<String, Value>> {
    /// Parses `text` as JSON and keeps the result only if it is an object.
    fn parse_object(text: &str) -> Option<Map<String, Value>> {
        match serde_json::from_str::<Value>(text) {
            Ok(Value::Object(map)) => Some(map),
            _ => None,
        }
    }

    if raw.is_empty() {
        return None;
    }

    BASE64
        .decode(raw.as_bytes())
        .ok()
        .and_then(|bytes| std::str::from_utf8(&bytes).ok().and_then(parse_object))
        // Not Base64 (or not an object inside): fall back to plain JSON.
        .or_else(|| parse_object(raw))
}
/// Cheap heuristic for "this text starts like XML/markup".
///
/// After skipping leading whitespace, returns `true` when the text begins with `<?xml`
/// or `<!DOCTYPE`, or with `<` immediately followed by an ASCII letter. Everything else,
/// including the empty string, yields `false`.
#[must_use]
pub fn looks_like_xml(src: &str) -> bool {
    const PROLOG_MARKERS: [&str; 2] = ["<?xml", "<!DOCTYPE"];

    let head = src.trim_start();
    if PROLOG_MARKERS.iter().any(|marker| head.starts_with(*marker)) {
        return true;
    }
    // `<` is a single byte and ASCII letters never occur inside a multi-byte character,
    // so inspecting the second byte is equivalent to inspecting the second character.
    matches!(head.as_bytes(), [b'<', second, ..] if second.is_ascii_alphabetic())
}
/// Removes a leading XML prolog from `src` and returns the remainder as an owned string.
///
/// Starting from the first non-whitespace character, the following constructs are
/// stripped repeatedly, in any order, together with the whitespace after them:
///
/// * `<?xml … ?>` declarations (any processing instruction whose target starts with `xml`),
/// * `<!DOCTYPE … >` declarations, including ones with an internal subset
///   (`<!DOCTYPE a [ <!ENTITY …> ]>`) or a `>` inside a quoted literal,
/// * `<!-- … -->` comments.
///
/// A construct that is never terminated is left in place and ends the stripping.
#[must_use]
pub fn strip_xml_prolog(src: &str) -> String {
    let mut rest = src.trim_start();
    loop {
        // The three prefixes are mutually exclusive, so at most one branch can apply.
        let remainder = if let Some(after) = rest.strip_prefix("<?xml") {
            after.find("?>").map(|end| &after[end + 2..])
        } else if let Some(after) = rest.strip_prefix("<!DOCTYPE") {
            doctype_decl_end(after).map(|end| &after[end + 1..])
        } else if let Some(after) = rest.strip_prefix("<!--") {
            after.find("-->").map(|end| &after[end + 3..])
        } else {
            None
        };
        match remainder {
            Some(tail) => rest = tail.trim_start(),
            None => break,
        }
    }
    rest.to_string()
}

/// Finds the byte offset of the `>` that closes a DOCTYPE declaration, given the text
/// right after `<!DOCTYPE`.
///
/// A `>` inside a quoted literal or inside the `[ … ]` internal subset does not close the
/// declaration. If no closing `>` is found that way (e.g. an unbalanced quote), the
/// first `>` is used so that malformed input is still handled on a best-effort basis.
fn doctype_decl_end(after: &str) -> Option<usize> {
    let mut open_quote: Option<u8> = None;
    let mut subset_depth = 0usize;
    for (idx, byte) in after.bytes().enumerate() {
        if let Some(quote) = open_quote {
            if byte == quote {
                open_quote = None;
            }
            continue;
        }
        match byte {
            b'"' | b'\'' => open_quote = Some(byte),
            b'[' => subset_depth += 1,
            b']' => subset_depth = subset_depth.saturating_sub(1),
            b'>' if subset_depth == 0 => return Some(idx),
            _ => {}
        }
    }
    after.find('>')
}
/// Returns a copy of `html` with every region recognized by [`skip_matched_region`]
/// removed.
///
/// The input is walked front to back: wherever a removable region starts, the cursor
/// jumps past it; otherwise the current character is copied to the output unchanged.
#[must_use]
pub fn clean_html(html: &str) -> String {
    let mut cleaned = String::with_capacity(html.len());
    let mut cursor = 0usize;
    while cursor < html.len() {
        cursor = match skip_matched_region(html, cursor) {
            Some(resume_at) => resume_at,
            None => match html[cursor..].chars().next() {
                Some(ch) => {
                    cleaned.push(ch);
                    // Advance by whole characters so the cursor stays on a char boundary.
                    cursor + ch.len_utf8()
                }
                None => break,
            },
        };
    }
    cleaned
}
/// Checks whether a removable clipboard artifact starts at byte offset `i` of `html`.
///
/// Recognized regions:
///
/// * `<!--StartFragment-->` / `<!--EndFragment-->` comments (ASCII case-insensitive,
///   surrounding whitespace inside the comment ignored); other comments are kept,
/// * an `<o:p …>` element together with its content up to the next `</o:p>`; a
///   self-closing (`<o:p/>`) or unclosed opening tag is removed on its own,
/// * a stray `</o:p>` closing tag.
///
/// Tags whose name merely starts with `o:p` (for example `<o:PixelsPerInch>`) are not
/// treated as `<o:p>`.
///
/// Returns the byte offset just past the region, or `None` when nothing removable
/// starts at `i`.
///
/// # Panics
///
/// Panics if `i` is not a char boundary inside `html`.
fn skip_matched_region(html: &str, i: usize) -> Option<usize> {
    let rest = &html[i..];

    if let Some(body) = rest.strip_prefix("<!--") {
        // Search for the terminator only after the opener: in `<!-->` and `<!--->` the
        // opener's own dashes form a `-->`, which would give an inverted slice range.
        let close = body.find("-->")?;
        let inner = body[..close].trim();
        let is_fragment_marker = inner.eq_ignore_ascii_case("startfragment")
            || inner.eq_ignore_ascii_case("endfragment");
        return is_fragment_marker.then(|| i + "<!--".len() + close + "-->".len());
    }

    if starts_with_case_insensitive(rest, "<o:p") && tag_name_ends_at(rest, "<o:p".len()) {
        let tag_end = rest.find('>')? + 1;
        // A self-closing tag has no content and no matching close tag to look for.
        if rest[..tag_end].ends_with("/>") {
            return Some(i + tag_end);
        }
        let skipped = match find_case_insensitive(&rest[tag_end..], "</o:p>") {
            Some(close_rel) => tag_end + close_rel + "</o:p>".len(),
            // Unclosed element: drop just the opening tag.
            None => tag_end,
        };
        return Some(i + skipped);
    }

    if starts_with_case_insensitive(rest, "</o:p") && tag_name_ends_at(rest, "</o:p".len()) {
        let tag_end = rest.find('>')? + 1;
        return Some(i + tag_end);
    }

    None
}

/// Returns `true` when the byte at `name_len` terminates a tag name, i.e. it is `>`,
/// `/`, or ASCII whitespace. Returns `false` when `rest` has no byte at that offset.
fn tag_name_ends_at(rest: &str, name_len: usize) -> bool {
    rest.as_bytes()
        .get(name_len)
        .copied()
        .map_or(false, |b| b == b'>' || b == b'/' || b.is_ascii_whitespace())
}
/// Returns `true` when `haystack` begins with `needle`, comparing ASCII letters
/// case-insensitively.
///
/// Yields `false` when `haystack` is shorter than `needle` or when `needle.len()` does
/// not fall on a character boundary of `haystack`.
fn starts_with_case_insensitive(haystack: &str, needle: &str) -> bool {
    // `str::get` performs both the length and the char-boundary check in one step.
    match haystack.get(..needle.len()) {
        Some(head) => head.eq_ignore_ascii_case(needle),
        None => false,
    }
}
/// Finds the first occurrence of `needle` in `haystack`, comparing ASCII letters
/// case-insensitively, and returns its byte offset.
///
/// Returns `None` when `needle` is empty, longer than `haystack`, or not present.
///
/// The search runs over bytes rather than string slices, so a candidate window that
/// ends in the middle of a multi-byte character is simply a non-match instead of an
/// out-of-boundary slice. Non-ASCII bytes must match exactly, so for a valid UTF-8
/// `needle` every reported offset lies on a character boundary of `haystack`.
fn find_case_insensitive(haystack: &str, needle: &str) -> Option<usize> {
    let pattern = needle.as_bytes();
    if pattern.is_empty() {
        return None;
    }
    haystack
        .as_bytes()
        .windows(pattern.len())
        .position(|window| window.eq_ignore_ascii_case(pattern))
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a props map by inserting `entries` in the given order.
    fn props_from(entries: Vec<(&str, Value)>) -> Map<String, Value> {
        let mut props = Map::new();
        for (key, value) in entries {
            props.insert(key.to_owned(), value);
        }
        props
    }

    // --- encode_props / decode_props -------------------------------------------------

    #[test]
    fn encode_empty_props() {
        assert_eq!(encode_props(None), "");
        assert_eq!(encode_props(Some(&Map::new())), "");
    }

    #[test]
    fn encode_skips_structural_keys() {
        let structural_only = props_from(vec![
            ("colspan", json!(2)),
            ("rowspan", json!(3)),
            ("columns", json!([])),
            ("indent", json!(1)),
        ]);
        assert_eq!(
            encode_props(Some(&structural_only)),
            "",
            "all four skip keys alone should produce no marker"
        );
    }

    #[test]
    fn encode_drops_null_values() {
        let input = props_from(vec![
            ("backgroundColor", Value::Null),
            ("borderColor", json!("#f59e0b")),
        ]);
        let round_tripped = decode_props(&encode_props(Some(&input))).unwrap();
        assert!(!round_tripped.contains_key("backgroundColor"));
        assert_eq!(round_tripped.get("borderColor"), Some(&json!("#f59e0b")));
    }

    #[test]
    fn roundtrip_preserves_arbitrary_props() {
        let input = props_from(vec![
            ("backgroundColor", json!("#fef3c7")),
            ("borderWidth", json!("2px")),
            ("padding", json!(12)),
            ("verticalAlign", json!("bottom")),
        ]);
        let round_tripped = decode_props(&encode_props(Some(&input))).unwrap();
        assert_eq!(round_tripped, input);
    }

    #[test]
    fn decode_plain_json_fallback() {
        let plain_json = r##"{"backgroundColor":"#fef3c7"}"##;
        let parsed = decode_props(plain_json).unwrap();
        assert_eq!(parsed.get("backgroundColor"), Some(&json!("#fef3c7")));
    }

    #[test]
    fn decode_rejects_garbage() {
        assert!(decode_props("").is_none());
        assert!(decode_props("not base64 and not json!").is_none());
    }

    // --- clean_html --------------------------------------------------------------------

    #[test]
    fn clean_html_strips_ms_markers() {
        let cleaned = clean_html("<!--StartFragment--><p>hi</p><!--EndFragment-->");
        assert_eq!(cleaned, "<p>hi</p>");
    }

    #[test]
    fn clean_html_strips_o_p_tags() {
        let cleaned = clean_html("<o:p>junk</o:p><p>real</p><o:p />");
        assert_eq!(cleaned, "<p>real</p>");
    }

    #[test]
    fn clean_html_case_insensitive() {
        let cleaned = clean_html("<!--STARTFRAGMENT--><p>x</p><!-- EndFragment -->");
        assert_eq!(cleaned, "<p>x</p>");
    }

    #[test]
    fn clean_html_preserves_unknown_comments() {
        let untouched = "<!-- keep me --><p>x</p>";
        assert_eq!(clean_html(untouched), untouched);
    }

    #[test]
    fn clean_html_leaves_unicode_intact() {
        let cleaned = clean_html("안녕<!--StartFragment-->세계");
        assert_eq!(cleaned, "안녕세계");
    }

    // --- looks_like_xml ----------------------------------------------------------------

    #[test]
    fn looks_like_xml_recognises_canonical_prolog() {
        for sample in ["<?xml version='1.0'?>", "<!DOCTYPE html>"] {
            assert!(looks_like_xml(sample));
        }
    }

    #[test]
    fn looks_like_xml_accepts_bare_tags() {
        for sample in ["<h1>", "<paragraph>body</paragraph>"] {
            assert!(looks_like_xml(sample));
        }
    }

    #[test]
    fn looks_like_xml_rejects_markdown_lookalikes() {
        for sample in ["not xml", "<3 love you", "< malformed", ""] {
            assert!(!looks_like_xml(sample));
        }
    }

    // --- strip_xml_prolog --------------------------------------------------------------

    #[test]
    fn strip_xml_prolog_removes_xml_declaration() {
        let stripped = strip_xml_prolog(r#"<?xml version="1.0"?><h1>T</h1>"#);
        assert_eq!(stripped, "<h1>T</h1>");
    }

    #[test]
    fn strip_xml_prolog_removes_doctype() {
        let stripped = strip_xml_prolog("<!DOCTYPE html><h1>T</h1>");
        assert_eq!(stripped, "<h1>T</h1>");
    }

    #[test]
    fn strip_xml_prolog_handles_combinations() {
        let stripped =
            strip_xml_prolog(r#"<?xml version="1.0"?><!DOCTYPE foo><!-- comment --><h1>T</h1>"#);
        assert_eq!(stripped, "<h1>T</h1>");
    }

    #[test]
    fn strip_xml_prolog_trims_leading_whitespace() {
        let stripped = strip_xml_prolog("  <?xml version=\"1.0\"?>  <h1>T</h1>");
        assert_eq!(stripped, "<h1>T</h1>");
    }
}