devup_editor_html/
clipboard.rs1use std::collections::{BTreeSet, HashMap};
7
8use base64::Engine;
9use base64::engine::general_purpose::STANDARD as BASE64;
10use devup_editor_core::{Block, BlockId};
11use serde::{Deserialize, Serialize};
12use serde_json::{Map, Value};
13
/// Serializable bundle of copied editor blocks.
///
/// Holds an ordered list of blocks (`roots`) next to an id-keyed lookup table
/// (`by_id`). The lookup table is written to and read from the `byId` key
/// when the value goes through serde.
// NOTE(review): presumably `roots` are the top-level blocks of the selection
// and `by_id` also contains their descendants — confirm against the code that
// builds this value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CopiedBlocks {
    /// Copied blocks, in the order they were stored.
    pub roots: Vec<Block>,
    /// Blocks indexed by their [`BlockId`]; serialized as `byId`.
    #[serde(rename = "byId")]
    pub by_id: HashMap<BlockId, Block>,
}
31
/// Attribute name `data-devup-props`.
// NOTE(review): presumably the HTML attribute that carries the string produced
// by `encode_props` and read back by `decode_props` — the use site is not in
// this file, confirm against the serializer/parser.
pub const DEVUP_PROPS_ATTR: &str = "data-devup-props";
35
/// Returns the process-wide set of prop keys that [`encode_props`] leaves out
/// of its output: `colspan`, `rowspan`, `columns` and `indent`.
///
/// The set is built on first use and the same instance is handed out on every
/// later call.
fn props_skip_keys() -> &'static BTreeSet<&'static str> {
    static SKIP_KEYS: std::sync::OnceLock<BTreeSet<&'static str>> = std::sync::OnceLock::new();
    SKIP_KEYS.get_or_init(|| BTreeSet::from(["colspan", "rowspan", "columns", "indent"]))
}
55
56#[must_use]
65pub fn encode_props(props: Option<&Map<String, Value>>) -> String {
66 let Some(props) = props else {
67 return String::new();
68 };
69 let skip = props_skip_keys();
70 let mut filtered = Map::new();
71 for (k, v) in props {
72 if skip.contains(k.as_str()) {
73 continue;
74 }
75 if v.is_null() {
76 continue;
77 }
78 filtered.insert(k.clone(), v.clone());
79 }
80 if filtered.is_empty() {
81 return String::new();
82 }
83 let Ok(json) = serde_json::to_string(&Value::Object(filtered)) else {
84 return String::new();
85 };
86 BASE64.encode(json.as_bytes())
87}
88
89#[must_use]
96pub fn decode_props(raw: &str) -> Option<Map<String, Value>> {
97 if raw.is_empty() {
98 return None;
99 }
100
101 if let Ok(bytes) = BASE64.decode(raw.as_bytes())
103 && let Ok(text) = std::str::from_utf8(&bytes)
104 && let Ok(Value::Object(map)) = serde_json::from_str::<Value>(text)
105 {
106 return Some(map);
107 }
108
109 if let Ok(Value::Object(map)) = serde_json::from_str::<Value>(raw) {
111 return Some(map);
112 }
113
114 None
115}
116
/// Cheap heuristic: does `src` look like markup rather than plain text?
///
/// After skipping leading whitespace the input is accepted when it starts
/// with `<?xml` or `<!DOCTYPE` (both matched case-sensitively), or with `<`
/// immediately followed by an ASCII letter. Everything else — including the
/// empty string, `<3`, and `< x` — is rejected.
#[must_use]
pub fn looks_like_xml(src: &str) -> bool {
    let body = src.trim_start();
    let has_prolog_marker = ["<?xml", "<!DOCTYPE"]
        .iter()
        .any(|&marker| body.starts_with(marker));

    has_prolog_marker
        || body
            .strip_prefix('<')
            .and_then(|after_bracket| after_bracket.chars().next())
            .is_some_and(|first| first.is_ascii_alphabetic())
}
141
/// Removes leading prolog items from `src` and returns what follows.
///
/// Repeatedly strips, from the front of the input (ignoring whitespace
/// between items):
/// - `<?xml … ?>` declarations,
/// - `<!DOCTYPE … >` declarations, including ones with an internal subset
///   (`<!DOCTYPE r [ <!ENTITY a "b"> ]>`) or a `>` inside a quoted identifier,
/// - `<!-- … -->` comments.
///
/// Stripping stops at the first thing that is none of the above, or at an
/// item that is never terminated (which is then left in place untouched).
/// Leading whitespace of the result is trimmed.
#[must_use]
pub fn strip_xml_prolog(src: &str) -> String {
    let mut rest = src.trim_start();
    while let Some(after_item) = skip_prolog_item(rest) {
        rest = after_item.trim_start();
    }
    rest.to_string()
}

/// Returns the text following the single prolog item at the start of `rest`,
/// or `None` when `rest` does not start with a complete prolog item.
fn skip_prolog_item(rest: &str) -> Option<&str> {
    if let Some(body) = rest.strip_prefix("<?xml") {
        return body.find("?>").map(|end| &body[end + 2..]);
    }
    if let Some(body) = rest.strip_prefix("<!DOCTYPE") {
        return doctype_close_offset(body).map(|end| &body[end + 1..]);
    }
    if let Some(body) = rest.strip_prefix("<!--") {
        return body.find("-->").map(|end| &body[end + 3..]);
    }
    None
}

/// Byte offset, within `body` (the text after `<!DOCTYPE`), of the `>` that
/// closes the declaration.
///
/// A `>` inside a quoted literal or inside the `[ … ]` internal subset does
/// not close the declaration. When that careful scan finds nothing (for
/// example because of an unbalanced quote) the first `>` is used instead, so
/// malformed input is handled no worse than a plain search would.
fn doctype_close_offset(body: &str) -> Option<usize> {
    let mut open_quote: Option<u8> = None;
    let mut bracket_depth = 0usize;

    // All delimiters are ASCII, so scanning bytes is safe and any offset
    // returned is a char boundary.
    for (offset, byte) in body.bytes().enumerate() {
        match open_quote {
            Some(quote) => {
                if byte == quote {
                    open_quote = None;
                }
            }
            None => match byte {
                b'"' | b'\'' => open_quote = Some(byte),
                b'[' => bracket_depth += 1,
                b']' => bracket_depth = bracket_depth.saturating_sub(1),
                b'>' if bracket_depth == 0 => return Some(offset),
                _ => {}
            },
        }
    }

    body.find('>')
}
176
177#[must_use]
186pub fn clean_html(html: &str) -> String {
187 let mut out = String::with_capacity(html.len());
188 let mut i = 0usize;
189 while i < html.len() {
190 if let Some(next) = skip_matched_region(html, i) {
191 i = next;
192 continue;
193 }
194 let rest = &html[i..];
196 let Some(ch) = rest.chars().next() else { break };
197 out.push(ch);
198 i += ch.len_utf8();
199 }
200 out
201}
202
203fn skip_matched_region(html: &str, i: usize) -> Option<usize> {
207 let rest = &html[i..];
208
209 if rest.starts_with("<!--") {
212 let comment_end = rest.find("-->")?;
213 let inner = rest[4..comment_end].trim().to_ascii_lowercase();
214 if inner == "startfragment" || inner == "endfragment" {
215 return Some(i + comment_end + 3);
216 }
217 return None;
218 }
219
220 if starts_with_case_insensitive(rest, "<o:p") {
222 let open_end = rest.find('>')?;
223 let after_open = &rest[open_end + 1..];
224 if let Some(close_rel) = find_case_insensitive(after_open, "</o:p>") {
225 return Some(i + open_end + 1 + close_rel + "</o:p>".len());
226 }
227 return Some(i + open_end + 1);
228 }
229
230 if starts_with_case_insensitive(rest, "</o:p") {
232 let end = rest.find('>')?;
233 return Some(i + end + 1);
234 }
235
236 None
237}
238
/// Reports whether `haystack` begins with `needle`, ignoring ASCII case.
///
/// Returns `false` (rather than panicking) when `haystack` is shorter than
/// `needle` or when `needle.len()` would split a multi-byte character of
/// `haystack`.
fn starts_with_case_insensitive(haystack: &str, needle: &str) -> bool {
    // `str::get` yields `None` for out-of-range or non-boundary ends.
    haystack
        .get(..needle.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(needle))
}
244
/// Finds the first occurrence of `needle` in `haystack`, ignoring ASCII case,
/// and returns its byte offset.
///
/// Returns `None` when `needle` is empty, longer than `haystack`, or absent.
///
/// The comparison runs over bytes, so it never slices `haystack` in the
/// middle of a multi-byte character. The returned offset is still a valid
/// char boundary: non-ASCII bytes must match exactly, and a `&str` needle
/// cannot begin with a UTF-8 continuation byte.
fn find_case_insensitive(haystack: &str, needle: &str) -> Option<usize> {
    // `windows(0)` would panic, and an empty needle is defined as "no match".
    if needle.is_empty() {
        return None;
    }
    let needle = needle.as_bytes();
    haystack
        .as_bytes()
        .windows(needle.len())
        .position(|window| window.eq_ignore_ascii_case(needle))
}
260
// Unit tests for the clipboard helpers: props encoding/decoding, HTML
// clean-up, and the XML detection / prolog-stripping heuristics.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // ---- encode_props / decode_props ----

    // Both "no props" and "empty props" encode to the empty string.
    #[test]
    fn encode_empty_props() {
        assert_eq!(encode_props(None), "");
        let empty = Map::new();
        assert_eq!(encode_props(Some(&empty)), "");
    }

    // A map made up solely of skipped keys has nothing left to encode.
    #[test]
    fn encode_skips_structural_keys() {
        let mut m = Map::new();
        m.insert("colspan".into(), json!(2));
        m.insert("rowspan".into(), json!(3));
        m.insert("columns".into(), json!([]));
        m.insert("indent".into(), json!(1));
        assert_eq!(
            encode_props(Some(&m)),
            "",
            "all four skip keys alone should produce no marker"
        );
    }

    // Null-valued entries are dropped; the remaining entries survive.
    #[test]
    fn encode_drops_null_values() {
        let mut m = Map::new();
        m.insert("backgroundColor".into(), Value::Null);
        m.insert("borderColor".into(), json!("#f59e0b"));
        let encoded = encode_props(Some(&m));
        let decoded = decode_props(&encoded).unwrap();
        assert!(!decoded.contains_key("backgroundColor"));
        assert_eq!(decoded.get("borderColor"), Some(&json!("#f59e0b")));
    }

    // Encode followed by decode returns the original map when nothing is
    // filtered out.
    #[test]
    fn roundtrip_preserves_arbitrary_props() {
        let mut m = Map::new();
        m.insert("backgroundColor".into(), json!("#fef3c7"));
        m.insert("borderWidth".into(), json!("2px"));
        m.insert("padding".into(), json!(12));
        m.insert("verticalAlign".into(), json!("bottom"));
        let encoded = encode_props(Some(&m));
        let decoded = decode_props(&encoded).unwrap();
        assert_eq!(decoded, m);
    }

    // Input that is plain JSON rather than base64 is still accepted.
    #[test]
    fn decode_plain_json_fallback() {
        let raw = r##"{"backgroundColor":"#fef3c7"}"##;
        let decoded = decode_props(raw).unwrap();
        assert_eq!(decoded.get("backgroundColor"), Some(&json!("#fef3c7")));
    }

    // Empty input and input that is neither base64 nor JSON decode to `None`.
    #[test]
    fn decode_rejects_garbage() {
        assert!(decode_props("").is_none());
        assert!(decode_props("not base64 and not json!").is_none());
    }

    // ---- clean_html ----

    // StartFragment / EndFragment comments are removed.
    #[test]
    fn clean_html_strips_ms_markers() {
        let input = "<!--StartFragment--><p>hi</p><!--EndFragment-->";
        assert_eq!(clean_html(input), "<p>hi</p>");
    }

    // `<o:p>` elements are removed together with their content; a
    // self-closing `<o:p />` is removed as well.
    #[test]
    fn clean_html_strips_o_p_tags() {
        let input = "<o:p>junk</o:p><p>real</p><o:p />";
        assert_eq!(clean_html(input), "<p>real</p>");
    }

    // Marker matching ignores ASCII case and whitespace inside the comment.
    #[test]
    fn clean_html_case_insensitive() {
        let input = "<!--STARTFRAGMENT--><p>x</p><!-- EndFragment -->";
        assert_eq!(clean_html(input), "<p>x</p>");
    }

    // Comments other than the fragment markers pass through untouched.
    #[test]
    fn clean_html_preserves_unknown_comments() {
        let input = "<!-- keep me --><p>x</p>";
        assert_eq!(clean_html(input), "<!-- keep me --><p>x</p>");
    }

    // Multi-byte text around a removed marker is copied intact.
    #[test]
    fn clean_html_leaves_unicode_intact() {
        let input = "안녕<!--StartFragment-->세계";
        assert_eq!(clean_html(input), "안녕세계");
    }

    // ---- looks_like_xml ----

    #[test]
    fn looks_like_xml_recognises_canonical_prolog() {
        assert!(looks_like_xml("<?xml version='1.0'?>"));
        assert!(looks_like_xml("<!DOCTYPE html>"));
    }

    // `<` followed directly by a letter is enough.
    #[test]
    fn looks_like_xml_accepts_bare_tags() {
        assert!(looks_like_xml("<h1>"));
        assert!(looks_like_xml("<paragraph>body</paragraph>"));
    }

    // Text that merely contains or starts with `<` is not treated as markup.
    #[test]
    fn looks_like_xml_rejects_markdown_lookalikes() {
        assert!(!looks_like_xml("not xml"));
        assert!(!looks_like_xml("<3 love you"));
        assert!(!looks_like_xml("< malformed"));
        assert!(!looks_like_xml(""));
    }

    // ---- strip_xml_prolog ----

    #[test]
    fn strip_xml_prolog_removes_xml_declaration() {
        assert_eq!(
            strip_xml_prolog(r#"<?xml version="1.0"?><h1>T</h1>"#),
            "<h1>T</h1>"
        );
    }

    #[test]
    fn strip_xml_prolog_removes_doctype() {
        assert_eq!(strip_xml_prolog("<!DOCTYPE html><h1>T</h1>"), "<h1>T</h1>");
    }

    // Declaration, doctype and comment in sequence are all removed.
    #[test]
    fn strip_xml_prolog_handles_combinations() {
        let out =
            strip_xml_prolog(r#"<?xml version="1.0"?><!DOCTYPE foo><!-- comment --><h1>T</h1>"#);
        assert_eq!(out, "<h1>T</h1>");
    }

    // Whitespace before and between prolog items is dropped.
    #[test]
    fn strip_xml_prolog_trims_leading_whitespace() {
        assert_eq!(
            strip_xml_prolog(" <?xml version=\"1.0\"?> <h1>T</h1>"),
            "<h1>T</h1>"
        );
    }
}