Skip to main content

hotmeal/
diff.rs

1//! HTML diffing with DOM patch generation.
2//!
3//! This module uses cinereus (GumTree/Chawathe) to compute tree diffs and translates
4//! them into DOM patches that can be applied to update an HTML document incrementally.
5
6use crate::{
7    Stem, StrTendril, debug,
8    dom::{self, Document, Namespace, NodeKind},
9};
10use cinereus::{
11    DiffTree, EditOp, Matching, MatchingConfig, NodeData, NodeHash, PropValue, Properties,
12    PropertyInFinalState, Tree, TreeTypes,
13    indextree::{self, NodeId},
14};
15use facet::Facet;
16use html5ever::{LocalName, QualName};
17use rapidhash::RapidHasher;
18use smallvec::{SmallVec, smallvec};
19use std::cell::Cell;
20use std::collections::{HashMap, HashSet};
21use std::hash::{Hash, Hasher};
22
23#[allow(unused_imports)]
24use crate::trace;
25
26/// Proxy for LocalName serialization
27#[derive(Debug, Clone, PartialEq, Eq, Facet)]
28#[facet(transparent)]
29pub struct LocalNameProxy(pub String);
30
31impl From<LocalNameProxy> for LocalName {
32    fn from(proxy: LocalNameProxy) -> Self {
33        LocalName::from(proxy.0)
34    }
35}
36
37impl From<&LocalName> for LocalNameProxy {
38    fn from(local: &LocalName) -> Self {
39        LocalNameProxy(local.to_string())
40    }
41}
42
43/// Proxy for QualName serialization (prefix, namespace, local_name)
44/// TODO: This string conversion is inefficient - consider interning namespaces or using indices
45#[derive(Debug, Clone, PartialEq, Eq, Facet)]
46pub struct QualNameProxy {
47    pub prefix: Option<String>,
48    pub ns: String,
49    pub local: String,
50}
51
52impl From<QualNameProxy> for QualName {
53    fn from(proxy: QualNameProxy) -> Self {
54        use html5ever::{Namespace, Prefix};
55        QualName {
56            // Filter out empty prefix strings - they should be None
57            prefix: proxy.prefix.filter(|s| !s.is_empty()).map(Prefix::from),
58            ns: Namespace::from(proxy.ns),
59            local: LocalName::from(proxy.local),
60        }
61    }
62}
63
64impl From<&QualName> for QualNameProxy {
65    fn from(qual: &QualName) -> Self {
66        QualNameProxy {
67            // Filter out empty prefix - serialize as None instead of Some("")
68            prefix: qual
69                .prefix
70                .as_ref()
71                .filter(|p| !p.is_empty())
72                .map(|p| p.to_string()),
73            ns: qual.ns.to_string(),
74            local: qual.local.to_string(),
75        }
76    }
77}
78
79/// An attribute name-value pair
80#[derive(Debug, Clone, PartialEq, Eq, Facet)]
81pub struct AttrPair<'a> {
82    #[facet(opaque, proxy = QualNameProxy)]
83    pub name: QualName,
84    pub value: Stem<'a>,
85}
86
87impl<'a> From<(QualName, Stem<'a>)> for AttrPair<'a> {
88    fn from((name, value): (QualName, Stem<'a>)) -> Self {
89        AttrPair { name, value }
90    }
91}
92
93impl<'a> From<AttrPair<'a>> for (QualName, Stem<'a>) {
94    fn from(pair: AttrPair<'a>) -> Self {
95        (pair.name, pair.value)
96    }
97}
98
99/// Property key - either text content or an attribute
100#[derive(Debug, Clone, PartialEq, Eq, Hash, Facet)]
101#[repr(u8)]
102pub enum PropKey {
103    /// Text content
104    Text,
105    /// Attribute with qualified name
106    Attr(#[facet(opaque, proxy = QualNameProxy)] QualName),
107}
108
109impl std::fmt::Display for PropKey {
110    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111        match self {
112            PropKey::Text => write!(f, "_text"),
113            PropKey::Attr(qual) => {
114                if let Some(ref prefix) = qual.prefix {
115                    write!(f, "{}:", prefix)?;
116                }
117                write!(f, "{}", qual.local)
118            }
119        }
120    }
121}
122
123/// Errors that can occur during diffing or patch application.
124#[derive(Facet, Debug)]
125#[facet(derive(Error))]
126#[repr(u8)]
127pub enum DiffError {
128    /// no body element found in document
129    NoBody,
130
131    /// path index {index} out of bounds
132    PathOutOfBounds { index: usize },
133
134    /// cannot get parent of empty path
135    EmptyPath,
136
137    /// slot {slot} not found
138    SlotNotFound { slot: u32 },
139
140    /// cannot insert at slot without relative path
141    SlotMissingRelativePath,
142
143    /// node is not a text node
144    NotATextNode,
145
146    /// node is not an element
147    NotAnElement,
148
149    /// node is not a comment
150    NotAComment,
151}
152
153/// A path to a node in the DOM tree.
154///
155/// Uses SmallVec<[u32; 16]> to avoid heap allocations for typical DOM depths.
156/// Most HTML documents have paths shorter than 16 elements, and u32 is plenty
157/// for child indices (no element has billions of children).
158#[derive(Debug, Clone, PartialEq, Eq, Hash, facet::Facet)]
159#[facet(transparent)]
160pub struct NodePath(pub SmallVec<[u32; 16]>);
161
162impl std::fmt::Display for NodePath {
163    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164        for (i, idx) in self.0.iter().enumerate() {
165            if i > 0 {
166                write!(f, ".")?;
167            }
168            write!(f, "{}", idx)?;
169        }
170        Ok(())
171    }
172}
173
174/// Reference to a node via a path.
175///
176/// The first element of the path is always the slot number:
177/// - `[0, 1, 2]` = slot 0 (main tree), child 1, child 2
178/// - `[1, 0, 3]` = slot 1 (displaced content), child 0, child 3
179///
180/// Slot 0 is special - it always contains the original tree (body).
181/// Slots 1+ are created when content is displaced during Insert/Move operations.
182#[derive(Debug, Clone, PartialEq, Eq, facet::Facet)]
183#[facet(transparent)]
184pub struct NodeRef(pub NodePath);
185
186/// Content that can be inserted as part of a new subtree.
187#[derive(Debug, Clone, PartialEq, Eq, facet::Facet)]
188#[repr(u8)]
189pub enum InsertContent<'a> {
190    /// An element with its tag, attributes, and nested children
191    Element {
192        #[facet(opaque, proxy = LocalNameProxy)]
193        tag: LocalName,
194        attrs: Vec<AttrPair<'a>>,
195        children: Vec<InsertContent<'a>>,
196    },
197    /// A text node
198    Text(Stem<'a>),
199    /// A comment node
200    Comment(Stem<'a>),
201}
202
203/// A property in the final state within an UpdateProps operation.
204/// The vec position determines the final ordering.
205#[derive(Debug, Clone, PartialEq, Eq, facet::Facet)]
206pub struct PropChange<'a> {
207    /// The property key (attribute name)
208    pub name: PropKey,
209    /// The value: None means "keep existing value", Some means "update to this value".
210    /// Properties not in the list are implicitly removed.
211    pub value: Option<Stem<'a>>,
212}
213
214/// Operations to transform the DOM.
215#[derive(Clone, PartialEq, Eq, facet::Facet)]
216#[repr(u8)]
217pub enum Patch<'a> {
218    /// Insert an element at a position.
219    /// The `at` NodeRef path includes the position as the last segment.
220    /// Path `[slot, a, b, c]` means: in slot, navigate to a, then b, then insert at position c.
221    InsertElement {
222        at: NodeRef,
223        #[facet(opaque, proxy = LocalNameProxy)]
224        tag: LocalName,
225        attrs: Vec<AttrPair<'a>>,
226        children: Vec<InsertContent<'a>>,
227        detach_to_slot: Option<u32>,
228    },
229
230    /// Insert a text node at a position.
231    InsertText {
232        at: NodeRef,
233        text: Stem<'a>,
234        detach_to_slot: Option<u32>,
235    },
236
237    /// Insert a comment node at a position.
238    InsertComment {
239        at: NodeRef,
240        text: Stem<'a>,
241        detach_to_slot: Option<u32>,
242    },
243
244    /// Remove a node
245    Remove { node: NodeRef },
246
247    /// Update text content of a text node at path.
248    SetText { path: NodePath, text: Stem<'a> },
249
250    /// Set attribute on element at path
251    SetAttribute {
252        path: NodePath,
253        #[facet(opaque, proxy = QualNameProxy)]
254        name: QualName,
255        value: Stem<'a>,
256    },
257
258    /// Remove attribute from element at path
259    RemoveAttribute {
260        path: NodePath,
261        #[facet(opaque, proxy = QualNameProxy)]
262        name: QualName,
263    },
264
265    /// Move a node from one location to another.
266    Move {
267        from: NodeRef,
268        to: NodeRef,
269        detach_to_slot: Option<u32>,
270    },
271
272    /// Update multiple properties on an element.
273    UpdateProps {
274        path: NodePath,
275        changes: Vec<PropChange<'a>>,
276    },
277
278    /// An opaque node's source content changed.
279    /// The DOM is NOT modified — the WASM applier dispatches a CustomEvent instead.
280    OpaqueChanged { path: NodePath, content: Stem<'a> },
281}
282
283impl<'a> std::fmt::Debug for Patch<'a> {
284    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
285        match self {
286            Patch::InsertElement {
287                at,
288                tag,
289                attrs,
290                children,
291                detach_to_slot,
292            } => {
293                write!(f, "Insert <{}> @{:?}", tag, at.0.0.as_slice())?;
294                if !attrs.is_empty() {
295                    write!(f, " ({} attrs)", attrs.len())?;
296                }
297                if !children.is_empty() {
298                    write!(f, " ({} children)", children.len())?;
299                }
300                if let Some(slot) = detach_to_slot {
301                    write!(f, " →slot{}", slot)?;
302                }
303                Ok(())
304            }
305            Patch::InsertText {
306                at,
307                text,
308                detach_to_slot,
309            } => {
310                let preview: String = text.chars().take(20).collect();
311                write!(f, "Insert text {:?} @{:?}", preview, at.0.0.as_slice())?;
312                if let Some(slot) = detach_to_slot {
313                    write!(f, " →slot{}", slot)?;
314                }
315                Ok(())
316            }
317            Patch::InsertComment {
318                at,
319                text,
320                detach_to_slot,
321            } => {
322                let preview: String = text.chars().take(20).collect();
323                write!(f, "Insert comment {:?} @{:?}", preview, at.0.0.as_slice())?;
324                if let Some(slot) = detach_to_slot {
325                    write!(f, " →slot{}", slot)?;
326                }
327                Ok(())
328            }
329            Patch::Remove { node } => {
330                write!(f, "Remove @{:?}", node.0.0.as_slice())
331            }
332            Patch::SetText { path, text } => {
333                let preview: String = text.chars().take(20).collect();
334                write!(f, "SetText {:?} @{:?}", preview, path.0.as_slice())
335            }
336            Patch::SetAttribute { path, name, value } => {
337                write!(
338                    f,
339                    "SetAttr {}={:?} @{:?}",
340                    name.local,
341                    value.as_ref(),
342                    path.0.as_slice()
343                )
344            }
345            Patch::RemoveAttribute { path, name } => {
346                write!(f, "RemoveAttr {} @{:?}", name.local, path.0.as_slice())
347            }
348            Patch::Move {
349                from,
350                to,
351                detach_to_slot,
352            } => {
353                write!(
354                    f,
355                    "Move {:?} → {:?}",
356                    from.0.0.as_slice(),
357                    to.0.0.as_slice()
358                )?;
359                if let Some(slot) = detach_to_slot {
360                    write!(f, " →slot{}", slot)?;
361                }
362                Ok(())
363            }
364            Patch::UpdateProps { path, changes } => {
365                write!(
366                    f,
367                    "UpdateProps @{:?} ({} changes)",
368                    path.0.as_slice(),
369                    changes.len()
370                )
371            }
372            Patch::OpaqueChanged { path, content } => {
373                let preview: String = content.chars().take(40).collect();
374                write!(f, "OpaqueChanged @{:?} {:?}", path.0.as_slice(), preview)
375            }
376        }
377    }
378}
379
380/// Node kind in the HTML tree.
381#[derive(Debug, Clone, PartialEq, Eq, Hash)]
382pub enum HtmlNodeKind {
383    /// An element node with a tag name and namespace
384    /// LocalName is interned via string_cache, Namespace distinguishes HTML/SVG/MathML
385    Element(LocalName, Namespace),
386    /// A text node
387    Text,
388    /// A comment node
389    Comment,
390}
391
392impl std::fmt::Display for HtmlNodeKind {
393    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
394        match self {
395            HtmlNodeKind::Element(tag, ns) => match ns {
396                Namespace::Html => write!(f, "<{}>", tag),
397                Namespace::Svg => write!(f, "<svg:{}>", tag),
398                Namespace::MathMl => write!(f, "<math:{}>", tag),
399            },
400            HtmlNodeKind::Text => write!(f, "#text"),
401            HtmlNodeKind::Comment => write!(f, "#comment"),
402        }
403    }
404}
405
406/// HTML element properties (attributes only).
407/// Text content is stored separately in NodeData::text.
408#[derive(Debug, Clone, Default)]
409pub struct HtmlProps<'a> {
410    /// Attributes - Vec preserves insertion order for consistent serialization
411    /// Keys are QualName (preserves namespace for xlink:href, xml:lang, etc.)
412    pub attrs: Vec<(QualName, Stem<'a>)>,
413}
414
415impl<'a> Properties for HtmlProps<'a> {
416    type Key = PropKey;
417    type Value = Stem<'a>;
418
419    #[allow(clippy::mutable_key_type)]
420    fn similarity(&self, other: &Self) -> f64 {
421        // Compare attributes using Dice coefficient
422        if self.attrs.is_empty() && other.attrs.is_empty() {
423            return 1.0;
424        }
425
426        let self_keys: std::collections::HashSet<_> = self.attrs.iter().map(|(k, _)| k).collect();
427        let other_keys: std::collections::HashSet<_> = other.attrs.iter().map(|(k, _)| k).collect();
428
429        let intersection = self_keys.intersection(&other_keys).count();
430        let union = self_keys.len() + other_keys.len();
431
432        if union == 0 {
433            1.0
434        } else {
435            (2 * intersection) as f64 / union as f64
436        }
437    }
438
439    fn diff(&self, other: &Self) -> Vec<PropertyInFinalState<Self::Key, Self::Value>> {
440        let mut result = Vec::new();
441
442        // Check if attribute ORDER differs (even if values are the same).
443        // Build a list of common keys in the order they appear in self.
444        let self_keys: Vec<_> = self.attrs.iter().map(|(k, _)| k).collect();
445        let other_keys: Vec<_> = other.attrs.iter().map(|(k, _)| k).collect();
446
447        // Find common keys in the order they appear in self
448        let self_common: Vec<_> = self_keys
449            .iter()
450            .filter(|k| other_keys.contains(k))
451            .copied()
452            .collect();
453        // Find common keys in the order they appear in other
454        let other_common: Vec<_> = other_keys
455            .iter()
456            .filter(|k| self_keys.contains(k))
457            .copied()
458            .collect();
459
460        // If common keys are in different order, we need to force an update
461        let order_differs = self_common != other_common;
462
463        // Include all attributes from the final state in order
464        let mut forced_one = false;
465        for (key, new_val) in &other.attrs {
466            let old_val = self.attrs.iter().find(|(k, _)| k == key).map(|(_, v)| v);
467            let value_same = old_val == Some(new_val);
468
469            // If order differs and values are the same, force at least one to be Different
470            // to trigger the UpdateProperties operation
471            let force_different = order_differs && value_same && !forced_one;
472            if force_different {
473                forced_one = true;
474            }
475
476            result.push(PropertyInFinalState {
477                key: PropKey::Attr(key.clone()),
478                value: if value_same && !force_different {
479                    PropValue::Same
480                } else {
481                    PropValue::Different(new_val.clone())
482                },
483            });
484        }
485
486        result
487    }
488
489    fn is_empty(&self) -> bool {
490        self.attrs.is_empty()
491    }
492
493    fn len(&self) -> usize {
494        self.attrs.len()
495    }
496}
497
498/// Tree types marker for HTML DOM.
499pub struct HtmlTreeTypes<'a>(std::marker::PhantomData<&'a ()>);
500
501impl<'a> TreeTypes for HtmlTreeTypes<'a> {
502    type Kind = HtmlNodeKind;
503    type Props = HtmlProps<'a>;
504    type Text = Stem<'a>;
505}
506
507/// Pre-computed diff data for a node.
508struct DiffNodeData<'a> {
509    hash: NodeHash,
510    kind: HtmlNodeKind,
511    props: HtmlProps<'a>,
512    /// Text content for text/comment nodes
513    text: Option<Stem<'a>>,
514    height: usize,
515    /// Cached position among siblings (0-indexed), computed on-demand
516    position: Cell<Option<u32>>,
517}
518
519/// A wrapper around Document that implements DiffTree.
520///
521/// This allows diffing Documents directly without building a separate cinereus Tree.
522/// The wrapper pre-computes hashes and caches kind/props for each node.
523///
524/// Two lifetime parameters:
525/// - `'b` - how long we borrow the Document
526/// - `'a` - the lifetime of data inside the Document (Stem<'a> values from original input)
527pub struct DiffableDocument<'b, 'a> {
528    doc: &'b Document<'a>,
529    /// The root for diffing (body element)
530    body_id: NodeId,
531    /// Pre-computed diff data indexed by NodeId
532    nodes: HashMap<NodeId, DiffNodeData<'a>>,
533}
534
535impl<'b, 'a> DiffableDocument<'b, 'a> {
536    /// Create a new DiffableDocument from a Document.
537    ///
538    /// Pre-computes hashes and caches kind/props for all body descendants.
539    pub fn new(doc: &'b Document<'a>) -> Result<Self, DiffError> {
540        let body_id = doc.body().ok_or(DiffError::NoBody)?;
541        // Pre-allocate based on arena size (upper bound for descendants)
542        let mut nodes = HashMap::with_capacity(doc.arena.count());
543
544        // First pass: compute kind, props, and text for all nodes
545        for node_id in body_id.descendants(&doc.arena) {
546            let node = doc.get(node_id);
547            let (kind, props, text) = match &node.kind {
548                NodeKind::Element(elem) => {
549                    let kind = HtmlNodeKind::Element(elem.tag.clone(), node.ns);
550                    let props = HtmlProps {
551                        attrs: elem
552                            .attrs
553                            .iter()
554                            .map(|(k, v)| (k.clone(), v.clone()))
555                            .collect(),
556                    };
557                    (kind, props, None)
558                }
559                NodeKind::Text(text) => {
560                    let kind = HtmlNodeKind::Text;
561                    let props = HtmlProps { attrs: Vec::new() };
562                    (kind, props, Some(text.clone()))
563                }
564                NodeKind::Comment(text) => {
565                    let kind = HtmlNodeKind::Comment;
566                    let props = HtmlProps { attrs: Vec::new() };
567                    (kind, props, Some(text.clone()))
568                }
569                NodeKind::Document => continue, // Skip document nodes
570            };
571
572            nodes.insert(
573                node_id,
574                DiffNodeData {
575                    hash: NodeHash(0), // Will be computed in second pass
576                    kind,
577                    props,
578                    text,
579                    height: 0,                 // Will be computed in second pass
580                    position: Cell::new(None), // Computed on-demand
581                },
582            );
583        }
584
585        // Second pass: compute heights and hashes bottom-up (post-order)
586        // We collect updates separately to avoid borrow conflicts
587        let post_order: Vec<_> = PostOrderIterator::new(body_id, &doc.arena).collect();
588        for node_id in post_order {
589            let children: Vec<_> = node_id.children(&doc.arena).collect();
590
591            // Compute height from children (already processed in post-order)
592            let height = if children.is_empty() {
593                0
594            } else {
595                1 + children
596                    .iter()
597                    .filter_map(|&c| nodes.get(&c))
598                    .map(|d| d.height)
599                    .max()
600                    .unwrap_or(0)
601            };
602
603            // Compute hash (Merkle-tree style): kind + text + children
604            let hash = if let Some(data) = nodes.get(&node_id) {
605                let mut hasher = RapidHasher::default();
606                data.kind.hash(&mut hasher);
607                // Include text content - this is the identity of text/comment nodes
608                if let Some(text) = &data.text {
609                    text.hash(&mut hasher);
610                }
611                for child_id in &children {
612                    if let Some(child_data) = nodes.get(child_id) {
613                        child_data.hash.0.hash(&mut hasher);
614                    }
615                }
616                NodeHash(hasher.finish())
617            } else {
618                NodeHash(0)
619            };
620
621            // Now update
622            if let Some(data) = nodes.get_mut(&node_id) {
623                data.height = height;
624                data.hash = hash;
625            }
626        }
627
628        Ok(Self {
629            doc,
630            body_id,
631            nodes,
632        })
633    }
634}
635
636impl<'b, 'a> DiffTree for DiffableDocument<'b, 'a> {
637    type Types = HtmlTreeTypes<'a>;
638
639    fn root(&self) -> NodeId {
640        self.body_id
641    }
642
643    fn node_count(&self) -> usize {
644        self.nodes.len()
645    }
646
647    fn hash(&self, id: NodeId) -> NodeHash {
648        self.nodes.get(&id).map(|d| d.hash).unwrap_or_default()
649    }
650
651    fn kind(&self, id: NodeId) -> &HtmlNodeKind {
652        self.nodes
653            .get(&id)
654            .map(|d| &d.kind)
655            .expect("node must exist")
656    }
657
658    fn properties(&self, id: NodeId) -> &HtmlProps<'a> {
659        self.nodes
660            .get(&id)
661            .map(|d| &d.props)
662            .expect("node must exist")
663    }
664
665    fn text(&self, id: NodeId) -> Option<&Stem<'a>> {
666        self.nodes.get(&id).and_then(|d| d.text.as_ref())
667    }
668
669    fn parent(&self, id: NodeId) -> Option<NodeId> {
670        self.doc.arena.get(id).and_then(|n| n.parent())
671    }
672
673    fn children(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
674        id.children(&self.doc.arena)
675    }
676
677    fn child_count(&self, id: NodeId) -> usize {
678        id.children(&self.doc.arena).count()
679    }
680
681    fn position(&self, id: NodeId) -> usize {
682        if let Some(data) = self.nodes.get(&id) {
683            // Check cache first
684            if let Some(pos) = data.position.get() {
685                return pos as usize;
686            }
687            // Compute and cache
688            let pos = if let Some(parent) = self.parent(id) {
689                parent
690                    .children(&self.doc.arena)
691                    .position(|c| c == id)
692                    .unwrap_or(0) as u32
693            } else {
694                0
695            };
696            data.position.set(Some(pos));
697            pos as usize
698        } else {
699            0
700        }
701    }
702
703    fn height(&self, id: NodeId) -> usize {
704        self.nodes.get(&id).map(|d| d.height).unwrap_or(0)
705    }
706
707    fn iter(&self) -> impl Iterator<Item = NodeId> + '_ {
708        self.body_id.descendants(&self.doc.arena)
709    }
710
711    fn post_order(&self) -> impl Iterator<Item = NodeId> + '_ {
712        PostOrderIterator::new(self.body_id, &self.doc.arena)
713    }
714
715    fn descendants(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
716        id.descendants(&self.doc.arena)
717    }
718
719    fn is_opaque(&self, id: NodeId) -> bool {
720        if let Some(data) = self.nodes.get(&id) {
721            data.props
722                .attrs
723                .iter()
724                .any(|(k, _)| k.local.as_ref() == "data-hotmeal-opaque")
725        } else {
726            false
727        }
728    }
729}
730
731/// Post-order iterator over tree nodes.
732struct PostOrderIterator<'a, 'b> {
733    arena: &'a indextree::Arena<dom::NodeData<'b>>,
734    stack: Vec<(NodeId, bool)>,
735}
736
737impl<'a, 'b> PostOrderIterator<'a, 'b> {
738    fn new(root: NodeId, arena: &'a indextree::Arena<dom::NodeData<'b>>) -> Self {
739        Self {
740            arena,
741            stack: vec![(root, false)],
742        }
743    }
744}
745
746impl Iterator for PostOrderIterator<'_, '_> {
747    type Item = NodeId;
748
749    fn next(&mut self) -> Option<Self::Item> {
750        while let Some((id, children_visited)) = self.stack.pop() {
751            if children_visited {
752                return Some(id);
753            }
754            self.stack.push((id, true));
755            let children: Vec<_> = id.children(self.arena).collect();
756            for child in children.into_iter().rev() {
757                self.stack.push((child, false));
758            }
759        }
760        None
761    }
762}
763
764/// Wrapper around `Tree<HtmlTreeTypes>` that adds `is_opaque` support.
765///
766/// `Tree<T>` is generic and can't check HTML attributes for opaqueness.
767/// This wrapper delegates all DiffTree methods and adds the `data-hotmeal-opaque` check.
768struct OpaqueAwareTree<'a>(Tree<HtmlTreeTypes<'a>>);
769
770impl<'a> DiffTree for OpaqueAwareTree<'a> {
771    type Types = HtmlTreeTypes<'a>;
772
773    fn root(&self) -> NodeId {
774        self.0.root()
775    }
776    fn node_count(&self) -> usize {
777        self.0.node_count()
778    }
779    fn hash(&self, id: NodeId) -> NodeHash {
780        self.0.hash(id)
781    }
782    fn kind(&self, id: NodeId) -> &HtmlNodeKind {
783        self.0.kind(id)
784    }
785    fn properties(&self, id: NodeId) -> &HtmlProps<'a> {
786        self.0.properties(id)
787    }
788    fn text(&self, id: NodeId) -> Option<&Stem<'a>> {
789        self.0.text(id)
790    }
791    fn parent(&self, id: NodeId) -> Option<NodeId> {
792        self.0.parent(id)
793    }
794    fn children(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
795        self.0.children(id)
796    }
797    fn child_count(&self, id: NodeId) -> usize {
798        self.0.child_count(id)
799    }
800    fn position(&self, id: NodeId) -> usize {
801        self.0.position(id)
802    }
803    fn height(&self, id: NodeId) -> usize {
804        self.0.height(id)
805    }
806    fn iter(&self) -> impl Iterator<Item = NodeId> + '_ {
807        self.0.iter()
808    }
809    fn post_order(&self) -> impl Iterator<Item = NodeId> + '_ {
810        self.0.post_order()
811    }
812    fn descendants(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
813        self.0.descendants(id)
814    }
815    fn is_opaque(&self, id: NodeId) -> bool {
816        self.0
817            .properties(id)
818            .attrs
819            .iter()
820            .any(|(k, _)| k.local.as_ref() == "data-hotmeal-opaque")
821    }
822}
823
824/// Build a cinereus tree from an arena_dom::Document (body content only).
825/// If the document has no body, returns an empty body tree.
826pub fn build_tree_from_arena<'a>(doc: &Document<'a>) -> Tree<HtmlTreeTypes<'a>> {
827    // Find body element, or create an empty body tree if none exists
828    let (body_tag, body_ns, body_id) = if let Some(body_id) = doc.body() {
829        let body_node = doc.get(body_id);
830        if let NodeKind::Element(elem) = &body_node.kind {
831            (elem.tag.clone(), body_node.ns, Some(body_id))
832        } else {
833            (LocalName::from("body"), Namespace::Html, None)
834        }
835    } else {
836        (LocalName::from("body"), Namespace::Html, None)
837    };
838
839    let root_data = NodeData {
840        hash: NodeHash(0),
841        kind: HtmlNodeKind::Element(body_tag, body_ns),
842        properties: HtmlProps { attrs: Vec::new() },
843        text: None,
844    };
845
846    let mut tree = Tree::new(root_data);
847    let tree_root = tree.root;
848
849    // Add children from body (only if we have a body)
850    if let Some(body_id) = body_id {
851        add_arena_children(&mut tree, tree_root, doc, body_id);
852    }
853
854    // Recompute hashes bottom-up
855    recompute_hashes(&mut tree);
856
857    tree
858}
859
860fn add_arena_children<'a>(
861    tree: &mut Tree<HtmlTreeTypes<'a>>,
862    parent: indextree::NodeId,
863    doc: &Document<'a>,
864    arena_parent: indextree::NodeId,
865) {
866    let children: Vec<_> = arena_parent.children(&doc.arena).collect();
867
868    for child_id in children.into_iter() {
869        let child_node = doc.get(child_id);
870        match &child_node.kind {
871            NodeKind::Element(elem) => {
872                let kind = HtmlNodeKind::Element(elem.tag.clone(), child_node.ns);
873                let props = HtmlProps {
874                    attrs: elem
875                        .attrs
876                        .iter()
877                        .map(|(k, v)| (k.clone(), v.clone()))
878                        .collect(),
879                };
880                let data = NodeData {
881                    hash: NodeHash(0),
882                    kind,
883                    properties: props,
884                    text: None,
885                };
886                let node_id = tree.add_child(parent, data);
887                add_arena_children(tree, node_id, doc, child_id);
888            }
889            NodeKind::Text(text) => {
890                let kind = HtmlNodeKind::Text;
891                let props = HtmlProps { attrs: Vec::new() };
892                let data = NodeData {
893                    hash: NodeHash(0),
894                    kind,
895                    properties: props,
896                    text: Some(text.clone()),
897                };
898                tree.add_child(parent, data);
899            }
900            NodeKind::Comment(text) => {
901                let kind = HtmlNodeKind::Comment;
902                let props = HtmlProps { attrs: Vec::new() };
903                let data = NodeData {
904                    hash: NodeHash(0),
905                    kind,
906                    properties: props,
907                    text: Some(text.clone()),
908                };
909                tree.add_child(parent, data);
910            }
911            NodeKind::Document => {
912                // Skip document nodes
913            }
914        }
915    }
916}
917
918/// Recompute hashes for all nodes in bottom-up order.
919///
920/// The hash captures: node kind + text content + children structure (Merkle-tree style).
921/// Attributes are NOT included - they're compared via the Properties trait after matching,
922/// using similarity scores to decide whether nodes should match.
923fn recompute_hashes(tree: &mut Tree<HtmlTreeTypes<'_>>) {
924    // Process in post-order (children before parents)
925    let nodes: Vec<NodeId> = tree.post_order().collect();
926
927    for node_id in nodes {
928        let mut hasher = RapidHasher::default();
929
930        let data = tree.get(node_id);
931        data.kind.hash(&mut hasher);
932
933        // Include text content in hash - this is the identity of text/comment nodes
934        if let Some(text) = &data.text {
935            text.hash(&mut hasher);
936        }
937
938        // Hash children's hashes (Merkle-tree style)
939        let children: Vec<NodeId> = tree.children(node_id).collect();
940        for child in children {
941            tree.get(child).hash.0.hash(&mut hasher);
942        }
943
944        // Update the hash
945        let new_hash = NodeHash(hasher.finish());
946        tree.arena
947            .get_mut(node_id)
948            .expect("node should exist")
949            .get_mut()
950            .hash = new_hash;
951    }
952}
953
954/// Create an insert patch for a node and all its descendants.
955/// Used when we need to insert entire subtrees (e.g., when old doc has no body).
956fn create_insert_patch<'a>(
957    doc: &Document<'a>,
958    node_id: NodeId,
959    position: usize,
960) -> Result<Patch<'a>, DiffError> {
961    let node = doc.get(node_id);
962
963    match &node.kind {
964        NodeKind::Element(elem) => {
965            let attrs: Vec<AttrPair<'a>> = elem
966                .attrs
967                .iter()
968                .map(|(name, value)| AttrPair {
969                    name: name.clone(),
970                    value: value.clone(),
971                })
972                .collect();
973
974            let children: Vec<InsertContent<'a>> = node_id
975                .children(&doc.arena)
976                .filter_map(|child_id| create_insert_content(doc, child_id))
977                .collect();
978
979            Ok(Patch::InsertElement {
980                at: NodeRef(NodePath(smallvec![0, position as u32])),
981                tag: elem.tag.clone(),
982                attrs,
983                children,
984                detach_to_slot: None,
985            })
986        }
987        NodeKind::Text(text) => Ok(Patch::InsertText {
988            at: NodeRef(NodePath(smallvec![0, position as u32])),
989            text: text.clone(),
990            detach_to_slot: None,
991        }),
992        NodeKind::Comment(text) => Ok(Patch::InsertComment {
993            at: NodeRef(NodePath(smallvec![0, position as u32])),
994            text: text.clone(),
995            detach_to_slot: None,
996        }),
997        NodeKind::Document => Err(DiffError::NoBody), // Shouldn't happen
998    }
999}
1000
1001/// Create InsertContent for a node and its descendants (recursive).
1002fn create_insert_content<'a>(doc: &Document<'a>, node_id: NodeId) -> Option<InsertContent<'a>> {
1003    let node = doc.get(node_id);
1004
1005    match &node.kind {
1006        NodeKind::Element(elem) => {
1007            let attrs: Vec<AttrPair<'a>> = elem
1008                .attrs
1009                .iter()
1010                .map(|(name, value)| AttrPair {
1011                    name: name.clone(),
1012                    value: value.clone(),
1013                })
1014                .collect();
1015
1016            let children: Vec<InsertContent<'a>> = node_id
1017                .children(&doc.arena)
1018                .filter_map(|child_id| create_insert_content(doc, child_id))
1019                .collect();
1020
1021            Some(InsertContent::Element {
1022                tag: elem.tag.clone(),
1023                attrs,
1024                children,
1025            })
1026        }
1027        NodeKind::Text(text) => Some(InsertContent::Text(text.clone())),
1028        NodeKind::Comment(text) => Some(InsertContent::Comment(text.clone())),
1029        NodeKind::Document => None,
1030    }
1031}
1032
1033/// Diff two HTML tendrils and return DOM patches.
1034///
1035/// Parses both HTML inputs and diffs them.
1036/// Patches borrow from the tendrils' buffers (zero-copy).
1037pub fn diff_html<'a>(
1038    old_html: &'a StrTendril,
1039    new_html: &'a StrTendril,
1040) -> Result<Vec<Patch<'a>>, DiffError> {
1041    let old_doc = dom::parse(old_html);
1042    let new_doc = dom::parse(new_html);
1043    diff(&old_doc, &new_doc)
1044}
1045
1046/// Diff two arena documents and return DOM patches.
1047///
1048/// This is the primary diffing API for arena_dom documents.
1049pub fn diff<'a>(old: &Document<'a>, new: &Document<'a>) -> Result<Vec<Patch<'a>>, DiffError> {
1050    let old_has_body = old.body().is_some();
1051    let new_has_body = new.body().is_some();
1052
1053    // Handle cases where one or both documents have no body
1054    match (old_has_body, new_has_body) {
1055        (false, false) => {
1056            // Neither has body - nothing to diff
1057            return Ok(vec![]);
1058        }
1059        (false, true) => {
1060            // Old has no body, new has body - insert all new body children
1061            let new_body_id = new.body().unwrap();
1062            let mut patches = Vec::new();
1063            for (pos, child_id) in new_body_id.children(&new.arena).enumerate() {
1064                patches.push(create_insert_patch(new, child_id, pos)?);
1065            }
1066            return Ok(patches);
1067        }
1068        (true, false) => {
1069            // Old has body, new has no body - remove all old body children
1070            let old_body_id = old.body().unwrap();
1071            let mut patches = Vec::new();
1072            // Remove children in reverse order to keep indices stable
1073            let children: Vec<_> = old_body_id.children(&old.arena).collect();
1074            for (pos, _child_id) in children.iter().enumerate().rev() {
1075                patches.push(Patch::Remove {
1076                    node: NodeRef(NodePath(smallvec![0, pos as u32])),
1077                });
1078            }
1079            return Ok(patches);
1080        }
1081        (true, true) => {
1082            // Both have bodies - proceed with normal diffing
1083        }
1084    }
1085
1086    // Build cinereus Tree for old (needed for shadow tree mutation)
1087    let raw_tree_a = build_tree_from_arena(old);
1088    let tree_a = OpaqueAwareTree(raw_tree_a);
1089    // Use DiffableDocument for new (avoids second tree allocation)
1090    let diff_b = DiffableDocument::new(new)?;
1091
1092    #[cfg(test)]
1093    {
1094        trace!(
1095            "tree_a: root hash={:?}, kind={:?}",
1096            tree_a.0.get(tree_a.0.root).hash,
1097            tree_a.0.get(tree_a.0.root).kind
1098        );
1099        trace!(
1100            "diff_b: root hash={:?}, kind={:?}",
1101            diff_b.hash(diff_b.root()),
1102            diff_b.kind(diff_b.root())
1103        );
1104    }
1105
1106    let config = MatchingConfig {
1107        min_height: 0,
1108        ..MatchingConfig::default()
1109    };
1110
1111    let mut matching = cinereus::compute_matching(&tree_a, &diff_b, &config);
1112
1113    // Force root match if same tag
1114    let root_a_kind = tree_a.0.get(tree_a.0.root).kind.clone();
1115    let root_b_kind = diff_b.kind(diff_b.root()).clone();
1116    if root_a_kind == root_b_kind && !matching.contains_a(tree_a.0.root) {
1117        matching.add(tree_a.0.root, diff_b.root());
1118    }
1119
1120    let edit_ops = cinereus::generate_edit_script(&tree_a, &diff_b, &matching);
1121
1122    #[cfg(test)]
1123    {
1124        debug!("matching pairs: {}", matching.len());
1125        for (a, b) in matching.pairs() {
1126            trace!("  matched: {:?} <-> {:?}", a, b);
1127        }
1128        trace!("edit_ops: {:?}", edit_ops);
1129    }
1130
1131    debug!(
1132        ops_count = edit_ops.len(),
1133        matched_pairs = matching.len(),
1134        "arena_dom cinereus diff complete"
1135    );
1136
1137    // After edit script, emit OpaqueChanged for matched opaque pairs with different inner HTML
1138    let mut opaque_patches = Vec::new();
1139    for (a_id, b_id) in matching.pairs() {
1140        // Check if the node in the new tree is opaque
1141        if !diff_b.is_opaque(b_id) {
1142            continue;
1143        }
1144        // Compare hashes — if identical, no change
1145        if tree_a.hash(a_id) == diff_b.hash(b_id) {
1146            continue;
1147        }
1148        // Get the original DOM node IDs for serialization
1149        // For tree_a (old), the NodeId corresponds to the cinereus tree, not the Document arena.
1150        // We need to serialize from the original documents instead.
1151        // tree_b's b_id maps directly to the new Document's arena (DiffableDocument wraps it).
1152        // tree_a's a_id maps to the cinereus Tree arena, not the old Document arena.
1153        // So we serialize from the new document (which is what we want — the new content).
1154        let new_inner = new.serialize_inner_html(b_id);
1155        opaque_patches.push((
1156            a_id,
1157            Stem::Owned(compact_str::CompactString::from(new_inner)),
1158        ));
1159    }
1160
1161    let mut patches = convert_ops_with_shadow(edit_ops, &tree_a.0, &diff_b, &matching)?;
1162
1163    // Now emit OpaqueChanged patches with correct paths from the shadow tree
1164    // We need to compute paths for the opaque nodes. Since convert_ops_with_shadow
1165    // already consumed the shadow tree, we build a simpler path computation.
1166    // Actually, let's integrate opaque patches into convert_ops_with_shadow instead.
1167    // For now, compute paths using the same approach as the shadow tree.
1168    if !opaque_patches.is_empty() {
1169        // Build a fresh shadow just for path computation
1170        let shadow = ShadowTree::new(tree_a.0.arena.clone(), tree_a.0.root);
1171        for (a_id, content) in opaque_patches {
1172            let path = shadow.compute_path(a_id);
1173            patches.push(Patch::OpaqueChanged {
1174                path: NodePath(path),
1175                content,
1176            });
1177        }
1178    }
1179
1180    Ok(patches)
1181}
1182
1183/// Encapsulates the shadow tree with slot-based path computation.
1184///
1185/// The arena has a "super root" whose children are slot nodes:
1186/// - Slot 0: The original tree (body element)
1187/// - Slot 1+: Displaced content from Insert/Move operations
1188///
1189/// All paths start with the slot number: [0, 1, 2] means slot 0, child 1, child 2.
1190/// This eliminates the need for separate tracking of detached nodes.
1191pub(crate) struct ShadowTree<'a> {
1192    pub(crate) arena: indextree::Arena<NodeData<HtmlTreeTypes<'a>>>,
1193    /// The super root - its children are slot nodes
1194    pub(crate) super_root: NodeId,
1195    /// Number of slots (slot 0 always exists, created in new())
1196    next_slot: u32,
1197}
1198
1199impl<'a> ShadowTree<'a> {
1200    fn new(
1201        mut arena: indextree::Arena<NodeData<HtmlTreeTypes<'a>>>,
1202        original_root: NodeId,
1203    ) -> Self {
1204        // Create the super root (a meta node, not a real DOM node)
1205        let super_root = arena.new_node(NodeData {
1206            hash: NodeHash(0),
1207            kind: HtmlNodeKind::Comment, // Just a marker, not used
1208            properties: HtmlProps::default(),
1209            text: None,
1210        });
1211
1212        // Create slot 0 and reparent the original tree under it
1213        let slot0 = arena.new_node(NodeData {
1214            hash: NodeHash(0),
1215            kind: HtmlNodeKind::Comment, // Slot marker
1216            properties: HtmlProps::default(),
1217            text: None,
1218        });
1219        super_root.append(slot0, &mut arena);
1220
1221        // Move original_root under slot 0
1222        original_root.detach(&mut arena);
1223        slot0.append(original_root, &mut arena);
1224
1225        Self {
1226            arena,
1227            super_root,
1228            next_slot: 1, // Slot 0 already exists
1229        }
1230    }
1231
1232    /// Get the slot node for a given slot number.
1233    fn get_slot(&self, slot: u32) -> Option<NodeId> {
1234        self.super_root.children(&self.arena).nth(slot as usize)
1235    }
1236
1237    /// Get the content root for slot 0 (the original tree root, e.g., body).
1238    fn slot0_content(&self) -> NodeId {
1239        let slot0 = self.get_slot(0).expect("slot 0 must exist");
1240        slot0
1241            .children(&self.arena)
1242            .next()
1243            .expect("slot 0 must have content")
1244    }
1245
1246    /// Compute the full path for any node. The first element is always the slot number.
1247    ///
1248    /// For a node at super_root → slot0 → body → div → text, the path is `[0, 0, 0]`:
1249    /// - slot number 0
1250    /// - div is child 0 of body (the slot content root)
1251    /// - text is child 0 of div
1252    ///
1253    /// Note: The slot content root's position within the slot node is NOT included.
1254    fn compute_path(&self, node: NodeId) -> SmallVec<[u32; 16]> {
1255        let mut path = SmallVec::new();
1256        let mut current = node;
1257
1258        while let Some(parent_id) = self.arena.get(current).and_then(|n| n.parent()) {
1259            // Check if parent is a slot node (grandparent is super_root)
1260            let grandparent = self.arena.get(parent_id).and_then(|n| n.parent());
1261            if grandparent == Some(self.super_root) {
1262                // parent_id is a slot node, current is the slot content root (e.g., body)
1263                // Get the slot number and stop - don't include current's position
1264                let slot = self
1265                    .super_root
1266                    .children(&self.arena)
1267                    .position(|c| c == parent_id)
1268                    .unwrap_or(0) as u32;
1269                path.push(slot);
1270                break;
1271            }
1272
1273            // Normal case: add position and continue up
1274            let position = parent_id
1275                .children(&self.arena)
1276                .position(|c| c == current)
1277                .unwrap_or(0) as u32;
1278            path.push(position);
1279            current = parent_id;
1280        }
1281
1282        path.reverse();
1283        path
1284    }
1285
1286    /// Get a NodeRef for any node.
1287    fn get_node_ref(&self, node: NodeId) -> NodeRef {
1288        let path = self.compute_path(node);
1289        debug!(?node, ?path, "get_node_ref: computed path");
1290        NodeRef(NodePath(path))
1291    }
1292
1293    /// Get a NodeRef with a specific position appended (for insert/move targets).
1294    fn get_node_ref_with_position(&self, parent: NodeId, position: usize) -> NodeRef {
1295        let mut path = self.compute_path(parent);
1296        path.push(position as u32);
1297        NodeRef(NodePath(path))
1298    }
1299
1300    /// Create a new slot and return its number.
1301    fn create_slot(&mut self) -> u32 {
1302        let slot_num = self.next_slot;
1303        self.next_slot += 1;
1304
1305        let slot_node = self.arena.new_node(NodeData {
1306            hash: NodeHash(0),
1307            kind: HtmlNodeKind::Comment, // Slot marker
1308            properties: HtmlProps::default(),
1309            text: None,
1310        });
1311        self.super_root.append(slot_node, &mut self.arena);
1312
1313        debug!(slot_num, "created new slot");
1314        slot_num
1315    }
1316
1317    /// Detach a node to a new slot, returning the slot number.
1318    fn detach_to_slot(&mut self, node: NodeId) -> u32 {
1319        let slot_num = self.create_slot();
1320        let slot_node = self.get_slot(slot_num).expect("just created");
1321
1322        node.detach(&mut self.arena);
1323        slot_node.append(node, &mut self.arena);
1324
1325        debug!(?node, slot_num, "detached node to slot");
1326        slot_num
1327    }
1328
1329    /// Detach a node with a placeholder to prevent sibling shifts.
1330    fn detach_with_placeholder(&mut self, node: NodeId) {
1331        let placeholder = self.arena.new_node(NodeData {
1332            hash: NodeHash(0),
1333            kind: HtmlNodeKind::Text,
1334            properties: HtmlProps::default(),
1335            text: None,
1336        });
1337        node.insert_before(placeholder, &mut self.arena);
1338        node.detach(&mut self.arena);
1339    }
1340
1341    /// Check if `ancestor` is an ancestor of `node`.
1342    fn is_ancestor(&self, ancestor: NodeId, node: NodeId) -> bool {
1343        let mut current = node;
1344        while let Some(parent) = self.arena.get(current).and_then(|n| n.parent()) {
1345            if parent == ancestor {
1346                return true;
1347            }
1348            current = parent;
1349        }
1350        false
1351    }
1352
1353    /// Replace a node with a placeholder, returning the placeholder's NodeId.
1354    /// The node's children are reparented under the placeholder.
1355    fn replace_with_placeholder(&mut self, node: NodeId) -> NodeId {
1356        debug!(
1357            ?node,
1358            node_kind = %self.arena[node].get().kind,
1359            "replace_with_placeholder: replacing node"
1360        );
1361        let placeholder = self.arena.new_node(NodeData {
1362            hash: NodeHash(0),
1363            kind: HtmlNodeKind::Text,
1364            properties: HtmlProps::default(),
1365            text: None,
1366        });
1367        // Insert placeholder as sibling of node
1368        node.insert_before(placeholder, &mut self.arena);
1369        // Move all children from node to placeholder
1370        let children: Vec<_> = node.children(&self.arena).collect();
1371        debug!(
1372            children_count = children.len(),
1373            "replace_with_placeholder: moving children to placeholder"
1374        );
1375        for child in children {
1376            child.detach(&mut self.arena);
1377            placeholder.append(child, &mut self.arena);
1378        }
1379        // Now detach the empty node
1380        node.detach(&mut self.arena);
1381        debug!(?placeholder, "replace_with_placeholder: done");
1382        placeholder
1383    }
1384
1385    /// Insert a new node at a position, handling displacement via slots.
1386    /// Returns the slot number if an occupant was displaced.
1387    fn insert_at_position(
1388        &mut self,
1389        parent: NodeId,
1390        position: usize,
1391        new_node: NodeId,
1392    ) -> Option<u32> {
1393        let children: Vec<_> = parent.children(&self.arena).collect();
1394
1395        if position < children.len() {
1396            let occupant = children[position];
1397            // Insert new_node before occupant, then detach occupant to slot
1398            occupant.insert_before(new_node, &mut self.arena);
1399            let slot = self.detach_to_slot(occupant);
1400            Some(slot)
1401        } else {
1402            // Grow the children array with placeholder text nodes to reach the target position
1403            while parent.children(&self.arena).count() < position {
1404                let placeholder = self.arena.new_node(NodeData {
1405                    hash: NodeHash(0),
1406                    kind: HtmlNodeKind::Text,
1407                    properties: HtmlProps { attrs: Vec::new() },
1408                    text: Some(Stem::new()),
1409                });
1410                parent.append(placeholder, &mut self.arena);
1411            }
1412
1413            // Now insert at position (either appending or displacing)
1414            let current_children: Vec<_> = parent.children(&self.arena).collect();
1415            if position < current_children.len() {
1416                let occupant = current_children[position];
1417                occupant.insert_before(new_node, &mut self.arena);
1418                let slot = self.detach_to_slot(occupant);
1419                Some(slot)
1420            } else {
1421                parent.append(new_node, &mut self.arena);
1422                None
1423            }
1424        }
1425    }
1426
1427    /// Move a node (already in shadow tree) to a new position.
1428    /// Returns the slot number if an occupant was displaced.
1429    fn move_to_position(
1430        &mut self,
1431        node: NodeId,
1432        new_parent: NodeId,
1433        position: usize,
1434    ) -> Option<u32> {
1435        // CRITICAL: If node is an ancestor of new_parent, we must replace node with
1436        // a placeholder FIRST. Otherwise, insert_before would create a cycle in the
1437        // tree (indextree's insert_before doesn't check for cycles like append does).
1438        //
1439        // Example: moving A under C when the tree is A -> B -> C
1440        //   1. Replace A with placeholder: (P) -> B -> C, A is detached
1441        //   2. Now we can safely insert A under C: (P) -> B -> C -> A
1442        let is_ancestor = self.is_ancestor(node, new_parent);
1443        debug!(
1444            ?node,
1445            ?new_parent,
1446            is_ancestor,
1447            "move_to_position: checking ancestry"
1448        );
1449        if is_ancestor {
1450            self.replace_with_placeholder(node);
1451        } else {
1452            // Check if node is a direct child of a slot (i.e., a slot root).
1453            // In that case, we just detach it without a placeholder.
1454            let parent = self.arena.get(node).and_then(|n| n.parent());
1455            let is_slot_root = parent.is_some_and(|p| {
1456                self.arena.get(p).and_then(|n| n.parent()) == Some(self.super_root)
1457            });
1458
1459            if !is_slot_root {
1460                // Node is deeper in the tree - detach with placeholder to prevent shifts
1461                self.detach_with_placeholder(node);
1462            } else {
1463                // Node is a slot root - just detach it
1464                node.detach(&mut self.arena);
1465            }
1466        }
1467
1468        // Now insert at target position
1469        let children: Vec<_> = new_parent.children(&self.arena).collect();
1470        debug!(
1471            ?node,
1472            position,
1473            children_count = children.len(),
1474            "Move: checking target position"
1475        );
1476
1477        if position < children.len() {
1478            let occupant = children[position];
1479            let _occupant_kind = &self.arena[occupant].get().kind;
1480            debug!(
1481                ?occupant,
1482                ?_occupant_kind,
1483                "Move: found occupant at target position"
1484            );
1485
1486            if occupant != node {
1487                // Displace occupant to a slot
1488                occupant.insert_before(node, &mut self.arena);
1489                let slot = self.detach_to_slot(occupant);
1490                debug!(?occupant, slot, "Move: detached occupant to slot");
1491                Some(slot)
1492            } else {
1493                debug!("Move: node already at target position");
1494                None
1495            }
1496        } else {
1497            // Fill gaps with placeholder text nodes (per Chawathe semantics)
1498            while new_parent.children(&self.arena).count() < position {
1499                let placeholder = self.arena.new_node(NodeData {
1500                    hash: NodeHash(0),
1501                    kind: HtmlNodeKind::Text,
1502                    properties: HtmlProps { attrs: Vec::new() },
1503                    text: Some(Stem::new()),
1504                });
1505                new_parent.append(placeholder, &mut self.arena);
1506            }
1507
1508            // Re-check after filling gaps
1509            let current_children: Vec<_> = new_parent.children(&self.arena).collect();
1510            if position < current_children.len() {
1511                let occupant = current_children[position];
1512                if occupant != node {
1513                    occupant.insert_before(node, &mut self.arena);
1514                    let slot = self.detach_to_slot(occupant);
1515                    debug!(?occupant, slot, "Move: detached gap-filler to slot");
1516                    Some(slot)
1517                } else {
1518                    None
1519                }
1520            } else {
1521                debug!("Move: appending (no occupant)");
1522                new_parent.append(node, &mut self.arena);
1523                None
1524            }
1525        }
1526    }
1527}
1528
1529/// Convert cinereus ops to path-based Patches.
1530///
1531/// This uses a "shadow tree" approach: we maintain a mutable copy of tree_a
1532/// and simulate applying each operation to it. This lets us compute correct
1533/// paths that account for index shifts from earlier operations.
1534///
1535/// Key insight from Chawathe semantics: INSERT and MOVE do NOT shift siblings.
1536/// They DISPLACE whatever is at the target position into a slot for later use.
1537fn convert_ops_with_shadow<'a, T: DiffTree<Types = HtmlTreeTypes<'a>>>(
1538    ops: Vec<EditOp<HtmlTreeTypes<'a>>>,
1539    tree_a: &Tree<HtmlTreeTypes<'a>>,
1540    tree_b: &T,
1541    matching: &Matching,
1542) -> Result<Vec<Patch<'a>>, DiffError> {
1543    // Create shadow tree with encapsulated state
1544    let mut shadow = ShadowTree::new(tree_a.arena.clone(), tree_a.root);
1545
1546    // Map from tree_b NodeIds to shadow tree NodeIds
1547    // Initially populated from matching (matched nodes)
1548    let mut b_to_shadow: HashMap<NodeId, NodeId> = HashMap::new();
1549    for (a_id, b_id) in matching.pairs() {
1550        b_to_shadow.insert(b_id, a_id);
1551    }
1552
1553    // Collect all nodes that have explicit Insert operations in the ops list.
1554    // These should not be included as children during extract_content_from_tree_b.
1555    let nodes_with_insert_ops: HashSet<NodeId> = ops
1556        .iter()
1557        .filter_map(|op| match op {
1558            EditOp::Insert { node_b, .. } => Some(*node_b),
1559            _ => None,
1560        })
1561        .collect();
1562
1563    let mut result = Vec::new();
1564
1565    #[cfg(test)]
1566    shadow.debug_print_tree("Initial shadow tree");
1567
1568    // Process operations in cinereus order.
1569    // For each op: update shadow tree, THEN compute paths from updated state.
1570    for op in ops {
1571        debug!(?op, "Processing operation");
1572        match op {
1573            EditOp::UpdateProperties {
1574                node_a,
1575                node_b: _,
1576                changes,
1577            } => {
1578                // Path to the node containing the attributes
1579                let path = shadow.compute_path(node_a);
1580
1581                // Convert cinereus PropertyInFinalState to our PropChange
1582                // The changes vec represents the complete final attribute state
1583                let prop_changes: Vec<PropChange> = changes
1584                    .into_iter()
1585                    .map(|c| PropChange {
1586                        name: c.key,
1587                        value: match c.value {
1588                            cinereus::tree::PropValue::Same => None,
1589                            cinereus::tree::PropValue::Different(v) => Some(v),
1590                        },
1591                    })
1592                    .collect();
1593
1594                // cinereus only emits UpdateProperties when there's a real change or full removal
1595                result.push(Patch::UpdateProps {
1596                    path: NodePath(path),
1597                    changes: prop_changes,
1598                });
1599                // No structural change for UpdateProps
1600            }
1601
1602            EditOp::Insert {
1603                node_b,
1604                parent_b,
1605                position,
1606                kind,
1607                ..
1608            } => {
1609                // Find the parent in our shadow tree
1610                let shadow_parent = b_to_shadow
1611                    .get(&parent_b)
1612                    .copied()
1613                    .unwrap_or_else(|| shadow.slot0_content());
1614
1615                // Create a new node in shadow tree (placeholder for structure tracking)
1616                let new_data: NodeData<HtmlTreeTypes> = NodeData {
1617                    hash: NodeHash(0),
1618                    kind: kind.clone(),
1619                    properties: tree_b.properties(node_b).clone(),
1620                    text: tree_b.text(node_b).cloned(),
1621                };
1622                let new_node = shadow.arena.new_node(new_data);
1623
1624                // Insert and handle displacement automatically
1625                let detach_to_slot = shadow.insert_at_position(shadow_parent, position, new_node);
1626
1627                b_to_shadow.insert(node_b, new_node);
1628
1629                // Get reference with position included - this makes Insert consistent with Move!
1630                let at = shadow.get_node_ref_with_position(shadow_parent, position);
1631
1632                // Create the patch based on node kind
1633                match kind {
1634                    HtmlNodeKind::Element(tag, _ns) => {
1635                        let (attrs, children) = extract_content_from_tree_b(
1636                            node_b,
1637                            tree_b,
1638                            &b_to_shadow,
1639                            &nodes_with_insert_ops,
1640                        );
1641                        result.push(Patch::InsertElement {
1642                            at,
1643                            tag: tag.clone(),
1644                            attrs,
1645                            children,
1646                            detach_to_slot,
1647                        });
1648                    }
1649                    HtmlNodeKind::Text => {
1650                        let text = tree_b.text(node_b).cloned().unwrap_or_default();
1651                        result.push(Patch::InsertText {
1652                            at,
1653                            text,
1654                            detach_to_slot,
1655                        });
1656                    }
1657                    HtmlNodeKind::Comment => {
1658                        let text = tree_b.text(node_b).cloned().unwrap_or_default();
1659                        result.push(Patch::InsertComment {
1660                            at,
1661                            text,
1662                            detach_to_slot,
1663                        });
1664                    }
1665                }
1666
1667                #[cfg(test)]
1668                {
1669                    // Debug: print what's at the insertion position after Insert
1670                    debug!(
1671                        ?shadow_parent,
1672                        position,
1673                        ?detach_to_slot,
1674                        "After Insert - checking parent state"
1675                    );
1676                    if let Some(_parent_node) = shadow.arena.get(shadow_parent) {
1677                        let children: Vec<_> = shadow_parent
1678                            .children(&shadow.arena)
1679                            .enumerate()
1680                            .map(|(i, child)| {
1681                                let data = &shadow.arena[child].get();
1682                                (i, child, &data.kind)
1683                            })
1684                            .collect();
1685                        debug!(?children, "Parent children after Insert");
1686                    }
1687                    shadow.debug_print_tree("After Insert");
1688                }
1689            }
1690
1691            EditOp::Delete { node_a } => {
1692                let _node_kind = &tree_a.get(node_a).kind;
1693                debug!(?node_a, ?_node_kind, "Delete operation");
1694
1695                // Get the node reference (path starts with slot number)
1696                let node = shadow.get_node_ref(node_a);
1697
1698                // Detach from tree with placeholder to preserve sibling positions
1699                shadow.detach_with_placeholder(node_a);
1700
1701                result.push(Patch::Remove { node });
1702
1703                #[cfg(test)]
1704                shadow.debug_print_tree("After Delete");
1705            }
1706
1707            EditOp::Move {
1708                node_a,
1709                node_b,
1710                new_parent_b,
1711                new_position,
1712            } => {
1713                // Find new parent in shadow tree
1714                let shadow_new_parent = b_to_shadow
1715                    .get(&new_parent_b)
1716                    .copied()
1717                    .unwrap_or_else(|| shadow.slot0_content());
1718
1719                debug!(
1720                    ?node_a,
1721                    ?new_parent_b,
1722                    ?shadow_new_parent,
1723                    ?new_position,
1724                    "Move: starting"
1725                );
1726
1727                // CRITICAL: If node_a is an ancestor of shadow_new_parent, we need special handling.
1728                // In the DOM, moving a node moves its entire subtree. So we can't directly move
1729                // an ancestor under its descendant - we'd be moving the descendant too!
1730                //
1731                // Solution: First move all children of node_a to node_a's parent position,
1732                // then move the (now childless) node_a under the descendant.
1733                let is_ancestor = shadow.is_ancestor(node_a, shadow_new_parent);
1734                if is_ancestor {
1735                    debug!(
1736                        ?node_a,
1737                        ?shadow_new_parent,
1738                        "Move: ancestor case - reparenting children first"
1739                    );
1740
1741                    // Get the parent of node_a and the position of node_a within that parent
1742                    let node_a_parent = shadow
1743                        .arena
1744                        .get(node_a)
1745                        .and_then(|n| n.parent())
1746                        .expect("node_a should have a parent");
1747                    let node_a_position = node_a_parent
1748                        .children(&shadow.arena)
1749                        .position(|c| c == node_a)
1750                        .unwrap_or(0);
1751
1752                    // Collect children of node_a (they need to be moved to node_a's position)
1753                    let children: Vec<_> = node_a.children(&shadow.arena).collect();
1754
1755                    // Move each child to node_a's parent, right after node_a's position
1756                    // We process in reverse order so positions stay stable
1757                    for (i, child) in children.iter().enumerate().rev() {
1758                        let child_from = shadow.get_node_ref(*child);
1759                        let child_position = node_a_position + 1 + i;
1760
1761                        // Move child in shadow tree (simple detach since we're moving to sibling position)
1762                        shadow.detach_with_placeholder(*child);
1763
1764                        // Insert after node_a
1765                        let siblings: Vec<_> = node_a_parent.children(&shadow.arena).collect();
1766                        if child_position < siblings.len() {
1767                            siblings[child_position].insert_before(*child, &mut shadow.arena);
1768                        } else {
1769                            node_a_parent.append(*child, &mut shadow.arena);
1770                        }
1771
1772                        let child_to =
1773                            shadow.get_node_ref_with_position(node_a_parent, child_position);
1774
1775                        debug!(
1776                            ?child,
1777                            ?child_from,
1778                            ?child_to,
1779                            "Move: reparenting child of ancestor"
1780                        );
1781
1782                        result.push(Patch::Move {
1783                            from: child_from,
1784                            to: child_to,
1785                            detach_to_slot: None,
1786                        });
1787                    }
1788
1789                    #[cfg(test)]
1790                    shadow.debug_print_tree("After reparenting children");
1791                }
1792
1793                // Get source reference (after any child reparenting)
1794                debug!(?node_a, "Move: computing from reference for node");
1795                let from = shadow.get_node_ref(node_a);
1796                debug!(?node_a, ?from, "Move: computed from reference");
1797
1798                // Debug: check what's at the target position BEFORE the move
1799                #[cfg(test)]
1800                if shadow.arena.get(shadow_new_parent).is_some() {
1801                    #[allow(unused_variables)]
1802                    let children: Vec<_> = shadow_new_parent
1803                        .children(&shadow.arena)
1804                        .enumerate()
1805                        .map(|(i, child)| {
1806                            let data = &shadow.arena[child].get();
1807                            (i, child, &data.kind)
1808                        })
1809                        .collect();
1810                    debug!(?children, "Parent children BEFORE Move");
1811                }
1812
1813                // Move node to new position - handles displacement automatically!
1814                let detach_to_slot =
1815                    shadow.move_to_position(node_a, shadow_new_parent, new_position);
1816
1817                // Get target reference with position
1818                let to = shadow.get_node_ref_with_position(shadow_new_parent, new_position);
1819
1820                debug!(?node_a, ?from, ?to, ?detach_to_slot, "Generated Move patch");
1821
1822                result.push(Patch::Move {
1823                    from,
1824                    to,
1825                    detach_to_slot,
1826                });
1827
1828                // Update b_to_shadow
1829                b_to_shadow.insert(node_b, node_a);
1830
1831                #[cfg(test)]
1832                shadow.debug_print_tree("After Move");
1833            }
1834
1835            EditOp::SetText {
1836                node_a,
1837                node_b: _,
1838                text,
1839            } => {
1840                // Path to the text/comment node
1841                let path = shadow.compute_path(node_a);
1842
1843                result.push(Patch::SetText {
1844                    path: NodePath(path),
1845                    text,
1846                });
1847                // No structural change for SetText
1848            }
1849        }
1850    }
1851
1852    Ok(result)
1853}
1854
1855/// Extract attributes and children from a node in tree_b.
1856fn extract_content_from_tree_b<'a, T: DiffTree<Types = HtmlTreeTypes<'a>>>(
1857    node_b: NodeId,
1858    tree_b: &T,
1859    b_to_shadow: &HashMap<NodeId, NodeId>,
1860    nodes_with_insert_ops: &HashSet<NodeId>,
1861) -> (Vec<AttrPair<'a>>, Vec<InsertContent<'a>>) {
1862    let props = tree_b.properties(node_b);
1863    let attrs: Vec<_> = props
1864        .attrs
1865        .iter()
1866        .map(|(k, v)| AttrPair {
1867            name: k.clone(),
1868            value: v.clone(),
1869        })
1870        .collect();
1871
1872    // Get children
1873    let mut children = Vec::new();
1874    for child_id in tree_b.children(node_b) {
1875        // Skip children that:
1876        // 1. Are matched (will be handled by Move operations)
1877        // 2. Have their own Insert operations (not simplified away)
1878        if b_to_shadow.contains_key(&child_id) || nodes_with_insert_ops.contains(&child_id) {
1879            continue;
1880        }
1881
1882        let child_kind = tree_b.kind(child_id);
1883        match child_kind {
1884            HtmlNodeKind::Element(tag, _ns) => {
1885                let (child_attrs, child_children) = extract_content_from_tree_b(
1886                    child_id,
1887                    tree_b,
1888                    b_to_shadow,
1889                    nodes_with_insert_ops,
1890                );
1891                children.push(InsertContent::Element {
1892                    tag: tag.clone(),
1893                    attrs: child_attrs,
1894                    children: child_children,
1895                });
1896            }
1897            HtmlNodeKind::Text => {
1898                let text = tree_b.text(child_id).cloned().unwrap_or_default();
1899                children.push(InsertContent::Text(text));
1900            }
1901            HtmlNodeKind::Comment => {
1902                let text = tree_b.text(child_id).cloned().unwrap_or_default();
1903                children.push(InsertContent::Comment(text));
1904            }
1905        }
1906    }
1907
1908    (attrs, children)
1909}
1910
1911// ============================================================================
1912// into_owned implementations for serialization across lifetime boundaries
1913// ============================================================================
1914
1915impl<'a> AttrPair<'a> {
1916    /// Convert to an owned version with 'static lifetime.
1917    pub fn into_owned(self) -> AttrPair<'static> {
1918        AttrPair {
1919            name: self.name,
1920            value: self.value.into_owned(),
1921        }
1922    }
1923}
1924
1925impl<'a> InsertContent<'a> {
1926    /// Convert to an owned version with 'static lifetime.
1927    pub fn into_owned(self) -> InsertContent<'static> {
1928        match self {
1929            InsertContent::Element {
1930                tag,
1931                attrs,
1932                children,
1933            } => InsertContent::Element {
1934                tag,
1935                attrs: attrs.into_iter().map(|a| a.into_owned()).collect(),
1936                children: children.into_iter().map(|c| c.into_owned()).collect(),
1937            },
1938            InsertContent::Text(s) => InsertContent::Text(s.into_owned()),
1939            InsertContent::Comment(s) => InsertContent::Comment(s.into_owned()),
1940        }
1941    }
1942}
1943
1944impl<'a> PropChange<'a> {
1945    /// Convert to an owned version with 'static lifetime.
1946    pub fn into_owned(self) -> PropChange<'static> {
1947        PropChange {
1948            name: self.name,
1949            value: self.value.map(|s| s.into_owned()),
1950        }
1951    }
1952}
1953
1954impl<'a> Patch<'a> {
1955    /// Convert to an owned version with 'static lifetime.
1956    pub fn into_owned(self) -> Patch<'static> {
1957        match self {
1958            Patch::InsertElement {
1959                at,
1960                tag,
1961                attrs,
1962                children,
1963                detach_to_slot,
1964            } => Patch::InsertElement {
1965                at,
1966                tag,
1967                attrs: attrs.into_iter().map(|a| a.into_owned()).collect(),
1968                children: children.into_iter().map(|c| c.into_owned()).collect(),
1969                detach_to_slot,
1970            },
1971            Patch::InsertText {
1972                at,
1973                text,
1974                detach_to_slot,
1975            } => Patch::InsertText {
1976                at,
1977                text: text.into_owned(),
1978                detach_to_slot,
1979            },
1980            Patch::InsertComment {
1981                at,
1982                text,
1983                detach_to_slot,
1984            } => Patch::InsertComment {
1985                at,
1986                text: text.into_owned(),
1987                detach_to_slot,
1988            },
1989            Patch::Remove { node } => Patch::Remove { node },
1990            Patch::SetText { path, text } => Patch::SetText {
1991                path,
1992                text: text.into_owned(),
1993            },
1994            Patch::SetAttribute { path, name, value } => Patch::SetAttribute {
1995                path,
1996                name,
1997                value: value.into_owned(),
1998            },
1999            Patch::RemoveAttribute { path, name } => Patch::RemoveAttribute { path, name },
2000            Patch::Move {
2001                from,
2002                to,
2003                detach_to_slot,
2004            } => Patch::Move {
2005                from,
2006                to,
2007                detach_to_slot,
2008            },
2009            Patch::UpdateProps { path, changes } => Patch::UpdateProps {
2010                path,
2011                changes: changes.into_iter().map(|c| c.into_owned()).collect(),
2012            },
2013            Patch::OpaqueChanged { path, content } => Patch::OpaqueChanged {
2014                path,
2015                content: content.into_owned(),
2016            },
2017        }
2018    }
2019}
2020
2021#[cfg(test)]
2022mod tests {
2023    use super::*;
2024    use crate::dom;
2025    use facet_testhelpers::test;
2026
2027    /// Helper to create a StrTendril from a string
2028    fn t(s: &str) -> StrTendril {
2029        StrTendril::from(s)
2030    }
2031
2032    #[test]
2033    fn test_build_tree_simple() {
2034        let html = t("<html><body><div>hello</div></body></html>");
2035        let doc = dom::parse(&html);
2036        let tree = build_tree_from_arena(&doc);
2037
2038        // Root is body element (build_tree_from_arena uses body as root)
2039        let root_data = tree.get(tree.root);
2040        assert!(matches!(&root_data.kind, HtmlNodeKind::Element(t, _) if t.as_ref() == "body"));
2041
2042        // One child (div)
2043        assert_eq!(tree.child_count(tree.root), 1);
2044    }
2045
2046    #[test]
2047    fn test_diff_text_change() {
2048        let old_html = t("<html><body><div>old</div></body></html>");
2049        let new_html = t("<html><body><div>new</div></body></html>");
2050
2051        let old = dom::parse(&old_html);
2052        let new = dom::parse(&new_html);
2053        let patches = diff(&old, &new).unwrap();
2054
2055        // Should have a SetText patch for the text change
2056        let has_text_update = patches.iter().any(|p| matches!(p, Patch::SetText { .. }));
2057        assert!(
2058            has_text_update,
2059            "Expected SetText patch, got: {:?}",
2060            patches
2061        );
2062    }
2063
2064    #[test]
2065    fn test_diff_attr_change() {
2066        let old_html = t(r#"<html><body><div class="foo"></div></body></html>"#);
2067        let new_html = t(r#"<html><body><div class="bar"></div></body></html>"#);
2068
2069        let old = dom::parse(&old_html);
2070        let new = dom::parse(&new_html);
2071        let patches = diff(&old, &new).unwrap();
2072
2073        let has_attr_update = patches.iter().any(|p| {
2074            matches!(p, Patch::UpdateProps { changes, .. }
2075                if changes.iter().any(|c| matches!(c.name, PropKey::Attr(ref q) if q.local.as_ref() == "class")))
2076        });
2077        assert!(
2078            has_attr_update,
2079            "Expected attr update patch, got: {:?}",
2080            patches
2081        );
2082    }
2083
2084    #[test]
2085    fn test_diff_remove_all_children() {
2086        // Reproduce fuzzer failure: body with child -> body with no children
2087        let old_html = t("<html><body><span></span></body></html>");
2088        let new_html = t("<html><body></body></html>");
2089
2090        let old = dom::parse(&old_html);
2091        let new = dom::parse(&new_html);
2092        let patches = diff(&old, &new).unwrap();
2093        debug!("Patches: {:#?}", patches);
2094
2095        // Should be able to apply the patches
2096        let mut doc = dom::parse(&old_html);
2097        doc.apply_patches(patches).expect("apply should succeed");
2098
2099        let result = doc.to_html();
2100        let expected = dom::parse(&new_html).to_html();
2101        assert_eq!(result, expected, "HTML output should match");
2102    }
2103
2104    #[test]
2105    fn test_diff_complex_fuzzer_case() {
2106        // Fuzzer found: <body><strong>old_text</strong></body> -> <body>new_text<strong>updated</strong></body>
2107        let old_html = t("<html><body><strong>old</strong></body></html>");
2108        let new_html = t("<html><body>new_text<strong>updated</strong></body></html>");
2109
2110        let old = dom::parse(&old_html);
2111        let new = dom::parse(&new_html);
2112        let patches = diff(&old, &new).unwrap();
2113        debug!("Patches: {:#?}", patches);
2114
2115        let mut doc = dom::parse(&old_html);
2116        doc.apply_patches(patches).expect("apply should succeed");
2117
2118        let result = doc.to_html();
2119        let expected = dom::parse(&new_html).to_html();
2120        debug!("Result: {}", result);
2121        debug!("Expected: {}", expected);
2122        assert_eq!(result, expected, "HTML output should match");
2123    }
2124
2125    #[test]
2126    fn test_diff_actual_fuzzer_crash() {
2127        // Actual fuzzer crash case (simplified):
2128        // Old: <strong>text1</strong><strong>text2</strong><img>
2129        // New: text3<strong>text4</strong>
2130        let old_html =
2131            t("<html><body><strong>text1</strong><strong>text2</strong><img></body></html>");
2132        let new_html = t("<html><body>text3<strong>text4</strong></body></html>");
2133
2134        let old = dom::parse(&old_html);
2135        let new = dom::parse(&new_html);
2136        let patches = diff(&old, &new).unwrap();
2137        debug!("Patches: {:#?}", patches);
2138
2139        let mut doc = dom::parse(&old_html);
2140        doc.apply_patches(patches).expect("apply should succeed");
2141
2142        let result = doc.to_html();
2143        let expected = dom::parse(&new_html).to_html();
2144        debug!("Result: {}", result);
2145        debug!("Expected: {}", expected);
2146        assert_eq!(result, expected, "HTML output should match");
2147    }
2148
2149    #[test]
2150    fn test_fuzzer_special_chars() {
2151        trace!("what");
2152
2153        // Test with actual fuzzer input that has special chars
2154        // html5ever parses "<jva       xx a >" as an element, creating nested structure
2155        // The bug: UpdateProps at [1,0] followed by Remove at [1,0] - we update text then delete it!
2156        // This appears to be a path tracking bug when handling complex displacement scenarios.
2157        let old_html = t(
2158            r#"<html><body><strong>n<&nhnnz"""" v</strong><strong>< bit<jva       xx a ></strong><img src="n" alt="v"></body></html>"#,
2159        );
2160        let new_html = t(r#"<html><body>n<strong>aaa</strong></body></html>"#);
2161
2162        let patches = super::diff_html(&old_html, &new_html).expect("diff failed");
2163        debug!("Patches: {:#?}", patches);
2164
2165        let mut doc = dom::parse(&old_html);
2166        trace!("Old tree: {:#?}", doc);
2167
2168        doc.apply_patches(patches).expect("apply failed");
2169
2170        let result = doc.to_html();
2171        let expected = dom::parse(&new_html).to_html();
2172
2173        debug!("Result: {}", result);
2174        debug!("Expected: {}", expected);
2175        assert_eq!(result, expected, "HTML output should match");
2176    }
2177
2178    #[test]
2179    fn test_fuzzer_img_li_roundtrip() {
2180        // Fuzzer found roundtrip failure with img and li elements
2181        let old_html = t(
2182            r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><html><body><p>Unclosed paragraph<span class=""></span><div>Inside P which browsers will auto-close</div><span>Unclosed span<div>Block in span</div></body></html>"#,
2183        );
2184        let new_html = t(
2185            r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><html><body><img src="vvv" alt="ttt">d <li>d<<<&<<a"d <<<</li></body></html>"#,
2186        );
2187
2188        let patches = super::diff_html(&old_html, &new_html).expect("diff failed");
2189        debug!("Patches: {:#?}", patches);
2190
2191        let mut doc = dom::parse(&old_html);
2192        doc.apply_patches(patches).expect("apply failed");
2193
2194        let result = doc.to_html();
2195        let expected_doc = dom::parse(&new_html);
2196        let expected = expected_doc.to_html();
2197
2198        debug!("Result: {}", result);
2199        debug!("Expected: {}", expected);
2200        assert_eq!(result, expected, "HTML output should match");
2201    }
2202
2203    #[test]
2204    fn test_fuzzer_nested_ul_remove() {
2205        // Fuzzer found issue with nested ul elements - img not being removed correctly
2206        let old_html = t(
2207            r#"<!DOCTYPE html><html><body><ul class="h"><ul class="z"><img src="vvv" alt="wvv"><ul class="h"><img src="vvv"></ul></ul></ul></body></html>"#,
2208        );
2209        let new_html = t(
2210            r#"<!DOCTYPE html><html><body><ul class="h"><ul class="h"></ul><ul class="q"><img src="aaa"></ul></ul></body></html>"#,
2211        );
2212
2213        let patches = super::diff_html(&old_html, &new_html).expect("diff failed");
2214        debug!("Patches: {:#?}", patches);
2215
2216        let mut doc = dom::parse(&old_html);
2217        doc.apply_patches(patches).expect("apply failed");
2218
2219        let result = doc.to_html();
2220        let expected_doc = dom::parse(&new_html);
2221        let expected = expected_doc.to_html();
2222
2223        debug!("Result: {}", result);
2224        debug!("Expected: {}", expected);
2225        assert_eq!(result, expected, "HTML output should match");
2226    }
2227
2228    #[test]
2229    fn test_fuzzer_em_li_navigate_text() {
2230        // Fuzzer found "Insert: cannot navigate through text node" error
2231        // The fuzzer generates unescaped HTML which html5ever parses as actual elements
2232        let old_html = t(r#"<!DOCTYPE html><html><body><em> <v<      << v</em></body></html>"#);
2233        let new_html =
2234            t(r#"<!DOCTYPE html><html><body><li>a< <v<      <<</li><img src=""></body></html>"#);
2235
2236        let old_doc = dom::parse(&old_html);
2237        let new_doc = dom::parse(&new_html);
2238        debug!("Old HTML parsed: {:#?}", old_doc);
2239        debug!("New HTML parsed: {:#?}", new_doc);
2240
2241        let patches = super::diff_html(&old_html, &new_html).expect("diff failed");
2242        debug!("Patches: {:#?}", patches);
2243
2244        let mut doc = dom::parse(&old_html);
2245        doc.apply_patches(patches).expect("apply failed");
2246
2247        let result = doc.to_html();
2248        let expected = dom::parse(&new_html).to_html();
2249
2250        debug!("Result: {}", result);
2251        debug!("Expected: {}", expected);
2252        assert_eq!(result, expected, "HTML output should match");
2253    }
2254
2255    #[test]
2256    fn test_fuzzer_nested_ol_patch_order() {
2257        // Fuzzer found "patch at index 4 is out of order" error
2258        let old_html = t(r#"<!DOCTYPE html><html><body><ol start="0"></ol></body></html>"#);
2259        let new_html = t(
2260            r#"<!DOCTYPE html><html><body><ol start="255"></ol><ol start="93"></ol><ol start="91"><ol start="1"><a href="vaaaaaaaaaaaaa"></a></ol></ol></body></html>"#,
2261        );
2262
2263        let patches = super::diff_html(&old_html, &new_html).expect("diff failed");
2264        debug!("Patches: {:#?}", patches);
2265
2266        let mut doc = dom::parse(&old_html);
2267        doc.apply_patches(patches).expect("apply failed");
2268
2269        let result = doc.to_html();
2270        let expected_doc = dom::parse(&new_html);
2271        let expected = expected_doc.to_html();
2272
2273        debug!("Result: {}", result);
2274        debug!("Expected: {}", expected);
2275        assert_eq!(result, expected, "HTML output should match");
2276    }
2277
2278    #[test]
2279    fn test_fuzzer_slot_contains_text() {
2280        // Fuzzer found "Move: slot contains text, cannot navigate to child" error
2281        let old_html = t(
2282            r#"<!DOCTYPE html><html><body><article><code><</code><code><</code><code><</code><code><</code></article></body></html>"#,
2283        );
2284        let new_html = t(
2285            r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><html><body><code><</code><code><</code><code><</code><code><</code><article><code><</code><code><</code><h2><<<<<<<<<<<<<</h2></article></body></html>"#,
2286        );
2287
2288        let patches = super::diff_html(&old_html, &new_html).expect("diff failed");
2289        debug!("Patches: {:#?}", patches);
2290
2291        let mut doc = dom::parse(&old_html);
2292        doc.apply_patches(patches).expect("apply failed");
2293
2294        let result = doc.to_html();
2295        let expected = dom::parse(&new_html).to_html();
2296
2297        debug!("Result: {}", result);
2298        debug!("Expected: {}", expected);
2299        assert_eq!(result, expected, "HTML output should match");
2300    }
2301
2302    #[test]
2303    fn test_fuzzer_article_code_move() {
2304        // Fuzzer found "Move: slot contains text, cannot navigate to child"
2305        // Fuzzer generates unescaped HTML
2306        let old_html = t(
2307            r#"<!DOCTYPE html><html><body><article><code><</code><code><</code><code><</code><code><</code><article><article><code><</code></article></article></article></body></html>"#,
2308        );
2309        let new_html = t(
2310            r#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><html><body><code><</code><code><</code><code><</code><code><</code><article><code><</code><code><</code><h2><<<<<<<<<<<<<</h2></article></body></html>"#,
2311        );
2312
2313        let patches = super::diff_html(&old_html, &new_html).expect("diff failed");
2314        debug!("Patches: {:#?}", patches);
2315
2316        let mut doc = dom::parse(&old_html);
2317        doc.apply_patches(patches).expect("apply failed");
2318
2319        let result = doc.to_html();
2320        let expected = dom::parse(&new_html).to_html();
2321
2322        debug!("Result: {}", result);
2323        debug!("Expected: {}", expected);
2324        assert_eq!(result, expected, "HTML output should match");
2325    }
2326
2327    #[test]
2328    fn test_arena_dom_diff() {
2329        // Test diffing with arena_dom documents
2330        let old_html = t("<html><body><div>Old content</div></body></html>");
2331        let new_html = t("<html><body><div>New content</div></body></html>");
2332
2333        let old_doc = dom::parse(&old_html);
2334        let new_doc = dom::parse(&new_html);
2335
2336        let patches = diff(&old_doc, &new_doc).expect("diff failed");
2337        debug!("Patches: {:#?}", patches);
2338
2339        // Verify correctness by applying patches and comparing result
2340        let mut doc = dom::parse(&old_html);
2341        doc.apply_patches(patches).expect("apply failed");
2342        let result = doc.to_html();
2343        let expected = dom::parse(&new_html).to_html();
2344        assert_eq!(result, expected, "HTML output should match");
2345    }
2346
2347    #[test]
2348    fn test_arena_dom_diff_add_element() {
2349        let old_html = t("<html><body><div>Content</div></body></html>");
2350        let new_html = t("<html><body><div>Content</div><p>New paragraph</p></body></html>");
2351
2352        let old_doc = dom::parse(&old_html);
2353        let new_doc = dom::parse(&new_html);
2354
2355        let patches = diff(&old_doc, &new_doc).expect("diff failed");
2356        debug!("Patches: {:#?}", patches);
2357
2358        // Should have an InsertElement patch
2359        assert!(!patches.is_empty());
2360        let has_insert = patches
2361            .iter()
2362            .any(|p| matches!(p, Patch::InsertElement { .. }));
2363        assert!(has_insert, "Should have InsertElement patch");
2364    }
2365
2366    #[test]
2367    fn test_patch_serialization() {
2368        use crate::diff_html;
2369
2370        let old_html = t(r#"<html><body><div>Content</div></body></html>"#);
2371        let new_html = t(r#"<html><body><div class="highlight">Content</div></body></html>"#);
2372
2373        let patches = diff_html(&old_html, &new_html).expect("diff should work");
2374
2375        let json = facet_json::to_string(&patches).expect("serialization should work");
2376        let roundtrip: Vec<Patch> =
2377            facet_json::from_str(&json).expect("deserialization should work");
2378        assert_eq!(patches, roundtrip);
2379    }
2380
2381    #[test]
2382    fn test_fuzz_seed_0_template_0() {
2383        use crate::diff_html;
2384
2385        let old_html = t(r##"<article>
2386    <h1>Article Title</h1>
2387    <p>First paragraph with <strong>bold</strong> and <em>italic</em> text.</p>
2388    <p>Second paragraph with a <a href="#">link</a>.</p>
2389  </article>"##);
2390
2391        let new_html = t(r##"<article>
2392
2393    <p>First paragraph with <strong>bold</strong> and content</p>
2394    <p data-test="hidden">Second paragraph with a .</p>
2395  </article>"##);
2396
2397        let patches = diff_html(&old_html, &new_html).expect("diff should work");
2398        debug!("Patches: {:#?}", patches);
2399
2400        // Check for slot references (first path element > 0 means it's in a non-main slot)
2401        for (i, patch) in patches.iter().enumerate() {
2402            debug!("Patch {}: {:?}", i, patch);
2403            if let Patch::Move { from, to, .. } = patch {
2404                let from_slot = from.0.0.first().copied().unwrap_or(0);
2405                let to_slot = to.0.0.first().copied().unwrap_or(0);
2406                if from_slot > 0 {
2407                    debug!("  -> Move FROM slot {}", from_slot);
2408                }
2409                if to_slot > 0 {
2410                    debug!("  -> Move TO slot {}", to_slot);
2411                }
2412            }
2413        }
2414    }
2415
2416    #[test]
2417    fn test_fuzz_seed_27_template_4() {
2418        use crate::diff_html;
2419        use crate::dom;
2420
2421        // This test reproduces a bug where the diff algorithm generates paths like [0,2,3,0]
2422        // but after all Move operations, [0,2] is a text node (which has no children).
2423        // The Delete operation tries to navigate to child 3 of a text node and fails.
2424
2425        let old_html = r##"<div>
2426    <h3>Features</h3>
2427    <ul>
2428      <li>Feature one with <code>code</code></li>
2429      <li>Feature two with <strong>emphasis</strong></li>
2430      <li>Feature three</li>
2431    </ul>
2432  </div>"##;
2433
2434        let new_html = r##"<div title="primary">
2435    <h3>Features</h3>
2436    <ul>
2437      <li>Feature one with <code>item</code></li>
2438
2439
2440    </ul>
2441  </div>"##;
2442
2443        let old_tendril = t(old_html);
2444        let new_tendril = t(new_html);
2445        let patches = diff_html(&old_tendril, &new_tendril).expect("diff should work");
2446        debug!("Patches: {:#?}", patches);
2447
2448        for (i, patch) in patches.iter().enumerate() {
2449            debug!("Patch {}: {:?}", i, patch);
2450        }
2451
2452        // Apply all patches at once
2453        let full_html = t(&format!("<html><body>{}</body></html>", old_html));
2454        let mut doc = dom::parse(&full_html);
2455        if let Err(e) = doc.apply_patches(patches.clone()) {
2456            panic!("Patches failed: {:?}", e);
2457        }
2458    }
2459
2460    #[test]
2461    fn measure_position_calls_xxl() {
2462        use cinereus::{get_position_stats, reset_position_counters};
2463
2464        let xxl_html = include_str!("../tests/fixtures/xxl.html");
2465        let modified = xxl_html.replacen("<div", "<div class=\"modified\"", 1);
2466
2467        // Reset counters
2468        reset_position_counters();
2469
2470        // Do the diff
2471        let old_tendril = t(xxl_html);
2472        let new_tendril = t(&modified);
2473        let old = dom::parse(&old_tendril);
2474        let new = dom::parse(&new_tendril);
2475        let _patches = diff(&old, &new).expect("diff failed");
2476
2477        // Get stats
2478        let (calls, scanned) = get_position_stats();
2479
2480        trace!("\n=== XXL document diff position() stats ===");
2481        trace!("  position() calls: {}", calls);
2482        trace!("  siblings scanned: {}", scanned);
2483        if calls > 0 {
2484            trace!(
2485                "  avg siblings per call: {:.2}",
2486                scanned as f64 / calls as f64
2487            );
2488        }
2489        trace!("===========================================\n");
2490    }
2491
2492    /// Regression test for facet-json bug: escape sequences after multi-byte UTF-8
2493    /// were incorrectly deserialized. Fixed in https://github.com/facet-rs/facet/pull/1892
2494    #[test]
2495    fn test_facet_json_unicode_escape_bug() {
2496        // When Unicode characters precede an escape sequence like \n,
2497        // facet-json incorrectly deserializes it as literal backslash-n
2498        let original = "中文\n".to_string();
2499        debug!("Original bytes: {:?}", original.as_bytes());
2500        // Original bytes: [228, 184, 173, 230, 150, 135, 10]
2501
2502        let json = facet_json::to_string(&original).expect("serialize");
2503        debug!("JSON: {}", json);
2504
2505        let roundtrip: String = facet_json::from_str(&json).expect("deserialize");
2506        debug!("Roundtrip bytes: {:?}", roundtrip.as_bytes());
2507        // BUG: Roundtrip bytes: [228, 184, 173, 230, 150, 135, 92, 110]
2508        //                                                    ^^ literal '\n'
2509
2510        assert_eq!(original, roundtrip, "String should roundtrip through JSON");
2511    }
2512
2513    #[test]
2514    fn test_string_ascii_only_multiple_newlines() {
2515        // ASCII-only strings with escapes work correctly
2516        let original = "\n  hello\n  world\n".to_string();
2517
2518        let json = facet_json::to_string(&original).expect("serialize");
2519        let roundtrip: String = facet_json::from_str(&json).expect("deserialize");
2520
2521        assert_eq!(
2522            original, roundtrip,
2523            "ASCII string should roundtrip through JSON"
2524        );
2525    }
2526
2527    // =========================================================================
2528    // HTML5 Spec: Foster Parenting Tests
2529    // https://html.spec.whatwg.org/multipage/parsing.html#foster-parent
2530    //
2531    // When content appears in invalid positions inside tables (e.g., text or
2532    // inline elements directly inside <table>, <tbody>, <tr>, etc.), the HTML5
2533    // spec requires "foster parenting" - moving that content before the table.
2534    // =========================================================================
2535
2536    /// Basic foster parenting: element inside table
2537    #[test]
2538    fn test_foster_parent_element_in_table() {
2539        use crate::dom;
2540
2541        // Invalid HTML: <span> directly inside <table> triggers foster parenting
2542        let html = "<table><span>hello</span><tr><td>cell</td></tr></table>";
2543        let full_html = t(&format!("<html><body>{}</body></html>", html));
2544        let doc = dom::parse(&full_html);
2545        let result = doc.to_html();
2546
2547        // The span should be foster-parented BEFORE the table
2548        assert!(
2549            result.contains("<span>hello</span><table>"),
2550            "Span should appear before table, got: {}",
2551            result
2552        );
2553    }
2554
2555    /// Foster parenting: text node inside table
2556    #[test]
2557    fn test_foster_parent_text_in_table() {
2558        use crate::dom;
2559
2560        // Text directly in table gets foster parented
2561        let html = "<table>orphan text<tr><td>cell</td></tr></table>";
2562        let full_html = t(&format!("<html><body>{}</body></html>", html));
2563        let doc = dom::parse(&full_html);
2564        let result = doc.to_html();
2565
2566        // The text should appear before the table
2567        assert!(
2568            result.contains("orphan text<table>"),
2569            "Text should be foster-parented before table, got: {}",
2570            result
2571        );
2572    }
2573
2574    /// Foster parenting: multiple items
2575    #[test]
2576    fn test_foster_parent_multiple_items() {
2577        use crate::dom;
2578
2579        // Multiple items that need foster parenting
2580        let html = "<table><b>bold</b><i>italic</i><tr><td>cell</td></tr></table>";
2581        let full_html = t(&format!("<html><body>{}</body></html>", html));
2582        let doc = dom::parse(&full_html);
2583        let result = doc.to_html();
2584
2585        // Both elements should be before the table, in order
2586        assert!(
2587            result.contains("<b>bold</b><i>italic</i><table>"),
2588            "Both elements should be foster-parented before table, got: {}",
2589            result
2590        );
2591    }
2592
2593    /// HTML5 spec example: `<table><b><tr><td>aaa</td></tr>bbb</table>ccc`
2594    /// From: https://html.spec.whatwg.org/multipage/parsing.html#foster-parent
2595    #[test]
2596    fn test_foster_parent_spec_example() {
2597        use crate::dom;
2598
2599        // This is the exact example from the HTML5 spec
2600        let html = "<table><b><tr><td>aaa</td></tr>bbb</table>ccc";
2601        let full_html = t(&format!("<html><body>{}</body></html>", html));
2602        let doc = dom::parse(&full_html);
2603        let result = doc.to_html();
2604
2605        // Per the spec, this should produce:
2606        // <b></b><b>bbb</b><table><tbody><tr><td>aaa</td></tr></tbody></table><b>ccc</b>
2607        //
2608        // The <b> is opened before table (foster parented), empty because it's
2609        // immediately followed by <tr>. The "bbb" text after </tr> is also foster
2610        // parented (with the <b> formatting inherited). The "ccc" after </table>
2611        // keeps the <b> formatting.
2612
2613        // Check key structural elements
2614        assert!(
2615            result.contains("<tbody><tr><td>aaa</td></tr></tbody>"),
2616            "Table content should be properly structured, got: {}",
2617            result
2618        );
2619        assert!(
2620            result.contains("bbb"),
2621            "Foster parented text 'bbb' should exist, got: {}",
2622            result
2623        );
2624        assert!(
2625            result.contains("ccc"),
2626            "Text 'ccc' after table should exist, got: {}",
2627            result
2628        );
2629    }
2630
2631    /// Foster parenting with nested tables
2632    #[test]
2633    fn test_foster_parent_nested_tables() {
2634        use crate::dom;
2635
2636        // Inner table content should only affect the innermost table
2637        let html =
2638            "<table><tr><td><table><span>inner</span><tr><td>x</td></tr></table></td></tr></table>";
2639        let full_html = t(&format!("<html><body>{}</body></html>", html));
2640        let doc = dom::parse(&full_html);
2641        let result = doc.to_html();
2642
2643        // The span should be foster parented before the inner table, not the outer
2644        assert!(
2645            result.contains("<span>inner</span><table>"),
2646            "Span should be foster-parented before inner table, got: {}",
2647            result
2648        );
2649        // The outer table structure should remain intact
2650        assert!(
2651            result.contains("<table><tbody><tr><td>"),
2652            "Outer table structure should be preserved, got: {}",
2653            result
2654        );
2655    }
2656
2657    /// Foster parenting: content in tbody
2658    #[test]
2659    fn test_foster_parent_content_in_tbody() {
2660        use crate::dom;
2661
2662        // Content directly in tbody (outside tr) also triggers foster parenting
2663        let html = "<table><tbody><span>orphan</span><tr><td>cell</td></tr></tbody></table>";
2664        let full_html = t(&format!("<html><body>{}</body></html>", html));
2665        let doc = dom::parse(&full_html);
2666        let result = doc.to_html();
2667
2668        // The span should be foster parented before the table
2669        assert!(
2670            result.contains("<span>orphan</span><table>"),
2671            "Span in tbody should be foster-parented before table, got: {}",
2672            result
2673        );
2674    }
2675
2676    /// Foster parenting: content in tr (outside td)
2677    #[test]
2678    fn test_foster_parent_content_in_tr() {
2679        use crate::dom;
2680
2681        // Content directly in tr (outside td/th) triggers foster parenting
2682        let html = "<table><tr><span>orphan</span><td>cell</td></tr></table>";
2683        let full_html = t(&format!("<html><body>{}</body></html>", html));
2684        let doc = dom::parse(&full_html);
2685        let result = doc.to_html();
2686
2687        // The span should be foster parented before the table
2688        assert!(
2689            result.contains("<span>orphan</span><table>"),
2690            "Span in tr should be foster-parented before table, got: {}",
2691            result
2692        );
2693    }
2694
2695    /// Foster parenting preserves element order
2696    #[test]
2697    fn test_foster_parent_preserves_order() {
2698        use crate::dom;
2699
2700        let html = "<table><a>1</a><b>2</b><c>3</c><tr><td>x</td></tr></table>";
2701        let full_html = t(&format!("<html><body>{}</body></html>", html));
2702        let doc = dom::parse(&full_html);
2703        let result = doc.to_html();
2704
2705        // Elements should appear in the same order they were in the source
2706        let a_pos = result.find("<a>1</a>").expect("should find a");
2707        let b_pos = result.find("<b>2</b>").expect("should find b");
2708        let c_pos = result.find("<c>3</c>").expect("should find c");
2709        let table_pos = result.find("<table>").expect("should find table");
2710
2711        assert!(
2712            a_pos < b_pos && b_pos < c_pos && c_pos < table_pos,
2713            "Elements should be in order before table, got: {}",
2714            result
2715        );
2716    }
2717
2718    /// Foster parenting: whitespace handling
2719    #[test]
2720    fn test_foster_parent_whitespace() {
2721        use crate::dom;
2722
2723        // Whitespace-only text nodes in certain positions are handled specially
2724        // but significant text gets foster parented
2725        let html = "<table>  significant  <tr><td>cell</td></tr></table>";
2726        let full_html = t(&format!("<html><body>{}</body></html>", html));
2727        let doc = dom::parse(&full_html);
2728        let result = doc.to_html();
2729
2730        // The text should be before the table
2731        assert!(
2732            result.contains("significant") && result.find("significant") < result.find("<table>"),
2733            "Significant text should be foster-parented before table, got: {}",
2734            result
2735        );
2736    }
2737
2738    /// Regression test for patch JSON roundtrip with Unicode.
2739    /// Fixed in https://github.com/facet-rs/facet/pull/1892
2740    #[test]
2741    fn test_patch_json_roundtrip_with_unicode() {
2742        use crate::diff_html;
2743        use crate::dom;
2744
2745        let old_html = t("<my-header>\n      <h1>Title</h1>\n    </my-header>");
2746        let new_html = t("<my-header>\n      中文App Title\n    </my-header>");
2747
2748        let patches = diff_html(&old_html, &new_html).expect("diff should work");
2749        let json = facet_json::to_string(&patches).expect("serialization should work");
2750        let roundtrip: Vec<Patch> =
2751            facet_json::from_str(&json).expect("deserialization should work");
2752
2753        assert_eq!(patches, roundtrip, "Patches should roundtrip through JSON");
2754
2755        let old_full = t(&format!(
2756            "<html><body>{}</body></html>",
2757            old_html.as_ref() as &str
2758        ));
2759        let mut doc = dom::parse(&old_full);
2760        doc.apply_patches(roundtrip).expect("apply should succeed");
2761
2762        let result = doc.to_html();
2763        let new_full = t(&format!(
2764            "<html><body>{}</body></html>",
2765            new_html.as_ref() as &str
2766        ));
2767        let expected = dom::parse(&new_full).to_html();
2768        assert_eq!(result, expected, "HTML output should match");
2769    }
2770
2771    /// Test SVG inside strong - reproduction of fuzz seed 1 failure
2772    #[test]
2773    fn test_svg_inside_strong() {
2774        use crate::diff_html;
2775        use crate::dom;
2776
2777        // Simplified version of the failing case
2778        let old_html = r#"<p>Text with <strong>bold</strong> word.</p>"#;
2779        let new_html = r#"<p>Text with <strong><svg width="29" height="21"><circle></circle></svg></strong> word.</p>"#;
2780
2781        trace!("=== Old HTML ===");
2782        trace!("{}", old_html);
2783        trace!("\n=== New HTML ===");
2784        trace!("{}", new_html);
2785
2786        let old_full = t(&format!("<html><body>{}</body></html>", old_html));
2787        let new_full = t(&format!("<html><body>{}</body></html>", new_html));
2788        let old_parsed = dom::parse(&old_full);
2789        let new_parsed = dom::parse(&new_full);
2790
2791        trace!("\n=== Old parsed ===");
2792        trace!("{}", old_parsed.to_html());
2793        trace!("\n=== New parsed ===");
2794        trace!("{}", new_parsed.to_html());
2795
2796        let patches = diff_html(&old_full, &new_full).expect("diff should work");
2797
2798        trace!("\n=== Patches ===");
2799        for (i, patch) in patches.iter().enumerate() {
2800            trace!("{}: {:?}", i, patch);
2801        }
2802
2803        let mut doc = dom::parse(&old_full);
2804        doc.apply_patches(patches).expect("apply should succeed");
2805
2806        let result = doc.to_html();
2807        let expected = new_parsed.to_html();
2808
2809        trace!("\n=== After patches ===");
2810        trace!("{}", result);
2811        trace!("\n=== Expected ===");
2812        trace!("{}", expected);
2813
2814        assert_eq!(result, expected, "HTML output should match");
2815    }
2816
2817    /// Test: adoption agency algorithm difference with block elements
2818    ///
2819    /// When you put a block element (like `<section>`) inside a formatting element
2820    /// (like `<strong>`) inside `<p>`, HTML5 requires complex "adoption agency"
2821    /// handling. We should preserve the formatting element across the block boundary
2822    /// so that subsequent SVG is wrapped correctly.
2823    ///
2824    /// Browser: maintains `<strong>` context after `</section>`, wrapping subsequent SVG
2825    /// html5ever: now matches browser behavior here.
2826    #[test]
2827    #[ignore = "fixed in fork, but not in upstream"]
2828    fn test_adoption_agency_block_in_formatting() {
2829        use crate::dom;
2830
2831        // When you put <section> inside <p>, the browser closes the <p> first
2832        // This tests HTML5 adoption agency algorithm behavior
2833        let html = r#"<p>First with <strong>text<section>break</section><svg width="29"><circle></circle></svg></strong> end.</p>"#;
2834
2835        let full_html = t(&format!("<html><body>{}</body></html>", html));
2836        let doc = dom::parse(&full_html);
2837        let result = doc.to_html();
2838
2839        // The <section> should have been moved outside <p> due to HTML5 parsing rules
2840        assert!(
2841            !result.contains("<p><strong>text<section>"),
2842            "Section should not remain inside p>strong (invalid nesting), got: {}",
2843            result
2844        );
2845
2846        // Verify our output matches the corrected html5ever behavior.
2847        // The SVG should be wrapped by the active formatting element.
2848        assert_eq!(
2849            result,
2850            "<html><head></head><body><p>First with <strong>text</strong></p>\
2851             <section><strong>break</strong></section>\
2852             <strong><svg width=\"29\"><circle></circle></svg></strong> end.<p></p></body></html>",
2853            "Output should match html5ever behavior"
2854        );
2855    }
2856
2857    /// Regression test for OOM caused by cycle in parent chain.
2858    /// When moving a node under its own descendant, we must not create a cycle.
2859    #[test]
2860    fn test_move_parent_under_child_no_cycle() {
2861        // Build a tree: A -> B -> C (A is grandparent of C)
2862        // Then try to move A to position 0 under B, displacing C.
2863        // This triggers insert_before which doesn't check for cycles.
2864        let mut arena: indextree::Arena<NodeData<HtmlTreeTypes>> = indextree::Arena::new();
2865
2866        // Create nodes: body -> div_a -> div_b -> div_c
2867        let body = arena.new_node(NodeData {
2868            hash: NodeHash(0),
2869            kind: HtmlNodeKind::Element(LocalName::from("body"), Namespace::Html),
2870            properties: HtmlProps::default(),
2871            text: None,
2872        });
2873        let div_a = arena.new_node(NodeData {
2874            hash: NodeHash(0),
2875            kind: HtmlNodeKind::Element(LocalName::from("div"), Namespace::Html),
2876            properties: HtmlProps::default(),
2877            text: None,
2878        });
2879        let div_b = arena.new_node(NodeData {
2880            hash: NodeHash(0),
2881            kind: HtmlNodeKind::Element(LocalName::from("div"), Namespace::Html),
2882            properties: HtmlProps::default(),
2883            text: None,
2884        });
2885        let div_c = arena.new_node(NodeData {
2886            hash: NodeHash(0),
2887            kind: HtmlNodeKind::Element(LocalName::from("div"), Namespace::Html),
2888            properties: HtmlProps::default(),
2889            text: None,
2890        });
2891
2892        body.append(div_a, &mut arena);
2893        div_a.append(div_b, &mut arena);
2894        div_b.append(div_c, &mut arena);
2895
2896        // Structure: body -> div_a -> div_b -> div_c
2897        let mut shadow = ShadowTree::new(arena, body);
2898
2899        shadow.debug_print_tree("Initial");
2900
2901        // Now try to move div_a to position 0 under div_b (moving parent under child)
2902        // div_c is at position 0, so this triggers insert_before
2903        // This should NOT create a cycle
2904        shadow.move_to_position(div_a, div_b, 0);
2905
2906        shadow.debug_print_tree("After move");
2907
2908        // Verify no cycle by computing path - this would hang/OOM if there's a cycle
2909        let path = shadow.compute_path(div_a);
2910        debug!(?path, "Path to div_a after move");
2911
2912        // div_a should now be a child of div_b
2913        assert!(
2914            div_b.children(&shadow.arena).any(|c| c == div_a),
2915            "div_a should be a child of div_b after move"
2916        );
2917    }
2918
2919    #[test]
2920    fn test_opaque_unchanged_no_patches() {
2921        // When an opaque node's content is identical, no patches should be emitted
2922        let old_html =
2923            t(r#"<html><body><div data-hotmeal-opaque><p>hello</p></div></body></html>"#);
2924        let new_html =
2925            t(r#"<html><body><div data-hotmeal-opaque><p>hello</p></div></body></html>"#);
2926
2927        let old = dom::parse(&old_html);
2928        let new = dom::parse(&new_html);
2929        let patches = diff(&old, &new).unwrap();
2930
2931        assert!(
2932            patches.is_empty(),
2933            "Identical opaque nodes should produce no patches, got: {:?}",
2934            patches
2935        );
2936    }
2937
2938    #[test]
2939    fn test_opaque_changed_emits_opaque_changed() {
2940        // When an opaque node's content changes, we should get an OpaqueChanged patch
2941        let old_html = t(r#"<html><body><div data-hotmeal-opaque><p>old</p></div></body></html>"#);
2942        let new_html = t(r#"<html><body><div data-hotmeal-opaque><p>new</p></div></body></html>"#);
2943
2944        let old = dom::parse(&old_html);
2945        let new = dom::parse(&new_html);
2946        let patches = diff(&old, &new).unwrap();
2947
2948        let opaque_patches: Vec<_> = patches
2949            .iter()
2950            .filter(|p| matches!(p, Patch::OpaqueChanged { .. }))
2951            .collect();
2952
2953        assert_eq!(
2954            opaque_patches.len(),
2955            1,
2956            "Should have exactly one OpaqueChanged patch, got: {:?}",
2957            patches
2958        );
2959
2960        // Should NOT have any Insert/Delete/Move/SetText patches for the opaque children
2961        let structural_patches: Vec<_> = patches
2962            .iter()
2963            .filter(|p| {
2964                matches!(
2965                    p,
2966                    Patch::InsertElement { .. }
2967                        | Patch::InsertText { .. }
2968                        | Patch::Remove { .. }
2969                        | Patch::Move { .. }
2970                        | Patch::SetText { .. }
2971                )
2972            })
2973            .collect();
2974
2975        assert!(
2976            structural_patches.is_empty(),
2977            "Should not have structural patches for opaque children, got: {:?}",
2978            structural_patches
2979        );
2980
2981        // Verify the content contains the new inner HTML
2982        if let Patch::OpaqueChanged { content, .. } = &opaque_patches[0] {
2983            assert!(
2984                content.as_ref().contains("<p>new</p>"),
2985                "OpaqueChanged content should contain new inner HTML, got: {:?}",
2986                content.as_ref()
2987            );
2988        }
2989    }
2990
2991    #[test]
2992    fn test_opaque_with_normal_siblings() {
2993        // Normal siblings should still diff normally alongside opaque nodes
2994        let old_html = t(
2995            r#"<html><body><div data-hotmeal-opaque><p>old</p></div><span>before</span></body></html>"#,
2996        );
2997        let new_html = t(
2998            r#"<html><body><div data-hotmeal-opaque><p>new</p></div><span>after</span></body></html>"#,
2999        );
3000
3001        let old = dom::parse(&old_html);
3002        let new = dom::parse(&new_html);
3003        let patches = diff(&old, &new).unwrap();
3004
3005        // Should have an OpaqueChanged for the div
3006        let opaque_count = patches
3007            .iter()
3008            .filter(|p| matches!(p, Patch::OpaqueChanged { .. }))
3009            .count();
3010        assert_eq!(
3011            opaque_count, 1,
3012            "Should have OpaqueChanged for opaque div, got: {:?}",
3013            patches
3014        );
3015
3016        // Should have a SetText for the span's text change
3017        let text_count = patches
3018            .iter()
3019            .filter(|p| matches!(p, Patch::SetText { .. }))
3020            .count();
3021        assert_eq!(
3022            text_count, 1,
3023            "Should have SetText for sibling span text change, got: {:?}",
3024            patches
3025        );
3026    }
3027
3028    #[test]
3029    fn test_opaque_roundtrip_rust_applier() {
3030        // Verify that OpaqueChanged can be applied via the Rust applier
3031        let old_html = t(r#"<html><body><div data-hotmeal-opaque><p>old</p></div></body></html>"#);
3032        let new_html = t(r#"<html><body><div data-hotmeal-opaque><p>new</p></div></body></html>"#);
3033
3034        let old = dom::parse(&old_html);
3035        let new = dom::parse(&new_html);
3036        let patches = diff(&old, &new).unwrap();
3037
3038        let mut doc = dom::parse(&old_html);
3039        doc.apply_patches(patches).expect("apply should succeed");
3040
3041        // After applying, the opaque div should have new content
3042        let result = doc.to_html();
3043        let expected = dom::parse(&new_html).to_html();
3044        assert_eq!(
3045            result, expected,
3046            "HTML output should match after OpaqueChanged"
3047        );
3048    }
3049
3050    #[test]
3051    fn test_opaque_deeply_nested_content() {
3052        // Opaque node with deeply nested content changing
3053        let old_html = t(
3054            r#"<html><body><div data-hotmeal-opaque><div class="wrapper"><ul><li>item 1</li><li>item 2</li></ul></div></div></body></html>"#,
3055        );
3056        let new_html = t(
3057            r#"<html><body><div data-hotmeal-opaque><div class="wrapper"><ul><li>item A</li><li>item B</li><li>item C</li></ul></div></div></body></html>"#,
3058        );
3059
3060        let old = dom::parse(&old_html);
3061        let new = dom::parse(&new_html);
3062        let patches = diff(&old, &new).unwrap();
3063
3064        let opaque_count = patches
3065            .iter()
3066            .filter(|p| matches!(p, Patch::OpaqueChanged { .. }))
3067            .count();
3068        assert_eq!(opaque_count, 1, "Should emit one OpaqueChanged");
3069
3070        // No structural patches for deeply nested children
3071        let structural: Vec<_> = patches
3072            .iter()
3073            .filter(|p| {
3074                matches!(
3075                    p,
3076                    Patch::InsertElement { .. }
3077                        | Patch::InsertText { .. }
3078                        | Patch::Remove { .. }
3079                        | Patch::Move { .. }
3080                        | Patch::SetText { .. }
3081                )
3082            })
3083            .collect();
3084        assert!(
3085            structural.is_empty(),
3086            "Deeply nested opaque children should not produce structural patches, got: {:?}",
3087            structural
3088        );
3089    }
3090
3091    #[test]
3092    fn test_opaque_multiple_siblings() {
3093        // Multiple opaque nodes as siblings, each changing independently
3094        let old_html = t(
3095            r#"<html><body><div data-hotmeal-opaque><p>chart 1</p></div><div data-hotmeal-opaque><p>chart 2</p></div></body></html>"#,
3096        );
3097        let new_html = t(
3098            r#"<html><body><div data-hotmeal-opaque><p>updated 1</p></div><div data-hotmeal-opaque><p>updated 2</p></div></body></html>"#,
3099        );
3100
3101        let old = dom::parse(&old_html);
3102        let new = dom::parse(&new_html);
3103        let patches = diff(&old, &new).unwrap();
3104
3105        let opaque_patches: Vec<_> = patches
3106            .iter()
3107            .filter(|p| matches!(p, Patch::OpaqueChanged { .. }))
3108            .collect();
3109        assert_eq!(
3110            opaque_patches.len(),
3111            2,
3112            "Should emit OpaqueChanged for each changed opaque sibling, got: {:?}",
3113            patches
3114        );
3115    }
3116
3117    #[test]
3118    fn test_opaque_structural_replacement() {
3119        // Content changes from one element type to a completely different one
3120        // (like mermaid: <pre> -> <svg>)
3121        let old_html = t(
3122            r#"<html><body><div data-hotmeal-opaque><pre class="mermaid">graph TD; A-->B;</pre></div></body></html>"#,
3123        );
3124        let new_html = t(
3125            r#"<html><body><div data-hotmeal-opaque><svg><rect></rect></svg></div></body></html>"#,
3126        );
3127
3128        let old = dom::parse(&old_html);
3129        let new = dom::parse(&new_html);
3130        let patches = diff(&old, &new).unwrap();
3131
3132        let opaque_count = patches
3133            .iter()
3134            .filter(|p| matches!(p, Patch::OpaqueChanged { .. }))
3135            .count();
3136        assert_eq!(
3137            opaque_count, 1,
3138            "Should emit OpaqueChanged even with radically different content"
3139        );
3140
3141        // Verify the new content is carried in the patch
3142        if let Some(Patch::OpaqueChanged { content, .. }) = patches
3143            .iter()
3144            .find(|p| matches!(p, Patch::OpaqueChanged { .. }))
3145        {
3146            assert!(
3147                content.as_ref().contains("<svg>"),
3148                "OpaqueChanged content should contain new SVG, got: {:?}",
3149                content.as_ref()
3150            );
3151        }
3152    }
3153
3154    #[test]
3155    fn test_opaque_only_one_changed() {
3156        // Two opaque siblings, only one changes
3157        let old_html = t(
3158            r#"<html><body><div data-hotmeal-opaque><p>same</p></div><div data-hotmeal-opaque><p>old</p></div></body></html>"#,
3159        );
3160        let new_html = t(
3161            r#"<html><body><div data-hotmeal-opaque><p>same</p></div><div data-hotmeal-opaque><p>new</p></div></body></html>"#,
3162        );
3163
3164        let old = dom::parse(&old_html);
3165        let new = dom::parse(&new_html);
3166        let patches = diff(&old, &new).unwrap();
3167
3168        let opaque_patches: Vec<_> = patches
3169            .iter()
3170            .filter(|p| matches!(p, Patch::OpaqueChanged { .. }))
3171            .collect();
3172        assert_eq!(
3173            opaque_patches.len(),
3174            1,
3175            "Only the changed opaque node should emit OpaqueChanged, got: {:?}",
3176            patches
3177        );
3178    }
3179
3180    #[test]
3181    fn test_opaque_roundtrip_deeply_nested() {
3182        // Full roundtrip test with deep nested content change
3183        let old_html = t(
3184            r#"<html><body><div data-hotmeal-opaque><div><span>old text</span></div></div></body></html>"#,
3185        );
3186        let new_html = t(
3187            r#"<html><body><div data-hotmeal-opaque><div><span>new text</span><span>extra</span></div></div></body></html>"#,
3188        );
3189
3190        let old = dom::parse(&old_html);
3191        let new = dom::parse(&new_html);
3192        let patches = diff(&old, &new).unwrap();
3193
3194        let mut doc = dom::parse(&old_html);
3195        doc.apply_patches(patches).expect("apply should succeed");
3196
3197        let result = doc.to_html();
3198        let expected = dom::parse(&new_html).to_html();
3199        assert_eq!(
3200            result, expected,
3201            "Deeply nested opaque roundtrip should match"
3202        );
3203    }
3204
3205    #[test]
3206    fn test_opaque_roundtrip_with_sibling_changes() {
3207        // Full roundtrip: both opaque and normal siblings change
3208        let old_html = t(
3209            r#"<html><body><div data-hotmeal-opaque><p>old chart</p></div><p>before text</p></body></html>"#,
3210        );
3211        let new_html = t(
3212            r#"<html><body><div data-hotmeal-opaque><p>new chart</p></div><p>after text</p></body></html>"#,
3213        );
3214
3215        let old = dom::parse(&old_html);
3216        let new = dom::parse(&new_html);
3217        let patches = diff(&old, &new).unwrap();
3218
3219        let mut doc = dom::parse(&old_html);
3220        doc.apply_patches(patches).expect("apply should succeed");
3221
3222        let result = doc.to_html();
3223        let expected = dom::parse(&new_html).to_html();
3224        assert_eq!(
3225            result, expected,
3226            "Roundtrip with sibling changes should match"
3227        );
3228    }
3229}