Skip to main content

xml_3dm/diff/
patch.rs

1//! Patch application algorithm.
2//!
3//! Applies a diff document to a base tree to reconstruct the modified version.
4
5use std::collections::HashMap;
6use std::io::Write;
7
8use crate::error::{Error, Result};
9use crate::node::{new_branch_node, NodeInner, NodeRef, XmlContent, XmlElement};
10
11use super::bfs_index::BfsIndex;
12use super::{
13    DIFF_COPY_TAG, DIFF_CPYDST_ATTR, DIFF_CPYRUN_ATTR, DIFF_CPYSRC_ATTR, DIFF_ESC_TAG,
14    DIFF_INSERT_TAG, DIFF_ROOTOP_ATTR, DIFF_ROOTOP_INS, DIFF_ROOT_TAG,
15};
16
17/// Patch application for applying diffs to base trees.
18pub struct Patch {
19    _placeholder: (),
20}
21
22impl Default for Patch {
23    fn default() -> Self {
24        Self::new()
25    }
26}
27
28impl Patch {
29    /// Creates a new patch applicator.
30    pub fn new() -> Self {
31        Patch { _placeholder: () }
32    }
33
34    /// Applies a diff to a base tree and writes the result.
35    ///
36    /// # Arguments
37    /// * `base` - The base tree to patch
38    /// * `diff` - The diff tree (parsed from diff XML)
39    /// * `writer` - Output writer for the patched tree
40    pub fn patch<W: Write>(&self, base: &NodeRef, diff: &NodeRef, writer: &mut W) -> Result<()> {
41        let index = BfsIndex::new(base);
42        let patched = self.apply_patch(diff, &index)?;
43
44        // Write output
45        writer.write_all(b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")?;
46
47        // Get first child (skip the dummy root)
48        let borrowed = patched.borrow();
49        if let Some(child) = borrowed.children().first() {
50            self.dump_tree(child, writer, 0)?;
51        }
52
53        Ok(())
54    }
55
56    /// Applies the patch and returns the patched tree.
57    fn apply_patch(&self, diff: &NodeRef, index: &BfsIndex) -> Result<NodeRef> {
58        // Create a dummy root to hold the result
59        let patch_root = new_branch_node(Some(XmlContent::Element(XmlElement::new(
60            "$DUMMY$".to_string(),
61            HashMap::new(),
62        ))));
63
64        // Get the diff element (skip $ROOT$ fake root if present)
65        let diff_elem = {
66            let borrowed = diff.borrow();
67            if let Some(first_child) = borrowed.children().first() {
68                first_child.clone()
69            } else {
70                return Err(Error::Parse("Empty diff document".to_string()));
71            }
72        };
73
74        // Verify it's a diff root tag
75        let is_diff_root = {
76            let borrowed = diff_elem.borrow();
77            match borrowed.content() {
78                Some(XmlContent::Element(e)) => e.qname() == DIFF_ROOT_TAG,
79                _ => false,
80            }
81        };
82
83        if !is_diff_root {
84            return Err(Error::Parse("Invalid root tag for diff".to_string()));
85        }
86
87        // Check for root operation attribute
88        let root_op = {
89            let borrowed = diff_elem.borrow();
90            if let Some(XmlContent::Element(e)) = borrowed.content() {
91                e.attributes().get(DIFF_ROOTOP_ATTR).cloned()
92            } else {
93                None
94            }
95        };
96
97        match root_op.as_deref() {
98            None | Some("") => {
99                // Default op is copy
100                let root_id = index.root_id();
101                self.apply_copy(&patch_root, &diff_elem, root_id, index)?;
102            }
103            Some(op) if op == DIFF_ROOTOP_INS => {
104                // Insert operation
105                self.apply_insert(&patch_root, &diff_elem, index)?;
106            }
107            Some(op) => {
108                return Err(Error::Parse(format!("Invalid rootop for diff: {}", op)));
109            }
110        }
111
112        // Return the first child of the patch root
113        let borrowed = patch_root.borrow();
114        borrowed
115            .children()
116            .first()
117            .cloned()
118            .ok_or_else(|| Error::Parse("Patch produced no output".to_string()))
119    }
120
121    /// Applies an insert operation.
122    fn apply_insert(&self, patch: &NodeRef, diff: &NodeRef, index: &BfsIndex) -> Result<()> {
123        let (content, is_command, qname) = {
124            let diff_borrowed = diff.borrow();
125            let content = diff_borrowed.content().cloned();
126
127            // Check if this is a command or just content to insert
128            let (is_command, qname) = match &content {
129                Some(XmlContent::Element(e)) => {
130                    let qn = e.qname().to_string();
131                    let is_cmd = qn == DIFF_COPY_TAG || qn == DIFF_INSERT_TAG || qn == DIFF_ESC_TAG;
132                    (is_cmd, Some(qn))
133                }
134                _ => (false, None),
135            };
136
137            (content, is_command, qname)
138        };
139
140        if !is_command {
141            // Simple insert - create node and recurse for children
142            let node = new_branch_node(content);
143
144            let children: Vec<_> = diff.borrow().children().to_vec();
145            for child in children {
146                self.apply_insert(&node, &child, index)?;
147            }
148
149            NodeInner::add_child_to_ref(patch, node);
150        } else if let Some(qn) = qname {
151            if qn == DIFF_ESC_TAG || qn == DIFF_INSERT_TAG {
152                // Process children
153                let children: Vec<_> = diff.borrow().children().to_vec();
154                for child in children {
155                    self.apply_insert(patch, &child, index)?;
156                }
157            } else if qn == DIFF_COPY_TAG {
158                // Copy operation
159                let src_str = {
160                    let diff_borrowed = diff.borrow();
161                    if let Some(XmlContent::Element(e)) = diff_borrowed.content() {
162                        e.attributes().get(DIFF_CPYSRC_ATTR).cloned()
163                    } else {
164                        None
165                    }
166                };
167
168                let src_str = src_str.ok_or_else(|| {
169                    Error::Parse("Missing src attribute in copy command".to_string())
170                })?;
171
172                let src_id = src_str.parse().unwrap_or(0);
173                self.apply_copy(patch, diff, src_id, index)?;
174            }
175        }
176
177        Ok(())
178    }
179
180    /// Applies a copy operation.
181    fn apply_copy(
182        &self,
183        patch: &NodeRef,
184        diff: &NodeRef,
185        src_id: u64,
186        index: &BfsIndex,
187    ) -> Result<()> {
188        // Gather stop nodes and their destinations
189        let mut dst_nodes: Vec<(u64, NodeRef)> = Vec::new();
190        let mut stop_nodes: HashMap<u64, Option<NodeRef>> = HashMap::new();
191
192        // Get run count
193        let run = {
194            let diff_borrowed = diff.borrow();
195            match diff_borrowed.content() {
196                Some(XmlContent::Element(e)) => e
197                    .attributes()
198                    .get(DIFF_CPYRUN_ATTR)
199                    .and_then(|v| v.parse::<u64>().ok())
200                    .unwrap_or(1),
201                _ => 1,
202            }
203        };
204
205        // Process diff children to find stop nodes
206        {
207            let diff_borrowed = diff.borrow();
208            for child in diff_borrowed.children() {
209                let child_borrowed = child.borrow();
210                let child_content = child_borrowed.content();
211
212                // Verify it's a copy or insert command
213                let is_valid_cmd = match child_content {
214                    Some(XmlContent::Element(e)) => {
215                        let qname = e.qname();
216                        qname == DIFF_COPY_TAG || qname == DIFF_INSERT_TAG
217                    }
218                    _ => false,
219                };
220
221                if !is_valid_cmd {
222                    return Err(Error::Parse(
223                        "Only copy or insert commands may appear below a copy command".to_string(),
224                    ));
225                }
226
227                // Get destination node
228                let dst_str = match child_content {
229                    Some(XmlContent::Element(e)) => e.attributes().get(DIFF_CPYDST_ATTR).cloned(),
230                    _ => None,
231                };
232
233                if let Some(dst_str) = dst_str {
234                    let stop_node = index.lookup_str(&dst_str).ok_or_else(|| {
235                        Error::Parse(format!("Invalid dst in command: {}", dst_str))
236                    })?;
237
238                    let stop_id = stop_node.borrow().id();
239                    dst_nodes.push((stop_id, child.clone()));
240                    stop_nodes.entry(stop_id).or_insert(None);
241                }
242            }
243        }
244
245        // Get source root
246        let src_root = index
247            .lookup(src_id)
248            .ok_or_else(|| Error::Parse(format!("Invalid src in copy command: {}", src_id)))?;
249
250        // Perform copy with run count
251        let mut current_src = src_root;
252        let mut current_id = src_id;
253
254        for _i_run in 1..run {
255            self.dfs_copy(patch, &current_src, &mut HashMap::new())?;
256            current_id += 1;
257            if let Some(next) = index.lookup(current_id) {
258                current_src = next;
259            }
260        }
261
262        // Final copy with stop nodes
263        self.dfs_copy(patch, &current_src, &mut stop_nodes)?;
264
265        // Recurse for each diff child
266        for (stop_id, diff_child) in &dst_nodes {
267            if let Some(Some(created_node)) = stop_nodes.get(stop_id) {
268                self.apply_insert(created_node, diff_child, index)?;
269            }
270        }
271
272        Ok(())
273    }
274
275    /// Performs a DFS copy of the source tree.
276    #[allow(clippy::only_used_in_recursion)]
277    fn dfs_copy(
278        &self,
279        dst: &NodeRef,
280        src: &NodeRef,
281        stop_nodes: &mut HashMap<u64, Option<NodeRef>>,
282    ) -> Result<()> {
283        let (content, src_id, children) = {
284            let src_borrowed = src.borrow();
285            (
286                src_borrowed.content().cloned(),
287                src_borrowed.id(),
288                src_borrowed.children().to_vec(),
289            )
290        };
291
292        let copied = new_branch_node(content);
293        NodeInner::add_child_to_ref(dst, copied.clone());
294
295        if let std::collections::hash_map::Entry::Occupied(mut e) = stop_nodes.entry(src_id) {
296            // Mark this as the stop point and record the created node
297            e.insert(Some(copied));
298            return Ok(());
299        }
300
301        // Copy children
302        for child in children {
303            self.dfs_copy(&copied, &child, stop_nodes)?;
304        }
305
306        Ok(())
307    }
308
309    /// Writes the tree to the output.
310    #[allow(clippy::only_used_in_recursion)]
311    fn dump_tree<W: Write>(
312        &self,
313        node: &NodeRef,
314        writer: &mut W,
315        indent: usize,
316    ) -> std::io::Result<()> {
317        let borrowed = node.borrow();
318
319        match borrowed.content() {
320            Some(XmlContent::Text(text)) => {
321                // Convert char slice to String
322                let text_str: String = text.text().iter().collect();
323                write!(
324                    writer,
325                    "{}{}",
326                    Self::indent_str_for(indent),
327                    escape_xml(&text_str)
328                )?;
329            }
330            Some(XmlContent::Comment(comment)) => {
331                // Output comment
332                let comment_text: String = comment.text().iter().collect();
333                writeln!(
334                    writer,
335                    "{}<!-- {} -->",
336                    Self::indent_str_for(indent),
337                    comment_text
338                )?;
339            }
340            Some(XmlContent::ProcessingInstruction(pi)) => {
341                // Output processing instruction
342                if pi.content().is_empty() {
343                    writeln!(
344                        writer,
345                        "{}<?{}?>",
346                        Self::indent_str_for(indent),
347                        pi.target()
348                    )?;
349                } else {
350                    writeln!(
351                        writer,
352                        "{}<?{} {}?>",
353                        Self::indent_str_for(indent),
354                        pi.target(),
355                        pi.content()
356                    )?;
357                }
358            }
359            Some(XmlContent::Element(elem)) => {
360                write!(writer, "{}<{}", Self::indent_str_for(indent), elem.qname())?;
361                // Namespace declarations (sorted for deterministic output)
362                let mut ns_prefixes: Vec<_> = elem.namespace_decls().keys().collect();
363                ns_prefixes.sort();
364                for prefix in ns_prefixes {
365                    if let Some(uri) = elem.namespace_decls().get(prefix) {
366                        if prefix.is_empty() {
367                            write!(writer, " xmlns=\"{}\"", escape_xml_attr(uri))?;
368                        } else {
369                            write!(writer, " xmlns:{}=\"{}\"", prefix, escape_xml_attr(uri))?;
370                        }
371                    }
372                }
373                // Sort attributes for consistent output
374                let mut attr_names: Vec<_> = elem.attributes().keys().collect();
375                attr_names.sort();
376                for name in attr_names {
377                    if let Some(value) = elem.attributes().get(name) {
378                        write!(writer, " {}=\"{}\"", name, escape_xml_attr(value))?;
379                    }
380                }
381                writeln!(writer, ">")?;
382
383                for child in borrowed.children() {
384                    self.dump_tree(child, writer, indent + 1)?;
385                }
386
387                writeln!(
388                    writer,
389                    "{}</{}>",
390                    Self::indent_str_for(indent),
391                    elem.qname()
392                )?;
393            }
394            None => {}
395        }
396
397        Ok(())
398    }
399
400    /// Returns the indentation string for a given level.
401    fn indent_str_for(level: usize) -> String {
402        "  ".repeat(level)
403    }
404}
405
406/// Escapes special XML characters in text content.
407fn escape_xml(s: &str) -> String {
408    s.replace('&', "&amp;")
409        .replace('<', "&lt;")
410        .replace('>', "&gt;")
411}
412
413/// Escapes special XML characters in attribute values.
414fn escape_xml_attr(s: &str) -> String {
415    s.replace('&', "&amp;")
416        .replace('<', "&lt;")
417        .replace('>', "&gt;")
418        .replace('"', "&quot;")
419}
420
421#[cfg(test)]
422mod tests {
423    use super::*;
424    use crate::node::{new_base_node, XmlText};
425
426    fn create_base_tree() -> NodeRef {
427        let root = new_base_node(Some(XmlContent::Element(XmlElement::new(
428            "root".to_string(),
429            HashMap::new(),
430        ))));
431        let child1 = new_base_node(Some(XmlContent::Element(XmlElement::new(
432            "child1".to_string(),
433            HashMap::new(),
434        ))));
435        let child2 = new_base_node(Some(XmlContent::Element(XmlElement::new(
436            "child2".to_string(),
437            HashMap::new(),
438        ))));
439        let text = new_base_node(Some(XmlContent::Text(XmlText::new("hello"))));
440
441        NodeInner::add_child_to_ref(&child1, text);
442        NodeInner::add_child_to_ref(&root, child1);
443        NodeInner::add_child_to_ref(&root, child2);
444
445        root
446    }
447
448    fn create_simple_diff() -> NodeRef {
449        // Create a diff that just copies the root
450        // <$ROOT$><diff><copy src="0" /></diff></$ROOT$>
451        let fake_root = new_branch_node(Some(XmlContent::Element(XmlElement::new(
452            "$ROOT$".to_string(),
453            HashMap::new(),
454        ))));
455        let diff_elem = new_branch_node(Some(XmlContent::Element(XmlElement::new(
456            DIFF_ROOT_TAG.to_string(),
457            HashMap::new(),
458        ))));
459
460        NodeInner::add_child_to_ref(&fake_root, diff_elem);
461
462        fake_root
463    }
464
465    #[test]
466    fn test_patch_new() {
467        let _patch = Patch::new();
468        // Just verify construction works
469    }
470
471    #[test]
472    fn test_escape_functions() {
473        assert_eq!(escape_xml("<test>"), "&lt;test&gt;");
474        assert_eq!(escape_xml_attr("\"test\""), "&quot;test&quot;");
475    }
476
477    #[test]
478    fn test_simple_patch() {
479        let base = create_base_tree();
480        let diff = create_simple_diff();
481        let patch = Patch::new();
482
483        let mut output = Vec::new();
484        let result = patch.patch(&base, &diff, &mut output);
485
486        // This might fail due to diff structure, but shouldn't panic
487        // The actual integration test would use properly structured diffs
488        assert!(result.is_ok() || result.is_err());
489    }
490}