Skip to main content

xml_3dm/diff/
patch.rs

1//! Patch application algorithm.
2//!
3//! Applies a diff document to a base tree to reconstruct the modified version.
4
5use std::collections::HashMap;
6use std::io::Write;
7
8use crate::error::{Error, Result};
9use crate::node::{new_branch_node, NodeInner, NodeRef, XmlContent, XmlElement};
10
11use super::bfs_index::BfsIndex;
12use super::{
13    DIFF_COPY_TAG, DIFF_CPYDST_ATTR, DIFF_CPYRUN_ATTR, DIFF_CPYSRC_ATTR, DIFF_ESC_TAG,
14    DIFF_INSERT_TAG, DIFF_ROOTOP_ATTR, DIFF_ROOTOP_INS, DIFF_ROOT_TAG,
15};
16
17/// Patch application for applying diffs to base trees.
18pub struct Patch {
19    _placeholder: (),
20}
21
22impl Default for Patch {
23    fn default() -> Self {
24        Self::new()
25    }
26}
27
28impl Patch {
29    /// Creates a new patch applicator.
30    pub fn new() -> Self {
31        Patch { _placeholder: () }
32    }
33
34    /// Applies a diff to a base tree and writes the result.
35    ///
36    /// # Arguments
37    /// * `base` - The base tree to patch
38    /// * `diff` - The diff tree (parsed from diff XML)
39    /// * `writer` - Output writer for the patched tree
40    pub fn patch<W: Write>(&self, base: &NodeRef, diff: &NodeRef, writer: &mut W) -> Result<()> {
41        let index = BfsIndex::new(base);
42        let patched = self.apply_patch(diff, &index)?;
43
44        // Write output
45        writer.write_all(b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")?;
46
47        // Get first child (skip the dummy root)
48        let borrowed = patched.borrow();
49        if let Some(child) = borrowed.children().first() {
50            self.dump_tree(child, writer, 0)?;
51        }
52
53        Ok(())
54    }
55
56    /// Applies the patch and returns the patched tree.
57    fn apply_patch(&self, diff: &NodeRef, index: &BfsIndex) -> Result<NodeRef> {
58        // Create a dummy root to hold the result
59        let patch_root = new_branch_node(Some(XmlContent::Element(XmlElement::new(
60            "$DUMMY$".to_string(),
61            HashMap::new(),
62        ))));
63
64        // Get the diff element (skip $ROOT$ fake root if present)
65        let diff_elem = {
66            let borrowed = diff.borrow();
67            if let Some(first_child) = borrowed.children().first() {
68                first_child.clone()
69            } else {
70                return Err(Error::Parse("Empty diff document".to_string()));
71            }
72        };
73
74        // Verify it's a diff root tag
75        let is_diff_root = {
76            let borrowed = diff_elem.borrow();
77            match borrowed.content() {
78                Some(XmlContent::Element(e)) => e.qname() == DIFF_ROOT_TAG,
79                _ => false,
80            }
81        };
82
83        if !is_diff_root {
84            return Err(Error::Parse("Invalid root tag for diff".to_string()));
85        }
86
87        // Check for root operation attribute
88        let root_op = {
89            let borrowed = diff_elem.borrow();
90            if let Some(XmlContent::Element(e)) = borrowed.content() {
91                e.attributes().get(DIFF_ROOTOP_ATTR).cloned()
92            } else {
93                None
94            }
95        };
96
97        match root_op.as_deref() {
98            None | Some("") => {
99                // Default op is copy
100                let root_id = index.root_id();
101                self.apply_copy(&patch_root, &diff_elem, root_id, index)?;
102            }
103            Some(op) if op == DIFF_ROOTOP_INS => {
104                // Insert operation
105                self.apply_insert(&patch_root, &diff_elem, index)?;
106            }
107            Some(op) => {
108                return Err(Error::Parse(format!("Invalid rootop for diff: {}", op)));
109            }
110        }
111
112        // Return the first child of the patch root
113        let borrowed = patch_root.borrow();
114        borrowed
115            .children()
116            .first()
117            .cloned()
118            .ok_or_else(|| Error::Parse("Patch produced no output".to_string()))
119    }
120
121    /// Applies an insert operation.
122    fn apply_insert(&self, patch: &NodeRef, diff: &NodeRef, index: &BfsIndex) -> Result<()> {
123        let (content, is_command, qname) = {
124            let diff_borrowed = diff.borrow();
125            let content = diff_borrowed.content().cloned();
126
127            // Check if this is a command or just content to insert
128            let (is_command, qname) = match &content {
129                Some(XmlContent::Element(e)) => {
130                    let qn = e.qname().to_string();
131                    let is_cmd = qn == DIFF_COPY_TAG || qn == DIFF_INSERT_TAG || qn == DIFF_ESC_TAG;
132                    (is_cmd, Some(qn))
133                }
134                _ => (false, None),
135            };
136
137            (content, is_command, qname)
138        };
139
140        if !is_command {
141            // Simple insert - create node and recurse for children
142            let node = new_branch_node(content);
143
144            let children: Vec<_> = diff.borrow().children().to_vec();
145            for child in children {
146                self.apply_insert(&node, &child, index)?;
147            }
148
149            NodeInner::add_child_to_ref(patch, node);
150        } else if let Some(qn) = qname {
151            if qn == DIFF_ESC_TAG || qn == DIFF_INSERT_TAG {
152                // Process children
153                let children: Vec<_> = diff.borrow().children().to_vec();
154                for child in children {
155                    self.apply_insert(patch, &child, index)?;
156                }
157            } else if qn == DIFF_COPY_TAG {
158                // Copy operation
159                let src_str = {
160                    let diff_borrowed = diff.borrow();
161                    if let Some(XmlContent::Element(e)) = diff_borrowed.content() {
162                        e.attributes().get(DIFF_CPYSRC_ATTR).cloned()
163                    } else {
164                        None
165                    }
166                };
167
168                let src_str = src_str.ok_or_else(|| {
169                    Error::Parse("Missing src attribute in copy command".to_string())
170                })?;
171
172                let src_id = src_str.parse().unwrap_or(0);
173                self.apply_copy(patch, diff, src_id, index)?;
174            }
175        }
176
177        Ok(())
178    }
179
180    /// Applies a copy operation.
181    fn apply_copy(
182        &self,
183        patch: &NodeRef,
184        diff: &NodeRef,
185        src_id: u64,
186        index: &BfsIndex,
187    ) -> Result<()> {
188        // Gather stop nodes and their destinations
189        let mut dst_nodes: Vec<(u64, NodeRef)> = Vec::new();
190        let mut stop_nodes: HashMap<u64, Option<NodeRef>> = HashMap::new();
191
192        // Get run count
193        let run = {
194            let diff_borrowed = diff.borrow();
195            match diff_borrowed.content() {
196                Some(XmlContent::Element(e)) => e
197                    .attributes()
198                    .get(DIFF_CPYRUN_ATTR)
199                    .and_then(|v| v.parse::<u64>().ok())
200                    .unwrap_or(1),
201                _ => 1,
202            }
203        };
204
205        // Process diff children to find stop nodes
206        {
207            let diff_borrowed = diff.borrow();
208            for child in diff_borrowed.children() {
209                let child_borrowed = child.borrow();
210                let child_content = child_borrowed.content();
211
212                // Verify it's a copy or insert command
213                let is_valid_cmd = match child_content {
214                    Some(XmlContent::Element(e)) => {
215                        let qname = e.qname();
216                        qname == DIFF_COPY_TAG || qname == DIFF_INSERT_TAG
217                    }
218                    _ => false,
219                };
220
221                if !is_valid_cmd {
222                    return Err(Error::Parse(
223                        "Only copy or insert commands may appear below a copy command".to_string(),
224                    ));
225                }
226
227                // Get destination node
228                let dst_str = match child_content {
229                    Some(XmlContent::Element(e)) => e.attributes().get(DIFF_CPYDST_ATTR).cloned(),
230                    _ => None,
231                };
232
233                if let Some(dst_str) = dst_str {
234                    let stop_node = index.lookup_str(&dst_str).ok_or_else(|| {
235                        Error::Parse(format!("Invalid dst in command: {}", dst_str))
236                    })?;
237
238                    let stop_id = stop_node.borrow().id();
239                    dst_nodes.push((stop_id, child.clone()));
240                    stop_nodes.entry(stop_id).or_insert(None);
241                }
242            }
243        }
244
245        // Get source root
246        let src_root = index
247            .lookup(src_id)
248            .ok_or_else(|| Error::Parse(format!("Invalid src in copy command: {}", src_id)))?;
249
250        // Perform copy with run count
251        let mut current_src = src_root;
252        let mut current_id = src_id;
253
254        for _i_run in 1..run {
255            self.dfs_copy(patch, &current_src, &mut HashMap::new())?;
256            current_id += 1;
257            if let Some(next) = index.lookup(current_id) {
258                current_src = next;
259            }
260        }
261
262        // Final copy with stop nodes
263        self.dfs_copy(patch, &current_src, &mut stop_nodes)?;
264
265        // Recurse for each diff child
266        for (stop_id, diff_child) in &dst_nodes {
267            if let Some(Some(created_node)) = stop_nodes.get(stop_id) {
268                self.apply_insert(created_node, diff_child, index)?;
269            }
270        }
271
272        Ok(())
273    }
274
275    /// Performs a DFS copy of the source tree.
276    #[allow(clippy::only_used_in_recursion)]
277    fn dfs_copy(
278        &self,
279        dst: &NodeRef,
280        src: &NodeRef,
281        stop_nodes: &mut HashMap<u64, Option<NodeRef>>,
282    ) -> Result<()> {
283        let (content, src_id, children) = {
284            let src_borrowed = src.borrow();
285            (
286                src_borrowed.content().cloned(),
287                src_borrowed.id(),
288                src_borrowed.children().to_vec(),
289            )
290        };
291
292        let copied = new_branch_node(content);
293        NodeInner::add_child_to_ref(dst, copied.clone());
294
295        if let std::collections::hash_map::Entry::Occupied(mut e) = stop_nodes.entry(src_id) {
296            // Mark this as the stop point and record the created node
297            e.insert(Some(copied));
298            return Ok(());
299        }
300
301        // Copy children
302        for child in children {
303            self.dfs_copy(&copied, &child, stop_nodes)?;
304        }
305
306        Ok(())
307    }
308
309    /// Writes the tree to the output.
310    #[allow(clippy::only_used_in_recursion)]
311    fn dump_tree<W: Write>(
312        &self,
313        node: &NodeRef,
314        writer: &mut W,
315        indent: usize,
316    ) -> std::io::Result<()> {
317        let borrowed = node.borrow();
318
319        match borrowed.content() {
320            Some(XmlContent::Text(text)) => {
321                // Convert char slice to String
322                let text_str: String = text.text().iter().collect();
323                write!(
324                    writer,
325                    "{}{}",
326                    Self::indent_str_for(indent),
327                    escape_xml(&text_str)
328                )?;
329            }
330            Some(XmlContent::Comment(comment)) => {
331                // Output comment
332                let comment_text: String = comment.text().iter().collect();
333                writeln!(
334                    writer,
335                    "{}<!-- {} -->",
336                    Self::indent_str_for(indent),
337                    comment_text
338                )?;
339            }
340            Some(XmlContent::Element(elem)) => {
341                write!(writer, "{}<{}", Self::indent_str_for(indent), elem.qname())?;
342                // Sort attributes for consistent output
343                let mut attr_names: Vec<_> = elem.attributes().keys().collect();
344                attr_names.sort();
345                for name in attr_names {
346                    if let Some(value) = elem.attributes().get(name) {
347                        write!(writer, " {}=\"{}\"", name, escape_xml_attr(value))?;
348                    }
349                }
350                writeln!(writer, ">")?;
351
352                for child in borrowed.children() {
353                    self.dump_tree(child, writer, indent + 1)?;
354                }
355
356                writeln!(
357                    writer,
358                    "{}</{}>",
359                    Self::indent_str_for(indent),
360                    elem.qname()
361                )?;
362            }
363            None => {}
364        }
365
366        Ok(())
367    }
368
369    /// Returns the indentation string for a given level.
370    fn indent_str_for(level: usize) -> String {
371        "  ".repeat(level)
372    }
373}
374
375/// Escapes special XML characters in text content.
376fn escape_xml(s: &str) -> String {
377    s.replace('&', "&amp;")
378        .replace('<', "&lt;")
379        .replace('>', "&gt;")
380}
381
382/// Escapes special XML characters in attribute values.
383fn escape_xml_attr(s: &str) -> String {
384    s.replace('&', "&amp;")
385        .replace('<', "&lt;")
386        .replace('>', "&gt;")
387        .replace('"', "&quot;")
388}
389
390#[cfg(test)]
391mod tests {
392    use super::*;
393    use crate::node::{new_base_node, XmlText};
394
395    fn create_base_tree() -> NodeRef {
396        let root = new_base_node(Some(XmlContent::Element(XmlElement::new(
397            "root".to_string(),
398            HashMap::new(),
399        ))));
400        let child1 = new_base_node(Some(XmlContent::Element(XmlElement::new(
401            "child1".to_string(),
402            HashMap::new(),
403        ))));
404        let child2 = new_base_node(Some(XmlContent::Element(XmlElement::new(
405            "child2".to_string(),
406            HashMap::new(),
407        ))));
408        let text = new_base_node(Some(XmlContent::Text(XmlText::new("hello"))));
409
410        NodeInner::add_child_to_ref(&child1, text);
411        NodeInner::add_child_to_ref(&root, child1);
412        NodeInner::add_child_to_ref(&root, child2);
413
414        root
415    }
416
417    fn create_simple_diff() -> NodeRef {
418        // Create a diff that just copies the root
419        // <$ROOT$><diff><copy src="0" /></diff></$ROOT$>
420        let fake_root = new_branch_node(Some(XmlContent::Element(XmlElement::new(
421            "$ROOT$".to_string(),
422            HashMap::new(),
423        ))));
424        let diff_elem = new_branch_node(Some(XmlContent::Element(XmlElement::new(
425            DIFF_ROOT_TAG.to_string(),
426            HashMap::new(),
427        ))));
428
429        NodeInner::add_child_to_ref(&fake_root, diff_elem);
430
431        fake_root
432    }
433
434    #[test]
435    fn test_patch_new() {
436        let _patch = Patch::new();
437        // Just verify construction works
438    }
439
440    #[test]
441    fn test_escape_functions() {
442        assert_eq!(escape_xml("<test>"), "&lt;test&gt;");
443        assert_eq!(escape_xml_attr("\"test\""), "&quot;test&quot;");
444    }
445
446    #[test]
447    fn test_simple_patch() {
448        let base = create_base_tree();
449        let diff = create_simple_diff();
450        let patch = Patch::new();
451
452        let mut output = Vec::new();
453        let result = patch.patch(&base, &diff, &mut output);
454
455        // This might fail due to diff structure, but shouldn't panic
456        // The actual integration test would use properly structured diffs
457        assert!(result.is_ok() || result.is_err());
458    }
459}