Skip to main content

xml_3dm/diff/
patch.rs

1//! Patch application algorithm.
2//!
3//! Applies a diff document to a base tree to reconstruct the modified version.
4
5use std::collections::HashMap;
6use std::io::Write;
7
8use crate::error::{Error, Result};
9use crate::node::{new_branch_node, NodeInner, NodeRef, XmlContent, XmlElement};
10
11use super::bfs_index::BfsIndex;
12use super::{
13    DIFF_COPY_TAG, DIFF_CPYDST_ATTR, DIFF_CPYRUN_ATTR, DIFF_CPYSRC_ATTR, DIFF_ESC_TAG,
14    DIFF_INSERT_TAG, DIFF_ROOTOP_ATTR, DIFF_ROOTOP_INS, DIFF_ROOT_TAG,
15};
16
17/// Patch application for applying diffs to base trees.
18pub struct Patch {
19    _placeholder: (),
20}
21
22impl Default for Patch {
23    fn default() -> Self {
24        Self::new()
25    }
26}
27
28impl Patch {
29    /// Creates a new patch applicator.
30    pub fn new() -> Self {
31        Patch { _placeholder: () }
32    }
33
34    /// Applies a diff to a base tree and writes the result.
35    ///
36    /// # Arguments
37    /// * `base` - The base tree to patch
38    /// * `diff` - The diff tree (parsed from diff XML)
39    /// * `writer` - Output writer for the patched tree
40    pub fn patch<W: Write>(&self, base: &NodeRef, diff: &NodeRef, writer: &mut W) -> Result<()> {
41        let index = BfsIndex::new(base);
42        let patched = self.apply_patch(diff, &index)?;
43
44        // Write output
45        writer.write_all(b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")?;
46
47        // Get first child (skip the dummy root)
48        let borrowed = patched.borrow();
49        if let Some(child) = borrowed.children().first() {
50            self.dump_tree(child, writer, 0)?;
51        }
52
53        Ok(())
54    }
55
56    /// Applies the patch and returns the patched tree.
57    fn apply_patch(&self, diff: &NodeRef, index: &BfsIndex) -> Result<NodeRef> {
58        // Create a dummy root to hold the result
59        let patch_root = new_branch_node(Some(XmlContent::Element(XmlElement::new(
60            "$DUMMY$".to_string(),
61            HashMap::new(),
62        ))));
63
64        // Get the diff element (skip $ROOT$ fake root if present)
65        let diff_elem = {
66            let borrowed = diff.borrow();
67            if let Some(first_child) = borrowed.children().first() {
68                first_child.clone()
69            } else {
70                return Err(Error::Parse("Empty diff document".to_string()));
71            }
72        };
73
74        // Verify it's a diff root tag
75        let is_diff_root = {
76            let borrowed = diff_elem.borrow();
77            match borrowed.content() {
78                Some(XmlContent::Element(e)) => e.qname() == DIFF_ROOT_TAG,
79                _ => false,
80            }
81        };
82
83        if !is_diff_root {
84            return Err(Error::Parse("Invalid root tag for diff".to_string()));
85        }
86
87        // Check for root operation attribute
88        let root_op = {
89            let borrowed = diff_elem.borrow();
90            if let Some(XmlContent::Element(e)) = borrowed.content() {
91                e.attributes().get(DIFF_ROOTOP_ATTR).cloned()
92            } else {
93                None
94            }
95        };
96
97        match root_op.as_deref() {
98            None | Some("") => {
99                // Default op is copy
100                let root_id = index.root_id();
101                self.apply_copy(&patch_root, &diff_elem, root_id, index)?;
102            }
103            Some(op) if op == DIFF_ROOTOP_INS => {
104                // Insert operation
105                self.apply_insert(&patch_root, &diff_elem, index)?;
106            }
107            Some(op) => {
108                return Err(Error::Parse(format!("Invalid rootop for diff: {}", op)));
109            }
110        }
111
112        // Return the first child of the patch root
113        let borrowed = patch_root.borrow();
114        borrowed
115            .children()
116            .first()
117            .cloned()
118            .ok_or_else(|| Error::Parse("Patch produced no output".to_string()))
119    }
120
121    /// Applies an insert operation.
122    fn apply_insert(&self, patch: &NodeRef, diff: &NodeRef, index: &BfsIndex) -> Result<()> {
123        let (content, is_command, qname) = {
124            let diff_borrowed = diff.borrow();
125            let content = diff_borrowed.content().cloned();
126
127            // Check if this is a command or just content to insert
128            let (is_command, qname) = match &content {
129                Some(XmlContent::Element(e)) => {
130                    let qn = e.qname().to_string();
131                    let is_cmd = qn == DIFF_COPY_TAG || qn == DIFF_INSERT_TAG || qn == DIFF_ESC_TAG;
132                    (is_cmd, Some(qn))
133                }
134                _ => (false, None),
135            };
136
137            (content, is_command, qname)
138        };
139
140        if !is_command {
141            // Simple insert - create node and recurse for children
142            let node = new_branch_node(content);
143
144            let children: Vec<_> = diff.borrow().children().to_vec();
145            for child in children {
146                self.apply_insert(&node, &child, index)?;
147            }
148
149            NodeInner::add_child_to_ref(patch, node);
150        } else if let Some(qn) = qname {
151            if qn == DIFF_ESC_TAG || qn == DIFF_INSERT_TAG {
152                // Process children
153                let children: Vec<_> = diff.borrow().children().to_vec();
154                for child in children {
155                    self.apply_insert(patch, &child, index)?;
156                }
157            } else if qn == DIFF_COPY_TAG {
158                // Copy operation
159                let src_str = {
160                    let diff_borrowed = diff.borrow();
161                    if let Some(XmlContent::Element(e)) = diff_borrowed.content() {
162                        e.attributes().get(DIFF_CPYSRC_ATTR).cloned()
163                    } else {
164                        None
165                    }
166                };
167
168                let src_str = src_str.ok_or_else(|| {
169                    Error::Parse("Missing src attribute in copy command".to_string())
170                })?;
171
172                let src_id = src_str.parse().unwrap_or(0);
173                self.apply_copy(patch, diff, src_id, index)?;
174            }
175        }
176
177        Ok(())
178    }
179
180    /// Applies a copy operation.
181    fn apply_copy(
182        &self,
183        patch: &NodeRef,
184        diff: &NodeRef,
185        src_id: u64,
186        index: &BfsIndex,
187    ) -> Result<()> {
188        // Gather stop nodes and their destinations
189        let mut dst_nodes: Vec<(u64, NodeRef)> = Vec::new();
190        let mut stop_nodes: HashMap<u64, Option<NodeRef>> = HashMap::new();
191
192        // Get run count
193        let run = {
194            let diff_borrowed = diff.borrow();
195            match diff_borrowed.content() {
196                Some(XmlContent::Element(e)) => e
197                    .attributes()
198                    .get(DIFF_CPYRUN_ATTR)
199                    .and_then(|v| v.parse::<u64>().ok())
200                    .unwrap_or(1),
201                _ => 1,
202            }
203        };
204
205        // Process diff children to find stop nodes
206        {
207            let diff_borrowed = diff.borrow();
208            for child in diff_borrowed.children() {
209                let child_borrowed = child.borrow();
210                let child_content = child_borrowed.content();
211
212                // Verify it's a copy or insert command
213                let is_valid_cmd = match child_content {
214                    Some(XmlContent::Element(e)) => {
215                        let qname = e.qname();
216                        qname == DIFF_COPY_TAG || qname == DIFF_INSERT_TAG
217                    }
218                    _ => false,
219                };
220
221                if !is_valid_cmd {
222                    return Err(Error::Parse(
223                        "Only copy or insert commands may appear below a copy command".to_string(),
224                    ));
225                }
226
227                // Get destination node
228                let dst_str = match child_content {
229                    Some(XmlContent::Element(e)) => e.attributes().get(DIFF_CPYDST_ATTR).cloned(),
230                    _ => None,
231                };
232
233                let dst_str = dst_str.ok_or_else(|| {
234                    Error::Parse("Missing dst attribute in diff command under copy".to_string())
235                })?;
236
237                let stop_node = index
238                    .lookup_str(&dst_str)
239                    .ok_or_else(|| Error::Parse(format!("Invalid dst in command: {}", dst_str)))?;
240
241                let stop_id = stop_node.borrow().id();
242                dst_nodes.push((stop_id, child.clone()));
243                stop_nodes.entry(stop_id).or_insert(None);
244            }
245        }
246
247        // Get source root
248        let src_root = index
249            .lookup(src_id)
250            .ok_or_else(|| Error::Parse(format!("Invalid src in copy command: {}", src_id)))?;
251
252        // Perform copy with run count
253        let mut current_src = src_root;
254        let mut current_id = src_id;
255
256        for _i_run in 1..run {
257            self.dfs_copy(patch, &current_src, &mut HashMap::new())?;
258            current_id += 1;
259            current_src = index.lookup(current_id).ok_or_else(|| {
260                Error::Parse(format!(
261                    "BFS index lookup failed for id {} during copy run",
262                    current_id
263                ))
264            })?;
265        }
266
267        // Final copy with stop nodes
268        self.dfs_copy(patch, &current_src, &mut stop_nodes)?;
269
270        // Recurse for each diff child
271        for (stop_id, diff_child) in &dst_nodes {
272            if let Some(Some(created_node)) = stop_nodes.get(stop_id) {
273                self.apply_insert(created_node, diff_child, index)?;
274            }
275        }
276
277        Ok(())
278    }
279
280    /// Performs a DFS copy of the source tree.
281    #[allow(clippy::only_used_in_recursion)]
282    fn dfs_copy(
283        &self,
284        dst: &NodeRef,
285        src: &NodeRef,
286        stop_nodes: &mut HashMap<u64, Option<NodeRef>>,
287    ) -> Result<()> {
288        let (content, src_id, children) = {
289            let src_borrowed = src.borrow();
290            (
291                src_borrowed.content().cloned(),
292                src_borrowed.id(),
293                src_borrowed.children().to_vec(),
294            )
295        };
296
297        let copied = new_branch_node(content);
298        NodeInner::add_child_to_ref(dst, copied.clone());
299
300        if let std::collections::hash_map::Entry::Occupied(mut e) = stop_nodes.entry(src_id) {
301            // Mark this as the stop point and record the created node
302            e.insert(Some(copied));
303            return Ok(());
304        }
305
306        // Copy children
307        for child in children {
308            self.dfs_copy(&copied, &child, stop_nodes)?;
309        }
310
311        Ok(())
312    }
313
314    /// Writes the tree to the output.
315    #[allow(clippy::only_used_in_recursion)]
316    fn dump_tree<W: Write>(
317        &self,
318        node: &NodeRef,
319        writer: &mut W,
320        indent: usize,
321    ) -> std::io::Result<()> {
322        let borrowed = node.borrow();
323
324        match borrowed.content() {
325            Some(XmlContent::Text(text)) => {
326                // Text nodes are written verbatim (no indentation) to avoid
327                // corrupting text content per thesis requirement.
328                let text_str: String = text.text().iter().collect();
329                write!(writer, "{}", escape_xml(&text_str))?;
330            }
331            Some(XmlContent::Comment(comment)) => {
332                // Output comment
333                let comment_text: String = comment.text().iter().collect();
334                writeln!(
335                    writer,
336                    "{}<!-- {} -->",
337                    Self::indent_str_for(indent),
338                    comment_text
339                )?;
340            }
341            Some(XmlContent::ProcessingInstruction(pi)) => {
342                // Output processing instruction
343                if pi.content().is_empty() {
344                    writeln!(
345                        writer,
346                        "{}<?{}?>",
347                        Self::indent_str_for(indent),
348                        pi.target()
349                    )?;
350                } else {
351                    writeln!(
352                        writer,
353                        "{}<?{} {}?>",
354                        Self::indent_str_for(indent),
355                        pi.target(),
356                        pi.content()
357                    )?;
358                }
359            }
360            Some(XmlContent::Element(elem)) => {
361                write!(writer, "{}<{}", Self::indent_str_for(indent), elem.qname())?;
362                // Namespace declarations (sorted for deterministic output)
363                let mut ns_prefixes: Vec<_> = elem.namespace_decls().keys().collect();
364                ns_prefixes.sort();
365                for prefix in ns_prefixes {
366                    if let Some(uri) = elem.namespace_decls().get(prefix) {
367                        if prefix.is_empty() {
368                            write!(writer, " xmlns=\"{}\"", escape_xml_attr(uri))?;
369                        } else {
370                            write!(writer, " xmlns:{}=\"{}\"", prefix, escape_xml_attr(uri))?;
371                        }
372                    }
373                }
374                // Sort attributes for consistent output
375                let mut attr_names: Vec<_> = elem.attributes().keys().collect();
376                attr_names.sort();
377                for name in attr_names {
378                    if let Some(value) = elem.attributes().get(name) {
379                        write!(writer, " {}=\"{}\"", name, escape_xml_attr(value))?;
380                    }
381                }
382                writeln!(writer, ">")?;
383
384                for child in borrowed.children() {
385                    self.dump_tree(child, writer, indent + 1)?;
386                }
387
388                writeln!(
389                    writer,
390                    "{}</{}>",
391                    Self::indent_str_for(indent),
392                    elem.qname()
393                )?;
394            }
395            None => {}
396        }
397
398        Ok(())
399    }
400
401    /// Returns the indentation string for a given level.
402    fn indent_str_for(level: usize) -> String {
403        "  ".repeat(level)
404    }
405}
406
407/// Escapes special XML characters in text content.
408fn escape_xml(s: &str) -> String {
409    s.replace('&', "&amp;")
410        .replace('<', "&lt;")
411        .replace('>', "&gt;")
412}
413
414/// Escapes special XML characters in attribute values.
415fn escape_xml_attr(s: &str) -> String {
416    s.replace('&', "&amp;")
417        .replace('<', "&lt;")
418        .replace('>', "&gt;")
419        .replace('"', "&quot;")
420}
421
422#[cfg(test)]
423mod tests {
424    use super::*;
425    use crate::node::{new_base_node, XmlText};
426
427    fn create_base_tree() -> NodeRef {
428        let root = new_base_node(Some(XmlContent::Element(XmlElement::new(
429            "root".to_string(),
430            HashMap::new(),
431        ))));
432        let child1 = new_base_node(Some(XmlContent::Element(XmlElement::new(
433            "child1".to_string(),
434            HashMap::new(),
435        ))));
436        let child2 = new_base_node(Some(XmlContent::Element(XmlElement::new(
437            "child2".to_string(),
438            HashMap::new(),
439        ))));
440        let text = new_base_node(Some(XmlContent::Text(XmlText::new("hello"))));
441
442        NodeInner::add_child_to_ref(&child1, text);
443        NodeInner::add_child_to_ref(&root, child1);
444        NodeInner::add_child_to_ref(&root, child2);
445
446        root
447    }
448
449    fn create_simple_diff() -> NodeRef {
450        // Create a diff that just copies the root
451        // <$ROOT$><diff><copy src="0" /></diff></$ROOT$>
452        let fake_root = new_branch_node(Some(XmlContent::Element(XmlElement::new(
453            "$ROOT$".to_string(),
454            HashMap::new(),
455        ))));
456        let diff_elem = new_branch_node(Some(XmlContent::Element(XmlElement::new(
457            DIFF_ROOT_TAG.to_string(),
458            HashMap::new(),
459        ))));
460
461        NodeInner::add_child_to_ref(&fake_root, diff_elem);
462
463        fake_root
464    }
465
466    #[test]
467    fn test_patch_new() {
468        let _patch = Patch::new();
469        // Just verify construction works
470    }
471
472    #[test]
473    fn test_escape_functions() {
474        assert_eq!(escape_xml("<test>"), "&lt;test&gt;");
475        assert_eq!(escape_xml_attr("\"test\""), "&quot;test&quot;");
476    }
477
478    #[test]
479    fn test_simple_patch() {
480        let base = create_base_tree();
481        let diff = create_simple_diff();
482        let patch = Patch::new();
483
484        let mut output = Vec::new();
485        let result = patch.patch(&base, &diff, &mut output);
486
487        // This might fail due to diff structure, but shouldn't panic
488        // The actual integration test would use properly structured diffs
489        assert!(result.is_ok() || result.is_err());
490    }
491}