x_diff_rs/
tree.rs

1use md5::Digest;
2use roxmltree::{Attribute, Document, ExpandedName, Node, NodeId};
3use std::{borrow::Cow, fmt::Display, hash::Hash};
4
5#[derive(Debug, Clone)]
6pub enum XTreeError {
7    ParseError(roxmltree::Error),
8}
9
10/// A tree representation of the XML format. It is a wrapper around [roxmltree::Document]
11#[derive(Debug)]
12pub struct XTree<'doc>(Document<'doc>);
13
14/// A node in the XML tree. It can be an element node, an attribute node, or a text node.
15#[derive(Debug, Clone, Copy, PartialEq)]
16pub struct XNode<'a, 'doc: 'a> {
17    node: Node<'a, 'doc>,
18    attr: Option<Attribute<'a, 'doc>>,
19}
20
21#[derive(Debug, Clone, Copy, PartialEq)]
22pub enum XNodeId<'a, 'doc> {
23    ElementOrText(NodeId),
24    Attribute {
25        node_id: NodeId,
26        attr: Attribute<'a, 'doc>,
27    },
28}
29
30impl Hash for XNode<'_, '_> {
31    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
32        self.id().to_string().hash(state);
33    }
34}
35
36impl Eq for XNode<'_, '_> {}
37
38#[derive(Debug, Clone)]
39pub enum XNodeName<'a, 'b> {
40    TagName(ExpandedName<'a, 'b>),
41    AttributeName(Attribute<'a, 'b>),
42    Text,
43}
44
45impl Display for XNodeId<'_, '_> {
46    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47        match self {
48            XNodeId::ElementOrText(node_id) => write!(f, "{}", node_id.get()),
49            XNodeId::Attribute { node_id, attr } => {
50                if let Some(ns) = attr.namespace() {
51                    write!(f, "{}[{{{}}}{}]", node_id.get(), ns, attr.name())
52                } else {
53                    write!(f, "{}[{}]", node_id.get(), attr.name())
54                }
55            }
56        }
57    }
58}
59
60impl<'doc> From<Document<'doc>> for XTree<'doc> {
61    fn from(value: Document<'doc>) -> Self {
62        Self(value)
63    }
64}
65
66impl<'a, 'doc: 'a> XNode<'a, 'doc> {
67    /// Get node id.
68    pub fn id(&'a self) -> XNodeId<'a, 'doc> {
69        if let Some(attr) = self.attr {
70            XNodeId::Attribute {
71                node_id: self.node.id(),
72                attr,
73            }
74        } else {
75            XNodeId::ElementOrText(self.node.id())
76        }
77    }
78
79    /// Get node name.
80    pub fn name(&self) -> XNodeName {
81        if let Some(attr) = self.attr {
82            XNodeName::AttributeName(attr)
83        } else if self.is_text() {
84            XNodeName::Text
85        } else {
86            XNodeName::TagName(self.node.tag_name())
87        }
88    }
89
90    /// Get the parent node.
91    pub fn parent(&self) -> Option<Self> {
92        if self.attr.is_some() {
93            Some(Self {
94                node: self.node,
95                attr: None,
96            })
97        } else {
98            self.node
99                .parent()
100                .filter(|p| !p.is_root())
101                .map(|parent| Self {
102                    node: parent,
103                    attr: None,
104                })
105        }
106    }
107
108    /// Get the children nodes.
109    pub fn children(&self) -> Vec<Self> {
110        if self.attr.is_some() {
111            return Vec::new();
112        }
113        let nodes = self
114            .node
115            .children()
116            .filter(|node| node.is_element() || node.is_text())
117            .filter(|node| !(node.is_text() && node.text().unwrap().trim().is_empty()))
118            .map(|node| Self { node, attr: None });
119        let attrs = self.node.attributes().map(|attr| Self {
120            node: self.node,
121            attr: Some(attr),
122        });
123        nodes.chain(attrs).collect()
124    }
125
126    pub fn is_attribute(&self) -> bool {
127        self.attr.is_some()
128    }
129
130    pub fn is_text(&self) -> bool {
131        self.attr.is_none() && self.node.is_text()
132    }
133
134    pub fn is_element(&self) -> bool {
135        self.attr.is_none() && self.node.is_element()
136    }
137
138    /// Get the node value. Only attribute and text node have value.
139    pub fn value(&self) -> Option<&str> {
140        if let Some(attr) = self.attr {
141            Some(attr.value())
142        } else {
143            self.node.text()
144        }
145    }
146
147    /// Get the byte range of this node from the original text.
148    pub fn range(&self) -> core::ops::Range<usize> {
149        if let Some(attr) = self.attr {
150            attr.range()
151        } else {
152            self.node.range()
153        }
154    }
155
156    pub(crate) fn hash(&self) -> Digest {
157        if let Some(attr) = self.attr {
158            md5::compute(format!(
159                "{}{}={}",
160                attr.namespace().unwrap_or_default(),
161                attr.name(),
162                attr.value()
163            ))
164        } else {
165            match self.node.node_type() {
166                roxmltree::NodeType::Element => {
167                    let name = self.node.tag_name().name();
168                    let namespace = self.node.tag_name().namespace().unwrap_or_default();
169                    md5::compute(format!("{}:{}", namespace, name))
170                }
171                roxmltree::NodeType::Text => {
172                    md5::compute(self.node.text().unwrap_or_default().trim())
173                }
174                _ => unreachable!(),
175            }
176        }
177    }
178
179    pub(crate) fn signature(&self) -> Cow<str> {
180        if let Some(attr) = self.attr {
181            Cow::Owned(format!(
182                "{}{}",
183                attr.namespace().unwrap_or_default(),
184                attr.name()
185            ))
186        } else {
187            match self.node.node_type() {
188                roxmltree::NodeType::Element => Cow::Owned(format!(
189                    "{}:{}",
190                    self.node.tag_name().namespace().unwrap_or_default(),
191                    self.node.tag_name().name()
192                )),
193                roxmltree::NodeType::Text => Cow::Borrowed("text"),
194                _ => unreachable!(),
195            }
196        }
197    }
198}
199
200impl<'a, 'doc: 'a> XTree<'doc> {
201    /// Parse XML to tree structure.
202    pub fn parse(text: &'doc str) -> Result<Self, XTreeError> {
203        Ok(Self::from(
204            Document::parse(text).map_err(XTreeError::ParseError)?,
205        ))
206    }
207
208    /// Get an [XNode] from [XNodeId].
209    pub fn get_node(&'doc self, id: XNodeId<'a, 'doc>) -> Option<XNode<'a, 'doc>> {
210        match id {
211            XNodeId::ElementOrText(node_id) => self
212                .0
213                .get_node(node_id)
214                .map(|node| XNode { node, attr: None }),
215            XNodeId::Attribute { node_id, attr } => self.0.get_node(node_id).map(|node| XNode {
216                node,
217                attr: Some(attr),
218            }),
219        }
220    }
221
222    /// Get the root node.
223    pub fn root(&self) -> XNode {
224        XNode {
225            node: self.0.root_element(),
226            attr: None,
227        }
228    }
229
230    /// Get the underlying roxmltree::Document.
231    pub fn get_roxmltree_doc(self) -> roxmltree::Document<'doc> {
232        self.0
233    }
234}
235
236#[cfg(feature = "print")]
237pub mod print {
238    use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
239
240    use crate::diff::{Edit, diff};
241
242    use super::{XNode, XTree};
243    use std::{collections::HashMap, io::Write};
244
245    #[derive(Debug, Clone)]
246    pub struct PrintTreeOptions {
247        with_id: bool,
248        with_namespace: bool,
249        indent: usize,
250    }
251
252    #[derive(Debug, Clone)]
253    pub struct PrintTreeDiffOptions {
254        with_namespace: bool,
255        indent: usize,
256        color: bool,
257    }
258
259    #[derive(Debug, Clone, Copy)]
260    enum GutterKind {
261        None,
262        Blank,
263        Add,
264        Delete,
265    }
266
267    impl GutterKind {
268        fn symbol(&self) -> &'static str {
269            match self {
270                GutterKind::None => "",
271                GutterKind::Blank => " ",
272                GutterKind::Add => "+",
273                GutterKind::Delete => "-",
274            }
275        }
276    }
277
278    impl Default for PrintTreeOptions {
279        fn default() -> Self {
280            Self {
281                indent: 3,
282                with_id: false,
283                with_namespace: false,
284            }
285        }
286    }
287
288    impl Default for PrintTreeDiffOptions {
289        fn default() -> Self {
290            Self {
291                indent: 3,
292                color: true,
293                with_namespace: false,
294            }
295        }
296    }
297
298    impl PrintTreeDiffOptions {
299        pub fn indent(mut self, n: usize) -> Self {
300            self.indent = n;
301            self
302        }
303
304        pub fn with_color(mut self, yes: bool) -> Self {
305            self.color = yes;
306            self
307        }
308
309        pub fn with_namespace(mut self, yes: bool) -> Self {
310            self.with_namespace = yes;
311            self
312        }
313    }
314
315    impl PrintTreeOptions {
316        pub fn with_indent(mut self, n: usize) -> Self {
317            assert!(n > 0);
318            self.indent = n;
319            self
320        }
321
322        /// Attach ID to nodes while printing. The node id will be wrapped around `[]`.
323        pub fn with_node_id(mut self) -> Self {
324            self.with_id = true;
325            self
326        }
327
328        pub fn with_namespace(mut self, yes: bool) -> Self {
329            self.with_namespace = yes;
330            self
331        }
332    }
333
334    pub fn write_tree_diff<W: WriteColor>(
335        w: &mut W,
336        tree1: &XTree,
337        tree2: &XTree,
338        options: PrintTreeDiffOptions,
339    ) -> std::io::Result<()> {
340        let edits = diff(tree1, tree2);
341
342        // trees are the same
343        if edits.is_empty() {
344            return write!(w, "The trees are the same.");
345        }
346
347        // trees are completely different
348        if matches!(edits[0], Edit::ReplaceRoot) {
349            let mut vlines = Vec::new();
350            write_subtree(
351                w,
352                tree1.root(),
353                &PrintTreeOptions::default()
354                    .with_indent(options.indent)
355                    .with_namespace(options.with_namespace),
356                GutterKind::Delete,
357                &mut vlines,
358            )?;
359            return write_subtree(
360                w,
361                tree2.root(),
362                &PrintTreeOptions::default()
363                    .with_indent(options.indent)
364                    .with_namespace(options.with_namespace),
365                GutterKind::Add,
366                &mut vlines,
367            );
368        }
369
370        let mut changed_nodes = HashMap::new();
371        for e in edits {
372            let key = match e {
373                crate::diff::Edit::Insert {
374                    child_node: _,
375                    to_node,
376                } => to_node.id().to_string(),
377                crate::diff::Edit::Delete(node) => node.id().to_string(),
378                crate::diff::Edit::Update { old, new: _ } => old.id().to_string(),
379                crate::diff::Edit::ReplaceRoot => unreachable!(),
380            };
381            changed_nodes.entry(key).or_insert(Vec::new()).push(e);
382        }
383
384        let mut vlines = Vec::new();
385        write_subtree_diff(w, tree1.root(), &changed_nodes, &options, &mut vlines)
386    }
387
388    fn write_subtree_diff<W: WriteColor>(
389        w: &mut W,
390        node: XNode,
391        changed_nodes: &HashMap<String, Vec<Edit>>,
392        options: &PrintTreeDiffOptions,
393        vlines: &mut Vec<bool>,
394    ) -> std::io::Result<()> {
395        if let Some(edits) = changed_nodes.get(&node.id().to_string()) {
396            if matches!(edits[0], Edit::Insert { .. }) {
397                write_node_line(
398                    w,
399                    node,
400                    &PrintTreeOptions::default()
401                        .with_indent(options.indent)
402                        .with_namespace(options.with_namespace),
403                    GutterKind::Blank,
404                    vlines,
405                )?;
406                let children = node.children();
407                if children.is_empty() {
408                    return Ok(());
409                }
410                vlines.push(true);
411                for child in children {
412                    write_subtree_diff(w, child, changed_nodes, options, vlines)?;
413                }
414            }
415            let last_index = edits.len() - 1;
416            for (i, e) in edits.iter().enumerate() {
417                match e {
418                    Edit::Insert {
419                        child_node,
420                        to_node: _,
421                    } => {
422                        if i == last_index {
423                            *vlines.last_mut().unwrap() = false;
424                        }
425                        write_subtree(
426                            w,
427                            *child_node,
428                            &PrintTreeOptions::default()
429                                .with_indent(options.indent)
430                                .with_namespace(options.with_namespace),
431                            GutterKind::Add,
432                            vlines,
433                        )?;
434                    }
435                    Edit::Delete(_) => write_subtree(
436                        w,
437                        node,
438                        &PrintTreeOptions::default()
439                            .with_indent(options.indent)
440                            .with_namespace(options.with_namespace),
441                        GutterKind::Delete,
442                        vlines,
443                    )?,
444                    Edit::Update { old, new } => {
445                        write_subtree(
446                            w,
447                            *old,
448                            &PrintTreeOptions::default()
449                                .with_indent(options.indent)
450                                .with_namespace(options.with_namespace),
451                            GutterKind::Delete,
452                            vlines,
453                        )?;
454                        write_subtree(
455                            w,
456                            *new,
457                            &PrintTreeOptions::default()
458                                .with_indent(options.indent)
459                                .with_namespace(options.with_namespace),
460                            GutterKind::Add,
461                            vlines,
462                        )?;
463                    }
464                    Edit::ReplaceRoot => unreachable!(),
465                }
466            }
467            if matches!(edits[0], Edit::Insert { .. }) {
468                vlines.pop();
469            }
470        } else {
471            write_node_line(
472                w,
473                node,
474                &PrintTreeOptions::default()
475                    .with_indent(options.indent)
476                    .with_namespace(options.with_namespace),
477                GutterKind::Blank,
478                vlines,
479            )?;
480            let children = node.children();
481            if children.is_empty() {
482                return Ok(());
483            }
484            vlines.push(true);
485            let last_index = children.len() - 1;
486            for (i, child) in children.into_iter().enumerate() {
487                if i == last_index {
488                    *vlines.last_mut().unwrap() = false;
489                }
490                write_subtree_diff(w, child, changed_nodes, options, vlines)?;
491            }
492            vlines.pop();
493        }
494        Ok(())
495    }
496
497    /// Print the tree to stdout
498    pub fn print_tree(tree: &XTree, options: PrintTreeOptions) {
499        let mut stdout = StandardStream::stdout(ColorChoice::Never);
500        write_tree(&mut stdout, tree, options).unwrap();
501        stdout.flush().unwrap();
502    }
503
504    /// Print the tree difference to stdout
505    pub fn print_tree_diff(tree1: &XTree, tree2: &XTree, options: PrintTreeDiffOptions) {
506        let mut stdout = StandardStream::stdout(if options.color {
507            ColorChoice::Always
508        } else {
509            ColorChoice::Never
510        });
511        write_tree_diff(&mut stdout, tree1, tree2, options).unwrap();
512        stdout.flush().unwrap();
513    }
514
515    pub fn write_tree<W: WriteColor>(
516        w: &mut W,
517        tree: &XTree,
518        options: PrintTreeOptions,
519    ) -> std::io::Result<()> {
520        let mut vlines = Vec::new();
521        write_subtree(w, tree.root(), &options, GutterKind::None, &mut vlines)
522    }
523
524    fn node_text_prefix(node: &XNode, with_id: bool) -> String {
525        if with_id {
526            format!("[{}] ", node.id())
527        } else {
528            String::new()
529        }
530    }
531
532    fn node_text(node: &XNode, prefix: &str, with_namespace: bool) -> String {
533        let node_str = if with_namespace {
534            match node.name() {
535                crate::tree::XNodeName::TagName(expanded_name) => {
536                    if let Some(ns) = expanded_name.namespace() {
537                        format!("<{{{}}}{}>", ns, expanded_name.name())
538                    } else {
539                        format!("<{}>", expanded_name.name())
540                    }
541                }
542                crate::tree::XNodeName::AttributeName(attribute) => {
543                    if let Some(ns) = attribute.namespace() {
544                        format!("{{{ns}}}{}: {}", attribute.name(), attribute.value())
545                    } else {
546                        format!("{}: {:?}", attribute.name(), attribute.value())
547                    }
548                }
549                crate::tree::XNodeName::Text => {
550                    let text = node.node.text().unwrap_or_default().trim();
551                    let mut short_text: String = text.chars().take(40).collect();
552                    if text.chars().count() > 40 {
553                        short_text.push_str("...");
554                    }
555                    format!("{:?}", short_text)
556                }
557            }
558        } else {
559            match node.name() {
560                crate::tree::XNodeName::TagName(expanded_name) => {
561                    format!("<{}>", expanded_name.name())
562                }
563                crate::tree::XNodeName::AttributeName(attribute) => {
564                    format!("{}: {:?}", attribute.name(), attribute.value())
565                }
566                crate::tree::XNodeName::Text => {
567                    let text = node.node.text().unwrap_or_default().trim();
568                    let mut short_text: String = text.chars().take(40).collect();
569                    if text.chars().count() > 40 {
570                        short_text.push_str("...");
571                    }
572                    format!("{:?}", short_text)
573                }
574            }
575        };
576        format!("{}{}", prefix, node_str)
577    }
578
579    fn set_color<W: WriteColor>(w: &mut W, gutter: GutterKind) -> std::io::Result<()> {
580        match gutter {
581            GutterKind::None => w.reset(),
582            GutterKind::Blank => w.reset(),
583            GutterKind::Add => w.set_color(ColorSpec::new().set_fg(Some(Color::Green))),
584            GutterKind::Delete => w.set_color(ColorSpec::new().set_fg(Some(Color::Red))),
585        }
586    }
587
588    fn write_node_line<W: WriteColor>(
589        w: &mut W,
590        node: XNode,
591        options: &PrintTreeOptions,
592        gutter: GutterKind,
593        vlines: &mut [bool],
594    ) -> std::io::Result<()> {
595        set_color(w, gutter)?;
596        let gutter_str = gutter.symbol();
597        let node_prefix = node_text_prefix(&node, options.with_id);
598        let node_line = if !vlines.is_empty() {
599            let mut prefix = String::new();
600            for pipe_char in &vlines[..vlines.len() - 1] {
601                prefix.push(if *pipe_char { '│' } else { ' ' });
602                prefix.push_str(&" ".repeat(options.indent - 1));
603            }
604            let suffix = if vlines[vlines.len() - 1] {
605                format!(
606                    "├─{}",
607                    node_text(&node, &node_prefix, options.with_namespace)
608                )
609            } else {
610                format!(
611                    "└─{}",
612                    node_text(&node, &node_prefix, options.with_namespace)
613                )
614            };
615            format!("{}{}", prefix, suffix)
616        } else {
617            node_text(&node, &node_prefix, options.with_namespace)
618        };
619        writeln!(w, "{}{}", gutter_str, node_line)?;
620        w.reset()
621    }
622
623    fn write_subtree<W: WriteColor>(
624        w: &mut W,
625        node: XNode,
626        options: &PrintTreeOptions,
627        gutter: GutterKind,
628        vlines: &mut Vec<bool>,
629    ) -> std::io::Result<()> {
630        set_color(w, gutter)?;
631        write_node_line(w, node, options, gutter, vlines)?;
632        let children = node.children();
633        if children.is_empty() {
634            return Ok(());
635        }
636        vlines.push(true);
637        let last_index = children.len() - 1;
638        for (i, child) in children.into_iter().enumerate() {
639            if i == last_index {
640                *vlines.last_mut().unwrap() = false;
641            }
642            write_subtree(w, child, options, gutter, vlines)?;
643        }
644        vlines.pop();
645        w.reset()?;
646        Ok(())
647    }
648
649    #[cfg(test)]
650    mod test {
651        use std::{fs, io::Cursor};
652
653        use termcolor::NoColor;
654
655        use super::*;
656        #[test]
657        fn test_print_tree() {
658            let content = fs::read_to_string("test/file1.xml").unwrap();
659            let tree = XTree::parse(&content).unwrap();
660            let mut buffer = Vec::new();
661            let cursor = Cursor::new(&mut buffer);
662            let mut no_color = NoColor::new(cursor);
663            write_tree(&mut no_color, &tree, PrintTreeOptions::default()).unwrap();
664            let expected = r#"
665<Profile>
666└─<Customer>
667   ├─<PersonName>
668   │  ├─<NameTitle>
669   │  │  └─"Mr."
670   │  ├─<GivenName>
671   │  │  └─"George"
672   │  ├─<MiddleName>
673   │  │  └─"A."
674   │  ├─<SurName>
675   │  │  └─"Smith"
676   │  ├─<Bio>
677   │  │  └─"A skilled engineer with a passion for so..."
678   │  └─NameType: "Default"
679   ├─<TelephoneInfo>
680   │  ├─<Telephone>
681   │  │  ├─<AreaCityCode>
682   │  │  │  └─"206"
683   │  │  └─<PhoneNumber>
684   │  │     └─"813-8698"
685   │  ├─PhoneTech: "Voice"
686   │  └─PhoneUse: "Work"
687   ├─<PaymentForm>
688   │  └─"..."
689   ├─<Address>
690   │  ├─<StreetNmbr>
691   │  │  ├─"From hell"
692   │  │  └─POBox: "4321-01"
693   │  ├─<BldgRoom>
694   │  │  └─"Suite 800"
695   │  ├─<CityName>
696   │  │  └─"Seattle"
697   │  ├─<StateProv>
698   │  │  ├─"WA"
699   │  │  └─PostalCode: "98108"
700   │  └─<CountryName>
701   │     └─"USA"
702   └─<Address>
703      ├─<StreetNmbr>
704      │  ├─"1200 Yakima St"
705      │  └─POBox: "4321-01"
706      ├─<BldgRoom>
707      │  └─"Suite 800"
708      ├─<CityName>
709      │  └─"Seattle"
710      ├─<StateProv>
711      │  ├─"WA"
712      │  └─PostalCode: "98108"
713      └─<CountryName>
714         └─"USA"
715"#;
716            assert_eq!(expected.trim(), String::from_utf8_lossy(&buffer).trim());
717        }
718
719        #[test]
720        fn test_print_diff() {
721            let text1 = fs::read_to_string("test/file1.xml").unwrap();
722            let tree1 = XTree::parse(&text1).unwrap();
723            let text2 = fs::read_to_string("test/file2.xml").unwrap();
724            let tree2 = XTree::parse(&text2).unwrap();
725            print_tree_diff(&tree1, &tree2, PrintTreeDiffOptions::default());
726        }
727    }
728}