1use dissimilar::{diff, Chunk};
2use xot::{Element, NameId, Node, Text, Value, Xot};
3
4use crate::comparison::Comparison;
5use crate::vtree::{Status, Vtree};
6
7#[derive(Debug, PartialEq, Eq)]
13pub enum Edit {
14 Insert(InsertPosition, InsertContent),
16 Delete(usize),
19 TextUpdate(usize, Vec<TextChange>),
22 AttributeUpdate(usize, Vec<AttributeChange>),
25}
26
27#[derive(Debug, PartialEq, Eq)]
29pub struct InsertPosition {
30 pub parent_node_id: usize,
32 pub child_position: u32,
35 pub descendant_count: u32,
39}
40
41#[derive(Debug, PartialEq, Eq)]
43pub enum InsertContent {
44 XmlNode(Node),
47 Text(String),
49}
50
51#[derive(Debug, PartialEq, Eq)]
53pub enum TextChange {
54 Equal(String),
56 Delete(String),
58 Insert(String),
60}
61
62#[derive(Debug, PartialEq, Eq)]
64pub enum AttributeChange {
65 Update(NameId, String),
67 Delete(NameId),
69 Insert(NameId, String),
71}
72
73impl Comparison {
74 pub(crate) fn edits(&mut self, xot: &mut Xot) -> Vec<Edit> {
83 self.diff_status(xot);
86
87 let mut edits = Vec::new();
88 for delete_id in self.deletes() {
89 edits.push(Edit::Delete(delete_id));
90 }
91 for (node_id, parent_node_id, (child_position, intervening)) in self.inserts() {
92 let insert_node = self.get_node_b(node_id);
93 let content = match xot.value(insert_node) {
94 Value::Element(_) => InsertContent::XmlNode(insert_node),
95 Value::Text(text) => {
96 let text = text.get();
97 InsertContent::Text(text.to_owned())
98 }
99 Value::ProcessingInstruction(_) => InsertContent::XmlNode(insert_node),
100 Value::Comment(_) => InsertContent::XmlNode(insert_node),
101 Value::Root => {
102 panic!("Unsupported root node type");
103 }
104 };
105 edits.push(Edit::Insert(
106 InsertPosition {
107 parent_node_id,
108 child_position,
109 descendant_count: intervening,
110 },
111 content,
112 ));
113 }
114
115 for (node_id_a, node_id_b) in self.updates() {
116 let node_a = self.get_node_a(node_id_a);
117 let node_b = self.get_node_b(node_id_b);
118 let value_pair = (xot.value(node_a), xot.value(node_b));
119 match value_pair {
120 (Value::Text(text_a), Value::Text(text_b)) => {
121 edits.push(Edit::TextUpdate(
122 node_id_a,
123 get_text_changes(text_a, text_b),
124 ));
125 }
126 (Value::Element(element_a), Value::Element(element_b)) => {
127 edits.push(Edit::AttributeUpdate(
128 node_id_a,
129 get_attribute_changes(element_a, element_b),
130 ));
131 }
132 (Value::Root, Value::Root) => {
133 }
135 _ => {
136 panic!("Unsupported node values for update");
137 }
138 }
139 }
140
141 edits.sort_by_key(|edit| match edit {
142 Edit::Delete(delete_id) => (*delete_id, 0),
143 Edit::TextUpdate(update_id, _changes) => (*update_id, 0),
144 Edit::AttributeUpdate(update_id, _changes) => (*update_id, 0),
145 Edit::Insert(position, _content) => (
149 position.parent_node_id + position.descendant_count as usize,
150 1,
151 ),
152 });
153 edits
154 }
155
156 fn deletes(&self) -> impl Iterator<Item = usize> + '_ {
158 let vtree_a = &self.vtree_a;
159 vtree_a
160 .nodes
161 .iter()
162 .enumerate()
163 .filter(|(_, vnode)| match vnode.status {
164 Status::Equal(_id_b) => false,
165 Status::Update(_id_b) => false,
166 Status::Different => true,
167 })
168 .filter(|(_, vnode)| {
171 let parent_id = vnode.parent_id;
172 if let Some(parent_id) = parent_id {
173 let parent = &vtree_a.nodes[parent_id];
174 match parent.status {
175 Status::Equal(_id_b) => true,
176 Status::Update(_id_b) => true,
177 Status::Different => false,
178 }
179 } else {
180 false
182 }
183 })
184 .map(|(id, _)| id)
185 }
186
187 fn inserts(&self) -> impl Iterator<Item = (usize, usize, (u32, u32))> + '_ {
190 let vtree_b = &self.vtree_b;
191 vtree_b
192 .nodes
193 .iter()
194 .enumerate()
195 .filter(|(_, vnode)| match vnode.status {
196 Status::Equal(_id_a) => false,
197 Status::Update(_id_a) => false,
198 Status::Different => true,
199 })
200 .filter_map(|(id, vnode)| {
205 let parent_id = vnode.parent_id;
206 if let Some(parent_id) = parent_id {
207 let vnode_parent = &self.vtree_b.nodes[parent_id];
208 match vnode_parent.status {
209 Status::Equal(id_a) | Status::Update(id_a) => Some((
210 id,
211 id_a as usize,
212 get_child_position(&self.vtree_b, parent_id, id),
213 )),
214 Status::Different => None,
215 }
216 } else {
217 None
219 }
220 })
221 }
222
223 fn updates(&self) -> impl Iterator<Item = (usize, usize)> + '_ {
224 let vtree_a = &self.vtree_a;
225 vtree_a
226 .nodes
227 .iter()
228 .enumerate()
229 .filter_map(|(index, node)| match node.status {
230 Status::Equal(_id_b) => None,
231 Status::Update(id_b) => Some((index, id_b as usize)),
232 Status::Different => None,
233 })
234 }
235}
236
237fn get_child_position(vtree: &Vtree, parent: usize, child: usize) -> (u32, u32) {
238 let parent_vnode = &vtree.nodes[parent];
239 let child_vnode = &vtree.nodes[child];
240 for (i, candidate_child) in vtree.children(parent_vnode).enumerate() {
241 if std::ptr::eq(candidate_child, child_vnode) {
242 return (i as u32, parent_vnode.descendant_count);
243 }
244 }
245 unreachable!("child not found in parent");
246}
247
248fn get_text_changes(text_a: &Text, text_b: &Text) -> Vec<TextChange> {
249 diff(text_a.get(), text_b.get())
250 .iter()
251 .map(|chunk| match chunk {
252 Chunk::Equal(text) => TextChange::Equal(text.to_string()),
253 Chunk::Delete(text) => TextChange::Delete(text.to_string()),
254 Chunk::Insert(text) => TextChange::Insert(text.to_string()),
255 })
256 .collect()
257}
258
259fn get_attribute_changes(element_a: &Element, element_b: &Element) -> Vec<AttributeChange> {
260 let mut changes = Vec::new();
261 for (name, value) in element_a.attributes().iter() {
262 if let Some(value_b) = element_b.attributes().get(name) {
263 if value != value_b {
264 changes.push(AttributeChange::Update(*name, value_b.clone()));
265 }
266 } else {
267 changes.push(AttributeChange::Delete(*name));
268 }
269 }
270 for (name, value) in element_b.attributes().iter() {
271 if !element_a.attributes().contains_key(name) {
272 changes.push(AttributeChange::Insert(*name, value.clone()));
273 }
274 }
275 changes
276}
277
278#[cfg(test)]
279mod tests {
280 use super::*;
281 use xot::{ValueType, Xot};
282
283 #[test]
284 fn test_deletes() {
285 let mut xot = Xot::new();
286 let doc_a = xot
288 .parse(concat!(
289 "<book><chapter><title>Text 1</title><para>Text 2</para></chapter>",
290 "<chapter><title>Text 4</title><para>Text 5</para></chapter>",
291 "<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
292 "<chapter><title>Text 9</title><para>Text 10</para></chapter>",
293 "<chapter><para>Text 11</para><para>Text 12</para></chapter></book>"
294 ))
295 .unwrap();
296 let doc_b = xot
297 .parse(concat!(
298 "<book><chapter><para>Text 2</para></chapter>",
299 "<chapter><title>Text 4</title><para>Text 25</para><para>Text 11</para></chapter>",
300 "<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
301 "<chapter><title>Text 9</title><para>Text 10</para></chapter>",
302 "<chapter><para>Text 12</para></chapter></book>"
303 ))
304 .unwrap();
305
306 let mut comparison = Comparison::new(&xot, doc_a, doc_b);
307 comparison.diff_status(&xot);
308 let deletes = comparison.deletes().collect::<Vec<_>>();
309 assert_eq!(deletes, vec![3, 25]);
310 }
311
312 #[test]
313 fn test_inserts() {
314 let mut xot = Xot::new();
315 let doc_a = xot
317 .parse(concat!(
318 "<book><chapter><title>Text 1</title><para>Text 2</para></chapter>",
319 "<chapter><title>Text 4</title><para>Text 5</para></chapter>",
320 "<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
321 "<chapter><title>Text 9</title><para>Text 10</para></chapter>",
322 "<chapter><para>Text 11</para><para>Text 12</para></chapter></book>"
323 ))
324 .unwrap();
325 let doc_b = xot
326 .parse(concat!(
327 "<book><chapter><para>Text 2</para></chapter>",
328 "<chapter><title>Text 4</title><para>Text 25</para><para>Text 11</para></chapter>",
329 "<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
330 "<chapter><title>Text 9</title><para>Text 10</para></chapter>",
331 "<chapter><para>Text 12</para></chapter></book>"
332 ))
333 .unwrap();
334
335 let mut comparison = Comparison::new(&xot, doc_a, doc_b);
336 comparison.diff_status(&xot);
337 let inserts = comparison.inserts().collect::<Vec<_>>();
338 assert_eq!(inserts, vec![(10, 7, (2, 6))]);
339 }
340
341 #[test]
342 fn test_updates() {
343 let mut xot = Xot::new();
344 let doc_a = xot
346 .parse(concat!(
347 "<book><chapter><title>Text 1</title><para>Text 2</para></chapter>",
348 "<chapter><title>Text 4</title><para>Text 5</para></chapter>",
349 "<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
350 "<chapter><title>Text 9</title><para>Text 10</para></chapter>",
351 "<chapter><para>Text 11</para><para>Text 12</para></chapter></book>"
352 ))
353 .unwrap();
354 let doc_b = xot
355 .parse(concat!(
356 "<book><chapter><para>Text 2</para></chapter>",
357 "<chapter><title>Text 4</title><para>Text 25</para><para>Text 11</para></chapter>",
358 "<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
359 "<chapter><title>Text 9</title><para>Text 10</para></chapter>",
360 "<chapter><para>Text 12</para></chapter></book>"
361 ))
362 .unwrap();
363
364 let mut comparison = Comparison::new(&xot, doc_a, doc_b);
365 comparison.diff_status(&xot);
366 let updates = comparison.updates().collect::<Vec<_>>();
367 assert_eq!(updates, vec![(0, 0), (11, 9)]);
368 }
369
370 #[test]
371 fn test_edits() {
372 let mut xot = Xot::new();
373 let xml_a = concat!(
375 "<book><chapter><title>Text 1</title><para>Text 2</para></chapter>",
376 "<chapter><title>Text 4</title><para>Text 5</para></chapter>",
377 "<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
378 "<chapter><title>Text 9</title><para>Text 10</para></chapter>",
379 "<chapter><para>Text 11</para><para>Text 12</para></chapter></book>"
380 );
381 let xml_b = concat!(
382 "<book><chapter><para>Text 2</para></chapter>",
383 "<chapter><title>Text 4</title><para>Text 25</para><para>Text 11</para></chapter>",
384 "<chapter><title>Text 6</title><para>Text 7<img/>Text 8</para></chapter>",
385 "<chapter><title>Text 9</title><para>Text 10</para></chapter>",
386 "<chapter><para>Text 12</para></chapter></book>"
387 );
388 let doc_a = xot.parse(xml_a).unwrap();
389 let doc_b = xot.parse(xml_b).unwrap();
390
391 let inserted_node = xot
392 .descendants(doc_b)
393 .find(|node| xot.text_content_str(*node) == Some("Text 11"))
394 .unwrap();
395
396 let mut comparison = Comparison::new(&xot, doc_a, doc_b);
397 let edits = comparison.edits(&mut xot);
398 assert_eq!(
399 edits,
400 vec![
401 Edit::Delete(3),
402 Edit::TextUpdate(
403 11,
404 vec![
405 (TextChange::Equal("Text ".to_string())),
406 (TextChange::Insert("2".to_string())),
407 (TextChange::Equal("5".to_string()))
408 ]
409 ),
410 Edit::Insert(
411 InsertPosition {
412 parent_node_id: 7,
413 child_position: 2,
414 descendant_count: 6
415 },
416 InsertContent::XmlNode(inserted_node)
417 ),
418 Edit::Delete(25)
419 ]
420 );
421 }
422
423 #[test]
424 fn test_edits_problematic() {
425 let mut xot = Xot::new();
426
427 let xml_a = r#"<section>
429 <title>T</title>
430 <para>P</para>
431
432 <section>First longer</section>
433
434 <section>Second</section>
435 </section>"#;
436 let xml_b = r#"<section>
437 <title>T</title>
438 <para>P</para>
439
440 <section>Second</section>
441
442 <section>First longer</section>
443 </section>"#;
444
445 let doc_a = xot.parse(xml_a).unwrap();
446 let doc_b = xot.parse(xml_b).unwrap();
447
448 let inserted_node = xot
449 .descendants(doc_b)
450 .find(|node| xot.text_content_str(*node) == Some("Second"))
451 .unwrap();
452
453 let mut comparison = Comparison::new(&xot, doc_a, doc_b);
454 let edits = comparison.edits(&mut xot);
455
456 assert_eq!(
457 edits,
458 vec![
459 Edit::Delete(11),
460 Edit::Delete(12),
461 Edit::Insert(
462 InsertPosition {
463 parent_node_id: 1,
464 child_position: 5,
465 descendant_count: 13
466 },
467 InsertContent::XmlNode(inserted_node)
468 ),
469 Edit::Insert(
470 InsertPosition {
471 parent_node_id: 1,
472 child_position: 6,
473 descendant_count: 13
474 },
475 InsertContent::Text("\n\n ".to_owned())
476 ),
477 ]
478 );
479 }
480
481 #[test]
482 fn test_deletes_problematic() {
483 let mut xot = Xot::new();
484 let xml_a = r#"<section>
485 <title>Section title</title>
486 <para>Compare documents with different root element</para>
487
488 <section>
489 <title>Sit Ipsum Consectetur Sem Ligula</title>
490 <para>Etiam porta sem malesuada <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
491 </section>
492
493 <section>
494 <title>Ultricies Mollis Mattis Ullamcorper Ridiculus</title>
495 <para>Morbi leo <code>porta ac consectetur</code>risus, ac, vestibulum at eros. Cras mattis consectetur purus sit amet fermentum. Donec id elit non mi porta gravida at eget metus. <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
496 </section>
497</section>"#;
498 let xml_b = r#"<section>
499 <title>Section title</title>
500 <para>Compare documents with different root element</para>
501
502 <section>
503 <title>Ultricies Mollis Mattis Ullamcorper Ridiculus</title>
504 <para>Morbi leo <code>porta ac consectetur</code>risus, ac, vestibulum at eros. Cras mattis consectetur purus sit amet fermentum. Donec id elit non mi porta gravida at eget metus. <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
505 </section>
506
507 <section>
508 <title>Sit Ipsum Consectetur Sem Ligula</title>
509 <para>Etiam porta sem malesuada <phrase>magna mollis euismod</phrase>. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</para>
510 </section>
511</section>"#;
512 let doc_a = xot.parse(xml_a).unwrap();
513 let doc_b = xot.parse(xml_b).unwrap();
514 let mut comparison = Comparison::new(&xot, doc_a, doc_b);
515 comparison.diff_status(&xot);
516 let deletes = comparison.deletes().collect::<Vec<_>>();
517 assert_eq!(deletes, vec![8, 9]);
518 }
519
520 #[test]
521 fn test_attribute_update() {
522 let mut xot = Xot::new();
523 let xml_a = r#"<r><a/></r>"#;
524 let xml_b = r#"<r><a id="1"/></r>"#;
525 let doc_a = xot.parse(xml_a).unwrap();
526 let doc_b = xot.parse(xml_b).unwrap();
527 let mut comparison = Comparison::new(&xot, doc_a, doc_b);
528 comparison.diff_status(&xot);
529 let updates = comparison.updates().collect::<Vec<_>>();
530 assert_eq!(updates, vec![(0, 0), (2, 2)]);
531 }
532
533 #[test]
534 fn test_attribute_update_edits() -> Result<(), xot::Error> {
535 let mut xot = Xot::new();
536 let xml_a = r#"<r><a/></r>"#;
537 let xml_b = r#"<r><a id="1"/></r>"#;
538 let doc_a = xot.parse(xml_a)?;
539 let doc_b = xot.parse(xml_b)?;
540 let mut comparison = Comparison::new(&xot, doc_a, doc_b);
541 let edits = comparison.edits(&mut xot);
542 let name_id = xot.add_name("id");
543 assert_eq!(
544 edits,
545 vec![Edit::AttributeUpdate(
546 2,
547 vec![AttributeChange::Insert(name_id, "1".to_string())]
548 )]
549 );
550 Ok(())
551 }
552
553 #[test]
554 fn test_attribute_propagation_edits() -> Result<(), xot::Error> {
555 let mut xot = Xot::new();
556 let xml_a = r#"<r><a>Text</a></r>"#;
557 let xml_b = r#"<r><a id="1">Text</a></r>"#;
558 let doc_a = xot.parse(xml_a)?;
559 let doc_b = xot.parse(xml_b)?;
560 let mut comparison = Comparison::new(&xot, doc_a, doc_b);
561 let edits = comparison.edits(&mut xot);
562 let name_id = xot.add_name("id");
563 assert_eq!(
564 edits,
565 vec![Edit::AttributeUpdate(
566 2,
567 vec![AttributeChange::Insert(name_id, "1".to_string())]
568 )]
569 );
570 Ok(())
571 }
572
573 #[test]
574 fn test_root_no_update_detected() -> Result<(), xot::Error> {
575 let mut xot = Xot::new();
576 let xml_a = r#"<p>Hello</p>"#;
578 let xml_b = r#"<p>Bye</p>"#;
579 let doc_a = xot.parse(xml_a)?;
580 let doc_b = xot.parse(xml_b)?;
581
582 let inserted_node = xot.document_element(doc_b)?;
583
584 let mut comparison = Comparison::new(&xot, doc_a, doc_b);
585 let edits = comparison.edits(&mut xot);
586 assert_eq!(
587 edits,
588 [
589 Edit::Delete(1),
590 Edit::Insert(
591 InsertPosition {
592 parent_node_id: 0,
593 child_position: 0,
594 descendant_count: 2
595 },
596 InsertContent::XmlNode(inserted_node)
597 )
598 ]
599 );
600 Ok(())
601 }
602
603 #[test]
604 fn test_processing_instruction_insert() {
605 let mut xot = Xot::new();
606 let doc_a = xot.parse("<book><p/></book>").unwrap();
607 let doc_b = xot.parse("<book><?pi?><p/></book>").unwrap();
608
609 let mut comparison = Comparison::new(&xot, doc_a, doc_b);
610 let edits = comparison.edits(&mut xot);
611
612 let inserted_node = xot
613 .descendants(doc_b)
614 .find(|node| xot.value_type(*node) == ValueType::ProcessingInstruction)
615 .unwrap();
616
617 assert_eq!(
618 edits,
619 vec![Edit::Insert(
620 InsertPosition {
621 parent_node_id: 1,
622 child_position: 0,
623 descendant_count: 2
624 },
625 InsertContent::XmlNode(inserted_node)
626 ),]
627 );
628 }
629}