1use std::io::Write;
7
8use crate::matching::Matching;
9use crate::node::{BranchNode, NodeRef, XmlContent};
10
11use super::bfs_index::BfsIndex;
12use super::diff_operation::{DiffOpType, DiffOperation};
13use super::{
14 DIFF_COPY_TAG, DIFF_CPYDST_ATTR, DIFF_CPYRUN_ATTR, DIFF_CPYSRC_ATTR, DIFF_INSERT_TAG,
15 DIFF_ROOTOP_ATTR, DIFF_ROOTOP_INS, DIFF_ROOT_TAG,
16};
17
18struct Sequence {
20 src: Option<u64>,
22 dst: Option<u64>,
24 tail: Option<u64>,
26 run: i64,
28}
29
30impl Sequence {
31 fn new() -> Self {
32 Sequence {
33 src: None,
34 dst: None,
35 tail: None,
36 run: -1,
37 }
38 }
39
40 fn set_empty(&mut self) {
41 self.run = -1;
42 }
43
44 fn is_empty(&self) -> bool {
45 self.run == -1
46 }
47
48 fn init(&mut self, src: u64, dst: Option<u64>) {
49 self.src = Some(src);
50 self.tail = Some(src);
51 self.dst = dst;
52 self.run = 1;
53 }
54
55 fn append(&mut self, src: u64) {
56 self.run += 1;
57 self.tail = Some(src);
58 }
59}
60
61pub struct Diff<'a> {
63 matching: &'a dyn Matching,
65 index: BfsIndex,
67 indent: std::cell::Cell<usize>,
69}
70
71impl<'a> Diff<'a> {
72 pub fn new(matching: &'a dyn Matching) -> Option<Self> {
74 let base_root = matching.base_root()?;
75 let index = BfsIndex::new(base_root);
76
77 Some(Diff {
78 matching,
79 index,
80 indent: std::cell::Cell::new(0),
81 })
82 }
83
84 pub fn with_index(matching: &'a dyn Matching, index: BfsIndex) -> Self {
86 Diff {
87 matching,
88 index,
89 indent: std::cell::Cell::new(0),
90 }
91 }
92
93 pub fn diff<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
95 if let Some(branch_root) = self.matching.branch_root() {
96 self.diff_from(writer, branch_root)
97 } else {
98 Err(std::io::Error::new(
99 std::io::ErrorKind::InvalidInput,
100 "No branch root",
101 ))
102 }
103 }
104
105 fn diff_from<W: Write>(&self, writer: &mut W, branch_root: &NodeRef) -> std::io::Result<()> {
107 let root_has_match = BranchNode::base_match(branch_root).is_some();
108
109 writeln!(writer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
110
111 if root_has_match {
112 let stop_nodes = self.get_stop_nodes(branch_root);
114 let op = DiffOperation::root_copy();
115 self.write_op_open(writer, &op)?;
116 self.write_copy(writer, &stop_nodes)?;
117 self.write_op_close(writer, &op)?;
118 } else {
119 let op = DiffOperation::root_insert();
121 self.write_op_open(writer, &op)?;
122 self.write_insert(writer, branch_root)?;
123 self.write_op_close(writer, &op)?;
124 }
125
126 Ok(())
127 }
128
129 fn get_stop_nodes(&self, node: &NodeRef) -> Vec<NodeRef> {
131 let mut stop_nodes = Vec::new();
132 self.matching.get_area_stop_nodes(&mut stop_nodes, node);
133 stop_nodes
134 }
135
136 fn write_copy<W: Write>(&self, writer: &mut W, stop_nodes: &[NodeRef]) -> std::io::Result<()> {
138 let mut seq = Sequence::new();
139
140 for stop_node in stop_nodes {
141 let dst = BranchNode::base_match(stop_node).and_then(|n| self.index.get_id(&n));
142
143 if !self.emit_child_list(writer, &mut seq, stop_node, dst, false)? {
144 let op = DiffOperation::insert(dst);
146 self.write_op_open(writer, &op)?;
147 self.write_op_close(writer, &op)?;
148 }
149 }
150
151 Ok(())
152 }
153
154 fn write_insert<W: Write>(&self, writer: &mut W, branch: &NodeRef) -> std::io::Result<()> {
156 let mut seq = Sequence::new();
157 self.write_content_open(writer, branch)?;
158 self.emit_child_list(writer, &mut seq, branch, None, true)?;
159 self.write_content_close(writer, branch)?;
160 Ok(())
161 }
162
163 fn emit_child_list<W: Write>(
165 &self,
166 writer: &mut W,
167 seq: &mut Sequence,
168 parent: &NodeRef,
169 dst: Option<u64>,
170 ins_mode: bool,
171 ) -> std::io::Result<bool> {
172 let children: Vec<_> = parent.borrow().children().to_vec();
173 let child_count = children.len();
174
175 for (ic, child) in children.iter().enumerate() {
176 let last_stop_node = ic == child_count - 1;
177 let base_match = BranchNode::base_match(child);
178
179 if let Some(ref base) = base_match {
180 let child_stop_nodes = self.get_stop_nodes(child);
182 let src = self.index.get_id(base);
183
184 if let Some(src_id) = src {
185 if child_stop_nodes.is_empty() && !last_stop_node {
186 if seq.is_empty() {
188 seq.init(src_id, dst);
189 continue;
190 } else if self.sequence_appends(seq, src_id, dst) {
191 seq.append(src_id);
192 continue;
193 }
194 }
195
196 if !self.sequence_appends(seq, src_id, dst) {
198 if !seq.is_empty() {
200 let op = DiffOperation::copy(seq.src.unwrap(), seq.dst, seq.run as u64);
201 self.write_op_open(writer, &op)?;
202 self.write_op_close(writer, &op)?;
203 }
204
205 if !child_stop_nodes.is_empty() || last_stop_node {
206 let op = DiffOperation::copy(src_id, dst, 1);
207 self.write_op_open(writer, &op)?;
208 self.write_copy(writer, &child_stop_nodes)?;
209 self.write_op_close(writer, &op)?;
210 seq.set_empty();
211 } else {
212 seq.init(src_id, dst);
213 }
214 } else {
215 seq.append(src_id);
217 let op = DiffOperation::copy(seq.src.unwrap(), seq.dst, seq.run as u64);
218 self.write_op_open(writer, &op)?;
219 self.write_copy(writer, &child_stop_nodes)?;
220 self.write_op_close(writer, &op)?;
221 seq.set_empty();
222 }
223 }
224 } else {
225 if !seq.is_empty() {
227 let op = DiffOperation::copy(seq.src.unwrap(), seq.dst, seq.run as u64);
228 self.write_op_open(writer, &op)?;
229 self.write_op_close(writer, &op)?;
230 seq.set_empty();
231 }
232
233 if !ins_mode {
234 let prev_has_match =
236 ic > 0 && BranchNode::base_match(&children[ic - 1]).is_some();
237 if ic == 0 || prev_has_match {
238 let op = DiffOperation::insert(dst);
239 self.write_op_open(writer, &op)?;
240 }
241 }
242
243 self.write_insert(writer, child)?;
244
245 if !ins_mode {
246 let next_has_match = !last_stop_node
248 && ic + 1 < child_count
249 && BranchNode::base_match(&children[ic + 1]).is_some();
250 if last_stop_node || next_has_match {
251 let op = DiffOperation::insert(None);
252 self.write_op_close(writer, &op)?;
253 }
254 }
255 }
256 }
257
258 Ok(!children.is_empty())
259 }
260
261 fn sequence_appends(&self, seq: &Sequence, src: u64, dst: Option<u64>) -> bool {
263 if seq.is_empty() {
264 return false;
265 }
266 if dst != seq.dst {
267 return false;
268 }
269 match (seq.tail, self.index.lookup(seq.tail.unwrap())) {
270 (Some(_), Some(tail_node)) => {
271 if let Some(next_node) = self.index.lookup(src) {
272 self.index.appends(&tail_node, &next_node)
273 } else {
274 false
275 }
276 }
277 _ => false,
278 }
279 }
280
281 fn write_op_open<W: Write>(&self, writer: &mut W, op: &DiffOperation) -> std::io::Result<()> {
283 let indent_str = Self::indent_str_for(self.indent.get());
284 match op.op_type {
285 DiffOpType::RootCopy => {
286 writeln!(writer, "{}<{}>", indent_str, DIFF_ROOT_TAG)?;
287 self.indent.set(self.indent.get() + 1);
288 }
289 DiffOpType::RootInsert => {
290 writeln!(
291 writer,
292 "{}<{} {}=\"{}\">",
293 indent_str, DIFF_ROOT_TAG, DIFF_ROOTOP_ATTR, DIFF_ROOTOP_INS
294 )?;
295 self.indent.set(self.indent.get() + 1);
296 }
297 DiffOpType::Copy => {
298 write!(writer, "{}<{}", indent_str, DIFF_COPY_TAG)?;
299 if let Some(src) = op.source {
300 write!(writer, " {}=\"{}\"", DIFF_CPYSRC_ATTR, src)?;
301 }
302 if let Some(dst) = op.destination {
303 write!(writer, " {}=\"{}\"", DIFF_CPYDST_ATTR, dst)?;
304 }
305 if let Some(run) = op.run {
306 if run > 1 {
307 write!(writer, " {}=\"{}\"", DIFF_CPYRUN_ATTR, run)?;
308 }
309 }
310 writeln!(writer, ">")?;
311 self.indent.set(self.indent.get() + 1);
312 }
313 DiffOpType::Insert => {
314 write!(writer, "{}<{}", indent_str, DIFF_INSERT_TAG)?;
315 if let Some(dst) = op.destination {
316 write!(writer, " {}=\"{}\"", DIFF_CPYDST_ATTR, dst)?;
317 }
318 writeln!(writer, ">")?;
319 self.indent.set(self.indent.get() + 1);
320 }
321 }
322 Ok(())
323 }
324
325 fn write_op_close<W: Write>(&self, writer: &mut W, op: &DiffOperation) -> std::io::Result<()> {
327 self.indent.set(self.indent.get() - 1);
328 let indent_str = Self::indent_str_for(self.indent.get());
329 match op.op_type {
330 DiffOpType::RootCopy | DiffOpType::RootInsert => {
331 writeln!(writer, "{}</{}>", indent_str, DIFF_ROOT_TAG)?;
332 }
333 DiffOpType::Copy => {
334 writeln!(writer, "{}</{}>", indent_str, DIFF_COPY_TAG)?;
335 }
336 DiffOpType::Insert => {
337 writeln!(writer, "{}</{}>", indent_str, DIFF_INSERT_TAG)?;
338 }
339 }
340 Ok(())
341 }
342
343 fn write_content_open<W: Write>(&self, writer: &mut W, node: &NodeRef) -> std::io::Result<()> {
345 let borrowed = node.borrow();
346 match borrowed.content() {
347 Some(XmlContent::Text(text)) => {
348 let text_str: String = text.text().iter().collect();
350 write!(
351 writer,
352 "{}{}",
353 Self::indent_str_for(self.indent.get()),
354 escape_xml(&text_str)
355 )?;
356 }
357 Some(XmlContent::Comment(comment)) => {
358 let comment_text: String = comment.text().iter().collect();
360 writeln!(
361 writer,
362 "{}<!-- {} -->",
363 Self::indent_str_for(self.indent.get()),
364 comment_text
365 )?;
366 }
367 Some(XmlContent::Element(elem)) => {
368 write!(
369 writer,
370 "{}<{}",
371 Self::indent_str_for(self.indent.get()),
372 elem.qname()
373 )?;
374 let mut attr_names: Vec<_> = elem.attributes().keys().collect();
376 attr_names.sort();
377 for name in attr_names {
378 if let Some(value) = elem.attributes().get(name) {
379 write!(writer, " {}=\"{}\"", name, escape_xml_attr(value))?;
380 }
381 }
382 writeln!(writer, ">")?;
383 self.indent.set(self.indent.get() + 1);
384 }
385 None => {}
386 }
387 Ok(())
388 }
389
390 fn write_content_close<W: Write>(&self, writer: &mut W, node: &NodeRef) -> std::io::Result<()> {
392 let borrowed = node.borrow();
393 if let Some(XmlContent::Element(elem)) = borrowed.content() {
394 self.indent.set(self.indent.get() - 1);
395 writeln!(
396 writer,
397 "{}</{}>",
398 Self::indent_str_for(self.indent.get()),
399 elem.qname()
400 )?;
401 }
402 Ok(())
403 }
404
405 fn indent_str_for(level: usize) -> String {
407 " ".repeat(level)
408 }
409}
410
411fn escape_xml(s: &str) -> String {
413 s.replace('&', "&")
414 .replace('<', "<")
415 .replace('>', ">")
416}
417
418fn escape_xml_attr(s: &str) -> String {
420 s.replace('&', "&")
421 .replace('<', "<")
422 .replace('>', ">")
423 .replace('"', """)
424}
425
426#[cfg(test)]
427mod tests {
428 use super::*;
429 use crate::matching::HeuristicMatching;
430 use crate::node::{new_base_node, new_branch_node, NodeInner, XmlElement};
431 use std::collections::HashMap;
432
433 #[test]
434 fn test_escape_xml() {
435 assert_eq!(escape_xml("hello"), "hello");
436 assert_eq!(escape_xml("<test>"), "<test>");
437 assert_eq!(escape_xml("a & b"), "a & b");
438 }
439
440 #[test]
441 fn test_escape_xml_attr() {
442 assert_eq!(escape_xml_attr("hello"), "hello");
443 assert_eq!(escape_xml_attr("say \"hi\""), "say "hi"");
444 }
445
446 #[test]
447 fn test_diff_identical_trees() {
448 let base = new_base_node(Some(XmlContent::Element(XmlElement::new(
450 "root".to_string(),
451 HashMap::new(),
452 ))));
453 let base_child = new_base_node(Some(XmlContent::Element(XmlElement::new(
454 "child".to_string(),
455 HashMap::new(),
456 ))));
457 NodeInner::add_child_to_ref(&base, base_child);
458
459 let branch = new_branch_node(Some(XmlContent::Element(XmlElement::new(
460 "root".to_string(),
461 HashMap::new(),
462 ))));
463 let branch_child = new_branch_node(Some(XmlContent::Element(XmlElement::new(
464 "child".to_string(),
465 HashMap::new(),
466 ))));
467 NodeInner::add_child_to_ref(&branch, branch_child);
468
469 let mut matching = HeuristicMatching::new();
471 matching.build_matching(&base, &branch);
472
473 if let Some(diff) = Diff::new(&matching) {
475 let mut output = Vec::new();
476 diff.diff(&mut output).unwrap();
477
478 let result = String::from_utf8(output).unwrap();
479 assert!(result.contains("<diff>"));
480 assert!(result.contains("</diff>"));
481 }
482 }
483}