html_compare_rs/
lib.rs

1//! A utility for comparing HTML output with configurable comparison options.
2//!
3//! This crate provides tools for comparing HTML strings while ignoring differences
4//! that don't affect the rendered output, such as whitespace, attribute order,
5//! and other configurable aspects.
6//!
7//! # Example
8//! ```ignore
9//! use html_compare::{HtmlComparer, HtmlCompareOptions};
10//!
11//! let html1 = "<div><p>Hello</p></div>";
12//! let html2 = "<div>\n  <p>Hello</p>\n</div>";
13//!
14//! let comparer = HtmlComparer::new();
15//! assert!(comparer.compare(html1, html2).unwrap());
16//! ```
17//!
18//! For testing, you can use the provided assertion macros:
19//! ```ignore
20//! # use html_compare::assert_html_eq;
21//! # use html_compare::HtmlCompareOptions;
22//! assert_html_eq!(
23//!     "<div><p>Hello</p></div>",
24//!     "<div>\n  <p>Hello</p>\n</div>"
25//! );
26//! ```
27
/// Panics unless two HTML strings compare as equivalent.
///
/// The two-argument form compares with `HtmlCompareOptions::default()`; a third
/// argument supplies custom options. Every argument is evaluated exactly once and
/// only borrowed. On failure the panic message contains the comparison error, both
/// inputs and the options that were used.
///
/// # Examples
/// ```ignore
/// use html_compare::assert_html_eq;
///
/// assert_html_eq!(
///     "<div><p>Hello</p></div>",
///     "<div>\n  <p>Hello</p>\n</div>"
/// );
///
/// // With custom options
/// use html_compare::HtmlCompareOptions;
/// assert_html_eq!(
///     "<div><p>First</p><p>Second</p></div>",
///     "<div><p>Second</p><p>First</p></div>",
///     HtmlCompareOptions {
///         ignore_sibling_order: true,
///         ..Default::default()
///     }
/// );
/// ```
#[macro_export]
macro_rules! assert_html_eq {
    // Two-argument form: delegate to the three-argument form with default options.
    ($left:expr, $right:expr $(,)?) => {
        $crate::assert_html_eq!($left, $right, $crate::HtmlCompareOptions::default())
    };
    ($left:expr, $right:expr, $options:expr $(,)?) => {{
        // Binding through `match` keeps temporaries in the argument expressions alive
        // for the whole comparison while evaluating each argument only once.
        match (&$left, &$right, &$options) {
            (left_val, right_val, options) => {
                let outcome = $crate::HtmlComparer::with_options(options.clone())
                    .compare(left_val, right_val);
                match outcome {
                    Ok(_) => {}
                    Err(err) => panic!(
                        "\n\
                        HTML comparison failed:\n\
                        {}\n\n\
                        left HTML:\n\
                        {}\n\n\
                        right HTML:\n\
                        {}\n\n\
                        options: {:#?}\
                    ",
                        err, left_val, right_val, options
                    ),
                }
            }
        }
    }};
}
77
/// Asserts that two HTML strings are not equivalent according to the given comparison options.
///
/// The two-argument form compares with `HtmlCompareOptions::default()`; a third
/// argument supplies custom options. Every argument is evaluated exactly once and
/// only borrowed. When the inputs compare as equivalent, the macro panics and prints
/// both inputs (they may differ textually, e.g. in whitespace, while still comparing
/// equal) together with the options that were used.
///
/// # Examples
/// ```ignore
/// use html_compare::assert_html_ne;
///
/// assert_html_ne!(
///     "<div><p>Hello</p></div>",
///     "<div><p>Different</p></div>"
/// );
/// ```
#[macro_export]
macro_rules! assert_html_ne {
    // Two-argument form: delegate to the three-argument form with default options.
    ($left:expr, $right:expr $(,)?) => {
        $crate::assert_html_ne!($left, $right, $crate::HtmlCompareOptions::default())
    };
    ($left:expr, $right:expr, $options:expr $(,)?) => {{
        // Binding through `match` keeps temporaries in the argument expressions alive
        // for the whole comparison while evaluating each argument only once.
        match (&$left, &$right, &$options) {
            (left_val, right_val, options) => {
                let comparer = $crate::HtmlComparer::with_options(options.clone());
                if comparer.compare(left_val, right_val).is_ok() {
                    panic!(
                        "\n\
                        HTML strings were equal but expected to be different:\n\n\
                        left HTML:\n\
                        {}\n\n\
                        right HTML:\n\
                        {}\n\n\
                        options: {:#?}\
                    ",
                        left_val, right_val, options
                    );
                }
            }
        }
    }};
}
113
114use ego_tree::NodeRef;
115use scraper::{ElementRef, Html, Node};
116use std::collections::HashSet;
117use thiserror::Error;
118
119#[derive(Debug, Error)]
120pub enum HtmlCompareError {
121    #[error("Node mismatch: {0}")]
122    NodeMismatch(String),
123    #[error("Missing expected node: {expected} at position {position}")]
124    MissingNode { expected: String, position: usize },
125    #[error("Extra node found: {found} at position {position}")]
126    ExtraNode { found: String, position: usize },
127}
128
/// Configuration for HTML comparison.
///
/// `PartialEq`/`Eq` are derived so option sets (for example presets) can be compared
/// directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HtmlCompareOptions {
    /// Skip whitespace-only text nodes and trim leading/trailing whitespace of the
    /// remaining text nodes before comparing them. Whitespace inside a text node is
    /// always compared exactly.
    pub ignore_whitespace: bool,
    /// Ignore all HTML attributes.
    pub ignore_attributes: bool,
    /// Names of specific attributes to ignore (only consulted when `ignore_attributes`
    /// is `false`).
    pub ignored_attributes: HashSet<String>,
    /// Leave text nodes out of the comparison entirely.
    pub ignore_text: bool,
    /// Leave comment nodes out of the comparison entirely.
    pub ignore_comments: bool,
    /// Match sibling nodes regardless of their order.
    pub ignore_sibling_order: bool,
    /// Do not compare the children of `<style>` elements; the tag itself (and its
    /// attributes, unless ignored) is still compared.
    pub ignore_style_contents: bool,
}

impl Default for HtmlCompareOptions {
    /// Defaults: whitespace and comments are ignored; attributes, text, sibling order
    /// and `<style>` contents are all compared.
    fn default() -> Self {
        Self {
            ignore_whitespace: true,
            ignore_attributes: false,
            ignored_attributes: HashSet::new(),
            ignore_text: false,
            ignore_comments: true,
            ignore_sibling_order: false,
            ignore_style_contents: false,
        }
    }
}
161
162fn node_type_name(node: &Node) -> &'static str {
163    match node {
164        Node::Text(_) => "Text",
165        Node::Element(_) => "Element",
166        Node::Comment(_) => "Comment",
167        Node::ProcessingInstruction(_) => "ProcessingInstruction",
168        Node::Doctype(_) => "Doctype",
169        Node::Document => "Document",
170        Node::Fragment => "Fragment",
171    }
172}
173/// Main struct for comparing HTML
174#[derive(Debug)]
175pub struct HtmlComparer {
176    options: HtmlCompareOptions,
177}
178
179impl Default for HtmlComparer {
180    fn default() -> Self {
181        Self::new()
182    }
183}
184
185impl HtmlComparer {
186    /// Create a new HTML comparer with default options.
187    ///
188    /// Note about whitespace handling:
189    /// - Multiple spaces in text content are collapsed into a single space (standard HTML behavior)
190    /// - Whitespace between elements is ignored by default
191    /// - Setting `ignore_whitespace: false` only affects element whitespace, not text content
192    /// - Special elements like <pre> and attributes like xml:space are treated the same as regular elements
193    pub fn new() -> Self {
194        Self {
195            options: HtmlCompareOptions::default(),
196        }
197    }
198
199    /// Create a new HTML comparer with custom options
200    pub fn with_options(options: HtmlCompareOptions) -> Self {
201        Self { options }
202    }
203
204    /// Compare two HTML strings
205    pub fn compare(&self, expected: &str, actual: &str) -> Result<bool, HtmlCompareError> {
206        let expected_doc = Html::parse_document(expected);
207        let actual_doc = Html::parse_document(actual);
208
209        let expected_root = expected_doc.root_element();
210        let actual_root = actual_doc.root_element();
211
212        self.compare_element_refs(expected_root, actual_root)
213            .map(|_| true)
214    }
215
216    /// Compare two ElementRefs
217    fn compare_element_refs(
218        &self,
219        expected: ElementRef,
220        actual: ElementRef,
221    ) -> Result<(), HtmlCompareError> {
222        // Compare tag names
223        if expected.value().name() != actual.value().name() {
224            return Err(HtmlCompareError::NodeMismatch(format!(
225                "Tag name mismatch. Expected: {}, Actual: {}",
226                expected.value().name(),
227                actual.value().name()
228            )));
229        }
230
231        // Compare attributes if not ignored
232        if !self.options.ignore_attributes {
233            self.compare_attributes(expected, actual)?;
234        }
235
236        // Special handling for style tags if ignore_style_contents is true
237        if self.options.ignore_style_contents && expected.value().name() == "style" {
238            // When ignoring style contents, we only compare the tag existence
239            return Ok(());
240        }
241
242        // Get child nodes
243        let expected_children: Vec<_> = expected
244            .children()
245            .filter(|n| self.should_include_node(n))
246            .collect();
247        let actual_children: Vec<_> = actual
248            .children()
249            .filter(|n| self.should_include_node(n))
250            .collect();
251
252        if self.options.ignore_sibling_order {
253            self.compare_unordered_nodes(&expected_children, &actual_children)?;
254        } else {
255            self.compare_ordered_nodes(&expected_children, &actual_children)?;
256        }
257
258        Ok(())
259    }
260
261    /// Compare attributes between two ElementRefs
262    fn compare_attributes(
263        &self,
264        expected: ElementRef,
265        actual: ElementRef,
266    ) -> Result<(), HtmlCompareError> {
267        let expected_attrs: HashSet<_> = expected
268            .value()
269            .attrs()
270            .filter(|(name, _)| !self.options.ignored_attributes.contains(*name))
271            .collect();
272        let actual_attrs: HashSet<_> = actual
273            .value()
274            .attrs()
275            .filter(|(name, _)| !self.options.ignored_attributes.contains(*name))
276            .collect();
277
278        if expected_attrs != actual_attrs {
279            return Err(HtmlCompareError::NodeMismatch(format!(
280                "Attributes mismatch. Expected: {:?}, Actual: {:?}",
281                expected_attrs, actual_attrs
282            )));
283        }
284        Ok(())
285    }
286
287    /// Compare ordered nodes
288    fn compare_ordered_nodes(
289        &self,
290        expected: &[NodeRef<Node>],
291        actual: &[NodeRef<Node>],
292    ) -> Result<(), HtmlCompareError> {
293        if expected.len() != actual.len() {
294            return Err(HtmlCompareError::NodeMismatch(format!(
295                "Child count mismatch. Expected: {}, Actual: {}",
296                expected.len(),
297                actual.len()
298            )));
299        }
300
301        for (i, (expected_child, actual_child)) in expected.iter().zip(actual.iter()).enumerate() {
302            match (expected_child.value(), actual_child.value()) {
303                (Node::Text(expected_text), Node::Text(actual_text)) => {
304                    if !self.options.ignore_text {
305                        let expected_str = if self.options.ignore_whitespace {
306                            expected_text.trim()
307                        } else {
308                            expected_text
309                        };
310                        let actual_str = if self.options.ignore_whitespace {
311                            actual_text.trim()
312                        } else {
313                            actual_text
314                        };
315                        if expected_str != actual_str {
316                            return Err(HtmlCompareError::NodeMismatch(format!(
317                                "Text content mismatch at position {}. Expected: '{}', Actual: '{}'",
318                                i, expected_str, actual_str
319                            )));
320                        }
321                    }
322                }
323                (Node::Comment(_), Node::Comment(_)) => {
324                    // If we're not ignoring comments, we should compare their content
325                    if !self.options.ignore_comments {
326                        let expected_comment = match expected_child.value() {
327                            Node::Comment(c) => c.trim(),
328                            _ => unreachable!(),
329                        };
330                        let actual_comment = match actual_child.value() {
331                            Node::Comment(c) => c.trim(),
332                            _ => unreachable!(),
333                        };
334                        if expected_comment != actual_comment {
335                            return Err(HtmlCompareError::NodeMismatch(format!(
336                                "Comment content mismatch at position {}. Expected: '{}', Actual: '{}'",
337                                i, expected_comment, actual_comment
338                            )));
339                        }
340                    }
341                }
342                (Node::Element(_), Node::Element(_)) => {
343                    if let (Some(expected_el), Some(actual_el)) = (
344                        ElementRef::wrap(*expected_child),
345                        ElementRef::wrap(*actual_child),
346                    ) {
347                        self.compare_element_refs(expected_el, actual_el)?;
348                    }
349                }
350                (expected, actual) => {
351                    return Err(HtmlCompareError::NodeMismatch(format!(
352                        "Node type mismatch at position {}. Expected type: {:?}, Actual type: {:?}",
353                        i,
354                        node_type_name(expected),
355                        node_type_name(actual)
356                    )));
357                }
358            }
359        }
360        Ok(())
361    }
362
363    fn compare_unordered_nodes(
364        &self,
365        expected: &[NodeRef<Node>],
366        actual: &[NodeRef<Node>],
367    ) -> Result<(), HtmlCompareError> {
368        if expected.len() != actual.len() {
369            return Err(HtmlCompareError::NodeMismatch(format!(
370                "Child count mismatch. Expected: {}, Actual: {}",
371                expected.len(),
372                actual.len()
373            )));
374        }
375
376        let mut matched = vec![false; actual.len()];
377
378        for expected_child in expected {
379            let mut found = false;
380            for (i, actual_child) in actual.iter().enumerate() {
381                if !matched[i] {
382                    match (expected_child.value(), actual_child.value()) {
383                        (Node::Text(expected_text), Node::Text(actual_text)) => {
384                            if self.options.ignore_text
385                                || (!self.options.ignore_whitespace && expected_text == actual_text)
386                                || (self.options.ignore_whitespace
387                                    && expected_text.trim() == actual_text.trim())
388                            {
389                                matched[i] = true;
390                                found = true;
391                                break;
392                            }
393                        }
394                        (Node::Element(_), Node::Element(_)) => {
395                            if let (Some(expected_el), Some(actual_el)) = (
396                                ElementRef::wrap(*expected_child),
397                                ElementRef::wrap(*actual_child),
398                            ) {
399                                if self.compare_element_refs(expected_el, actual_el).is_ok() {
400                                    matched[i] = true;
401                                    found = true;
402                                    break;
403                                }
404                            }
405                        }
406                        (Node::Comment(_), Node::Comment(_)) if self.options.ignore_comments => {
407                            matched[i] = true;
408                            found = true;
409                            break;
410                        }
411                        _ => {}
412                    }
413                }
414            }
415            if !found {
416                return Err(HtmlCompareError::NodeMismatch(format!(
417                    "No matching node found for {:?}",
418                    expected_child.value()
419                )));
420            }
421        }
422        Ok(())
423    }
424
425    /// Determine if a node should be included in comparison
426    fn should_include_node(&self, node: &NodeRef<Node>) -> bool {
427        match node.value() {
428            Node::Text(text) => {
429                !self.options.ignore_text
430                    && (!self.options.ignore_whitespace || !text.trim().is_empty())
431            }
432            Node::Comment(_) => !self.options.ignore_comments,
433            _ => true,
434        }
435    }
436}
437
438/// Convenience functions for creating common comparison configurations
439pub mod presets {
440    use super::*;
441
442    /// Create a comparer that ignores all formatting differences
443    pub fn relaxed() -> HtmlCompareOptions {
444        HtmlCompareOptions {
445            ignore_whitespace: true,
446            ignore_attributes: true,
447            ignored_attributes: HashSet::new(),
448            ignore_text: false,
449            ignore_comments: true,
450            ignore_sibling_order: true,
451            ignore_style_contents: true,
452        }
453    }
454
455    /// Create a comparer that is strict about everything except whitespace
456    pub fn strict() -> HtmlCompareOptions {
457        HtmlCompareOptions {
458            ignore_whitespace: true,
459            ignore_attributes: false,
460            ignored_attributes: HashSet::new(),
461            ignore_text: false,
462            ignore_comments: false,
463            ignore_sibling_order: false,
464            ignore_style_contents: false,
465        }
466    }
467
468    /// Create a comparer that is suitable for testing markdown output
469    pub fn markdown() -> HtmlCompareOptions {
470        HtmlCompareOptions {
471            ignore_whitespace: true,
472            ignore_attributes: false,
473            ignored_attributes: {
474                let mut set = HashSet::new();
475                set.insert("id".to_string());
476                set
477            },
478            ignore_text: false,
479            ignore_comments: true,
480            ignore_sibling_order: false,
481            ignore_style_contents: true,
482        }
483    }
484}
485
486#[cfg(test)]
487mod tests {
488    use super::*;
489
490    #[test]
491    fn test_basic_comparison() {
492        assert_html_eq!("<div><p>Hello</p></div>", "<div><p>Hello</p></div>");
493    }
494
495    #[test]
496    fn test_empty_elements() {
497        assert_html_eq!("<div></div>", "<div></div>");
498        assert_html_eq!("<div></div>", "<div/>");
499        assert_html_eq!("<br>", "<br/>");
500        assert_html_eq!("<img src='test.jpg'>", "<img src='test.jpg'/>");
501
502        // Empty elements with whitespace
503        assert_html_eq!("<div></div>", "<div>   </div>");
504        assert_html_eq!("<p></p>", "<p>\n</p>");
505    }
506
507    #[test]
508    fn test_whitespace_handling() {
509        // ignore_whitespace only affects whitespace between elements, not within text content
510        assert_html_ne!("<p>Hello   World</p>", "<p>Hello World</p>");
511
512        // Whitespace between elements is ignored by default
513        assert_html_eq!(
514            "<div><p>Hello</p></div>",
515            "<div>\n  <p>\n    Hello\n  </p>\n</div>"
516        );
517
518        // Whitespace at start/end of text content
519        assert_html_eq!(
520            "<p>   Hello   </p>",
521            "<p>Hello</p>",
522            HtmlCompareOptions {
523                ignore_whitespace: true,
524                ..Default::default()
525            }
526        );
527
528        // With whitespace preservation, element whitespace matters
529        let strict_options = HtmlCompareOptions {
530            ignore_whitespace: false,
531            ..Default::default()
532        };
533
534        assert_html_ne!(
535            "<div><p>Hello</p></div>",
536            "<div>\n  <p>\n    Hello\n  </p>\n</div>",
537            strict_options.clone()
538        );
539
540        // Multiple consecutive spaces in text
541        assert_html_ne!("<p>Hello    World</p>", "<p>Hello World</p>");
542    }
543
544    #[test]
545    fn test_text_content_whitespace() {
546        // Text with various whitespace patterns
547        assert_html_ne!("<p>Hello   World</p>", "<p>Hello World</p>");
548
549        assert_html_ne!("<p>Hello \t World</p>", "<p>Hello World</p>");
550
551        assert_html_ne!("<p>Hello\nWorld</p>", "<p>Hello World</p>");
552
553        // Exact whitespace matches
554        assert_html_eq!("<p>Hello   World</p>", "<p>Hello   World</p>");
555
556        // Mixed whitespace and elements
557        assert_html_eq!(
558            "<div>\n    <p>Hello   World</p>\n</div>",
559            "<div><p>Hello   World</p></div>",
560            HtmlCompareOptions {
561                ignore_whitespace: true,
562                ..Default::default()
563            }
564        );
565    }
566
567    #[test]
568    fn test_whitespace_with_multiple_text_nodes() {
569        // Text nodes with elements between
570        assert_html_eq!(
571            "<p>Hello <strong>beautiful</strong> World</p>",
572            "<p>Hello <strong>beautiful</strong> World</p>"
573        );
574
575        // Different whitespace around elements should be ignored
576        assert_html_eq!(
577            "<p>Hello<strong>beautiful</strong>World</p>",
578            "<p>Hello <strong>beautiful</strong> World</p>"
579        );
580    }
581
582    #[test]
583    fn test_attribute_handling() {
584        // Different attribute order
585        assert_html_eq!(
586            "<div class='test' id='1'>Test</div>",
587            "<div id='1' class='test'>Test</div>"
588        );
589
590        // Different attribute values
591        assert_html_ne!(
592            "<div class='test'>Test</div>",
593            "<div class='different'>Test</div>"
594        );
595
596        // Multiple attributes
597        assert_html_eq!(
598            "<div class='a b' id='1' data-test='value'>Content</div>",
599            "<div data-test='value' class='a b' id='1'>Content</div>"
600        );
601
602        // Boolean attributes
603        assert_html_eq!(
604            "<input type='checkbox' checked>",
605            "<input checked type='checkbox'>"
606        );
607
608        // Ignored attributes
609        let mut ignored_attrs = HashSet::new();
610        ignored_attrs.insert("class".to_string());
611        ignored_attrs.insert("id".to_string());
612
613        assert_html_eq!(
614            "<div class='test' id='1'>Test</div>",
615            "<div class='different' id='2'>Test</div>",
616            HtmlCompareOptions {
617                ignored_attributes: ignored_attrs,
618                ..Default::default()
619            }
620        );
621
622        // All attributes ignored
623        assert_html_eq!(
624            "<div class='test' id='1' data-custom='value'>Test</div>",
625            "<div class='different' id='2' data-custom='other'>Test</div>",
626            HtmlCompareOptions {
627                ignore_attributes: true,
628                ..Default::default()
629            }
630        );
631    }
632
633    #[test]
634    fn test_text_handling() {
635        // Basic text comparison
636        assert_html_eq!("<p>Hello World</p>", "<p>Hello World</p>");
637
638        // Different text content
639        assert_html_ne!("<p>Hello World</p>", "<p>Goodbye World</p>");
640
641        // Text with special characters
642        assert_html_eq!("<p>Hello &amp; World</p>", "<p>Hello &amp; World</p>");
643
644        // Mixed text and elements
645        assert_html_eq!(
646            "<div>Hello <strong>World</strong>!</div>",
647            "<div>Hello <strong>World</strong>!</div>"
648        );
649
650        // Text ignored
651        assert_html_eq!(
652            "<p>Hello World</p>",
653            "<p>Goodbye World</p>",
654            HtmlCompareOptions {
655                ignore_text: true,
656                ..Default::default()
657            }
658        );
659    }
660
661    #[test]
662    fn test_nested_structure() {
663        // Basic nesting
664        assert_html_eq!(
665            "<div><section><h1>Title</h1><p>Text</p></section></div>",
666            "<div><section><h1>Title</h1><p>Text</p></section></div>"
667        );
668
669        // Different nesting
670        assert_html_ne!(
671            "<div><section><h1>Title</h1><p>Text</p></section></div>",
672            "<div><h1>Title</h1><section><p>Text</p></section></div>"
673        );
674
675        // Deep nesting
676        assert_html_eq!(
677            "<div><article><section><header><h1>Title</h1></header><p>Text</p></section></article></div>",
678            "<div><article><section><header><h1>Title</h1></header><p>Text</p></section></article></div>"
679        );
680
681        // Multiple nested elements
682        assert_html_eq!(
683            "<div><section><h1>Title</h1><p>Text</p></section><section><h2>Another</h2><p>More</p></section></div>",
684            "<div><section><h1>Title</h1><p>Text</p></section><section><h2>Another</h2><p>More</p></section></div>"
685        );
686    }
687
688    #[test]
689    fn test_comment_handling() {
690        // Comments ignored by default
691        assert_html_eq!(
692            "<div><!-- Comment --><p>Test</p></div>",
693            "<div><p>Test</p></div>"
694        );
695
696        assert_html_eq!(
697            "<div><!-- Multiple --><!-- Comments --><p>Test</p></div>",
698            "<div><p>Test</p></div>"
699        );
700
701        // Comments preserved
702        let preserve_comments = HtmlCompareOptions {
703            ignore_comments: false,
704            ..Default::default()
705        };
706
707        // Same comments
708        assert_html_eq!(
709            "<div><!-- Comment --><p>Test</p></div>",
710            "<div><!-- Comment --><p>Test</p></div>",
711            preserve_comments.clone()
712        );
713
714        // Different comments
715        assert_html_ne!(
716            "<div><!-- Comment 1 --><p>Test</p></div>",
717            "<div><!-- Comment 2 --><p>Test</p></div>",
718            preserve_comments.clone()
719        );
720
721        // Missing comment
722        assert_html_ne!(
723            "<div><!-- Comment --><p>Test</p></div>",
724            "<div><p>Test</p></div>",
725            preserve_comments
726        );
727    }
728
729    #[test]
730    fn test_sibling_order() {
731        // Order matters by default
732        assert_html_ne!(
733            "<div><p>First</p><p>Second</p></div>",
734            "<div><p>Second</p><p>First</p></div>"
735        );
736
737        // Order ignored
738        let ignore_order = HtmlCompareOptions {
739            ignore_sibling_order: true,
740            ..Default::default()
741        };
742
743        // Simple sibling swap
744        assert_html_eq!(
745            "<div><p>First</p><p>Second</p></div>",
746            "<div><p>Second</p><p>First</p></div>",
747            ignore_order.clone()
748        );
749
750        // Multiple siblings
751        assert_html_eq!(
752            "<div><p>1</p><p>2</p><p>3</p></div>",
753            "<div><p>3</p><p>1</p><p>2</p></div>",
754            ignore_order.clone()
755        );
756
757        // Nested siblings
758        assert_html_eq!(
759            "<div><section><p>A</p><p>B</p></section><section><p>C</p><p>D</p></section></div>",
760            "<div><section><p>B</p><p>A</p></section><section><p>D</p><p>C</p></section></div>",
761            ignore_order
762        );
763    }
764
765    #[test]
766    fn test_special_characters() {
767        // HTML entities
768        assert_html_eq!(
769            "<p>&lt;div&gt; &amp; &quot;quotes&quot;</p>",
770            "<p>&lt;div&gt; &amp; &quot;quotes&quot;</p>"
771        );
772
773        // Unicode characters
774        assert_html_eq!("<p>Hello δΈ–η•Œ 🌍</p>", "<p>Hello δΈ–η•Œ 🌍</p>");
775
776        // Mixed entities and Unicode
777        assert_html_eq!(
778            "<p>&copy; 2024 β€’ Hello δΈ–η•Œ</p>",
779            "<p>&copy; 2024 β€’ Hello δΈ–η•Œ</p>"
780        );
781
782        // Different entities representing same character
783        assert_html_eq!("<p>&quot;quoted&quot;</p>", "<p>&#34;quoted&#34;</p>");
784    }
785
786    #[test]
787    fn test_error_messages() {
788        // Test tag mismatch error
789        let result = HtmlComparer::new().compare("<div>Test</div>", "<span>Test</span>");
790        assert!(result.is_err());
791        assert_eq!(
792            result.unwrap_err().to_string(),
793            "Node mismatch: Tag name mismatch. Expected: div, Actual: span"
794        );
795
796        // Test attribute mismatch error
797        let result = HtmlComparer::new().compare(
798            "<div class='test'>Content</div>",
799            "<div class='different'>Content</div>",
800        );
801        assert!(result.is_err());
802        assert_eq!(
803            result.unwrap_err().to_string(),
804            "Node mismatch: Attributes mismatch. Expected: {(\"class\", \"test\")}, Actual: {(\"class\", \"different\")}"
805        );
806
807        // Test content mismatch error
808        let result = HtmlComparer::new().compare("<div>Hello</div>", "<div>World</div>");
809        assert!(result.is_err());
810        assert_eq!(
811            result.unwrap_err().to_string(),
812            "Node mismatch: Text content mismatch at position 0. Expected: 'Hello', Actual: 'World'"
813        );
814
815        // Test structure mismatch error
816        let result = HtmlComparer::new().compare("<div><p>Text</p></div>", "<div>Text</div>");
817        assert!(result.is_err());
818        assert_eq!(
819            result.unwrap_err().to_string(),
820            r#"Node mismatch: Node type mismatch at position 0. Expected type: "Element", Actual type: "Text""#
821        );
822    }
823
824    #[test]
825    fn test_preset_configurations() {
826        // Test relaxed preset
827        let _relaxed = HtmlComparer::with_options(presets::relaxed());
828        assert_html_eq!(
829            "<div class='a'><p>First</p><p>Second</p></div>",
830            "<div class='b'><p>Second</p><p>First</p></div>",
831            presets::relaxed()
832        );
833
834        // Test strict preset
835        assert_html_eq!(
836            "<div class='test'><!--comment--><p>Content</p></div>",
837            "<div class='test'><!--comment--><p>Content</p></div>",
838            presets::strict()
839        );
840
841        assert_html_ne!(
842            "<div class='test'>Content</div>",
843            "<div class='different'>Content</div>",
844            presets::strict()
845        );
846
847        // Test markdown preset
848        assert_html_eq!(
849            "<h1 id='heading-1'>Title</h1><p>Content</p>",
850            "<h1 id='different-id'>Title</h1><p>Content</p>",
851            presets::markdown()
852        );
853    }
/// Checks that several comparison options can be combined in a single comparison.
#[test]
fn test_mixed_scenarios() {
    // Scenario 1: whitespace, comments, sibling order and the `class` attribute
    // are all ignored at the same time.
    let skipped_attrs: HashSet<String> = std::iter::once(String::from("class")).collect();
    let combined = HtmlCompareOptions {
        ignored_attributes: skipped_attrs,
        ignore_sibling_order: true,
        ignore_comments: true,
        ignore_whitespace: true,
        ..Default::default()
    };

    assert_html_eq!(
        "<div class='a'><!-- comment -->\n  <p>First</p>\n  <p>Second</p>\n</div>",
        "<div class='b'><p>Second</p><p>First</p></div>",
        combined
    );

    // Scenario 2: text and structure together, with siblings reordered at two
    // nesting levels (the <p>/<ul> pair and the <li> items).
    let reorder_tolerant = HtmlCompareOptions {
        ignore_sibling_order: true,
        ignore_whitespace: true,
        ..Default::default()
    };

    assert_html_eq!(
        "<div>\n  <p>Text</p>\n  <ul><li>A</li><li>B</li></ul>\n</div>",
        "<div><ul><li>B</li><li>A</li></ul><p>Text</p></div>",
        reorder_tolerant
    );
}
888
/// Boundary inputs compared with the default options: empty and whitespace-only
/// documents, a bare text node, deep nesting, a long sibling list, and a
/// pretty-printed document against its compact form.
#[test]
fn test_edge_cases() {
    // Two empty documents.
    assert_html_eq!("", "");

    // Whitespace-only input against empty input.
    assert_html_eq!("   ", "");
    assert_html_eq!("\n\t  \n", "");

    // A document consisting of one text node.
    assert_html_eq!("Hello", "Hello");

    // Five levels of nesting around a single text node.
    let nested = "<div><div><div><div><div>Text</div></div></div></div></div>";
    assert_html_eq!(nested, nested);

    // One hundred numbered <p> siblings, generated independently for each side.
    let numbered_paragraphs = || -> String {
        (0..100)
            .map(|index| format!("<p>{}</p>", index))
            .collect()
    };
    let left_doc = format!("<div>{}</div>", numbered_paragraphs());
    let right_doc = format!("<div>{}</div>", numbered_paragraphs());
    assert_html_eq!(&left_doc, &right_doc);

    // A pretty-printed document with comments, entities, non-ASCII text and a
    // self-closing tag, compared with the same content on one line without comments.
    let pretty = r#"<div class="wrapper" id="main">
                <!-- Header section -->
                <header class="header">
                    <h1>Title &amp; Subtitle</h1>
                </header>
                <main>
                    <p>Hello δΈ–η•Œ!</p>
                    <ul>
                        <li>Item 1</li>
                        <li>Item 2</li>
                    </ul>
                    <img src="test.jpg" alt="Test Image"/>
                </main>
                <!-- Footer section -->
                <footer>
                    <p>&copy; 2024</p>
                </footer>
            </div>"#;
    let compact = r#"<div class="wrapper" id="main"><header class="header"><h1>Title &amp; Subtitle</h1></header><main><p>Hello δΈ–η•Œ!</p><ul><li>Item 1</li><li>Item 2</li></ul><img src="test.jpg" alt="Test Image"/></main><footer><p>&copy; 2024</p></footer></div>"#;
    assert_html_eq!(pretty, compact);
}
947
/// Documents how the comparer treats markup that is not well formed.
#[test]
fn test_malformed_html() {
    // A missing closing tag compares equal to the properly closed form.
    assert_html_eq!("<p>Text", "<p>Text</p>");

    // A stray closing tag makes the documents differ ...
    assert_html_ne!("<p>Text</p></p>", "<p>Text</p>");

    // ... and the difference is reported as a child-count mismatch.
    let stray_close = HtmlComparer::new().compare("<p>Text</p></p>", "<p>Text</p>");
    assert!(stray_close.is_err());
    let message = stray_close.unwrap_err().to_string();
    assert_eq!(
        message,
        "Node mismatch: Child count mismatch. Expected: 2, Actual: 1"
    );

    // Improperly nested tags: observational only. Any mismatch is printed for
    // inspection and the test does not fail on either outcome.
    let misnested = HtmlComparer::new().compare(
        "<p><strong>Text</p></strong>",
        "<p><strong>Text</strong></p>",
    );
    match misnested {
        Err(e) => println!("Actual parser behavior for mismatched tags: {}", e),
        Ok(_) => {}
    }
}
/// Verifies `ignore_style_contents`: `<style>` bodies are compared by default and
/// skipped when the option is enabled.
#[test]
fn test_style_block_handling() {
    // With default options, differing style contents are a mismatch.
    assert_html_ne!(
        "<style>body { color: red; }</style>",
        "<style>body { color: blue; }</style>"
    );

    // From here on, style contents are ignored.
    // `assert_html_eq!` only borrows its options argument (it clones internally),
    // so the same value can be passed to every assertion without `.clone()`.
    let ignore_style = HtmlCompareOptions {
        ignore_style_contents: true,
        ..Default::default()
    };

    // Different declarations inside a single style block.
    assert_html_eq!(
        "<style>body { color: red; }</style>",
        "<style>body { color: blue; }</style>",
        ignore_style
    );

    // Different number of rules and different formatting.
    assert_html_eq!(
        "<style>\n  body { color: red; }\n  .class { font-size: 12px; }\n</style>",
        "<style>body{background:white}</style>",
        ignore_style
    );

    // Several style blocks interleaved with regular content.
    assert_html_eq!(
        "<div><style>body{color:red}</style><p>Text</p><style>.class{margin:0}</style></div>",
        "<div><style>body{color:blue}</style><p>Text</p><style>.other{padding:10px}</style></div>",
        ignore_style
    );

    // An empty style block against a non-empty one.
    assert_html_eq!(
        "<style></style>",
        "<style>body { color: blue; }</style>",
        ignore_style
    );

    // Matching attributes and text around the style block still compare equal.
    assert_html_eq!(
        "<div class='test'><style>body{color:red}</style><p>Text</p></div>",
        "<div class='test'><style>body{color:blue}</style><p>Text</p></div>",
        ignore_style
    );
}
1021
/// Verifies that `ignore_style_contents` does not affect attribute comparison on
/// the `<style>` element itself.
#[test]
fn test_style_block_with_attributes() {
    let ignore_style = HtmlCompareOptions {
        ignore_style_contents: true,
        ..Default::default()
    };

    // Attributes are still compared when only the style contents are ignored,
    // so the extra `type` attribute must produce a mismatch.
    // `ignore_style` is not used again afterwards, so no copy is needed.
    assert_html_ne!(
        "<style type='text/css'>body{color:red}</style>",
        "<style>body{color:blue}</style>",
        ignore_style
    );

    // With attributes ignored as well, the same pair compares equal.
    let ignore_both = HtmlCompareOptions {
        ignore_style_contents: true,
        ignore_attributes: true,
        ..Default::default()
    };

    assert_html_eq!(
        "<style type='text/css'>body{color:red}</style>",
        "<style>body{color:blue}</style>",
        ignore_both
    );
}
1049}