html_compare/
lib.rs

1#![allow(clippy::result_large_err)]
2
3mod error;
4mod helper;
5mod stack;
6mod token;
7
8use std::collections::{BTreeMap, BTreeSet};
9
10use htmlparser::{ElementEnd as HtmlElementEnd, StrSpan, Token};
11
12use crate::error::*;
13use crate::helper::cleanup_text;
14use crate::token::*;
15
16struct Cursor<'a> {
17    expected: crate::stack::TokenStack<'a>,
18    generated: crate::stack::TokenStack<'a>,
19}
20
21impl<'a> Cursor<'a> {
22    fn new(expected_str: &'a str, generated_str: &'a str) -> Self {
23        Self {
24            expected: crate::stack::TokenStack::parse(expected_str).sanitize(),
25            generated: crate::stack::TokenStack::parse(generated_str).sanitize(),
26        }
27    }
28
29    fn next(&mut self) -> (Option<Token<'a>>, Option<Token<'a>>) {
30        (self.expected.next(), self.generated.next())
31    }
32
33    fn next_attributes(
34        &mut self,
35    ) -> (
36        (Vec<Attribute<'a>>, ElementEnd<'a>),
37        (Vec<Attribute<'a>>, ElementEnd<'a>),
38    ) {
39        (
40            Attribute::parse_all(&mut self.expected),
41            Attribute::parse_all(&mut self.generated),
42        )
43    }
44}
45
46fn compare_attr_classes<'a>(
47    expected: StrSpan<'a>,
48    generated: StrSpan<'a>,
49) -> Result<(), ErrorKind<'a>> {
50    let exp_values = expected
51        .as_str()
52        .split(' ')
53        .filter(|item| !item.is_empty())
54        .collect::<BTreeSet<_>>();
55    let res_values = generated
56        .as_str()
57        .split(' ')
58        .filter(|item| !item.is_empty())
59        .collect::<BTreeSet<_>>();
60
61    let diff = exp_values
62        .difference(&res_values)
63        .copied()
64        .collect::<BTreeSet<_>>();
65    if !diff.is_empty() {
66        return Err(ErrorKind::ExpectedClassesNotFound {
67            expected,
68            generated,
69            difference: diff,
70        });
71    }
72
73    let diff = res_values
74        .difference(&exp_values)
75        .copied()
76        .collect::<BTreeSet<_>>();
77    if !diff.is_empty() {
78        return Err(ErrorKind::UnexpectedClassesFound {
79            expected,
80            generated,
81            difference: diff,
82        });
83    }
84
85    Ok(())
86}
87
88fn compare_attr_styles<'a>(
89    expected: StrSpan<'a>,
90    generated: StrSpan<'a>,
91) -> Result<(), ErrorKind<'a>> {
92    let exp_values = expected
93        .as_str()
94        .split(';')
95        .filter(|item| !item.is_empty())
96        .filter_map(|item| item.split_once(':'))
97        .map(|(k, v)| (k.trim(), v.trim()))
98        .collect::<BTreeMap<_, _>>();
99    let gen_values = generated
100        .as_str()
101        .split(';')
102        .filter(|item| !item.is_empty())
103        .filter_map(|item| item.split_once(':'))
104        .map(|(k, v)| (k.trim(), v.trim()))
105        .collect::<BTreeMap<_, _>>();
106
107    let exp_keys = exp_values.keys().cloned().collect::<BTreeSet<_>>();
108    let res_keys = gen_values.keys().cloned().collect::<BTreeSet<_>>();
109
110    let diff = exp_keys
111        .difference(&res_keys)
112        .copied()
113        .collect::<BTreeSet<_>>();
114    if !diff.is_empty() {
115        return Err(ErrorKind::ExpectedStylesNotFound {
116            expected,
117            generated,
118            difference: diff,
119        });
120    }
121
122    let diff = res_keys
123        .difference(&exp_keys)
124        .copied()
125        .collect::<BTreeSet<_>>();
126    if !diff.is_empty() {
127        return Err(ErrorKind::UnexpectedStylesFound {
128            expected,
129            generated,
130            difference: diff,
131        });
132    }
133
134    for (key, exp_value) in exp_values.iter() {
135        if let Some(res_value) = gen_values.get(key) {
136            if exp_value != res_value {
137                return Err(ErrorKind::InvalidStyleValue {
138                    expected,
139                    generated,
140                    key,
141                    expected_value: exp_value,
142                    generated_value: res_value,
143                });
144            }
145        } else {
146            return Err(ErrorKind::ExpectedStyleNotFound {
147                expected,
148                generated,
149                missing: key,
150            });
151        }
152    }
153
154    Ok(())
155}
156
157fn compare_attributes<'a>(
158    cursor: &mut Cursor<'a>,
159    expected: ElementStart<'a>,
160    generated: ElementStart<'a>,
161) -> Result<ElementEnd<'a>, ErrorKind<'a>> {
162    let ((exp_attrs, exp_end), (gen_attrs, gen_end)) = cursor.next_attributes();
163
164    if !exp_end.end.eq(&gen_end.end) {
165        return Err(ErrorKind::EndOfElementMismatch {
166            expected: exp_end,
167            generated: gen_end,
168        });
169    }
170
171    let exp_keys = exp_attrs
172        .iter()
173        .filter(|attr| {
174            // if attribute is `class` or `style`, and the value is empty, we can ignore it
175            if ["class", "style"].contains(&attr.local.as_str()) {
176                attr.value.is_some_and(|v| !v.is_empty())
177            } else {
178                true
179            }
180        })
181        .map(|attr| (attr.local.as_str(), attr.local))
182        .collect::<BTreeMap<_, _>>();
183    let gen_keys = gen_attrs
184        .iter()
185        .map(|attr| (attr.local.as_str(), attr.local))
186        .collect::<BTreeMap<_, _>>();
187
188    let exp_str_keys = exp_keys.keys().collect::<BTreeSet<_>>();
189    let gen_str_keys = gen_keys.keys().collect::<BTreeSet<_>>();
190
191    let diff = exp_str_keys
192        .difference(&gen_str_keys)
193        .filter_map(|key| exp_keys.get(*key).copied())
194        .collect::<Vec<_>>();
195    if !diff.is_empty() {
196        return Err(ErrorKind::ExpectedAttributesNotFound {
197            expected,
198            generated,
199            expected_attributes: exp_attrs,
200            generated_attributes: gen_attrs,
201            difference: diff,
202        });
203    }
204    let diff = gen_str_keys
205        .difference(&exp_str_keys)
206        .filter_map(|key| exp_keys.get(*key).copied())
207        .collect::<Vec<_>>();
208    if !diff.is_empty() {
209        return Err(ErrorKind::UnexpectedAttributesFound(diff));
210    }
211
212    let exp_attrs_map = exp_attrs
213        .iter()
214        .map(|attr| (attr.local.as_str(), attr))
215        .collect::<BTreeMap<_, _>>();
216    let gen_attrs_map = gen_attrs
217        .iter()
218        .map(|attr| (attr.local.as_str(), attr))
219        .collect::<BTreeMap<_, _>>();
220
221    for (exp_attr, gen_attr) in exp_attrs_map.iter().filter_map(|(key, exp_value)| {
222        gen_attrs_map
223            .get(key)
224            .map(|gen_value| (*exp_value, *gen_value))
225    }) {
226        match (exp_attr.value, gen_attr.value) {
227            (Some(exp_value), Some(gen_value)) => {
228                if exp_attr.local == "style" {
229                    compare_attr_styles(exp_value, gen_value)?
230                } else if exp_attr.local == "class" {
231                    compare_attr_classes(exp_value, gen_value)?
232                } else if exp_value.as_str() != gen_value.as_str() {
233                    return Err(ErrorKind::InvalidAttributeValue {
234                        expected: exp_attr.clone(),
235                        generated: exp_attr.clone(),
236                    });
237                }
238            }
239            (None, Some(inner)) | (Some(inner), None) if !inner.as_str().is_empty() => {
240                return Err(ErrorKind::InvalidAttributeValue {
241                    expected: exp_attr.clone(),
242                    generated: exp_attr.clone(),
243                });
244            }
245            _ => {}
246        }
247    }
248
249    Ok(exp_end)
250}
251
252fn compare_elements<'a>(
253    cursor: &mut Cursor<'a>,
254    expected: ElementStart<'a>,
255    generated: ElementStart<'a>,
256) -> Result<(), ErrorKind<'a>> {
257    if !expected.local.as_str().eq(generated.local.as_str()) {
258        return Err(ErrorKind::InvalidElementTag {
259            expected,
260            generated,
261        });
262    }
263
264    let ending = compare_attributes(cursor, expected.clone(), generated.clone())?;
265
266    if matches!(ending.end, HtmlElementEnd::Open)
267        && !matches!(expected.local.as_str(), "br" | "meta")
268    {
269        compare_all(cursor, &expected.local, expected.span, generated.span)?;
270    }
271
272    Ok(())
273}
274
275fn compare_text<'a>(expected: StrSpan<'a>, generated: StrSpan<'a>) -> Result<(), ErrorKind<'a>> {
276    if cleanup_text(&expected) != cleanup_text(&generated) {
277        Err(ErrorKind::TextMismatch {
278            expected,
279            generated,
280        })
281    } else {
282        Ok(())
283    }
284}
285
/// Compares the text of two comments.
///
/// Comments follow the same rules as regular text: the comparison is delegated to
/// [`compare_text`], so a mismatch is reported as `ErrorKind::TextMismatch`.
fn compare_comment<'a>(expected: StrSpan<'a>, result: StrSpan<'a>) -> Result<(), ErrorKind<'a>> {
    compare_text(expected, result)
}
289
290fn compare_tokens<'a>(
291    cursor: &mut Cursor<'a>,
292    parent: &str,
293    expected: Token<'a>,
294    generated: Token<'a>,
295) -> Result<(), ErrorKind<'a>> {
296    match (expected, generated) {
297        (Token::Comment { text: exp_text, .. }, Token::Comment { text: res_text, .. }) => {
298            compare_comment(exp_text, res_text)?;
299        }
300        (Token::Text { text: exp_text }, Token::Text { text: res_text }) => {
301            if parent == "style" {
302                css_compare::compare(exp_text.as_str(), res_text.as_str()).map_err(|error| {
303                    ErrorKind::CssMismatch {
304                        expected: exp_text,
305                        generated: res_text,
306                        error,
307                    }
308                })?;
309            } else {
310                compare_text(exp_text, res_text)?;
311            }
312        }
313        (
314            Token::ElementStart {
315                span: exp_span,
316                prefix: exp_prefix,
317                local: exp_local,
318            },
319            Token::ElementStart {
320                span: gen_span,
321                prefix: gen_prefix,
322                local: gen_local,
323            },
324        ) => {
325            compare_elements(
326                cursor,
327                ElementStart {
328                    span: exp_span,
329                    prefix: exp_prefix,
330                    local: exp_local,
331                },
332                ElementStart {
333                    span: gen_span,
334                    prefix: gen_prefix,
335                    local: gen_local,
336                },
337            )?;
338        }
339        (Token::ElementEnd { .. }, Token::ElementEnd { .. }) => {
340            // END OF ELEMENT
341            return Ok(());
342        }
343        (Token::ConditionalCommentStart { .. }, Token::ConditionalCommentStart { .. }) => {
344            if expected.span().as_str() != generated.span().as_str() {
345                return Err(ErrorKind::ElementMismatch {
346                    expected,
347                    generated,
348                });
349            }
350        }
351        (Token::ConditionalCommentEnd { .. }, Token::ConditionalCommentEnd { .. }) => {
352            if expected.span().as_str() != generated.span().as_str() {
353                return Err(ErrorKind::ElementMismatch {
354                    expected,
355                    generated,
356                });
357            }
358        }
359        (Token::EmptyDtd { name: exp_name, .. }, Token::EmptyDtd { name: gen_name, .. }) => {
360            if exp_name.as_str() != gen_name.as_str() {
361                return Err(ErrorKind::ElementMismatch {
362                    expected,
363                    generated,
364                });
365            }
366        }
367        (exp, gen) => {
368            return Err(ErrorKind::ElementMismatch {
369                expected: exp,
370                generated: gen,
371            });
372        }
373    }
374    Ok(())
375}
376
377fn compare_next<'a>(
378    cursor: &mut Cursor<'a>,
379    parent: &str,
380    expected_parent: StrSpan<'a>,
381    generated_parent: StrSpan<'a>,
382) -> Result<bool, ErrorKind<'a>> {
383    match cursor.next() {
384        (Some(expected), Some(generated)) => {
385            compare_tokens(cursor, parent, expected, generated)?;
386            Ok(true)
387        }
388        (None, None) => {
389            // nothing to do
390            Ok(false)
391        }
392        (Some(expected_element), None) => Err(ErrorKind::ExpectedElementNotFound {
393            expected_parent,
394            generated_parent,
395            expected_element,
396        }),
397        (None, Some(generated)) => Err(ErrorKind::UnexpectedElementFound { generated }),
398    }
399}
400
401fn compare_all<'a>(
402    cursor: &mut Cursor<'a>,
403    parent: &str,
404    expected_parent: StrSpan<'a>,
405    generated_parent: StrSpan<'a>,
406) -> Result<(), ErrorKind<'a>> {
407    loop {
408        match compare_next(cursor, parent, expected_parent, generated_parent) {
409            Ok(true) => {}
410            Ok(false) => return Ok(()),
411            Err(kind) => return Err(kind),
412        }
413    }
414}
415
416/// Compare html values without being too extreme
417pub fn compare<'a>(expected: &'a str, generated: &'a str) -> Result<(), Error<'a>> {
418    let mut cursor = Cursor::new(expected, generated);
419    if let Err(kind) = compare_all(
420        &mut cursor,
421        "",
422        StrSpan::from(expected),
423        StrSpan::from(generated),
424    ) {
425        Err(Error::<'a> {
426            expected,
427            generated,
428            kind,
429        })
430    } else {
431        Ok(())
432    }
433}
434
435pub fn assert_similar(expected: &str, generated: &str) {
436    println!("=== expected");
437    println!("{expected}");
438    println!("=== generated");
439    println!("{generated}");
440    if let Err(error) = compare(expected, generated) {
441        panic!("{error}");
442    }
443}
444
// Unit tests for the attribute helpers and for whole-document comparison.
#[cfg(test)]
mod tests {
    use super::*;

    // The two bundled resource documents must be considered similar.
    #[test]
    fn should_be_equal() {
        assert_similar(
            include_str!("../resources/expected.html"),
            include_str!("../resources/generated.html"),
        );
    }

    // Class order is not significant.
    #[test]
    fn simple_same_classes() {
        compare_attr_classes("foo bar baz".into(), "baz foo bar".into()).unwrap();
    }

    // A class missing from the generated value is reported with its name.
    #[test]
    fn expected_class_not_found() {
        let err = compare_attr_classes("foo bar baz".into(), "baz bar".into()).unwrap_err();
        assert_eq!(err.display(), "ExpectedClassesNotFound { expected: StrSpan(\"foo bar baz\" 0..11), generated: StrSpan(\"baz bar\" 0..7), difference: {\"foo\"} }");
    }

    // Whitespace around declarations and a trailing `;` are not significant.
    #[test]
    fn simple_same_styles() {
        compare_attr_styles(
            "width:100%;height:12px".into(),
            "width: 100%; height: 12px;".into(),
        )
        .unwrap();
    }

    // A property missing from the generated value is reported with its name.
    #[test]
    fn expected_style_not_found() {
        let err =
            compare_attr_styles("width:100%;height:12px".into(), "width:100%".into()).unwrap_err();
        assert_eq!(err.display(), "ExpectedStylesNotFound { expected: StrSpan(\"width:100%;height:12px\" 0..22), generated: StrSpan(\"width:100%\" 0..10), difference: {\"height\"} }");
    }

    // Identical documents match, and so do documents that only differ by attribute
    // order and by the whitespace between attributes and tags.
    #[test]
    fn simple_same_dom() {
        compare(
            r#"<!doctype html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office"></html>
"#,
            r#"<!doctype html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office"></html>
"#,
        ).unwrap();
        compare(
            r#"<!doctype html>
<html xmlns="http://www.w3.org/1999/xhtml"
    xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office">
</html>
"#,
            r#"<!doctype html>
<html  xmlns="http://www.w3.org/1999/xhtml" xmlns:o="urn:schemas-microsoft-com:office:office"  xmlns:v="urn:schemas-microsoft-com:vml">
</html>"#,
        ).unwrap();
    }

    // Conditional comments are supported; the `meta` attributes appear in a different
    // order on each side.
    #[test]
    fn with_conditional_dom() {
        compare(
            r#"<!doctype html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office">
    <head>
        <!--[if !mso]><!-->
        <meta http-equiv="X-UA-Compatible" content="IE=edge">
        <!--<![endif]-->
    </head>
</html>
"#,
            r#"<!doctype html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office">
    <head>
        <!--[if !mso]><!-->
        <meta content="IE=edge" http-equiv="X-UA-Compatible">
        <!--<![endif]-->
    </head>
</html>
"#,
        ).unwrap();
    }

    // The generated stylesheet lacks the `#outlook a` rule of the expected one, so the
    // comparison must fail.
    #[test]
    fn with_head_style() {
        let _ = compare(r#"<!doctype html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office">
    <head>
        <title></title>
        <!--[if !mso]><!-->
        <meta http-equiv="X-UA-Compatible" content="IE=edge">
        <!--<![endif]-->
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <style type="text/css">
            #outlook a {
                padding: 0;
            }

            body {
                margin: 0;
                padding: 0;
                -webkit-text-size-adjust: 100%;
                -ms-text-size-adjust: 100%;
            }
        </style>
    </head>
</html>"#, r#"<!doctype html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office">
    <head>
        <title></title>
        <!--[if !mso]><!-->
        <meta http-equiv="X-UA-Compatible" content="IE=edge">
        <!--<![endif]-->
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <style type="text/css">
            body {
                margin: 0;
                padding: 0;
                -webkit-text-size-adjust: 100%;
                -ms-text-size-adjust: 100%;
            }
        </style>
    </head>
</html>"#).unwrap_err();
    }

    // An empty `style` element on the expected side must match a document that has no
    // `style` element at all.
    // NOTE(review): presumably the empty element is dropped while the token stacks are
    // sanitized; confirm against `TokenStack::sanitize`.
    #[test]
    fn with_empty_head_style() {
        compare(
            r#"<!doctype html>
<html>
    <head>
        <title></title>
        <style type="text/css">
        </style>
    </head>
</html>"#,
            r#"<!doctype html>
<html>
    <head>
        <title></title>
    </head>
</html>"#,
        )
        .expect("should be equal");
    }
}