1use regex::Regex;
2
/// Lower-case element names that `is_html` recognises as HTML tags.
///
/// Entries are kept in alphabetical order, one row per initial letter. The list also
/// contains the embedded-content roots `math` and `svg`.
#[rustfmt::skip]
const HTML_TAGS: [&str; 117] = [
    "a", "abbr", "address", "area", "article", "aside", "audio",
    "b", "base", "bdi", "bdo", "blockquote", "body", "br", "button",
    "canvas", "caption", "cite", "code", "col", "colgroup",
    "data", "datalist", "dd", "del", "details", "dfn", "dialog", "div", "dl", "dt",
    "em", "embed",
    "fieldset", "figcaption", "figure", "footer", "form",
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
    "i", "iframe", "img", "input", "ins",
    "kbd",
    "label", "legend", "li", "link",
    "main", "map", "mark", "math", "menu", "menuitem", "meta", "meter",
    "nav", "noscript",
    "object", "ol", "optgroup", "option", "output",
    "p", "param", "picture", "pre", "progress",
    "q",
    "rb", "rp", "rt", "rtc", "ruby",
    "s", "samp", "script", "section", "select", "slot", "small", "source", "span", "strong",
    "style", "sub", "summary", "sup", "svg",
    "table", "tbody", "td", "template", "textarea", "tfoot", "th", "thead", "time", "title",
    "tr", "track",
    "u", "ul",
    "var", "video",
    "wbr",
];
122
123pub fn is_html(str: &str) -> bool {
125 let re = Regex::new(r"\s?<!doctype html>|(<html\b[^>]*>|<body\b[^>]*>|<x-[^>]+>)+").unwrap();
126 let re_full_str = HTML_TAGS.map(|x| format!("<{}\\b[^>]*>", x)).join("|");
127 let re_full = Regex::new(re_full_str.as_str()).unwrap();
128 re.is_match(str) || re_full.is_match(str)
129}
130
#[cfg(test)]
mod tests {
    use crate::is_html;

    /// Inputs with a doctype, a document skeleton or a known tag are detected as HTML.
    #[test]
    fn it_works() {
        let html_samples = [
            // doctype and document-level tags
            "<!doctype html>",
            "\n\n<!doctype html><html>",
            "<html>",
            "<html></html>",
            "<html lang='en'></html>",
            "<html><body></html>",
            "<html><body class='no-js'></html>",
            // ordinary elements from the tag list
            "<p>foo</p>",
            "<a href='#'>foo</a>",
        ];

        for sample in &html_samples {
            assert!(is_html(sample), "expected HTML: {:?}", sample);
        }
    }

    /// Unknown elements, including ones that merely start with a known name, are rejected.
    #[test]
    fn it_not_works() {
        let non_html_samples = [
            "<cake>foo</cake>",
            "<any>rocks</any>",
            "<bodyx>not</bodyx>",
        ];

        for sample in &non_html_samples {
            assert!(!is_html(sample), "expected non-HTML: {:?}", sample);
        }
    }
}