/// Incrementally separates the leading head material of an HTML stream from
/// the body content that follows it.
///
/// Text is supplied in chunks through [`HeadParser::feed`]. The first
/// non-whitespace character decides whether the input is treated as HTML
/// (it starts with `<`) or as plain text. For HTML, leading `<!DOCTYPE>`,
/// `<html>`, `</html>`, `<head>` and `</head>` tags are discarded, while
/// `<title>`, `<script>`, `<style>`, `<meta>`, `<link>`, `<base>` elements
/// and comments are collected as head parts. The first piece of input that
/// is none of these starts the body; everything from there on is body text.
///
/// Known limitation: a tag that is split across two `feed` calls is not
/// recognised. When the closing `>` has not arrived yet, the buffered text is
/// handed over as the start of the body. A `>` inside a quoted attribute
/// value is likewise taken as the end of the tag.
#[derive(Debug, Default)]
pub struct HeadParser {
    /// Input received so far that has not been classified yet.
    buf: String,
    /// Collected head elements and comments, in document order.
    head_parts: Vec<String>,
    /// Body text; `Some` once the first body content has been seen.
    body_start: Option<String>,
    /// `None` until the first non-whitespace character has been seen.
    is_html: Option<bool>,
}

impl HeadParser {
    /// Structural tags that are recognised and dropped from the output.
    const WRAPPER_TAGS: [&'static str; 4] = ["<html", "</html", "<head", "</head"];

    /// Elements kept as head content: `(opening prefix, closing tag, is void)`.
    /// Void elements have no content, so they end at their own `>`.
    const HEAD_ELEMENTS: [(&'static str, &'static str, bool); 6] = [
        ("<title", "</title>", false),
        ("<script", "</script>", false),
        ("<style", "</style>", false),
        ("<meta", "</meta>", true),
        ("<link", "</link>", true),
        ("<base", "</base>", true),
    ];

    /// Creates an empty parser.
    pub fn new() -> Self {
        Self::default()
    }

    /// Feeds the next chunk of input.
    ///
    /// Returns `true` once the body has started. From then on every chunk is
    /// appended to the body verbatim without any further parsing.
    pub fn feed(&mut self, data: &str) -> bool {
        if let Some(body) = self.body_start.as_mut() {
            body.push_str(data);
            return true;
        }
        self.buf.push_str(data);
        self.parse();
        self.body_start.is_some()
    }

    /// Returns `true` when the start of the body has been found (and the body
    /// has not been removed with [`HeadParser::take_body`] since).
    pub fn complete(&self) -> bool {
        self.body_start.is_some()
    }

    /// Returns `true` when the first non-whitespace character of the input
    /// was `<`; `false` for plain text or when no such character was seen yet.
    pub fn is_html(&self) -> bool {
        self.is_html.unwrap_or(false)
    }

    /// Returns the collected head parts joined with newlines.
    pub fn head(&self) -> String {
        self.head_parts.join("\n")
    }

    /// Removes and returns the body text gathered so far.
    ///
    /// Returns an empty string when the body has not started. Afterwards the
    /// parser no longer holds a body, so [`HeadParser::complete`] reports
    /// `false` until a later `feed` starts one again.
    pub fn take_body(&mut self) -> String {
        self.body_start.take().unwrap_or_default()
    }

    /// Classifies as much of the buffered input as possible.
    fn parse(&mut self) {
        if self.is_html.is_none() {
            // Whitespace alone is not enough to decide; wait for more input.
            let first = match self.buf.trim_start().chars().next() {
                Some(c) => c,
                None => return,
            };
            self.is_html = Some(first == '<');
        }

        if self.is_html == Some(true) {
            loop {
                // Whitespace between head items carries no meaning; drop it.
                let leading = self.buf.len() - self.buf.trim_start().len();
                self.buf.replace_range(..leading, "");
                if self.buf.is_empty() {
                    return;
                }
                match self.try_consume_head_tag() {
                    Some(part) => {
                        // Structural tags come back empty and are discarded.
                        if !part.trim().is_empty() {
                            self.head_parts.push(part);
                        }
                    }
                    None => break,
                }
            }
        }

        // Plain text (kept verbatim, including leading whitespace) or the
        // first HTML content that is not head material: the body starts here.
        self.body_start = Some(std::mem::take(&mut self.buf));
    }

    /// Tries to remove one head item from the front of the buffer.
    ///
    /// Returns `Some("")` for a discarded structural tag, `Some(text)` for a
    /// head element or comment to keep, and `None` when the buffer does not
    /// start with a complete head item.
    fn try_consume_head_tag(&mut self) -> Option<String> {
        let is_wrapper = Self::has_prefix_ignore_case(&self.buf, "<!DOCTYPE")
            || Self::WRAPPER_TAGS
                .iter()
                .any(|tag| Self::starts_tag(&self.buf, tag));
        if is_wrapper {
            let end = self.buf.find('>')?;
            self.buf.replace_range(..=end, "");
            return Some(String::new());
        }

        for &(open, close, is_void) in Self::HEAD_ELEMENTS.iter() {
            if !Self::starts_tag(&self.buf, open) {
                continue;
            }
            let open_end = self.buf.find('>')? + 1;
            let end = if is_void {
                // A void element ends at its own `>`. Scanning further ahead
                // would pull unrelated markup (e.g. a later `<img/>`) into
                // the head. An XHTML-style closing tag directly after the
                // opening tag is accepted and kept with the element.
                let after = &self.buf[open_end..];
                let gap = after.len() - after.trim_start().len();
                if Self::has_prefix_ignore_case(&after[gap..], close) {
                    open_end + gap + close.len()
                } else {
                    open_end
                }
            } else {
                // Take everything up to the closing tag; when none is
                // buffered, only the opening tag is consumed.
                Self::find_ignore_case(&self.buf, close)
                    .map_or(open_end, |at| at + close.len())
            };
            return Some(self.take_prefix(end));
        }

        if self.buf.starts_with("<!--") {
            let end = self.buf.find("-->")? + "-->".len();
            return Some(self.take_prefix(end));
        }

        None
    }

    /// Removes and returns the first `end` bytes of the buffer.
    ///
    /// `end` must lie on a character boundary; every caller passes an offset
    /// that directly follows an ASCII `>`.
    fn take_prefix(&mut self, end: usize) -> String {
        let rest = self.buf.split_off(end);
        std::mem::replace(&mut self.buf, rest)
    }

    /// Returns `true` when `text` begins with `prefix`, ignoring ASCII case.
    fn has_prefix_ignore_case(text: &str, prefix: &str) -> bool {
        text.as_bytes()
            .get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    }

    /// Returns `true` when `text` begins with the ASCII tag prefix `opener`
    /// (such as `<head` or `</head`, matched case-insensitively) and the tag
    /// name ends right there, so `<header>` does not count as `<head>`.
    fn starts_tag(text: &str, opener: &str) -> bool {
        // The matched prefix is pure ASCII, so slicing at its length is safe.
        Self::has_prefix_ignore_case(text, opener)
            && text[opener.len()..]
                .starts_with(|c: char| c.is_whitespace() || c == '>' || c == '/')
    }

    /// Byte offset of the first ASCII-case-insensitive occurrence of the
    /// non-empty ASCII string `needle` in `text`.
    fn find_ignore_case(text: &str, needle: &str) -> Option<usize> {
        text.as_bytes()
            .windows(needle.len())
            .position(|window| window.eq_ignore_ascii_case(needle.as_bytes()))
    }
}
205
206pub struct TextFilter {
213 opened: bool,
214}
215
216impl TextFilter {
217 pub fn new() -> Self {
218 Self { opened: false }
219 }
220
221 pub fn filter(&mut self, chunk: &str) -> String {
223 let escaped = html_escape::encode_text(chunk).into_owned();
224 if !self.opened {
225 self.opened = true;
226 format!("<pre>{}", escaped)
227 } else {
228 escaped
229 }
230 }
231
232 pub fn finish(&self) -> &'static str {
234 if self.opened { "</pre>" } else { "" }
235 }
236}
237
238impl Default for TextFilter {
239 fn default() -> Self {
240 Self::new()
241 }
242}
243
#[cfg(test)]
mod tests {
    use super::*;

    /// Input that does not start with `<` is plain text and becomes the body
    /// unchanged.
    #[test]
    fn detects_plain_text() {
        let mut p = HeadParser::new();
        p.feed("hello world");
        assert!(!p.is_html());
        assert!(p.complete());
        assert_eq!(p.take_body(), "hello world");
    }

    #[test]
    fn detects_html() {
        let mut p = HeadParser::new();
        p.feed("<p>hello</p>");
        assert!(p.is_html());
    }

    #[test]
    fn strips_doctype() {
        let mut p = HeadParser::new();
        p.feed("<!DOCTYPE html>\n<p>body</p>");
        assert!(p.is_html());
        assert!(p.complete());
        assert!(!p.head().contains("DOCTYPE"));
        assert!(p.take_body().contains("<p>body</p>"));
    }

    #[test]
    fn strips_html_head_tags() {
        let mut p = HeadParser::new();
        p.feed("<html><head></head><body><p>content</p></body></html>");
        assert!(p.complete());
        assert!(p.take_body().contains("<p>content</p>"));
    }

    #[test]
    fn preserves_head_content_tags() {
        let mut p = HeadParser::new();
        p.feed(
            "<html><head><title>My Page</title><style>body{}</style></head><body>hi</body></html>",
        );
        assert!(p.complete());
        let head = p.head();
        assert!(head.contains("<title>My Page</title>"));
        assert!(head.contains("<style>body{}</style>"));
    }

    #[test]
    fn fragment_with_no_head() {
        let mut p = HeadParser::new();
        p.feed("<p>just a fragment</p>");
        assert!(p.is_html());
        assert!(p.complete());
        assert!(p.take_body().contains("<p>just a fragment</p>"));
    }

    #[test]
    fn whitespace_before_html() {
        let mut p = HeadParser::new();
        p.feed("  \n <p>text</p>");
        assert!(p.is_html());
    }

    #[test]
    fn text_filter_wraps_in_pre() {
        let mut f = TextFilter::new();
        let out = f.filter("hello");
        assert!(out.starts_with("<pre>"));
        assert!(out.contains("hello"));
    }

    /// Markup characters must come out as entities, never as raw tags.
    #[test]
    fn text_filter_escapes_entities() {
        let mut f = TextFilter::new();
        let out = f.filter("<b>&</b>");
        assert!(out.contains("&lt;b&gt;"));
        assert!(out.contains("&amp;"));
        assert!(!out.contains("<b>"));
    }

    #[test]
    fn text_filter_no_double_pre() {
        let mut f = TextFilter::new();
        let a = f.filter("first");
        let b = f.filter("second");
        assert!(a.starts_with("<pre>"));
        assert!(!b.starts_with("<pre>"));
    }

    #[test]
    fn text_filter_finish() {
        let mut f = TextFilter::new();
        f.filter("x");
        assert_eq!(f.finish(), "</pre>");
    }
}