/// Incremental splitter that separates head-level markup from the body of a
/// document that arrives in chunks.
///
/// Chunks are pushed in with [`HeadParser::feed`]. Leading `<!DOCTYPE>`,
/// `<html>` and `<head>` wrapper tags are discarded, head content elements
/// (`title`, `script`, `style`, `meta`, `link`, `base`) and comments are
/// collected, and everything from the first unrecognised construct onwards is
/// treated as body. Input whose first non-whitespace character is not `<` is
/// classified as plain text and passed through as body unchanged.
#[derive(Debug, Default)]
pub struct HeadParser {
    /// Input received so far that has not yet been classified.
    buf: String,
    /// Head elements and comments collected so far, in document order.
    head_parts: Vec<String>,
    /// Body text accumulated since the end of the head was detected.
    body_start: Option<String>,
    /// `None` until a non-whitespace character has been seen, then whether
    /// that first character was `<`.
    is_html: Option<bool>,
}

impl HeadParser {
    /// Structural tags that are recognised and thrown away.
    const WRAPPER_TAGS: [&'static str; 4] = ["<html", "</html", "<head", "</head"];

    /// Head content elements as `(opening prefix, closing tag, is void element)`.
    const CONTENT_TAGS: [(&'static str, &'static str, bool); 6] = [
        ("<title", "</title>", false),
        ("<script", "</script>", false),
        ("<style", "</style>", false),
        ("<meta", "</meta>", true),
        ("<link", "</link>", true),
        ("<base", "</base>", true),
    ];

    /// Creates an empty parser.
    pub fn new() -> Self {
        Self::default()
    }

    /// Feeds the next chunk of the document.
    ///
    /// Returns `true` once the body has started. From then on every chunk is
    /// appended to the pending body verbatim.
    ///
    /// Note: a construct that is still incomplete at the end of the buffered
    /// input (for example a tag split across two chunks) is not recognised as
    /// head markup and therefore marks the start of the body.
    pub fn feed(&mut self, data: &str) -> bool {
        if let Some(body) = self.body_start.as_mut() {
            body.push_str(data);
            return true;
        }
        self.buf.push_str(data);
        self.parse();
        self.body_start.is_some()
    }

    /// Returns `true` while body text is pending, i.e. the head has been fully
    /// parsed and [`HeadParser::take_body`] has not yet removed the body.
    pub fn complete(&self) -> bool {
        self.body_start.is_some()
    }

    /// Returns whether the input was classified as HTML. Yields `false` while
    /// only whitespace (or nothing) has been seen.
    pub fn is_html(&self) -> bool {
        self.is_html.unwrap_or(false)
    }

    /// Returns the collected head parts joined with newlines.
    pub fn head(&self) -> String {
        self.head_parts.join("\n")
    }

    /// Removes and returns the body text accumulated so far, or an empty
    /// string if the body has not started. Afterwards no body is pending, so
    /// [`HeadParser::complete`] reports `false` until more data is fed.
    pub fn take_body(&mut self) -> String {
        self.body_start.take().unwrap_or_default()
    }

    /// Consumes as much head markup from the buffer as possible and switches
    /// to body mode at the first construct that is not head markup.
    fn parse(&mut self) {
        loop {
            let is_html = match self.is_html {
                Some(known) => known,
                None => {
                    let trimmed = self.buf.trim_start();
                    if trimmed.is_empty() {
                        // Only whitespace so far: classification must wait.
                        return;
                    }
                    let detected = trimmed.starts_with('<');
                    self.is_html = Some(detected);
                    detected
                }
            };

            if !is_html {
                // Plain text has no head; the whole buffer is body.
                self.body_start = Some(std::mem::take(&mut self.buf));
                return;
            }

            // Whitespace between head constructs is dropped, in place.
            let leading_ws = self.buf.len() - self.buf.trim_start().len();
            if leading_ws > 0 {
                self.buf.replace_range(..leading_ws, "");
            }
            if self.buf.is_empty() {
                return;
            }

            match self.try_consume_head_tag() {
                Some(part) => {
                    if !part.trim().is_empty() {
                        self.head_parts.push(part);
                    }
                }
                None => {
                    self.body_start = Some(std::mem::take(&mut self.buf));
                    return;
                }
            }
        }
    }

    /// Removes one head construct from the front of the buffer.
    ///
    /// Returns the removed text for constructs that belong in the head, an
    /// empty string for wrapper tags that are discarded, and `None` (leaving
    /// the buffer untouched) when the buffer does not start with a complete
    /// head construct.
    fn try_consume_head_tag(&mut self) -> Option<String> {
        let (len, keep) = Self::scan_head_tag(&self.buf)?;
        if keep {
            Some(self.buf.drain(..len).collect())
        } else {
            self.buf.replace_range(..len, "");
            Some(String::new())
        }
    }

    /// Inspects the front of `buf` and returns `(byte length, keep)` for the
    /// head construct found there, if any. `keep` is `false` for wrapper tags.
    fn scan_head_tag(buf: &str) -> Option<(usize, bool)> {
        if Self::starts_with_ignore_ascii_case(buf, "<!DOCTYPE") {
            return buf.find('>').map(|end| (end + 1, false));
        }

        if Self::WRAPPER_TAGS
            .iter()
            .any(|open| Self::is_tag_start(buf, open))
        {
            return buf.find('>').map(|end| (end + 1, false));
        }

        for &(open, close, is_void) in Self::CONTENT_TAGS.iter() {
            if !Self::is_tag_start(buf, open) {
                continue;
            }
            // The opening tag ends at the first `>`; a self-closing `/>`
            // ends at the same position, so it needs no separate search.
            let open_end = buf.find('>')? + 1;
            let after_open = &buf[open_end..];
            let len = if is_void {
                // Void elements have no content. Only an immediately
                // following (XHTML-style) closing tag is taken with them, so
                // unrelated markup further down is never swallowed.
                let gap = after_open.len() - after_open.trim_start().len();
                if Self::starts_with_ignore_ascii_case(&after_open[gap..], close) {
                    open_end + gap + close.len()
                } else {
                    open_end
                }
            } else {
                match Self::find_ignore_ascii_case(after_open, close) {
                    Some(pos) => open_end + pos + close.len(),
                    // Closing tag not buffered: keep only the opening tag.
                    None => open_end,
                }
            };
            return Some((len, true));
        }

        if buf.starts_with("<!--") {
            return buf.find("-->").map(|end| (end + 3, true));
        }

        None
    }

    /// Returns `true` if `buf` begins with `open` (ASCII case-insensitive)
    /// followed by whitespace, `>` or `/`, i.e. `open` is the complete tag
    /// name and not merely a prefix of a longer one such as `<header`.
    fn is_tag_start(buf: &str, open: &str) -> bool {
        // Slicing is safe: the matched prefix is pure ASCII, so `open.len()`
        // is a character boundary in `buf`.
        Self::starts_with_ignore_ascii_case(buf, open)
            && buf[open.len()..]
                .starts_with(|c: char| c.is_whitespace() || c == '>' || c == '/')
    }

    /// ASCII case-insensitive `starts_with` that does not allocate.
    fn starts_with_ignore_ascii_case(haystack: &str, prefix: &str) -> bool {
        haystack
            .as_bytes()
            .get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    }

    /// ASCII case-insensitive `find` for a non-empty ASCII `needle`; returns
    /// the byte offset of the first match.
    fn find_ignore_ascii_case(haystack: &str, needle: &str) -> Option<usize> {
        haystack
            .as_bytes()
            .windows(needle.len())
            .position(|window| window.eq_ignore_ascii_case(needle.as_bytes()))
    }
}
206
207pub struct TextFilter {
214 opened: bool,
215}
216
217impl TextFilter {
218 pub fn new() -> Self {
219 Self { opened: false }
220 }
221
222 pub fn filter(&mut self, chunk: &str) -> String {
224 let escaped = html_escape::encode_text(chunk).into_owned();
225 if !self.opened {
226 self.opened = true;
227 format!("<pre>{}", escaped)
228 } else {
229 escaped
230 }
231 }
232
233 pub fn finish(&self) -> &'static str {
235 if self.opened { "</pre>" } else { "" }
236 }
237}
238
239impl Default for TextFilter {
240 fn default() -> Self {
241 Self::new()
242 }
243}
244
#[cfg(test)]
mod tests {
    use super::*;

    /// Input that does not start with `<` is plain text and becomes body as-is.
    #[test]
    fn detects_plain_text() {
        let mut p = HeadParser::new();
        p.feed("hello world");
        assert!(!p.is_html());
        assert!(p.complete());
        assert_eq!(p.take_body(), "hello world");
    }

    /// Input that starts with `<` is classified as HTML.
    #[test]
    fn detects_html() {
        let mut p = HeadParser::new();
        p.feed("<p>hello</p>");
        assert!(p.is_html());
    }

    /// The DOCTYPE declaration is dropped from both head and body.
    #[test]
    fn strips_doctype() {
        let mut p = HeadParser::new();
        p.feed("<!DOCTYPE html>\n<p>body</p>");
        assert!(p.is_html());
        assert!(p.complete());
        assert!(!p.head().contains("DOCTYPE"));
        assert!(p.take_body().contains("<p>body</p>"));
    }

    /// `<html>` and `<head>` wrapper tags are discarded.
    #[test]
    fn strips_html_head_tags() {
        let mut p = HeadParser::new();
        p.feed("<html><head></head><body><p>content</p></body></html>");
        assert!(p.complete());
        assert!(p.take_body().contains("<p>content</p>"));
    }

    /// Head content elements are collected verbatim.
    #[test]
    fn preserves_head_content_tags() {
        let mut p = HeadParser::new();
        p.feed("<html><head><title>My Page</title><style>body{}</style></head><body>hi</body></html>");
        assert!(p.complete());
        let head = p.head();
        assert!(head.contains("<title>My Page</title>"));
        assert!(head.contains("<style>body{}</style>"));
    }

    /// A bare fragment has an empty head and is all body.
    #[test]
    fn fragment_with_no_head() {
        let mut p = HeadParser::new();
        p.feed("<p>just a fragment</p>");
        assert!(p.is_html());
        assert!(p.complete());
        assert!(p.take_body().contains("<p>just a fragment</p>"));
    }

    /// Leading whitespace does not affect HTML detection.
    #[test]
    fn whitespace_before_html() {
        let mut p = HeadParser::new();
        p.feed("  \n  <p>text</p>");
        assert!(p.is_html());
    }

    /// The first filtered chunk opens the `<pre>` element.
    #[test]
    fn text_filter_wraps_in_pre() {
        let mut f = TextFilter::new();
        let out = f.filter("hello");
        assert!(out.starts_with("<pre>"));
        assert!(out.contains("hello"));
    }

    /// Markup characters must come out as entities, never as raw tags.
    #[test]
    fn text_filter_escapes_entities() {
        let mut f = TextFilter::new();
        let out = f.filter("<b>&</b>");
        assert!(out.contains("&lt;b&gt;"));
        assert!(out.contains("&amp;"));
        assert!(!out.contains("<b>"));
    }

    /// Only the first chunk carries the opening tag.
    #[test]
    fn text_filter_no_double_pre() {
        let mut f = TextFilter::new();
        let a = f.filter("first");
        let b = f.filter("second");
        assert!(a.starts_with("<pre>"));
        assert!(!b.starts_with("<pre>"));
    }

    /// `finish` closes the element only if it was opened.
    #[test]
    fn text_filter_finish() {
        let mut f = TextFilter::new();
        assert_eq!(f.finish(), "");
        f.filter("x");
        assert_eq!(f.finish(), "</pre>");
    }
}