1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
use std::io;
use html5ever::{
parse_document,
Parser,
tree_builder::TreeSink,
tendril::TendrilSink,
tendril::stream::Utf8LossyDecoder,
};
/// Wraps an html5ever document parser so that raw bytes can be pushed into it.
///
/// The parser is held behind a [`Utf8LossyDecoder`], so the bytes handed to
/// this sink pass through that decoder before reaching the [`Parser`] that
/// drives the tree sink `D`.
pub struct ParserSink<D>
where
    D: TreeSink,
{
    /// Byte-oriented front end of the document parser.
    inner: Utf8LossyDecoder<Parser<D>>,
}
impl<D: TreeSink> ParserSink<D> {
    /// Creates a sink that parses a full HTML document into `dom`.
    ///
    /// The parser is built with `parse_document` using default options and is
    /// then wrapped with `from_utf8()` so it can accept raw bytes.
    pub fn new(dom: D) -> Self {
        let inner = parse_document(dom, Default::default()).from_utf8();
        Self { inner }
    }

    /// Consumes the sink, finishes the underlying parser and returns the
    /// tree sink's output.
    pub fn finish(self) -> D::Output {
        self.inner.finish()
    }
}
impl<D: TreeSink> io::Write for ParserSink<D> {
    /// Hands the whole of `buf` to the parser and reports every byte as written.
    ///
    /// This implementation never returns an error: the result is always
    /// `Ok(buf.len())`.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let consumed = buf.len();
        // The slice is converted into the tendril type expected by `process`.
        self.inner.process(buf.into());
        Ok(consumed)
    }

    /// Does nothing and always succeeds.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    extern crate reqwest;

    use super::*;
    use html5ever::rcdom::RcDom;
    use std::io::Write;

    /// Small HTML document fed through the sink by the test below.
    const TEST_HTML: &str = "<html> <head> <title> test </title> </head> </html>";

    /// A single `write` must report the whole buffer as consumed, and the
    /// sink must still be finishable afterwards.
    #[test]
    fn test_write() {
        let mut sink = ParserSink::new(RcDom::default());
        let written = sink.write(TEST_HTML.as_bytes()).unwrap();
        assert_eq!(written, TEST_HTML.len());
        sink.finish();
    }
}