1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
use std::io;
use html5ever::{
    parse_document,
    Parser,
    tree_builder::TreeSink,
    tendril::TendrilSink,
    tendril::stream::Utf8LossyDecoder,
};


/// A thin adapter that lets raw bytes be streamed into an html5ever parser.
///
/// The wrapped parser sits behind a tendril [`Utf8LossyDecoder`], so callers hand
/// over plain byte slices. `ParserSink` implements `std::io::Write`, which means
/// anything that can write to an `io::Write` can feed the parser.
pub struct ParserSink<D>
where
    D: TreeSink,
{
    /// The html5ever parser for sink `D`, wrapped in the byte-to-UTF-8 decoder.
    inner: Utf8LossyDecoder<Parser<D>>,
}

impl<D: TreeSink> ParserSink<D> {
    /// Builds a document parser with default options that feeds `dom`, and wraps
    /// it in a `ParserSink` that accepts bytes through `std::io::Write`.
    pub fn new(dom: D) -> Self {
        // The option type is inferred from `parse_document`'s signature.
        let opts = Default::default();
        let inner = parse_document(dom, opts).from_utf8();
        Self { inner }
    }

    /// Consumes the `ParserSink` and returns whatever the inner parser yields
    /// when it is finished, i.e. the tree sink's `Output`.
    pub fn finish(self) -> D::Output {
        let ParserSink { inner } = self;
        inner.finish()
    }
}

impl<D: TreeSink> io::Write for ParserSink<D> {
    /// Hands the whole of `buf` to the inner decoder/parser and reports every
    /// byte as written. This implementation never returns an error.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let accepted = buf.len();
        // The slice is converted into the tendril type `process` expects.
        self.inner.process(buf.into());
        Ok(accepted)
    }

    /// No-op: `write` passes data straight to the parser, so this type holds no
    /// buffer of its own to flush.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    extern crate reqwest;
    use html5ever::rcdom::RcDom;
    use std::io::Write;

    use super::*;

    /// Small, well-formed document used as parser input.
    const TEST_HTML: &str = "<html> <head> <title> test </title> </head> </html>";

    /// Writing the document through `io::Write` must report every byte as
    /// consumed, and the sink must then be finishable.
    #[test]
    fn test_write() {
        let mut sink = ParserSink::new(RcDom::default());
        let written = sink.write(TEST_HTML.as_bytes()).unwrap();
        assert_eq!(written, TEST_HTML.len());
        sink.finish();
    }
}