1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
use tendril::StrTendril;

use node::{self, Node};
use predicate::Predicate;
use selection::Selection;

/// A parsed HTML document, stored as a flat list of nodes.
#[derive(Debug, Clone, PartialEq)]
pub struct Document {
    /// Every node kept from the parsed document. A node is addressed by its
    /// position in this vector (see `Document::nth`).
    pub nodes: Vec<node::Raw>,
}

impl Document {
    /// Builds a `Selection` holding every node of this document for which the
    /// predicate `p` matches.
    ///
    /// Nodes are tested in index order, from position 0 up to the last node.
    pub fn find<'a, P: Predicate>(&'a self, p: P) -> Selection<'a> {
        let node_count = self.nodes.len();

        let matching = (0..node_count).filter(|&position| {
            // Every position below `node_count` is expected to yield a node,
            // so a `None` here is treated as a bug and panics.
            let candidate = self.nth(position).unwrap();
            p.matches(&candidate)
        }).collect();

        Selection::new(self, matching)
    }

    /// Looks up the node at position `n`, counting from 0.
    ///
    /// Yields `Some(Node)` for a valid position and `None` when `n` is greater
    /// than or equal to the number of nodes in the document.
    pub fn nth(&self, n: usize) -> Option<Node> {
        Node::new(self, n)
    }
}

impl From<StrTendril> for Document {
    /// Parses the given `StrTendril` into a `Document`.
    ///
    /// The input is parsed by html5ever into an `RcDom`, which is then
    /// flattened into `Document::nodes`. Text, comment and element nodes are
    /// appended in the order they are visited (an element before its
    /// children), and each stored node records its own index together with the
    /// indices of its parent and of its previous and next siblings. The
    /// document root and doctype nodes are not stored.
    fn from(tendril: StrTendril) -> Document {
        use html5ever::{parse_document, rcdom};
        use tendril::stream::TendrilSink;

        // Pushes a new raw node holding `data` onto `document.nodes` and wires
        // it to `parent` (by registering it as a child) and to `prev` (by
        // setting that node's `next`). Returns the index of the new node.
        fn append(document: &mut Document,
                  data: node::Data,
                  parent: Option<usize>,
                  prev: Option<usize>) -> usize {
            let index = document.nodes.len();

            document.nodes.push(node::Raw {
                index: index,
                parent: parent,
                prev: prev,
                next: None,
                data: data
            });

            if let Some(parent) = parent {
                match document.nodes[parent].data {
                    node::Data::Element(_, _, ref mut children) => {
                        children.push(index);
                    },
                    // `recur` only ever passes the index of an element as
                    // `parent`, so no other kind of node can show up here.
                    _ => unreachable!()
                }
            }

            if let Some(prev) = prev {
                document.nodes[prev].next = Some(index);
            }

            index
        }

        // Converts every child of `node`, in order, attaching each one to
        // `parent` and chaining consecutive stored children as siblings.
        fn recur_children(document: &mut Document,
                          node: &rcdom::Handle,
                          parent: Option<usize>) {
            let mut prev = None;
            for child in &node.borrow().children {
                // A child that produces no stored node (a doctype) must not
                // reset the chain: keep the previous index so the stored nodes
                // on either side of it are still linked to each other.
                prev = recur(document, child, parent, prev).or(prev);
            }
        }

        // Converts `node` and everything below it. Returns the index of the
        // node stored for `node` itself, or `None` when nothing was stored
        // for it (the document root and doctypes).
        fn recur(document: &mut Document,
                 node: &rcdom::Handle,
                 parent: Option<usize>,
                 prev: Option<usize>) -> Option<usize> {
            match node.borrow().node {
                rcdom::Document => {
                    // The root is not stored, so its children have no parent.
                    recur_children(document, node, None);
                    None
                },
                rcdom::Doctype(..) => None,
                rcdom::Text(ref text) => {
                    let data = node::Data::Text(text.clone());
                    Some(append(document, data, parent, prev))
                },
                rcdom::Comment(ref comment) => {
                    let data = node::Data::Comment(comment.clone());
                    Some(append(document, data, parent, prev))
                },
                rcdom::Element(ref name, ref _element, ref attrs) => {
                    let name = name.local.clone();
                    let attrs = attrs.iter().map(|attr| {
                        (attr.name.local.clone(), attr.value.clone())
                    }).collect();
                    // The child list starts out empty; `append` fills it in as
                    // the children are converted below.
                    let data = node::Data::Element(name, attrs, vec![]);
                    let index = append(document, data, parent, prev);
                    recur_children(document, node, Some(index));
                    Some(index)
                }
            }
        }

        let rc_dom = parse_document(rcdom::RcDom::default(),
                                    Default::default()).one(tendril);

        let mut document = Document {
            nodes: vec![]
        };
        recur(&mut document, &rc_dom.document, None, None);
        document
    }
}

impl<'a> From<&'a str> for Document {
    /// Builds a `Document` from a string slice.
    ///
    /// The slice is first converted into a `StrTendril`, which is then handed
    /// to the `From<StrTendril>` conversion for the actual parsing.
    fn from(str: &str) -> Document {
        let tendril = StrTendril::from(str);
        Document::from(tendril)
    }
}