1#![forbid(unsafe_code)]
5
6
7use markup5ever_rcdom::{Handle, NodeData, RcDom};
8
9
10use html5ever::{driver as html};
11
12
13
14
15use std::io;
16use std::iter::IntoIterator as IntoIter;
17use std::mem::replace;
18
19
20
21use tendril::stream::TendrilSink;
22
23pub use url::Url;
24
25pub use url;
26
27#[derive(Debug)]
28pub struct Builder {
29 pub base: Url,
30}
31
32impl Builder {
33 pub fn run(&self, src: &str) -> Vec<Url> {
34 let parser = Self::make_parser();
35 let dom = parser.one(src);
36 self.run_dom(dom)
37 }
38
39 pub fn run_from_reader<R>(&self, mut src: R) -> io::Result<Vec<Url>>
40 where
41 R: io::Read,
42 {
43 let parser = Self::make_parser().from_utf8();
44 let dom = parser.read_from(&mut src)?;
45 Ok(self.run_dom(dom))
46 }
47
48 fn check_child(&self, child: &mut Handle) -> Option<Url> {
49 match child.data {
50 NodeData::Text { .. } => None,
51 NodeData::Comment { .. } => None,
52 NodeData::Doctype { .. }
53 | NodeData::Document
54 | NodeData::ProcessingInstruction { .. } => None,
55 NodeData::Element {
56 ref name,
57 ref attrs,
58 ..
59 } => {
60 if &name.local[..] == "link" {
61 let mut is_alternate = false;
62 let mut is_feed = false;
63 let mut url = None;
64 for attr in &*attrs.borrow_mut() {
65 if &attr.name.local[..] == "rel" && attr.value.to_ascii_lowercase() == "alternate" {
66 is_alternate = true;
67 }
68 if &attr.name.local[..] == "type" && attr.value.to_ascii_lowercase() == "application/atom+xml" {
69 is_feed = true;
70 }
71 if &attr.name.local[..] == "type" && attr.value.to_ascii_lowercase() == "application/rss+xml" {
72 is_feed = true;
73 }
74 if &attr.name.local[..] == "href" {
75 url = self.base.join(&attr.value).ok();
76 }
77 }
78 if is_alternate && is_feed { url } else { None }
79 } else {
80 None
81 }
82 }
83 }
84 }
85
86 fn run_dom(&self, dom: RcDom) -> Vec<Url> {
87 let mut urls = Vec::new();
88 let mut stack = Vec::new();
89 stack.extend(
90 replace(&mut *dom.document.children.borrow_mut(), Vec::new())
91 .into_iter()
92 .rev(),
93 );
94 while let Some(mut node) = stack.pop() {
99 let pass = self.check_child(&mut node);
100 if let Some(url) = pass {
101 urls.push(url);
102 }
103 stack.extend(
104 replace(&mut *node.children.borrow_mut(), Vec::new())
105 .into_iter()
106 .rev(),
107 );
108 }
109 urls
110 }
111
112 fn make_parser() -> html::Parser<RcDom> {
117 html::parse_document(
118 RcDom::default(),
119 html::ParseOpts::default(),
120 )
121 }
122}
123