netscape_to_universal/
lib.rs1use crate::node_ref_ext::*;
2use kuchiki::{parse_html, traits::TendrilSink, NodeRef};
3use std::{
4 fs::File,
5 io::{self, Error, ErrorKind, Read, Write},
6 path::PathBuf,
7};
8
9mod node_ref_ext;
10
/// Opens the input source designated by `input_path`.
///
/// The literal path `-` selects standard input; any other value is opened
/// as a file.
///
/// # Errors
///
/// If the file cannot be opened, the returned error keeps the
/// [`ErrorKind`] reported by the operating system (for example
/// `NotFound` or `PermissionDenied`) and its message is prefixed with the
/// offending path so callers can print it directly.
pub fn read_path(input_path: PathBuf) -> Result<Box<dyn Read>, Error> {
    if input_path.as_os_str() == "-" {
        return Ok(Box::new(io::stdin()) as Box<dyn Read>);
    }

    File::open(&input_path)
        .map(|file| Box::new(file) as Box<dyn Read>)
        .map_err(|err| {
            // Preserve the original kind: a missing or unreadable file is
            // not "invalid data", and callers may branch on the kind.
            let msg = format!("{}: {}", input_path.display(), err);
            Error::new(err.kind(), msg)
        })
}
26
27pub fn convert<R: Read, W: Write>(input: &mut R, output: &mut W) -> Result<(), Error> {
28 let node = parse_html().from_utf8().read_from(input).unwrap();
29 to_universal(node, output)
30}
31
32fn to_universal(node: NodeRef, output: &mut dyn Write) -> Result<(), Error> {
42 if let Some(root) = node.children().find(|n| n.is_element("HTML")) {
43 if let Some(body) = root.children().find(|child| child.is_element("BODY")) {
44 if let Some(content) = body.children().find(|child| child.is_element("DL")) {
45 for item in content.children() {
46 to_universal_rec(&item, vec![], output)?;
47 }
48 } else {
49 return Err(Error::new(
50 ErrorKind::InvalidData,
51 "Invalid file format: missing content element DL.",
52 ));
53 }
54 }
55 }
56 Ok(())
57}
58
59fn to_universal_rec(
60 node: &NodeRef,
61 mut ancestors: Vec<String>,
62 output: &mut dyn Write,
63) -> Result<(), Error> {
64 if node.is_element("DT") {
65 if let Some(node_a) = node.children().find(|n| n.is_element("A")) {
67 if let Some(attribute) = node_a.select_attribute("HREF") {
68 writeln!(
69 output,
70 "{}{}{}",
71 attribute.value,
72 if ancestors.is_empty() { "" } else { " " },
73 ancestors.join(" ")
74 )?;
75 return Ok(());
76 }
77 }
78
79 if let Some(node_h3) = node.children().find(|n| n.is_element("H3")) {
82 let title = node_h3.text_contents();
83 ancestors.push(format!("#{}", title.replace(' ', "-")));
85 for sibling in node_h3.following_siblings() {
87 if sibling.is_element("DL") {
88 for child in sibling.children() {
89 to_universal_rec(&child, ancestors.clone(), output)?;
90 }
91 }
92 }
93 }
94 }
95 Ok(())
96}