html2md/
anchors.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
use super::StructuredPrinter;
use super::TagHandler;
use percent_encoding::percent_decode_str;
use std::borrow::Cow;

use markup5ever_rcdom::{Handle, NodeData};

/// Tag handler that wraps the output produced for an element in a Markdown
/// link of the form `[text](url)`, using the element's `href` attribute.
#[derive(Default)]
pub struct AnchorHandler {
    /// Length of the printer buffer at the moment the tag was opened; marks
    /// where the link text begins.
    start_pos: usize,
    /// Trimmed value of the `href` attribute, or an empty string when the
    /// attribute is absent.
    url: String,
}

impl TagHandler for AnchorHandler {
    /// Records where the link text starts and captures the link target.
    ///
    /// `start_pos` is set to the current length of the printer buffer, so
    /// everything appended afterwards is treated as the link text. `url` is
    /// set to the trimmed `href` attribute value, or to an empty string when
    /// the node is not an element or has no `href`.
    fn handle(&mut self, tag: &Handle, printer: &mut StructuredPrinter) {
        self.start_pos = printer.data.len();
        self.url = match tag.data {
            NodeData::Element { ref attrs, .. } => attrs
                .borrow()
                .iter()
                .find(|attr| attr.name.local.as_bytes() == b"href")
                .map(|link| link.value.trim().to_owned())
                .unwrap_or_default(),
            _ => String::new(),
        };
    }

    /// Wraps the text emitted since [`handle`](Self::handle) in `[`…`](url)`.
    ///
    /// A newline at the very start or very end of the emitted text is kept
    /// outside of the brackets, so the link markup itself stays on one line.
    fn after_handle(&mut self, printer: &mut StructuredPrinter) {
        // Percent decode url.
        let url = percent_decode_str(&self.url).decode_utf8_lossy();
        // [CommonMark Spec](https://spec.commonmark.org/0.31.2/#link-destination)
        // A bare destination may not contain spaces or ASCII control
        // characters, so such URLs are written in the `<...>` form instead.
        // NOTE(review): the spec also forbids line endings and unescaped
        // `<` / `>` inside `<...>`; those are not handled here.
        let url = if url.contains(|c: char| c.is_ascii_control() || c == ' ') {
            Cow::Owned(format!("<{}>", url))
        } else {
            url
        };
        let closing = format!("]({})", url);

        // Inspect the link text. If `start_pos` no longer addresses a valid
        // slice of the buffer, fall back to treating it as having no newlines.
        let (leading_newline, trailing_newline) = match printer.data.get(self.start_pos..) {
            Some(text) => {
                let leading = text.starts_with('\n');
                // A lone "\n" is both the first and the last character. Count
                // it as leading only; otherwise the closing part would be
                // inserted *before* the opening bracket ("\n](url)[").
                let trailing = text.ends_with('\n') && !(leading && text.len() == 1);
                (leading, trailing)
            }
            None => (false, false),
        };

        // Opening bracket goes after a leading newline, if there is one.
        let open_pos = if leading_newline {
            self.start_pos + 1
        } else {
            self.start_pos
        };
        printer.insert_str(open_pos, "[");

        // Closing part goes before a trailing newline, if there is one.
        if trailing_newline {
            // Non-empty is guaranteed here: the buffer ends with '\n'.
            let close_pos = printer.data.len() - 1;
            printer.insert_str(close_pos, &closing);
        } else {
            printer.append_str(&closing);
        }
    }
}