gmi2html/
lib.rs

1//! An implementation of gmi -> HTML conversion, based on
2//! the [text/gemini](https://gemini.circumlunar.space/docs/specification.html) spec v0.14.2
3//!
4//! Example usage:
5//! ```
6//! use gmi2html::GeminiConverter;
7//!
//! let res = GeminiConverter::new(r#"
//! ## Hello, Gemini
//! Lorem Ipsum
11//! => gemini://gemini.circumlunar.space
12//! "#)
13//!    .proxy_url("https://portal.mozz.us/gemini/")
14//!    .inline_images(true)
15//!    .to_html();
16//! ```
17
18use std::collections::HashSet;
19use url::{ParseError, Url};
20
/// Lower-case suffixes that mark a link target as an image.
///
/// Every entry is exactly four bytes long (which is why `jpeg` and `webp`
/// carry no leading dot), so a candidate only needs its final four bytes
/// compared against this table.
static IMAGE_EXTENSIONS: &[&str] = &[
    ".jpg", "jpeg", ".png", ".gif", ".ico", ".svg", "webp",
];
23
24pub struct GeminiConverter<'a> {
25    proxy_url: Option<Url>,
26    // TODO allow disallowed configuration
27    input_text: &'a str,
28    inline_images: bool,
29}
30
31impl<'a> GeminiConverter<'a> {
32    /// Initialize the builder with default configuration values.
33    pub fn new(gmi_text: &'a str) -> Self {
34        Self {
35            proxy_url: None,
36            input_text: gmi_text,
37            inline_images: false,
38        }
39    }
40
41    /// Replace `gemini://` in URLS with this prefix for proxying, i.e. over HTTP. Requires trailing slash.
42    pub fn proxy_url(&mut self, proxy_url: &'a str) -> &mut Self {
43        self.proxy_url = Some(Url::parse(proxy_url).unwrap());
44        self
45    }
46
47    /// Render relative-path images in-line. Default false. Beware that this can expose you
48    /// to security issues if you're not careful (e.g. malicious SVG)
49    pub fn inline_images(&mut self, option: bool) -> &mut Self {
50        self.inline_images = option;
51        self
52    }
53
54    /// Convert Gemini text to HTML.
55    pub fn to_html(&self) -> String {
56        // This function sometimes priorities performance over readability
57        let mut output = String::new();
58        let mut is_pre = false;
59        let mut is_list = false;
60        for line in self.input_text.lines() {
61            // See 5.4.3 "Preformatting toggle lines"
62            if line.starts_with("```") {
63                is_pre = !is_pre;
64                if is_pre {
65                    if line.len() > 3 {
66                        // This is marginally faster than using format!, albeit a bit uglier
67                        output.push_str("<pre alt=\"");
68                        xml_safe(&mut output, &line[3..]);
69                        output.push_str("\">\n");
70                    } else {
71                        output.push_str("<pre>\n");
72                    }
73                } else {
74                    output.push_str("</pre>\n")
75                }
76                continue;
77            }
78            if is_pre {
79                xml_safe(&mut output, line);
80                output.push('\n');
81                continue;
82            }
83            // See 5.5.2 "Unordered list items"
84            if line.starts_with("* ") {
85                if !is_list {
86                    output.push_str("<ul>\n");
87                    is_list = true;
88                }
89                output.push_str("<li>");
90                xml_safe(&mut output, &line[2..].trim());
91                output.push_str("</li>\n");
92                continue;
93            } else {
94                if is_list {
95                    output.push_str("</ul>\n");
96                }
97                is_list = false;
98            }
99            // 5.5.1 heading lines
100            if line.starts_with("#") {
101                let mut count = 0;
102                for ch in line.chars() {
103                    if ch == '#' {
104                        count += 1;
105                        // Limit to 3 headers.
106                        if count == 3 {
107                            break;
108                        }
109                    }
110                }
111                // String allocation for readability
112                output.push_str(&format!("<h{}>", count));
113                xml_safe(&mut output, &line[count..].trim());
114                output.push_str(&format!("</h{}>\n", count));
115            // 5.5.3 Quote lines
116            } else if line.starts_with(">") {
117                output.push_str("<q>");
118                xml_safe(&mut output, &line[1..]);
119                output.push_str("</q><br>\n");
120            } else if line.starts_with("=>") {
121                let mut i = line[2..].split_whitespace();
122                let first: &str = i.next().unwrap_or("");
123                // inefficient
124                let second: String = i.collect::<Vec<&str>>().join(" ");
125                // This is much slower than surrounding code
126                // TODO consider blacklist
127                let parsed = Url::parse(first);
128                let mut is_image = false;
129                if parsed == Err(ParseError::RelativeUrlWithoutBase) {
130                    let extension: &str = &first[first.len() - 4..first.len()].to_ascii_lowercase();
131                    if self.inline_images && IMAGE_EXTENSIONS.contains(&extension) {
132                        output.push_str("<img src=\"");
133                        is_image = true;
134                    } else {
135                        output.push_str("<a href=\"");
136                    }
137                    let relative_url = String::new();
138                    xml_safe(&mut output, first);
139                    output.push_str(&relative_url);
140                } else {
141                    output.push_str("<a href=\"");
142                }
143                if let Ok(p) = parsed {
144                    if p.scheme() == "gemini" {
145                        // TODO FIX
146                        if let Some(s) = &self.proxy_url {
147                            // Never fail, just use blank string if cant parse
148                            let join =
149                                |a: &Url, b: Url| -> Result<String, Box<dyn std::error::Error>> {
150                                    Ok(a.join(b.host_str().ok_or("err")?)?
151                                        .join(b.path())?
152                                        .as_str()
153                                        .to_string())
154                                };
155                            let proxied = join(s, p).unwrap_or("".to_string()); // Dont fail
156                            output.push_str(&proxied);
157                        } else {
158                            output.push_str(p.as_str());
159                        }
160                    } else {
161                        output.push_str(p.as_str());
162                    }
163                }
164                let link_text = match second.as_str() {
165                    "" => first,
166                    t => t,
167                };
168                if !is_image {
169                    output.push_str("\">");
170                    xml_safe(&mut output, link_text);
171                    output.push_str("</a>");
172                } else {
173                    output.push_str("\" alt=\"");
174                    xml_safe(&mut output, link_text);
175                    output.push_str("\">");
176                }
177                output.push_str("<br>\n");
178            } else {
179                xml_safe(&mut output, line);
180                output.push_str("<br>\n");
181            }
182        }
183        // Check outstanding tags that need to be closed
184        if is_list {
185            output.push_str("</ul>");
186        }
187        if is_pre {
188            output.push_str("</pre>")
189        }
190        return output;
191    }
192}
193
/// Appends `text` to `dest`, substituting an entity reference for each of the
/// five markup-significant characters (`&`, `<`, `>`, `"`, `'`). All other
/// characters are copied through unchanged.
pub fn xml_safe(dest: &mut String, text: &str) {
    text.chars().for_each(|ch| {
        let entity = match ch {
            '&' => Some("&amp;"),
            '<' => Some("&lt;"),
            '>' => Some("&gt;"),
            '"' => Some("&quot;"),
            '\'' => Some("&#39;"),
            _ => None,
        };
        if let Some(escaped) = entity {
            dest.push_str(escaped);
        } else {
            dest.push(ch);
        }
    });
}
206
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts `source` with the default configuration.
    fn render(source: &str) -> String {
        GeminiConverter::new(source).to_html()
    }

    #[test]
    fn test_basic() {
        let html = render("hello world");
        assert_eq!(html, "hello world<br>\n");
    }

    #[test]
    fn test_unsafe_html() {
        let html = render("<b>hacked</b>");
        assert_eq!(html, "&lt;b&gt;hacked&lt;/b&gt;<br>\n");
        // TODO add more tests
    }

    #[test]
    fn test_whitespace() {
        let html = render("\n\n\n");
        assert_eq!(html, "<br>\n<br>\n<br>\n");
    }

    #[test]
    fn test_list() {
        let html = render("hi\n* cool\n* vibes\nok");
        assert_eq!(
            html,
            "hi<br>\n<ul>\n<li>cool</li>\n<li>vibes</li>\n</ul>\nok<br>\n"
        );
    }

    #[test]
    fn test_quote() {
        let html = render("> stay cool\n-coolguy");
        assert_eq!(html, "<q> stay cool</q><br>\n-coolguy<br>\n");
    }

    #[test]
    fn test_headers() {
        // (input, expected output) pairs, checked in order.
        let cases = [
            ("#header", "<h1>header</h1>\n"),
            ("##header", "<h2>header</h2>\n"),
            ("### header", "<h3>header</h3>\n"),
            ("####header", "<h3>#header</h3>\n"),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(render(input), *expected);
        }
    }

    #[test]
    fn test_pre() {
        let html = render("```\nhello world\n```");
        assert_eq!(html, "<pre>\nhello world\n</pre>\n");
    }

    #[test]
    fn test_pre_alt() {
        let html = render("```alt\"\nhello world\n```");
        assert_eq!(html, "<pre alt=\"alt&quot;\">\nhello world\n</pre>\n");
    }

    #[test]
    fn test_hyperlink() {
        // TODO resolve trailing slash issue
        let html = render("=> https://google.com");
        assert_eq!(
            html,
            "<a href=\"https://google.com/\">https://google.com</a><br>\n"
        );
    }

    #[test]
    fn test_replace_image() {
        let html = GeminiConverter::new("=> something.jpg cool pic")
            .inline_images(true)
            .to_html();
        assert_eq!(html, "<img src=\"something.jpg\" alt=\"cool pic\"><br>\n");
    }

    #[test]
    fn test_proxy() {
        let html = GeminiConverter::new("=> gemini://alexwrites.xyz")
            .proxy_url("https://flounder.online/proxy/")
            .to_html();
        assert_eq!(
            html,
            "<a href=\"https://flounder.online/proxy/alexwrites.xyz\">gemini://alexwrites.xyz</a><br>\n"
        );
    }
}
314}