gmi2html 0.1.8

Convert text/gemini into HTML
Documentation
//! An implementation of gmi -> HTML conversion, based on
//! the [text/gemini](https://gemini.circumlunar.space/docs/specification.html) spec v0.14.2
//!
//! Example usage:
//! ```
//! use gmi2html::GeminiConverter;
//!
//! let res = GeminiConverter::new(r#"
//! ## Hello, Gemini
//! Lorem Ipsum
//! => gemini://gemini.circumlunar.space
//! "#)
//!    .proxy_url("https://portal.mozz.us/gemini/")
//!    .inline_images(true)
//!    .to_html();
//! ```

use std::collections::HashSet;
use url::{ParseError, Url};

// Endings that mark a relative link target as an image when `inline_images` is enabled.
// Every entry is exactly four bytes long so it can be compared against the last four
// bytes of the target, which is why "jpeg" and "webp" are listed without a leading dot.
// The comparison is done case-insensitively against the end of the link target.
static IMAGE_EXTENSIONS: &[&str] = &[".jpg", "jpeg", ".png", ".gif", ".ico", ".svg", "webp"];

/// Builder that converts a text/gemini document into an HTML fragment.
///
/// Create one with [`GeminiConverter::new`], optionally configure it with
/// `proxy_url` and `inline_images`, then call `to_html`.
pub struct GeminiConverter<'a> {
    /// Parsed proxy base URL set through `proxy_url()`. When `None` (the default),
    /// `gemini://` links are emitted without being rewritten to a proxy.
    proxy_url: Option<Url>,
    // TODO allow disallowed configuration
    /// The borrowed text/gemini input that `to_html()` reads line by line.
    input_text: &'a str,
    /// Whether relative links ending in a known image extension are rendered as
    /// `<img>` elements instead of `<a>` elements. Defaults to `false`.
    inline_images: bool,
}

impl<'a> GeminiConverter<'a> {
    /// Initialize the builder with default configuration values.
    ///
    /// `gmi_text` is the text/gemini document to convert; it is borrowed, not copied.
    /// By default no proxy URL is configured and images are not rendered in-line.
    pub fn new(gmi_text: &'a str) -> Self {
        Self {
            proxy_url: None,
            input_text: gmi_text,
            inline_images: false,
        }
    }

    /// Replace `gemini://` in URLS with this prefix for proxying, i.e. over HTTP. Requires trailing slash.
    ///
    /// Returns `&mut Self` so calls can be chained.
    ///
    /// # Panics
    ///
    /// Panics if `proxy_url` cannot be parsed as an absolute URL.
    pub fn proxy_url(&mut self, proxy_url: &'a str) -> &mut Self {
        let parsed = Url::parse(proxy_url).expect("proxy_url must be a valid absolute URL");
        self.proxy_url = Some(parsed);
        self
    }

    /// Render relative-path images in-line. Default false. Beware that this can expose you
    /// to security issues if you're not careful (e.g. malicious SVG)
    ///
    /// Returns `&mut Self` so calls can be chained.
    pub fn inline_images(&mut self, option: bool) -> &mut Self {
        self.inline_images = option;
        self
    }

    /// Convert Gemini text to HTML.
    ///
    /// The input is processed line by line:
    ///
    /// * lines starting with three backticks toggle a `<pre>` block; text after the
    ///   backticks on the opening line becomes the `alt` attribute,
    /// * `* item` lines are collected into a `<ul>` of `<li>` elements,
    /// * `#`, `##` and `###` lines become `<h1>`..`<h3>` (extra `#` stay in the text),
    /// * `>` lines become `<q>` elements,
    /// * `=>` lines become links (or images, see `inline_images`),
    /// * every other line is emitted as escaped text followed by `<br>`.
    ///
    /// All text taken from the input is escaped with `xml_safe`. A list or
    /// preformatted block still open at the end of the input is closed.
    pub fn to_html(&self) -> String {
        // The output is at least roughly as long as the input, so reserve that up front.
        let mut output = String::with_capacity(self.input_text.len());
        let mut is_pre = false;
        let mut is_list = false;
        for line in self.input_text.lines() {
            // See 5.4.3 "Preformatting toggle lines"
            if line.starts_with("```") {
                // A list cannot continue into a preformatted block; close it first so
                // the <pre> element does not end up nested inside the <ul>.
                if is_list {
                    output.push_str("</ul>\n");
                    is_list = false;
                }
                is_pre = !is_pre;
                let alt_text = &line[3..];
                if !is_pre {
                    output.push_str("</pre>\n");
                } else if alt_text.is_empty() {
                    output.push_str("<pre>\n");
                } else {
                    output.push_str("<pre alt=\"");
                    xml_safe(&mut output, alt_text);
                    output.push_str("\">\n");
                }
                continue;
            }
            if is_pre {
                // Inside a preformatted block no other line type is recognised.
                xml_safe(&mut output, line);
                output.push('\n');
                continue;
            }
            // See 5.5.2 "Unordered list items"
            if line.starts_with("* ") {
                if !is_list {
                    output.push_str("<ul>\n");
                    is_list = true;
                }
                output.push_str("<li>");
                xml_safe(&mut output, line[2..].trim());
                output.push_str("</li>\n");
                continue;
            }
            // Any non-list line ends a running list.
            if is_list {
                output.push_str("</ul>\n");
                is_list = false;
            }
            if line.starts_with('#') {
                // 5.5.1 heading lines
                Self::push_heading(&mut output, line);
            } else if line.starts_with('>') {
                // 5.5.3 Quote lines
                output.push_str("<q>");
                xml_safe(&mut output, &line[1..]);
                output.push_str("</q><br>\n");
            } else if line.starts_with("=>") {
                self.push_link(&mut output, &line[2..]);
            } else {
                xml_safe(&mut output, line);
                output.push_str("<br>\n");
            }
        }
        // Check outstanding tags that need to be closed
        if is_list {
            output.push_str("</ul>");
        }
        if is_pre {
            output.push_str("</pre>")
        }
        output
    }

    /// Append the heading element for `line`, which must start with `#`.
    fn push_heading(output: &mut String, line: &str) {
        // Only the leading run of '#' determines the level, capped at three; a '#'
        // that appears later in the line is part of the heading text.
        let level = line.bytes().take(3).take_while(|&b| b == b'#').count();
        let (open, close) = match level {
            1 => ("<h1>", "</h1>\n"),
            2 => ("<h2>", "</h2>\n"),
            _ => ("<h3>", "</h3>\n"),
        };
        output.push_str(open);
        // The skipped bytes are all ASCII '#', so `level` is a valid char boundary.
        xml_safe(output, line[level..].trim());
        output.push_str(close);
    }

    /// Append the HTML for a link line; `rest` is the line without its leading `=>`.
    fn push_link(&self, output: &mut String, rest: &str) {
        let mut parts = rest.split_whitespace();
        let target = parts.next().unwrap_or("");
        // The label is every remaining word joined by single spaces.
        let label = parts.collect::<Vec<&str>>().join(" ");
        let link_text = if label.is_empty() {
            target
        } else {
            label.as_str()
        };

        let mut is_image = false;
        // NOTE(review): any URL scheme that parses is emitted as-is (e.g. `javascript:`).
        // Callers rendering untrusted gemtext may want a scheme allow-list here.
        match Url::parse(target) {
            Err(ParseError::RelativeUrlWithoutBase) => {
                // Relative targets are passed through; only these may become images.
                is_image = self.inline_images && Self::has_image_extension(target);
                output.push_str(if is_image { "<img src=\"" } else { "<a href=\"" });
                xml_safe(output, target);
            }
            Ok(url) => {
                output.push_str("<a href=\"");
                match &self.proxy_url {
                    Some(proxy) if url.scheme() == "gemini" => {
                        // Never fail, just use a blank href if the URL can't be proxied.
                        let proxied = Self::proxied_href(proxy, &url).unwrap_or_default();
                        xml_safe(output, &proxied);
                    }
                    _ => xml_safe(output, url.as_str()),
                }
            }
            // Any other parse failure yields a link with an empty href.
            Err(_) => output.push_str("<a href=\""),
        }

        if is_image {
            output.push_str("\" alt=\"");
            xml_safe(output, link_text);
            output.push_str("\">");
        } else {
            output.push_str("\">");
            xml_safe(output, link_text);
            output.push_str("</a>");
        }
        output.push_str("<br>\n");
    }

    /// Report whether `target` ends, ignoring ASCII case, in one of `IMAGE_EXTENSIONS`.
    fn has_image_extension(target: &str) -> bool {
        // Compare raw bytes so short targets and multi-byte characters cannot cause
        // an out-of-range or mid-character slice.
        let bytes = target.as_bytes();
        IMAGE_EXTENSIONS.iter().any(|ext| {
            bytes.len() >= ext.len()
                && bytes[bytes.len() - ext.len()..].eq_ignore_ascii_case(ext.as_bytes())
        })
    }

    /// Build the proxied form of a `gemini://` URL: `<proxy><host><path>`.
    ///
    /// Returns `None` when `target` has no host or the combined URL cannot be built.
    fn proxied_href(proxy: &Url, target: &Url) -> Option<String> {
        let host = target.host_str()?;
        let path = target.path();
        // Join host and path as ONE relative reference. Joining the path on its own
        // would treat its leading '/' as absolute and discard the proxy prefix and host.
        // NOTE(review): port and query of the original URL are not forwarded.
        let mut relative = String::with_capacity(host.len() + path.len());
        relative.push_str(host);
        relative.push_str(path);
        proxy
            .join(&relative)
            .ok()
            .map(|joined| joined.as_str().to_string())
    }
}

/// Append `text` to `dest`, replacing the characters `&`, `<`, `>`, `"` and `'`
/// with their HTML/XML entity references. Everything else is copied verbatim.
pub fn xml_safe(dest: &mut String, text: &str) {
    // Start of the pending run of bytes that need no escaping.
    let mut plain_start = 0;
    for (idx, byte) in text.bytes().enumerate() {
        let entity = match byte {
            b'&' => "&amp;",
            b'<' => "&lt;",
            b'>' => "&gt;",
            b'"' => "&quot;",
            b'\'' => "&#39;",
            _ => continue,
        };
        // Every escaped character is a single ASCII byte, so both `idx` and
        // `idx + 1` are valid char boundaries for slicing.
        dest.push_str(&text[plain_start..idx]);
        dest.push_str(entity);
        plain_start = idx + 1;
    }
    dest.push_str(&text[plain_start..]);
}

// Unit tests that pin the exact HTML produced by `GeminiConverter::to_html`.
#[cfg(test)]
mod tests {
    use super::*;
    // A plain text line is emitted followed by `<br>`.
    #[test]
    fn test_basic() {
        assert_eq!(
            GeminiConverter::new("hello world").to_html(),
            "hello world<br>\n"
        )
    }

    // HTML markup in the input is escaped rather than passed through.
    #[test]
    fn test_unsafe_html() {
        assert_eq!(
            GeminiConverter::new("<b>hacked</b>").to_html(),
            "&lt;b&gt;hacked&lt;/b&gt;<br>\n"
        );
        // TODO add more tests
    }

    // Each empty line becomes its own `<br>`.
    #[test]
    fn test_whitespace() {
        assert_eq!(
            GeminiConverter::new("\n\n\n").to_html(),
            "<br>\n<br>\n<br>\n"
        )
    }

    // Consecutive `* ` lines are grouped into one `<ul>`, closed by the next other line.
    #[test]
    fn test_list() {
        assert_eq!(
            GeminiConverter::new("hi\n* cool\n* vibes\nok").to_html(),
            "hi<br>\n<ul>\n<li>cool</li>\n<li>vibes</li>\n</ul>\nok<br>\n"
        )
    }

    // A `>` line becomes a `<q>` element; the text after `>` is kept untrimmed.
    #[test]
    fn test_quote() {
        assert_eq!(
            GeminiConverter::new("> stay cool\n-coolguy").to_html(),
            "<q> stay cool</q><br>\n-coolguy<br>\n"
        )
    }
    // Heading levels one to three are supported; a fourth `#` stays in the heading text.
    #[test]
    fn test_headers() {
        assert_eq!(
            GeminiConverter::new("#header").to_html(),
            "<h1>header</h1>\n"
        );
        assert_eq!(
            GeminiConverter::new("##header").to_html(),
            "<h2>header</h2>\n"
        );
        assert_eq!(
            GeminiConverter::new("### header").to_html(),
            "<h3>header</h3>\n"
        );
        assert_eq!(
            GeminiConverter::new("####header").to_html(),
            "<h3>#header</h3>\n"
        );
    }

    // Lines between preformatting toggles are wrapped in `<pre>`.
    #[test]
    fn test_pre() {
        assert_eq!(
            GeminiConverter::new("```\nhello world\n```").to_html(),
            "<pre>\nhello world\n</pre>\n"
        );
    }

    // Text after the opening toggle becomes an escaped `alt` attribute.
    #[test]
    fn test_pre_alt() {
        assert_eq!(
            GeminiConverter::new("```alt\"\nhello world\n```").to_html(),
            "<pre alt=\"alt&quot;\">\nhello world\n</pre>\n"
        );
    }

    // An absolute link without a label uses the target as link text; the href is the
    // parsed URL, which here gains a trailing slash.
    #[test]
    fn test_hyperlink() {
        assert_eq!(
            // TODO resolve trailing slash issue
            GeminiConverter::new("=> https://google.com").to_html(),
            "<a href=\"https://google.com/\">https://google.com</a><br>\n"
        )
    }

    // With `inline_images(true)` a relative link to an image file becomes an `<img>`
    // whose `alt` text is the link label.
    #[test]
    fn test_replace_image() {
        assert_eq!(
            GeminiConverter::new("=> something.jpg cool pic")
                .inline_images(true)
                .to_html(),
            "<img src=\"something.jpg\" alt=\"cool pic\"><br>\n"
        )
    }

    // With a proxy configured, a `gemini://` link is rewritten to the proxy URL
    // followed by the original host.
    #[test]
    fn test_proxy() {
        assert_eq!(
            GeminiConverter::new("=> gemini://alexwrites.xyz")
            .proxy_url("https://flounder.online/proxy/")
            .to_html(),
            "<a href=\"https://flounder.online/proxy/alexwrites.xyz\">gemini://alexwrites.xyz</a><br>\n"
            )
    }
}