skymark 0.1.0

HTML-to-Markdown converter prioritizing proper conversion for human readability
Documentation
/// Returns a copy of `s` with every leading and trailing `\n` / `\r` removed.
///
/// Line breaks in the interior of the string, and any other whitespace, are
/// left untouched.
pub(crate) fn trim_newlines(s: &str) -> String {
    let is_line_break = |c: char| c == '\n' || c == '\r';
    String::from(s.trim_matches(is_line_break))
}

/// Wraps `source` in `surround_str`, placing one copy before and one after it.
pub(crate) fn surround(source: &str, surround_str: &str) -> String {
    [surround_str, source, surround_str].concat()
}

/// Reports whether `s` consists solely of whitespace characters.
///
/// An empty string has no non-whitespace character and therefore yields `true`.
pub(crate) fn is_whitespace_only(s: &str) -> bool {
    s.chars().all(char::is_whitespace)
}

/// Splits `s` into `(text, terminator)` pairs, one per line.
///
/// The terminator is whichever of `"\r\n"`, `"\r"` or `"\n"` ended the line,
/// so the original string can be rebuilt by concatenating the pairs in order.
/// The final pair has an empty terminator; when `s` itself ends with a line
/// break, that final pair also has empty text. An empty `s` yields no pairs.
pub(crate) fn split_special(s: &str) -> Vec<(String, String)> {
    let mut pieces = Vec::new();
    let mut rest = s;

    while let Some(pos) = rest.find(['\n', '\r']) {
        let tail = &rest[pos..];
        // A carriage return directly followed by a line feed is one terminator;
        // otherwise the terminator is the single ASCII character that was found.
        let terminator = if tail.starts_with("\r\n") {
            "\r\n"
        } else {
            &tail[..1]
        };
        pieces.push((rest[..pos].to_owned(), terminator.to_owned()));
        rest = &tail[terminator.len()..];
    }

    // Emit the last segment: either leftover text, or an empty marker when the
    // input ended on a line break.
    if !rest.is_empty() || s.ends_with(['\n', '\r']) {
        pieces.push((rest.to_owned(), String::new()));
    }

    pieces
}

/// Wraps the non-whitespace core of every line of `content` in `surround_str`.
///
/// Processing steps:
/// 1. Every existing occurrence of `surround_str` is removed from `content`
///    so the delimiter is never nested.
/// 2. The text is split into lines with [`split_special`], which keeps each
///    line's original terminator.
/// 3. For each line, the run from the first to the last non-whitespace
///    character is wrapped in `surround_str`. At most one whitespace character
///    directly adjacent to that run is kept on each side; any further
///    leading/trailing whitespace on the line is dropped.
/// 4. Lines containing only whitespace (or nothing) are copied through
///    unchanged.
///
/// All slicing happens on character boundaries, so multi-byte characters
/// (e.g. `é`, or a non-breaking space next to the content) are handled
/// without panicking.
pub(crate) fn tag_surround(content: &str, surround_str: &str) -> String {
    let content = content.replace(surround_str, "");
    let mut result = String::with_capacity(content.len() + 2 * surround_str.len());

    for (text, newline) in split_special(&content) {
        let core = text.trim();
        if core.is_empty() {
            // Nothing to wrap on this line: keep it verbatim.
            result.push_str(&text);
            result.push_str(&newline);
            continue;
        }

        // Byte offsets of the core inside `text`; both lie on char boundaries
        // because they are derived from the lengths of trimmed sub-slices.
        let core_start = text.len() - text.trim_start().len();
        let core_end = core_start + core.len();

        // Keep only the whitespace character touching the core on each side.
        if let Some(before) = text[..core_start].chars().next_back() {
            result.push(before);
        }
        result.push_str(surround_str);
        result.push_str(core);
        result.push_str(surround_str);
        if let Some(after) = text[core_end..].chars().next() {
            result.push(after);
        }
        result.push_str(&newline);
    }

    result
}

/// Inspects the tail of `s` and returns `(whitespace, newlines)`.
///
/// Only the last 10 characters are examined, walking backwards until the
/// first non-whitespace character. `whitespace` is the number of whitespace
/// characters seen in that run and `newlines` is how many of them were `\n`
/// or `\r` (so a `"\r\n"` pair counts as two).
pub(crate) fn get_trailing_whitespace_info(s: &str) -> (usize, usize) {
    s.chars()
        .rev()
        .take(10)
        .take_while(|c| c.is_whitespace())
        .fold((0, 0), |(blanks, breaks), c| {
            let is_break = c == '\n' || c == '\r';
            (blanks + 1, breaks + usize::from(is_break))
        })
}

/// Replaces every run of consecutive whitespace characters in `s` with a
/// single ASCII space.
///
/// Leading and trailing runs are collapsed too (not removed), and every
/// non-whitespace character is copied through unchanged.
pub(crate) fn collapse_whitespace(s: &str) -> String {
    let mut collapsed = String::new();
    let mut prev_blank = false;

    for c in s.chars() {
        let blank = c.is_whitespace();
        match (blank, prev_blank) {
            // Still inside a whitespace run: already represented by one space.
            (true, true) => {}
            // First whitespace character of a run.
            (true, false) => collapsed.push(' '),
            (false, _) => collapsed.push(c),
        }
        prev_blank = blank;
    }

    collapsed
}

/// Percent-encodes the characters `(`, `)`, `_` and `*` in `href`.
///
/// Every other character is copied through unchanged; no other URL encoding
/// is performed.
pub(crate) fn markdown_url_encode(href: &str) -> String {
    let mut encoded = String::with_capacity(href.len());

    for chr in href.chars() {
        match chr {
            '(' => encoded.push_str("%28"),
            ')' => encoded.push_str("%29"),
            '_' => encoded.push_str("%5F"),
            '*' => encoded.push_str("%2A"),
            other => encoded.push(other),
        }
    }

    encoded
}

#[cfg(test)]
mod tests {
    use super::{markdown_url_encode, split_special, tag_surround};

    /// A single whitespace character on each side of the content stays
    /// outside the delimiters.
    #[test]
    fn tag_surround_preserves_outer_spaces() {
        let wrapped = tag_surround(" Label: ", "**");
        assert_eq!(wrapped, " **Label:** ");
    }

    /// Delimiters already present in the content are stripped before wrapping.
    #[test]
    fn tag_surround_removes_nested_delimiters() {
        let wrapped = tag_surround("My **bold** text", "**");
        assert_eq!(wrapped, "**My bold text**");
    }

    /// `*`, `_`, `(` and `)` are percent-encoded; the rest of the URL is kept.
    #[test]
    fn markdown_url_encoding_matches_node_behavior() {
        let encoded = markdown_url_encode("http://www.github.com/crosstype/**/_test(123)");
        assert_eq!(
            encoded,
            "http://www.github.com/crosstype/%2A%2A/%5Ftest%28123%29"
        );
    }

    /// Each line keeps the exact terminator it had in the input.
    #[test]
    fn split_special_preserves_mixed_newlines() {
        let expected: Vec<(String, String)> =
            [("a", "\r"), ("b", "\n"), ("c", "\r\n"), ("d", "")]
                .iter()
                .map(|&(text, newline)| (text.to_owned(), newline.to_owned()))
                .collect();

        assert_eq!(split_special("a\rb\nc\r\nd"), expected);
    }
}