// skymark 0.1.1
//
// HTML-to-Markdown converter prioritizing proper conversion for human readability.
// Documentation
/// Strips every leading and trailing line feed (`\n`) and carriage return
/// (`\r`) from `s` and returns the remainder as an owned `String`.
///
/// Newlines in the interior of the string, and any other kind of whitespace
/// at either end, are left untouched.
pub(crate) fn trim_newlines(s: &str) -> String {
    let is_newline = |c: char| c == '\n' || c == '\r';
    String::from(s.trim_matches(is_newline))
}

/// Returns `source` with `surround_str` placed immediately before and
/// immediately after it.
pub(crate) fn surround(source: &str, surround_str: &str) -> String {
    [surround_str, source, surround_str].concat()
}

/// Returns `true` when every character of `s` is Unicode whitespace.
///
/// An empty string contains no non-whitespace character and therefore also
/// yields `true`.
pub(crate) fn is_whitespace_only(s: &str) -> bool {
    s.chars().all(char::is_whitespace)
}

/// Splits `s` into lines while remembering which newline sequence ended each
/// one.
///
/// Every element of the returned vector is a `(text, newline)` pair where
/// `newline` is `"\n"`, `"\r"`, `"\r\n"`, or the empty string for the final
/// segment. A `\r` immediately followed by `\n` is treated as a single
/// `"\r\n"` terminator.
///
/// When the input ends with a newline, a final `("", "")` pair is appended;
/// an empty input produces an empty vector.
pub(crate) fn split_special(s: &str) -> Vec<(String, String)> {
    let mut segments: Vec<(String, String)> = Vec::new();
    let mut current = String::new();
    let mut iter = s.chars().peekable();

    loop {
        // Work out which terminator (if any) the next character starts;
        // ordinary characters are accumulated and the loop moves on.
        let terminator = match iter.next() {
            None => break,
            Some('\n') => "\n",
            Some('\r') => {
                if iter.peek() == Some(&'\n') {
                    // Consume the `\n` so the pair counts as one terminator.
                    iter.next();
                    "\r\n"
                } else {
                    "\r"
                }
            }
            Some(other) => {
                current.push(other);
                continue;
            }
        };

        segments.push((std::mem::take(&mut current), terminator.to_owned()));
    }

    // Emit the unterminated tail. It is empty exactly when the input ended on
    // a newline, in which case an empty final segment is still recorded.
    let ends_on_newline = s.ends_with('\n') || s.ends_with('\r');
    if !current.is_empty() || ends_on_newline {
        segments.push((current, String::new()));
    }

    segments
}

/// Wraps the visible text of every line of `content` in `surround_str`.
///
/// Processing steps:
/// 1. Every existing occurrence of `surround_str` inside `content` is removed.
/// 2. The remainder is split into lines with `split_special`, which keeps
///    each line's original newline sequence.
/// 3. For each line the delimiters are placed directly around the span from
///    the first to the last non-whitespace character. At most one whitespace
///    character is kept on each side of that span: the one immediately
///    adjacent to it. Any further outer whitespace is dropped.
/// 4. Lines made up solely of whitespace (or empty lines) are copied through
///    unchanged.
pub(crate) fn tag_surround(content: &str, surround_str: &str) -> String {
    let stripped = content.replace(surround_str, "");
    let mut out = String::new();

    for (line, line_ending) in split_special(&stripped) {
        let first_visible = line.char_indices().find(|&(_, c)| !c.is_whitespace());
        let last_visible = line
            .char_indices()
            .rev()
            .find(|&(_, c)| !c.is_whitespace());

        match (first_visible, last_visible) {
            (Some((start, _)), Some((last_idx, last_ch))) => {
                // Byte offset one past the last visible character.
                let end = last_idx + last_ch.len_utf8();

                // Keep only the whitespace character touching the content.
                if let Some(lead) = line[..start].chars().next_back() {
                    out.push(lead);
                }
                out.push_str(surround_str);
                out.push_str(&line[start..end]);
                out.push_str(surround_str);
                if let Some(trail) = line[end..].chars().next() {
                    out.push(trail);
                }
            }
            // No visible character: nothing to wrap, keep the line verbatim.
            _ => out.push_str(&line),
        }

        out.push_str(&line_ending);
    }

    out
}

/// Counts the whitespace at the end of `s`, looking at no more than its last
/// ten characters.
///
/// Returns `(whitespace, newlines)`: the number of consecutive trailing
/// whitespace characters, and how many of those are `\n` or `\r`. Scanning
/// stops at the first non-whitespace character, so both counts are capped at
/// ten.
pub(crate) fn get_trailing_whitespace_info(s: &str) -> (usize, usize) {
    s.chars()
        .rev()
        .take(10)
        .take_while(|c| c.is_whitespace())
        .fold((0, 0), |(whitespace, newlines), c| {
            let is_newline = c == '\n' || c == '\r';
            (whitespace + 1, newlines + usize::from(is_newline))
        })
}

/// Replaces every run of consecutive whitespace characters in `s` with a
/// single ASCII space.
///
/// Leading and trailing runs are collapsed as well rather than removed, so
/// the result may still start or end with one space.
pub(crate) fn collapse_whitespace(s: &str) -> String {
    let mut collapsed = String::new();
    let mut prev_was_whitespace = false;

    for c in s.chars() {
        let is_ws = c.is_whitespace();
        if !is_ws {
            collapsed.push(c);
        } else if !prev_was_whitespace {
            // First character of a whitespace run: emit its single stand-in.
            collapsed.push(' ');
        }
        prev_was_whitespace = is_ws;
    }

    collapsed
}

/// Percent-encodes the characters `(`, `)`, `_` and `*` in `href`.
///
/// The replacements are `%28`, `%29`, `%5F` and `%2A` respectively; every
/// other character is copied through unchanged.
pub(crate) fn markdown_url_encode(href: &str) -> String {
    let mut encoded = String::with_capacity(href.len());

    for chr in href.chars() {
        match chr {
            '(' => encoded.push_str("%28"),
            ')' => encoded.push_str("%29"),
            '_' => encoded.push_str("%5F"),
            '*' => encoded.push_str("%2A"),
            other => encoded.push(other),
        }
    }

    encoded
}

#[cfg(test)]
mod tests {
    use super::{markdown_url_encode, split_special, tag_surround};

    /// Bold delimiter shared by the `tag_surround` cases.
    const BOLD: &str = "**";

    /// A single ASCII space on each side of the content stays outside the
    /// delimiters.
    #[test]
    fn tag_surround_preserves_outer_spaces() {
        let wrapped = tag_surround(" Label: ", BOLD);
        assert_eq!(wrapped, " **Label:** ");
    }

    /// Same as above, with multi-byte content between the outer spaces.
    #[test]
    fn tag_surround_preserves_multibyte_outer_spaces() {
        let wrapped = tag_surround(" 重要 ", BOLD);
        assert_eq!(wrapped, " **重要** ");
    }

    /// Delimiters already present in the content are stripped before wrapping.
    #[test]
    fn tag_surround_removes_nested_delimiters() {
        let wrapped = tag_surround("My **bold** text", BOLD);
        assert_eq!(wrapped, "**My bold text**");
    }

    /// `*`, `_`, `(` and `)` are percent-encoded; the rest of the URL is kept.
    #[test]
    fn markdown_url_encoding_matches_node_behavior() {
        let encoded = markdown_url_encode("http://www.github.com/crosstype/**/_test(123)");
        assert_eq!(
            encoded,
            "http://www.github.com/crosstype/%2A%2A/%5Ftest%28123%29"
        );
    }

    /// `\r`, `\n` and `\r\n` are each reported as the terminator of their line.
    #[test]
    fn split_special_preserves_mixed_newlines() {
        let expected: Vec<(String, String)> = [("a", "\r"), ("b", "\n"), ("c", "\r\n"), ("d", "")]
            .iter()
            .map(|&(text, newline)| (text.to_owned(), newline.to_owned()))
            .collect();

        assert_eq!(split_special("a\rb\nc\r\nd"), expected);
    }
}