skymark 0.1.1

HTML-to-Markdown converter prioritizing proper conversion for human readability
Documentation
use regex::Regex;
use std::sync::LazyLock;

/// Default pattern for the global escape rule installed by `Options::default`.
///
/// Matches one character out of: backslash, backtick, `*`, `_`, `~`, `[` or `]`.
static DEFAULT_GLOBAL_ESCAPE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r#"[\\`*_~\[\]]"#;
    Regex::new(pattern).expect("default global escape regex is valid")
});
/// Default line-start escape pattern, paired in `Options::default` with the replacement
/// `$1$3\$2$4`.
///
/// Capture groups:
///
/// 1. whitespace between the start of the line and the marker,
/// 2. a block marker: `+` followed by whitespace, one of `=`, `>`, `-`, or one to six `#`
///    followed by whitespace,
/// 3. the digits of an ordered-list marker,
/// 4. the `.` and the whitespace character that follow those digits.
///
/// Groups 2 and 3/4 are mutually exclusive, so the replacement inserts exactly one backslash:
/// in front of the block marker, or between the digits and the `.`.
///
/// Both alternatives live inside a single group placed after the `^` anchor. With a bare
/// top-level `|` the ordered-list branch would not be anchored at all and would also escape
/// text such as `version 2. next` in the middle of a line.
static DEFAULT_LINE_START_ESCAPE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r#"(?m)^(\s*?)(?:((?:\+\s)|(?:[=>-])|(?:#{1,6}\s))|(\d+)(\.\s))"#)
        .expect("default line-start escape regex is valid")
});
/// Pattern for a `<!DOCTYPE ...>` declaration that begins at the start of a line
/// (matched case-insensitively).
///
/// The match stops at the first `>`, so any markup that follows the declaration on the same
/// line is left alone; a greedy `.*>` would swallow everything up to the last `>` on that
/// line. Because `[^>]` also matches newlines, a declaration wrapped over several lines is
/// covered as well.
static DOCTYPE_REPLACEMENT: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?im)^<!DOCTYPE[^>]*>"#).expect("doctype regex is valid"));

/// How code blocks are written in the generated Markdown.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodeBlockStyle {
    /// Surround the code with a fence marker.
    Fenced,
    /// Prefix the code with four spaces of indentation.
    Indented,
}

/// Converter options.
///
/// The `Default` implementation supplies a value for every field; the defaults mentioned on
/// the fields below are the ones set there.
#[derive(Clone, Debug)]
pub struct Options {
    /// Code block fence string (three backticks by default).
    pub code_fence: String,
    /// Bullet marker for unordered lists (`*` by default).
    pub bullet_marker: String,
    /// Indentation string for nested lists (two spaces by default).
    pub indent: String,
    /// Code block style (`CodeBlockStyle::Fenced` by default).
    pub code_block_style: CodeBlockStyle,
    /// Delimiter for emphasis (`_` by default).
    pub em_delimiter: String,
    /// Delimiter for strong text (`**` by default).
    pub strong_delimiter: String,
    /// Delimiter for strike text (`~~` by default).
    pub strike_delimiter: String,
    /// Elements to ignore entirely (empty by default).
    pub ignore: Vec<String>,
    /// Elements treated as blocks (empty by default).
    pub block_elements: Vec<String>,
    /// Maximum consecutive newlines kept in output (3 by default).
    pub max_consecutive_newlines: usize,
    /// Escape pattern applied at line starts, paired with its replacement string.
    ///
    /// The default replacement refers to the pattern's capture groups with `$N` syntax.
    pub line_start_escape: (Regex, String),
    /// Escape pattern applied globally, paired with its replacement string.
    ///
    /// The default replacement puts a backslash in front of the whole match (`$0`).
    pub global_escape: (Regex, String),
    /// User-provided text replacement rules, each a pattern paired with its replacement.
    ///
    /// Empty by default. The crate-internal `add_default_text_replacements` appends a rule
    /// that replaces doctype declarations with an empty string.
    pub text_replace: Vec<(Regex, String)>,
    /// Preserve `data:` images (`false` by default).
    pub keep_data_images: bool,
    /// Emit reference links instead of inline links (`false` by default).
    pub use_link_reference_definitions: bool,
    /// Use autolinks like `<https://...>` when possible (`true` by default).
    pub use_inline_links: bool,
}

impl Default for Options {
    /// Builds the stock configuration: fenced code blocks, `*` bullets, two-space list
    /// indentation, `_` / `**` / `~~` delimiters, at most three consecutive newlines, the
    /// built-in escape patterns, no ignored elements, no extra block elements, no text
    /// replacement rules, `data:` images dropped, inline links, and autolinks enabled.
    #[inline]
    fn default() -> Self {
        // Each escape rule is a compiled pattern plus the replacement template applied to
        // its matches; the patterns are cloned from the shared lazily-built statics.
        let line_start_escape = (
            DEFAULT_LINE_START_ESCAPE.clone(),
            String::from("$1$3\\$2$4"),
        );
        let global_escape = (DEFAULT_GLOBAL_ESCAPE.clone(), String::from(r#"\$0"#));

        Self {
            code_fence: String::from("```"),
            bullet_marker: String::from("*"),
            indent: String::from("  "),
            code_block_style: CodeBlockStyle::Fenced,
            em_delimiter: String::from("_"),
            strong_delimiter: String::from("**"),
            strike_delimiter: String::from("~~"),
            ignore: Vec::new(),
            block_elements: Vec::new(),
            max_consecutive_newlines: 3,
            line_start_escape,
            global_escape,
            text_replace: Vec::new(),
            keep_data_images: false,
            use_link_reference_definitions: false,
            use_inline_links: true,
        }
    }
}

impl Options {
    /// Appends the built-in text replacement rules to `text_replace`.
    ///
    /// Currently this is a single rule: anything matched by `DOCTYPE_REPLACEMENT` is replaced
    /// with an empty string. Rules already present in `text_replace` are kept as they are.
    pub(crate) fn add_default_text_replacements(&mut self) {
        let strip_doctype = (DOCTYPE_REPLACEMENT.clone(), String::new());
        self.text_replace.push(strip_doctype);
    }
}