Skip to main content

html_to_markdown_rs/options/
validation.rs

//! Validation and parsing utilities for option enums.
//!
//! This module provides parsing and serialization logic for configuration
//! enums (`HeadingStyle`, `ListIndentType`, etc.) with string conversion support.
5
/// How headings are written in the generated Markdown.
///
/// Applies to every heading level (h1 through h6).
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum HeadingStyle {
    /// Underlined headings (`===` for h1, `---` for h2).
    Underlined,
    /// ATX headings (`#` for h1, `##` for h2, and so on). This is the default.
    #[default]
    Atx,
    /// Closed ATX headings (`# title #`, i.e. with closing hashes).
    AtxClosed,
}
19
20impl HeadingStyle {
21    /// Parse a heading style from a string.
22    ///
23    /// Accepts "atx", "atxclosed", or defaults to Underlined.
24    /// Input is normalized (lowercased, alphanumeric only).
25    #[must_use]
26    pub fn parse(value: &str) -> Self {
27        match normalize_token(value).as_str() {
28            "atx" => Self::Atx,
29            "atxclosed" => Self::AtxClosed,
30            _ => Self::Underlined,
31        }
32    }
33}
34
/// Which character is used to indent list items.
///
/// Selects between space-based and tab-based indentation.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ListIndentType {
    /// Indent with spaces; the width is controlled by `list_indent_width`. This is the default.
    #[default]
    Spaces,
    /// Indent with tabs.
    Tabs,
}
46
47impl ListIndentType {
48    /// Parse a list indentation type from a string.
49    ///
50    /// Accepts "tabs" or defaults to Spaces.
51    /// Input is normalized (lowercased, alphanumeric only).
52    #[must_use]
53    pub fn parse(value: &str) -> Self {
54        match normalize_token(value).as_str() {
55            "tabs" => Self::Tabs,
56            _ => Self::Spaces,
57        }
58    }
59}
60
/// Strategy for handling whitespace during conversion.
///
/// Decides what happens to runs of whitespace characters (spaces, tabs, newlines).
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum WhitespaceMode {
    /// Collapse runs of whitespace into single spaces, matching browser behavior. This is the default.
    #[default]
    Normalized,
    /// Keep all whitespace exactly as it appears in the HTML.
    Strict,
}
72
73impl WhitespaceMode {
74    /// Parse a whitespace mode from a string.
75    ///
76    /// Accepts "strict" or defaults to Normalized.
77    /// Input is normalized (lowercased, alphanumeric only).
78    #[must_use]
79    pub fn parse(value: &str) -> Self {
80        match normalize_token(value).as_str() {
81            "strict" => Self::Strict,
82            _ => Self::Normalized,
83        }
84    }
85}
86
/// Syntax used for line breaks in the Markdown output.
///
/// Determines how soft line breaks (from `<br>` or line breaks in source) are written.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum NewlineStyle {
    /// Two trailing spaces at the end of the line (standard Markdown syntax). This is the default.
    #[default]
    Spaces,
    /// A backslash at the end of the line (alternative Markdown syntax).
    Backslash,
}
98
99impl NewlineStyle {
100    /// Parse a newline style from a string.
101    ///
102    /// Accepts "backslash" or defaults to Spaces.
103    /// Input is normalized (lowercased, alphanumeric only).
104    #[must_use]
105    pub fn parse(value: &str) -> Self {
106        match normalize_token(value).as_str() {
107            "backslash" => Self::Backslash,
108            _ => Self::Spaces,
109        }
110    }
111}
112
/// How code blocks are written in the Markdown output.
///
/// Applies to code blocks coming from `<pre><code>` elements.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CodeBlockStyle {
    /// Indented code blocks (4 spaces), the `CommonMark` standard form. This is the default.
    #[default]
    Indented,
    /// Fenced code blocks using backticks (```); supports language hints.
    Backticks,
    /// Fenced code blocks using tildes (~~~); supports language hints.
    Tildes,
}
126
127impl CodeBlockStyle {
128    /// Parse a code block style from a string.
129    ///
130    /// Accepts "backticks", "tildes", or defaults to Indented.
131    /// Input is normalized (lowercased, alphanumeric only).
132    #[must_use]
133    pub fn parse(value: &str) -> Self {
134        match normalize_token(value).as_str() {
135            "backticks" => Self::Backticks,
136            "tildes" => Self::Tildes,
137            _ => Self::Indented,
138        }
139    }
140}
141
/// How `<mark>` elements are rendered.
///
/// Determines the markup used for highlighted text in the Markdown output.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum HighlightStyle {
    /// Double-equals syntax (`==text==`), which is Pandoc-compatible. This is the default.
    #[default]
    DoubleEqual,
    /// Preserve as HTML (`<mark>text</mark>`), keeping the original HTML tag.
    Html,
    /// Render as bold (`**text**`), i.e. strong emphasis.
    Bold,
    /// Strip the formatting and render plain text with no markup.
    None,
}
157
158impl HighlightStyle {
159    /// Parse a highlight style from a string.
160    ///
161    /// Accepts "doubleequal", "html", "bold", "none", or defaults to None.
162    /// Input is normalized (lowercased, alphanumeric only).
163    #[must_use]
164    pub fn parse(value: &str) -> Self {
165        match normalize_token(value).as_str() {
166            "doubleequal" => Self::DoubleEqual,
167            "html" => Self::Html,
168            "bold" => Self::Bold,
169            "none" => Self::None,
170            _ => Self::None,
171        }
172    }
173}
174
/// Target format of the conversion.
///
/// Selects which markup language the converter emits.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum OutputFormat {
    /// Standard Markdown (CommonMark compatible). This is the default.
    #[default]
    Markdown,
    /// The Djot lightweight markup language.
    Djot,
    /// Plain text: no markup, visible text only.
    Plain,
}
188
189impl OutputFormat {
190    /// Parse an output format from a string.
191    ///
192    /// Accepts "djot" or defaults to Markdown.
193    /// Input is normalized (lowercased, alphanumeric only).
194    #[must_use]
195    pub fn parse(value: &str) -> Self {
196        match normalize_token(value).as_str() {
197            "djot" => Self::Djot,
198            "plain" | "plaintext" | "text" => Self::Plain,
199            _ => Self::Markdown,
200        }
201    }
202}
203
/// Canonicalize a configuration string for matching.
///
/// Keeps only ASCII letters and digits, converting letters to lowercase;
/// every other character (whitespace, punctuation, non-ASCII) is dropped.
pub(crate) fn normalize_token(value: &str) -> String {
    value
        .chars()
        .filter(char::is_ascii_alphanumeric)
        .map(|ch| ch.to_ascii_lowercase())
        .collect()
}
214
#[cfg(any(feature = "serde", feature = "metadata"))]
mod serde_impls {
    use super::{
        CodeBlockStyle, HeadingStyle, HighlightStyle, ListIndentType, NewlineStyle, OutputFormat, WhitespaceMode,
    };
    use serde::{Deserialize, Serialize, Serializer};

    /// Generate string-based `Deserialize` and `Serialize` impls for an option enum.
    ///
    /// Deserialization reads a string and hands it to the enum's `parse`
    /// function, so it never fails on an unrecognized value. Serialization
    /// writes the string literal listed for each variant.
    macro_rules! impl_string_serde {
        ($ty:ident { $($variant:ident => $name:literal),+ $(,)? }) => {
            impl<'de> Deserialize<'de> for $ty {
                fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
                where
                    D: serde::Deserializer<'de>,
                {
                    String::deserialize(deserializer).map(|raw| $ty::parse(&raw))
                }
            }

            impl Serialize for $ty {
                fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
                where
                    S: Serializer,
                {
                    serializer.serialize_str(match self {
                        $(Self::$variant => $name,)+
                    })
                }
            }
        };
    }

    impl_string_serde!(HeadingStyle {
        Underlined => "underlined",
        Atx => "atx",
        AtxClosed => "atxclosed",
    });

    impl_string_serde!(ListIndentType {
        Spaces => "spaces",
        Tabs => "tabs",
    });

    impl_string_serde!(WhitespaceMode {
        Normalized => "normalized",
        Strict => "strict",
    });

    impl_string_serde!(NewlineStyle {
        Spaces => "spaces",
        Backslash => "backslash",
    });

    impl_string_serde!(CodeBlockStyle {
        Indented => "indented",
        Backticks => "backticks",
        Tildes => "tildes",
    });

    impl_string_serde!(HighlightStyle {
        DoubleEqual => "doubleequal",
        Html => "html",
        Bold => "bold",
        None => "none",
    });

    impl_string_serde!(OutputFormat {
        Markdown => "markdown",
        Djot => "djot",
        Plain => "plain",
    });
}
341
#[cfg(all(test, any(feature = "serde", feature = "metadata")))]
mod tests {
    use super::*;

    /// Option enums serialize to their lowercase string names.
    #[test]
    fn test_enum_serialization() {
        let heading_json = serde_json::to_string(&HeadingStyle::AtxClosed).expect("Failed to serialize");
        assert_eq!(heading_json, r#""atxclosed""#);

        let indent_json = serde_json::to_string(&ListIndentType::Tabs).expect("Failed to serialize");
        assert_eq!(indent_json, r#""tabs""#);

        let whitespace_json = serde_json::to_string(&WhitespaceMode::Strict).expect("Failed to serialize");
        assert_eq!(whitespace_json, r#""strict""#);
    }

    /// Option enums deserialize from strings regardless of letter case.
    #[test]
    fn test_enum_deserialization() {
        let lowercase = serde_json::from_str::<HeadingStyle>(r#""atxclosed""#).expect("Failed");
        assert_eq!(lowercase, HeadingStyle::AtxClosed);

        let uppercase = serde_json::from_str::<HeadingStyle>(r#""ATXCLOSED""#).expect("Failed");
        assert_eq!(uppercase, HeadingStyle::AtxClosed);

        let indent = serde_json::from_str::<ListIndentType>(r#""tabs""#).expect("Failed");
        assert_eq!(indent, ListIndentType::Tabs);
    }
}