Skip to main content

html_to_markdown_rs/options/
validation.rs

//! Validation and parsing utilities for option enums.
//!
//! This module provides lenient string parsing for the configuration enums
//! (`HeadingStyle`, `ListIndentType`, etc.) and, when the `serde` or `metadata`
//! feature is enabled, string-based serialization and deserialization for them.
5
/// How `h1`-`h6` headings are written in the generated Markdown.
///
/// The derived [`Default`] is [`HeadingStyle::Atx`]. Note that
/// [`HeadingStyle::parse`] falls back to [`HeadingStyle::Underlined`] for
/// unrecognized input, which is a different variant from the default.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum HeadingStyle {
    /// Headings are underlined: `===` beneath an h1, `---` beneath an h2.
    Underlined,
    /// Headings are prefixed with hashes: `#` for h1, `##` for h2, and so on.
    /// This is the default.
    #[default]
    Atx,
    /// Hash-prefixed headings that also carry closing hashes (`# title #`).
    AtxClosed,
}
19
20impl HeadingStyle {
21    /// Parse a heading style from a string.
22    ///
23    /// Accepts "atx", "atxclosed", or defaults to Underlined.
24    /// Input is normalized (lowercased, alphanumeric only).
25    #[must_use]
26    pub fn parse(value: &str) -> Self {
27        match normalize_token(value).as_str() {
28            "atx" => Self::Atx,
29            "atxclosed" => Self::AtxClosed,
30            _ => Self::Underlined,
31        }
32    }
33}
34
/// Which character is used to indent nested list items.
///
/// The derived [`Default`] is [`ListIndentType::Spaces`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ListIndentType {
    /// Indent with spaces; the width is controlled by `list_indent_width`.
    /// This is the default.
    #[default]
    Spaces,
    /// Indent with tab characters.
    Tabs,
}
46
47impl ListIndentType {
48    /// Parse a list indentation type from a string.
49    ///
50    /// Accepts "tabs" or defaults to Spaces.
51    /// Input is normalized (lowercased, alphanumeric only).
52    #[must_use]
53    pub fn parse(value: &str) -> Self {
54        match normalize_token(value).as_str() {
55            "tabs" => Self::Tabs,
56            _ => Self::Spaces,
57        }
58    }
59}
60
/// Strategy for handling runs of whitespace (spaces, tabs, newlines) during
/// conversion.
///
/// The derived [`Default`] is [`WhitespaceMode::Normalized`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum WhitespaceMode {
    /// Collapse consecutive whitespace into single spaces, matching browser
    /// behavior. This is the default.
    #[default]
    Normalized,
    /// Keep all whitespace exactly as it appears in the HTML.
    Strict,
}
72
73impl WhitespaceMode {
74    /// Parse a whitespace mode from a string.
75    ///
76    /// Accepts "strict" or defaults to Normalized.
77    /// Input is normalized (lowercased, alphanumeric only).
78    #[must_use]
79    pub fn parse(value: &str) -> Self {
80        match normalize_token(value).as_str() {
81            "strict" => Self::Strict,
82            _ => Self::Normalized,
83        }
84    }
85}
86
/// Syntax used for line breaks (from `<br>` or line breaks in the source) in
/// the Markdown output.
///
/// The derived [`Default`] is [`NewlineStyle::Spaces`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum NewlineStyle {
    /// End the line with two trailing spaces (standard Markdown syntax).
    /// This is the default.
    #[default]
    Spaces,
    /// End the line with a backslash (alternative Markdown syntax).
    Backslash,
}
98
99impl NewlineStyle {
100    /// Parse a newline style from a string.
101    ///
102    /// Accepts "backslash" or defaults to Spaces.
103    /// Input is normalized (lowercased, alphanumeric only).
104    #[must_use]
105    pub fn parse(value: &str) -> Self {
106        match normalize_token(value).as_str() {
107            "backslash" => Self::Backslash,
108            _ => Self::Spaces,
109        }
110    }
111}
112
/// How code blocks (`<pre><code>`) are rendered in the Markdown output.
///
/// The derived [`Default`] is [`CodeBlockStyle::Indented`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CodeBlockStyle {
    /// Code blocks indented by 4 spaces (the `CommonMark` standard form).
    /// This is the default.
    #[default]
    Indented,
    /// Code blocks fenced with backticks (```); supports language hints.
    Backticks,
    /// Code blocks fenced with tildes (~~~); supports language hints.
    Tildes,
}
126
127impl CodeBlockStyle {
128    /// Parse a code block style from a string.
129    ///
130    /// Accepts "backticks", "tildes", or defaults to Indented.
131    /// Input is normalized (lowercased, alphanumeric only).
132    #[must_use]
133    pub fn parse(value: &str) -> Self {
134        match normalize_token(value).as_str() {
135            "backticks" => Self::Backticks,
136            "tildes" => Self::Tildes,
137            _ => Self::Indented,
138        }
139    }
140}
141
/// Highlight rendering style for `<mark>` elements.
///
/// Controls how highlighted text is rendered in Markdown output.
///
/// The derived [`Default`] is [`HighlightStyle::DoubleEqual`]. Note that
/// [`HighlightStyle::parse`] falls back to [`HighlightStyle::None`] for
/// unrecognized input, which is a different variant from the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HighlightStyle {
    /// Double equals syntax (==text==). Default. Pandoc-compatible.
    #[default]
    DoubleEqual,
    /// Preserve as HTML, keeping the original `<mark>` tag.
    Html,
    /// Render as bold (**text**). Uses strong emphasis.
    Bold,
    /// Strip formatting, render as plain text. No markup.
    None,
}
157
158impl HighlightStyle {
159    /// Parse a highlight style from a string.
160    ///
161    /// Accepts "doubleequal", "html", "bold", "none", or defaults to None.
162    /// Input is normalized (lowercased, alphanumeric only).
163    #[must_use]
164    pub fn parse(value: &str) -> Self {
165        match normalize_token(value).as_str() {
166            "doubleequal" => Self::DoubleEqual,
167            "html" => Self::Html,
168            "bold" => Self::Bold,
169            "none" => Self::None,
170            _ => Self::None,
171        }
172    }
173}
174
/// Target markup language produced by the conversion.
///
/// The derived [`Default`] is [`OutputFormat::Markdown`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum OutputFormat {
    /// Standard, CommonMark-compatible Markdown. This is the default.
    #[default]
    Markdown,
    /// The Djot lightweight markup language.
    Djot,
}
186
187impl OutputFormat {
188    /// Parse an output format from a string.
189    ///
190    /// Accepts "djot" or defaults to Markdown.
191    /// Input is normalized (lowercased, alphanumeric only).
192    #[must_use]
193    pub fn parse(value: &str) -> Self {
194        match normalize_token(value).as_str() {
195            "djot" => Self::Djot,
196            _ => Self::Markdown,
197        }
198    }
199}
200
/// Reduce a configuration string to a canonical lookup token.
///
/// Keeps only ASCII letters and digits, lowercasing the letters; every other
/// character (whitespace, punctuation, non-ASCII) is dropped. For example,
/// `"ATX-Closed"` becomes `"atxclosed"`.
pub(crate) fn normalize_token(value: &str) -> String {
    value
        .chars()
        .filter(char::is_ascii_alphanumeric)
        .map(|ch| ch.to_ascii_lowercase())
        .collect()
}
211
212#[cfg(any(feature = "serde", feature = "metadata"))]
213mod serde_impls {
214    use super::{
215        CodeBlockStyle, HeadingStyle, HighlightStyle, ListIndentType, NewlineStyle, OutputFormat, WhitespaceMode,
216    };
217    use serde::{Deserialize, Serialize, Serializer};
218
219    macro_rules! impl_deserialize_from_parse {
220        ($ty:ty, $parser:expr) => {
221            impl<'de> Deserialize<'de> for $ty {
222                fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
223                where
224                    D: serde::Deserializer<'de>,
225                {
226                    let value = String::deserialize(deserializer)?;
227                    Ok($parser(&value))
228                }
229            }
230        };
231    }
232
233    impl_deserialize_from_parse!(HeadingStyle, HeadingStyle::parse);
234    impl_deserialize_from_parse!(ListIndentType, ListIndentType::parse);
235    impl_deserialize_from_parse!(WhitespaceMode, WhitespaceMode::parse);
236    impl_deserialize_from_parse!(NewlineStyle, NewlineStyle::parse);
237    impl_deserialize_from_parse!(CodeBlockStyle, CodeBlockStyle::parse);
238    impl_deserialize_from_parse!(HighlightStyle, HighlightStyle::parse);
239    impl_deserialize_from_parse!(OutputFormat, OutputFormat::parse);
240
241    // Serialize implementations that convert enum variants to their string representations
242    impl Serialize for HeadingStyle {
243        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
244        where
245            S: Serializer,
246        {
247            let s = match self {
248                Self::Underlined => "underlined",
249                Self::Atx => "atx",
250                Self::AtxClosed => "atxclosed",
251            };
252            serializer.serialize_str(s)
253        }
254    }
255
256    impl Serialize for ListIndentType {
257        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
258        where
259            S: Serializer,
260        {
261            let s = match self {
262                Self::Spaces => "spaces",
263                Self::Tabs => "tabs",
264            };
265            serializer.serialize_str(s)
266        }
267    }
268
269    impl Serialize for WhitespaceMode {
270        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
271        where
272            S: Serializer,
273        {
274            let s = match self {
275                Self::Normalized => "normalized",
276                Self::Strict => "strict",
277            };
278            serializer.serialize_str(s)
279        }
280    }
281
282    impl Serialize for NewlineStyle {
283        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
284        where
285            S: Serializer,
286        {
287            let s = match self {
288                Self::Spaces => "spaces",
289                Self::Backslash => "backslash",
290            };
291            serializer.serialize_str(s)
292        }
293    }
294
295    impl Serialize for CodeBlockStyle {
296        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
297        where
298            S: Serializer,
299        {
300            let s = match self {
301                Self::Indented => "indented",
302                Self::Backticks => "backticks",
303                Self::Tildes => "tildes",
304            };
305            serializer.serialize_str(s)
306        }
307    }
308
309    impl Serialize for HighlightStyle {
310        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
311        where
312            S: Serializer,
313        {
314            let s = match self {
315                Self::DoubleEqual => "doubleequal",
316                Self::Html => "html",
317                Self::Bold => "bold",
318                Self::None => "none",
319            };
320            serializer.serialize_str(s)
321        }
322    }
323
324    impl Serialize for OutputFormat {
325        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
326        where
327            S: Serializer,
328        {
329            let s = match self {
330                Self::Markdown => "markdown",
331                Self::Djot => "djot",
332            };
333            serializer.serialize_str(s)
334        }
335    }
336}
337
#[cfg(all(test, any(feature = "serde", feature = "metadata")))]
mod tests {
    use super::*;

    /// Variants must serialize to their lowercase token as a JSON string.
    #[test]
    fn test_enum_serialization() {
        let encoded = serde_json::to_string(&HeadingStyle::AtxClosed).expect("Failed to serialize");
        assert_eq!(encoded, r#""atxclosed""#);

        let encoded = serde_json::to_string(&ListIndentType::Tabs).expect("Failed to serialize");
        assert_eq!(encoded, r#""tabs""#);

        let encoded = serde_json::to_string(&WhitespaceMode::Strict).expect("Failed to serialize");
        assert_eq!(encoded, r#""strict""#);
    }

    /// Deserialization goes through the lenient parsers, so letter case in
    /// the input string must not matter.
    #[test]
    fn test_enum_deserialization() {
        let lowercase = serde_json::from_str::<HeadingStyle>(r#""atxclosed""#).expect("Failed");
        assert_eq!(lowercase, HeadingStyle::AtxClosed);

        let uppercase = serde_json::from_str::<HeadingStyle>(r#""ATXCLOSED""#).expect("Failed");
        assert_eq!(uppercase, HeadingStyle::AtxClosed);

        let indent = serde_json::from_str::<ListIndentType>(r#""tabs""#).expect("Failed");
        assert_eq!(indent, ListIndentType::Tabs);
    }
}