Skip to main content

html_to_markdown_rs/options/
validation.rs

//! Validation and parsing utilities for option enums.
//!
//! This module provides parsing and serialization logic for configuration
//! enums (`HeadingStyle`, `ListIndentType`, etc.) with string conversion support.
5
6/// Heading style options for Markdown output.
7///
8/// Controls how headings (h1-h6) are rendered in the output Markdown.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
10pub enum HeadingStyle {
11    /// Underlined style (=== for h1, --- for h2).
12    Underlined,
13    /// ATX style (# for h1, ## for h2, etc.). Default.
14    #[default]
15    Atx,
16    /// ATX closed style (# title #, with closing hashes).
17    AtxClosed,
18}
19
20impl HeadingStyle {
21    /// Parse a heading style from a string.
22    ///
23    /// Accepts "atx", "atxclosed", or defaults to Underlined.
24    /// Input is normalized (lowercased, alphanumeric only).
25    #[must_use]
26    pub fn parse(value: &str) -> Self {
27        match normalize_token(value).as_str() {
28            "atx" => Self::Atx,
29            "atxclosed" => Self::AtxClosed,
30            _ => Self::Underlined,
31        }
32    }
33}
34
35/// List indentation character type.
36///
37/// Controls whether list items are indented with spaces or tabs.
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
39pub enum ListIndentType {
40    /// Use spaces for indentation. Default. Width controlled by `list_indent_width`.
41    #[default]
42    Spaces,
43    /// Use tabs for indentation.
44    Tabs,
45}
46
47impl ListIndentType {
48    /// Parse a list indentation type from a string.
49    ///
50    /// Accepts "tabs" or defaults to Spaces.
51    /// Input is normalized (lowercased, alphanumeric only).
52    #[must_use]
53    pub fn parse(value: &str) -> Self {
54        match normalize_token(value).as_str() {
55            "tabs" => Self::Tabs,
56            _ => Self::Spaces,
57        }
58    }
59}
60
61/// Whitespace handling strategy during conversion.
62///
63/// Determines how sequences of whitespace characters (spaces, tabs, newlines) are processed.
64#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
65pub enum WhitespaceMode {
66    /// Collapse multiple whitespace characters to single spaces. Default. Matches browser behavior.
67    #[default]
68    Normalized,
69    /// Preserve all whitespace exactly as it appears in the HTML.
70    Strict,
71}
72
73impl WhitespaceMode {
74    /// Parse a whitespace mode from a string.
75    ///
76    /// Accepts "strict" or defaults to Normalized.
77    /// Input is normalized (lowercased, alphanumeric only).
78    #[must_use]
79    pub fn parse(value: &str) -> Self {
80        match normalize_token(value).as_str() {
81            "strict" => Self::Strict,
82            _ => Self::Normalized,
83        }
84    }
85}
86
87/// Line break syntax in Markdown output.
88///
89/// Controls how soft line breaks (from `<br>` or line breaks in source) are rendered.
90#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
91pub enum NewlineStyle {
92    /// Two trailing spaces at end of line. Default. Standard Markdown syntax.
93    #[default]
94    Spaces,
95    /// Backslash at end of line. Alternative Markdown syntax.
96    Backslash,
97}
98
99impl NewlineStyle {
100    /// Parse a newline style from a string.
101    ///
102    /// Accepts "backslash" or defaults to Spaces.
103    /// Input is normalized (lowercased, alphanumeric only).
104    #[must_use]
105    pub fn parse(value: &str) -> Self {
106        match normalize_token(value).as_str() {
107            "backslash" => Self::Backslash,
108            _ => Self::Spaces,
109        }
110    }
111}
112
113/// Code block fence style in Markdown output.
114///
115/// Determines how code blocks (`<pre><code>`) are rendered in Markdown.
116#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
117pub enum CodeBlockStyle {
118    /// Indented code blocks (4 spaces). `CommonMark` standard.
119    Indented,
120    /// Fenced code blocks with backticks (```). Default (GFM). Supports language hints.
121    #[default]
122    Backticks,
123    /// Fenced code blocks with tildes (~~~). Supports language hints.
124    Tildes,
125}
126
127impl CodeBlockStyle {
128    /// Parse a code block style from a string.
129    ///
130    /// Accepts "backticks", "tildes", or defaults to Indented.
131    /// Input is normalized (lowercased, alphanumeric only).
132    #[must_use]
133    pub fn parse(value: &str) -> Self {
134        match normalize_token(value).as_str() {
135            "backticks" => Self::Backticks,
136            "tildes" => Self::Tildes,
137            _ => Self::Indented,
138        }
139    }
140}
141
142/// Highlight rendering style for `<mark>` elements.
143///
144/// Controls how highlighted text is rendered in Markdown output.
145#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
146pub enum HighlightStyle {
147    /// Double equals syntax (==text==). Default. Pandoc-compatible.
148    #[default]
149    DoubleEqual,
150    /// Preserve as HTML (==text==). Original HTML tag.
151    Html,
152    /// Render as bold (**text**). Uses strong emphasis.
153    Bold,
154    /// Strip formatting, render as plain text. No markup.
155    None,
156}
157
158impl HighlightStyle {
159    /// Parse a highlight style from a string.
160    ///
161    /// Accepts "doubleequal", "html", "bold", "none", or defaults to None.
162    /// Input is normalized (lowercased, alphanumeric only).
163    #[must_use]
164    pub fn parse(value: &str) -> Self {
165        match normalize_token(value).as_str() {
166            "doubleequal" => Self::DoubleEqual,
167            "html" => Self::Html,
168            "bold" => Self::Bold,
169            "none" => Self::None,
170            _ => Self::None,
171        }
172    }
173}
174
175/// Link rendering style in Markdown output.
176///
177/// Controls whether links and images use inline `[text](url)` syntax or
178/// reference-style `[text][1]` syntax with definitions collected at the end.
179#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
180pub enum LinkStyle {
181    /// Inline links: `[text](url)`. Default.
182    #[default]
183    Inline,
184    /// Reference-style links: `[text][1]` with `[1]: url` at end of document.
185    Reference,
186}
187
188impl LinkStyle {
189    /// Parse a link style from a string.
190    ///
191    /// Accepts "reference" or defaults to Inline.
192    /// Input is normalized (lowercased, alphanumeric only).
193    #[must_use]
194    pub fn parse(value: &str) -> Self {
195        match normalize_token(value).as_str() {
196            "reference" => Self::Reference,
197            _ => Self::Inline,
198        }
199    }
200}
201
202/// Output format for conversion.
203///
204/// Specifies the target markup language format for the conversion output.
205#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
206pub enum OutputFormat {
207    /// Standard Markdown (CommonMark compatible). Default.
208    #[default]
209    Markdown,
210    /// Djot lightweight markup language.
211    Djot,
212    /// Plain text output (no markup, visible text only).
213    Plain,
214}
215
216impl OutputFormat {
217    /// Parse an output format from a string.
218    ///
219    /// Accepts "djot" or defaults to Markdown.
220    /// Input is normalized (lowercased, alphanumeric only).
221    #[must_use]
222    pub fn parse(value: &str) -> Self {
223        match normalize_token(value).as_str() {
224            "djot" => Self::Djot,
225            "plain" | "plaintext" | "text" => Self::Plain,
226            _ => Self::Markdown,
227        }
228    }
229}
230
/// Reduce a configuration string to a canonical lookup token.
///
/// Keeps only ASCII letters and digits, lowercasing the letters. Everything
/// else (whitespace, punctuation, non-ASCII characters) is dropped, so
/// `"ATX-Closed"` becomes `"atxclosed"`.
pub(crate) fn normalize_token(value: &str) -> String {
    // The result can never be longer than the input, so one allocation suffices.
    let mut token = String::with_capacity(value.len());
    token.extend(
        value
            .chars()
            .filter(char::is_ascii_alphanumeric)
            .map(|ch| ch.to_ascii_lowercase()),
    );
    token
}
241
#[cfg(any(feature = "serde", feature = "metadata"))]
mod serde_impls {
    //! Serde support for the option enums.
    //!
    //! Every enum deserializes from a string through its lenient `parse`
    //! function and serializes to a fixed lowercase name. Each serialized name
    //! is mapped back to the same variant by the corresponding `parse`.

    use super::{
        CodeBlockStyle, HeadingStyle, HighlightStyle, LinkStyle, ListIndentType, NewlineStyle, OutputFormat,
        WhitespaceMode,
    };
    use serde::{Deserialize, Serialize, Serializer};

    // Implements `Deserialize` for `$ty` by reading a string and handing it to
    // `$parser`. The parsers are infallible, so the only error path is the
    // underlying `String::deserialize` call failing.
    macro_rules! impl_deserialize_from_parse {
        ($ty:ty, $parser:expr) => {
            impl<'de> Deserialize<'de> for $ty {
                fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
                where
                    D: serde::Deserializer<'de>,
                {
                    let value = String::deserialize(deserializer)?;
                    Ok($parser(&value))
                }
            }
        };
    }

    // Implements `Serialize` for `$ty` by writing the string listed for the
    // current variant. The generated `match` has no wildcard arm, so adding a
    // variant without listing its name here is a compile error.
    macro_rules! impl_serialize_as_str {
        ($ty:ident { $($variant:ident => $name:literal),+ $(,)? }) => {
            impl Serialize for $ty {
                fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
                where
                    S: Serializer,
                {
                    let name = match self {
                        $(Self::$variant => $name,)+
                    };
                    serializer.serialize_str(name)
                }
            }
        };
    }

    impl_deserialize_from_parse!(HeadingStyle, HeadingStyle::parse);
    impl_deserialize_from_parse!(ListIndentType, ListIndentType::parse);
    impl_deserialize_from_parse!(WhitespaceMode, WhitespaceMode::parse);
    impl_deserialize_from_parse!(NewlineStyle, NewlineStyle::parse);
    impl_deserialize_from_parse!(CodeBlockStyle, CodeBlockStyle::parse);
    impl_deserialize_from_parse!(HighlightStyle, HighlightStyle::parse);
    impl_deserialize_from_parse!(LinkStyle, LinkStyle::parse);
    impl_deserialize_from_parse!(OutputFormat, OutputFormat::parse);

    // Serialize implementations that convert enum variants to their string representations
    impl_serialize_as_str!(HeadingStyle {
        Underlined => "underlined",
        Atx => "atx",
        AtxClosed => "atxclosed",
    });

    impl_serialize_as_str!(ListIndentType {
        Spaces => "spaces",
        Tabs => "tabs",
    });

    impl_serialize_as_str!(WhitespaceMode {
        Normalized => "normalized",
        Strict => "strict",
    });

    impl_serialize_as_str!(NewlineStyle {
        Spaces => "spaces",
        Backslash => "backslash",
    });

    impl_serialize_as_str!(CodeBlockStyle {
        Indented => "indented",
        Backticks => "backticks",
        Tildes => "tildes",
    });

    impl_serialize_as_str!(HighlightStyle {
        DoubleEqual => "doubleequal",
        Html => "html",
        Bold => "bold",
        None => "none",
    });

    impl_serialize_as_str!(LinkStyle {
        Inline => "inline",
        Reference => "reference",
    });

    impl_serialize_as_str!(OutputFormat {
        Markdown => "markdown",
        Djot => "djot",
        Plain => "plain",
    });
}
383
#[cfg(all(test, any(feature = "serde", feature = "metadata")))]
mod tests {
    use super::*;

    /// Variants serialize to their lowercase JSON string names.
    #[test]
    fn test_enum_serialization() {
        assert_eq!(
            serde_json::to_string(&HeadingStyle::AtxClosed).expect("Failed to serialize"),
            r#""atxclosed""#
        );
        assert_eq!(
            serde_json::to_string(&ListIndentType::Tabs).expect("Failed to serialize"),
            r#""tabs""#
        );
        assert_eq!(
            serde_json::to_string(&WhitespaceMode::Strict).expect("Failed to serialize"),
            r#""strict""#
        );
    }

    /// JSON strings deserialize to the matching variant regardless of letter case.
    #[test]
    fn test_enum_deserialization() {
        assert_eq!(
            serde_json::from_str::<HeadingStyle>(r#""atxclosed""#).expect("Failed"),
            HeadingStyle::AtxClosed
        );
        assert_eq!(
            serde_json::from_str::<HeadingStyle>(r#""ATXCLOSED""#).expect("Failed"),
            HeadingStyle::AtxClosed
        );
        assert_eq!(
            serde_json::from_str::<ListIndentType>(r#""tabs""#).expect("Failed"),
            ListIndentType::Tabs
        );
    }
}