Skip to main content

html_to_markdown_rs/
options.rs

1#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]
2//! Configuration options for HTML to Markdown conversion.
3
/// How `<h1>`–`<h6>` headings are written in the generated Markdown.
///
/// The derived [`Default`] is [`HeadingStyle::Atx`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HeadingStyle {
    /// Setext-like underlining: `===` beneath an h1, `---` beneath an h2.
    Underlined,
    /// Leading hashes only (`# title`, `## title`, ...). This is the default.
    #[default]
    Atx,
    /// Leading and trailing hashes (`# title #`).
    AtxClosed,
}
17
18impl HeadingStyle {
19    /// Parse a heading style from a string.
20    ///
21    /// Accepts "atx", "atxclosed", or defaults to Underlined.
22    /// Input is normalized (lowercased, alphanumeric only).
23    #[must_use]
24    pub fn parse(value: &str) -> Self {
25        match normalize_token(value).as_str() {
26            "atx" => Self::Atx,
27            "atxclosed" => Self::AtxClosed,
28            _ => Self::Underlined,
29        }
30    }
31}
32
/// Which character is used to indent nested list items.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ListIndentType {
    /// Indent with spaces (the default); the amount comes from `list_indent_width`.
    #[default]
    Spaces,
    /// Indent with tab characters.
    Tabs,
}
44
45impl ListIndentType {
46    /// Parse a list indentation type from a string.
47    ///
48    /// Accepts "tabs" or defaults to Spaces.
49    /// Input is normalized (lowercased, alphanumeric only).
50    #[must_use]
51    pub fn parse(value: &str) -> Self {
52        match normalize_token(value).as_str() {
53            "tabs" => Self::Tabs,
54            _ => Self::Spaces,
55        }
56    }
57}
58
/// Strategy for treating runs of whitespace (spaces, tabs, newlines) during conversion.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum WhitespaceMode {
    /// Collapse consecutive whitespace into a single space, as browsers do. This is the default.
    #[default]
    Normalized,
    /// Keep whitespace exactly as it appears in the HTML input.
    Strict,
}
70
71impl WhitespaceMode {
72    /// Parse a whitespace mode from a string.
73    ///
74    /// Accepts "strict" or defaults to Normalized.
75    /// Input is normalized (lowercased, alphanumeric only).
76    #[must_use]
77    pub fn parse(value: &str) -> Self {
78        match normalize_token(value).as_str() {
79            "strict" => Self::Strict,
80            _ => Self::Normalized,
81        }
82    }
83}
84
/// Syntax used for line breaks (from `<br>` or line breaks in the source) in the Markdown output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum NewlineStyle {
    /// End the line with two trailing spaces — the standard Markdown form. This is the default.
    #[default]
    Spaces,
    /// End the line with a backslash — the alternative Markdown form.
    Backslash,
}
96
97impl NewlineStyle {
98    /// Parse a newline style from a string.
99    ///
100    /// Accepts "backslash" or defaults to Spaces.
101    /// Input is normalized (lowercased, alphanumeric only).
102    #[must_use]
103    pub fn parse(value: &str) -> Self {
104        match normalize_token(value).as_str() {
105            "backslash" => Self::Backslash,
106            _ => Self::Spaces,
107        }
108    }
109}
110
/// How code blocks (`<pre><code>`) are emitted in the Markdown output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CodeBlockStyle {
    /// Four-space indented blocks, the `CommonMark` standard form. This is the default.
    #[default]
    Indented,
    /// Blocks fenced with backticks (```); language hints are supported.
    Backticks,
    /// Blocks fenced with tildes (~~~); language hints are supported.
    Tildes,
}
124
125impl CodeBlockStyle {
126    /// Parse a code block style from a string.
127    ///
128    /// Accepts "backticks", "tildes", or defaults to Indented.
129    /// Input is normalized (lowercased, alphanumeric only).
130    #[must_use]
131    pub fn parse(value: &str) -> Self {
132        match normalize_token(value).as_str() {
133            "backticks" => Self::Backticks,
134            "tildes" => Self::Tildes,
135            _ => Self::Indented,
136        }
137    }
138}
139
/// Highlight rendering style for `<mark>` elements.
///
/// Controls how highlighted text is rendered in Markdown output.
/// The derived [`Default`] is [`HighlightStyle::DoubleEqual`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HighlightStyle {
    /// Double equals syntax (`==text==`). Default. Pandoc-compatible.
    #[default]
    DoubleEqual,
    /// Preserve as HTML, keeping the original tag (i.e. `<mark>text</mark>` rather than `==text==`).
    Html,
    /// Render as bold (`**text**`). Uses strong emphasis.
    Bold,
    /// Strip formatting, render as plain text. No markup.
    None,
}
155
156impl HighlightStyle {
157    /// Parse a highlight style from a string.
158    ///
159    /// Accepts "doubleequal", "html", "bold", "none", or defaults to None.
160    /// Input is normalized (lowercased, alphanumeric only).
161    #[must_use]
162    pub fn parse(value: &str) -> Self {
163        match normalize_token(value).as_str() {
164            "doubleequal" => Self::DoubleEqual,
165            "html" => Self::Html,
166            "bold" => Self::Bold,
167            "none" => Self::None,
168            _ => Self::None,
169        }
170    }
171}
172
/// How aggressively the HTML is cleaned up before conversion.
///
/// Higher levels remove more elements.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PreprocessingPreset {
    /// Lightest cleanup: only essential noise (scripts, styles) is removed.
    Minimal,
    /// Regular cleanup, removing navigation, forms, and other auxiliary content. This is the default.
    #[default]
    Standard,
    /// Heaviest cleanup: extensive non-content elements and structure are removed.
    Aggressive,
}
186
187impl PreprocessingPreset {
188    /// Parse a preprocessing preset from a string.
189    ///
190    /// Accepts "minimal", "aggressive", or defaults to Standard.
191    /// Input is normalized (lowercased, alphanumeric only).
192    #[must_use]
193    pub fn parse(value: &str) -> Self {
194        match normalize_token(value).as_str() {
195            "minimal" => Self::Minimal,
196            "aggressive" => Self::Aggressive,
197            _ => Self::Standard,
198        }
199    }
200}
201
202/// Main conversion options for HTML to Markdown conversion.
203#[derive(Debug, Clone)]
204#[cfg_attr(
205    any(feature = "serde", feature = "metadata"),
206    derive(serde::Serialize, serde::Deserialize)
207)]
208#[cfg_attr(any(feature = "serde", feature = "metadata"), serde(rename_all = "camelCase"))]
209pub struct ConversionOptions {
210    /// Heading style (Underlined, Atx, `AtxClosed`)
211    pub heading_style: HeadingStyle,
212
213    /// List indentation type (Spaces or Tabs)
214    pub list_indent_type: ListIndentType,
215
216    /// List indentation width in spaces (applied if using spaces indentation)
217    pub list_indent_width: usize,
218
219    /// Bullet characters for unordered lists (e.g., "-", "*", "+")
220    pub bullets: String,
221
222    /// Symbol for strong/emphasis emphasis rendering (* or _)
223    pub strong_em_symbol: char,
224
225    /// Escape asterisks (*) in text to prevent accidental formatting
226    pub escape_asterisks: bool,
227
228    /// Escape underscores (_) in text to prevent accidental formatting
229    pub escape_underscores: bool,
230
231    /// Escape miscellaneous markdown characters (\ & < ` [ > ~ # = + | -)
232    pub escape_misc: bool,
233
234    /// Escape all ASCII punctuation characters (for `CommonMark` spec compliance tests)
235    pub escape_ascii: bool,
236
237    /// Default code language for fenced code blocks when not specified
238    pub code_language: String,
239
240    /// Use autolinks syntax for bare URLs (<http://example.com>)
241    pub autolinks: bool,
242
243    /// Add default title element to HTML if none exists before conversion
244    pub default_title: bool,
245
246    /// Use HTML <br> elements in tables instead of spaces for line breaks
247    pub br_in_tables: bool,
248
249    /// Enable spatial table reconstruction in hOCR documents (via spatial positioning analysis)
250    pub hocr_spatial_tables: bool,
251
252    /// Highlight style for <mark> elements (`DoubleEqual`, Html, Bold, None)
253    pub highlight_style: HighlightStyle,
254
255    /// Extract metadata from HTML (title, description, images, links, etc.)
256    pub extract_metadata: bool,
257
258    /// Whitespace handling mode (Normalized collapses multiple spaces, Strict preserves)
259    pub whitespace_mode: WhitespaceMode,
260
261    /// Strip newline characters from HTML before processing
262    pub strip_newlines: bool,
263
264    /// Enable automatic text wrapping at `wrap_width`
265    pub wrap: bool,
266
267    /// Text wrapping width in characters (default 80)
268    pub wrap_width: usize,
269
270    /// Treat block-level elements as inline during conversion
271    pub convert_as_inline: bool,
272
273    /// Custom symbol for subscript content (e.g., "~")
274    pub sub_symbol: String,
275
276    /// Custom symbol for superscript content (e.g., "^")
277    pub sup_symbol: String,
278
279    /// Newline style in markdown output (Spaces adds two spaces, Backslash adds \)
280    pub newline_style: NewlineStyle,
281
282    /// Code block fence style (Indented, Backticks, Tildes)
283    pub code_block_style: CodeBlockStyle,
284
285    /// HTML elements where images should remain as markdown links (not converted to alt text)
286    pub keep_inline_images_in: Vec<String>,
287
288    /// HTML preprocessing options (remove nav, forms, etc.)
289    pub preprocessing: PreprocessingOptions,
290
291    /// Source document encoding (informational, typically "utf-8")
292    pub encoding: String,
293
294    /// Enable debug mode with diagnostic warnings on conversion issues
295    pub debug: bool,
296
297    /// HTML tags to strip (extract text content, no markdown conversion)
298    pub strip_tags: Vec<String>,
299
300    /// HTML tags to preserve as-is in output (keep original HTML, useful for complex tables)
301    pub preserve_tags: Vec<String>,
302
303    /// Skip all images during conversion.
304    /// When enabled, all `<img>` elements are completely omitted from output.
305    /// Useful for text-only extraction or filtering out visual content.
306    pub skip_images: bool,
307}
308
309/// Partial update for `ConversionOptions`.
310///
311/// This struct uses `Option<T>` to represent optional fields that can be selectively updated.
312/// Only specified fields (Some values) will override existing options; None values leave the
313/// corresponding fields unchanged when applied via [`ConversionOptions::apply_update`].
314#[derive(Debug, Clone, Default)]
315#[cfg_attr(
316    any(feature = "serde", feature = "metadata"),
317    derive(serde::Serialize, serde::Deserialize)
318)]
319#[cfg_attr(any(feature = "serde", feature = "metadata"), serde(rename_all = "camelCase"))]
320pub struct ConversionOptionsUpdate {
321    /// Optional heading style override (Underlined, Atx, `AtxClosed`)
322    pub heading_style: Option<HeadingStyle>,
323
324    /// Optional list indentation type override (Spaces or Tabs)
325    pub list_indent_type: Option<ListIndentType>,
326
327    /// Optional list indentation width override in spaces
328    pub list_indent_width: Option<usize>,
329
330    /// Optional bullet characters override for unordered lists
331    pub bullets: Option<String>,
332
333    /// Optional strong/emphasis symbol override (* or _)
334    pub strong_em_symbol: Option<char>,
335
336    /// Optional asterisk escaping override in text content
337    pub escape_asterisks: Option<bool>,
338
339    /// Optional underscore escaping override in text content
340    pub escape_underscores: Option<bool>,
341
342    /// Optional miscellaneous character escaping override (\ & < ` [ > ~ # = + | -)
343    pub escape_misc: Option<bool>,
344
345    /// Optional ASCII punctuation escaping override (for spec compliance testing)
346    pub escape_ascii: Option<bool>,
347
348    /// Optional default code language override for fenced code blocks
349    pub code_language: Option<String>,
350
351    /// Optional autolinks syntax override for bare URLs
352    pub autolinks: Option<bool>,
353
354    /// Optional default title element injection override
355    pub default_title: Option<bool>,
356
357    /// Optional HTML <br> usage in tables override
358    pub br_in_tables: Option<bool>,
359
360    /// Optional spatial table reconstruction for hOCR documents override
361    pub hocr_spatial_tables: Option<bool>,
362
363    /// Optional highlight style override for <mark> elements
364    pub highlight_style: Option<HighlightStyle>,
365
366    /// Optional metadata extraction override (title, description, images, links)
367    pub extract_metadata: Option<bool>,
368
369    /// Optional whitespace handling mode override (Normalized or Strict)
370    pub whitespace_mode: Option<WhitespaceMode>,
371
372    /// Optional newline stripping override before processing
373    pub strip_newlines: Option<bool>,
374
375    /// Optional automatic text wrapping override
376    pub wrap: Option<bool>,
377
378    /// Optional text wrapping width override in characters
379    pub wrap_width: Option<usize>,
380
381    /// Optional block-level to inline conversion override
382    pub convert_as_inline: Option<bool>,
383
384    /// Optional subscript symbol override
385    pub sub_symbol: Option<String>,
386
387    /// Optional superscript symbol override
388    pub sup_symbol: Option<String>,
389
390    /// Optional newline style override for markdown output
391    pub newline_style: Option<NewlineStyle>,
392
393    /// Optional code block fence style override (Indented, Backticks, Tildes)
394    pub code_block_style: Option<CodeBlockStyle>,
395
396    /// Optional context elements where images remain as markdown links override
397    pub keep_inline_images_in: Option<Vec<String>>,
398
399    /// Optional preprocessing options partial update
400    pub preprocessing: Option<PreprocessingOptionsUpdate>,
401
402    /// Optional source document encoding override
403    pub encoding: Option<String>,
404
405    /// Optional debug mode override for diagnostic warnings
406    pub debug: Option<bool>,
407
408    /// Optional HTML tags to strip override (extract text, no conversion)
409    pub strip_tags: Option<Vec<String>>,
410
411    /// Optional HTML tags to preserve as-is override in output
412    pub preserve_tags: Option<Vec<String>>,
413
414    /// Optional skip images override
415    pub skip_images: Option<bool>,
416}
417
418impl Default for ConversionOptions {
419    fn default() -> Self {
420        Self {
421            heading_style: HeadingStyle::default(),
422            list_indent_type: ListIndentType::default(),
423            list_indent_width: 2,
424            bullets: "-".to_string(),
425            strong_em_symbol: '*',
426            escape_asterisks: false,
427            escape_underscores: false,
428            escape_misc: false,
429            escape_ascii: false,
430            code_language: String::new(),
431            autolinks: true,
432            default_title: false,
433            br_in_tables: false,
434            hocr_spatial_tables: true,
435            highlight_style: HighlightStyle::default(),
436            extract_metadata: true,
437            whitespace_mode: WhitespaceMode::default(),
438            strip_newlines: false,
439            wrap: false,
440            wrap_width: 80,
441            convert_as_inline: false,
442            sub_symbol: String::new(),
443            sup_symbol: String::new(),
444            newline_style: NewlineStyle::Spaces,
445            code_block_style: CodeBlockStyle::default(),
446            keep_inline_images_in: Vec::new(),
447            preprocessing: PreprocessingOptions::default(),
448            encoding: "utf-8".to_string(),
449            debug: false,
450            strip_tags: Vec::new(),
451            preserve_tags: Vec::new(),
452            skip_images: false,
453        }
454    }
455}
456
457impl ConversionOptions {
458    /// Apply a partial update to these conversion options.
459    ///
460    /// Any specified fields in the update will override the current values.
461    /// Unspecified fields (None) are left unchanged.
462    ///
463    /// # Arguments
464    ///
465    /// * `update` - Partial options update with fields to override
466    pub fn apply_update(&mut self, update: ConversionOptionsUpdate) {
467        if let Some(heading_style) = update.heading_style {
468            self.heading_style = heading_style;
469        }
470        if let Some(list_indent_type) = update.list_indent_type {
471            self.list_indent_type = list_indent_type;
472        }
473        if let Some(list_indent_width) = update.list_indent_width {
474            self.list_indent_width = list_indent_width;
475        }
476        if let Some(bullets) = update.bullets {
477            self.bullets = bullets;
478        }
479        if let Some(strong_em_symbol) = update.strong_em_symbol {
480            self.strong_em_symbol = strong_em_symbol;
481        }
482        if let Some(escape_asterisks) = update.escape_asterisks {
483            self.escape_asterisks = escape_asterisks;
484        }
485        if let Some(escape_underscores) = update.escape_underscores {
486            self.escape_underscores = escape_underscores;
487        }
488        if let Some(escape_misc) = update.escape_misc {
489            self.escape_misc = escape_misc;
490        }
491        if let Some(escape_ascii) = update.escape_ascii {
492            self.escape_ascii = escape_ascii;
493        }
494        if let Some(code_language) = update.code_language {
495            self.code_language = code_language;
496        }
497        if let Some(autolinks) = update.autolinks {
498            self.autolinks = autolinks;
499        }
500        if let Some(default_title) = update.default_title {
501            self.default_title = default_title;
502        }
503        if let Some(br_in_tables) = update.br_in_tables {
504            self.br_in_tables = br_in_tables;
505        }
506        if let Some(hocr_spatial_tables) = update.hocr_spatial_tables {
507            self.hocr_spatial_tables = hocr_spatial_tables;
508        }
509        if let Some(highlight_style) = update.highlight_style {
510            self.highlight_style = highlight_style;
511        }
512        if let Some(extract_metadata) = update.extract_metadata {
513            self.extract_metadata = extract_metadata;
514        }
515        if let Some(whitespace_mode) = update.whitespace_mode {
516            self.whitespace_mode = whitespace_mode;
517        }
518        if let Some(strip_newlines) = update.strip_newlines {
519            self.strip_newlines = strip_newlines;
520        }
521        if let Some(wrap) = update.wrap {
522            self.wrap = wrap;
523        }
524        if let Some(wrap_width) = update.wrap_width {
525            self.wrap_width = wrap_width;
526        }
527        if let Some(convert_as_inline) = update.convert_as_inline {
528            self.convert_as_inline = convert_as_inline;
529        }
530        if let Some(sub_symbol) = update.sub_symbol {
531            self.sub_symbol = sub_symbol;
532        }
533        if let Some(sup_symbol) = update.sup_symbol {
534            self.sup_symbol = sup_symbol;
535        }
536        if let Some(newline_style) = update.newline_style {
537            self.newline_style = newline_style;
538        }
539        if let Some(code_block_style) = update.code_block_style {
540            self.code_block_style = code_block_style;
541        }
542        if let Some(keep_inline_images_in) = update.keep_inline_images_in {
543            self.keep_inline_images_in = keep_inline_images_in;
544        }
545        if let Some(preprocessing) = update.preprocessing {
546            self.preprocessing.apply_update(preprocessing);
547        }
548        if let Some(encoding) = update.encoding {
549            self.encoding = encoding;
550        }
551        if let Some(debug) = update.debug {
552            self.debug = debug;
553        }
554        if let Some(strip_tags) = update.strip_tags {
555            self.strip_tags = strip_tags;
556        }
557        if let Some(preserve_tags) = update.preserve_tags {
558            self.preserve_tags = preserve_tags;
559        }
560        if let Some(skip_images) = update.skip_images {
561            self.skip_images = skip_images;
562        }
563    }
564
565    /// Create new conversion options from a partial update.
566    ///
567    /// Creates a new `ConversionOptions` struct with defaults, then applies the update.
568    /// Fields not specified in the update keep their default values.
569    ///
570    /// # Arguments
571    ///
572    /// * `update` - Partial options update with fields to set
573    ///
574    /// # Returns
575    ///
576    /// New `ConversionOptions` with specified updates applied to defaults
577    #[must_use]
578    pub fn from_update(update: ConversionOptionsUpdate) -> Self {
579        let mut options = Self::default();
580        options.apply_update(update);
581        options
582    }
583}
584
585impl From<ConversionOptionsUpdate> for ConversionOptions {
586    fn from(update: ConversionOptionsUpdate) -> Self {
587        Self::from_update(update)
588    }
589}
590
591/// HTML preprocessing options for document cleanup before conversion.
592#[derive(Debug, Clone)]
593#[cfg_attr(
594    any(feature = "serde", feature = "metadata"),
595    derive(serde::Serialize, serde::Deserialize)
596)]
597#[cfg_attr(any(feature = "serde", feature = "metadata"), serde(rename_all = "camelCase"))]
598pub struct PreprocessingOptions {
599    /// Enable HTML preprocessing globally
600    pub enabled: bool,
601
602    /// Preprocessing preset level (Minimal, Standard, Aggressive)
603    pub preset: PreprocessingPreset,
604
605    /// Remove navigation elements (nav, breadcrumbs, menus, sidebars)
606    pub remove_navigation: bool,
607
608    /// Remove form elements (forms, inputs, buttons, etc.)
609    pub remove_forms: bool,
610}
611
612/// Partial update for `PreprocessingOptions`.
613///
614/// This struct uses `Option<T>` to represent optional fields that can be selectively updated.
615/// Only specified fields (Some values) will override existing options; None values leave the
616/// corresponding fields unchanged when applied via [`PreprocessingOptions::apply_update`].
617#[derive(Debug, Clone, Default)]
618#[cfg_attr(
619    any(feature = "serde", feature = "metadata"),
620    derive(serde::Serialize, serde::Deserialize)
621)]
622#[cfg_attr(any(feature = "serde", feature = "metadata"), serde(rename_all = "camelCase"))]
623pub struct PreprocessingOptionsUpdate {
624    /// Optional global preprocessing enablement override
625    pub enabled: Option<bool>,
626
627    /// Optional preprocessing preset level override (Minimal, Standard, Aggressive)
628    pub preset: Option<PreprocessingPreset>,
629
630    /// Optional navigation element removal override (nav, breadcrumbs, menus, sidebars)
631    pub remove_navigation: Option<bool>,
632
633    /// Optional form element removal override (forms, inputs, buttons, etc.)
634    pub remove_forms: Option<bool>,
635}
636
/// Reduce a user-supplied token to a canonical form for matching.
///
/// Keeps only ASCII letters and digits, lowercasing the letters; separators such as
/// `-`, `_`, spaces, and every non-ASCII character are dropped.
fn normalize_token(value: &str) -> String {
    value
        .chars()
        .filter(char::is_ascii_alphanumeric)
        .map(|ch| ch.to_ascii_lowercase())
        .collect()
}
646
#[cfg(any(feature = "serde", feature = "metadata"))]
mod serde_impls {
    //! String-based serde support for the option enums.
    //!
    //! Each enum is serialized as a lowercase string token and deserialized by feeding the
    //! incoming string to the enum's own `parse` function, so deserialization accepts the
    //! same spellings (and applies the same fallbacks) as `parse` does.

    use super::{
        CodeBlockStyle, HeadingStyle, HighlightStyle, ListIndentType, NewlineStyle, PreprocessingPreset, WhitespaceMode,
    };
    use serde::{Deserialize, Serialize, Serializer};

    /// Generates both serde impls for one enum from its `Variant => "token"` table.
    macro_rules! impl_string_serde {
        ($ty:ident { $($variant:ident => $token:literal),+ $(,)? }) => {
            impl<'de> Deserialize<'de> for $ty {
                fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
                where
                    D: serde::Deserializer<'de>,
                {
                    // Read a plain string, then let the enum's lenient parser interpret it.
                    let raw = String::deserialize(deserializer)?;
                    Ok($ty::parse(&raw))
                }
            }

            impl Serialize for $ty {
                fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
                where
                    S: Serializer,
                {
                    let token = match self {
                        $(Self::$variant => $token,)+
                    };
                    serializer.serialize_str(token)
                }
            }
        };
    }

    impl_string_serde!(HeadingStyle {
        Underlined => "underlined",
        Atx => "atx",
        AtxClosed => "atxclosed",
    });

    impl_string_serde!(ListIndentType {
        Spaces => "spaces",
        Tabs => "tabs",
    });

    impl_string_serde!(WhitespaceMode {
        Normalized => "normalized",
        Strict => "strict",
    });

    impl_string_serde!(NewlineStyle {
        Spaces => "spaces",
        Backslash => "backslash",
    });

    impl_string_serde!(CodeBlockStyle {
        Indented => "indented",
        Backticks => "backticks",
        Tildes => "tildes",
    });

    impl_string_serde!(HighlightStyle {
        DoubleEqual => "doubleequal",
        Html => "html",
        Bold => "bold",
        None => "none",
    });

    impl_string_serde!(PreprocessingPreset {
        Minimal => "minimal",
        Standard => "standard",
        Aggressive => "aggressive",
    });
}
773
774impl Default for PreprocessingOptions {
775    fn default() -> Self {
776        Self {
777            enabled: false,
778            preset: PreprocessingPreset::default(),
779            remove_navigation: true,
780            remove_forms: true,
781        }
782    }
783}
784
785impl PreprocessingOptions {
786    /// Apply a partial update to these preprocessing options.
787    ///
788    /// Any specified fields in the update will override the current values.
789    /// Unspecified fields (None) are left unchanged.
790    ///
791    /// # Arguments
792    ///
793    /// * `update` - Partial preprocessing options update
794    #[allow(clippy::needless_pass_by_value)]
795    pub const fn apply_update(&mut self, update: PreprocessingOptionsUpdate) {
796        if let Some(enabled) = update.enabled {
797            self.enabled = enabled;
798        }
799        if let Some(preset) = update.preset {
800            self.preset = preset;
801        }
802        if let Some(remove_navigation) = update.remove_navigation {
803            self.remove_navigation = remove_navigation;
804        }
805        if let Some(remove_forms) = update.remove_forms {
806            self.remove_forms = remove_forms;
807        }
808    }
809
810    /// Create new preprocessing options from a partial update.
811    ///
812    /// Creates a new `PreprocessingOptions` struct with defaults, then applies the update.
813    /// Fields not specified in the update keep their default values.
814    ///
815    /// # Arguments
816    ///
817    /// * `update` - Partial preprocessing options update
818    ///
819    /// # Returns
820    ///
821    /// New `PreprocessingOptions` with specified updates applied to defaults
822    #[must_use]
823    pub fn from_update(update: PreprocessingOptionsUpdate) -> Self {
824        let mut options = Self::default();
825        options.apply_update(update);
826        options
827    }
828}
829
830impl From<PreprocessingOptionsUpdate> for PreprocessingOptions {
831    fn from(update: PreprocessingOptionsUpdate) -> Self {
832        Self::from_update(update)
833    }
834}
835
#[cfg(all(test, any(feature = "serde", feature = "metadata")))]
mod tests {
    use super::*;

    /// Customised conversion options survive a JSON round trip.
    #[test]
    fn test_conversion_options_serde() {
        let original = ConversionOptions {
            heading_style: HeadingStyle::AtxClosed,
            list_indent_width: 4,
            bullets: "*".to_string(),
            escape_asterisks: true,
            whitespace_mode: WhitespaceMode::Strict,
            ..ConversionOptions::default()
        };

        let json = serde_json::to_string(&original).expect("Failed to serialize");
        let restored: ConversionOptions = serde_json::from_str(&json).expect("Failed to deserialize");

        assert_eq!(restored.list_indent_width, 4);
        assert_eq!(restored.bullets, "*");
        assert!(restored.escape_asterisks);
        assert_eq!(restored.heading_style, HeadingStyle::AtxClosed);
        assert_eq!(restored.whitespace_mode, WhitespaceMode::Strict);
    }

    /// Customised preprocessing options survive a JSON round trip.
    #[test]
    fn test_preprocessing_options_serde() {
        let original = PreprocessingOptions {
            enabled: true,
            preset: PreprocessingPreset::Aggressive,
            remove_navigation: false,
            ..PreprocessingOptions::default()
        };

        let json = serde_json::to_string(&original).expect("Failed to serialize");
        let restored: PreprocessingOptions = serde_json::from_str(&json).expect("Failed to deserialize");

        assert!(restored.enabled);
        assert_eq!(restored.preset, PreprocessingPreset::Aggressive);
        assert!(!restored.remove_navigation);
    }

    /// Enums serialize to lowercase JSON strings.
    #[test]
    fn test_enum_serialization() {
        let heading_json = serde_json::to_string(&HeadingStyle::AtxClosed).expect("Failed to serialize");
        assert_eq!(heading_json, r#""atxclosed""#);

        let indent_json = serde_json::to_string(&ListIndentType::Tabs).expect("Failed to serialize");
        assert_eq!(indent_json, r#""tabs""#);

        let whitespace_json = serde_json::to_string(&WhitespaceMode::Strict).expect("Failed to serialize");
        assert_eq!(whitespace_json, r#""strict""#);
    }

    /// Enums deserialize from strings regardless of letter case.
    #[test]
    fn test_enum_deserialization() {
        let lower: HeadingStyle = serde_json::from_str(r#""atxclosed""#).expect("Failed");
        assert_eq!(lower, HeadingStyle::AtxClosed);

        let upper: HeadingStyle = serde_json::from_str(r#""ATXCLOSED""#).expect("Failed");
        assert_eq!(upper, HeadingStyle::AtxClosed);

        let indent: ListIndentType = serde_json::from_str(r#""tabs""#).expect("Failed");
        assert_eq!(indent, ListIndentType::Tabs);
    }
}