html_to_markdown_rs/
options.rs

1//! Configuration options for HTML to Markdown conversion.
2
/// Heading style options.
///
/// The default is [`HeadingStyle::Atx`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HeadingStyle {
    /// Underlined style (`===` for h1, `---` for h2)
    Underlined,
    /// ATX style (`#` for h1, `##` for h2, etc.). This is the default.
    #[default]
    Atx,
    /// ATX closed style (`# title #`)
    AtxClosed,
}
19
/// List indentation type.
///
/// The default is [`ListIndentType::Spaces`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ListIndentType {
    /// Indent with spaces. This is the default.
    #[default]
    Spaces,
    /// Indent with tabs.
    Tabs,
}
32
/// Whitespace handling mode.
///
/// The default is [`WhitespaceMode::Normalized`].
// NOTE(review): the exact effect of each mode is decided by the converter that
// consumes this option; it is not visible from this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum WhitespaceMode {
    /// Normalized whitespace handling. This is the default.
    #[default]
    Normalized,
    /// Strict whitespace handling.
    Strict,
}
45
/// Newline style.
///
/// The default is [`NewlineStyle::Spaces`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum NewlineStyle {
    /// Two spaces at end of line. This is the default.
    #[default]
    Spaces,
    /// Backslash at end of line
    Backslash,
}
60
/// Code block style.
///
/// The default is [`CodeBlockStyle::Indented`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CodeBlockStyle {
    /// Indented code blocks (4 spaces) - CommonMark default.
    /// This is also the default for this enum.
    #[default]
    Indented,
    /// Fenced code blocks with backticks
    Backticks,
    /// Fenced code blocks with tildes (`~~~`)
    Tildes,
}
77
/// Highlight style for `<mark>` elements.
///
/// The default is [`HighlightStyle::DoubleEqual`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HighlightStyle {
    /// `==text==`. This is the default.
    #[default]
    DoubleEqual,
    /// `<mark>text</mark>`
    Html,
    /// `**text**`
    Bold,
    /// Plain text (no formatting)
    None,
}
96
/// Preprocessing preset levels.
///
/// The default is [`PreprocessingPreset::Standard`].
// NOTE(review): what each level actually removes is decided by the
// preprocessing code that consumes this value; it is not visible here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PreprocessingPreset {
    /// Minimal preset level.
    Minimal,
    /// Standard preset level. This is the default.
    #[default]
    Standard,
    /// Aggressive preset level.
    Aggressive,
}
110
111/// Main conversion options.
112#[derive(Debug, Clone)]
113pub struct ConversionOptions {
114    /// Heading style
115    pub heading_style: HeadingStyle,
116
117    /// List indentation type
118    pub list_indent_type: ListIndentType,
119
120    /// List indentation width (spaces)
121    pub list_indent_width: usize,
122
123    /// Bullet characters for unordered lists
124    pub bullets: String,
125
126    /// Symbol for strong/emphasis (* or _)
127    pub strong_em_symbol: char,
128
129    /// Escape asterisks in text
130    pub escape_asterisks: bool,
131
132    /// Escape underscores in text
133    pub escape_underscores: bool,
134
135    /// Escape misc markdown characters
136    pub escape_misc: bool,
137
138    /// Escape all ASCII punctuation (for CommonMark spec compliance tests)
139    pub escape_ascii: bool,
140
141    /// Default code language
142    pub code_language: String,
143
144    /// Use autolinks for bare URLs
145    pub autolinks: bool,
146
147    /// Add default title if none exists
148    pub default_title: bool,
149
150    /// Use <br> in tables instead of spaces
151    pub br_in_tables: bool,
152
153    /// Highlight style for <mark> elements
154    pub highlight_style: HighlightStyle,
155
156    /// Extract metadata from HTML
157    pub extract_metadata: bool,
158
159    /// Whitespace handling mode
160    pub whitespace_mode: WhitespaceMode,
161
162    /// Strip newlines from HTML before processing
163    pub strip_newlines: bool,
164
165    /// Enable text wrapping
166    pub wrap: bool,
167
168    /// Text wrap width
169    pub wrap_width: usize,
170
171    /// Treat block elements as inline
172    pub convert_as_inline: bool,
173
174    /// Subscript symbol
175    pub sub_symbol: String,
176
177    /// Superscript symbol
178    pub sup_symbol: String,
179
180    /// Newline style
181    pub newline_style: NewlineStyle,
182
183    /// Code block style
184    pub code_block_style: CodeBlockStyle,
185
186    /// Elements where images should remain as markdown (not converted to alt text)
187    pub keep_inline_images_in: Vec<String>,
188
189    /// Preprocessing options
190    pub preprocessing: PreprocessingOptions,
191
192    /// Source encoding (informational)
193    pub encoding: String,
194
195    /// Enable debug mode with diagnostic warnings
196    pub debug: bool,
197
198    /// List of HTML tags to strip (output only text content, no markdown conversion)
199    pub strip_tags: Vec<String>,
200}
201
202impl Default for ConversionOptions {
203    fn default() -> Self {
204        Self {
205            heading_style: HeadingStyle::default(),
206            list_indent_type: ListIndentType::default(),
207            list_indent_width: 2,
208            bullets: "-".to_string(),
209            strong_em_symbol: '*',
210            escape_asterisks: false,
211            escape_underscores: false,
212            escape_misc: false,
213            escape_ascii: false,
214            code_language: String::new(),
215            autolinks: true,
216            default_title: false,
217            br_in_tables: false,
218            highlight_style: HighlightStyle::default(),
219            extract_metadata: true,
220            whitespace_mode: WhitespaceMode::default(),
221            strip_newlines: false,
222            wrap: false,
223            wrap_width: 80,
224            convert_as_inline: false,
225            sub_symbol: String::new(),
226            sup_symbol: String::new(),
227            newline_style: NewlineStyle::Spaces,
228            code_block_style: CodeBlockStyle::default(),
229            keep_inline_images_in: Vec::new(),
230            preprocessing: PreprocessingOptions::default(),
231            encoding: "utf-8".to_string(),
232            debug: false,
233            strip_tags: Vec::new(),
234        }
235    }
236}
237
238/// HTML preprocessing options.
239#[derive(Debug, Clone)]
240pub struct PreprocessingOptions {
241    /// Enable preprocessing
242    pub enabled: bool,
243
244    /// Preprocessing preset
245    pub preset: PreprocessingPreset,
246
247    /// Remove navigation elements
248    pub remove_navigation: bool,
249
250    /// Remove form elements
251    pub remove_forms: bool,
252}
253
254impl Default for PreprocessingOptions {
255    fn default() -> Self {
256        Self {
257            enabled: false,
258            preset: PreprocessingPreset::default(),
259            remove_navigation: true,
260            remove_forms: true,
261        }
262    }
263}