html_to_markdown_rs/options/
conversion.rs1#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]
2
3use crate::options::preprocessing::PreprocessingOptions;
10use crate::options::preprocessing::PreprocessingOptionsUpdate;
11use crate::options::validation::{
12 CodeBlockStyle, HeadingStyle, HighlightStyle, ListIndentType, NewlineStyle, OutputFormat, WhitespaceMode,
13};
14
15#[derive(Debug, Clone)]
17#[cfg_attr(
18 any(feature = "serde", feature = "metadata"),
19 derive(serde::Serialize, serde::Deserialize)
20)]
21#[cfg_attr(
22 any(feature = "serde", feature = "metadata"),
23 serde(rename_all = "camelCase", default)
24)]
25pub struct ConversionOptions {
26 pub heading_style: HeadingStyle,
28
29 pub list_indent_type: ListIndentType,
31
32 pub list_indent_width: usize,
34
35 pub bullets: String,
37
38 pub strong_em_symbol: char,
40
41 pub escape_asterisks: bool,
43
44 pub escape_underscores: bool,
46
47 pub escape_misc: bool,
49
50 pub escape_ascii: bool,
52
53 pub code_language: String,
55
56 pub autolinks: bool,
58
59 pub default_title: bool,
61
62 pub br_in_tables: bool,
64
65 pub hocr_spatial_tables: bool,
67
68 pub highlight_style: HighlightStyle,
70
71 pub extract_metadata: bool,
73
74 pub whitespace_mode: WhitespaceMode,
76
77 pub strip_newlines: bool,
79
80 pub wrap: bool,
82
83 pub wrap_width: usize,
85
86 pub convert_as_inline: bool,
88
89 pub sub_symbol: String,
91
92 pub sup_symbol: String,
94
95 pub newline_style: NewlineStyle,
97
98 pub code_block_style: CodeBlockStyle,
100
101 pub keep_inline_images_in: Vec<String>,
103
104 pub preprocessing: PreprocessingOptions,
106
107 pub encoding: String,
109
110 pub debug: bool,
112
113 pub strip_tags: Vec<String>,
115
116 pub preserve_tags: Vec<String>,
118
119 pub skip_images: bool,
123
124 pub output_format: OutputFormat,
126}
127
128#[derive(Debug, Clone, Default)]
134#[cfg_attr(
135 any(feature = "serde", feature = "metadata"),
136 derive(serde::Serialize, serde::Deserialize)
137)]
138#[cfg_attr(any(feature = "serde", feature = "metadata"), serde(rename_all = "camelCase"))]
139pub struct ConversionOptionsUpdate {
140 pub heading_style: Option<HeadingStyle>,
142
143 pub list_indent_type: Option<ListIndentType>,
145
146 pub list_indent_width: Option<usize>,
148
149 pub bullets: Option<String>,
151
152 pub strong_em_symbol: Option<char>,
154
155 pub escape_asterisks: Option<bool>,
157
158 pub escape_underscores: Option<bool>,
160
161 pub escape_misc: Option<bool>,
163
164 pub escape_ascii: Option<bool>,
166
167 pub code_language: Option<String>,
169
170 pub autolinks: Option<bool>,
172
173 pub default_title: Option<bool>,
175
176 pub br_in_tables: Option<bool>,
178
179 pub hocr_spatial_tables: Option<bool>,
181
182 pub highlight_style: Option<HighlightStyle>,
184
185 pub extract_metadata: Option<bool>,
187
188 pub whitespace_mode: Option<WhitespaceMode>,
190
191 pub strip_newlines: Option<bool>,
193
194 pub wrap: Option<bool>,
196
197 pub wrap_width: Option<usize>,
199
200 pub convert_as_inline: Option<bool>,
202
203 pub sub_symbol: Option<String>,
205
206 pub sup_symbol: Option<String>,
208
209 pub newline_style: Option<NewlineStyle>,
211
212 pub code_block_style: Option<CodeBlockStyle>,
214
215 pub keep_inline_images_in: Option<Vec<String>>,
217
218 pub preprocessing: Option<PreprocessingOptionsUpdate>,
220
221 pub encoding: Option<String>,
223
224 pub debug: Option<bool>,
226
227 pub strip_tags: Option<Vec<String>>,
229
230 pub preserve_tags: Option<Vec<String>>,
232
233 pub skip_images: Option<bool>,
235
236 pub output_format: Option<OutputFormat>,
238}
239
240impl Default for ConversionOptions {
241 fn default() -> Self {
242 Self {
243 heading_style: HeadingStyle::default(),
244 list_indent_type: ListIndentType::default(),
245 list_indent_width: 2,
246 bullets: "-".to_string(),
247 strong_em_symbol: '*',
248 escape_asterisks: false,
249 escape_underscores: false,
250 escape_misc: false,
251 escape_ascii: false,
252 code_language: String::new(),
253 autolinks: true,
254 default_title: false,
255 br_in_tables: false,
256 hocr_spatial_tables: true,
257 highlight_style: HighlightStyle::default(),
258 extract_metadata: true,
259 whitespace_mode: WhitespaceMode::default(),
260 strip_newlines: false,
261 wrap: false,
262 wrap_width: 80,
263 convert_as_inline: false,
264 sub_symbol: String::new(),
265 sup_symbol: String::new(),
266 newline_style: NewlineStyle::Spaces,
267 code_block_style: CodeBlockStyle::default(),
268 keep_inline_images_in: Vec::new(),
269 preprocessing: PreprocessingOptions::default(),
270 encoding: "utf-8".to_string(),
271 debug: false,
272 strip_tags: Vec::new(),
273 preserve_tags: Vec::new(),
274 skip_images: false,
275 output_format: OutputFormat::default(),
276 }
277 }
278}
279
280impl ConversionOptions {
281 pub fn apply_update(&mut self, update: ConversionOptionsUpdate) {
290 if let Some(heading_style) = update.heading_style {
291 self.heading_style = heading_style;
292 }
293 if let Some(list_indent_type) = update.list_indent_type {
294 self.list_indent_type = list_indent_type;
295 }
296 if let Some(list_indent_width) = update.list_indent_width {
297 self.list_indent_width = list_indent_width;
298 }
299 if let Some(bullets) = update.bullets {
300 self.bullets = bullets;
301 }
302 if let Some(strong_em_symbol) = update.strong_em_symbol {
303 self.strong_em_symbol = strong_em_symbol;
304 }
305 if let Some(escape_asterisks) = update.escape_asterisks {
306 self.escape_asterisks = escape_asterisks;
307 }
308 if let Some(escape_underscores) = update.escape_underscores {
309 self.escape_underscores = escape_underscores;
310 }
311 if let Some(escape_misc) = update.escape_misc {
312 self.escape_misc = escape_misc;
313 }
314 if let Some(escape_ascii) = update.escape_ascii {
315 self.escape_ascii = escape_ascii;
316 }
317 if let Some(code_language) = update.code_language {
318 self.code_language = code_language;
319 }
320 if let Some(autolinks) = update.autolinks {
321 self.autolinks = autolinks;
322 }
323 if let Some(default_title) = update.default_title {
324 self.default_title = default_title;
325 }
326 if let Some(br_in_tables) = update.br_in_tables {
327 self.br_in_tables = br_in_tables;
328 }
329 if let Some(hocr_spatial_tables) = update.hocr_spatial_tables {
330 self.hocr_spatial_tables = hocr_spatial_tables;
331 }
332 if let Some(highlight_style) = update.highlight_style {
333 self.highlight_style = highlight_style;
334 }
335 if let Some(extract_metadata) = update.extract_metadata {
336 self.extract_metadata = extract_metadata;
337 }
338 if let Some(whitespace_mode) = update.whitespace_mode {
339 self.whitespace_mode = whitespace_mode;
340 }
341 if let Some(strip_newlines) = update.strip_newlines {
342 self.strip_newlines = strip_newlines;
343 }
344 if let Some(wrap) = update.wrap {
345 self.wrap = wrap;
346 }
347 if let Some(wrap_width) = update.wrap_width {
348 self.wrap_width = wrap_width;
349 }
350 if let Some(convert_as_inline) = update.convert_as_inline {
351 self.convert_as_inline = convert_as_inline;
352 }
353 if let Some(sub_symbol) = update.sub_symbol {
354 self.sub_symbol = sub_symbol;
355 }
356 if let Some(sup_symbol) = update.sup_symbol {
357 self.sup_symbol = sup_symbol;
358 }
359 if let Some(newline_style) = update.newline_style {
360 self.newline_style = newline_style;
361 }
362 if let Some(code_block_style) = update.code_block_style {
363 self.code_block_style = code_block_style;
364 }
365 if let Some(keep_inline_images_in) = update.keep_inline_images_in {
366 self.keep_inline_images_in = keep_inline_images_in;
367 }
368 if let Some(preprocessing) = update.preprocessing {
369 self.preprocessing.apply_update(preprocessing);
370 }
371 if let Some(encoding) = update.encoding {
372 self.encoding = encoding;
373 }
374 if let Some(debug) = update.debug {
375 self.debug = debug;
376 }
377 if let Some(strip_tags) = update.strip_tags {
378 self.strip_tags = strip_tags;
379 }
380 if let Some(preserve_tags) = update.preserve_tags {
381 self.preserve_tags = preserve_tags;
382 }
383 if let Some(skip_images) = update.skip_images {
384 self.skip_images = skip_images;
385 }
386 if let Some(output_format) = update.output_format {
387 self.output_format = output_format;
388 }
389 }
390
391 #[must_use]
404 pub fn from_update(update: ConversionOptionsUpdate) -> Self {
405 let mut options = Self::default();
406 options.apply_update(update);
407 options
408 }
409}
410
411impl From<ConversionOptionsUpdate> for ConversionOptions {
412 fn from(update: ConversionOptionsUpdate) -> Self {
413 Self::from_update(update)
414 }
415}
416
#[cfg(all(test, any(feature = "serde", feature = "metadata")))]
mod tests {
    use super::*;

    /// A customised option set survives a JSON serialize/deserialize round trip.
    #[test]
    fn test_conversion_options_serde() {
        // Struct-update syntax instead of mutating a default instance
        // (clippy::field_reassign_with_default).
        let options = ConversionOptions {
            heading_style: HeadingStyle::AtxClosed,
            list_indent_width: 4,
            bullets: "*".to_string(),
            escape_asterisks: true,
            whitespace_mode: WhitespaceMode::Strict,
            ..ConversionOptions::default()
        };

        let json = serde_json::to_string(&options).expect("Failed to serialize");

        let deserialized: ConversionOptions = serde_json::from_str(&json).expect("Failed to deserialize");

        assert_eq!(deserialized.list_indent_width, 4);
        assert_eq!(deserialized.bullets, "*");
        assert!(deserialized.escape_asterisks);
        assert_eq!(deserialized.heading_style, HeadingStyle::AtxClosed);
        assert_eq!(deserialized.whitespace_mode, WhitespaceMode::Strict);
    }

    /// JSON that names only some fields deserializes, with the remaining
    /// fields taking their default values.
    #[test]
    fn test_conversion_options_partial_deserialization() {
        let partial_json = r#"{
            "headingStyle": "atxClosed",
            "listIndentWidth": 4,
            "bullets": "*"
        }"#;

        let deserialized: ConversionOptions =
            serde_json::from_str(partial_json).expect("Failed to deserialize partial JSON");

        // Fields present in the input.
        assert_eq!(deserialized.heading_style, HeadingStyle::AtxClosed);
        assert_eq!(deserialized.list_indent_width, 4);
        assert_eq!(deserialized.bullets, "*");

        // Fields absent from the input fall back to their defaults.
        assert!(!deserialized.escape_asterisks);
        assert!(!deserialized.escape_underscores);
        assert_eq!(deserialized.list_indent_type, ListIndentType::Spaces);
    }
}