1#![allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::unused_self)]
/// Heading style option stored in [`ConversionOptions::heading_style`].
///
/// The derived [`Default`] is [`HeadingStyle::Atx`]. Note that [`HeadingStyle::parse`]
/// falls back to [`HeadingStyle::Underlined`] instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HeadingStyle {
    /// Canonical token `"underlined"` (presumably Setext-style underlines — confirm against
    /// the renderer).
    Underlined,
    /// Canonical token `"atx"`; the default variant.
    #[default]
    Atx,
    /// Canonical token `"atxclosed"`.
    AtxClosed,
}
17
18impl HeadingStyle {
19 #[must_use]
24 pub fn parse(value: &str) -> Self {
25 match normalize_token(value).as_str() {
26 "atx" => Self::Atx,
27 "atxclosed" => Self::AtxClosed,
28 _ => Self::Underlined,
29 }
30 }
31}
32
/// List indentation type stored in [`ConversionOptions::list_indent_type`].
///
/// The derived [`Default`] is [`ListIndentType::Spaces`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ListIndentType {
    /// Canonical token `"spaces"`; the default variant.
    #[default]
    Spaces,
    /// Canonical token `"tabs"`.
    Tabs,
}
44
45impl ListIndentType {
46 #[must_use]
51 pub fn parse(value: &str) -> Self {
52 match normalize_token(value).as_str() {
53 "tabs" => Self::Tabs,
54 _ => Self::Spaces,
55 }
56 }
57}
58
/// Whitespace handling mode stored in [`ConversionOptions::whitespace_mode`].
///
/// The derived [`Default`] is [`WhitespaceMode::Normalized`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum WhitespaceMode {
    /// Canonical token `"normalized"`; the default variant.
    #[default]
    Normalized,
    /// Canonical token `"strict"`.
    Strict,
}
70
71impl WhitespaceMode {
72 #[must_use]
77 pub fn parse(value: &str) -> Self {
78 match normalize_token(value).as_str() {
79 "strict" => Self::Strict,
80 _ => Self::Normalized,
81 }
82 }
83}
84
/// Newline style stored in [`ConversionOptions::newline_style`].
///
/// The derived [`Default`] is [`NewlineStyle::Spaces`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum NewlineStyle {
    /// Canonical token `"spaces"`; the default variant (presumably the trailing-spaces form
    /// of a Markdown hard break — confirm against the renderer).
    #[default]
    Spaces,
    /// Canonical token `"backslash"`.
    Backslash,
}
96
97impl NewlineStyle {
98 #[must_use]
103 pub fn parse(value: &str) -> Self {
104 match normalize_token(value).as_str() {
105 "backslash" => Self::Backslash,
106 _ => Self::Spaces,
107 }
108 }
109}
110
/// Code block style stored in [`ConversionOptions::code_block_style`].
///
/// The derived [`Default`] is [`CodeBlockStyle::Indented`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CodeBlockStyle {
    /// Canonical token `"indented"`; the default variant.
    #[default]
    Indented,
    /// Canonical token `"backticks"`.
    Backticks,
    /// Canonical token `"tildes"`.
    Tildes,
}
124
125impl CodeBlockStyle {
126 #[must_use]
131 pub fn parse(value: &str) -> Self {
132 match normalize_token(value).as_str() {
133 "backticks" => Self::Backticks,
134 "tildes" => Self::Tildes,
135 _ => Self::Indented,
136 }
137 }
138}
139
/// Highlight style stored in [`ConversionOptions::highlight_style`].
///
/// The derived [`Default`] is [`HighlightStyle::DoubleEqual`]. Note that
/// [`HighlightStyle::parse`] falls back to [`HighlightStyle::None`] instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum HighlightStyle {
    /// Canonical token `"doubleequal"`; the default variant.
    #[default]
    DoubleEqual,
    /// Canonical token `"html"`.
    Html,
    /// Canonical token `"bold"`.
    Bold,
    /// Canonical token `"none"`.
    None,
}
155
156impl HighlightStyle {
157 #[must_use]
162 pub fn parse(value: &str) -> Self {
163 match normalize_token(value).as_str() {
164 "doubleequal" => Self::DoubleEqual,
165 "html" => Self::Html,
166 "bold" => Self::Bold,
167 "none" => Self::None,
168 _ => Self::None,
169 }
170 }
171}
172
/// Preprocessing preset stored in [`PreprocessingOptions::preset`].
///
/// The derived [`Default`] is [`PreprocessingPreset::Standard`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PreprocessingPreset {
    /// Canonical token `"minimal"`.
    Minimal,
    /// Canonical token `"standard"`; the default variant.
    #[default]
    Standard,
    /// Canonical token `"aggressive"`.
    Aggressive,
}
186
187impl PreprocessingPreset {
188 #[must_use]
193 pub fn parse(value: &str) -> Self {
194 match normalize_token(value).as_str() {
195 "minimal" => Self::Minimal,
196 "aggressive" => Self::Aggressive,
197 _ => Self::Standard,
198 }
199 }
200}
201
202#[derive(Debug, Clone)]
204#[cfg_attr(
205 any(feature = "serde", feature = "metadata"),
206 derive(serde::Serialize, serde::Deserialize)
207)]
208#[cfg_attr(any(feature = "serde", feature = "metadata"), serde(rename_all = "camelCase"))]
209pub struct ConversionOptions {
210 pub heading_style: HeadingStyle,
212
213 pub list_indent_type: ListIndentType,
215
216 pub list_indent_width: usize,
218
219 pub bullets: String,
221
222 pub strong_em_symbol: char,
224
225 pub escape_asterisks: bool,
227
228 pub escape_underscores: bool,
230
231 pub escape_misc: bool,
233
234 pub escape_ascii: bool,
236
237 pub code_language: String,
239
240 pub autolinks: bool,
242
243 pub default_title: bool,
245
246 pub br_in_tables: bool,
248
249 pub hocr_spatial_tables: bool,
251
252 pub highlight_style: HighlightStyle,
254
255 pub extract_metadata: bool,
257
258 pub whitespace_mode: WhitespaceMode,
260
261 pub strip_newlines: bool,
263
264 pub wrap: bool,
266
267 pub wrap_width: usize,
269
270 pub convert_as_inline: bool,
272
273 pub sub_symbol: String,
275
276 pub sup_symbol: String,
278
279 pub newline_style: NewlineStyle,
281
282 pub code_block_style: CodeBlockStyle,
284
285 pub keep_inline_images_in: Vec<String>,
287
288 pub preprocessing: PreprocessingOptions,
290
291 pub encoding: String,
293
294 pub debug: bool,
296
297 pub strip_tags: Vec<String>,
299
300 pub preserve_tags: Vec<String>,
302
303 pub skip_images: bool,
307}
308
309#[derive(Debug, Clone, Default)]
315#[cfg_attr(
316 any(feature = "serde", feature = "metadata"),
317 derive(serde::Serialize, serde::Deserialize)
318)]
319#[cfg_attr(any(feature = "serde", feature = "metadata"), serde(rename_all = "camelCase"))]
320pub struct ConversionOptionsUpdate {
321 pub heading_style: Option<HeadingStyle>,
323
324 pub list_indent_type: Option<ListIndentType>,
326
327 pub list_indent_width: Option<usize>,
329
330 pub bullets: Option<String>,
332
333 pub strong_em_symbol: Option<char>,
335
336 pub escape_asterisks: Option<bool>,
338
339 pub escape_underscores: Option<bool>,
341
342 pub escape_misc: Option<bool>,
344
345 pub escape_ascii: Option<bool>,
347
348 pub code_language: Option<String>,
350
351 pub autolinks: Option<bool>,
353
354 pub default_title: Option<bool>,
356
357 pub br_in_tables: Option<bool>,
359
360 pub hocr_spatial_tables: Option<bool>,
362
363 pub highlight_style: Option<HighlightStyle>,
365
366 pub extract_metadata: Option<bool>,
368
369 pub whitespace_mode: Option<WhitespaceMode>,
371
372 pub strip_newlines: Option<bool>,
374
375 pub wrap: Option<bool>,
377
378 pub wrap_width: Option<usize>,
380
381 pub convert_as_inline: Option<bool>,
383
384 pub sub_symbol: Option<String>,
386
387 pub sup_symbol: Option<String>,
389
390 pub newline_style: Option<NewlineStyle>,
392
393 pub code_block_style: Option<CodeBlockStyle>,
395
396 pub keep_inline_images_in: Option<Vec<String>>,
398
399 pub preprocessing: Option<PreprocessingOptionsUpdate>,
401
402 pub encoding: Option<String>,
404
405 pub debug: Option<bool>,
407
408 pub strip_tags: Option<Vec<String>>,
410
411 pub preserve_tags: Option<Vec<String>>,
413
414 pub skip_images: Option<bool>,
416}
417
418impl Default for ConversionOptions {
419 fn default() -> Self {
420 Self {
421 heading_style: HeadingStyle::default(),
422 list_indent_type: ListIndentType::default(),
423 list_indent_width: 2,
424 bullets: "-".to_string(),
425 strong_em_symbol: '*',
426 escape_asterisks: false,
427 escape_underscores: false,
428 escape_misc: false,
429 escape_ascii: false,
430 code_language: String::new(),
431 autolinks: true,
432 default_title: false,
433 br_in_tables: false,
434 hocr_spatial_tables: true,
435 highlight_style: HighlightStyle::default(),
436 extract_metadata: true,
437 whitespace_mode: WhitespaceMode::default(),
438 strip_newlines: false,
439 wrap: false,
440 wrap_width: 80,
441 convert_as_inline: false,
442 sub_symbol: String::new(),
443 sup_symbol: String::new(),
444 newline_style: NewlineStyle::Spaces,
445 code_block_style: CodeBlockStyle::default(),
446 keep_inline_images_in: Vec::new(),
447 preprocessing: PreprocessingOptions::default(),
448 encoding: "utf-8".to_string(),
449 debug: false,
450 strip_tags: Vec::new(),
451 preserve_tags: Vec::new(),
452 skip_images: false,
453 }
454 }
455}
456
457impl ConversionOptions {
458 pub fn apply_update(&mut self, update: ConversionOptionsUpdate) {
467 if let Some(heading_style) = update.heading_style {
468 self.heading_style = heading_style;
469 }
470 if let Some(list_indent_type) = update.list_indent_type {
471 self.list_indent_type = list_indent_type;
472 }
473 if let Some(list_indent_width) = update.list_indent_width {
474 self.list_indent_width = list_indent_width;
475 }
476 if let Some(bullets) = update.bullets {
477 self.bullets = bullets;
478 }
479 if let Some(strong_em_symbol) = update.strong_em_symbol {
480 self.strong_em_symbol = strong_em_symbol;
481 }
482 if let Some(escape_asterisks) = update.escape_asterisks {
483 self.escape_asterisks = escape_asterisks;
484 }
485 if let Some(escape_underscores) = update.escape_underscores {
486 self.escape_underscores = escape_underscores;
487 }
488 if let Some(escape_misc) = update.escape_misc {
489 self.escape_misc = escape_misc;
490 }
491 if let Some(escape_ascii) = update.escape_ascii {
492 self.escape_ascii = escape_ascii;
493 }
494 if let Some(code_language) = update.code_language {
495 self.code_language = code_language;
496 }
497 if let Some(autolinks) = update.autolinks {
498 self.autolinks = autolinks;
499 }
500 if let Some(default_title) = update.default_title {
501 self.default_title = default_title;
502 }
503 if let Some(br_in_tables) = update.br_in_tables {
504 self.br_in_tables = br_in_tables;
505 }
506 if let Some(hocr_spatial_tables) = update.hocr_spatial_tables {
507 self.hocr_spatial_tables = hocr_spatial_tables;
508 }
509 if let Some(highlight_style) = update.highlight_style {
510 self.highlight_style = highlight_style;
511 }
512 if let Some(extract_metadata) = update.extract_metadata {
513 self.extract_metadata = extract_metadata;
514 }
515 if let Some(whitespace_mode) = update.whitespace_mode {
516 self.whitespace_mode = whitespace_mode;
517 }
518 if let Some(strip_newlines) = update.strip_newlines {
519 self.strip_newlines = strip_newlines;
520 }
521 if let Some(wrap) = update.wrap {
522 self.wrap = wrap;
523 }
524 if let Some(wrap_width) = update.wrap_width {
525 self.wrap_width = wrap_width;
526 }
527 if let Some(convert_as_inline) = update.convert_as_inline {
528 self.convert_as_inline = convert_as_inline;
529 }
530 if let Some(sub_symbol) = update.sub_symbol {
531 self.sub_symbol = sub_symbol;
532 }
533 if let Some(sup_symbol) = update.sup_symbol {
534 self.sup_symbol = sup_symbol;
535 }
536 if let Some(newline_style) = update.newline_style {
537 self.newline_style = newline_style;
538 }
539 if let Some(code_block_style) = update.code_block_style {
540 self.code_block_style = code_block_style;
541 }
542 if let Some(keep_inline_images_in) = update.keep_inline_images_in {
543 self.keep_inline_images_in = keep_inline_images_in;
544 }
545 if let Some(preprocessing) = update.preprocessing {
546 self.preprocessing.apply_update(preprocessing);
547 }
548 if let Some(encoding) = update.encoding {
549 self.encoding = encoding;
550 }
551 if let Some(debug) = update.debug {
552 self.debug = debug;
553 }
554 if let Some(strip_tags) = update.strip_tags {
555 self.strip_tags = strip_tags;
556 }
557 if let Some(preserve_tags) = update.preserve_tags {
558 self.preserve_tags = preserve_tags;
559 }
560 if let Some(skip_images) = update.skip_images {
561 self.skip_images = skip_images;
562 }
563 }
564
565 #[must_use]
578 pub fn from_update(update: ConversionOptionsUpdate) -> Self {
579 let mut options = Self::default();
580 options.apply_update(update);
581 options
582 }
583}
584
585impl From<ConversionOptionsUpdate> for ConversionOptions {
586 fn from(update: ConversionOptionsUpdate) -> Self {
587 Self::from_update(update)
588 }
589}
590
591#[derive(Debug, Clone)]
593#[cfg_attr(
594 any(feature = "serde", feature = "metadata"),
595 derive(serde::Serialize, serde::Deserialize)
596)]
597#[cfg_attr(any(feature = "serde", feature = "metadata"), serde(rename_all = "camelCase"))]
598pub struct PreprocessingOptions {
599 pub enabled: bool,
601
602 pub preset: PreprocessingPreset,
604
605 pub remove_navigation: bool,
607
608 pub remove_forms: bool,
610}
611
612#[derive(Debug, Clone, Default)]
618#[cfg_attr(
619 any(feature = "serde", feature = "metadata"),
620 derive(serde::Serialize, serde::Deserialize)
621)]
622#[cfg_attr(any(feature = "serde", feature = "metadata"), serde(rename_all = "camelCase"))]
623pub struct PreprocessingOptionsUpdate {
624 pub enabled: Option<bool>,
626
627 pub preset: Option<PreprocessingPreset>,
629
630 pub remove_navigation: Option<bool>,
632
633 pub remove_forms: Option<bool>,
635}
636
/// Canonicalizes an option token for matching.
///
/// Keeps only ASCII letters and digits and lowercases them; every other character
/// (whitespace, `-`, `_`, punctuation, non-ASCII text) is dropped. For example,
/// `"ATX-Closed"` becomes `"atxclosed"`.
fn normalize_token(value: &str) -> String {
    // The result can never be longer than the input, so one allocation is enough.
    let mut token = String::with_capacity(value.len());
    token.extend(
        value
            .chars()
            .filter(char::is_ascii_alphanumeric)
            .map(|ch| ch.to_ascii_lowercase()),
    );
    token
}
646
/// Hand-written serde support for the option enums.
///
/// Deserialization reads a string and routes it through the enum's lenient `parse` function,
/// so an unknown token yields that function's fallback variant instead of an error (only a
/// non-string input fails). Serialization always emits the canonical lowercase token.
#[cfg(any(feature = "serde", feature = "metadata"))]
mod serde_impls {
    use super::{
        CodeBlockStyle, HeadingStyle, HighlightStyle, ListIndentType, NewlineStyle, PreprocessingPreset, WhitespaceMode,
    };
    use serde::{Deserialize, Serialize, Serializer};

    /// Implements `Deserialize` for `$ty` by deserializing a `String` and passing it to
    /// `$parser`, which must be callable as `fn(&str) -> $ty`.
    macro_rules! impl_deserialize_from_parse {
        ($ty:ty, $parser:expr) => {
            impl<'de> Deserialize<'de> for $ty {
                fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
                where
                    D: serde::Deserializer<'de>,
                {
                    let value = String::deserialize(deserializer)?;
                    Ok($parser(&value))
                }
            }
        };
    }

    impl_deserialize_from_parse!(HeadingStyle, HeadingStyle::parse);
    impl_deserialize_from_parse!(ListIndentType, ListIndentType::parse);
    impl_deserialize_from_parse!(WhitespaceMode, WhitespaceMode::parse);
    impl_deserialize_from_parse!(NewlineStyle, NewlineStyle::parse);
    impl_deserialize_from_parse!(CodeBlockStyle, CodeBlockStyle::parse);
    impl_deserialize_from_parse!(HighlightStyle, HighlightStyle::parse);
    impl_deserialize_from_parse!(PreprocessingPreset, PreprocessingPreset::parse);

    impl Serialize for HeadingStyle {
        /// Serializes as `"underlined"`, `"atx"`, or `"atxclosed"`.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let s = match self {
                Self::Underlined => "underlined",
                Self::Atx => "atx",
                Self::AtxClosed => "atxclosed",
            };
            serializer.serialize_str(s)
        }
    }

    impl Serialize for ListIndentType {
        /// Serializes as `"spaces"` or `"tabs"`.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let s = match self {
                Self::Spaces => "spaces",
                Self::Tabs => "tabs",
            };
            serializer.serialize_str(s)
        }
    }

    impl Serialize for WhitespaceMode {
        /// Serializes as `"normalized"` or `"strict"`.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let s = match self {
                Self::Normalized => "normalized",
                Self::Strict => "strict",
            };
            serializer.serialize_str(s)
        }
    }

    impl Serialize for NewlineStyle {
        /// Serializes as `"spaces"` or `"backslash"`.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let s = match self {
                Self::Spaces => "spaces",
                Self::Backslash => "backslash",
            };
            serializer.serialize_str(s)
        }
    }

    impl Serialize for CodeBlockStyle {
        /// Serializes as `"indented"`, `"backticks"`, or `"tildes"`.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let s = match self {
                Self::Indented => "indented",
                Self::Backticks => "backticks",
                Self::Tildes => "tildes",
            };
            serializer.serialize_str(s)
        }
    }

    impl Serialize for HighlightStyle {
        /// Serializes as `"doubleequal"`, `"html"`, `"bold"`, or `"none"`.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let s = match self {
                Self::DoubleEqual => "doubleequal",
                Self::Html => "html",
                Self::Bold => "bold",
                Self::None => "none",
            };
            serializer.serialize_str(s)
        }
    }

    impl Serialize for PreprocessingPreset {
        /// Serializes as `"minimal"`, `"standard"`, or `"aggressive"`.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let s = match self {
                Self::Minimal => "minimal",
                Self::Standard => "standard",
                Self::Aggressive => "aggressive",
            };
            serializer.serialize_str(s)
        }
    }
}
773
774impl Default for PreprocessingOptions {
775 fn default() -> Self {
776 Self {
777 enabled: false,
778 preset: PreprocessingPreset::default(),
779 remove_navigation: true,
780 remove_forms: true,
781 }
782 }
783}
784
785impl PreprocessingOptions {
786 #[allow(clippy::needless_pass_by_value)]
795 pub const fn apply_update(&mut self, update: PreprocessingOptionsUpdate) {
796 if let Some(enabled) = update.enabled {
797 self.enabled = enabled;
798 }
799 if let Some(preset) = update.preset {
800 self.preset = preset;
801 }
802 if let Some(remove_navigation) = update.remove_navigation {
803 self.remove_navigation = remove_navigation;
804 }
805 if let Some(remove_forms) = update.remove_forms {
806 self.remove_forms = remove_forms;
807 }
808 }
809
810 #[must_use]
823 pub fn from_update(update: PreprocessingOptionsUpdate) -> Self {
824 let mut options = Self::default();
825 options.apply_update(update);
826 options
827 }
828}
829
830impl From<PreprocessingOptionsUpdate> for PreprocessingOptions {
831 fn from(update: PreprocessingOptionsUpdate) -> Self {
832 Self::from_update(update)
833 }
834}
835
/// Serde round-trip tests; compiled only when a serde-providing feature is enabled.
#[cfg(all(test, any(feature = "serde", feature = "metadata")))]
mod tests {
    use super::*;

    /// A customized `ConversionOptions` survives a JSON round trip.
    #[test]
    fn test_conversion_options_serde() {
        let options = ConversionOptions {
            heading_style: HeadingStyle::AtxClosed,
            list_indent_width: 4,
            bullets: "*".to_string(),
            escape_asterisks: true,
            whitespace_mode: WhitespaceMode::Strict,
            ..ConversionOptions::default()
        };

        let json = serde_json::to_string(&options).expect("Failed to serialize");

        let deserialized: ConversionOptions = serde_json::from_str(&json).expect("Failed to deserialize");

        assert_eq!(deserialized.list_indent_width, 4);
        assert_eq!(deserialized.bullets, "*");
        assert!(deserialized.escape_asterisks);
        assert_eq!(deserialized.heading_style, HeadingStyle::AtxClosed);
        assert_eq!(deserialized.whitespace_mode, WhitespaceMode::Strict);
    }

    /// A customized `PreprocessingOptions` survives a JSON round trip.
    #[test]
    fn test_preprocessing_options_serde() {
        let options = PreprocessingOptions {
            enabled: true,
            preset: PreprocessingPreset::Aggressive,
            remove_navigation: false,
            ..PreprocessingOptions::default()
        };

        let json = serde_json::to_string(&options).expect("Failed to serialize");

        let deserialized: PreprocessingOptions = serde_json::from_str(&json).expect("Failed to deserialize");

        assert!(deserialized.enabled);
        assert_eq!(deserialized.preset, PreprocessingPreset::Aggressive);
        assert!(!deserialized.remove_navigation);
    }

    /// Enums serialize to their canonical lowercase tokens.
    #[test]
    fn test_enum_serialization() {
        let heading = HeadingStyle::AtxClosed;
        let json = serde_json::to_string(&heading).expect("Failed to serialize");
        assert_eq!(json, r#""atxclosed""#);

        let list_indent = ListIndentType::Tabs;
        let json = serde_json::to_string(&list_indent).expect("Failed to serialize");
        assert_eq!(json, r#""tabs""#);

        let whitespace = WhitespaceMode::Strict;
        let json = serde_json::to_string(&whitespace).expect("Failed to serialize");
        assert_eq!(json, r#""strict""#);
    }

    /// Enum deserialization goes through the lenient parsers, so case is ignored.
    #[test]
    fn test_enum_deserialization() {
        let heading: HeadingStyle = serde_json::from_str(r#""atxclosed""#).expect("Failed");
        assert_eq!(heading, HeadingStyle::AtxClosed);

        let heading: HeadingStyle = serde_json::from_str(r#""ATXCLOSED""#).expect("Failed");
        assert_eq!(heading, HeadingStyle::AtxClosed);

        let list_indent: ListIndentType = serde_json::from_str(r#""tabs""#).expect("Failed");
        assert_eq!(list_indent, ListIndentType::Tabs);
    }
}