// html_to_markdown_rs/lib.rs
//
// Crate-wide Clippy configuration. Because this is an inner attribute at the
// crate root, every lint listed here is silenced for the whole crate rather
// than for a single item.
// NOTE(review): the rationale for each individual suppression is not recorded
// here; consider narrowing allowances to the items that actually need them.
#![allow(
    clippy::too_many_lines,
    clippy::option_if_let_else,
    clippy::match_wildcard_for_single_variants,
    clippy::needless_pass_by_value,
    clippy::struct_excessive_bools,
    clippy::fn_params_excessive_bools,
    clippy::branches_sharing_code,
    clippy::match_same_arms,
    clippy::missing_errors_doc,
    clippy::items_after_statements,
    clippy::doc_markdown,
    clippy::cast_sign_loss,
    clippy::default_trait_access,
    clippy::unused_self,
    clippy::cast_precision_loss,
    clippy::collapsible_if,
    clippy::too_many_arguments,
    clippy::collapsible_else_if,
    clippy::extra_unused_lifetimes,
    clippy::unnecessary_lazy_evaluations,
    clippy::must_use_candidate,
    clippy::trivially_copy_pass_by_ref,
    clippy::explicit_iter_loop,
    clippy::missing_const_for_fn,
    clippy::manual_assert,
    clippy::return_self_not_must_use,
    clippy::collapsible_match,
    clippy::cast_possible_truncation,
    clippy::map_unwrap_or,
    clippy::manual_let_else,
    clippy::used_underscore_binding,
    clippy::assigning_clones,
    clippy::uninlined_format_args
)]

37pub mod converter;
51pub mod error;
52#[cfg(feature = "inline-images")]
53mod inline_images;
54#[cfg(feature = "metadata")]
55pub mod metadata;
56pub mod options;
57pub mod safety;
58pub mod text;
59pub mod types;
60#[cfg(feature = "visitor")]
61pub mod visitor;
62#[cfg(feature = "visitor")]
63pub mod visitor_helpers;
64pub mod wrapper;
65
66mod convert_api;
68mod exports;
69pub mod prelude;
70mod rcdom;
71mod validation;
72
73pub use exports::*;
78pub use types::{
79 AnnotationKind, ConversionResult, DocumentNode, DocumentStructure, GridCell, NodeContent, ProcessingWarning,
80 TableData, TableGrid, TextAnnotation, WarningKind,
81};
82
83pub use convert_api::convert;
88
89#[cfg(any(feature = "serde", feature = "metadata"))]
90pub use convert_api::{conversion_options_from_json, conversion_options_update_from_json};
91
92#[cfg(feature = "metadata")]
93pub use convert_api::metadata_config_from_json;
94
95#[cfg(feature = "inline-images")]
96pub use convert_api::inline_image_config_from_json;
97
98#[cfg(feature = "visitor")]
99#[doc(hidden)]
100pub use convert_api::convert_with_visitor;
101
/// Smoke tests for the crate-root `convert` entry point: rejection of
/// binary-looking input, tolerance of mis-decoded UTF-16, and plain-text
/// handling with and without escaping options.
#[cfg(test)]
mod basic_tests {
    use super::*;

    /// Text with a run of 20 NUL bytes in the middle must be refused.
    #[test]
    fn test_binary_input_rejected() {
        let html = format!("abc{}def", "\0".repeat(20));
        let result = convert(&html, None);
        assert!(
            matches!(result, Err(ConversionError::InvalidInput(_))),
            "input containing a run of NUL bytes should be rejected as InvalidInput"
        );
    }

    /// Input that starts with a PDF header must be refused.
    #[test]
    fn test_binary_magic_rejected() {
        let html = "%PDF-1.7";
        let result = convert(html, None);
        assert!(
            matches!(result, Err(ConversionError::InvalidInput(_))),
            "input starting with a PDF header should be rejected as InvalidInput"
        );
    }

    /// UTF-16LE bytes (BOM + `<html>`) that were lossily decoded as UTF-8 must
    /// still convert successfully.
    #[test]
    fn test_utf16_hint_recovered() {
        // The BOM bytes 0xFF 0xFE are not valid UTF-8, so the lossy decode
        // replaces them with U+FFFD; the interleaved NUL bytes are kept as-is.
        // `into_owned` hands over the decoded `String` without another copy.
        let html = String::from_utf8_lossy(b"\xFF\xFE<\0h\0t\0m\0l\0>\0").into_owned();
        let result = convert(&html, None);
        assert!(result.is_ok(), "UTF-16 input should be recovered instead of rejected");
    }

    /// Markup-free text passes through and shows up in the output content.
    #[test]
    fn test_plain_text_allowed() {
        let result = convert("Just text", None).expect("plain text input should convert");
        let content = result.content.unwrap_or_default();
        assert!(content.contains("Just text"));
    }

    /// With both escape options switched on, `*` and `_` in plain text are
    /// emitted backslash-escaped.
    #[test]
    fn test_plain_text_escaped_when_enabled() {
        let options = ConversionOptions {
            escape_asterisks: true,
            escape_underscores: true,
            ..ConversionOptions::default()
        };
        let result = convert("Text *asterisks* _underscores_", Some(options))
            .expect("plain text with escaping enabled should convert");
        let content = result.content.unwrap_or_default();
        assert!(content.contains(r"\*asterisks\*"));
        assert!(content.contains(r"\_underscores\_"));
    }
}