// Crate-wide lint configuration: silence `clippy::too_many_arguments` and
// `clippy::trivially_copy_pass_by_ref` for every item in the crate.
#![allow(clippy::too_many_arguments, clippy::trivially_copy_pass_by_ref)]
// Enable the unstable `portable_simd` language feature only when the `simd`
// Cargo feature and the custom `nightly` cfg are both set.
// NOTE(review): `nightly` is presumably emitted by a build script — confirm.
#![cfg_attr(all(feature = "simd", nightly), feature(portable_simd))]
// Public modules. `metadata` and `visitor` are compiled only when the Cargo
// feature of the same name is enabled.
pub mod error;
#[cfg(feature = "metadata")]
pub mod metadata;
pub mod options;
pub mod types;
#[cfg(feature = "visitor")]
pub mod visitor;
// Private module; its `convert` function is re-exported at the crate root.
mod convert_api;
// Crate-internal module with `dead_code` warnings silenced for everything inside.
// NOTE(review): presumably some items are reachable only from tests or
// feature-gated paths — confirm before removing the allow.
#[allow(dead_code)]
pub(crate) mod converter;
// Private module whose public items are glob re-exported at the crate root.
mod exports;
pub(crate) mod tl_types;
/// Publicly re-exports the crate-internal `converter::prescan` and
/// `converter::tier1` items.
///
/// Compiled only for test builds or when the `testkit` feature is enabled.
#[cfg(any(test, feature = "testkit"))]
#[allow(unused_imports)]
pub mod testkit {
    pub use crate::converter::{prescan, tier1};
}
// `converter::prescan` and `converter::tier1` are also re-exported at the crate
// root, under the same `test` / `testkit` gate used for the `testkit` module.
#[cfg(any(test, feature = "testkit"))]
pub use converter::prescan;
#[cfg(any(test, feature = "testkit"))]
pub use converter::tier1;
// Remaining private / crate-internal modules. Several are conditionally compiled:
// - `inline_images` requires the `inline-images` feature.
// - `simd_scan` requires both the `simd` feature and the `nightly` cfg, the same
//   condition that enables `portable_simd` at the top of this file.
// - `visitor_helpers` requires the `visitor` feature.
#[cfg(feature = "inline-images")]
mod inline_images;
pub(crate) mod prelude;
mod rcdom;
#[cfg(all(feature = "simd", nightly))]
mod simd_scan;
pub(crate) mod text;
mod validation;
#[cfg(feature = "visitor")]
pub(crate) mod visitor_helpers;
pub(crate) mod wrapper;
// Crate-root public API surface.
// Everything public in the private `exports` module is re-exported by glob.
// NOTE(review): the tests below use `ConversionError` and `ConversionOptions`
// through `super::*`; presumably they arrive via this glob — confirm.
pub use exports::*;
// Selected data types from `types`, re-exported by name.
pub use types::{
AnnotationKind, ConversionResult, DocumentNode, DocumentStructure, GridCell, NodeContent,
ProcessingWarning, TableData, TableGrid, TextAnnotation, WarningKind,
};
// Visitor types, available only with the `visitor` feature.
#[cfg(feature = "visitor")]
pub use visitor::{NodeContext, NodeType, VisitResult};
// The `convert` entry point, defined in the private `convert_api` module.
pub use convert_api::convert;
#[cfg(test)]
mod basic_tests {
    use super::*;

    /// Text containing a run of 20 NUL bytes must be rejected with
    /// `ConversionError::InvalidInput`.
    #[test]
    fn test_binary_input_rejected() {
        let nul_run = "\0".repeat(20);
        let payload = format!("abc{}def", nul_run);
        let outcome = convert(&payload, None);
        assert!(matches!(outcome, Err(ConversionError::InvalidInput(_))));
    }

    /// The literal `%PDF-1.7` must be rejected with
    /// `ConversionError::InvalidInput`.
    #[test]
    fn test_binary_magic_rejected() {
        let outcome = convert("%PDF-1.7", None);
        assert!(matches!(outcome, Err(ConversionError::InvalidInput(_))));
    }

    /// Bytes shaped like BOM-prefixed UTF-16LE (`FF FE` followed by characters
    /// interleaved with NULs), lossily decoded into a `String`, must convert
    /// successfully rather than be rejected.
    #[test]
    fn test_utf16_hint_recovered() {
        let raw: &[u8] = b"\xFF\xFE<\0h\0t\0m\0l\0>\0";
        let decoded = String::from_utf8_lossy(raw).into_owned();
        let outcome = convert(&decoded, None);
        assert!(
            outcome.is_ok(),
            "UTF-16 input should be recovered instead of rejected"
        );
    }

    /// Markup-free input converts successfully and its text appears in the
    /// resulting content.
    #[test]
    fn test_plain_text_allowed() {
        let rendered = convert("Just text", None)
            .unwrap()
            .content
            .unwrap_or_default();
        assert!(rendered.contains("Just text"));
    }

    /// With `escape_asterisks` and `escape_underscores` enabled, literal `*`
    /// and `_` in plain text come out backslash-escaped.
    #[test]
    fn test_plain_text_escaped_when_enabled() {
        let escaping = ConversionOptions {
            escape_underscores: true,
            escape_asterisks: true,
            ..ConversionOptions::default()
        };
        let rendered = convert("Text *asterisks* _underscores_", Some(escaping))
            .unwrap()
            .content
            .unwrap_or_default();
        assert!(rendered.contains(r"\*asterisks\*"));
        assert!(rendered.contains(r"\_underscores\_"));
    }
}