pub mod markdown;
pub mod structured;
pub mod text;
use crate::compress::{compress_block, compress_text};
use crate::types::{ContentType, UrlReference};
/// Reports whether `name` is one of the names this module treats as skippable.
///
/// The comparison is an exact, case-sensitive string match against a fixed
/// list; any other input (including the empty string) yields `false`.
// NOTE(review): the list looks like HTML element names whose content the
// converters ignore — confirm against the callers in the submodules.
pub(crate) fn is_skippable(name: &str) -> bool {
    const SKIPPED_NAMES: [&str; 7] = [
        "script", "style", "noscript", "svg", "head", "template", "iframe",
    ];
    SKIPPED_NAMES.contains(&name)
}
/// Output of [`convert`]: the rendered document plus the URL references that
/// were collected while producing it.
pub struct Converted {
/// The rendered document. Depending on the requested content type this is
/// compressed plain text (followed by a rendered reference block when there
/// is one), compressed Markdown, or the JSON produced by `structured::to_json`.
pub content: String,
/// URL references gathered during conversion. Populated for the text and
/// structured outputs; always empty for Markdown output.
pub references: Vec<UrlReference>,
}
/// Converts an HTML document into the representation selected by `content_type`.
///
/// * `html` – the HTML source, passed unchanged to the selected converter.
/// * `base_url` – passed unchanged to the selected converter.
/// * `content_type` – chooses which converter runs.
///
/// What each variant produces:
///
/// * [`ContentType::Text`] – the text from `text::html_to_text_with_refs` is
///   run through `compress_block`. If `text::render_references` returns a
///   non-empty block, it is appended after a blank line. The collected
///   references are returned as well.
/// * [`ContentType::Markdown`] – the Markdown from `markdown::html_to_markdown`
///   is run through `compress_block`. No references are returned.
/// * [`ContentType::Structured`] – the document from
///   `structured::html_to_structured` is serialised with `structured::to_json`
///   (no compression step is applied here). The document's own references are
///   returned.
pub fn convert(html: &str, base_url: &str, content_type: ContentType) -> Converted {
    match content_type {
        ContentType::Text => {
            let (raw_text, references) = text::html_to_text_with_refs(html, base_url);
            let compressed = compress_block(&raw_text);
            let rendered_refs = text::render_references(&references);

            // Only attach the reference block when there is something to show.
            let content = if !rendered_refs.is_empty() {
                format!("{}\n\n{}", compressed, rendered_refs)
            } else {
                compressed
            };

            Converted {
                content,
                references,
            }
        }
        ContentType::Markdown => {
            let raw_markdown = markdown::html_to_markdown(html, base_url);
            let content = compress_block(&raw_markdown);

            Converted {
                content,
                references: Vec::new(),
            }
        }
        ContentType::Structured => {
            let document = structured::html_to_structured(html, base_url);

            // No runtime effect: this only references `compress_text`.
            // NOTE(review): presumably here so the import stays "used" — confirm
            // before removing either this line or the import.
            let _ = compress_text;

            // Serialise first (borrows `document`), then move its references out.
            let content = structured::to_json(&document);

            Converted {
                content,
                references: document.references,
            }
        }
    }
}