1pub mod markdown;
4pub mod structured;
5pub mod text;
6
7use crate::compress::{compress_block, compress_text};
8use crate::types::{ContentType, UrlReference};
9
/// Reports whether `name` is one of the element names on the skip list.
///
/// The comparison is an exact, case-sensitive string match; anything not
/// listed below (including the empty string) yields `false`.
pub(crate) fn is_skippable(name: &str) -> bool {
    // Element names on the skip list.
    const SKIPPED: [&str; 7] = [
        "script", "style", "noscript", "svg", "head", "template", "iframe",
    ];
    SKIPPED.contains(&name)
}
19
20pub struct Converted {
22 pub content: String,
23 pub references: Vec<UrlReference>,
24}
25
26pub fn convert(html: &str, base_url: &str, content_type: ContentType) -> Converted {
31 match content_type {
32 ContentType::Text => {
33 let (body, references) = text::html_to_text_with_refs(html, base_url);
34 let body = compress_block(&body);
35 let refs_block = text::render_references(&references);
36 let content = if refs_block.is_empty() {
37 body
38 } else {
39 format!("{}\n\n{}", body, refs_block)
40 };
41 Converted {
42 content,
43 references,
44 }
45 }
46 ContentType::Markdown => {
47 let md = markdown::html_to_markdown(html, base_url);
48 Converted {
49 content: compress_block(&md),
50 references: Vec::new(),
51 }
52 }
53 ContentType::Structured => {
54 let doc = structured::html_to_structured(html, base_url);
55 let _ = compress_text; Converted {
57 content: structured::to_json(&doc),
58 references: doc.references,
59 }
60 }
61 }
62}