use crate::language_profile::LanguageProfile;
use crate::link_network::LinkType;
/// Canonical names of the document formats handled by this module, spelled
/// exactly as [`canonical_document_format`] returns them.
pub const DOCUMENT_FORMATS: &[&str] = &["txt", "Markdown", "HTML", "PDF", "DOCX"];
/// Concept names shared across all document formats.
///
/// Each format profile built in this file declares every one of these names
/// either as a supported concept or with a fallback description.
pub const CROSS_FORMAT_CONCEPTS: &[&str] = &[
"heading",
"paragraph",
"bullet-list",
"ordered-list",
"list-item",
"strong",
"emphasis",
"hyperlink",
];
#[must_use]
pub fn document_format_profile(format: &str) -> Option<LanguageProfile> {
let canonical = canonical_document_format(format)?;
let profile = base_profile(canonical);
Some(match canonical {
"txt" => txt_profile(profile),
"Markdown" => markdown_profile(profile),
"HTML" => html_profile(profile),
"PDF" => pdf_profile(profile),
"DOCX" => docx_profile(profile),
_ => unreachable!("canonical_document_format only yields known formats"),
})
}
/// Resolves a format name or alias to its canonical spelling.
///
/// Matching ignores ASCII case only; the input is not trimmed or otherwise
/// normalised. Returns `None` for anything that is not a known alias.
#[must_use]
pub fn canonical_document_format(format: &str) -> Option<&'static str> {
    // Each entry pairs a canonical name with the lowercase aliases mapping to it.
    const ALIASES: &[(&str, &[&str])] = &[
        ("txt", &["txt", "text", "plain-text", "plaintext"]),
        ("Markdown", &["markdown", "md"]),
        ("HTML", &["html", "htm"]),
        ("PDF", &["pdf"]),
        ("DOCX", &["docx"]),
    ];

    ALIASES
        .iter()
        .find(|(_, aliases)| {
            aliases
                .iter()
                .any(|alias| alias.eq_ignore_ascii_case(format))
        })
        .map(|&(canonical, _)| canonical)
}
/// Creates the starting profile shared by every document format.
///
/// The canonical name is passed for both arguments of `LanguageProfile::new`,
/// and the `Document`, `Concept` and `Token` link types are added in that order.
fn base_profile(canonical: &str) -> LanguageProfile {
    let profile = LanguageProfile::new(canonical, canonical);
    let profile = profile.with_link_type(LinkType::Document);
    let profile = profile.with_link_type(LinkType::Concept);
    profile.with_link_type(LinkType::Token)
}
/// Adds each name in `concepts` to `profile` via `with_concept`, in iteration
/// order, and returns the resulting profile.
fn with_supported<'a>(
    profile: LanguageProfile,
    concepts: impl IntoIterator<Item = &'a str>,
) -> LanguageProfile {
    concepts
        .into_iter()
        .fold(profile, |acc, concept| acc.with_concept(concept))
}
/// Specialises `profile` for plain text.
///
/// Only `paragraph` is declared as supported; each of the other seven
/// cross-format concepts is registered with a fallback description.
fn txt_profile(profile: LanguageProfile) -> LanguageProfile {
    // (concept, fallback description) pairs, registered in this order.
    const FALLBACKS: [(&str, &str); 7] = [
        ("heading", "flattened to a plain paragraph (heading level dropped)"),
        ("bullet-list", "flattened to plain lines with a `- ` marker per item"),
        ("ordered-list", "flattened to plain lines with a `N. ` marker per item"),
        ("list-item", "rendered as a single plain line"),
        ("strong", "rendered as unstyled plain text"),
        ("emphasis", "rendered as unstyled plain text"),
        ("hyperlink", "rendered as its visible text (URL dropped)"),
    ];

    let mut profile = with_supported(profile, ["paragraph"]);
    for (concept, description) in FALLBACKS {
        profile = profile.with_concept_fallback(concept, description);
    }
    profile
}
/// Specialises `profile` for Markdown.
///
/// Every cross-format concept except `ordered-list` is declared as supported;
/// `ordered-list` is registered with a fallback description instead.
fn markdown_profile(profile: LanguageProfile) -> LanguageProfile {
    const SUPPORTED: [&str; 7] = [
        "heading",
        "paragraph",
        "bullet-list",
        "list-item",
        "strong",
        "emphasis",
        "hyperlink",
    ];

    let supported = with_supported(profile, SUPPORTED);
    supported.with_concept_fallback(
        "ordered-list",
        "rendered with bullet `- ` markers (ordering not preserved by the Markdown profile)",
    )
}
/// Specialises `profile` for HTML.
///
/// Every name in `CROSS_FORMAT_CONCEPTS` is declared as supported, so no
/// fallbacks are registered.
fn html_profile(profile: LanguageProfile) -> LanguageProfile {
    let every_concept = CROSS_FORMAT_CONCEPTS.iter().copied();
    with_supported(profile, every_concept)
}
/// Specialises `profile` for PDF.
///
/// Every cross-format concept except `hyperlink` is declared as supported;
/// `hyperlink` is registered with a fallback description instead.
fn pdf_profile(profile: LanguageProfile) -> LanguageProfile {
    const SUPPORTED: [&str; 7] = [
        "heading",
        "paragraph",
        "bullet-list",
        "ordered-list",
        "list-item",
        "strong",
        "emphasis",
    ];

    let supported = with_supported(profile, SUPPORTED);
    supported.with_concept_fallback(
        "hyperlink",
        "rendered as its visible text, unstyled (URL dropped)",
    )
}
/// Specialises `profile` for DOCX.
///
/// Every cross-format concept except `hyperlink` is declared as supported;
/// `hyperlink` is registered with a fallback description instead.
fn docx_profile(profile: LanguageProfile) -> LanguageProfile {
    const SUPPORTED: [&str; 7] = [
        "heading",
        "paragraph",
        "bullet-list",
        "ordered-list",
        "list-item",
        "strong",
        "emphasis",
    ];

    let supported = with_supported(profile, SUPPORTED);
    supported.with_concept_fallback(
        "hyperlink",
        "rendered as its visible text, unstyled (URL dropped)",
    )
}