use meta_language::{
canonical_document_format, document_format_profile, docx_package_is_recognized,
docx_profile_is_recognized, parse_markup_document, pdf_profile_is_recognized,
render_docx_package, LinkNetwork, ParseConfiguration, CROSS_FORMAT_CONCEPTS, DOCUMENT_FORMATS,
};
/// Identifier of the document format engine.
///
/// The value is the name of the `meta_language` crate whose items this file
/// imports.
pub const DOCUMENT_FORMAT_ENGINE: &str = "meta_language";
/// Summary of which cross-format concepts a document format handles, as
/// produced by `document_format_capabilities`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DocumentFormatCapabilities {
/// Canonical label of the format, as returned by `canonical_document_format`.
pub format: String,
/// Entries of `CROSS_FORMAT_CONCEPTS` for which the format's profile reports
/// `supports_concept`.
pub native_concepts: Vec<String>,
/// `(concept, fallback)` pairs for every entry of `CROSS_FORMAT_CONCEPTS`
/// whose `concept_fallback` lookup on the format's profile returned a value.
pub fallbacks: Vec<(String, String)>,
}
/// Result of converting a document between formats, as produced by
/// `convert_document_format`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DocumentConversion {
/// Canonical label of the format the input text was parsed as.
pub source_format: String,
/// Canonical label of the format the output text was reconstructed as.
pub target_format: String,
/// Text produced by reconstructing the parsed network in the target format.
pub output: String,
/// Capabilities resolved for the target format.
pub target_capabilities: DocumentFormatCapabilities,
/// Rendered DOCX package bytes. `convert_document_format` only fills this
/// when the target is `"DOCX"` and the source parses to a document with at
/// least one block; otherwise it is `None`.
pub package_bytes: Option<Vec<u8>>,
}
/// Returns the `DOCUMENT_FORMATS` list exported by `meta_language`.
#[must_use]
pub const fn supported_document_formats() -> &'static [&'static str] {
DOCUMENT_FORMATS
}
/// Returns the `CROSS_FORMAT_CONCEPTS` list exported by `meta_language`.
#[must_use]
pub const fn cross_format_document_concepts() -> &'static [&'static str] {
CROSS_FORMAT_CONCEPTS
}
/// Resolves `format` to its canonical label.
///
/// Forwards directly to `canonical_document_format` and returns its result
/// unchanged, including `None` when that lookup yields no label.
#[must_use]
pub fn canonical_document_format_label(format: &str) -> Option<&'static str> {
canonical_document_format(format)
}
/// Describes how `format` handles each entry of `CROSS_FORMAT_CONCEPTS`.
///
/// The format name is canonicalised first and its profile is looked up with
/// `document_format_profile`. Concepts the profile supports are listed in
/// `native_concepts`; concepts for which the profile offers a fallback are
/// listed in `fallbacks` as `(concept, fallback)` pairs. Both lists keep the
/// order of `CROSS_FORMAT_CONCEPTS`.
///
/// Returns `None` when `format` has no canonical label or no profile.
#[must_use]
pub fn document_format_capabilities(format: &str) -> Option<DocumentFormatCapabilities> {
    let label = canonical_document_format(format)?;
    let profile = document_format_profile(label)?;

    // First pass: concepts the profile reports as supported.
    let mut native_concepts: Vec<String> = Vec::new();
    for concept in CROSS_FORMAT_CONCEPTS.iter() {
        if profile.supports_concept(concept) {
            native_concepts.push(String::from(*concept));
        }
    }

    // Second pass: concepts the profile maps to a fallback.
    let mut fallbacks: Vec<(String, String)> = Vec::new();
    for concept in CROSS_FORMAT_CONCEPTS.iter() {
        if let Some(fallback) = profile.concept_fallback(*concept) {
            fallbacks.push((String::from(*concept), fallback.to_owned()));
        }
    }

    Some(DocumentFormatCapabilities {
        format: label.to_owned(),
        native_concepts,
        fallbacks,
    })
}
/// Reports whether `text` is recognised as a document of the given `format`.
///
/// The format name is canonicalised first; an unknown format is never
/// recognised. `"PDF"` and `"DOCX"` are checked with their dedicated profile
/// recognisers. Any other canonical format is recognised when
/// `parse_markup_document` yields a document containing at least one block.
#[must_use]
pub fn document_profile_is_recognized(format: &str, text: &str) -> bool {
    let Some(label) = canonical_document_format(format) else {
        return false;
    };

    match label {
        "PDF" => pdf_profile_is_recognized(text),
        "DOCX" => docx_profile_is_recognized(text),
        markup => match parse_markup_document(markup, text) {
            Some(document) => !document.blocks.is_empty(),
            None => false,
        },
    }
}
/// Reports whether `bytes` is recognised as a binary package of `format`.
///
/// Only the canonical `"DOCX"` format is checked, via
/// `docx_package_is_recognized`; every other format, including one with no
/// canonical label, yields `false` without inspecting `bytes`.
#[must_use]
pub fn document_package_is_recognized(format: &str, bytes: &[u8]) -> bool {
    matches!(canonical_document_format(format), Some("DOCX"))
        && docx_package_is_recognized(bytes)
}
/// Converts `source_text` from `source_format` into `target_format`.
///
/// Both format names are canonicalised with `canonical_document_format`. The
/// source text is parsed into a `LinkNetwork` and reconstructed as the target
/// format, using `ParseConfiguration::default()` for both steps. The result
/// also carries the target format's capabilities and, where
/// `package_bytes_for_target` produces them, rendered package bytes.
///
/// Returns `None` when either format has no canonical label or when
/// `document_format_capabilities` has nothing for the target format.
#[must_use]
pub fn convert_document_format(
    source_format: &str,
    target_format: &str,
    source_text: &str,
) -> Option<DocumentConversion> {
    let source = canonical_document_format(source_format)?;
    let target = canonical_document_format(target_format)?;

    // Resolve every fallible lookup before parsing: if the target has no
    // capabilities the conversion is rejected anyway, so there is no point in
    // building and reconstructing the network first only to discard it.
    let target_capabilities = document_format_capabilities(target)?;

    let network = LinkNetwork::parse(source_text, source, ParseConfiguration::default());
    let output = network.reconstruct_text_as(target, ParseConfiguration::default());
    let package_bytes = package_bytes_for_target(source, target, source_text);

    Some(DocumentConversion {
        source_format: source.to_owned(),
        target_format: target.to_owned(),
        output,
        target_capabilities,
        package_bytes,
    })
}
/// Builds binary package bytes for targets that have a package form.
///
/// Only a `target_format` of exactly `"DOCX"` produces bytes: the source text
/// is parsed with `parse_markup_document` and, if the resulting document has
/// at least one block, rendered with `render_docx_package`. Any other target,
/// a failed parse, or an empty document yields `None`.
fn package_bytes_for_target(
    source_format: &str,
    target_format: &str,
    source_text: &str,
) -> Option<Vec<u8>> {
    match target_format {
        "DOCX" => {
            let parsed = parse_markup_document(source_format, source_text)?;
            if parsed.blocks.is_empty() {
                None
            } else {
                Some(render_docx_package(&parsed))
            }
        }
        _ => None,
    }
}