use crate::core::Document;
use crate::text::document_structure::DocumentStructure;
/// Common interface for parsers that turn raw text into a [`DocumentStructure`].
///
/// Implementors must be `Send + Sync`, so a parser (including a boxed
/// `dyn LayoutParser`) can be moved to or shared between threads.
pub trait LayoutParser: Send + Sync {
/// Parses `content` and returns the resulting [`DocumentStructure`].
fn parse(&self, content: &str) -> DocumentStructure;
/// Returns `true` if this parser handles the given `format`.
///
/// NOTE(review): presumably `format` is a short identifier such as
/// `"md"` or `"html"`; the accepted spelling/casing is defined by each
/// implementor — confirm against the implementations.
fn supports_format(&self, format: &str) -> bool;
/// Returns a static name for this parser.
///
/// NOTE(review): presumably used for logging/diagnostics — confirm against callers.
fn name(&self) -> &'static str;
}
/// Stateless factory that selects a boxed [`LayoutParser`] implementation.
///
/// This is a unit struct; it carries no data and is used only as a
/// namespace for its associated functions.
pub struct LayoutParserFactory;
impl LayoutParserFactory {
    /// Picks a layout parser for `document` by inspecting its title and content.
    ///
    /// Checks run in priority order and the first match wins:
    ///
    /// 1. Title ends with `.md` or `.markdown` -> Markdown parser.
    /// 2. Title ends with `.html` or `.htm` -> HTML parser.
    /// 3. Content contains `<h1`, `<h2`, `<html` or `<!DOCTYPE` -> HTML parser.
    /// 4. Any content line whose first non-whitespace character is `#`
    ///    -> Markdown parser.
    /// 5. Otherwise -> plain-text parser.
    ///
    /// The extension and HTML-marker checks ignore ASCII case, so titles such
    /// as `README.MD` and content such as `<!doctype html>` or `<HTML>` are
    /// recognised. This mirrors [`Self::create_for_format`], which is also
    /// case-insensitive.
    pub fn create_for_document(document: &Document) -> Box<dyn LayoutParser> {
        let title: &str = &document.title;
        let content: &str = &document.content;

        // A recognised extension in the title is the strongest signal.
        if Self::has_extension(title, &[".md", ".markdown"]) {
            return Box::new(crate::text::parsers::MarkdownLayoutParser::new());
        }
        if Self::has_extension(title, &[".html", ".htm"]) {
            return Box::new(crate::text::parsers::HtmlLayoutParser::new());
        }

        // Content sniffing: HTML is tested before Markdown, so a document
        // containing both HTML markers and `#` lines is treated as HTML.
        if Self::contains_html_marker(content) {
            return Box::new(crate::text::parsers::HtmlLayoutParser::new());
        }
        if content
            .lines()
            .any(|line| line.trim_start().starts_with('#'))
        {
            return Box::new(crate::text::parsers::MarkdownLayoutParser::new());
        }

        Box::new(crate::text::parsers::PlainTextLayoutParser::new())
    }

    /// Creates a layout parser for an explicit format name.
    ///
    /// Matching is case-insensitive. `"markdown"`/`"md"` select the Markdown
    /// parser and `"html"`/`"htm"` the HTML parser. `"text"`, `"txt"`,
    /// `"plain"` and any unrecognised name select the plain-text parser.
    pub fn create_for_format(format: &str) -> Box<dyn LayoutParser> {
        match format.to_lowercase().as_str() {
            "markdown" | "md" => Box::new(crate::text::parsers::MarkdownLayoutParser::new()),
            "html" | "htm" => Box::new(crate::text::parsers::HtmlLayoutParser::new()),
            "text" | "txt" | "plain" => {
                Box::new(crate::text::parsers::PlainTextLayoutParser::new())
            },
            // Unknown formats fall back to plain text rather than failing.
            _ => Box::new(crate::text::parsers::PlainTextLayoutParser::new()),
        }
    }

    /// Returns `true` if `title` ends with any of `extensions`, ignoring ASCII case.
    ///
    /// The comparison is done on bytes so it never slices inside a multi-byte
    /// UTF-8 character.
    fn has_extension(title: &str, extensions: &[&str]) -> bool {
        let title = title.as_bytes();
        extensions.iter().any(|ext| {
            let ext = ext.as_bytes();
            title.len() >= ext.len()
                && title[title.len() - ext.len()..].eq_ignore_ascii_case(ext)
        })
    }

    /// Returns `true` if `content` contains one of the HTML markers used for
    /// sniffing (`<h1`, `<h2`, `<html`, `<!doctype`), ignoring ASCII case.
    ///
    /// Every marker starts with `<`, so the scan only inspects positions
    /// holding that byte and needs no lower-cased copy of the content.
    fn contains_html_marker(content: &str) -> bool {
        const MARKERS: [&[u8]; 4] = [b"<h1", b"<h2", b"<html", b"<!doctype"];

        let bytes = content.as_bytes();
        bytes
            .iter()
            .enumerate()
            .filter(|&(_, &byte)| byte == b'<')
            .any(|(start, _)| {
                let rest = &bytes[start..];
                MARKERS.iter().any(|marker| {
                    rest.len() >= marker.len()
                        && rest[..marker.len()].eq_ignore_ascii_case(marker)
                })
            })
    }
}