pub mod markdown;
pub mod pdf;
pub mod toc;
pub mod types;
pub use types::{DocumentFormat, DocumentMeta, ParseResult, RawNode};
use std::path::Path;
use crate::error::Result;
use crate::index::parse::markdown::MarkdownParser;
pub async fn parse_content(content: &str, format: DocumentFormat) -> Result<ParseResult> {
match format {
DocumentFormat::Markdown => {
let parser = MarkdownParser::new();
parser.parse(content).await
}
DocumentFormat::Pdf => Err(crate::Error::Parse(
"PDF requires bytes, not string content".to_string(),
)),
}
}
/// Parses the file at `path` as a document of the given `format`.
///
/// The work is delegated to the `parse_file` method of the parser that
/// matches `format`: [`MarkdownParser`] for Markdown, `pdf::PdfParser` for PDF.
///
/// # Errors
///
/// Forwards any error returned by the selected parser.
pub async fn parse_file(path: &Path, format: DocumentFormat) -> Result<ParseResult> {
    match format {
        DocumentFormat::Markdown => MarkdownParser::new().parse_file(path).await,
        DocumentFormat::Pdf => pdf::PdfParser::new().parse_file(path).await,
    }
}
pub async fn parse_bytes(bytes: &[u8], format: DocumentFormat) -> Result<ParseResult> {
match format {
DocumentFormat::Markdown => {
let content = std::str::from_utf8(bytes)
.map_err(|e| crate::Error::Parse(format!("Invalid UTF-8 content: {}", e)))?;
let parser = MarkdownParser::new();
parser.parse(content).await
}
DocumentFormat::Pdf => {
let parser = pdf::PdfParser::new();
parser.parse_bytes_async(bytes, None).await
}
}
}
/// Looks up the [`DocumentFormat`] associated with a file extension.
///
/// This is a thin convenience wrapper that forwards `ext` unchanged to
/// [`DocumentFormat::from_extension`] and returns its result as-is.
/// `None` is whatever that method reports for an extension it does not map
/// to a format.
// NOTE(review): whether `ext` may include a leading dot or differ in case is
// decided by `DocumentFormat::from_extension` - confirm in `types`.
pub fn format_from_extension(ext: &str) -> Option<DocumentFormat> {
DocumentFormat::from_extension(ext)
}