use kawat_metadata::DocumentMetadata;
use serde::{Deserialize, Serialize};
use crate::config::ExtractorOptions;
/// A document made up of its metadata, a body string, and optional
/// comments / raw-text strings.
///
/// Serialization is derived via serde; see the per-field notes for how
/// individual fields are treated.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
/// Document metadata. Because of `#[serde(flatten)]`, its fields are
/// (de)serialized at the same level as `body` rather than nested under a
/// `metadata` key.
#[serde(flatten)]
pub metadata: DocumentMetadata,
/// Body text. This is what `to_formatted_string` returns and what `as_map`
/// stores under the `"body"` key.
pub body: String,
/// Optional comments text; exposed by `as_map` under the `"comments"` key.
/// NOTE(review): presumably comments associated with the document —
/// confirm against the code that fills it.
pub comments: Option<String>,
/// Optional raw text. It is not included in `as_map`.
/// NOTE(review): how this differs from `body` is not visible here — confirm.
pub raw_text: Option<String>,
/// Optional text excluded from serde: `#[serde(skip)]` means it is never
/// serialized, and deserialization fills it with its default (`None`).
/// NOTE(review): the intended use of this field is not visible here.
#[serde(skip)]
pub text: Option<String>,
}
impl Document {
    /// Renders the document as a single string.
    ///
    /// Currently this is an owned copy of `body`; the `_options` argument is
    /// accepted but not consulted.
    pub fn to_formatted_string(&self, _options: &ExtractorOptions) -> String {
        self.body.as_str().to_owned()
    }

    /// Builds a string-keyed map of selected document fields.
    ///
    /// The map always contains exactly these keys: `"title"`, `"author"`,
    /// `"url"`, `"date"` (cloned from `metadata`), `"body"` (always `Some`)
    /// and `"comments"`. Missing optional values are kept as `None` entries
    /// rather than being omitted. `raw_text` and `text` are not included.
    pub fn as_map(&self) -> std::collections::HashMap<String, Option<String>> {
        let meta = &self.metadata;
        std::collections::HashMap::from([
            (String::from("title"), meta.title.clone()),
            (String::from("author"), meta.author.clone()),
            (String::from("url"), meta.url.clone()),
            (String::from("date"), meta.date.clone()),
            (String::from("body"), Some(self.body.clone())),
            (String::from("comments"), self.comments.clone()),
        ])
    }
}