use super::{ExtractionQuality, FormField, Page, Resource};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// In-memory model of an extracted document: metadata, pages, shared
/// resources, an optional outline, extraction-quality information and form fields.
///
/// All fields are public; the methods on `Document` are thin conveniences
/// over them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Document {
/// Document-level metadata (title, author, version string, ...).
pub metadata: Metadata,
/// Pages in insertion order. `Document::get_page` addresses them 1-based.
pub pages: Vec<Page>,
/// Resources keyed by a string identifier chosen by the caller.
pub resources: HashMap<String, Resource>,
/// Hierarchical outline, if one is available; `Document::new` leaves it `None`.
pub outline: Option<Outline>,
/// Extraction-quality information; its meaning is defined by
/// `ExtractionQuality`, which is declared outside this file.
pub extraction_quality: ExtractionQuality,
/// Form fields attached to the document.
/// NOTE(review): presumably interactive form fields — confirm against `FormField`.
pub form_fields: Vec<FormField>,
}
impl Document {
pub fn new() -> Self {
Self {
metadata: Metadata::default(),
pages: Vec::new(),
resources: HashMap::new(),
outline: None,
extraction_quality: ExtractionQuality::default(),
form_fields: Vec::new(),
}
}
pub fn page_count(&self) -> u32 {
self.pages.len() as u32
}
pub fn get_page(&self, page_num: u32) -> Option<&Page> {
if page_num == 0 {
return None;
}
self.pages.get((page_num - 1) as usize)
}
pub fn add_page(&mut self, page: Page) {
self.pages.push(page);
}
pub fn add_resource(&mut self, id: String, resource: Resource) {
self.resources.insert(id, resource);
}
pub fn get_resource(&self, id: &str) -> Option<&Resource> {
self.resources.get(id)
}
pub fn is_empty(&self) -> bool {
self.pages.is_empty()
}
pub fn plain_text(&self) -> String {
self.pages
.iter()
.map(|page| page.plain_text())
.collect::<Vec<_>>()
.join("\n\n")
}
}
impl Default for Document {
fn default() -> Self {
Self::new()
}
}
/// Descriptive metadata for a document.
///
/// The derived `Default` yields `None` for every optional field, an empty
/// `pdf_version`, a `page_count` of 0 and `false` for both flags.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Metadata {
/// Document title, if present.
pub title: Option<String>,
/// Document author, if present.
pub author: Option<String>,
/// Document subject, if present.
pub subject: Option<String>,
/// Keywords as one free-form string; no separator format is enforced here.
pub keywords: Option<String>,
/// Creator entry, if present.
/// NOTE(review): presumably the authoring application — confirm against the parser.
pub creator: Option<String>,
/// Producer entry, if present.
/// NOTE(review): presumably the tool that wrote the file — confirm against the parser.
pub producer: Option<String>,
/// Creation timestamp in UTC, if present.
pub created: Option<DateTime<Utc>>,
/// Last-modification timestamp in UTC, if present.
pub modified: Option<DateTime<Utc>>,
/// Version string (the tests in this file use `"1.7"`); empty by default.
pub pdf_version: String,
/// Page count recorded in the metadata. It is stored independently of
/// `Document::pages` and is not updated by any code in this file.
pub page_count: u32,
/// Encryption flag. NOTE(review): presumably set by the parser for encrypted input — confirm.
pub encrypted: bool,
/// Tagged flag. NOTE(review): presumably marks a tagged (structured) PDF — confirm.
pub tagged: bool,
}
impl Metadata {
pub fn with_version(version: impl Into<String>) -> Self {
Self {
pdf_version: version.into(),
..Default::default()
}
}
pub fn to_yaml_frontmatter(&self) -> String {
let mut lines = vec!["---".to_string()];
if let Some(ref title) = self.title {
lines.push(format!("title: \"{}\"", escape_yaml(title)));
}
if let Some(ref author) = self.author {
lines.push(format!("author: \"{}\"", escape_yaml(author)));
}
if let Some(ref keywords) = self.keywords {
if !keywords.trim().is_empty() {
lines.push(format!("keywords: \"{}\"", escape_yaml(keywords)));
}
}
lines.push(format!("pages: {}", self.page_count));
lines.push("---".to_string());
lines.push(String::new());
lines.join("\n")
}
}
/// Escapes `s` for use inside a YAML double-quoted scalar.
///
/// Backslash, double quote and line feed become `\\`, `\"` and `\n`.
/// Carriage return and tab become `\r` and `\t`, and any other control
/// character is written as a two-digit `\xNN` escape. Without this, a raw
/// carriage return would be folded as a line break by a YAML parser and other
/// control characters would make the output invalid YAML. All remaining
/// characters are copied unchanged.
///
/// The input is scanned once, so text inserted by one escape is never
/// re-escaped by another.
fn escape_yaml(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            // `char::is_control` only matches code points up to U+009F, so two
            // hex digits always suffice for the `\xNN` form.
            c if c.is_control() => escaped.push_str(&format!("\\x{:02X}", c as u32)),
            c => escaped.push(c),
        }
    }
    escaped
}
/// Hierarchical document outline: a list of top-level items, each of which
/// may carry nested children.
///
/// The derived `Default` is an outline with no items.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Outline {
/// Top-level outline entries, in insertion order.
pub items: Vec<OutlineItem>,
}
impl Outline {
pub fn new() -> Self {
Self { items: Vec::new() }
}
pub fn add_item(&mut self, item: OutlineItem) {
self.items.push(item);
}
pub fn is_empty(&self) -> bool {
self.items.is_empty()
}
pub fn total_items(&self) -> usize {
fn count_items(items: &[OutlineItem]) -> usize {
items
.iter()
.map(|item| 1 + count_items(&item.children))
.sum()
}
count_items(&self.items)
}
}
/// A single outline entry with an optional target page and nested child entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutlineItem {
/// Title text of the entry.
pub title: String,
/// Target page, if known.
/// NOTE(review): presumably 1-based to match `Document::get_page` — confirm with the code that builds outlines.
pub page: Option<u32>,
/// Nesting level supplied by the caller. The tests in this file use 0 for
/// top-level entries and 1 for their children; nothing in this file derives
/// it from, or validates it against, the entry's position in the tree.
pub level: u8,
/// Entries nested directly beneath this one, in insertion order.
pub children: Vec<OutlineItem>,
}
impl OutlineItem {
pub fn new(title: impl Into<String>, page: Option<u32>, level: u8) -> Self {
Self {
title: title.into(),
page,
level,
children: Vec::new(),
}
}
pub fn add_child(&mut self, child: OutlineItem) {
self.children.push(child);
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created document has no pages.
    #[test]
    fn test_document_new() {
        let doc = Document::new();
        assert!(doc.is_empty());
        assert_eq!(doc.page_count(), 0);
    }

    /// Page numbers are 1-based: 0 is never valid, and an empty document has
    /// no page 1. Lookups on an empty document return nothing.
    #[test]
    fn test_document_empty_lookups() {
        let doc = Document::default();
        assert!(doc.get_page(0).is_none());
        assert!(doc.get_page(1).is_none());
        assert!(doc.get_resource("missing").is_none());
        assert_eq!(doc.plain_text(), "");
    }

    /// Set fields are emitted as quoted strings; fields outside the
    /// front-matter key set (such as the version) are not.
    #[test]
    fn test_metadata_frontmatter() {
        let mut metadata = Metadata::with_version("1.7");
        metadata.title = Some("Test Document".to_string());
        metadata.author = Some("John Doe".to_string());
        metadata.page_count = 10;
        let yaml = metadata.to_yaml_frontmatter();
        assert!(yaml.starts_with("---\n"));
        assert!(yaml.ends_with("---\n"));
        assert!(yaml.contains("title: \"Test Document\""));
        assert!(yaml.contains("author: \"John Doe\""));
        assert!(yaml.contains("pages: 10"));
        assert!(!yaml.contains("pdf_version"));
    }

    /// With nothing set, only the delimiters and the page count are written.
    #[test]
    fn test_metadata_frontmatter_minimal() {
        let yaml = Metadata::default().to_yaml_frontmatter();
        assert_eq!(yaml, "---\npages: 0\n---\n");
    }

    /// Quotes, backslashes and newlines in values are escaped, and blank
    /// keywords are omitted entirely.
    #[test]
    fn test_metadata_frontmatter_escaping() {
        let mut metadata = Metadata::with_version("1.7");
        metadata.title = Some("A \"quoted\" \\ title\nline two".to_string());
        metadata.keywords = Some("   ".to_string());
        let yaml = metadata.to_yaml_frontmatter();
        assert!(yaml.contains("title: \"A \\\"quoted\\\" \\\\ title\\nline two\""));
        assert!(!yaml.contains("keywords"));
    }

    /// An outline without items is empty and counts zero entries.
    #[test]
    fn test_outline_empty() {
        let outline = Outline::new();
        assert!(outline.is_empty());
        assert_eq!(outline.total_items(), 0);
    }

    /// `total_items` counts top-level entries together with their children.
    #[test]
    fn test_outline() {
        let mut outline = Outline::new();
        let mut chapter1 = OutlineItem::new("Chapter 1", Some(1), 0);
        chapter1.add_child(OutlineItem::new("Section 1.1", Some(2), 1));
        chapter1.add_child(OutlineItem::new("Section 1.2", Some(5), 1));
        outline.add_item(chapter1);
        assert!(!outline.is_empty());
        assert_eq!(outline.total_items(), 3);
    }

    /// Entries nested more than one level deep are counted as well.
    #[test]
    fn test_outline_nested_depth() {
        let mut section = OutlineItem::new("Section 1.1", Some(2), 1);
        section.add_child(OutlineItem::new("Subsection 1.1.1", None, 2));
        let mut chapter = OutlineItem::new("Chapter 1", Some(1), 0);
        chapter.add_child(section);
        let mut outline = Outline::new();
        outline.add_item(chapter);
        outline.add_item(OutlineItem::new("Chapter 2", Some(9), 0));
        assert_eq!(outline.total_items(), 4);
    }
}