use crate::model::Metadata;
use serde::{Deserialize, Serialize};
/// Bundles a content string with its [`Metadata`] and [`ExtractionStats`].
///
/// Derives `Serialize`/`Deserialize`, so a value can be passed through serde as a unit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RenderResult {
/// The produced text. NOTE(review): presumably rendered Markdown (the unit test uses
/// `"# Hello"`), but nothing in this type enforces a format.
pub content: String,
/// Metadata stored alongside the content; `content_only` fills this with `Metadata::default()`.
pub metadata: Metadata,
/// Counters stored alongside the content; `content_only` fills this with all-zero stats.
pub stats: ExtractionStats,
}
impl RenderResult {
pub fn new(content: String, metadata: Metadata, stats: ExtractionStats) -> Self {
Self {
content,
metadata,
stats,
}
}
pub fn content_only(content: String) -> Self {
Self {
content,
metadata: Metadata::default(),
stats: ExtractionStats::default(),
}
}
pub fn content_len(&self) -> usize {
self.content.len()
}
}
/// A set of independent `u32` counters.
///
/// `Default` yields all zeros. Every field is public, so callers may set counters directly
/// in addition to using the `add_*`, `count_text`, and `merge` methods.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ExtractionStats {
/// Incremented by `add_page`.
pub page_count: u32,
/// Incremented by `add_paragraph`.
pub paragraph_count: u32,
/// Incremented by `add_table`.
pub table_count: u32,
/// Incremented by `add_image`.
pub image_count: u32,
/// Incremented by `add_list_item`.
pub list_item_count: u32,
/// Running total of whitespace-separated tokens fed to `count_text`.
pub word_count: u32,
/// Running total of non-whitespace characters fed to `count_text`.
pub char_count: u32,
/// Incremented by `add_heading`.
pub heading_count: u32,
/// Incremented by `add_horizontal_rule`.
pub horizontal_rule_count: u32,
}
impl ExtractionStats {
pub fn new() -> Self {
Self::default()
}
pub fn add_paragraph(&mut self) {
self.paragraph_count += 1;
}
pub fn add_table(&mut self) {
self.table_count += 1;
}
pub fn add_image(&mut self) {
self.image_count += 1;
}
pub fn add_list_item(&mut self) {
self.list_item_count += 1;
}
pub fn add_heading(&mut self) {
self.heading_count += 1;
}
pub fn add_horizontal_rule(&mut self) {
self.horizontal_rule_count += 1;
}
pub fn add_page(&mut self) {
self.page_count += 1;
}
pub fn count_text(&mut self, text: &str) {
self.word_count += text.split_whitespace().count() as u32;
self.char_count += text.chars().filter(|c| !c.is_whitespace()).count() as u32;
}
pub fn merge(&mut self, other: &ExtractionStats) {
self.page_count += other.page_count;
self.paragraph_count += other.paragraph_count;
self.table_count += other.table_count;
self.image_count += other.image_count;
self.list_item_count += other.list_item_count;
self.word_count += other.word_count;
self.char_count += other.char_count;
self.heading_count += other.heading_count;
self.horizontal_rule_count += other.horizontal_rule_count;
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `count_text` tallies whitespace-separated words and non-whitespace characters.
    #[test]
    fn test_extraction_stats_count_text() {
        let sentence = "Hello, world! This is a test.";
        let mut tally = ExtractionStats::new();

        tally.count_text(sentence);

        assert_eq!(tally.word_count, 6);
        assert_eq!(tally.char_count, 24);
    }

    /// `merge` sums matching counters, including ones that were zero on the receiver.
    #[test]
    fn test_extraction_stats_merge() {
        let mut accumulated = ExtractionStats {
            paragraph_count: 5,
            table_count: 2,
            ..ExtractionStats::new()
        };
        let incoming = ExtractionStats {
            paragraph_count: 3,
            table_count: 1,
            image_count: 4,
            ..Default::default()
        };

        accumulated.merge(&incoming);

        assert_eq!(accumulated.paragraph_count, 8);
        assert_eq!(accumulated.table_count, 3);
        assert_eq!(accumulated.image_count, 4);
    }

    /// `content_only` keeps the given text and leaves the statistics zeroed.
    #[test]
    fn test_render_result_content_only() {
        let rendered = RenderResult::content_only(String::from("# Hello"));

        assert_eq!(rendered.content, "# Hello");
        assert_eq!(rendered.stats.paragraph_count, 0);
    }
}