use crate::error::{PageError, Pdf2MdError};
use serde::{Deserialize, Serialize};
/// Aggregate result of one conversion: the Markdown text, the per-page
/// results, the document metadata and the run statistics.
///
/// Serializable and deserializable through the serde derives.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConversionOutput {
/// Markdown text for the document.
/// NOTE(review): presumably assembled from the per-page markdown — confirm
/// against the code that builds this value.
pub markdown: String,
/// One `PageResult` entry per page that was handled.
/// NOTE(review): ordering and whether failed/skipped pages are included is
/// not visible here — confirm.
pub pages: Vec<PageResult>,
/// Metadata describing the source document.
pub metadata: DocumentMetadata,
/// Page counters, token totals and timings; `into_result` and
/// `failed_pages` read their values from here.
pub stats: ConversionStats,
}
impl ConversionOutput {
pub fn failed_pages(&self) -> usize {
self.stats.failed_pages
}
pub fn into_result(self) -> Result<Self, Pdf2MdError> {
let failed = self.stats.failed_pages;
if failed > 0 {
Err(Pdf2MdError::PartialFailure {
success: self.stats.processed_pages,
failed,
total: self.stats.total_pages,
})
} else {
Ok(self)
}
}
}
/// Outcome of handling a single page: its Markdown, token counts, timing,
/// retry count and an optional error.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageResult {
/// Page number this result belongs to.
/// NOTE(review): whether this is 0- or 1-based is not visible here — confirm.
pub page_num: usize,
/// Markdown text produced for this page.
pub markdown: String,
/// Input token count recorded for this page.
pub input_tokens: usize,
/// Output token count recorded for this page.
pub output_tokens: usize,
/// Time recorded for this page, in milliseconds (per the field name).
pub duration_ms: u64,
/// Number of retries recorded for this page.
pub retries: u8,
/// Error attached to this page, if any.
/// NOTE(review): presumably `None` means the page succeeded — confirm
/// against the producer.
pub error: Option<PageError>,
}
/// Aggregate counters and timings for a conversion.
///
/// `Default` yields all-zero values.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ConversionStats {
/// Total page count; reported as `total` in `Pdf2MdError::PartialFailure`
/// by `ConversionOutput::into_result`.
pub total_pages: usize,
/// Processed page count; reported as `success` in
/// `Pdf2MdError::PartialFailure` by `ConversionOutput::into_result`.
pub processed_pages: usize,
/// Failed page count; any non-zero value makes
/// `ConversionOutput::into_result` return an error.
pub failed_pages: usize,
/// Skipped page count.
/// NOTE(review): what causes a page to be skipped is not visible here.
pub skipped_pages: usize,
/// Sum of input tokens.
/// NOTE(review): presumably summed over all pages — confirm.
pub total_input_tokens: u64,
/// Sum of output tokens.
/// NOTE(review): presumably summed over all pages — confirm.
pub total_output_tokens: u64,
/// Overall duration, in milliseconds (per the field name).
pub total_duration_ms: u64,
/// Time attributed to rendering, in milliseconds (per the field name).
pub render_duration_ms: u64,
/// Time attributed to LLM calls, in milliseconds (per the field name).
pub llm_duration_ms: u64,
}
/// Descriptive metadata about the source document.
///
/// `Default` yields `None` for every optional field, `0` pages, an empty
/// version string and `is_encrypted == false`.
/// NOTE(review): the optional text fields look like the standard PDF document
/// information entries — confirm against the code that populates them.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DocumentMetadata {
/// Document title, if present.
pub title: Option<String>,
/// Document author, if present.
pub author: Option<String>,
/// Document subject, if present.
pub subject: Option<String>,
/// Creator entry, if present.
pub creator: Option<String>,
/// Producer entry, if present.
pub producer: Option<String>,
/// Creation date as a string, if present.
/// NOTE(review): the date format is not established here — confirm.
pub creation_date: Option<String>,
/// Modification date as a string, if present.
/// NOTE(review): the date format is not established here — confirm.
pub modification_date: Option<String>,
/// Number of pages in the document.
pub page_count: usize,
/// PDF version, as a string.
pub pdf_version: String,
/// Whether the document is encrypted.
pub is_encrypted: bool,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `ConversionOutput` whose stats carry the given page counters;
    /// every other stats and metadata field is left at its default.
    fn output_with_counts(failed: usize, processed: usize, total: usize) -> ConversionOutput {
        let stats = ConversionStats {
            failed_pages: failed,
            processed_pages: processed,
            total_pages: total,
            ..ConversionStats::default()
        };

        ConversionOutput {
            markdown: String::from("# Hello"),
            pages: Vec::new(),
            metadata: DocumentMetadata::default(),
            stats,
        }
    }

    /// The accessor must mirror the counter stored in the stats.
    #[test]
    fn failed_pages_matches_stats() {
        let output = output_with_counts(2, 8, 10);
        assert_eq!(output.failed_pages(), 2);
    }

    /// With zero failed pages the output passes through as `Ok`.
    #[test]
    fn into_result_ok_when_no_failures() {
        let result = output_with_counts(0, 5, 5).into_result();
        assert!(result.is_ok());
    }

    /// Any failed page yields `PartialFailure` carrying the three counters.
    #[test]
    fn into_result_err_on_partial_failure() {
        let output = output_with_counts(1, 9, 10);
        match output.into_result().unwrap_err() {
            Pdf2MdError::PartialFailure {
                success,
                failed,
                total,
            } => {
                let observed = (success, failed, total);
                assert_eq!(observed, (9, 1, 10));
            }
            other => panic!("expected PartialFailure, got {other:?}"),
        }
    }
}