1#[derive(Debug, Clone, PartialEq)]
2pub struct ConversionResult {
3 pub document: Document,
4 pub warnings: Vec<Warning>,
5}
6
7#[derive(Debug, Clone, PartialEq)]
8pub struct Document {
9 pub metadata: DocumentMetadata,
10 pub nodes: Vec<Node>,
11}
12
/// Descriptive information attached to a [`Document`].
///
/// `Default` yields a value with no title, no author and a page count of
/// zero. Equality is total (`Eq`) because every field has total equality.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DocumentMetadata {
    /// Document title, or `None` when no title is available.
    pub title: Option<String>,
    /// Document author, or `None` when no author is available.
    pub author: Option<String>,
    /// Page count recorded for the document.
    pub page_count: usize,
}
19
20#[derive(Debug, Clone, PartialEq)]
21pub enum Node {
22 Heading { level: u8, spans: Vec<Span> },
23 Paragraph { spans: Vec<Span> },
24 RawText(String),
25}
26
/// A run of text together with the styling attributes recorded for it.
///
/// Only `PartialEq` is derived (not `Eq`) because `font_size` is an `f32`.
#[derive(Debug, Clone, PartialEq)]
pub struct Span {
    /// The text content of the run.
    pub text: String,
    /// Whether the run is marked bold.
    pub bold: bool,
    /// Whether the run is marked italic.
    pub italic: bool,
    /// Font size of the run. The unit is not specified by this type.
    pub font_size: f32,
    /// Name of the font, or `None` when no name is available.
    pub font_name: Option<String>,
}
35
/// A non-fatal problem reported alongside a converted document.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq`, so warnings can be
/// deduplicated in a `HashSet` or used as map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Warning {
    /// Metrics for a font were not available.
    MissingFontMetrics {
        /// Name of the font whose metrics are missing.
        font_name: String,
        /// Page the warning refers to.
        // NOTE(review): whether pages are zero- or one-based is not visible here — confirm.
        page: usize,
    },
    /// A text stream could not be read.
    UnreadableTextStream {
        /// Page the warning refers to.
        page: usize,
        /// Free-form description of the failure.
        detail: String,
    },
    /// Text used an encoding that is not supported.
    UnsupportedEncoding {
        /// Name of the unsupported encoding.
        encoding: String,
        /// Page the warning refers to.
        page: usize,
    },
    /// A PDF object was malformed; this variant carries no page number.
    MalformedPdfObject {
        /// Free-form description of the problem.
        detail: String,
    },
}
43
44impl Document {
45 pub fn to_markdown(&self) -> String {
46 crate::renderer::render_document(self)
47 }
48}
49
50impl ConversionResult {
51 pub fn to_markdown(&self) -> String {
52 self.document.to_markdown()
53 }
54}
55
#[cfg(test)]
mod tests {
    use super::*;

    /// A `ConversionResult` built from one raw-text node and one warning
    /// exposes exactly one of each through its public fields.
    #[test]
    fn conversion_result_keeps_warnings_and_raw_text() {
        let metadata = DocumentMetadata {
            title: None,
            author: None,
            page_count: 0,
        };
        let document = Document {
            metadata,
            nodes: vec![Node::RawText(String::from("fallback"))],
        };
        let warnings = vec![Warning::MalformedPdfObject {
            detail: String::from("broken object"),
        }];
        let result = ConversionResult { document, warnings };

        assert_eq!(result.document.nodes.len(), 1);
        assert_eq!(result.warnings.len(), 1);
    }

    /// The string stored in `Node::RawText` comes back out unchanged.
    #[test]
    fn raw_text_variant_round_trips() {
        let node = Node::RawText(String::from("unclassified"));

        if let Node::RawText(text) = node {
            assert_eq!(text, "unclassified");
        } else {
            panic!("expected raw text");
        }
    }
}