Skip to main content

papyrus_core/ast/
mod.rs

1#[derive(Debug, Clone, PartialEq)]
2pub struct ConversionResult {
3    pub document: Document,
4    pub warnings: Vec<Warning>,
5}
6
7#[derive(Debug, Clone, PartialEq)]
8pub struct Document {
9    pub metadata: DocumentMetadata,
10    pub nodes: Vec<Node>,
11}
12
/// Document-level metadata.
///
/// All fields are `Eq`, so the type derives `Eq` in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DocumentMetadata {
    /// Document title, if one is known.
    pub title: Option<String>,
    /// Document author, if one is known.
    pub author: Option<String>,
    /// Number of pages in the document.
    pub page_count: usize,
}
19
20#[derive(Debug, Clone, PartialEq)]
21pub enum Node {
22    Heading { level: u8, spans: Vec<Span> },
23    Paragraph { spans: Vec<Span> },
24    RawText(String),
25}
26
/// A run of text together with its styling attributes.
///
/// Only `PartialEq` is derived: `font_size` is an `f32`, which is not `Eq`.
#[derive(Debug, Clone, PartialEq)]
pub struct Span {
    /// The text content of the run.
    pub text: String,
    /// Whether the run is bold.
    pub bold: bool,
    /// Whether the run is italic.
    pub italic: bool,
    /// Font size of the run.
    // NOTE(review): the unit is not stated in this file — presumably points; confirm.
    pub font_size: f32,
    /// Name of the font, if known.
    pub font_name: Option<String>,
}
35
/// A problem reported alongside a conversion result.
///
/// All payload fields are `Eq`, so the type derives `Eq` in addition to
/// `PartialEq`.
// NOTE(review): whether `page` values are 0- or 1-based is not visible here — confirm with the producer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Warning {
    /// Font metrics were missing for `font_name` on `page`.
    MissingFontMetrics { font_name: String, page: usize },
    /// A text stream on `page` could not be read; `detail` carries a description.
    UnreadableTextStream { page: usize, detail: String },
    /// Text on `page` used an encoding that is not supported.
    UnsupportedEncoding { encoding: String, page: usize },
    /// A PDF object was malformed; `detail` carries a description.
    MalformedPdfObject { detail: String },
}
43
44impl Document {
45    pub fn to_markdown(&self) -> String {
46        crate::renderer::render_document(self)
47    }
48}
49
50impl ConversionResult {
51    pub fn to_markdown(&self) -> String {
52        self.document.to_markdown()
53    }
54}
55
#[cfg(test)]
mod tests {
    use super::*;

    /// A hand-built `ConversionResult` must expose exactly the node and the
    /// warning it was constructed with, not merely the right number of them.
    #[test]
    fn conversion_result_keeps_warnings_and_raw_text() {
        let result = ConversionResult {
            document: Document {
                metadata: DocumentMetadata {
                    title: None,
                    author: None,
                    page_count: 0,
                },
                nodes: vec![Node::RawText("fallback".to_string())],
            },
            warnings: vec![Warning::MalformedPdfObject {
                detail: "broken object".to_string(),
            }],
        };

        // Compare contents, not just lengths, so the test matches its name.
        assert_eq!(
            result.document.nodes,
            vec![Node::RawText("fallback".to_string())]
        );
        assert_eq!(
            result.warnings,
            vec![Warning::MalformedPdfObject {
                detail: "broken object".to_string(),
            }]
        );
    }

    /// The string stored in `Node::RawText` comes back unchanged when the
    /// variant is destructured.
    #[test]
    fn raw_text_variant_round_trips() {
        let node = Node::RawText("unclassified".to_string());
        match node {
            Node::RawText(s) => assert_eq!(s, "unclassified"),
            // Report what was actually seen to make a failure diagnosable.
            other => panic!("expected raw text, got {:?}", other),
        }
    }
}