Skip to main content

ppt_rs/oxml/
presentation.rs

//! Presentation package reading
//!
//! Opens a PPTX package, reads its core properties and slide relationships, and provides high-level access to presentation content.
4
5use super::slide::{ParsedSlide, SlideParser};
6use super::xmlchemy::XmlParser;
7use crate::exc::PptxError;
8use crate::opc::Package;
9
/// Parsed presentation metadata
///
/// The optional text fields hold the raw text of the matching element in
/// `docProps/core.xml`; they are `None` when the element is missing or empty.
/// Values can be compared with `==`, which is convenient in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PresentationInfo {
    /// Text of the `title` element.
    pub title: Option<String>,
    /// Text of the `creator` element.
    pub creator: Option<String>,
    /// Text of the `lastModifiedBy` element.
    pub last_modified_by: Option<String>,
    /// Text of the `created` element, kept as a string (not parsed into a date type).
    pub created: Option<String>,
    /// Text of the `modified` element, kept as a string (not parsed into a date type).
    pub modified: Option<String>,
    /// Value of the `revision` element when it parses as an unsigned integer.
    pub revision: Option<u32>,
    /// Number of slides found in the package.
    pub slide_count: usize,
}
21
22impl PresentationInfo {
23    pub fn new() -> Self {
24        PresentationInfo {
25            title: None,
26            creator: None,
27            last_modified_by: None,
28            created: None,
29            modified: None,
30            revision: None,
31            slide_count: 0,
32        }
33    }
34}
35
36impl Default for PresentationInfo {
37    fn default() -> Self {
38        Self::new()
39    }
40}
41
/// Presentation reader for parsing PPTX files
///
/// Created with `PresentationReader::open`, which opens the package and
/// records its metadata and slide part paths up front; individual slides are
/// parsed when they are requested.
pub struct PresentationReader {
    /// Opened package that individual parts are read from.
    package: Package,
    /// Metadata gathered while the package structure was parsed.
    info: PresentationInfo,
    /// Paths of the slide parts inside the package, in the order slides are returned.
    slide_paths: Vec<String>,
}
48
49impl PresentationReader {
50    /// Open a PPTX file for reading
51    pub fn open(path: &str) -> Result<Self, PptxError> {
52        let package = Package::open(path)?;
53        let mut reader = PresentationReader {
54            package,
55            info: PresentationInfo::new(),
56            slide_paths: Vec::new(),
57        };
58        reader.parse_structure()?;
59        Ok(reader)
60    }
61
62    /// Get presentation info
63    pub fn info(&self) -> &PresentationInfo {
64        &self.info
65    }
66
67    /// Get number of slides
68    pub fn slide_count(&self) -> usize {
69        self.slide_paths.len()
70    }
71
72    /// Get slide by index (0-based)
73    pub fn get_slide(&self, index: usize) -> Result<ParsedSlide, PptxError> {
74        let path = self
75            .slide_paths
76            .get(index)
77            .ok_or_else(|| PptxError::NotFound(format!("Slide {index} not found")))?;
78
79        let xml = self
80            .package
81            .get_part(path)
82            .ok_or_else(|| PptxError::NotFound(format!("Slide file not found: {path}")))?;
83
84        let xml_str = String::from_utf8_lossy(xml);
85        SlideParser::parse(&xml_str)
86    }
87
88    /// Get all slides
89    pub fn get_all_slides(&self) -> Result<Vec<ParsedSlide>, PptxError> {
90        let mut slides = Vec::new();
91        for i in 0..self.slide_paths.len() {
92            slides.push(self.get_slide(i)?);
93        }
94        Ok(slides)
95    }
96
97    /// Get all text from presentation
98    pub fn extract_all_text(&self) -> Result<Vec<String>, PptxError> {
99        let mut all_text = Vec::new();
100        for slide in self.get_all_slides()? {
101            all_text.extend(slide.all_text());
102        }
103        Ok(all_text)
104    }
105
106    /// Parse presentation structure
107    fn parse_structure(&mut self) -> Result<(), PptxError> {
108        // Parse core properties
109        self.parse_core_properties()?;
110
111        // Parse presentation.xml to get slide list
112        self.parse_presentation_xml()?;
113
114        Ok(())
115    }
116
117    fn parse_core_properties(&mut self) -> Result<(), PptxError> {
118        if let Some(core_xml) = self.package.get_part("docProps/core.xml") {
119            let xml_str = String::from_utf8_lossy(core_xml);
120            if let Ok(root) = XmlParser::parse_str(&xml_str) {
121                self.info.title = root
122                    .find_descendant("title")
123                    .map(|e| e.text_content())
124                    .filter(|s| !s.is_empty());
125
126                self.info.creator = root
127                    .find_descendant("creator")
128                    .map(|e| e.text_content())
129                    .filter(|s| !s.is_empty());
130
131                self.info.last_modified_by = root
132                    .find_descendant("lastModifiedBy")
133                    .map(|e| e.text_content())
134                    .filter(|s| !s.is_empty());
135
136                self.info.created = root
137                    .find_descendant("created")
138                    .map(|e| e.text_content())
139                    .filter(|s| !s.is_empty());
140
141                self.info.modified = root
142                    .find_descendant("modified")
143                    .map(|e| e.text_content())
144                    .filter(|s| !s.is_empty());
145
146                self.info.revision = root
147                    .find_descendant("revision")
148                    .and_then(|e| e.text_content().parse().ok());
149            }
150        }
151        Ok(())
152    }
153
154    fn parse_presentation_xml(&mut self) -> Result<(), PptxError> {
155        // First, find slide references from presentation.xml.rels
156        if let Some(rels_xml) = self.package.get_part("ppt/_rels/presentation.xml.rels") {
157            let xml_str = String::from_utf8_lossy(rels_xml);
158            if let Ok(root) = XmlParser::parse_str(&xml_str) {
159                let mut slide_rels: Vec<(String, String)> = Vec::new();
160
161                for rel in root.find_all("Relationship") {
162                    let rel_type = rel.attr("Type").unwrap_or("");
163                    if rel_type.contains("/slide")
164                        && !rel_type.contains("Layout")
165                        && !rel_type.contains("Master")
166                    {
167                        if let (Some(id), Some(target)) = (rel.attr("Id"), rel.attr("Target")) {
168                            let full_path = if target.starts_with('/') {
169                                target[1..].to_string()
170                            } else {
171                                format!("ppt/{target}")
172                            };
173                            slide_rels.push((id.to_string(), full_path));
174                        }
175                    }
176                }
177
178                // Sort by relationship ID to maintain slide order
179                slide_rels.sort_by(|a, b| {
180                    let num_a: u32 = a.0.trim_start_matches("rId").parse().unwrap_or(0);
181                    let num_b: u32 = b.0.trim_start_matches("rId").parse().unwrap_or(0);
182                    num_a.cmp(&num_b)
183                });
184
185                self.slide_paths = slide_rels.into_iter().map(|(_, path)| path).collect();
186            }
187        }
188
189        // Fallback: scan for slide files
190        if self.slide_paths.is_empty() {
191            let paths = self.package.part_paths();
192            let mut slides: Vec<String> = paths
193                .into_iter()
194                .filter(|p| {
195                    p.starts_with("ppt/slides/slide") && p.ends_with(".xml") && !p.contains("_rels")
196                })
197                .map(|s| s.to_string())
198                .collect();
199            slides.sort();
200            self.slide_paths = slides;
201        }
202
203        self.info.slide_count = self.slide_paths.len();
204        Ok(())
205    }
206}
207
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generator::create_pptx_with_content;
    use crate::generator::SlideContent;
    use std::fs;
    use std::path::PathBuf;

    /// PPTX fixture written to the system temp directory.
    ///
    /// The file is removed when the guard is dropped, so it is cleaned up even
    /// when an assertion panics, and the working directory is never touched.
    struct TempPptx {
        path: PathBuf,
    }

    impl TempPptx {
        /// Writes `data` to a file whose name includes the process id, so
        /// concurrent test runs do not overwrite each other's fixtures.
        fn create(name: &str, data: impl AsRef<[u8]>) -> Self {
            let file_name = format!("ppt_rs_{}_{name}", std::process::id());
            let path = std::env::temp_dir().join(file_name);
            fs::write(&path, data).unwrap();
            TempPptx { path }
        }

        /// Path as `&str`, which is what `PresentationReader::open` accepts.
        fn path_str(&self) -> &str {
            self.path
                .to_str()
                .expect("temp file path should be valid UTF-8")
        }
    }

    impl Drop for TempPptx {
        fn drop(&mut self) {
            // Best-effort cleanup; a leftover temp file must not fail the test.
            fs::remove_file(&self.path).ok();
        }
    }

    #[test]
    fn test_read_generated_pptx() {
        // Create a test PPTX
        let slides = vec![
            SlideContent::new("Test Title")
                .add_bullet("Bullet 1")
                .add_bullet("Bullet 2"),
            SlideContent::new("Second Slide").add_bullet("More content"),
        ];

        let pptx_data = create_pptx_with_content("Test Presentation", slides).unwrap();
        let file = TempPptx::create("test_read.pptx", &pptx_data);

        // Read it back
        let reader = PresentationReader::open(file.path_str()).unwrap();

        assert_eq!(reader.slide_count(), 2);
        assert!(reader.info().title.is_some());

        let slide1 = reader.get_slide(0).unwrap();
        assert!(slide1.title.is_some());
    }

    #[test]
    fn test_extract_all_text() {
        let slides = vec![
            SlideContent::new("Title One")
                .add_bullet("Point A")
                .add_bullet("Point B"),
            SlideContent::new("Title Two").add_bullet("Point C"),
        ];

        let pptx_data = create_pptx_with_content("Text Extract Test", slides).unwrap();
        let file = TempPptx::create("test_extract.pptx", &pptx_data);

        let reader = PresentationReader::open(file.path_str()).unwrap();
        let all_text = reader.extract_all_text().unwrap();

        assert!(all_text.iter().any(|t| t.contains("Title One")));
        assert!(all_text.iter().any(|t| t.contains("Point A")));
    }
}