Skip to main content

ppt_rs/oxml/
presentation.rs

1//! Presentation XML parsing and reading
2//!
3//! Parses presentation.xml and provides high-level access to presentation content.
4
5use super::slide::{ParsedSlide, SlideParser};
6use super::xmlchemy::XmlParser;
7use crate::exc::PptxError;
8use crate::opc::Package;
9
/// Parsed presentation metadata.
///
/// Every optional field starts as `None` and `slide_count` starts at `0`;
/// the values are filled in by whoever parses the package.
#[derive(Debug, Clone, Default)]
pub struct PresentationInfo {
    /// Text of the `title` core property, if present and non-empty.
    pub title: Option<String>,
    /// Text of the `creator` core property, if present and non-empty.
    pub creator: Option<String>,
    /// Text of the `lastModifiedBy` core property, if present and non-empty.
    pub last_modified_by: Option<String>,
    /// Text of the `created` core property, kept as the raw string.
    pub created: Option<String>,
    /// Text of the `modified` core property, kept as the raw string.
    pub modified: Option<String>,
    /// Value of the `revision` core property when it parses as an unsigned integer.
    pub revision: Option<u32>,
    /// Number of slides discovered in the package.
    pub slide_count: usize,
}

impl PresentationInfo {
    /// Creates an empty `PresentationInfo`.
    ///
    /// Equivalent to [`PresentationInfo::default`]: all optional fields are
    /// `None` and `slide_count` is `0`.
    pub fn new() -> Self {
        Self::default()
    }
}
41
42/// Presentation reader for parsing PPTX files
43pub struct PresentationReader {
44    package: Package,
45    info: PresentationInfo,
46    slide_paths: Vec<String>,
47}
48
49impl PresentationReader {
50    /// Open a PPTX file for reading
51    pub fn open(path: &str) -> Result<Self, PptxError> {
52        let package = Package::open(path)?;
53        let mut reader = PresentationReader {
54            package,
55            info: PresentationInfo::new(),
56            slide_paths: Vec::new(),
57        };
58        reader.parse_structure()?;
59        Ok(reader)
60    }
61
62    /// Get presentation info
63    pub fn info(&self) -> &PresentationInfo {
64        &self.info
65    }
66
67    /// Get number of slides
68    pub fn slide_count(&self) -> usize {
69        self.slide_paths.len()
70    }
71
72    /// Get slide by index (0-based)
73    pub fn get_slide(&self, index: usize) -> Result<ParsedSlide, PptxError> {
74        let path = self.slide_paths.get(index)
75            .ok_or_else(|| PptxError::NotFound(format!("Slide {index} not found")))?;
76        
77        let xml = self.package.get_part(path)
78            .ok_or_else(|| PptxError::NotFound(format!("Slide file not found: {path}")))?;
79        
80        let xml_str = String::from_utf8_lossy(xml);
81        SlideParser::parse(&xml_str)
82    }
83
84    /// Get all slides
85    pub fn get_all_slides(&self) -> Result<Vec<ParsedSlide>, PptxError> {
86        let mut slides = Vec::new();
87        for i in 0..self.slide_paths.len() {
88            slides.push(self.get_slide(i)?);
89        }
90        Ok(slides)
91    }
92
93    /// Get all text from presentation
94    pub fn extract_all_text(&self) -> Result<Vec<String>, PptxError> {
95        let mut all_text = Vec::new();
96        for slide in self.get_all_slides()? {
97            all_text.extend(slide.all_text());
98        }
99        Ok(all_text)
100    }
101
102    /// Parse presentation structure
103    fn parse_structure(&mut self) -> Result<(), PptxError> {
104        // Parse core properties
105        self.parse_core_properties()?;
106        
107        // Parse presentation.xml to get slide list
108        self.parse_presentation_xml()?;
109        
110        Ok(())
111    }
112
113    fn parse_core_properties(&mut self) -> Result<(), PptxError> {
114        if let Some(core_xml) = self.package.get_part("docProps/core.xml") {
115            let xml_str = String::from_utf8_lossy(core_xml);
116            if let Ok(root) = XmlParser::parse_str(&xml_str) {
117                self.info.title = root.find_descendant("title")
118                    .map(|e| e.text_content())
119                    .filter(|s| !s.is_empty());
120                
121                self.info.creator = root.find_descendant("creator")
122                    .map(|e| e.text_content())
123                    .filter(|s| !s.is_empty());
124                
125                self.info.last_modified_by = root.find_descendant("lastModifiedBy")
126                    .map(|e| e.text_content())
127                    .filter(|s| !s.is_empty());
128                
129                self.info.created = root.find_descendant("created")
130                    .map(|e| e.text_content())
131                    .filter(|s| !s.is_empty());
132                
133                self.info.modified = root.find_descendant("modified")
134                    .map(|e| e.text_content())
135                    .filter(|s| !s.is_empty());
136                
137                self.info.revision = root.find_descendant("revision")
138                    .and_then(|e| e.text_content().parse().ok());
139            }
140        }
141        Ok(())
142    }
143
144    fn parse_presentation_xml(&mut self) -> Result<(), PptxError> {
145        // First, find slide references from presentation.xml.rels
146        if let Some(rels_xml) = self.package.get_part("ppt/_rels/presentation.xml.rels") {
147            let xml_str = String::from_utf8_lossy(rels_xml);
148            if let Ok(root) = XmlParser::parse_str(&xml_str) {
149                let mut slide_rels: Vec<(String, String)> = Vec::new();
150                
151                for rel in root.find_all("Relationship") {
152                    let rel_type = rel.attr("Type").unwrap_or("");
153                    if rel_type.contains("/slide") && !rel_type.contains("Layout") && !rel_type.contains("Master") {
154                        if let (Some(id), Some(target)) = (rel.attr("Id"), rel.attr("Target")) {
155                            let full_path = if target.starts_with('/') {
156                                target[1..].to_string()
157                            } else {
158                                format!("ppt/{target}")
159                            };
160                            slide_rels.push((id.to_string(), full_path));
161                        }
162                    }
163                }
164                
165                // Sort by relationship ID to maintain slide order
166                slide_rels.sort_by(|a, b| {
167                    let num_a: u32 = a.0.trim_start_matches("rId").parse().unwrap_or(0);
168                    let num_b: u32 = b.0.trim_start_matches("rId").parse().unwrap_or(0);
169                    num_a.cmp(&num_b)
170                });
171                
172                self.slide_paths = slide_rels.into_iter().map(|(_, path)| path).collect();
173            }
174        }
175        
176        // Fallback: scan for slide files
177        if self.slide_paths.is_empty() {
178            let paths = self.package.part_paths();
179            let mut slides: Vec<String> = paths.into_iter()
180                .filter(|p| p.starts_with("ppt/slides/slide") && p.ends_with(".xml") && !p.contains("_rels"))
181                .map(|s| s.to_string())
182                .collect();
183            slides.sort();
184            self.slide_paths = slides;
185        }
186        
187        self.info.slide_count = self.slide_paths.len();
188        Ok(())
189    }
190}
191
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generator::create_pptx_with_content;
    use crate::generator::SlideContent;
    use std::fs;
    use std::path::PathBuf;

    /// A PPTX fixture written to the system temp directory.
    ///
    /// The file is removed when the guard is dropped, so it is cleaned up even
    /// when an assertion in the test panics.
    struct TempPptx(PathBuf);

    impl TempPptx {
        /// Writes `bytes` to a file whose name combines the process id and `name`,
        /// so concurrent test runs do not overwrite each other's fixtures.
        fn create(name: &str, bytes: impl AsRef<[u8]>) -> Self {
            let file_name = format!("ppt_rs_{}_{name}.pptx", std::process::id());
            let path = std::env::temp_dir().join(file_name);
            fs::write(&path, bytes).expect("failed to write temporary PPTX fixture");
            TempPptx(path)
        }

        /// The fixture path as a string, as required by `PresentationReader::open`.
        fn path(&self) -> &str {
            self.0.to_str().expect("temporary path is not valid UTF-8")
        }
    }

    impl Drop for TempPptx {
        fn drop(&mut self) {
            // Best-effort cleanup; a leftover temp file must not fail the test.
            let _ = fs::remove_file(&self.0);
        }
    }

    #[test]
    fn test_read_generated_pptx() {
        // Create a test PPTX
        let slides = vec![
            SlideContent::new("Test Title")
                .add_bullet("Bullet 1")
                .add_bullet("Bullet 2"),
            SlideContent::new("Second Slide").add_bullet("More content"),
        ];

        let pptx_data = create_pptx_with_content("Test Presentation", slides).unwrap();
        let fixture = TempPptx::create("test_read", &pptx_data);

        // Read it back
        let reader = PresentationReader::open(fixture.path()).unwrap();

        assert_eq!(reader.slide_count(), 2);
        assert_eq!(reader.info().slide_count, 2);
        assert!(reader.info().title.is_some());

        let slide1 = reader.get_slide(0).unwrap();
        assert!(slide1.title.is_some());
    }

    #[test]
    fn test_extract_all_text() {
        let slides = vec![
            SlideContent::new("Title One")
                .add_bullet("Point A")
                .add_bullet("Point B"),
            SlideContent::new("Title Two").add_bullet("Point C"),
        ];

        let pptx_data = create_pptx_with_content("Text Extract Test", slides).unwrap();
        let fixture = TempPptx::create("test_extract", &pptx_data);

        let reader = PresentationReader::open(fixture.path()).unwrap();
        let all_text = reader.extract_all_text().unwrap();

        assert!(all_text.iter().any(|t| t.contains("Title One")));
        assert!(all_text.iter().any(|t| t.contains("Point A")));
    }
}