ppt_rs/oxml/
presentation.rs1use super::slide::{ParsedSlide, SlideParser};
6use super::xmlchemy::XmlParser;
7use crate::exc::PptxError;
8use crate::opc::Package;
9
/// Summary metadata gathered for a presentation.
///
/// The optional fields are filled from same-named elements of
/// `docProps/core.xml` by `PresentationReader`; a field stays `None` when the
/// element is absent or its text is empty. `slide_count` is set to the number
/// of slide parts the reader located.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PresentationInfo {
    /// Text of the `title` element, if any.
    pub title: Option<String>,
    /// Text of the `creator` element, if any.
    pub creator: Option<String>,
    /// Text of the `lastModifiedBy` element, if any.
    pub last_modified_by: Option<String>,
    /// Raw text of the `created` element; stored as written, not parsed.
    pub created: Option<String>,
    /// Raw text of the `modified` element; stored as written, not parsed.
    pub modified: Option<String>,
    /// Value of the `revision` element when it parses as an unsigned integer.
    pub revision: Option<u32>,
    /// Number of slides located in the package.
    pub slide_count: usize,
}

impl PresentationInfo {
    /// Creates an empty record: every optional field is `None` and
    /// `slide_count` is `0`. Equivalent to [`PresentationInfo::default`].
    pub fn new() -> Self {
        Self::default()
    }
}
41
42pub struct PresentationReader {
44 package: Package,
45 info: PresentationInfo,
46 slide_paths: Vec<String>,
47}
48
49impl PresentationReader {
50 pub fn open(path: &str) -> Result<Self, PptxError> {
52 let package = Package::open(path)?;
53 let mut reader = PresentationReader {
54 package,
55 info: PresentationInfo::new(),
56 slide_paths: Vec::new(),
57 };
58 reader.parse_structure()?;
59 Ok(reader)
60 }
61
62 pub fn info(&self) -> &PresentationInfo {
64 &self.info
65 }
66
67 pub fn slide_count(&self) -> usize {
69 self.slide_paths.len()
70 }
71
72 pub fn get_slide(&self, index: usize) -> Result<ParsedSlide, PptxError> {
74 let path = self.slide_paths.get(index)
75 .ok_or_else(|| PptxError::NotFound(format!("Slide {index} not found")))?;
76
77 let xml = self.package.get_part(path)
78 .ok_or_else(|| PptxError::NotFound(format!("Slide file not found: {path}")))?;
79
80 let xml_str = String::from_utf8_lossy(xml);
81 SlideParser::parse(&xml_str)
82 }
83
84 pub fn get_all_slides(&self) -> Result<Vec<ParsedSlide>, PptxError> {
86 let mut slides = Vec::new();
87 for i in 0..self.slide_paths.len() {
88 slides.push(self.get_slide(i)?);
89 }
90 Ok(slides)
91 }
92
93 pub fn extract_all_text(&self) -> Result<Vec<String>, PptxError> {
95 let mut all_text = Vec::new();
96 for slide in self.get_all_slides()? {
97 all_text.extend(slide.all_text());
98 }
99 Ok(all_text)
100 }
101
102 fn parse_structure(&mut self) -> Result<(), PptxError> {
104 self.parse_core_properties()?;
106
107 self.parse_presentation_xml()?;
109
110 Ok(())
111 }
112
113 fn parse_core_properties(&mut self) -> Result<(), PptxError> {
114 if let Some(core_xml) = self.package.get_part("docProps/core.xml") {
115 let xml_str = String::from_utf8_lossy(core_xml);
116 if let Ok(root) = XmlParser::parse_str(&xml_str) {
117 self.info.title = root.find_descendant("title")
118 .map(|e| e.text_content())
119 .filter(|s| !s.is_empty());
120
121 self.info.creator = root.find_descendant("creator")
122 .map(|e| e.text_content())
123 .filter(|s| !s.is_empty());
124
125 self.info.last_modified_by = root.find_descendant("lastModifiedBy")
126 .map(|e| e.text_content())
127 .filter(|s| !s.is_empty());
128
129 self.info.created = root.find_descendant("created")
130 .map(|e| e.text_content())
131 .filter(|s| !s.is_empty());
132
133 self.info.modified = root.find_descendant("modified")
134 .map(|e| e.text_content())
135 .filter(|s| !s.is_empty());
136
137 self.info.revision = root.find_descendant("revision")
138 .and_then(|e| e.text_content().parse().ok());
139 }
140 }
141 Ok(())
142 }
143
144 fn parse_presentation_xml(&mut self) -> Result<(), PptxError> {
145 if let Some(rels_xml) = self.package.get_part("ppt/_rels/presentation.xml.rels") {
147 let xml_str = String::from_utf8_lossy(rels_xml);
148 if let Ok(root) = XmlParser::parse_str(&xml_str) {
149 let mut slide_rels: Vec<(String, String)> = Vec::new();
150
151 for rel in root.find_all("Relationship") {
152 let rel_type = rel.attr("Type").unwrap_or("");
153 if rel_type.contains("/slide") && !rel_type.contains("Layout") && !rel_type.contains("Master") {
154 if let (Some(id), Some(target)) = (rel.attr("Id"), rel.attr("Target")) {
155 let full_path = if target.starts_with('/') {
156 target[1..].to_string()
157 } else {
158 format!("ppt/{target}")
159 };
160 slide_rels.push((id.to_string(), full_path));
161 }
162 }
163 }
164
165 slide_rels.sort_by(|a, b| {
167 let num_a: u32 = a.0.trim_start_matches("rId").parse().unwrap_or(0);
168 let num_b: u32 = b.0.trim_start_matches("rId").parse().unwrap_or(0);
169 num_a.cmp(&num_b)
170 });
171
172 self.slide_paths = slide_rels.into_iter().map(|(_, path)| path).collect();
173 }
174 }
175
176 if self.slide_paths.is_empty() {
178 let paths = self.package.part_paths();
179 let mut slides: Vec<String> = paths.into_iter()
180 .filter(|p| p.starts_with("ppt/slides/slide") && p.ends_with(".xml") && !p.contains("_rels"))
181 .map(|s| s.to_string())
182 .collect();
183 slides.sort();
184 self.slide_paths = slides;
185 }
186
187 self.info.slide_count = self.slide_paths.len();
188 Ok(())
189 }
190}
191
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generator::create_pptx_with_content;
    use crate::generator::SlideContent;
    use std::fs;
    use std::path::PathBuf;

    /// A `.pptx` file in the system temp directory that is removed on drop,
    /// so a failing assertion does not leave the file behind.
    struct TempPptx {
        path: PathBuf,
    }

    impl TempPptx {
        /// Writes `data` to a file whose name combines the process id and `name`.
        fn create(name: &str, data: impl AsRef<[u8]>) -> Self {
            let path = std::env::temp_dir().join(format!("{}_{name}", std::process::id()));
            fs::write(&path, data).unwrap();
            TempPptx { path }
        }

        /// Path of the file as `&str`, the form `PresentationReader::open` takes.
        fn path(&self) -> &str {
            self.path
                .to_str()
                .expect("temporary path should be valid UTF-8")
        }
    }

    impl Drop for TempPptx {
        fn drop(&mut self) {
            // Best-effort cleanup; a failure here must not mask the test result.
            fs::remove_file(&self.path).ok();
        }
    }

    /// A generated deck can be reopened with its slide count, title metadata
    /// and first slide title intact.
    #[test]
    fn test_read_generated_pptx() {
        let slides = vec![
            SlideContent::new("Test Title")
                .add_bullet("Bullet 1")
                .add_bullet("Bullet 2"),
            SlideContent::new("Second Slide")
                .add_bullet("More content"),
        ];

        let pptx_data = create_pptx_with_content("Test Presentation", slides).unwrap();
        // Declared before the reader so the reader is dropped before the file is removed.
        let file = TempPptx::create("test_read.pptx", &pptx_data);

        let reader = PresentationReader::open(file.path()).unwrap();

        assert_eq!(reader.slide_count(), 2);
        assert!(reader.info().title.is_some());

        let slide1 = reader.get_slide(0).unwrap();
        assert!(slide1.title.is_some());
    }

    /// Text extraction returns both slide titles and bullet text.
    #[test]
    fn test_extract_all_text() {
        let slides = vec![
            SlideContent::new("Title One")
                .add_bullet("Point A")
                .add_bullet("Point B"),
            SlideContent::new("Title Two")
                .add_bullet("Point C"),
        ];

        let pptx_data = create_pptx_with_content("Text Extract Test", slides).unwrap();
        // Declared before the reader so the reader is dropped before the file is removed.
        let file = TempPptx::create("test_extract.pptx", &pptx_data);

        let reader = PresentationReader::open(file.path()).unwrap();
        let all_text = reader.extract_all_text().unwrap();

        assert!(all_text.iter().any(|t| t.contains("Title One")));
        assert!(all_text.iter().any(|t| t.contains("Point A")));
    }
}