ppt_rs/oxml/
presentation.rs1use super::slide::{ParsedSlide, SlideParser};
6use super::xmlchemy::XmlParser;
7use crate::exc::PptxError;
8use crate::opc::Package;
9
/// Summary metadata for a presentation.
///
/// The optional fields are filled from `docProps/core.xml` when that part is
/// present and parseable; otherwise they stay `None`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PresentationInfo {
    /// Text of the `title` element, if present and non-empty.
    pub title: Option<String>,
    /// Text of the `creator` element, if present and non-empty.
    pub creator: Option<String>,
    /// Text of the `lastModifiedBy` element, if present and non-empty.
    pub last_modified_by: Option<String>,
    /// Raw text of the `created` element (kept as a string, not parsed).
    pub created: Option<String>,
    /// Raw text of the `modified` element (kept as a string, not parsed).
    pub modified: Option<String>,
    /// Value of the `revision` element when it parses as an unsigned integer.
    pub revision: Option<u32>,
    /// Number of slide parts discovered in the package.
    pub slide_count: usize,
}
21
22impl PresentationInfo {
23 pub fn new() -> Self {
24 PresentationInfo {
25 title: None,
26 creator: None,
27 last_modified_by: None,
28 created: None,
29 modified: None,
30 revision: None,
31 slide_count: 0,
32 }
33 }
34}
35
36impl Default for PresentationInfo {
37 fn default() -> Self {
38 Self::new()
39 }
40}
41
42pub struct PresentationReader {
44 package: Package,
45 info: PresentationInfo,
46 slide_paths: Vec<String>,
47}
48
49impl PresentationReader {
50 pub fn open(path: &str) -> Result<Self, PptxError> {
52 let package = Package::open(path)?;
53 let mut reader = PresentationReader {
54 package,
55 info: PresentationInfo::new(),
56 slide_paths: Vec::new(),
57 };
58 reader.parse_structure()?;
59 Ok(reader)
60 }
61
62 pub fn info(&self) -> &PresentationInfo {
64 &self.info
65 }
66
67 pub fn slide_count(&self) -> usize {
69 self.slide_paths.len()
70 }
71
72 pub fn get_slide(&self, index: usize) -> Result<ParsedSlide, PptxError> {
74 let path = self
75 .slide_paths
76 .get(index)
77 .ok_or_else(|| PptxError::NotFound(format!("Slide {index} not found")))?;
78
79 let xml = self
80 .package
81 .get_part(path)
82 .ok_or_else(|| PptxError::NotFound(format!("Slide file not found: {path}")))?;
83
84 let xml_str = String::from_utf8_lossy(xml);
85 SlideParser::parse(&xml_str)
86 }
87
88 pub fn get_all_slides(&self) -> Result<Vec<ParsedSlide>, PptxError> {
90 let mut slides = Vec::new();
91 for i in 0..self.slide_paths.len() {
92 slides.push(self.get_slide(i)?);
93 }
94 Ok(slides)
95 }
96
97 pub fn extract_all_text(&self) -> Result<Vec<String>, PptxError> {
99 let mut all_text = Vec::new();
100 for slide in self.get_all_slides()? {
101 all_text.extend(slide.all_text());
102 }
103 Ok(all_text)
104 }
105
106 fn parse_structure(&mut self) -> Result<(), PptxError> {
108 self.parse_core_properties()?;
110
111 self.parse_presentation_xml()?;
113
114 Ok(())
115 }
116
117 fn parse_core_properties(&mut self) -> Result<(), PptxError> {
118 if let Some(core_xml) = self.package.get_part("docProps/core.xml") {
119 let xml_str = String::from_utf8_lossy(core_xml);
120 if let Ok(root) = XmlParser::parse_str(&xml_str) {
121 self.info.title = root
122 .find_descendant("title")
123 .map(|e| e.text_content())
124 .filter(|s| !s.is_empty());
125
126 self.info.creator = root
127 .find_descendant("creator")
128 .map(|e| e.text_content())
129 .filter(|s| !s.is_empty());
130
131 self.info.last_modified_by = root
132 .find_descendant("lastModifiedBy")
133 .map(|e| e.text_content())
134 .filter(|s| !s.is_empty());
135
136 self.info.created = root
137 .find_descendant("created")
138 .map(|e| e.text_content())
139 .filter(|s| !s.is_empty());
140
141 self.info.modified = root
142 .find_descendant("modified")
143 .map(|e| e.text_content())
144 .filter(|s| !s.is_empty());
145
146 self.info.revision = root
147 .find_descendant("revision")
148 .and_then(|e| e.text_content().parse().ok());
149 }
150 }
151 Ok(())
152 }
153
154 fn parse_presentation_xml(&mut self) -> Result<(), PptxError> {
155 if let Some(rels_xml) = self.package.get_part("ppt/_rels/presentation.xml.rels") {
157 let xml_str = String::from_utf8_lossy(rels_xml);
158 if let Ok(root) = XmlParser::parse_str(&xml_str) {
159 let mut slide_rels: Vec<(String, String)> = Vec::new();
160
161 for rel in root.find_all("Relationship") {
162 let rel_type = rel.attr("Type").unwrap_or("");
163 if rel_type.contains("/slide")
164 && !rel_type.contains("Layout")
165 && !rel_type.contains("Master")
166 {
167 if let (Some(id), Some(target)) = (rel.attr("Id"), rel.attr("Target")) {
168 let full_path = if target.starts_with('/') {
169 target[1..].to_string()
170 } else {
171 format!("ppt/{target}")
172 };
173 slide_rels.push((id.to_string(), full_path));
174 }
175 }
176 }
177
178 slide_rels.sort_by(|a, b| {
180 let num_a: u32 = a.0.trim_start_matches("rId").parse().unwrap_or(0);
181 let num_b: u32 = b.0.trim_start_matches("rId").parse().unwrap_or(0);
182 num_a.cmp(&num_b)
183 });
184
185 self.slide_paths = slide_rels.into_iter().map(|(_, path)| path).collect();
186 }
187 }
188
189 if self.slide_paths.is_empty() {
191 let paths = self.package.part_paths();
192 let mut slides: Vec<String> = paths
193 .into_iter()
194 .filter(|p| {
195 p.starts_with("ppt/slides/slide") && p.ends_with(".xml") && !p.contains("_rels")
196 })
197 .map(|s| s.to_string())
198 .collect();
199 slides.sort();
200 self.slide_paths = slides;
201 }
202
203 self.info.slide_count = self.slide_paths.len();
204 Ok(())
205 }
206}
207
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generator::create_pptx_with_content;
    use crate::generator::SlideContent;
    use std::fs;
    use std::path::PathBuf;

    /// Scratch `.pptx` file in the OS temp directory.
    ///
    /// The file is removed when the guard is dropped, so it is cleaned up even
    /// when an assertion panics, and the working directory is never touched.
    struct TempPptx {
        path: PathBuf,
    }

    impl TempPptx {
        /// Writes `data` to a per-process file whose name is derived from `stem`.
        fn write(stem: &str, data: impl AsRef<[u8]>) -> Self {
            let file_name = format!("ppt_rs_{stem}_{}.pptx", std::process::id());
            let path = std::env::temp_dir().join(file_name);
            fs::write(&path, data).unwrap();
            TempPptx { path }
        }

        /// The file path as `&str`, which is what `PresentationReader::open` takes.
        fn as_str(&self) -> &str {
            self.path
                .to_str()
                .expect("temp directory path should be valid UTF-8")
        }
    }

    impl Drop for TempPptx {
        fn drop(&mut self) {
            // Best-effort cleanup; a leftover temp file must not fail the test.
            fs::remove_file(&self.path).ok();
        }
    }

    #[test]
    fn test_read_generated_pptx() {
        let slides = vec![
            SlideContent::new("Test Title")
                .add_bullet("Bullet 1")
                .add_bullet("Bullet 2"),
            SlideContent::new("Second Slide").add_bullet("More content"),
        ];

        let pptx_data = create_pptx_with_content("Test Presentation", slides).unwrap();
        let file = TempPptx::write("test_read", &pptx_data);

        let reader = PresentationReader::open(file.as_str()).unwrap();

        assert_eq!(reader.slide_count(), 2);
        assert_eq!(reader.info().slide_count, 2);
        assert!(reader.info().title.is_some());

        let slide1 = reader.get_slide(0).unwrap();
        assert!(slide1.title.is_some());
    }

    #[test]
    fn test_extract_all_text() {
        let slides = vec![
            SlideContent::new("Title One")
                .add_bullet("Point A")
                .add_bullet("Point B"),
            SlideContent::new("Title Two").add_bullet("Point C"),
        ];

        let pptx_data = create_pptx_with_content("Text Extract Test", slides).unwrap();
        let file = TempPptx::write("test_extract", &pptx_data);

        let reader = PresentationReader::open(file.as_str()).unwrap();
        let all_text = reader.extract_all_text().unwrap();

        assert!(all_text.iter().any(|t| t.contains("Title One")));
        assert!(all_text.iter().any(|t| t.contains("Point A")));
    }
}