1use super::xmlchemy::{XmlElement, XmlParser};
6use crate::exc::PptxError;
7
/// A span of text inside a paragraph together with its character formatting.
#[derive(Debug, Clone)]
pub struct TextRun {
    /// The characters of the run.
    pub text: String,
    /// Bold flag.
    pub bold: bool,
    /// Italic flag.
    pub italic: bool,
    /// Underline flag.
    pub underline: bool,
    /// Font size, when one was specified. The slide parser stores the parsed
    /// value of the run properties' `sz` attribute here.
    pub font_size: Option<u32>,
    /// Color, when one was specified. The slide parser stores the `val`
    /// attribute of an `srgbClr` fill here.
    pub color: Option<String>,
}

impl TextRun {
    /// Creates an unformatted run that owns a copy of `text`.
    ///
    /// All formatting flags start as `false`; `font_size` and `color` start as `None`.
    pub fn new(text: &str) -> Self {
        let text = String::from(text);
        Self {
            text,
            color: None,
            font_size: None,
            underline: false,
            italic: false,
            bold: false,
        }
    }
}
31
32#[derive(Debug, Clone)]
34pub struct Paragraph {
35 pub runs: Vec<TextRun>,
36 pub level: u32,
37}
38
39impl Paragraph {
40 pub fn new() -> Self {
41 Paragraph {
42 runs: Vec::new(),
43 level: 0,
44 }
45 }
46
47 pub fn text(&self) -> String {
49 self.runs.iter().map(|r| r.text.as_str()).collect()
50 }
51}
52
53impl Default for Paragraph {
54 fn default() -> Self {
55 Self::new()
56 }
57}
58
59#[derive(Debug, Clone)]
61pub struct ParsedShape {
62 pub name: String,
63 pub shape_type: Option<String>,
64 pub paragraphs: Vec<Paragraph>,
65 pub x: i64,
66 pub y: i64,
67 pub width: i64,
68 pub height: i64,
69 pub is_title: bool,
70 pub is_body: bool,
71}
72
73impl ParsedShape {
74 pub fn new(name: &str) -> Self {
75 ParsedShape {
76 name: name.to_string(),
77 shape_type: None,
78 paragraphs: Vec::new(),
79 x: 0,
80 y: 0,
81 width: 0,
82 height: 0,
83 is_title: false,
84 is_body: false,
85 }
86 }
87
88 pub fn text(&self) -> String {
90 self.paragraphs
91 .iter()
92 .map(|p| p.text())
93 .collect::<Vec<_>>()
94 .join("\n")
95 }
96}
97
/// One cell of a parsed table.
#[derive(Clone, Debug)]
pub struct ParsedTableCell {
    /// Text of the cell.
    pub text: String,
    /// Row span; the slide parser reads the `rowSpan` attribute and falls back to `1`.
    pub row_span: u32,
    /// Column span; the slide parser reads the `gridSpan` attribute and falls back to `1`.
    pub col_span: u32,
}
105
106#[derive(Debug, Clone)]
108pub struct ParsedTable {
109 pub rows: Vec<Vec<ParsedTableCell>>,
110}
111
112impl ParsedTable {
113 pub fn new() -> Self {
114 ParsedTable { rows: Vec::new() }
115 }
116
117 pub fn row_count(&self) -> usize {
118 self.rows.len()
119 }
120
121 pub fn col_count(&self) -> usize {
122 self.rows.first().map(|r| r.len()).unwrap_or(0)
123 }
124}
125
126impl Default for ParsedTable {
127 fn default() -> Self {
128 Self::new()
129 }
130}
131
132#[derive(Debug, Clone)]
134pub struct ParsedSlide {
135 pub shapes: Vec<ParsedShape>,
136 pub tables: Vec<ParsedTable>,
137 pub title: Option<String>,
138 pub body_text: Vec<String>,
139}
140
141impl ParsedSlide {
142 pub fn new() -> Self {
143 ParsedSlide {
144 shapes: Vec::new(),
145 tables: Vec::new(),
146 title: None,
147 body_text: Vec::new(),
148 }
149 }
150
151 pub fn all_text(&self) -> Vec<String> {
153 let mut texts = Vec::new();
154 if let Some(ref title) = self.title {
155 texts.push(title.clone());
156 }
157 texts.extend(self.body_text.clone());
158 for shape in &self.shapes {
159 let text = shape.text();
160 if !text.is_empty() {
161 texts.push(text);
162 }
163 }
164 texts
165 }
166}
167
168impl Default for ParsedSlide {
169 fn default() -> Self {
170 Self::new()
171 }
172}
173
174pub struct SlideParser;
176
177impl SlideParser {
178 pub fn parse(xml: &str) -> Result<ParsedSlide, PptxError> {
180 let root = XmlParser::parse_str(xml)?;
181 let mut slide = ParsedSlide::new();
182
183 if let Some(sp_tree) = root.find_descendant("spTree") {
185 for sp in sp_tree.find_all("sp") {
187 if let Some(mut shape) = Self::parse_shape(sp) {
188 if Self::is_title_shape(sp) {
190 shape.is_title = true;
191 slide.title = Some(shape.text());
192 } else if Self::is_body_shape(sp) {
193 shape.is_body = true;
194 for para in &shape.paragraphs {
195 let text = para.text();
196 if !text.is_empty() {
197 slide.body_text.push(text);
198 }
199 }
200 }
201 slide.shapes.push(shape);
202 }
203 }
204
205 for gf in sp_tree.find_all("graphicFrame") {
207 if let Some(table) = Self::parse_table_from_graphic_frame(gf) {
208 slide.tables.push(table);
209 }
210 }
211 }
212
213 Ok(slide)
214 }
215
216 fn parse_shape(sp: &XmlElement) -> Option<ParsedShape> {
217 let name = sp
219 .find_descendant("cNvPr")
220 .and_then(|e| e.attr("name"))
221 .unwrap_or("Shape");
222
223 let mut shape = ParsedShape::new(name);
224
225 if let Some(xfrm) = sp.find_descendant("xfrm") {
227 if let Some(off) = xfrm.find("off") {
228 shape.x = off.attr("x").and_then(|v| v.parse().ok()).unwrap_or(0);
229 shape.y = off.attr("y").and_then(|v| v.parse().ok()).unwrap_or(0);
230 }
231 if let Some(ext) = xfrm.find("ext") {
232 shape.width = ext.attr("cx").and_then(|v| v.parse().ok()).unwrap_or(0);
233 shape.height = ext.attr("cy").and_then(|v| v.parse().ok()).unwrap_or(0);
234 }
235 }
236
237 if let Some(prst_geom) = sp.find_descendant("prstGeom") {
239 shape.shape_type = prst_geom.attr("prst").map(|s| s.to_string());
240 }
241
242 if let Some(tx_body) = sp.find_descendant("txBody") {
244 shape.paragraphs = Self::parse_text_body(tx_body);
245 }
246
247 Some(shape)
248 }
249
250 fn parse_text_body(tx_body: &XmlElement) -> Vec<Paragraph> {
251 let mut paragraphs = Vec::new();
252
253 for p in tx_body.find_all("p") {
254 let mut para = Paragraph::new();
255
256 if let Some(ppr) = p.find("pPr") {
258 para.level = ppr.attr("lvl").and_then(|v| v.parse().ok()).unwrap_or(0);
259 }
260
261 for r in p.find_all("r") {
263 let text = r.find("t").map(|t| t.text_content()).unwrap_or_default();
264 if text.is_empty() {
265 continue;
266 }
267
268 let mut run = TextRun::new(&text);
269
270 if let Some(rpr) = r.find("rPr") {
272 run.bold = rpr
273 .attr("b")
274 .map(|v| v == "1" || v == "true")
275 .unwrap_or(false);
276 run.italic = rpr
277 .attr("i")
278 .map(|v| v == "1" || v == "true")
279 .unwrap_or(false);
280 run.underline = rpr.attr("u").is_some();
281 run.font_size = rpr.attr("sz").and_then(|v| v.parse().ok());
282
283 if let Some(solid_fill) = rpr.find_descendant("solidFill") {
285 if let Some(srgb) = solid_fill.find("srgbClr") {
286 run.color = srgb.attr("val").map(|s| s.to_string());
287 }
288 }
289 }
290
291 para.runs.push(run);
292 }
293
294 if !para.runs.is_empty() {
295 paragraphs.push(para);
296 }
297 }
298
299 paragraphs
300 }
301
302 fn is_title_shape(sp: &XmlElement) -> bool {
303 if let Some(nv_pr) = sp.find_descendant("nvPr") {
305 if let Some(ph) = nv_pr.find("ph") {
306 let ph_type = ph.attr("type").unwrap_or("");
307 if ph_type == "title" || ph_type == "ctrTitle" {
308 return true;
309 }
310 }
311 }
312 if let Some(cnv_pr) = sp.find_descendant("cNvPr") {
314 if let Some(name) = cnv_pr.attr("name") {
315 let name_lower = name.to_lowercase();
316 if name_lower == "title" || name_lower.contains("title") {
317 return true;
318 }
319 }
320 }
321 false
322 }
323
324 fn is_body_shape(sp: &XmlElement) -> bool {
325 if let Some(nv_pr) = sp.find_descendant("nvPr") {
327 if let Some(ph) = nv_pr.find("ph") {
328 let ph_type = ph.attr("type").unwrap_or("body");
329 if ph_type == "body" || ph_type.is_empty() {
330 return true;
331 }
332 }
333 }
334 if let Some(cnv_pr) = sp.find_descendant("cNvPr") {
336 if let Some(name) = cnv_pr.attr("name") {
337 let name_lower = name.to_lowercase();
338 if name_lower == "content" || name_lower.contains("content") {
339 return true;
340 }
341 }
342 }
343 false
344 }
345
346 fn parse_table_from_graphic_frame(gf: &XmlElement) -> Option<ParsedTable> {
347 let tbl = gf.find_descendant("tbl")?;
349 let mut table = ParsedTable::new();
350
351 for tr in tbl.find_all("tr") {
352 let mut row = Vec::new();
353 for tc in tr.find_all("tc") {
354 let text = tc
355 .find_descendant("t")
356 .map(|t| t.text_content())
357 .unwrap_or_default();
358
359 let row_span = tc.attr("rowSpan").and_then(|v| v.parse().ok()).unwrap_or(1);
360 let col_span = tc
361 .attr("gridSpan")
362 .and_then(|v| v.parse().ok())
363 .unwrap_or(1);
364
365 row.push(ParsedTableCell {
366 text,
367 row_span,
368 col_span,
369 });
370 }
371 if !row.is_empty() {
372 table.rows.push(row);
373 }
374 }
375
376 if table.rows.is_empty() {
377 None
378 } else {
379 Some(table)
380 }
381 }
382}
383
#[cfg(test)]
mod tests {
    use super::*;

    /// A title placeholder and a two-paragraph body placeholder end up in
    /// `title` and `body_text` respectively.
    #[test]
    fn test_parse_simple_slide() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <p:sld xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
                   xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
                <p:cSld>
                    <p:spTree>
                        <p:sp>
                            <p:nvSpPr>
                                <p:cNvPr id="2" name="Title"/>
                                <p:nvPr><p:ph type="title"/></p:nvPr>
                            </p:nvSpPr>
                            <p:txBody>
                                <a:p>
                                    <a:r><a:t>Test Title</a:t></a:r>
                                </a:p>
                            </p:txBody>
                        </p:sp>
                        <p:sp>
                            <p:nvSpPr>
                                <p:cNvPr id="3" name="Content"/>
                                <p:nvPr><p:ph type="body"/></p:nvPr>
                            </p:nvSpPr>
                            <p:txBody>
                                <a:p>
                                    <a:r><a:t>Bullet 1</a:t></a:r>
                                </a:p>
                                <a:p>
                                    <a:r><a:t>Bullet 2</a:t></a:r>
                                </a:p>
                            </p:txBody>
                        </p:sp>
                    </p:spTree>
                </p:cSld>
            </p:sld>"#;

        let parsed = SlideParser::parse(xml).unwrap();
        assert_eq!(parsed.title.as_deref(), Some("Test Title"));
        assert_eq!(parsed.body_text, vec!["Bullet 1", "Bullet 2"]);
    }

    /// Run properties `b`, `i` and `sz` are carried over to the parsed run.
    #[test]
    fn test_parse_formatted_text() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <p:sld xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
                   xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
                <p:cSld>
                    <p:spTree>
                        <p:sp>
                            <p:nvSpPr>
                                <p:cNvPr id="2" name="Title"/>
                                <p:nvPr><p:ph type="title"/></p:nvPr>
                            </p:nvSpPr>
                            <p:txBody>
                                <a:p>
                                    <a:r>
                                        <a:rPr b="1" i="1" sz="4400"/>
                                        <a:t>Bold Italic</a:t>
                                    </a:r>
                                </a:p>
                            </p:txBody>
                        </p:sp>
                    </p:spTree>
                </p:cSld>
            </p:sld>"#;

        let parsed = SlideParser::parse(xml).unwrap();
        assert!(!parsed.shapes.is_empty());
        let first_shape = &parsed.shapes[0];
        assert!(!first_shape.paragraphs.is_empty());
        let first_run = &first_shape.paragraphs[0].runs[0];
        assert!(first_run.bold);
        assert!(first_run.italic);
        assert_eq!(first_run.font_size, Some(4400));
    }
}