1use super::xmlchemy::{XmlElement, XmlParser};
6use crate::exc::PptxError;
7
/// A run of text inside a paragraph together with its character formatting.
#[derive(Debug, Clone)]
pub struct TextRun {
    /// The characters of the run.
    pub text: String,
    /// Whether the run is bold.
    pub bold: bool,
    /// Whether the run is italic.
    pub italic: bool,
    /// Whether the run is underlined.
    pub underline: bool,
    /// Font size, if one was given. `SlideParser` stores the numeric value of the
    /// run's `sz` attribute here without converting it.
    pub font_size: Option<u32>,
    /// Colour, if one was given. `SlideParser` stores the `val` attribute of the
    /// run's `srgbClr` fill here.
    pub color: Option<String>,
}

impl TextRun {
    /// Builds an unformatted run: `text` is copied, every flag is `false`, and
    /// neither a font size nor a colour is set.
    pub fn new(text: &str) -> Self {
        let text = String::from(text);
        Self {
            text,
            bold: false,
            italic: false,
            underline: false,
            font_size: None,
            color: None,
        }
    }
}
31
32#[derive(Debug, Clone)]
34pub struct Paragraph {
35 pub runs: Vec<TextRun>,
36 pub level: u32,
37}
38
39impl Paragraph {
40 pub fn new() -> Self {
41 Paragraph {
42 runs: Vec::new(),
43 level: 0,
44 }
45 }
46
47 pub fn text(&self) -> String {
49 self.runs.iter().map(|r| r.text.as_str()).collect()
50 }
51}
52
53impl Default for Paragraph {
54 fn default() -> Self {
55 Self::new()
56 }
57}
58
59#[derive(Debug, Clone)]
61pub struct ParsedShape {
62 pub name: String,
63 pub shape_type: Option<String>,
64 pub paragraphs: Vec<Paragraph>,
65 pub x: i64,
66 pub y: i64,
67 pub width: i64,
68 pub height: i64,
69 pub is_title: bool,
70 pub is_body: bool,
71}
72
73impl ParsedShape {
74 pub fn new(name: &str) -> Self {
75 ParsedShape {
76 name: name.to_string(),
77 shape_type: None,
78 paragraphs: Vec::new(),
79 x: 0,
80 y: 0,
81 width: 0,
82 height: 0,
83 is_title: false,
84 is_body: false,
85 }
86 }
87
88 pub fn text(&self) -> String {
90 self.paragraphs.iter()
91 .map(|p| p.text())
92 .collect::<Vec<_>>()
93 .join("\n")
94 }
95}
96
/// A single cell of a [`ParsedTable`].
#[derive(Debug, Clone)]
pub struct ParsedTableCell {
    /// Text of the cell.
    pub text: String,
    /// Number of rows the cell spans.
    pub row_span: u32,
    /// Number of columns the cell spans.
    pub col_span: u32,
}

/// A table as a list of rows, each row being a list of cells.
#[derive(Debug, Clone)]
pub struct ParsedTable {
    /// Rows in document order; rows are not required to have equal length.
    pub rows: Vec<Vec<ParsedTableCell>>,
}

impl ParsedTable {
    /// Creates a table without rows.
    pub fn new() -> Self {
        Self::default()
    }

    /// Number of rows in the table.
    pub fn row_count(&self) -> usize {
        self.rows.len()
    }

    /// Number of cells in the first row, or `0` for a table without rows.
    ///
    /// Only the first row is inspected; cell spans are not taken into account.
    pub fn col_count(&self) -> usize {
        match self.rows.first() {
            Some(first_row) => first_row.len(),
            None => 0,
        }
    }
}

impl Default for ParsedTable {
    /// Same as [`ParsedTable::new`]: a table without rows.
    fn default() -> Self {
        Self { rows: Vec::new() }
    }
}
130
131#[derive(Debug, Clone)]
133pub struct ParsedSlide {
134 pub shapes: Vec<ParsedShape>,
135 pub tables: Vec<ParsedTable>,
136 pub title: Option<String>,
137 pub body_text: Vec<String>,
138}
139
140impl ParsedSlide {
141 pub fn new() -> Self {
142 ParsedSlide {
143 shapes: Vec::new(),
144 tables: Vec::new(),
145 title: None,
146 body_text: Vec::new(),
147 }
148 }
149
150 pub fn all_text(&self) -> Vec<String> {
152 let mut texts = Vec::new();
153 if let Some(ref title) = self.title {
154 texts.push(title.clone());
155 }
156 texts.extend(self.body_text.clone());
157 for shape in &self.shapes {
158 let text = shape.text();
159 if !text.is_empty() {
160 texts.push(text);
161 }
162 }
163 texts
164 }
165}
166
167impl Default for ParsedSlide {
168 fn default() -> Self {
169 Self::new()
170 }
171}
172
173pub struct SlideParser;
175
176impl SlideParser {
177 pub fn parse(xml: &str) -> Result<ParsedSlide, PptxError> {
179 let root = XmlParser::parse_str(xml)?;
180 let mut slide = ParsedSlide::new();
181
182 if let Some(sp_tree) = root.find_descendant("spTree") {
184 for sp in sp_tree.find_all("sp") {
186 if let Some(mut shape) = Self::parse_shape(sp) {
187 if Self::is_title_shape(sp) {
189 shape.is_title = true;
190 slide.title = Some(shape.text());
191 } else if Self::is_body_shape(sp) {
192 shape.is_body = true;
193 for para in &shape.paragraphs {
194 let text = para.text();
195 if !text.is_empty() {
196 slide.body_text.push(text);
197 }
198 }
199 }
200 slide.shapes.push(shape);
201 }
202 }
203
204 for gf in sp_tree.find_all("graphicFrame") {
206 if let Some(table) = Self::parse_table_from_graphic_frame(gf) {
207 slide.tables.push(table);
208 }
209 }
210 }
211
212 Ok(slide)
213 }
214
215 fn parse_shape(sp: &XmlElement) -> Option<ParsedShape> {
216 let name = sp.find_descendant("cNvPr")
218 .and_then(|e| e.attr("name"))
219 .unwrap_or("Shape");
220
221 let mut shape = ParsedShape::new(name);
222
223 if let Some(xfrm) = sp.find_descendant("xfrm") {
225 if let Some(off) = xfrm.find("off") {
226 shape.x = off.attr("x").and_then(|v| v.parse().ok()).unwrap_or(0);
227 shape.y = off.attr("y").and_then(|v| v.parse().ok()).unwrap_or(0);
228 }
229 if let Some(ext) = xfrm.find("ext") {
230 shape.width = ext.attr("cx").and_then(|v| v.parse().ok()).unwrap_or(0);
231 shape.height = ext.attr("cy").and_then(|v| v.parse().ok()).unwrap_or(0);
232 }
233 }
234
235 if let Some(prst_geom) = sp.find_descendant("prstGeom") {
237 shape.shape_type = prst_geom.attr("prst").map(|s| s.to_string());
238 }
239
240 if let Some(tx_body) = sp.find_descendant("txBody") {
242 shape.paragraphs = Self::parse_text_body(tx_body);
243 }
244
245 Some(shape)
246 }
247
248 fn parse_text_body(tx_body: &XmlElement) -> Vec<Paragraph> {
249 let mut paragraphs = Vec::new();
250
251 for p in tx_body.find_all("p") {
252 let mut para = Paragraph::new();
253
254 if let Some(ppr) = p.find("pPr") {
256 para.level = ppr.attr("lvl").and_then(|v| v.parse().ok()).unwrap_or(0);
257 }
258
259 for r in p.find_all("r") {
261 let text = r.find("t").map(|t| t.text_content()).unwrap_or_default();
262 if text.is_empty() {
263 continue;
264 }
265
266 let mut run = TextRun::new(&text);
267
268 if let Some(rpr) = r.find("rPr") {
270 run.bold = rpr.attr("b").map(|v| v == "1" || v == "true").unwrap_or(false);
271 run.italic = rpr.attr("i").map(|v| v == "1" || v == "true").unwrap_or(false);
272 run.underline = rpr.attr("u").is_some();
273 run.font_size = rpr.attr("sz").and_then(|v| v.parse().ok());
274
275 if let Some(solid_fill) = rpr.find_descendant("solidFill") {
277 if let Some(srgb) = solid_fill.find("srgbClr") {
278 run.color = srgb.attr("val").map(|s| s.to_string());
279 }
280 }
281 }
282
283 para.runs.push(run);
284 }
285
286 if !para.runs.is_empty() {
287 paragraphs.push(para);
288 }
289 }
290
291 paragraphs
292 }
293
294 fn is_title_shape(sp: &XmlElement) -> bool {
295 if let Some(nv_pr) = sp.find_descendant("nvPr") {
297 if let Some(ph) = nv_pr.find("ph") {
298 let ph_type = ph.attr("type").unwrap_or("");
299 if ph_type == "title" || ph_type == "ctrTitle" {
300 return true;
301 }
302 }
303 }
304 if let Some(cnv_pr) = sp.find_descendant("cNvPr") {
306 if let Some(name) = cnv_pr.attr("name") {
307 let name_lower = name.to_lowercase();
308 if name_lower == "title" || name_lower.contains("title") {
309 return true;
310 }
311 }
312 }
313 false
314 }
315
316 fn is_body_shape(sp: &XmlElement) -> bool {
317 if let Some(nv_pr) = sp.find_descendant("nvPr") {
319 if let Some(ph) = nv_pr.find("ph") {
320 let ph_type = ph.attr("type").unwrap_or("body");
321 if ph_type == "body" || ph_type.is_empty() {
322 return true;
323 }
324 }
325 }
326 if let Some(cnv_pr) = sp.find_descendant("cNvPr") {
328 if let Some(name) = cnv_pr.attr("name") {
329 let name_lower = name.to_lowercase();
330 if name_lower == "content" || name_lower.contains("content") {
331 return true;
332 }
333 }
334 }
335 false
336 }
337
338 fn parse_table_from_graphic_frame(gf: &XmlElement) -> Option<ParsedTable> {
339 let tbl = gf.find_descendant("tbl")?;
341 let mut table = ParsedTable::new();
342
343 for tr in tbl.find_all("tr") {
344 let mut row = Vec::new();
345 for tc in tr.find_all("tc") {
346 let text = tc.find_descendant("t")
347 .map(|t| t.text_content())
348 .unwrap_or_default();
349
350 let row_span = tc.attr("rowSpan").and_then(|v| v.parse().ok()).unwrap_or(1);
351 let col_span = tc.attr("gridSpan").and_then(|v| v.parse().ok()).unwrap_or(1);
352
353 row.push(ParsedTableCell {
354 text,
355 row_span,
356 col_span,
357 });
358 }
359 if !row.is_empty() {
360 table.rows.push(row);
361 }
362 }
363
364 if table.rows.is_empty() {
365 None
366 } else {
367 Some(table)
368 }
369 }
370}
371
#[cfg(test)]
mod tests {
    use super::*;

    /// A title placeholder and a two-paragraph body placeholder are split into
    /// `title` and `body_text`, and both shapes are kept in `shapes`.
    #[test]
    fn test_parse_simple_slide() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
        <p:sld xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
               xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
            <p:cSld>
                <p:spTree>
                    <p:sp>
                        <p:nvSpPr>
                            <p:cNvPr id="2" name="Title"/>
                            <p:nvPr><p:ph type="title"/></p:nvPr>
                        </p:nvSpPr>
                        <p:txBody>
                            <a:p>
                                <a:r><a:t>Test Title</a:t></a:r>
                            </a:p>
                        </p:txBody>
                    </p:sp>
                    <p:sp>
                        <p:nvSpPr>
                            <p:cNvPr id="3" name="Content"/>
                            <p:nvPr><p:ph type="body"/></p:nvPr>
                        </p:nvSpPr>
                        <p:txBody>
                            <a:p>
                                <a:r><a:t>Bullet 1</a:t></a:r>
                            </a:p>
                            <a:p>
                                <a:r><a:t>Bullet 2</a:t></a:r>
                            </a:p>
                        </p:txBody>
                    </p:sp>
                </p:spTree>
            </p:cSld>
        </p:sld>"#;

        let slide = SlideParser::parse(xml).unwrap();
        assert_eq!(slide.title, Some("Test Title".to_string()));
        assert_eq!(slide.body_text.len(), 2);
        assert_eq!(slide.body_text[0], "Bullet 1");
        assert_eq!(slide.body_text[1], "Bullet 2");
        assert_eq!(slide.shapes.len(), 2);
        assert!(slide.tables.is_empty());
    }

    /// Run properties (`b`, `i`, `sz`) are carried over onto the parsed run.
    #[test]
    fn test_parse_formatted_text() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
        <p:sld xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
               xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
            <p:cSld>
                <p:spTree>
                    <p:sp>
                        <p:nvSpPr>
                            <p:cNvPr id="2" name="Title"/>
                            <p:nvPr><p:ph type="title"/></p:nvPr>
                        </p:nvSpPr>
                        <p:txBody>
                            <a:p>
                                <a:r>
                                    <a:rPr b="1" i="1" sz="4400"/>
                                    <a:t>Bold Italic</a:t>
                                </a:r>
                            </a:p>
                        </p:txBody>
                    </p:sp>
                </p:spTree>
            </p:cSld>
        </p:sld>"#;

        let slide = SlideParser::parse(xml).unwrap();
        assert!(!slide.shapes.is_empty());
        let shape = &slide.shapes[0];
        assert!(shape.is_title);
        assert!(!shape.paragraphs.is_empty());
        let run = &shape.paragraphs[0].runs[0];
        assert_eq!(run.text, "Bold Italic");
        assert!(run.bold);
        assert!(run.italic);
        // No `u` attribute on the run properties, so the run is not underlined.
        assert!(!run.underline);
        assert_eq!(run.font_size, Some(4400));
    }
}