// ppt_rs/cli/markdown/parser.rs

//! Markdown parser state machine.
//!
//! Handles parsing of markdown content into slide structures.
4
5use super::mermaid;
6use crate::generator::{
7    CodeBlock, Shape, ShapeFill, ShapeType, SlideContent, TableBuilder, TableCell, TableRow,
8};
9use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd};
10
11/// Parse markdown content into slides
12pub fn parse(content: &str) -> Result<Vec<SlideContent>, String> {
13    let mut parser = MarkdownParser::new();
14    parser.parse(content)
15}
16
17/// State machine for markdown parsing
18struct MarkdownParser {
19    slides: Vec<SlideContent>,
20    current_slide: Option<SlideContent>,
21    current_text: String,
22    // List state
23    in_list: bool,
24    list_items: Vec<String>,
25    // Table state
26    in_table: bool,
27    table_rows: Vec<Vec<String>>,
28    current_row: Vec<String>,
29    current_cell: String,
30    in_table_head: bool,
31    // Code block state
32    in_code_block: bool,
33    code_content: String,
34    code_language: Option<String>,
35    // Formatting state
36    is_bold: bool,
37    is_italic: bool,
38    // Blockquote (speaker notes)
39    in_blockquote: bool,
40    blockquote_text: String,
41    // Image state
42    pending_image: Option<(String, String)>,
43}
44
45impl MarkdownParser {
46    fn new() -> Self {
47        Self {
48            slides: Vec::new(),
49            current_slide: None,
50            current_text: String::new(),
51            in_list: false,
52            list_items: Vec::new(),
53            in_table: false,
54            table_rows: Vec::new(),
55            current_row: Vec::new(),
56            current_cell: String::new(),
57            in_table_head: false,
58            in_code_block: false,
59            code_content: String::new(),
60            code_language: None,
61            is_bold: false,
62            is_italic: false,
63            in_blockquote: false,
64            blockquote_text: String::new(),
65            pending_image: None,
66        }
67    }
68
69    fn parse(&mut self, content: &str) -> Result<Vec<SlideContent>, String> {
70        let options =
71            Options::ENABLE_TABLES | Options::ENABLE_STRIKETHROUGH | Options::ENABLE_TASKLISTS;
72
73        let parser = Parser::new_ext(content, options);
74
75        for event in parser {
76            self.handle_event(event);
77        }
78
79        self.finalize_current_slide();
80
81        if self.slides.is_empty() {
82            return Err("No slides found in markdown file".to_string());
83        }
84
85        Ok(std::mem::take(&mut self.slides))
86    }
87
88    fn handle_event(&mut self, event: Event) {
89        match event {
90            // Headings create new slides
91            Event::Start(Tag::Heading { level, .. }) => {
92                if level == HeadingLevel::H1 {
93                    self.finalize_current_slide();
94                }
95                self.current_text.clear();
96            }
97            Event::End(TagEnd::Heading(level)) => {
98                let title = std::mem::take(&mut self.current_text).trim().to_string();
99                if level == HeadingLevel::H1 {
100                    self.current_slide = Some(SlideContent::new(&title));
101                } else if let Some(ref mut slide) = self.current_slide {
102                    let formatted = format!("**{}**", title);
103                    *slide = slide.clone().add_bullet(&formatted);
104                }
105            }
106
107            // Lists
108            Event::Start(Tag::List(_)) => {
109                self.in_list = true;
110                self.list_items.clear();
111            }
112            Event::End(TagEnd::List(_)) => {
113                self.in_list = false;
114                self.flush_list_items();
115            }
116            Event::Start(Tag::Item) => {
117                self.current_text.clear();
118            }
119            Event::End(TagEnd::Item) => {
120                let item = std::mem::take(&mut self.current_text).trim().to_string();
121                if !item.is_empty() {
122                    self.list_items.push(item);
123                }
124            }
125
126            // Tables
127            Event::Start(Tag::Table(_)) => {
128                self.in_table = true;
129                self.table_rows.clear();
130                self.in_table_head = false;
131            }
132            Event::End(TagEnd::Table) => {
133                self.in_table = false;
134                self.flush_table();
135            }
136            Event::Start(Tag::TableHead) => {
137                self.in_table_head = true;
138                self.current_row.clear();
139            }
140            Event::End(TagEnd::TableHead) => {
141                self.in_table_head = false;
142                if !self.current_row.is_empty() {
143                    self.table_rows.push(std::mem::take(&mut self.current_row));
144                }
145            }
146            Event::Start(Tag::TableRow) => {
147                self.current_row.clear();
148            }
149            Event::End(TagEnd::TableRow) => {
150                if !self.current_row.is_empty() {
151                    self.table_rows.push(std::mem::take(&mut self.current_row));
152                }
153            }
154            Event::Start(Tag::TableCell) => {
155                self.current_cell.clear();
156            }
157            Event::End(TagEnd::TableCell) => {
158                self.current_row
159                    .push(std::mem::take(&mut self.current_cell).trim().to_string());
160            }
161
162            // Code blocks
163            Event::Start(Tag::CodeBlock(kind)) => {
164                self.in_code_block = true;
165                self.code_content.clear();
166                self.code_language = match kind {
167                    pulldown_cmark::CodeBlockKind::Fenced(lang) => {
168                        let lang_str = lang.to_string();
169                        if lang_str.is_empty() {
170                            None
171                        } else {
172                            Some(lang_str)
173                        }
174                    }
175                    _ => None,
176                };
177            }
178            Event::End(TagEnd::CodeBlock) => {
179                self.in_code_block = false;
180                self.flush_code_block();
181            }
182
183            // Blockquotes (speaker notes)
184            Event::Start(Tag::BlockQuote) => {
185                self.in_blockquote = true;
186                self.blockquote_text.clear();
187            }
188            Event::End(TagEnd::BlockQuote) => {
189                self.in_blockquote = false;
190                self.flush_blockquote();
191            }
192
193            // Inline formatting
194            Event::Start(Tag::Strong) => self.is_bold = true,
195            Event::End(TagEnd::Strong) => self.is_bold = false,
196            Event::Start(Tag::Emphasis) => self.is_italic = true,
197            Event::End(TagEnd::Emphasis) => self.is_italic = false,
198            Event::Code(code) => {
199                let formatted = format!("`{}`", code);
200                self.push_text(&formatted);
201            }
202
203            // Images
204            Event::Start(Tag::Image {
205                dest_url, title, ..
206            }) => {
207                self.pending_image = Some((dest_url.to_string(), title.to_string()));
208            }
209            Event::End(TagEnd::Image) => {
210                if let Some((url, alt)) = self.pending_image.take() {
211                    self.add_image_placeholder(&url, &alt);
212                }
213            }
214
215            // Horizontal rule = slide break
216            Event::Rule => {
217                self.finalize_current_slide();
218                if let Some(last) = self.slides.last() {
219                    let title = format!("{} (continued)", last.title);
220                    self.current_slide = Some(SlideContent::new(&title));
221                }
222            }
223
224            // Text content
225            Event::Text(text) => {
226                self.push_text(&text);
227            }
228            Event::SoftBreak | Event::HardBreak => {
229                self.push_text(" ");
230            }
231
232            // Paragraphs
233            Event::Start(Tag::Paragraph) => {
234                if !self.in_list && !self.in_table && !self.in_blockquote && !self.in_code_block {
235                    self.current_text.clear();
236                }
237            }
238            Event::End(TagEnd::Paragraph) => {
239                if !self.in_list && !self.in_table && !self.in_blockquote && !self.in_code_block {
240                    let text = std::mem::take(&mut self.current_text).trim().to_string();
241                    if !text.is_empty() {
242                        self.add_paragraph(&text);
243                    }
244                }
245            }
246
247            _ => {}
248        }
249    }
250
251    fn push_text(&mut self, text: &str) {
252        let formatted = if self.is_bold && self.is_italic {
253            format!("***{}***", text)
254        } else if self.is_bold {
255            format!("**{}**", text)
256        } else if self.is_italic {
257            format!("*{}*", text)
258        } else {
259            text.to_string()
260        };
261
262        if self.in_code_block {
263            self.code_content.push_str(text);
264        } else if self.in_table {
265            self.current_cell.push_str(&formatted);
266        } else if self.in_blockquote {
267            self.blockquote_text.push_str(&formatted);
268        } else {
269            self.current_text.push_str(&formatted);
270        }
271    }
272
273    fn add_paragraph(&mut self, text: &str) {
274        if let Some(ref mut slide) = self.current_slide {
275            *slide = slide.clone().add_bullet(text);
276        } else {
277            let mut slide = SlideContent::new("Slide");
278            slide = slide.add_bullet(text);
279            self.current_slide = Some(slide);
280        }
281    }
282
283    fn flush_list_items(&mut self) {
284        if self.list_items.is_empty() {
285            return;
286        }
287
288        let items = std::mem::take(&mut self.list_items);
289
290        if let Some(ref mut slide) = self.current_slide {
291            for item in items {
292                *slide = slide.clone().add_bullet(&item);
293            }
294        } else {
295            let mut slide = SlideContent::new("Slide");
296            for item in items {
297                slide = slide.add_bullet(&item);
298            }
299            self.current_slide = Some(slide);
300        }
301    }
302
303    fn flush_table(&mut self) {
304        if self.table_rows.is_empty() {
305            return;
306        }
307
308        let rows = std::mem::take(&mut self.table_rows);
309        let col_count = rows.iter().map(|r| r.len()).max().unwrap_or(1);
310        let col_width = 8000000u32 / col_count as u32;
311        let col_widths: Vec<u32> = vec![col_width; col_count];
312
313        let mut builder = TableBuilder::new(col_widths);
314
315        for (i, row_data) in rows.iter().enumerate() {
316            let cells: Vec<TableCell> = row_data
317                .iter()
318                .map(|cell_text| {
319                    let mut cell = TableCell::new(cell_text);
320                    if i == 0 {
321                        cell = cell.bold().background_color("4472C4").text_color("FFFFFF");
322                    }
323                    cell
324                })
325                .collect();
326
327            let mut cells = cells;
328            while cells.len() < col_count {
329                cells.push(TableCell::new(""));
330            }
331
332            builder = builder.add_row(TableRow::new(cells));
333        }
334
335        let table = builder.position(500000, 1800000).build();
336
337        if let Some(ref mut slide) = self.current_slide {
338            slide.table = Some(table);
339            slide.has_table = true;
340        } else {
341            let mut slide = SlideContent::new("Data Table");
342            slide.table = Some(table);
343            slide.has_table = true;
344            self.current_slide = Some(slide);
345        }
346    }
347
348    fn flush_code_block(&mut self) {
349        if self.code_content.is_empty() {
350            return;
351        }
352
353        let code = std::mem::take(&mut self.code_content);
354        let lang = self.code_language.take();
355        let lang_str = lang.as_deref().unwrap_or("text");
356
357        if lang_str == "mermaid" {
358            self.add_mermaid_diagram(&code);
359            return;
360        }
361
362        let code_block = CodeBlock::new(code.trim(), lang_str);
363
364        if let Some(ref mut slide) = self.current_slide {
365            slide.code_blocks.push(code_block);
366        } else {
367            let mut slide = SlideContent::new("Code");
368            slide.code_blocks.push(code_block);
369            self.current_slide = Some(slide);
370        }
371    }
372
373    fn add_mermaid_diagram(&mut self, code: &str) {
374        let elements = mermaid::create_diagram_elements(code);
375        let diagram_type = mermaid::detect_type(code);
376        let (_, _, title, _) = mermaid::get_diagram_style(diagram_type);
377
378        // Center diagram on slide if bounds are available
379        // Slide dimensions: 9144000 x 6858000 EMU (standard 16:9)
380        let slide_width = 9_144_000u32;
381        let slide_height = 6_858_000u32;
382        let title_offset = 1_200_000u32; // Leave space for title
383
384        let (offset_x, offset_y) = if let Some(bounds) = &elements.bounds {
385            // Calculate offset to center diagram
386            let available_height = slide_height - title_offset;
387            let center_x = (slide_width.saturating_sub(bounds.width)) / 2;
388            let center_y = title_offset + (available_height.saturating_sub(bounds.height)) / 2;
389
390            // Offset from current position to centered position
391            (
392                center_x.saturating_sub(bounds.x) as i32,
393                center_y.saturating_sub(bounds.y) as i32,
394            )
395        } else {
396            (0, 0)
397        };
398
399        // Apply offset to shapes
400        let shapes: Vec<_> = elements
401            .shapes
402            .into_iter()
403            .map(|mut shape| {
404                shape.x = (shape.x as i32 + offset_x).max(0) as u32;
405                shape.y = (shape.y as i32 + offset_y).max(0) as u32;
406                shape
407            })
408            .collect();
409
410        // Apply offset to connectors
411        let connectors: Vec<_> = elements
412            .connectors
413            .into_iter()
414            .map(|mut conn| {
415                conn.start_x = (conn.start_x as i32 + offset_x).max(0) as u32;
416                conn.start_y = (conn.start_y as i32 + offset_y).max(0) as u32;
417                conn.end_x = (conn.end_x as i32 + offset_x).max(0) as u32;
418                conn.end_y = (conn.end_y as i32 + offset_y).max(0) as u32;
419                conn
420            })
421            .collect();
422
423        if let Some(ref mut slide) = self.current_slide {
424            for shape in shapes {
425                slide.shapes.push(shape);
426            }
427            for connector in connectors {
428                slide.connectors.push(connector);
429            }
430        } else {
431            let mut slide = SlideContent::new(title);
432            for shape in shapes {
433                slide.shapes.push(shape);
434            }
435            for connector in connectors {
436                slide.connectors.push(connector);
437            }
438            self.current_slide = Some(slide);
439        }
440    }
441
442    fn flush_blockquote(&mut self) {
443        if self.blockquote_text.is_empty() {
444            return;
445        }
446
447        let notes = std::mem::take(&mut self.blockquote_text).trim().to_string();
448
449        if let Some(ref mut slide) = self.current_slide {
450            slide.notes = Some(notes);
451        }
452    }
453
454    fn add_image_placeholder(&mut self, url: &str, alt: &str) {
455        let label = if alt.is_empty() { url } else { alt };
456
457        let shape = Shape::new(ShapeType::Rectangle, 2000000, 2000000, 5000000, 3000000)
458            .with_fill(ShapeFill::new("E0E0E0"))
459            .with_text(&format!("[Image: {}]", label));
460
461        if let Some(ref mut slide) = self.current_slide {
462            slide.shapes.push(shape);
463        } else {
464            let mut slide = SlideContent::new("Image");
465            slide.shapes.push(shape);
466            self.current_slide = Some(slide);
467        }
468    }
469
470    fn finalize_current_slide(&mut self) {
471        self.flush_list_items();
472
473        if let Some(slide) = self.current_slide.take() {
474            self.slides.push(slide);
475        }
476    }
477}
478
#[cfg(test)]
mod tests {
    use super::*;

    /// Each H1 heading opens its own slide, titled with the heading text.
    #[test]
    fn test_basic_headings() {
        let deck = parse("# Slide 1\n- Bullet 1\n\n# Slide 2\n- Bullet 2").unwrap();

        assert_eq!(deck.len(), 2);
        assert_eq!(deck[0].title, "Slide 1");
        assert_eq!(deck[1].title, "Slide 2");
    }

    /// Every list item becomes one bullet on the slide.
    #[test]
    fn test_bullets() {
        let deck = parse("# Test\n- Item 1\n- Item 2\n- Item 3").unwrap();
        let first = &deck[0];

        assert_eq!(first.content.len(), 3);
    }

    /// A markdown table is attached to the slide it appears on.
    #[test]
    fn test_table() {
        let deck = parse("# Data\n\n| A | B |\n|---|---|\n| 1 | 2 |").unwrap();
        let first = &deck[0];

        assert!(first.table.is_some());
    }

    /// A fenced code block is kept together with its language tag.
    #[test]
    fn test_code_block() {
        let deck = parse("# Code\n\n```rust\nfn main() {}\n```").unwrap();
        let first = &deck[0];

        assert!(!first.code_blocks.is_empty());
        assert_eq!(first.code_blocks[0].language, "rust");
    }

    /// A blockquote becomes the slide's speaker notes.
    #[test]
    fn test_speaker_notes() {
        let deck = parse("# Slide\n- Content\n\n> Speaker notes here").unwrap();
        let first = &deck[0];

        assert!(first.notes.is_some());
    }

    /// Bold text keeps its markdown markers inside the bullet.
    #[test]
    fn test_formatting() {
        let deck = parse("# Test\n- **Bold** and *italic*").unwrap();
        let first_bullet = &deck[0].content[0];

        assert!(first_bullet.contains("**Bold**"));
    }

    /// A mermaid flowchart is rendered as shapes on the slide.
    #[test]
    fn test_mermaid_flowchart() {
        let deck = parse("# Process\n\n```mermaid\nflowchart LR\n    A --> B --> C\n```").unwrap();
        let first = &deck[0];

        assert!(!first.shapes.is_empty());
    }

    /// A mermaid sequence diagram is rendered as shapes on the slide.
    #[test]
    fn test_mermaid_sequence() {
        let deck =
            parse("# Sequence\n\n```mermaid\nsequenceDiagram\n    Alice->>Bob: Hello\n```").unwrap();
        let first = &deck[0];

        assert!(!first.shapes.is_empty());
    }
}
541}