Skip to main content

ppt_rs/cli/markdown/
parser.rs

1//! Markdown parser state machine
2//!
3//! Handles parsing of markdown content into slide structures.
4
5use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd};
6use crate::generator::{SlideContent, TableBuilder, TableRow, TableCell, Shape, ShapeType, ShapeFill, CodeBlock};
7use super::mermaid;
8
9/// Parse markdown content into slides
10pub fn parse(content: &str) -> Result<Vec<SlideContent>, String> {
11    let mut parser = MarkdownParser::new();
12    parser.parse(content)
13}
14
15/// State machine for markdown parsing
16struct MarkdownParser {
17    slides: Vec<SlideContent>,
18    current_slide: Option<SlideContent>,
19    current_text: String,
20    // List state
21    in_list: bool,
22    list_items: Vec<String>,
23    // Table state
24    in_table: bool,
25    table_rows: Vec<Vec<String>>,
26    current_row: Vec<String>,
27    current_cell: String,
28    in_table_head: bool,
29    // Code block state
30    in_code_block: bool,
31    code_content: String,
32    code_language: Option<String>,
33    // Formatting state
34    is_bold: bool,
35    is_italic: bool,
36    // Blockquote (speaker notes)
37    in_blockquote: bool,
38    blockquote_text: String,
39    // Image state
40    pending_image: Option<(String, String)>,
41}
42
43impl MarkdownParser {
44    fn new() -> Self {
45        Self {
46            slides: Vec::new(),
47            current_slide: None,
48            current_text: String::new(),
49            in_list: false,
50            list_items: Vec::new(),
51            in_table: false,
52            table_rows: Vec::new(),
53            current_row: Vec::new(),
54            current_cell: String::new(),
55            in_table_head: false,
56            in_code_block: false,
57            code_content: String::new(),
58            code_language: None,
59            is_bold: false,
60            is_italic: false,
61            in_blockquote: false,
62            blockquote_text: String::new(),
63            pending_image: None,
64        }
65    }
66
67    fn parse(&mut self, content: &str) -> Result<Vec<SlideContent>, String> {
68        let options = Options::ENABLE_TABLES 
69            | Options::ENABLE_STRIKETHROUGH
70            | Options::ENABLE_TASKLISTS;
71        
72        let parser = Parser::new_ext(content, options);
73        
74        for event in parser {
75            self.handle_event(event);
76        }
77        
78        self.finalize_current_slide();
79        
80        if self.slides.is_empty() {
81            return Err("No slides found in markdown file".to_string());
82        }
83        
84        Ok(std::mem::take(&mut self.slides))
85    }
86
87    fn handle_event(&mut self, event: Event) {
88        match event {
89            // Headings create new slides
90            Event::Start(Tag::Heading { level, .. }) => {
91                if level == HeadingLevel::H1 {
92                    self.finalize_current_slide();
93                }
94                self.current_text.clear();
95            }
96            Event::End(TagEnd::Heading(level)) => {
97                let title = std::mem::take(&mut self.current_text).trim().to_string();
98                if level == HeadingLevel::H1 {
99                    self.current_slide = Some(SlideContent::new(&title));
100                } else if let Some(ref mut slide) = self.current_slide {
101                    let formatted = format!("**{}**", title);
102                    *slide = slide.clone().add_bullet(&formatted);
103                }
104            }
105            
106            // Lists
107            Event::Start(Tag::List(_)) => {
108                self.in_list = true;
109                self.list_items.clear();
110            }
111            Event::End(TagEnd::List(_)) => {
112                self.in_list = false;
113                self.flush_list_items();
114            }
115            Event::Start(Tag::Item) => {
116                self.current_text.clear();
117            }
118            Event::End(TagEnd::Item) => {
119                let item = std::mem::take(&mut self.current_text).trim().to_string();
120                if !item.is_empty() {
121                    self.list_items.push(item);
122                }
123            }
124            
125            // Tables
126            Event::Start(Tag::Table(_)) => {
127                self.in_table = true;
128                self.table_rows.clear();
129                self.in_table_head = false;
130            }
131            Event::End(TagEnd::Table) => {
132                self.in_table = false;
133                self.flush_table();
134            }
135            Event::Start(Tag::TableHead) => {
136                self.in_table_head = true;
137                self.current_row.clear();
138            }
139            Event::End(TagEnd::TableHead) => {
140                self.in_table_head = false;
141                if !self.current_row.is_empty() {
142                    self.table_rows.push(std::mem::take(&mut self.current_row));
143                }
144            }
145            Event::Start(Tag::TableRow) => {
146                self.current_row.clear();
147            }
148            Event::End(TagEnd::TableRow) => {
149                if !self.current_row.is_empty() {
150                    self.table_rows.push(std::mem::take(&mut self.current_row));
151                }
152            }
153            Event::Start(Tag::TableCell) => {
154                self.current_cell.clear();
155            }
156            Event::End(TagEnd::TableCell) => {
157                self.current_row.push(std::mem::take(&mut self.current_cell).trim().to_string());
158            }
159            
160            // Code blocks
161            Event::Start(Tag::CodeBlock(kind)) => {
162                self.in_code_block = true;
163                self.code_content.clear();
164                self.code_language = match kind {
165                    pulldown_cmark::CodeBlockKind::Fenced(lang) => {
166                        let lang_str = lang.to_string();
167                        if lang_str.is_empty() { None } else { Some(lang_str) }
168                    }
169                    _ => None,
170                };
171            }
172            Event::End(TagEnd::CodeBlock) => {
173                self.in_code_block = false;
174                self.flush_code_block();
175            }
176            
177            // Blockquotes (speaker notes)
178            Event::Start(Tag::BlockQuote) => {
179                self.in_blockquote = true;
180                self.blockquote_text.clear();
181            }
182            Event::End(TagEnd::BlockQuote) => {
183                self.in_blockquote = false;
184                self.flush_blockquote();
185            }
186            
187            // Inline formatting
188            Event::Start(Tag::Strong) => self.is_bold = true,
189            Event::End(TagEnd::Strong) => self.is_bold = false,
190            Event::Start(Tag::Emphasis) => self.is_italic = true,
191            Event::End(TagEnd::Emphasis) => self.is_italic = false,
192            Event::Code(code) => {
193                let formatted = format!("`{}`", code);
194                self.push_text(&formatted);
195            }
196            
197            // Images
198            Event::Start(Tag::Image { dest_url, title, .. }) => {
199                self.pending_image = Some((dest_url.to_string(), title.to_string()));
200            }
201            Event::End(TagEnd::Image) => {
202                if let Some((url, alt)) = self.pending_image.take() {
203                    self.add_image_placeholder(&url, &alt);
204                }
205            }
206            
207            // Horizontal rule = slide break
208            Event::Rule => {
209                self.finalize_current_slide();
210                if let Some(last) = self.slides.last() {
211                    let title = format!("{} (continued)", last.title);
212                    self.current_slide = Some(SlideContent::new(&title));
213                }
214            }
215            
216            // Text content
217            Event::Text(text) => {
218                self.push_text(&text);
219            }
220            Event::SoftBreak | Event::HardBreak => {
221                self.push_text(" ");
222            }
223            
224            // Paragraphs
225            Event::Start(Tag::Paragraph) => {
226                if !self.in_list && !self.in_table && !self.in_blockquote && !self.in_code_block {
227                    self.current_text.clear();
228                }
229            }
230            Event::End(TagEnd::Paragraph) => {
231                if !self.in_list && !self.in_table && !self.in_blockquote && !self.in_code_block {
232                    let text = std::mem::take(&mut self.current_text).trim().to_string();
233                    if !text.is_empty() {
234                        self.add_paragraph(&text);
235                    }
236                }
237            }
238            
239            _ => {}
240        }
241    }
242
243    fn push_text(&mut self, text: &str) {
244        let formatted = if self.is_bold && self.is_italic {
245            format!("***{}***", text)
246        } else if self.is_bold {
247            format!("**{}**", text)
248        } else if self.is_italic {
249            format!("*{}*", text)
250        } else {
251            text.to_string()
252        };
253        
254        if self.in_code_block {
255            self.code_content.push_str(text);
256        } else if self.in_table {
257            self.current_cell.push_str(&formatted);
258        } else if self.in_blockquote {
259            self.blockquote_text.push_str(&formatted);
260        } else {
261            self.current_text.push_str(&formatted);
262        }
263    }
264
265    fn add_paragraph(&mut self, text: &str) {
266        if let Some(ref mut slide) = self.current_slide {
267            *slide = slide.clone().add_bullet(text);
268        } else {
269            let mut slide = SlideContent::new("Slide");
270            slide = slide.add_bullet(text);
271            self.current_slide = Some(slide);
272        }
273    }
274
275    fn flush_list_items(&mut self) {
276        if self.list_items.is_empty() {
277            return;
278        }
279        
280        let items = std::mem::take(&mut self.list_items);
281        
282        if let Some(ref mut slide) = self.current_slide {
283            for item in items {
284                *slide = slide.clone().add_bullet(&item);
285            }
286        } else {
287            let mut slide = SlideContent::new("Slide");
288            for item in items {
289                slide = slide.add_bullet(&item);
290            }
291            self.current_slide = Some(slide);
292        }
293    }
294
295    fn flush_table(&mut self) {
296        if self.table_rows.is_empty() {
297            return;
298        }
299        
300        let rows = std::mem::take(&mut self.table_rows);
301        let col_count = rows.iter().map(|r| r.len()).max().unwrap_or(1);
302        let col_width = 8000000u32 / col_count as u32;
303        let col_widths: Vec<u32> = vec![col_width; col_count];
304        
305        let mut builder = TableBuilder::new(col_widths);
306        
307        for (i, row_data) in rows.iter().enumerate() {
308            let cells: Vec<TableCell> = row_data.iter().map(|cell_text| {
309                let mut cell = TableCell::new(cell_text);
310                if i == 0 {
311                    cell = cell.bold().background_color("4472C4").text_color("FFFFFF");
312                }
313                cell
314            }).collect();
315            
316            let mut cells = cells;
317            while cells.len() < col_count {
318                cells.push(TableCell::new(""));
319            }
320            
321            builder = builder.add_row(TableRow::new(cells));
322        }
323        
324        let table = builder.position(500000, 1800000).build();
325        
326        if let Some(ref mut slide) = self.current_slide {
327            slide.table = Some(table);
328            slide.has_table = true;
329        } else {
330            let mut slide = SlideContent::new("Data Table");
331            slide.table = Some(table);
332            slide.has_table = true;
333            self.current_slide = Some(slide);
334        }
335    }
336
337    fn flush_code_block(&mut self) {
338        if self.code_content.is_empty() {
339            return;
340        }
341        
342        let code = std::mem::take(&mut self.code_content);
343        let lang = self.code_language.take();
344        let lang_str = lang.as_deref().unwrap_or("text");
345        
346        if lang_str == "mermaid" {
347            self.add_mermaid_diagram(&code);
348            return;
349        }
350        
351        let code_block = CodeBlock::new(code.trim(), lang_str);
352        
353        if let Some(ref mut slide) = self.current_slide {
354            slide.code_blocks.push(code_block);
355        } else {
356            let mut slide = SlideContent::new("Code");
357            slide.code_blocks.push(code_block);
358            self.current_slide = Some(slide);
359        }
360    }
361
362    fn add_mermaid_diagram(&mut self, code: &str) {
363        let elements = mermaid::create_diagram_elements(code);
364        let diagram_type = mermaid::detect_type(code);
365        let (_, _, title, _) = mermaid::get_diagram_style(diagram_type);
366        
367        // Center diagram on slide if bounds are available
368        // Slide dimensions: 9144000 x 6858000 EMU (standard 16:9)
369        let slide_width = 9_144_000u32;
370        let slide_height = 6_858_000u32;
371        let title_offset = 1_200_000u32; // Leave space for title
372        
373        let (offset_x, offset_y) = if let Some(bounds) = &elements.bounds {
374            // Calculate offset to center diagram
375            let available_height = slide_height - title_offset;
376            let center_x = (slide_width.saturating_sub(bounds.width)) / 2;
377            let center_y = title_offset + (available_height.saturating_sub(bounds.height)) / 2;
378            
379            // Offset from current position to centered position
380            (center_x.saturating_sub(bounds.x) as i32, center_y.saturating_sub(bounds.y) as i32)
381        } else {
382            (0, 0)
383        };
384        
385        // Apply offset to shapes
386        let shapes: Vec<_> = elements.shapes.into_iter().map(|mut shape| {
387            shape.x = (shape.x as i32 + offset_x).max(0) as u32;
388            shape.y = (shape.y as i32 + offset_y).max(0) as u32;
389            shape
390        }).collect();
391        
392        // Apply offset to connectors
393        let connectors: Vec<_> = elements.connectors.into_iter().map(|mut conn| {
394            conn.start_x = (conn.start_x as i32 + offset_x).max(0) as u32;
395            conn.start_y = (conn.start_y as i32 + offset_y).max(0) as u32;
396            conn.end_x = (conn.end_x as i32 + offset_x).max(0) as u32;
397            conn.end_y = (conn.end_y as i32 + offset_y).max(0) as u32;
398            conn
399        }).collect();
400        
401        if let Some(ref mut slide) = self.current_slide {
402            for shape in shapes {
403                slide.shapes.push(shape);
404            }
405            for connector in connectors {
406                slide.connectors.push(connector);
407            }
408        } else {
409            let mut slide = SlideContent::new(title);
410            for shape in shapes {
411                slide.shapes.push(shape);
412            }
413            for connector in connectors {
414                slide.connectors.push(connector);
415            }
416            self.current_slide = Some(slide);
417        }
418    }
419
420    fn flush_blockquote(&mut self) {
421        if self.blockquote_text.is_empty() {
422            return;
423        }
424        
425        let notes = std::mem::take(&mut self.blockquote_text).trim().to_string();
426        
427        if let Some(ref mut slide) = self.current_slide {
428            slide.notes = Some(notes);
429        }
430    }
431
432    fn add_image_placeholder(&mut self, url: &str, alt: &str) {
433        let label = if alt.is_empty() { url } else { alt };
434        
435        let shape = Shape::new(ShapeType::Rectangle, 2000000, 2000000, 5000000, 3000000)
436            .with_fill(ShapeFill::new("E0E0E0"))
437            .with_text(&format!("[Image: {}]", label));
438        
439        if let Some(ref mut slide) = self.current_slide {
440            slide.shapes.push(shape);
441        } else {
442            let mut slide = SlideContent::new("Image");
443            slide.shapes.push(shape);
444            self.current_slide = Some(slide);
445        }
446    }
447
448    fn finalize_current_slide(&mut self) {
449        self.flush_list_items();
450        
451        if let Some(slide) = self.current_slide.take() {
452            self.slides.push(slide);
453        }
454    }
455}
456
#[cfg(test)]
mod tests {
    use super::*;

    /// Every H1 heading opens its own slide and supplies that slide's title.
    #[test]
    fn test_basic_headings() {
        let deck = parse("# Slide 1\n- Bullet 1\n\n# Slide 2\n- Bullet 2").unwrap();
        assert_eq!(deck.len(), 2);
        assert_eq!(deck[0].title, "Slide 1");
        assert_eq!(deck[1].title, "Slide 2");
    }

    /// Each list item becomes one content entry on the slide.
    #[test]
    fn test_bullets() {
        let deck = parse("# Test\n- Item 1\n- Item 2\n- Item 3").unwrap();
        assert_eq!(deck[0].content.len(), 3);
    }

    /// A pipe table is attached to the slide it appears under.
    #[test]
    fn test_table() {
        let deck = parse("# Data\n\n| A | B |\n|---|---|\n| 1 | 2 |").unwrap();
        assert!(deck[0].table.is_some());
    }

    /// A fenced block is recorded together with its declared language.
    #[test]
    fn test_code_block() {
        let deck = parse("# Code\n\n```rust\nfn main() {}\n```").unwrap();
        let first = &deck[0];
        assert!(!first.code_blocks.is_empty());
        assert_eq!(first.code_blocks[0].language, "rust");
    }

    /// Blockquote text ends up in the slide's speaker notes.
    #[test]
    fn test_speaker_notes() {
        let deck = parse("# Slide\n- Content\n\n> Speaker notes here").unwrap();
        assert!(deck[0].notes.is_some());
    }

    /// Bold spans keep their markdown markers in the bullet text.
    #[test]
    fn test_formatting() {
        let deck = parse("# Test\n- **Bold** and *italic*").unwrap();
        assert!(deck[0].content[0].contains("**Bold**"));
    }

    /// A mermaid flowchart is rendered into shapes on the slide.
    #[test]
    fn test_mermaid_flowchart() {
        let source = "# Process\n\n```mermaid\nflowchart LR\n    A --> B --> C\n```";
        let deck = parse(source).unwrap();
        assert!(!deck[0].shapes.is_empty());
    }

    /// A mermaid sequence diagram is rendered into shapes on the slide.
    #[test]
    fn test_mermaid_sequence() {
        let source = "# Sequence\n\n```mermaid\nsequenceDiagram\n    Alice->>Bob: Hello\n```";
        let deck = parse(source).unwrap();
        assert!(!deck[0].shapes.is_empty());
    }
}