Skip to main content

microcad_lang_markdown/
parser.rs

// Copyright © 2026 The µcad authors <info@ucad.xyz>
// SPDX-License-Identifier: AGPL-3.0-or-later

//! µcad markdown parser.

6use crate::{CodeBlock, Markdown, Paragraph, Section, code_block::CodeBlockHeader};
7use thiserror::Error;
8
9#[derive(Error, Debug)]
10pub enum ParseError {
11    #[error("Missing code block fence")]
12    MissingCodeBlockFence,
13
14    #[error("Unexpected end of file while parsing")]
15    UnexpectedEOF,
16
17    #[error("Malformed header")]
18    MalformedHeader,
19
20    #[error("Invalid test result: {0}")]
21    InvalidTestResult(String),
22
23    #[error("Duplicated code block name: {0}")]
24    DuplicatedCodeBlockName(String),
25}
26
/// Line-oriented cursor over the markdown input.
///
/// Besides handing out lines one by one, the context remembers the line it
/// returned last so that nested parsers can inspect it without consuming
/// further input.
pub struct ParseContext<'a> {
    /// Line returned by the latest call to `next`; `None` before the first
    /// call and once the input is exhausted.
    current_line: Option<&'a str>,
    /// Zero-based index of `current_line`; `0` while there is no current line.
    current_line_number: usize,
    /// Remaining lines, each paired with its zero-based index.
    lines: std::iter::Peekable<std::iter::Enumerate<std::str::Lines<'a>>>,
}

impl<'a> ParseContext<'a> {
    /// Creates a context positioned before the first line of `input`.
    pub fn new(input: &'a str) -> Self {
        let lines = input.lines().enumerate().peekable();
        Self {
            current_line: None,
            current_line_number: 0,
            lines,
        }
    }

    /// Advances to the next line and returns it together with its zero-based
    /// index, or `None` at the end of the input.
    ///
    /// The current line and line number are updated to match; at the end of
    /// the input they are reset to `None` and `0`.
    pub(crate) fn next(&mut self) -> Option<(usize, &'a str)> {
        // `(usize, &str)` is `Copy`, so the entry can be inspected twice and
        // still be returned.
        let entry = self.lines.next();
        self.current_line_number = entry.map_or(0, |(index, _)| index);
        self.current_line = entry.map(|(_, text)| text);
        entry
    }
}
57
58pub trait Parse
59where
60    Self: Sized,
61{
62    fn parse(context: &mut ParseContext) -> Result<Self, ParseError>;
63}
64
65impl Parse for CodeBlockHeader {
66    fn parse(context: &mut ParseContext) -> Result<Self, ParseError> {
67        // 1. Consume optional test banner and any subsequent empty lines
68        if Self::is_test_banner(context.current_line.expect("Some line")) {
69            while let Some((_, next_line)) = context.next() {
70                if !next_line.trim().is_empty() {
71                    break;
72                }
73            }
74        }
75
76        let header_line = context.current_line.expect("A current line");
77
78        let trimmed = header_line.trim();
79        assert!(trimmed.starts_with("```"));
80
81        // Metadata is everything after "```"
82        let meta = &trimmed[3..];
83
84        // 1. Locate structural delimiters
85        let hash_pos = meta.find('#');
86        let paren_pos = meta.find('(');
87
88        // 2. Parse Name (supports "µcad,my_name" or just "my_name")
89        let name_end = hash_pos.or(paren_pos).unwrap_or(meta.len());
90        let name_part = meta[..name_end].trim();
91        let name = name_part
92            .find(',')
93            .map(|comma_idx| name_part[comma_idx + 1..].trim().to_string());
94
95        // 3. Parse fragment (#ok, #fail, etc.)
96        let mut fragment = None;
97        if let Some(start) = hash_pos {
98            let end = paren_pos.unwrap_or(meta.len());
99            let status_str = meta[start + 1..end].trim();
100            fragment = Some(status_str.to_string());
101        }
102
103        // 4. Parse Parameters (hires, lowres)
104        let mut parameters = Vec::new();
105        if let Some(start) = paren_pos {
106            let end = meta.find(')').ok_or(ParseError::MalformedHeader)?;
107
108            parameters = meta[start + 1..end]
109                .split(',')
110                .map(|s| s.trim().to_string())
111                .filter(|s| !s.is_empty())
112                .collect();
113        }
114
115        Ok(Self {
116            name,
117            fragment,
118            parameters,
119        })
120    }
121}
122
123impl Parse for CodeBlock {
124    fn parse(context: &mut ParseContext) -> Result<Self, ParseError> {
125        let mut code_lines = Vec::new();
126        let mut closed = false;
127
128        let header = CodeBlockHeader::parse(context)?;
129        let mut start_line_no = None;
130
131        // Consume until closing backticks
132        while let Some((idx, line)) = context.next() {
133            if start_line_no.is_none() {
134                start_line_no = Some(idx);
135            }
136
137            if line.trim().starts_with("```") {
138                closed = true;
139                break;
140            }
141            code_lines.push(line);
142        }
143
144        if !closed {
145            return Err(ParseError::UnexpectedEOF);
146        }
147
148        Ok(Self {
149            header,
150            code: code_lines.join("\n"),
151            line_offset: start_line_no.expect("Some line"),
152        })
153    }
154}
155
156impl Parse for Markdown {
157    fn parse(context: &mut ParseContext) -> Result<Self, ParseError> {
158        let mut sections = Vec::new();
159        let mut current_section = Section::default();
160
161        let mut code_block_names = std::collections::HashSet::new();
162
163        while let Some((_, line)) = context.next() {
164            let trimmed = line.trim();
165
166            if trimmed.is_empty() {
167                continue;
168            }
169
170            // 1. Headings
171            if trimmed.starts_with('#') {
172                if !current_section.heading.is_empty() || !current_section.content.is_empty() {
173                    sections.push(current_section);
174                }
175
176                let level = trimmed.chars().take_while(|&c| c == '#').count() as i64;
177                assert!(level > 0);
178
179                current_section = Section {
180                    heading: trimmed.trim_start_matches('#').trim().to_string(),
181                    level,
182                    content: Vec::new(),
183                };
184            }
185            // 2. Code Blocks
186            else if CodeBlockHeader::is_code_block_start(line) {
187                let block = CodeBlock::parse(context)?;
188                if let Some(block_name) = &block.name() {
189                    if code_block_names.contains(block_name) {
190                        return Err(ParseError::DuplicatedCodeBlockName(block_name.clone()));
191                    } else {
192                        code_block_names.insert(block_name.clone());
193                    }
194                }
195
196                current_section.content.push(Paragraph::CodeBlock(block));
197            }
198            // 3. Tables
199            else if trimmed.starts_with('|') {
200                let mut content = vec![line.to_string()];
201                while let Some((_, line)) = context.next() {
202                    let trimmed = line.trim();
203                    if !trimmed.starts_with("|") {
204                        break;
205                    }
206                    content.push(line.to_string());
207                }
208                current_section
209                    .content
210                    .push(Paragraph::Table(content.join("\n").trim().to_string()));
211            }
212            // 4. Text
213            else {
214                let mut content = vec![line.to_string()];
215                while let Some((_, line)) = context.next() {
216                    let trimmed = line.trim();
217                    if trimmed.is_empty() {
218                        break;
219                    }
220                    content.push(line.to_string());
221                }
222                current_section
223                    .content
224                    .push(Paragraph::Text(content.join("\n").to_string()));
225            }
226        }
227
228        sections.push(current_section);
229        Ok(Self::new(sections))
230    }
231}
232
233/// Parse a markdown.
234pub fn parse(input: &str) -> Result<Markdown, ParseError> {
235    let mut context = ParseContext::new(input);
236    Markdown::parse(&mut context)
237}