// arinamcnulty_markdown_parser/lib.rs
use std::{
    fs::{File, OpenOptions},
    io::{BufRead, BufReader, Write},
    path::Path,
};

use pest::{
    Parser,
    iterators::{Pair, Pairs},
};
use pest_derive::Parser;
34
35#[derive(Debug, thiserror::Error)]
38pub enum MarkdownError {
39 #[error("Parsing failed: {0}")]
40 ParseError(String),
41
42 #[error("File operation failed: {0}")]
43 IoError(#[from] std::io::Error),
44}
45
46#[derive(Parser)]
47#[grammar = "grammar.pest"]
48pub struct MarkdownParser;
49
50pub fn parse_markdown(input: &str) -> Result<Pairs<'_, Rule>, MarkdownError> {
59 MarkdownParser::parse(Rule::document_structure, input)
60 .map_err(|e| MarkdownError::ParseError(e.to_string()))
61}
62
63pub fn str_to_html(input: &str) -> Result<Vec<String>, MarkdownError> {
72 let mut parsed = parse_markdown(input)?;
73 let document = parsed
74 .next()
75 .ok_or_else(|| MarkdownError::ParseError("Empty document".to_string()))?;
76
77 let results: Result<Vec<String>, MarkdownError> = document
78 .into_inner()
79 .filter(|pair| !matches!(pair.as_rule(), Rule::EOI))
80 .map(convert_to_html)
81 .collect();
82
83 results
84}
85
86fn convert_to_html(pair: Pair<Rule>) -> Result<String, MarkdownError> {
95 match pair.as_rule() {
96 Rule::document_block => {
97 let inner = pair.into_inner().next().unwrap();
98 convert_to_html(inner)
99 }
100 Rule::document_heading => process_document_heading(pair),
101 Rule::h1_heading | Rule::h2_heading | Rule::h3_heading => process_heading(pair),
102 Rule::document_paragraph => process_document_paragraph(pair),
103 Rule::document_quote => process_document_quote(pair),
104 Rule::quote_line => process_quote_line(pair),
105 Rule::paragraph_text => process_paragraph_text(pair),
106 Rule::document_unordered_list => process_unordered_list(pair),
107 Rule::document_ordered_list => process_ordered_list(pair),
108 Rule::unordered_list_item => process_list_item(pair),
109 Rule::ordered_list_item => process_list_item(pair),
110 Rule::code_fence => process_code_fence(pair),
111 Rule::thematic_break => Ok("<hr>".to_string()),
112 Rule::blank_line => Ok("<br>".to_string()),
113 Rule::EOI => Ok(String::new()),
114 _ => Err(MarkdownError::ParseError(format!(
115 "Unknown rule: {:?}",
116 pair.as_rule()
117 ))),
118 }
119}
120
121fn process_document_heading(pair: Pair<Rule>) -> Result<String, MarkdownError> {
123 let inner = pair.into_inner().next().unwrap();
124 process_heading(inner)
125}
126
127fn process_heading(pair: Pair<Rule>) -> Result<String, MarkdownError> {
129 let level = match pair.as_rule() {
130 Rule::h1_heading => 1,
131 Rule::h2_heading => 2,
132 Rule::h3_heading => 3,
133 _ => return Err(MarkdownError::ParseError("Invalid heading".to_string())),
134 };
135
136 let content = pair.as_str();
137 let text = content
138 .trim_start_matches('#')
139 .trim_start_matches(char::is_whitespace)
140 .trim_end_matches('\n')
141 .trim();
142
143 Ok(format!(
144 "<h{level}>{}</h{level}>",
145 html_escape::encode_text(text)
146 ))
147}
148
149fn process_document_paragraph(pair: Pair<Rule>) -> Result<String, MarkdownError> {
150 process_paragraph(pair)
151}
152
153fn process_paragraph(pair: Pair<Rule>) -> Result<String, MarkdownError> {
154 let content: Result<String, MarkdownError> = pair
155 .into_inner()
156 .map(|line| process_paragraph_line(line))
157 .collect();
158
159 Ok(format!("<p>{}</p>", content?))
160}
161
162fn process_paragraph_text(pair: Pair<Rule>) -> Result<String, MarkdownError> {
163 pair.into_inner()
164 .map(|inline| process_inline_element(inline))
165 .collect()
166}
167
168fn process_paragraph_line(pair: Pair<Rule>) -> Result<String, MarkdownError> {
169 pair.into_inner()
170 .map(|inline| process_inline_element(inline))
171 .collect()
172}
173
174fn process_inline_element(pair: Pair<Rule>) -> Result<String, MarkdownError> {
176 match pair.as_rule() {
177 Rule::plain_text => Ok(html_escape::encode_text(pair.as_str()).to_string()),
178 Rule::inline_code => {
179 let full = pair.as_str();
180 let code = full
181 .strip_prefix('`')
182 .and_then(|s| s.strip_suffix('`'))
183 .unwrap_or("");
184 Ok(format!("<code>{}</code>", html_escape::encode_text(code)))
185 }
186 Rule::link => process_link(pair),
187 Rule::image => process_image(pair),
188 Rule::bold_formatting => {
189 let content = process_bold_content(pair)?;
190 Ok(format!("<strong>{content}</strong>"))
191 }
192 Rule::italic_formatting => {
193 let content = process_italic_content(pair)?;
194 Ok(format!("<em>{content}</em>"))
195 }
196 Rule::strikethrough_formatting => {
197 let content = process_strikethrough_content(pair)?;
198 Ok(format!("<del>{content}</del>"))
199 }
200 Rule::underline_formatting => {
201 let content = process_underline_content(pair)?;
202 Ok(format!("<u>{content}</u>"))
203 }
204 Rule::text_formatting => process_text_formatting(pair),
205 Rule::escape_sequence => process_escape_sequence(pair),
206 _ => Ok(html_escape::encode_text(pair.as_str()).to_string()),
207 }
208}
209
210fn process_text_formatting(pair: Pair<Rule>) -> Result<String, MarkdownError> {
212 let rule = pair.as_rule();
213 match rule {
214 Rule::bold_formatting => {
215 let content = process_bold_content(pair)?;
216 Ok(format!("<strong>{content}</strong>"))
217 }
218 Rule::italic_formatting => {
219 let content = process_italic_content(pair)?;
220 Ok(format!("<em>{content}</em>"))
221 }
222 Rule::strikethrough_formatting => {
223 let content = process_strikethrough_content(pair)?;
224 Ok(format!("<del>{content}</del>"))
225 }
226 Rule::underline_formatting => {
227 let content = process_underline_content(pair)?;
228 Ok(format!("<u>{content}</u>"))
229 }
230 _ => Ok(html_escape::encode_text(pair.as_str()).to_string()),
231 }
232}
233
234fn process_bold_content(pair: Pair<Rule>) -> Result<String, MarkdownError> {
235 pair.into_inner()
236 .next()
237 .map(|p| html_escape::encode_text(p.as_str()).to_string())
238 .ok_or_else(|| MarkdownError::ParseError("Empty bold content".to_string()))
239}
240
241fn process_italic_content(pair: Pair<Rule>) -> Result<String, MarkdownError> {
242 pair.into_inner()
243 .next()
244 .map(|p| html_escape::encode_text(p.as_str()).to_string())
245 .ok_or_else(|| MarkdownError::ParseError("Empty italic content".to_string()))
246}
247
248fn process_strikethrough_content(pair: Pair<Rule>) -> Result<String, MarkdownError> {
249 pair.into_inner()
250 .next()
251 .map(|p| html_escape::encode_text(p.as_str()).to_string())
252 .ok_or_else(|| MarkdownError::ParseError("Empty strikethrough content".to_string()))
253}
254
255fn process_underline_content(pair: Pair<Rule>) -> Result<String, MarkdownError> {
256 pair.into_inner()
257 .next()
258 .map(|p| html_escape::encode_text(p.as_str()).to_string())
259 .ok_or_else(|| MarkdownError::ParseError("Empty underline content".to_string()))
260}
261
262fn process_link(pair: Pair<Rule>) -> Result<String, MarkdownError> {
264 let mut inner = pair.into_inner();
265 let text = inner
266 .next()
267 .map(|p| p.into_inner().as_str())
268 .ok_or_else(|| MarkdownError::ParseError("Missing link text".to_string()))?;
269 let url = inner
270 .next()
271 .map(|p| p.as_str())
272 .ok_or_else(|| MarkdownError::ParseError("Missing link URL".to_string()))?;
273
274 Ok(format!(
275 "<a href=\"{}\">{}</a>",
276 url,
277 html_escape::encode_text(text)
278 ))
279}
280
281fn process_image(pair: Pair<Rule>) -> Result<String, MarkdownError> {
283 let mut inner = pair.into_inner();
284 let alt = inner
285 .next()
286 .map(|p| p.into_inner().as_str())
287 .ok_or_else(|| MarkdownError::ParseError("Missing image alt text".to_string()))?;
288 let url = inner
289 .next()
290 .map(|p| p.as_str())
291 .ok_or_else(|| MarkdownError::ParseError("Missing image URL".to_string()))?;
292
293 Ok(format!(
294 "<img src=\"{}\" alt=\"{}\">",
295 url,
296 html_escape::encode_text(alt)
297 ))
298}
299
300fn process_document_quote(pair: Pair<Rule>) -> Result<String, MarkdownError> {
301 process_quote(pair)
302}
303
304fn process_quote_line(pair: Pair<Rule>) -> Result<String, MarkdownError> {
305 let inner = pair.into_inner().next();
306 match inner {
307 Some(content) => {
308 let html = convert_to_html(content)?;
309 Ok(format!("<p>{}</p>", html))
310 }
311 None => Ok("<p></p>".to_string()),
312 }
313}
314
315fn process_quote(pair: Pair<Rule>) -> Result<String, MarkdownError> {
316 let mut lines: Vec<String> = Vec::new();
317
318 for line in pair.into_inner() {
319 let processed = process_quote_line(line)?;
320 if !processed.is_empty() {
321 lines.push(processed);
322 }
323 }
324
325 Ok(format!("<blockquote>\n{}\n</blockquote>", lines.join("\n")))
326}
327
328fn process_code_fence(pair: Pair<Rule>) -> Result<String, MarkdownError> {
329 process_code_block(pair)
330}
331
332fn process_unordered_list(pair: Pair<Rule>) -> Result<String, MarkdownError> {
333 let items: Result<Vec<String>, MarkdownError> =
334 pair.into_inner().map(process_list_item).collect();
335
336 Ok(format!("<ul>\n{}\n</ul>", items?.join("\n")))
337}
338
339fn process_ordered_list(pair: Pair<Rule>) -> Result<String, MarkdownError> {
340 let items: Result<Vec<String>, MarkdownError> =
341 pair.into_inner().map(process_list_item).collect();
342
343 Ok(format!("<ol>\n{}\n</ol>", items?.join("\n")))
344}
345
346fn process_list_item(pair: Pair<Rule>) -> Result<String, MarkdownError> {
347 let content = pair.as_str();
348 let text = content
349 .find(char::is_whitespace)
350 .map(|pos| &content[pos + 1..])
351 .unwrap_or("")
352 .trim_end_matches('\n')
353 .trim();
354
355 Ok(format!("<li>{}</li>", html_escape::encode_text(text)))
356}
357
358fn process_code_block(pair: Pair<Rule>) -> Result<String, MarkdownError> {
361 let mut language = String::new();
362 let mut code = String::new();
363
364 for inner_pair in pair.into_inner() {
365 match inner_pair.as_rule() {
366 Rule::language_spec => {
367 language = inner_pair.as_str().trim().to_string();
368 }
369 Rule::code_body => {
370 code = html_escape::encode_text(inner_pair.as_str()).to_string();
371 }
372 _ => {} }
374 }
375
376 let lang_attr = if language.is_empty() {
377 String::new()
378 } else {
379 format!(" class=\"language-{}\"", language)
380 };
381
382 Ok(format!("<pre><code{lang_attr}>{code}</code></pre>"))
383}
384
385fn process_escape_sequence(pair: Pair<Rule>) -> Result<String, MarkdownError> {
386 let escaped = pair.into_inner().next().map(|p| p.as_str()).unwrap_or("");
387 Ok(html_escape::encode_text(escaped).to_string())
388}
389
390pub fn convert_file_to_html(input_path: &Path, output_path: &Path) -> Result<(), MarkdownError> {
400 let file = File::open(input_path)?;
401 let reader = BufReader::new(file);
402
403 let mut content = String::new();
404 for line in reader.lines() {
405 content.push_str(&line?);
406 content.push('\n');
407 }
408
409 let html_lines = str_to_html(&content)?;
410
411 let mut output = OpenOptions::new()
412 .create(true)
413 .write(true)
414 .truncate(true)
415 .open(output_path)?;
416
417 for line in html_lines {
418 writeln!(output, "{}", line)?;
419 }
420
421 Ok(())
422}
423
424pub fn print_html_to_console(input: &str) -> Result<(), MarkdownError> {
433 let html_lines = str_to_html(input)?;
434 for line in html_lines {
435 println!("{}", line);
436 }
437 Ok(())
438}