1#![doc = include_str!("../README.md")]
2
3pub mod extras;
4pub mod utilities;
5
6#[derive(Debug, Copy, Clone, PartialEq, Eq)]
8pub enum MarkdownElement<'a> {
9 Heading {
10 level: u8,
11 text: RawText<'a>,
12 },
13 Quote(RawMarkdown<'a>),
14 Paragraph(RawText<'a>),
15 ListItem {
16 level: u8,
17 text: RawText<'a>,
18 },
19 Table(Table<'a>),
21 CodeBlock {
23 language: &'a str,
24 code: &'a str,
25 },
26 LaTeXBlock {
27 script: &'a str,
28 },
29 CommandBlock(CommandBlock<'a>),
30 CommentBlock(&'a str),
32 HTMLElement(&'a str),
35 Frontmatter(&'a str),
37 HorizontalRule,
38 Footnote,
44 Empty,
45}
46
47impl MarkdownElement<'_> {
48 #[must_use]
49 pub fn as_markdown(&self) -> String {
50 match self {
51 Self::Heading { level, text } => {
52 let mut s = "#".repeat(*level as usize);
53 s.push_str(text.0);
54 s.push(' ');
55 s
56 }
57 Self::ListItem { level, text } => {
58 let mut s = "\t".repeat(*level as usize);
59 s.push_str("- ");
60 s.push_str(text.0);
61 s
62 }
63 Self::CodeBlock { language, code } => {
64 format!("```{language}\n{code}```")
65 }
71 Self::Paragraph(text) => text.0.to_owned(),
72 Self::Quote(text) => {
73 format!("> {text}", text = text.0)
74 }
75 Self::Empty => String::new(),
76 item => format!("TODO {item:?}"),
77 }
78 }
79
80 #[must_use]
82 pub fn inner_paragraph_raw(&self) -> Option<&str> {
83 if let MarkdownElement::Paragraph(text) = self {
84 Some(text.0)
85 } else if let MarkdownElement::Quote(text) = self {
86 Some(text.0)
88 } else {
89 None
90 }
91 }
92
93 #[must_use]
94 pub fn parts_like(&self) -> Option<RawText> {
95 if let MarkdownElement::Heading { text, .. }
96 | MarkdownElement::Paragraph(text)
97 | MarkdownElement::ListItem { level: _, text } = self
98 {
99 Some(*text)
100 } else if let MarkdownElement::Quote(text) = self {
101 Some(RawText(text.0))
103 } else {
104 None
105 }
106 }
107
108 #[allow(clippy::match_same_arms)]
109 #[must_use]
110 pub fn debug_without_text(&self) -> String {
111 match self {
112 MarkdownElement::Heading { level, text: _ } => {
113 format!("Heading {{ level: {level} }}")
114 }
115 MarkdownElement::Quote(_) => "Quote".to_owned(),
116 MarkdownElement::Paragraph(_) => "Paragraph".to_owned(),
117 MarkdownElement::ListItem { level, text: _ } => {
118 format!("ListItem {{ level: {level} }}")
119 }
120 MarkdownElement::Table(_table) => "Table".to_owned(),
121 MarkdownElement::CodeBlock { language, code: _ } => format!("CodeBlock ({language})"),
122 MarkdownElement::LaTeXBlock { script: _ } => "LaTeXBlock {{ .. }}".to_owned(),
123 MarkdownElement::CommandBlock(_) => "CommandBlock".to_owned(),
124 MarkdownElement::CommentBlock(_) => "CommentBlock".to_owned(),
125 MarkdownElement::HTMLElement(_) => "HTMLElement".to_owned(),
126 MarkdownElement::Frontmatter(_) => "Frontmatter".to_owned(),
127 MarkdownElement::HorizontalRule => "HorizontalRule".to_owned(),
128 MarkdownElement::Footnote => "Footnote".to_owned(),
129 MarkdownElement::Empty => "Empty".to_owned(),
130 }
131 }
132}
133
134#[derive(Debug, Copy, Clone, PartialEq, Eq)]
136pub struct RawText<'a>(pub &'a str);
137
138impl<'a> RawText<'a> {
139 #[must_use]
140 pub fn parts(&self) -> PartsIterator<'a> {
141 PartsIterator::new(self.0)
142 }
143
144 #[must_use]
145 pub fn no_decoration(&self) -> String {
146 let mut s = String::new();
147 for part in PartsIterator::new(self.0) {
148 s.push_str(part.no_decoration());
149 }
150 s
151 }
152}
153
154#[derive(Debug, Copy, Clone, PartialEq, Eq)]
156pub enum MarkdownTextElement<'a> {
157 Plain(&'a str),
158 Italic(&'a str),
160 Bold(&'a str),
162 BoldAndItalic(&'a str),
164 Code(&'a str),
166 StrikeThrough(&'a str),
168 Emoji(&'a str),
170 Latex(&'a str),
172 Expression(&'a str),
174 Highlight(&'a str),
176 Superscript(&'a str),
178 Subscript(&'a str),
180 Tag(&'a str),
182 Link {
184 on: RawText<'a>,
186 to: &'a str,
187 },
188 Media {
190 alt: &'a str,
191 source: &'a str,
192 },
193}
194
195impl<'a> MarkdownTextElement<'a> {
196 #[must_use]
197 pub fn no_decoration(&self) -> &'a str {
198 match self {
199 MarkdownTextElement::Plain(i)
200 | MarkdownTextElement::Bold(i)
201 | MarkdownTextElement::Italic(i)
202 | MarkdownTextElement::BoldAndItalic(i)
203 | MarkdownTextElement::Code(i)
204 | MarkdownTextElement::StrikeThrough(i)
205 | MarkdownTextElement::Emoji(i)
206 | MarkdownTextElement::Latex(i)
207 | MarkdownTextElement::Highlight(i)
208 | MarkdownTextElement::Subscript(i)
209 | MarkdownTextElement::Superscript(i)
210 | MarkdownTextElement::Tag(i) => i,
211 MarkdownTextElement::Expression(_) | MarkdownTextElement::Media { .. } => "",
212 MarkdownTextElement::Link { on: _, to: _ } => {
213 eprintln!("TODO no decoration link");
214 ""
215 }
216 }
217 }
218}
219
220#[allow(clippy::needless_lifetimes)]
222fn decide<'a>(item: &'a str) -> MarkdownElement<'a> {
223 let item = item.trim();
224 if item.starts_with('#') {
225 let level = item.chars().take_while(|c| *c == '#').count();
226 MarkdownElement::Heading {
227 level: level.try_into().expect("deep header"),
228 text: RawText(item[level..].trim()),
229 }
230 } else if let Some(item) = item.strip_prefix('>') {
231 MarkdownElement::Quote(RawMarkdown(item))
232 } else if let "---" = item {
233 MarkdownElement::HorizontalRule
234 } else if let Some(item) = item.trim_start().strip_prefix('-') {
235 let level = item.chars().take_while(|c| *c == '\t' || *c == ' ').count();
237 MarkdownElement::ListItem {
238 level: level.try_into().expect("deep list item"),
239 text: RawText(item.trim()),
240 }
241 } else if item.is_empty() {
242 MarkdownElement::Empty
243 } else {
244 MarkdownElement::Paragraph(RawText(item))
245 }
246}
247
/// Options accepted by [`parse_with_options`].
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
pub struct ParseOptions {
    /// When `true`, blank lines are reported as [`MarkdownElement::Empty`];
    /// when `false` (the default) they are skipped.
    pub include_new_lines: bool,
}
252
253pub fn parse<'a>(on: &'a str, cb: impl FnMut(MarkdownElement<'a>)) -> Result<(), ()> {
256 parse_with_options(on, &ParseOptions::default(), cb)
257}
258
/// Removes `left` from the start and `right` from the end of the trimmed
/// input and returns what lies between them, trimmed again.
///
/// Returns `None` when the trimmed input does not start with `left`, or when
/// what remains after removing `left` does not end with `right`.
pub fn strip_surrounds<'a>(on: &'a str, left: &str, right: &str) -> Option<&'a str> {
    let trimmed = on.trim();
    let without_left = trimmed.strip_prefix(left)?;
    let inner = without_left.strip_suffix(right)?;
    Some(inner.trim())
}
265
266#[allow(clippy::result_unit_err, clippy::too_many_lines)]
270pub fn parse_with_options<'a>(
271 on: &'a str,
272 options: &ParseOptions,
273 mut cb: impl FnMut(MarkdownElement<'a>),
274) -> Result<(), ()> {
275 let mut since_new_line = 0;
276 let mut start = 0;
277
278 let mut current_code_language = None;
280
281 let mut current_command_and_arguments: Option<(&str, &str)> = None;
282
283 let mut in_frontmatter = false;
284 let mut in_table = false;
285 let mut in_latex_block = false;
286 let mut in_markdown_comment = false;
287
288 for (idx, chr) in on.char_indices() {
289 if let '\n' = chr {
290 let line = &on[since_new_line..idx];
291
292 if current_code_language.is_some() {
293 if let "```" = line.trim() {
294 cb(MarkdownElement::CodeBlock {
295 language: current_code_language.take().unwrap(),
296 code: &on[start..since_new_line],
297 });
298 start = idx + 1;
299 }
300 since_new_line = idx + 1;
301 continue;
302 }
303
304 if let Some((current_command, arguments)) = current_command_and_arguments {
305 if let Some(command_line) = strip_surrounds(line, "{%", "%}") {
306 if command_line
307 .trim()
308 .strip_prefix('/')
309 .is_some_and(|command| current_command == command)
310 {
311 cb(MarkdownElement::CommandBlock(CommandBlock {
312 name: current_command,
313 arguments,
314 inner: RawMarkdown(&on[start..since_new_line]),
315 }));
316 current_command_and_arguments = None;
317 start = idx + 1;
318 }
319 }
320 since_new_line = idx + 1;
321 continue;
322 }
323
324 if in_latex_block {
325 if let "$$" = line.trim() {
326 cb(MarkdownElement::LaTeXBlock {
327 script: on[start..since_new_line].trim(),
328 });
329 in_latex_block = false;
330 start = idx + 1;
331 }
332 since_new_line = idx + 1;
333 continue;
334 }
335
336 if in_markdown_comment {
337 if line.trim().ends_with("%%") {
338 cb(MarkdownElement::CommentBlock(
339 on[start..since_new_line].trim(),
340 ));
341 in_markdown_comment = false;
342 start = idx + 1;
343 }
344 since_new_line = idx + 1;
345 continue;
346 }
347
348 if in_table {
349 if !line.ends_with('|') {
350 cb(MarkdownElement::Table(Table(&on[start..since_new_line])));
351 in_table = false;
352 start = idx + 1;
353 }
354 since_new_line = idx + 1;
355 continue;
356 }
357
358 let is_horizontal_rule = "---" == line.trim();
359
360 if in_frontmatter {
361 if is_horizontal_rule {
362 cb(MarkdownElement::Frontmatter(&on[start..since_new_line]));
363 in_frontmatter = false;
364 }
365 since_new_line = idx + 1;
366 continue;
367 }
368
369 since_new_line = idx + 1;
370
371 if let Some(rest) = line.trim().strip_prefix("```") {
372 let language = rest.trim_end();
374 current_code_language = Some(language);
375 } else if let "$$" = line.trim() {
376 in_latex_block = true;
377 } else if let Some(line) = line.trim_start().strip_prefix("%%") {
378 if let Some(out) = line.trim_end().strip_suffix("%%") {
379 cb(MarkdownElement::CommentBlock(out.trim()));
380 } else {
381 in_markdown_comment = true;
382 }
383 } else if start == 0 && is_horizontal_rule {
384 in_frontmatter = true;
385 } else if let Some(command_line) = strip_surrounds(line, "{%", "%}") {
386 current_command_and_arguments =
387 Some(command_line.split_once(' ').unwrap_or((command_line, "")));
388 } else {
389 let result = decide(line);
390 let to_add = !matches!(
391 (options.include_new_lines, result),
392 (false, MarkdownElement::Empty)
393 );
394 if to_add {
395 cb(result);
396 }
397 }
398
399 start = since_new_line;
400 }
401 }
402
403 if current_code_language.is_some() {
404 eprintln!("TODO error {current_code_language:?}");
405 } else if in_latex_block {
407 eprintln!("TODO unclosed latex block");
408 }
409
410 if in_table {
411 cb(MarkdownElement::Table(Table(&on[start..since_new_line])));
412 } else {
413 let line = &on[start..];
414 let result = decide(line);
415 let to_add = !matches!(
416 (options.include_new_lines, result),
417 (false, MarkdownElement::Empty)
418 );
419 if to_add {
420 cb(result);
421 }
422 }
423
424 Ok(())
425}
426
/// Iterator that splits inline markdown text into [`MarkdownTextElement`]s.
///
/// `in_bold` and `in_italic` are public so that a consumer can tell whether
/// the element it just received was yielded inside an open bold or italic span.
// One flag per inline construct that can be open between two `next` calls.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
pub struct PartsIterator<'a> {
    /// The text being split.
    on: &'a str,
    /// Byte offset in `on` up to which the text has been consumed.
    last: usize,
    in_tag: bool,
    pub in_bold: bool,
    pub in_italic: bool,
    in_code: bool,
    in_latex: bool,
    in_emoji: bool,
    in_link: bool,
    in_chevron_link: bool,
    in_media: bool,
    in_expression: bool,
}

impl<'a> PartsIterator<'a> {
    /// Creates an iterator positioned at the start of `on` with no construct open.
    #[must_use]
    pub fn new(on: &'a str) -> Self {
        Self {
            on,
            last: 0,
            in_tag: false,
            in_bold: false,
            in_italic: false,
            in_emoji: false,
            in_code: false,
            in_latex: false,
            in_link: false,
            in_chevron_link: false,
            in_media: false,
            in_expression: false,
        }
    }
}
464
465impl<'a> Iterator for PartsIterator<'a> {
466 type Item = MarkdownTextElement<'a>;
467
468 #[allow(clippy::too_many_lines)]
469 fn next(&mut self) -> Option<Self::Item> {
470 if self.last >= self.on.len() {
471 None
472 } else {
473 let mut link_text_end: Option<usize> = None;
474 let mut bracket_depth: usize = 0;
475
476 let mut range = &self.on[self.last..];
477 let mut iterator = range.char_indices();
478
479 while let Some((idx, chr)) = iterator.next() {
480 if self.in_link || self.in_media {
481 if let Some(link_text_end) = link_text_end {
482 if idx == link_text_end + 1 {
483 if chr != '(' {
484 if self.in_link {
485 self.last += idx;
486 self.in_link = false;
487 return Some(MarkdownTextElement::Link {
488 on: RawText(&range[..link_text_end]),
489 to: "",
490 });
491 }
492 panic!("media parsing broken {chr}");
493 }
494 } else if let ')' = chr {
495 let in_brackets = &range[..link_text_end];
496 let in_parenthesis = &range[link_text_end + "](".len()..idx];
497 let element = if self.in_link {
498 self.in_link = false;
499 MarkdownTextElement::Link {
500 on: RawText(in_brackets),
501 to: in_parenthesis,
502 }
503 } else {
504 self.in_media = false;
505 MarkdownTextElement::Media {
506 alt: in_brackets,
507 source: in_parenthesis,
508 }
509 };
510
511 self.last += idx + 1;
512 return Some(element);
513 }
514 } else if let ']' = chr {
515 if let Some(reduced_depth) = bracket_depth.checked_sub(1) {
516 bracket_depth = reduced_depth;
517 } else {
518 link_text_end = Some(idx);
519 }
520 } else if let '[' = chr {
521 bracket_depth += 1;
522 }
523
524 continue;
525 }
526
527 if self.in_code {
529 if let '`' = chr {
530 self.last += idx + 1;
531 self.in_code = false;
532 return Some(MarkdownTextElement::Code(&range[..idx]));
533 }
534 continue;
535 }
536 if let (true, '$') = (self.in_latex, chr) {
538 self.last += idx + 1;
539 self.in_latex = false;
540 return Some(MarkdownTextElement::Latex(&range[..idx]));
541 }
542 if let (true, ':') = (self.in_emoji, chr) {
544 self.last += idx + 1;
545 self.in_emoji = false;
546 return Some(MarkdownTextElement::Emoji(&range[..idx]));
547 }
548 if let (true, '}') = (self.in_expression, chr) {
550 self.last += idx + 1;
551 self.in_expression = false;
552 return Some(MarkdownTextElement::Expression(&range[..idx]));
553 }
554 if let (true, '>') = (self.in_chevron_link, chr) {
556 self.last += idx + 1;
557 self.in_chevron_link = false;
558 let inner = &range[..idx];
559 return Some(MarkdownTextElement::Link {
560 on: RawText(inner),
562 to: inner,
563 });
564 }
565
566 if self.in_tag && chr.is_whitespace() {
567 self.last += idx + 1;
568 self.in_tag = false;
569 return Some(MarkdownTextElement::Tag(&range[..idx]));
570 }
571
572 macro_rules! yield_current {
573 () => {{
574 let item = &range[..idx];
575 if !item.is_empty() {
576 return Some(MarkdownTextElement::Plain(item));
577 }
578 range = &self.on[self.last..];
580 iterator = range.char_indices();
581 }};
582 }
583
584 match chr {
585 '`' => {
586 self.last += idx + 1;
587 self.in_code = true;
588 yield_current!();
589 }
590 '$' => {
591 self.last += idx + 1;
592 self.in_latex = true;
593 yield_current!();
594 }
595 '{' => {
596 self.last += idx + 1;
597 self.in_expression = true;
598 yield_current!();
599 }
600 ':' if range[(idx + 1)..]
601 .chars()
602 .next()
603 .is_some_and(char::is_alphanumeric) =>
604 {
605 self.last += idx + 1;
607 self.in_emoji = true;
608 yield_current!();
609 }
610 '#' => {
611 self.last += idx + 1;
612 self.in_tag = true;
613 yield_current!();
614 }
615 '<' if range[idx..]
616 .chars()
617 .next()
618 .is_some_and(char::is_alphanumeric) =>
619 {
620 self.last += idx + 1;
621 self.in_chevron_link = true;
622 yield_current!();
623 }
624 '!' if range[idx..].starts_with("![") => {
625 self.last += idx + "![".len();
626 self.in_media = true;
627 yield_current!();
628 }
629 '[' => {
630 self.last += idx + '['.len_utf8();
631 self.in_link = true;
632 yield_current!();
633 }
634 '*' | '_' => {
635 let start = &range[idx..];
636 if start.starts_with("**") || start.starts_with("__") {
637 self.last += idx + 2;
638 self.in_bold = !self.in_bold;
639 if self.in_bold {
640 yield_current!();
641 } else {
642 return Some(MarkdownTextElement::Bold(&range[..idx]));
643 }
644 } else {
645 self.last += idx + 1;
646 self.in_italic = !self.in_italic;
647 if self.in_italic {
648 yield_current!();
649 } else {
650 return Some(MarkdownTextElement::Italic(&range[..idx]));
651 }
652 }
653 }
654 _ => {}
655 }
656 }
657
658 self.last = self.on.len();
659 if range.is_empty() {
660 None
661 } else {
662 Some(MarkdownTextElement::Plain(range))
664 }
665 }
666 }
667}
668
/// Raw, unparsed markdown source borrowed from the input.
///
/// In this file it carries the content of [`MarkdownElement::Quote`] and the
/// body of a command block ([`CommandBlock::inner`]).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct RawMarkdown<'a>(pub &'a str);
671
672#[derive(Debug, Copy, Clone, PartialEq, Eq)]
684pub struct Table<'a>(pub(crate) &'a str);
685
686impl<'a> Table<'a> {
687 pub fn rows(&self) -> impl Iterator<Item = TableRow<'a>> {
688 let mut lines = self.0.lines();
689 let header = lines.next().expect("no heading (empty table)");
690 std::iter::once(TableRow(header)).chain(lines.skip(1).map(TableRow))
691 }
692}
693
694#[derive(Debug, Copy, Clone, PartialEq, Eq)]
695pub struct TableRow<'a>(pub(crate) &'a str);
696
697impl<'a> TableRow<'a> {
698 pub fn cells(&self) -> impl Iterator<Item = RawText<'a>> {
699 let inner = &self.0[1..(self.0.len() - 1)];
700 inner.split('|').map(RawText)
701 }
702}
703#[derive(Debug, Copy, Clone, PartialEq, Eq)]
704pub struct CommandBlock<'a> {
705 pub name: &'a str,
706 pub arguments: &'a str,
707 pub inner: RawMarkdown<'a>,
708}
709
710impl<'a> CommandBlock<'a> {
711 #[must_use]
712 #[allow(clippy::collapsible_else_if)]
713 pub fn arguments(&self) -> Vec<(&'a str, &'a str)> {
714 let mut arguments = Vec::new();
715 let mut key: Option<&str> = None;
716 let mut start = 0;
717 let mut in_string = false;
718
719 for (idx, chr) in self.arguments.char_indices() {
720 if let Some(current_key) = key {
721 let value = self.arguments[start..idx].trim();
722 if let (' ', false, false) = (chr, in_string, value.is_empty()) {
723 arguments.push((current_key, value));
724 start = idx;
725 key = None;
726 } else if let '"' = chr {
727 in_string = !in_string;
728 }
729 } else {
730 if let '=' = chr {
731 let key_acc = &self.arguments[start..idx];
732 key = Some(key_acc.trim());
733 start = idx + 1;
734 }
735 }
736 }
737 if let Some(current_key) = key {
738 if in_string {
739 eprintln!("missing '\"'");
740 }
741 let value = self.arguments[start..].trim();
742 arguments.push((current_key, value));
743 }
744
745 arguments
746 }
747}