1use super::references::classify_reference_node;
65use crate::lex::ast::elements::inlines::{InlineContent, InlineNode, ReferenceInline};
66use crate::lex::token::InlineKind;
67use once_cell::sync::Lazy;
68use std::collections::HashMap;
69
70static DEFAULT_INLINE_PARSER: Lazy<InlineParser> = Lazy::new(InlineParser::new);
71
72pub fn parse_inlines(text: &str) -> InlineContent {
74 DEFAULT_INLINE_PARSER.parse(text)
75}
76
77pub fn parse_inlines_with_parser(text: &str, parser: &InlineParser) -> InlineContent {
79 parser.parse(text)
80}
81
82pub type InlinePostProcessor = fn(InlineNode) -> InlineNode;
84
85#[derive(Clone)]
93pub struct InlineSpec {
94 pub kind: InlineKind,
95 pub start_token: char,
96 pub end_token: char,
97 pub literal: bool,
98 pub post_process: Option<InlinePostProcessor>,
99}
100
101impl InlineSpec {
102 fn apply_post_process(&self, node: InlineNode) -> InlineNode {
103 if let Some(callback) = self.post_process {
104 callback(node)
105 } else {
106 node
107 }
108 }
109}
110
111#[derive(Clone)]
112pub struct InlineParser {
113 specs: Vec<InlineSpec>,
114 token_map: HashMap<char, usize>,
115}
116
117impl InlineParser {
118 pub fn new() -> Self {
119 Self::from_specs(default_specs())
120 }
121
122 pub fn with_post_processor(mut self, kind: InlineKind, processor: InlinePostProcessor) -> Self {
124 if let Some(spec) = self.specs.iter_mut().find(|spec| spec.kind == kind) {
125 spec.post_process = Some(processor);
126 }
127 self
128 }
129
130 pub fn parse(&self, text: &str) -> InlineContent {
131 parse_with(self, text)
132 }
133
134 fn from_specs(specs: Vec<InlineSpec>) -> Self {
135 let mut token_map = HashMap::new();
136 for (index, spec) in specs.iter().enumerate() {
137 token_map.insert(spec.start_token, index);
138 }
139 Self { specs, token_map }
140 }
141
142 fn spec(&self, index: usize) -> &InlineSpec {
143 &self.specs[index]
144 }
145
146 fn spec_index_for_start(&self, ch: char) -> Option<usize> {
147 self.token_map.get(&ch).copied()
148 }
149
150 fn spec_count(&self) -> usize {
151 self.specs.len()
152 }
153}
154
155impl Default for InlineParser {
156 fn default() -> Self {
157 InlineParser::new()
158 }
159}
160
161fn default_specs() -> Vec<InlineSpec> {
162 vec![
163 InlineSpec {
164 kind: InlineKind::Strong,
165 start_token: '*',
166 end_token: '*',
167 literal: false,
168 post_process: None,
169 },
170 InlineSpec {
171 kind: InlineKind::Emphasis,
172 start_token: '_',
173 end_token: '_',
174 literal: false,
175 post_process: None,
176 },
177 InlineSpec {
178 kind: InlineKind::Code,
179 start_token: '`',
180 end_token: '`',
181 literal: true,
182 post_process: None,
183 },
184 InlineSpec {
185 kind: InlineKind::Math,
186 start_token: '#',
187 end_token: '#',
188 literal: true,
189 post_process: None,
190 },
191 InlineSpec {
192 kind: InlineKind::Reference,
193 start_token: '[',
194 end_token: ']',
195 literal: true,
196 post_process: Some(classify_reference_node),
197 },
198 ]
199}
200
201fn parse_with(parser: &InlineParser, text: &str) -> InlineContent {
202 let chars: Vec<char> = text.chars().collect();
203 if chars.is_empty() {
204 return Vec::new();
205 }
206
207 let mut stack = vec![InlineFrame::root()];
208 let mut blocked = BlockedClosings::new(parser.spec_count());
209
210 let mut i = 0;
211 while i < chars.len() {
212 let ch = chars[i];
213 let prev = if i == 0 { None } else { Some(chars[i - 1]) };
214 let next = if i + 1 < chars.len() {
215 Some(chars[i + 1])
216 } else {
217 None
218 };
219
220 if ch == '\\' {
221 if let Some(next_char) = next {
222 if !next_char.is_alphanumeric() {
223 stack.last_mut().unwrap().push_char(next_char);
225 i += 2;
226 continue;
227 } else {
228 stack.last_mut().unwrap().push_char('\\');
230 i += 1;
231 continue;
232 }
233 } else {
234 stack.last_mut().unwrap().push_char('\\');
235 break;
236 }
237 }
238
239 let mut consumed = false;
240 if let Some(spec_index) = stack.last().unwrap().spec_index {
241 let spec = parser.spec(spec_index);
242 if ch == spec.end_token {
243 if blocked.consume(spec_index) {
244 } else if is_valid_end(prev, next, spec) {
246 let mut frame = stack.pop().unwrap();
247 frame.flush_buffer();
248 let had_content = frame.has_content();
249 if !had_content {
250 let parent = stack.last_mut().unwrap();
251 parent.push_char(spec.start_token);
252 parent.push_char(spec.end_token);
253 } else {
254 let node = frame.into_node(spec);
255 let node = spec.apply_post_process(node);
256 stack.last_mut().unwrap().push_node(node);
257 }
258 consumed = true;
259 }
260 }
261 }
262
263 if !consumed && !stack.last().unwrap().is_literal(parser) {
264 if let Some(spec_index) = parser.spec_index_for_start(ch) {
265 let spec = parser.spec(spec_index);
266 if is_valid_start(prev, next, spec) {
267 if stack
268 .iter()
269 .any(|frame| frame.spec_index == Some(spec_index))
270 {
271 blocked.increment(spec_index);
272 } else {
273 stack.last_mut().unwrap().flush_buffer();
274 stack.push(InlineFrame::new(spec_index));
275 consumed = true;
276 }
277 }
278 }
279 }
280
281 if !consumed {
282 stack.last_mut().unwrap().push_char(ch);
283 }
284
285 i += 1;
286 }
287
288 if let Some(frame) = stack.last_mut() {
289 frame.flush_buffer();
290 }
291
292 while stack.len() > 1 {
293 let mut frame = stack.pop().unwrap();
294 frame.flush_buffer();
295 let spec_index = frame
296 .spec_index
297 .expect("non-root stack frame must have a spec");
298 let spec = parser.spec(spec_index);
299 let parent = stack.last_mut().unwrap();
300 parent.push_char(spec.start_token);
301 for child in frame.children {
302 parent.push_node(child);
303 }
304 }
305
306 let mut root = stack.pop().unwrap();
307 root.flush_buffer();
308 root.children
309}
310
311struct InlineFrame {
312 spec_index: Option<usize>,
313 buffer: String,
314 children: InlineContent,
315}
316
317impl InlineFrame {
318 fn root() -> Self {
319 Self {
320 spec_index: None,
321 buffer: String::new(),
322 children: Vec::new(),
323 }
324 }
325
326 fn new(spec_index: usize) -> Self {
327 Self {
328 spec_index: Some(spec_index),
329 buffer: String::new(),
330 children: Vec::new(),
331 }
332 }
333
334 fn has_content(&self) -> bool {
335 !self.buffer.is_empty() || !self.children.is_empty()
336 }
337
338 fn push_char(&mut self, ch: char) {
339 self.buffer.push(ch);
340 }
341
342 fn flush_buffer(&mut self) {
343 if self.buffer.is_empty() {
344 return;
345 }
346 let text = std::mem::take(&mut self.buffer);
347 if let Some(InlineNode::Plain { text: existing, .. }) = self.children.last_mut() {
348 existing.push_str(&text);
349 } else {
350 self.children.push(InlineNode::Plain {
351 text,
352 annotations: Vec::new(),
353 });
354 }
355 }
356
357 fn push_node(&mut self, node: InlineNode) {
358 self.flush_buffer();
359 match node {
360 InlineNode::Plain { text, annotations } => {
361 if text.is_empty() {
362 return;
363 }
364 if let Some(InlineNode::Plain { text: existing, .. }) = self.children.last_mut() {
365 existing.push_str(&text);
366 } else {
369 self.children.push(InlineNode::Plain { text, annotations });
370 }
371 }
372 other => self.children.push(other),
373 }
374 }
375
376 fn into_node(self, spec: &InlineSpec) -> InlineNode {
377 match spec.kind {
378 InlineKind::Strong => InlineNode::Strong {
379 content: self.children,
380 annotations: Vec::new(),
381 },
382 InlineKind::Emphasis => InlineNode::Emphasis {
383 content: self.children,
384 annotations: Vec::new(),
385 },
386 InlineKind::Code => InlineNode::Code {
387 text: flatten_literal(self.children),
388 annotations: Vec::new(),
389 },
390 InlineKind::Math => InlineNode::Math {
391 text: flatten_literal(self.children),
392 annotations: Vec::new(),
393 },
394 InlineKind::Reference => InlineNode::Reference {
395 data: ReferenceInline::new(flatten_literal(self.children)),
396 annotations: Vec::new(),
397 },
398 }
399 }
400
401 fn is_literal(&self, parser: &InlineParser) -> bool {
402 self.spec_index
403 .map(|index| parser.spec(index).literal)
404 .unwrap_or(false)
405 }
406}
407
408fn flatten_literal(children: InlineContent) -> String {
409 let mut text = String::new();
410 for node in children {
411 match node {
412 InlineNode::Plain { text: segment, .. } => text.push_str(&segment),
413 _ => fatal_literal_content(),
414 }
415 }
416 text
417}
418
/// Aborts parsing when a literal inline element turns out to contain a non-plain child.
///
/// # Panics
///
/// Always panics; the function never returns.
fn fatal_literal_content() -> ! {
    panic!("Literal inline nodes must not contain nested nodes")
}
422
/// Per-spec counters of closing tokens that must be treated as plain text.
struct BlockedClosings {
    /// `counts[i]` is the number of pending blocked closings for spec `i`.
    counts: Vec<usize>,
}

impl BlockedClosings {
    /// Creates zeroed counters for `spec_len` specs.
    fn new(spec_len: usize) -> Self {
        let counts = vec![0; spec_len];
        Self { counts }
    }

    /// Adds one blocked closing for `spec_index`; out-of-range indices are ignored.
    fn increment(&mut self, spec_index: usize) {
        match self.counts.get_mut(spec_index) {
            Some(pending) => *pending += 1,
            None => {}
        }
    }

    /// Uses up one blocked closing for `spec_index`.
    ///
    /// Returns `true` if a pending closing existed (and was decremented), `false` if the
    /// counter was already zero or the index is out of range.
    fn consume(&mut self, spec_index: usize) -> bool {
        match self.counts.get_mut(spec_index) {
            Some(pending) if *pending > 0 => {
                *pending -= 1;
                true
            }
            _ => false,
        }
    }
}
450
451fn is_valid_start(prev: Option<char>, next: Option<char>, spec: &InlineSpec) -> bool {
452 if spec.kind == InlineKind::Reference {
453 !is_word(prev) && next.is_some()
454 } else {
455 !is_word(prev) && is_word(next)
456 }
457}
458
459fn is_valid_end(prev: Option<char>, next: Option<char>, spec: &InlineSpec) -> bool {
460 let inside_valid = if spec.literal {
461 prev.is_some()
462 } else {
463 matches!(prev, Some(ch) if !ch.is_whitespace())
464 };
465
466 inside_valid && !is_word(next)
467}
468
/// `true` when `ch` is present and alphanumeric; `None` counts as a non-word boundary.
fn is_word(ch: Option<char>) -> bool {
    matches!(ch, Some(c) if c.is_alphanumeric())
}
472
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lex::inlines::{InlineNode, PageFormat, ReferenceType};

    // --- Basic structure -------------------------------------------------------------

    #[test]
    fn parses_plain_text() {
        let nodes = parse_inlines("hello world");
        let expected = vec![InlineNode::Plain {
            text: "hello world".into(),
            annotations: Vec::new(),
        }];
        assert_eq!(nodes, expected);
    }

    #[test]
    fn parses_strong_and_emphasis() {
        let nodes = parse_inlines("*strong _inner_* text");
        assert_eq!(nodes.len(), 2);

        let strong_content = match &nodes[0] {
            InlineNode::Strong { content, .. } => content,
            other => panic!("Unexpected node: {other:?}"),
        };
        assert_eq!(strong_content.len(), 2);
        assert_eq!(
            strong_content[0],
            InlineNode::Plain {
                text: "strong ".into(),
                annotations: Vec::new()
            }
        );

        let inner = match &strong_content[1] {
            InlineNode::Emphasis { content, .. } => content,
            other => panic!("Unexpected child: {other:?}"),
        };
        assert_eq!(
            inner,
            &vec![InlineNode::Plain {
                text: "inner".into(),
                annotations: Vec::new()
            }]
        );

        assert_eq!(
            nodes[1],
            InlineNode::Plain {
                text: " text".into(),
                annotations: Vec::new()
            }
        );
    }

    #[test]
    fn nested_emphasis_inside_strong() {
        let nodes = parse_inlines("*strong and _emphasis_* text");
        assert_eq!(nodes.len(), 2);

        let strong_content = match &nodes[0] {
            InlineNode::Strong { content, .. } => content,
            _ => panic!("Expected strong node"),
        };
        assert_eq!(strong_content.len(), 2);
        assert_eq!(strong_content[0], InlineNode::plain("strong and ".into()));

        let inner = match &strong_content[1] {
            InlineNode::Emphasis { content, .. } => content,
            other => panic!("Unexpected child: {other:?}"),
        };
        assert_eq!(inner, &vec![InlineNode::plain("emphasis".into())]);
    }

    // --- Literal elements ------------------------------------------------------------

    #[test]
    fn code_is_literal() {
        let nodes = parse_inlines("`a * literal _` text");
        assert_eq!(nodes.len(), 2);
        let (code, trailing) = (&nodes[0], &nodes[1]);
        assert_eq!(*code, InlineNode::code("a * literal _".into()));
        assert_eq!(*trailing, InlineNode::plain(" text".into()));
    }

    #[test]
    fn math_is_literal() {
        let expected = vec![InlineNode::math("x + y".into())];
        let nodes = parse_inlines("#x + y#");
        assert_eq!(nodes, expected);
    }

    // --- Unmatched and same-kind delimiters ------------------------------------------

    #[test]
    fn unmatched_start_is_literal() {
        let expected = vec![InlineNode::plain("prefix *text".into())];
        let nodes = parse_inlines("prefix *text");
        assert_eq!(nodes, expected);
    }

    #[test]
    fn unmatched_nested_preserves_children() {
        let nodes = parse_inlines("*a _b_ c");
        assert_eq!(nodes.len(), 3);
        assert_eq!(nodes[0], InlineNode::plain("*a ".into()));

        let emphasis_content = match &nodes[1] {
            InlineNode::Emphasis { content, .. } => content,
            other => panic!("Unexpected node: {other:?}"),
        };
        assert_eq!(emphasis_content, &vec![InlineNode::plain("b".into())]);

        assert_eq!(nodes[2], InlineNode::plain(" c".into()));
    }

    #[test]
    fn same_type_nesting_skips_inner_pair() {
        let nodes = parse_inlines("*outer *inner* text*");
        assert_eq!(nodes.len(), 1);

        let strong_content = match &nodes[0] {
            InlineNode::Strong { content, .. } => content,
            other => panic!("Unexpected node: {other:?}"),
        };
        assert_eq!(
            strong_content,
            &vec![InlineNode::plain("outer *inner* text".into())]
        );
    }

    // --- Reference classification ----------------------------------------------------

    #[test]
    fn reference_detects_url() {
        let nodes = parse_inlines("[https://example.com]");
        let reference_type = match &nodes[0] {
            InlineNode::Reference { data, .. } => &data.reference_type,
            other => panic!("Unexpected node: {other:?}"),
        };
        match reference_type {
            ReferenceType::Url { target } => assert_eq!(target, "https://example.com"),
            other => panic!("Expected URL reference, got {other:?}"),
        }
    }

    #[test]
    fn reference_detects_tk_identifier() {
        let nodes = parse_inlines("[TK-feature]");
        let reference_type = match &nodes[0] {
            InlineNode::Reference { data, .. } => &data.reference_type,
            other => panic!("Unexpected node: {other:?}"),
        };
        match reference_type {
            ReferenceType::ToCome { identifier } => {
                assert_eq!(identifier.as_deref(), Some("feature"));
            }
            other => panic!("Expected TK reference, got {other:?}"),
        }
    }

    #[test]
    fn reference_detects_citation_and_footnotes() {
        let citation = parse_inlines("[@doe2024]");
        let labeled = parse_inlines("[^note1]");
        let numbered = parse_inlines("[42]");

        let citation_type = match &citation[0] {
            InlineNode::Reference { data, .. } => &data.reference_type,
            _ => panic!("Expected reference"),
        };
        match citation_type {
            ReferenceType::Citation(citation_data) => {
                assert_eq!(citation_data.keys, vec!["doe2024".to_string()]);
                assert!(citation_data.locator.is_none());
            }
            other => panic!("Expected citation, got {other:?}"),
        }

        let labeled_type = match &labeled[0] {
            InlineNode::Reference { data, .. } => &data.reference_type,
            _ => panic!("Expected reference"),
        };
        match labeled_type {
            ReferenceType::FootnoteLabeled { label } => assert_eq!(label, "note1"),
            other => panic!("Expected labeled footnote, got {other:?}"),
        }

        let numbered_type = match &numbered[0] {
            InlineNode::Reference { data, .. } => &data.reference_type,
            _ => panic!("Expected reference"),
        };
        match numbered_type {
            ReferenceType::FootnoteNumber { number } => assert_eq!(*number, 42),
            other => panic!("Expected numeric footnote, got {other:?}"),
        }
    }

    #[test]
    fn reference_parses_citation_locator() {
        let nodes = parse_inlines("[@doe2024; @smith2023, pp. 45-46,47]");
        let reference_type = match &nodes[0] {
            InlineNode::Reference { data, .. } => &data.reference_type,
            _ => panic!("Expected reference"),
        };
        let citation_data = match reference_type {
            ReferenceType::Citation(citation_data) => citation_data,
            other => panic!("Expected citation, got {other:?}"),
        };

        assert_eq!(
            citation_data.keys,
            vec!["doe2024".to_string(), "smith2023".to_string()]
        );
        let locator = citation_data.locator.as_ref().expect("expected locator");
        assert!(matches!(locator.format, PageFormat::Pp));
        assert_eq!(locator.ranges.len(), 2);

        let first_range = &locator.ranges[0];
        assert_eq!(first_range.start, 45);
        assert_eq!(first_range.end, Some(46));

        let second_range = &locator.ranges[1];
        assert_eq!(second_range.start, 47);
        assert!(second_range.end.is_none());
    }

    #[test]
    fn reference_detects_general_and_not_sure() {
        let general = parse_inlines("[Section Title]");
        let unsure = parse_inlines("[!!!]");

        let general_type = match &general[0] {
            InlineNode::Reference { data, .. } => &data.reference_type,
            _ => panic!("Expected reference"),
        };
        match general_type {
            ReferenceType::General { target } => assert_eq!(target, "Section Title"),
            other => panic!("Expected general reference, got {other:?}"),
        }

        let unsure_data = match &unsure[0] {
            InlineNode::Reference { data, .. } => data,
            _ => panic!("Expected reference"),
        };
        assert!(matches!(unsure_data.reference_type, ReferenceType::NotSure));
    }

    // --- Post-processing -------------------------------------------------------------

    /// Post-processor used below: prefixes the content of strong nodes with a
    /// `[strong]` plain node and leaves every other node untouched.
    fn annotate_strong(node: InlineNode) -> InlineNode {
        match node {
            InlineNode::Strong {
                mut content,
                annotations,
            } => {
                content.insert(0, InlineNode::plain("[strong]".into()));
                InlineNode::Strong {
                    content,
                    annotations,
                }
            }
            other => other,
        }
    }

    #[test]
    fn post_process_callback_transforms_node() {
        let parser = InlineParser::new().with_post_processor(InlineKind::Strong, annotate_strong);
        let nodes = parser.parse("*bold*");
        assert_eq!(nodes.len(), 1);

        let strong_content = match &nodes[0] {
            InlineNode::Strong { content, .. } => content,
            other => panic!("Unexpected inline node: {other:?}"),
        };
        assert_eq!(strong_content[0], InlineNode::plain("[strong]".into()));
        assert_eq!(strong_content[1], InlineNode::plain("bold".into()));
    }

    // --- Escapes and word boundaries -------------------------------------------------

    #[test]
    fn escaped_tokens_are_literal() {
        let expected = vec![InlineNode::plain("*literal*".into())];
        let nodes = parse_inlines("\\*literal\\*");
        assert_eq!(nodes, expected);
    }

    #[test]
    fn backslash_before_alphanumeric_preserved() {
        let expected = vec![InlineNode::plain("C:\\Users\\name".into())];
        let nodes = parse_inlines("C:\\Users\\name");
        assert_eq!(nodes, expected);
    }

    #[test]
    fn escape_works_in_paths() {
        let expected = vec![InlineNode::plain("Path: C:\\Users\\name".into())];
        let nodes = parse_inlines("Path: C:\\\\Users\\\\name");
        assert_eq!(nodes, expected);
    }

    #[test]
    fn arithmetic_not_parsed_as_inline() {
        let expected = vec![InlineNode::plain("7 * 8".into())];
        let nodes = parse_inlines("7 * 8");
        assert_eq!(nodes, expected);
    }

    #[test]
    fn word_boundary_start_invalid() {
        let expected = vec![InlineNode::plain("word*s*".into())];
        let nodes = parse_inlines("word*s*");
        assert_eq!(nodes, expected);
    }

    #[test]
    fn multiple_arithmetic_expressions() {
        let expected = vec![InlineNode::plain("Calculate 7 * 8 + 3 * 4".into())];
        let nodes = parse_inlines("Calculate 7 * 8 + 3 * 4");
        assert_eq!(nodes, expected);
    }

    // --- Annotations -----------------------------------------------------------------

    #[test]
    fn inline_node_annotations_empty_by_default() {
        let nodes = parse_inlines("*bold* text");
        assert_eq!(nodes.len(), 2);
        let (bold, trailing) = (&nodes[0], &nodes[1]);
        assert!(bold.annotations().is_empty());
        assert!(trailing.annotations().is_empty());
    }

    #[test]
    fn with_annotation_adds_annotation_to_node() {
        use crate::lex::ast::elements::{Annotation, Label};

        let label = Label::new("test".to_string());
        let annotation = Annotation::marker(label);
        let node = InlineNode::plain("text".into()).with_annotation(annotation.clone());

        let attached = node.annotations();
        assert_eq!(attached.len(), 1);
        assert_eq!(attached[0].data.label.value, "test");
    }

    #[test]
    fn with_annotations_adds_multiple_annotations() {
        use crate::lex::ast::elements::{Annotation, Label, Parameter};

        let marker = Annotation::marker(Label::new("doc.data".to_string()));
        let parameterized = Annotation::with_parameters(
            Label::new("test".to_string()),
            vec![Parameter::new("key".to_string(), "value".to_string())],
        );

        let node = InlineNode::math("x + y".into()).with_annotations(vec![marker, parameterized]);

        let attached = node.annotations();
        assert_eq!(attached.len(), 2);
        assert_eq!(attached[0].data.label.value, "doc.data");
        assert_eq!(attached[1].data.label.value, "test");
    }

    #[test]
    fn annotations_mut_allows_modification() {
        use crate::lex::ast::elements::{Annotation, Label};

        let mut node = InlineNode::code("code".into());
        assert!(node.annotations().is_empty());

        let highlight = Annotation::marker(Label::new("highlighted".to_string()));
        node.annotations_mut().push(highlight);

        assert_eq!(node.annotations().len(), 1);
        assert_eq!(node.annotations()[0].data.label.value, "highlighted");
    }

    #[test]
    fn post_processor_can_add_annotations() {
        use crate::lex::ast::elements::{Annotation, Label, Parameter};

        /// Attaches a `doc.data` annotation with `type=mathml` to math nodes only.
        fn add_mathml_annotation(node: InlineNode) -> InlineNode {
            match node {
                InlineNode::Math {
                    text,
                    mut annotations,
                } => {
                    annotations.push(Annotation::with_parameters(
                        Label::new("doc.data".to_string()),
                        vec![Parameter::new("type".to_string(), "mathml".to_string())],
                    ));
                    InlineNode::Math { text, annotations }
                }
                other => other,
            }
        }

        let parser =
            InlineParser::new().with_post_processor(InlineKind::Math, add_mathml_annotation);
        let nodes = parser.parse("#x + y#");
        assert_eq!(nodes.len(), 1);

        let (text, annotations) = match &nodes[0] {
            InlineNode::Math { text, annotations } => (text, annotations),
            other => panic!("Expected math node, got {other:?}"),
        };
        assert_eq!(text, "x + y");
        assert_eq!(annotations.len(), 1);

        let annotation_data = &annotations[0].data;
        assert_eq!(annotation_data.label.value, "doc.data");
        assert_eq!(annotation_data.parameters.len(), 1);
        assert_eq!(annotation_data.parameters[0].key, "type");
        assert_eq!(annotation_data.parameters[0].value, "mathml");
    }
}