markdown_that/plugins/cmark/inline/
entity.rs

//! Entity and numeric character references
//!
//! `&#123;`, `&#xAF;`, `&quot;`
//!
//! <https://spec.commonmark.org/0.30/#entity-and-numeric-character-references>
6
7use regex::Regex;
8use std::sync::LazyLock;
9
10use crate::common::utils::{get_entity_from_str, is_valid_entity_code};
11use crate::parser::inline::{InlineRule, InlineState, TextSpecial};
12use crate::{MarkdownThat, Node};
13
14pub fn add(md: &mut MarkdownThat) {
15    md.inline.add_rule::<EntityScanner>();
16}
17
18static DIGITAL_RE: LazyLock<Regex> =
19    LazyLock::new(|| Regex::new("(?i)^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));").unwrap());
20
21static NAMED_RE: LazyLock<Regex> =
22    LazyLock::new(|| Regex::new("(?i)^&([a-z][a-z0-9]{1,31});").unwrap());
23
/// Inline rule that recognises entity and numeric character references
/// starting at an `&` and turns them into `TextSpecial` nodes.
#[doc(hidden)]
pub struct EntityScanner;
26
27impl EntityScanner {
28    fn parse_digital_entity(state: &mut InlineState) -> Option<(Node, usize)> {
29        let capture = DIGITAL_RE.captures(&state.src[state.pos..])?;
30        let entity_len = capture[0].len();
31        let entity = &capture[1];
32        #[allow(clippy::from_str_radix_10)]
33        let code = if entity.starts_with('x') || entity.starts_with('X') {
34            u32::from_str_radix(&entity[1..], 16).unwrap()
35        } else {
36            u32::from_str_radix(entity, 10).unwrap()
37        };
38
39        let content_str = if is_valid_entity_code(code) {
40            char::from_u32(code).unwrap().into()
41        } else {
42            '\u{FFFD}'.into()
43        };
44
45        let markup_str = capture[0].to_owned();
46
47        let node = Node::new(TextSpecial {
48            content: content_str,
49            markup: markup_str,
50            info: "entity",
51        });
52        Some((node, entity_len))
53    }
54
55    fn parse_named_entity(state: &mut InlineState) -> Option<(Node, usize)> {
56        let capture = NAMED_RE.captures(&state.src[state.pos..])?;
57        let str = get_entity_from_str(&capture[0])?;
58        let entity_len = capture[0].len();
59        let markup_str = capture[0].to_owned();
60        let content_str = (*str).to_owned();
61
62        let node = Node::new(TextSpecial {
63            content: content_str,
64            markup: markup_str,
65            info: "entity",
66        });
67        Some((node, entity_len))
68    }
69}
70
71impl InlineRule for EntityScanner {
72    const MARKER: char = '&';
73
74    fn run(state: &mut InlineState) -> Option<(Node, usize)> {
75        let mut chars = state.src[state.pos..state.pos_max].chars();
76        if chars.next().unwrap() != '&' {
77            return None;
78        }
79
80        if let Some('#') = chars.next() {
81            Self::parse_digital_entity(state)
82        } else {
83            Self::parse_named_entity(state)
84        }
85    }
86}