markdown_it/plugins/cmark/inline/
entity.rs

//! Entity and numeric character references
//!
//! `&#123;`, `&#xAF;`, `&quot;`
//!
//! <https://spec.commonmark.org/0.30/#entity-and-numeric-character-references>
6use once_cell::sync::Lazy;
7use regex::Regex;
8
9use crate::common::utils::{get_entity_from_str, is_valid_entity_code};
10use crate::parser::inline::{InlineRule, InlineState, TextSpecial};
11use crate::{MarkdownIt, Node};
12
13pub fn add(md: &mut MarkdownIt) {
14    md.inline.add_rule::<EntityScanner>();
15}
16
17static DIGITAL_RE : Lazy<Regex> = Lazy::new(|| {
18    Regex::new("(?i)^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));").unwrap()
19});
20
21static NAMED_RE : Lazy<Regex> = Lazy::new(|| {
22    Regex::new("(?i)^&([a-z][a-z0-9]{1,31});").unwrap()
23});
24
/// Inline rule that recognizes entity and numeric character references.
#[doc(hidden)]
pub struct EntityScanner;
27
28impl EntityScanner {
29    fn parse_digital_entity(state: &mut InlineState) -> Option<(Node, usize)> {
30        let capture = DIGITAL_RE.captures(&state.src[state.pos..])?;
31        let entity_len = capture[0].len();
32        let entity = &capture[1];
33        #[allow(clippy::from_str_radix_10)]
34        let code = if entity.starts_with('x') || entity.starts_with('X') {
35            u32::from_str_radix(&entity[1..], 16).unwrap()
36        } else {
37            u32::from_str_radix(entity, 10).unwrap()
38        };
39
40        let content_str = if is_valid_entity_code(code) {
41            char::from_u32(code).unwrap().into()
42        } else {
43            '\u{FFFD}'.into()
44        };
45
46        let markup_str = capture[0].to_owned();
47
48        let node = Node::new(TextSpecial {
49            content: content_str,
50            markup: markup_str,
51            info: "entity",
52        });
53        Some((node, entity_len))
54    }
55
56    fn parse_named_entity(state: &mut InlineState) -> Option<(Node, usize)> {
57        let capture = NAMED_RE.captures(&state.src[state.pos..])?;
58        let str = get_entity_from_str(&capture[0])?;
59        let entity_len = capture[0].len();
60        let markup_str = capture[0].to_owned();
61        let content_str = (*str).to_owned();
62
63        let node = Node::new(TextSpecial {
64            content: content_str,
65            markup: markup_str,
66            info: "entity",
67        });
68        Some((node, entity_len))
69    }
70}
71
72impl InlineRule for EntityScanner {
73    const MARKER: char = '&';
74
75    fn run(state: &mut InlineState) -> Option<(Node, usize)> {
76        let mut chars = state.src[state.pos..state.pos_max].chars();
77        if chars.next().unwrap() != '&' { return None; }
78
79        if let Some('#') = chars.next() {
80            Self::parse_digital_entity(state)
81        } else {
82            Self::parse_named_entity(state)
83        }
84    }
85}