markdown_it/plugins/cmark/inline/
entity.rs1use once_cell::sync::Lazy;
7use regex::Regex;
8
9use crate::common::utils::{get_entity_from_str, is_valid_entity_code};
10use crate::parser::inline::{InlineRule, InlineState, TextSpecial};
11use crate::{MarkdownIt, Node};
12
13pub fn add(md: &mut MarkdownIt) {
14 md.inline.add_rule::<EntityScanner>();
15}
16
17static DIGITAL_RE : Lazy<Regex> = Lazy::new(|| {
18 Regex::new("(?i)^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));").unwrap()
19});
20
21static NAMED_RE : Lazy<Regex> = Lazy::new(|| {
22 Regex::new("(?i)^&([a-z][a-z0-9]{1,31});").unwrap()
23});
24
/// Marker type carrying the inline rule for `&...;` character references.
/// It holds no data; the behavior lives in its `impl` blocks.
#[doc(hidden)]
pub struct EntityScanner;
27
28impl EntityScanner {
29 fn parse_digital_entity(state: &mut InlineState) -> Option<(Node, usize)> {
30 let capture = DIGITAL_RE.captures(&state.src[state.pos..])?;
31 let entity_len = capture[0].len();
32 let entity = &capture[1];
33 #[allow(clippy::from_str_radix_10)]
34 let code = if entity.starts_with('x') || entity.starts_with('X') {
35 u32::from_str_radix(&entity[1..], 16).unwrap()
36 } else {
37 u32::from_str_radix(entity, 10).unwrap()
38 };
39
40 let content_str = if is_valid_entity_code(code) {
41 char::from_u32(code).unwrap().into()
42 } else {
43 '\u{FFFD}'.into()
44 };
45
46 let markup_str = capture[0].to_owned();
47
48 let node = Node::new(TextSpecial {
49 content: content_str,
50 markup: markup_str,
51 info: "entity",
52 });
53 Some((node, entity_len))
54 }
55
56 fn parse_named_entity(state: &mut InlineState) -> Option<(Node, usize)> {
57 let capture = NAMED_RE.captures(&state.src[state.pos..])?;
58 let str = get_entity_from_str(&capture[0])?;
59 let entity_len = capture[0].len();
60 let markup_str = capture[0].to_owned();
61 let content_str = (*str).to_owned();
62
63 let node = Node::new(TextSpecial {
64 content: content_str,
65 markup: markup_str,
66 info: "entity",
67 });
68 Some((node, entity_len))
69 }
70}
71
72impl InlineRule for EntityScanner {
73 const MARKER: char = '&';
74
75 fn run(state: &mut InlineState) -> Option<(Node, usize)> {
76 let mut chars = state.src[state.pos..state.pos_max].chars();
77 if chars.next().unwrap() != '&' { return None; }
78
79 if let Some('#') = chars.next() {
80 Self::parse_digital_entity(state)
81 } else {
82 Self::parse_named_entity(state)
83 }
84 }
85}