markdown_that/plugins/cmark/inline/
entity.rs1use regex::Regex;
8use std::sync::LazyLock;
9
10use crate::common::utils::{get_entity_from_str, is_valid_entity_code};
11use crate::parser::inline::{InlineRule, InlineState, TextSpecial};
12use crate::{MarkdownThat, Node};
13
14pub fn add(md: &mut MarkdownThat) {
15 md.inline.add_rule::<EntityScanner>();
16}
17
18static DIGITAL_RE: LazyLock<Regex> =
19 LazyLock::new(|| Regex::new("(?i)^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));").unwrap());
20
21static NAMED_RE: LazyLock<Regex> =
22 LazyLock::new(|| Regex::new("(?i)^&([a-z][a-z0-9]{1,31});").unwrap());
23
/// Inline rule type for entity references; it carries no state and is
/// registered with the inline parser by [`add`].
#[doc(hidden)]
pub struct EntityScanner;
26
27impl EntityScanner {
28 fn parse_digital_entity(state: &mut InlineState) -> Option<(Node, usize)> {
29 let capture = DIGITAL_RE.captures(&state.src[state.pos..])?;
30 let entity_len = capture[0].len();
31 let entity = &capture[1];
32 #[allow(clippy::from_str_radix_10)]
33 let code = if entity.starts_with('x') || entity.starts_with('X') {
34 u32::from_str_radix(&entity[1..], 16).unwrap()
35 } else {
36 u32::from_str_radix(entity, 10).unwrap()
37 };
38
39 let content_str = if is_valid_entity_code(code) {
40 char::from_u32(code).unwrap().into()
41 } else {
42 '\u{FFFD}'.into()
43 };
44
45 let markup_str = capture[0].to_owned();
46
47 let node = Node::new(TextSpecial {
48 content: content_str,
49 markup: markup_str,
50 info: "entity",
51 });
52 Some((node, entity_len))
53 }
54
55 fn parse_named_entity(state: &mut InlineState) -> Option<(Node, usize)> {
56 let capture = NAMED_RE.captures(&state.src[state.pos..])?;
57 let str = get_entity_from_str(&capture[0])?;
58 let entity_len = capture[0].len();
59 let markup_str = capture[0].to_owned();
60 let content_str = (*str).to_owned();
61
62 let node = Node::new(TextSpecial {
63 content: content_str,
64 markup: markup_str,
65 info: "entity",
66 });
67 Some((node, entity_len))
68 }
69}
70
71impl InlineRule for EntityScanner {
72 const MARKER: char = '&';
73
74 fn run(state: &mut InlineState) -> Option<(Node, usize)> {
75 let mut chars = state.src[state.pos..state.pos_max].chars();
76 if chars.next().unwrap() != '&' {
77 return None;
78 }
79
80 if let Some('#') = chars.next() {
81 Self::parse_digital_entity(state)
82 } else {
83 Self::parse_named_entity(state)
84 }
85 }
86}