1use std::{borrow::Cow, sync::LazyLock};
2
3use regex::{Captures, Regex, RegexBuilder, Replacer};
4
5use crate::{
6 Parser,
7 attributes::Attrlist,
8 content::Content,
9 document::InterpretedValue,
10 internal::{LookaheadReplacer, LookaheadResult, replace_with_lookahead},
11 parser::{
12 CharacterReplacementType, InlineSubstitutionRenderer, QuoteScope, QuoteType,
13 SpecialCharacter,
14 },
15};
16
/// One stage of the inline substitution pipeline.
///
/// Each variant selects the transformation that `SubstitutionStep::apply`
/// performs on a piece of content.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SubstitutionStep {
    /// Replaces the characters `<`, `>`, and `&` with whatever the renderer
    /// emits for them.
    SpecialCharacters,

    /// Applies inline quoted-text formatting (strong, emphasis, monospace,
    /// mark, superscript, subscript, and typographic quotes).
    Quotes,

    /// Expands `{name}` attribute references.
    AttributeReferences,

    /// Replaces textual sequences such as `(C)`, `--`, `...`, arrows, and
    /// character references.
    CharacterReplacements,

    /// Applies inline macros.
    Macros,

    /// Processes hard line breaks (a trailing ` +` or the `hardbreaks`
    /// option).
    PostReplacement,

    /// Callout handling.
    Callouts,
}
49
50impl SubstitutionStep {
51 pub(crate) fn apply(
52 &self,
53 content: &mut Content<'_>,
54 parser: &Parser,
55 attrlist: Option<&Attrlist<'_>>,
56 ) {
57 match self {
58 Self::SpecialCharacters => {
59 apply_special_characters(content, parser.renderer);
60 }
61 Self::Quotes => {
62 apply_quotes(content, parser);
63 }
64 Self::AttributeReferences => {
65 apply_attributes(content, parser);
66 }
67 Self::CharacterReplacements => {
68 apply_character_replacements(content, parser.renderer);
69 }
70 Self::Macros => {
71 super::macros::apply_macros(content, parser);
72 }
73 Self::PostReplacement => {
74 apply_post_replacements(content, parser, attrlist);
75 }
76 _ => {
77 todo!("Implement apply for SubstitutionStep::{self:?}");
78 }
79 }
80 }
81}
82
83fn apply_special_characters(content: &mut Content<'_>, renderer: &dyn InlineSubstitutionRenderer) {
84 if !content.rendered.contains(['<', '>', '&']) {
85 return;
86 }
87
88 let mut result: Cow<'_, str> = content.rendered.to_string().into();
89 let replacer = SpecialCharacterReplacer { renderer };
90
91 if let Cow::Owned(new_result) = SPECIAL_CHARS.replace_all(&result, replacer) {
92 result = new_result.into();
93 }
94
95 content.rendered = result.into();
96}
97
98#[derive(Debug)]
99struct SpecialCharacterReplacer<'r> {
100 renderer: &'r dyn InlineSubstitutionRenderer,
101}
102
103impl Replacer for SpecialCharacterReplacer<'_> {
104 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
105 if let Some(which) = match caps[0].as_ref() {
106 "<" => Some(SpecialCharacter::Lt),
107 ">" => Some(SpecialCharacter::Gt),
108 "&" => Some(SpecialCharacter::Ampersand),
109 _ => None,
110 } {
111 self.renderer.render_special_character(which, dest);
112 } else {
113 dest.push_str(caps[0].as_ref());
114 }
115 }
116}
117
118static SPECIAL_CHARS: LazyLock<Regex> = LazyLock::new(|| {
119 #[allow(clippy::unwrap_used)]
120 Regex::new("[<>&]").unwrap()
121});
122
123static QUOTED_TEXT_SNIFF: LazyLock<Regex> = LazyLock::new(|| {
124 #[allow(clippy::unwrap_used)]
125 Regex::new("[*_`#^~]").unwrap()
126});
127
/// One entry of the quote substitution table: a pattern together with the
/// quote type and scope that are handed to the replacer when it matches.
struct QuoteSub {
    /// Kind of quoted text this entry produces.
    type_: QuoteType,
    /// Whether the pattern is the constrained or unconstrained form.
    scope: QuoteScope,
    /// Regular expression that recognizes the markup.
    pattern: Regex,
}
133
134static QUOTE_SUBS: LazyLock<Vec<QuoteSub>> = LazyLock::new(|| {
155 vec![
156 QuoteSub {
157 type_: QuoteType::Strong,
159 scope: QuoteScope::Unconstrained,
160 #[allow(clippy::unwrap_used)]
161 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?\*\*(.+?)\*\*"#)
162 .dot_matches_new_line(true)
163 .build()
164 .unwrap(),
165 },
166 QuoteSub {
167 type_: QuoteType::Strong,
169 scope: QuoteScope::Constrained,
170 #[allow(clippy::unwrap_used)]
171 pattern: RegexBuilder::new(
172 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?\*(\S|\S.*?\S)\*\b{end-half}"#,
173 )
174 .dot_matches_new_line(true)
175 .build()
176 .unwrap(),
177 },
178 QuoteSub {
179 type_: QuoteType::DoubleQuote,
181 scope: QuoteScope::Constrained,
182 #[allow(clippy::unwrap_used)]
183 pattern: RegexBuilder::new(
184 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?"`(\S|\S.*?\S)`"\b{end-half}"#,
185 )
186 .dot_matches_new_line(true)
187 .build()
188 .unwrap(),
189 },
190 QuoteSub {
191 type_: QuoteType::SingleQuote,
193 scope: QuoteScope::Constrained,
194 #[allow(clippy::unwrap_used)]
195 pattern: RegexBuilder::new(
196 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?'`(\S|\S.*?\S)`'\b{end-half}"#,
197 )
198 .dot_matches_new_line(true)
199 .build()
200 .unwrap(),
201 },
202 QuoteSub {
203 type_: QuoteType::Monospaced,
205 scope: QuoteScope::Unconstrained,
206 #[allow(clippy::unwrap_used)]
207 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?``(.+?)``"#)
208 .dot_matches_new_line(true)
209 .build()
210 .unwrap(),
211 },
212 QuoteSub {
213 type_: QuoteType::Monospaced,
215 scope: QuoteScope::Constrained,
216 #[allow(clippy::unwrap_used)]
217 pattern: RegexBuilder::new(
218 r#"(^|[^\w&;:"'`}])(?:\[([^\[\]]+)\])?`(\S|\S.*?\S)`\b{end-half}"#,
219 )
223 .dot_matches_new_line(true)
224 .build()
225 .unwrap(),
226 },
227 QuoteSub {
228 type_: QuoteType::Emphasis,
230 scope: QuoteScope::Unconstrained,
231 #[allow(clippy::unwrap_used)]
232 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?__(.+?)__"#)
233 .dot_matches_new_line(true)
234 .build()
235 .unwrap(),
236 },
237 QuoteSub {
238 type_: QuoteType::Emphasis,
240 scope: QuoteScope::Constrained,
241 #[allow(clippy::unwrap_used)]
242 pattern: RegexBuilder::new(
243 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?_(\S|\S.*?\S)_\b{end-half}"#,
244 )
245 .dot_matches_new_line(true)
246 .build()
247 .unwrap(),
248 },
249 QuoteSub {
250 type_: QuoteType::Mark,
252 scope: QuoteScope::Unconstrained,
253 #[allow(clippy::unwrap_used)]
254 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?##(.+?)##"#)
255 .dot_matches_new_line(true)
256 .build()
257 .unwrap(),
258 },
259 QuoteSub {
260 type_: QuoteType::Mark,
262 scope: QuoteScope::Constrained,
263 #[allow(clippy::unwrap_used)]
264 pattern: RegexBuilder::new(
265 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?#(\S|\S.*?\S)#\b{end-half}"#,
266 )
267 .dot_matches_new_line(true)
268 .build()
269 .unwrap(),
270 },
271 QuoteSub {
272 type_: QuoteType::Superscript,
274 scope: QuoteScope::Unconstrained,
275 #[allow(clippy::unwrap_used)]
276 pattern: Regex::new(r#"\\?(?:\[([^\[\]]+)\])?\^(\S+?)\^"#).unwrap(),
277 },
278 QuoteSub {
279 type_: QuoteType::Subscript,
281 scope: QuoteScope::Unconstrained,
282 #[allow(clippy::unwrap_used)]
283 pattern: Regex::new(r#"\\?(?:\[([^\[\]]+)\])?~(\S+?)~"#).unwrap(),
284 },
285 ]
286});
287
288#[derive(Debug)]
289struct QuoteReplacer<'r> {
290 type_: QuoteType,
291 scope: QuoteScope,
292 parser: &'r Parser<'r>,
293}
294
295impl LookaheadReplacer for QuoteReplacer<'_> {
296 fn replace_append(
297 &mut self,
298 caps: &Captures<'_>,
299 dest: &mut String,
300 after: &str,
301 ) -> LookaheadResult {
302 if self.type_ == QuoteType::Monospaced
309 && self.scope == QuoteScope::Constrained
310 && after.starts_with(['"', '\'', '`'])
311 {
312 let skip_ahead = if caps[0].starts_with('\\') { 2 } else { 1 };
313 dest.push_str(&caps[0][0..skip_ahead]);
314 return LookaheadResult::SkipAheadAndRetry(skip_ahead);
315 }
316
317 let unescaped_attrs: Option<String> = if caps[0].starts_with('\\') {
318 let maybe_attrs = caps.get(2).map(|a| a.as_str());
319 if self.scope == QuoteScope::Constrained && maybe_attrs.is_some() {
320 Some(format!(
321 "[{attrs}]",
322 attrs = maybe_attrs.unwrap_or_default()
323 ))
324 } else {
325 dest.push_str(&caps[0][1..]);
326 return LookaheadResult::Continue;
327 }
328 } else {
329 None
330 };
331
332 match self.scope {
333 QuoteScope::Constrained => {
334 if let Some(attrs) = unescaped_attrs {
335 dest.push_str(&attrs);
336 self.parser.renderer.render_quoted_substitition(
337 self.type_, self.scope, None, None, &caps[3], dest,
338 );
339 } else {
340 let (attrlist, type_): (Option<Attrlist<'_>>, QuoteType) =
341 if let Some(attrlist) = caps.get(2) {
342 let type_ = if self.type_ == QuoteType::Mark {
343 QuoteType::Unquoted
344 } else {
345 self.type_
346 };
347
348 (
349 Some(
350 Attrlist::parse(
351 crate::Span::new(attrlist.as_str()),
352 self.parser,
353 )
354 .item
355 .item,
356 ),
357 type_,
358 )
359 } else {
360 (None, self.type_)
361 };
362
363 if let Some(prefix) = caps.get(1) {
364 dest.push_str(prefix.as_str());
365 }
366
367 let id = attrlist
368 .as_ref()
369 .and_then(|a| a.id().map(|s| s.to_string()));
370
371 self.parser.renderer.render_quoted_substitition(
372 type_, self.scope, attrlist, id, &caps[3], dest,
373 );
374 }
375 }
376
377 QuoteScope::Unconstrained => {
378 let (attrlist, type_): (Option<Attrlist<'_>>, QuoteType) =
379 if let Some(attrlist) = caps.get(1) {
380 let type_ = if self.type_ == QuoteType::Mark {
381 QuoteType::Unquoted
382 } else {
383 self.type_
384 };
385
386 (
387 Some(
388 Attrlist::parse(crate::Span::new(attrlist.as_str()), self.parser)
389 .item
390 .item,
391 ),
392 type_,
393 )
394 } else {
395 (None, self.type_)
396 };
397
398 let id = attrlist
399 .as_ref()
400 .and_then(|a| a.id().map(|s| s.to_string()));
401
402 self.parser
403 .renderer
404 .render_quoted_substitition(type_, self.scope, attrlist, id, &caps[2], dest);
405 }
406 }
407
408 LookaheadResult::Continue
409 }
410}
411
412fn apply_quotes(content: &mut Content<'_>, parser: &Parser) {
413 if !QUOTED_TEXT_SNIFF.is_match(content.rendered.as_ref()) {
414 return;
415 }
416
417 let mut result: Cow<'_, str> = content.rendered.to_string().into();
418
419 for sub in &*QUOTE_SUBS {
420 let replacer = QuoteReplacer {
421 type_: sub.type_,
422 scope: sub.scope,
423 parser,
424 };
425
426 if let Cow::Owned(new_result) = replace_with_lookahead(&sub.pattern, &result, replacer) {
427 result = new_result.into();
428 }
429 }
432
433 content.rendered = result.into();
434}
435
436static ATTRIBUTE_REFERENCE: LazyLock<Regex> = LazyLock::new(|| {
437 #[allow(clippy::unwrap_used)]
438 Regex::new(r#"\\?\{([A-Za-z0-9_][A-Za-z0-9_-]*)\}"#).unwrap()
439});
440
441#[derive(Debug)]
442struct AttributeReplacer<'p>(&'p Parser<'p>);
443
444impl Replacer for AttributeReplacer<'_> {
445 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
446 let attr_name = &caps[1];
447
448 if !self.0.has_attribute(attr_name) {
450 dest.push_str(&caps[0]);
451 return;
452 }
453
454 if caps[0].starts_with('\\') {
455 dest.push_str(&caps[0][1..]);
456 return;
457 }
458
459 match self.0.attribute_value(attr_name) {
460 InterpretedValue::Value(value) => {
461 dest.push_str(value.as_ref());
462 }
463 _ => {
464 }
467 }
468 }
469}
470
471fn apply_attributes(content: &mut Content<'_>, parser: &Parser) {
472 if !content.rendered.contains('{') {
473 return;
474 }
475
476 let mut result: Cow<'_, str> = content.rendered.to_string().into();
477
478 if let Cow::Owned(new_result) =
479 ATTRIBUTE_REFERENCE.replace_all(&result, AttributeReplacer(parser))
480 {
481 result = new_result.into();
482 }
483 content.rendered = result.into();
487}
488
489fn apply_character_replacements(
490 content: &mut Content<'_>,
491 renderer: &dyn InlineSubstitutionRenderer,
492) {
493 if !REPLACEABLE_TEXT_SNIFF.is_match(content.rendered.as_ref()) {
494 return;
495 }
496
497 let mut result: Cow<'_, str> = content.rendered.to_string().into();
498
499 for repl in &*REPLACEMENTS {
500 let replacer = CharacterReplacer {
501 type_: repl.type_.clone(),
502 renderer,
503 };
504
505 if let Cow::Owned(new_result) = repl.pattern.replace_all(&result, replacer) {
506 result = new_result.into();
507 }
508 }
511
512 content.rendered = result.into();
513}
514
/// One entry of the character replacement table: a pattern together with the
/// replacement type handed to the replacer when it matches.
struct CharacterReplacement {
    /// Kind of replacement this entry produces.
    type_: CharacterReplacementType,
    /// Regular expression that recognizes the replaceable text.
    pattern: Regex,
}
519
520static REPLACEABLE_TEXT_SNIFF: LazyLock<Regex> = LazyLock::new(|| {
521 #[allow(clippy::unwrap_used)]
522 Regex::new(r#"[&']|--|\.\.\.|\([CRT]M?\)"#).unwrap()
523});
524
525static REPLACEMENTS: LazyLock<Vec<CharacterReplacement>> = LazyLock::new(|| {
531 vec![
532 CharacterReplacement {
533 type_: CharacterReplacementType::Copyright,
535 #[allow(clippy::unwrap_used)]
536 pattern: Regex::new(r#"\\?\(C\)"#).unwrap(),
537 },
538 CharacterReplacement {
539 type_: CharacterReplacementType::Registered,
541 #[allow(clippy::unwrap_used)]
542 pattern: Regex::new(r#"\\?\(R\)"#).unwrap(),
543 },
544 CharacterReplacement {
545 type_: CharacterReplacementType::Trademark,
547 #[allow(clippy::unwrap_used)]
548 pattern: Regex::new(r#"\\?\(TM\)"#).unwrap(),
549 },
550 CharacterReplacement {
551 type_: CharacterReplacementType::EmDashSurroundedBySpaces,
553 #[allow(clippy::unwrap_used)]
554 pattern: Regex::new(r#"(?: |\n|^|\\)--(?: |\n|$)"#).unwrap(),
555 },
556 CharacterReplacement {
557 type_: CharacterReplacementType::EmDashWithoutSpace,
559 #[allow(clippy::unwrap_used)]
560 pattern: Regex::new(r#"(\w)\\?--\b{start-half}"#).unwrap(),
561 },
562 CharacterReplacement {
563 type_: CharacterReplacementType::Ellipsis,
565 #[allow(clippy::unwrap_used)]
566 pattern: Regex::new(r#"\\?\.\.\."#).unwrap(),
567 },
568 CharacterReplacement {
569 type_: CharacterReplacementType::TypographicApostrophe,
571 #[allow(clippy::unwrap_used)]
572 pattern: Regex::new(r#"\\?`'"#).unwrap(),
573 },
574 CharacterReplacement {
575 type_: CharacterReplacementType::TypographicApostrophe,
577 #[allow(clippy::unwrap_used)]
578 pattern: Regex::new(r#"([[:alnum:]])\\?'([[:alpha:]])"#).unwrap(),
579 },
580 CharacterReplacement {
581 type_: CharacterReplacementType::SingleRightArrow,
583 #[allow(clippy::unwrap_used)]
584 pattern: Regex::new(r#"\\?->"#).unwrap(),
585 },
586 CharacterReplacement {
587 type_: CharacterReplacementType::DoubleRightArrow,
589 #[allow(clippy::unwrap_used)]
590 pattern: Regex::new(r#"\\?=>"#).unwrap(),
591 },
592 CharacterReplacement {
593 type_: CharacterReplacementType::SingleLeftArrow,
595 #[allow(clippy::unwrap_used)]
596 pattern: Regex::new(r#"\\?<-"#).unwrap(),
597 },
598 CharacterReplacement {
599 type_: CharacterReplacementType::DoubleLeftArrow,
601 #[allow(clippy::unwrap_used)]
602 pattern: Regex::new(r#"\\?<="#).unwrap(),
603 },
604 CharacterReplacement {
605 type_: CharacterReplacementType::CharacterReference("".to_owned()),
607 #[allow(clippy::unwrap_used)]
608 pattern: Regex::new(r#"\\?&((?:[a-zA-Z][a-zA-Z]+\d{0,2}|#\d\d\d{0,4}|#x[\da-fA-F][\da-fA-F][\da-fA-F]{0,3}));"#).unwrap(),
609 },
610 ]
611});
612
613#[derive(Debug)]
614struct CharacterReplacer<'r> {
615 type_: CharacterReplacementType,
616 renderer: &'r dyn InlineSubstitutionRenderer,
617}
618
619impl Replacer for CharacterReplacer<'_> {
620 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
621 if caps[0].contains('\\') {
622 let unescaped = &caps[0].replace("\\", "");
624 dest.push_str(unescaped);
625 return;
626 }
627
628 match self.type_ {
629 CharacterReplacementType::Copyright
630 | CharacterReplacementType::Registered
631 | CharacterReplacementType::Trademark
632 | CharacterReplacementType::EmDashSurroundedBySpaces
633 | CharacterReplacementType::Ellipsis
634 | CharacterReplacementType::SingleLeftArrow
635 | CharacterReplacementType::DoubleLeftArrow
636 | CharacterReplacementType::SingleRightArrow
637 | CharacterReplacementType::DoubleRightArrow => {
638 self.renderer
639 .render_character_replacement(self.type_.clone(), dest);
640 }
641
642 CharacterReplacementType::EmDashWithoutSpace => {
643 dest.push_str(&caps[1]);
644 self.renderer.render_character_replacement(
645 CharacterReplacementType::EmDashWithoutSpace,
646 dest,
647 );
648 }
649
650 CharacterReplacementType::TypographicApostrophe => {
651 if let Some(before) = caps.get(1) {
652 dest.push_str(before.as_str());
653 }
654
655 self.renderer.render_character_replacement(
656 CharacterReplacementType::TypographicApostrophe,
657 dest,
658 );
659
660 if let Some(after) = caps.get(2) {
661 dest.push_str(after.as_str());
662 }
663 }
664
665 CharacterReplacementType::CharacterReference(_) => {
666 self.renderer.render_character_replacement(
667 CharacterReplacementType::CharacterReference(caps[1].to_string()),
668 dest,
669 );
670 }
671 }
672 }
673}
674
675fn apply_post_replacements(
676 content: &mut Content<'_>,
677 parser: &Parser,
678 attrlist: Option<&Attrlist<'_>>,
679) {
680 if attrlist.is_some_and(|attrlist| attrlist.has_option("hardbreaks")) {
683 let text = content.rendered.as_ref();
684 if !text.contains('\n') {
685 return;
686 }
687
688 let mut lines: Vec<&str> = content.rendered.as_ref().lines().collect();
689 let last = lines.pop().unwrap_or_default();
690
691 let mut lines: Vec<String> = lines
692 .iter()
693 .map(|line| {
694 let line = if line.ends_with(" +") {
695 &line[0..line.len() - 2]
696 } else {
697 *line
698 };
699
700 let mut line = line.to_owned();
701 parser.renderer.render_line_break(&mut line);
702 line
703 })
704 .collect();
705
706 lines.push(last.to_owned());
707
708 let new_result = lines.join("\n");
709 content.rendered = new_result.into();
710 } else {
711 let rendered = content.rendered.as_ref();
712 if !(rendered.contains('+') && rendered.contains('\n')) {
713 return;
714 }
715
716 let replacer = PostReplacementReplacer(parser.renderer);
717
718 if let Cow::Owned(new_result) = HARD_LINE_BREAK.replace_all(rendered, replacer) {
719 content.rendered = new_result.into();
720 }
721 }
722}
723
724#[derive(Debug)]
725struct PostReplacementReplacer<'r>(&'r dyn InlineSubstitutionRenderer);
726
727impl Replacer for PostReplacementReplacer<'_> {
728 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
729 dest.push_str(&caps[1]);
730 self.0.render_line_break(dest);
731 }
732}
733
734static HARD_LINE_BREAK: LazyLock<Regex> = LazyLock::new(|| {
735 #[allow(clippy::unwrap_used)]
736 Regex::new(r#"(?m)^(.*) \+$"#).unwrap()
737});