1use std::{borrow::Cow, sync::LazyLock};
2
3use regex::{Captures, Regex, RegexBuilder, Replacer};
4
5use crate::{
6 Parser,
7 attributes::{Attrlist, AttrlistContext},
8 content::Content,
9 document::InterpretedValue,
10 internal::{LookaheadReplacer, LookaheadResult, replace_with_lookahead},
11 parser::{
12 CharacterReplacementType, InlineSubstitutionRenderer, QuoteScope, QuoteType,
13 SpecialCharacter,
14 },
15};
16
/// One step of inline substitution processing.
///
/// Each variant is dispatched to its handler by `SubstitutionStep::apply`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SubstitutionStep {
    /// Hands each `<`, `>`, and `&` in the text to the renderer's
    /// special-character hook.
    SpecialCharacters,

    /// Renders quoted (inline formatted) text such as `*strong*`,
    /// `_emphasis_`, `` `monospace` ``, `#mark#`, `^super^`, and `~sub~`.
    Quotes,

    /// Expands `{name}` attribute references using the parser's attributes.
    AttributeReferences,

    /// Replaces textual shorthands such as `(C)`, `(R)`, `(TM)`, `--`, `...`,
    /// apostrophes, arrows, and character references via the renderer.
    CharacterReplacements,

    /// Delegates to `super::macros::apply_macros`.
    Macros,

    /// Renders hard line breaks: a trailing ` +` on a line, or every line
    /// boundary when the attribute list carries the `hardbreaks` option.
    PostReplacement,

    /// Not handled yet: `apply` reaches `todo!()` for this step.
    Callouts,
}
49
50impl SubstitutionStep {
51 pub(crate) fn apply(
52 &self,
53 content: &mut Content<'_>,
54 parser: &Parser,
55 attrlist: Option<&Attrlist<'_>>,
56 ) {
57 match self {
58 Self::SpecialCharacters => {
59 apply_special_characters(content, parser.renderer);
60 }
61 Self::Quotes => {
62 apply_quotes(content, parser);
63 }
64 Self::AttributeReferences => {
65 apply_attributes(content, parser);
66 }
67 Self::CharacterReplacements => {
68 apply_character_replacements(content, parser.renderer);
69 }
70 Self::Macros => {
71 super::macros::apply_macros(content, parser);
72 }
73 Self::PostReplacement => {
74 apply_post_replacements(content, parser, attrlist);
75 }
76 _ => {
77 todo!("Implement apply for SubstitutionStep::{self:?}");
78 }
79 }
80 }
81}
82
83fn apply_special_characters(content: &mut Content<'_>, renderer: &dyn InlineSubstitutionRenderer) {
84 if !content.rendered.contains(['<', '>', '&']) {
85 return;
86 }
87
88 let mut result: Cow<'_, str> = content.rendered.to_string().into();
89 let replacer = SpecialCharacterReplacer { renderer };
90
91 if let Cow::Owned(new_result) = SPECIAL_CHARS.replace_all(&result, replacer) {
92 result = new_result.into();
93 }
94
95 content.rendered = result.into();
96}
97
/// Matches a single `<`, `>`, or `&`. Used by `apply_special_characters`.
static SPECIAL_CHARS: LazyLock<Regex> = LazyLock::new(|| {
    // The pattern is a fixed literal, so a failure here is a programming error.
    #[allow(clippy::unwrap_used)]
    Regex::new("[<>&]").unwrap()
});
102
103#[derive(Debug)]
104struct SpecialCharacterReplacer<'r> {
105 renderer: &'r dyn InlineSubstitutionRenderer,
106}
107
108impl Replacer for SpecialCharacterReplacer<'_> {
109 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
110 let ch = &caps[0];
113
114 if ch == "<" {
115 self.renderer
116 .render_special_character(SpecialCharacter::Lt, dest);
117 } else if ch == ">" {
118 self.renderer
119 .render_special_character(SpecialCharacter::Gt, dest);
120 } else if ch == "&" {
121 self.renderer
122 .render_special_character(SpecialCharacter::Ampersand, dest);
123 }
124
125 }
128}
129
/// Cheap pre-check for `apply_quotes`: matches any one of the characters
/// `*`, `_`, `` ` ``, `#`, `^`, or `~`.
static QUOTED_TEXT_SNIFF: LazyLock<Regex> = LazyLock::new(|| {
    // The pattern is a fixed literal, so a failure here is a programming error.
    #[allow(clippy::unwrap_used)]
    Regex::new("[*_`#^~]").unwrap()
});
134
/// One entry of the `QUOTE_SUBS` table: a quote type and scope together with
/// the pattern that recognizes it.
struct QuoteSub {
    // Kind of quoted text this entry produces.
    type_: QuoteType,
    // Whether the pattern is the constrained or unconstrained form.
    scope: QuoteScope,
    // Compiled pattern; `QuoteReplacer` relies on its capture-group layout.
    pattern: Regex,
}
140
141static QUOTE_SUBS: LazyLock<Vec<QuoteSub>> = LazyLock::new(|| {
162 vec![
163 QuoteSub {
164 type_: QuoteType::Strong,
166 scope: QuoteScope::Unconstrained,
167 #[allow(clippy::unwrap_used)]
168 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?\*\*(.+?)\*\*"#)
169 .dot_matches_new_line(true)
170 .build()
171 .unwrap(),
172 },
173 QuoteSub {
174 type_: QuoteType::Strong,
176 scope: QuoteScope::Constrained,
177 #[allow(clippy::unwrap_used)]
178 pattern: RegexBuilder::new(
179 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?\*(\S|\S.*?\S)\*\b{end-half}"#,
180 )
181 .dot_matches_new_line(true)
182 .build()
183 .unwrap(),
184 },
185 QuoteSub {
186 type_: QuoteType::DoubleQuote,
188 scope: QuoteScope::Constrained,
189 #[allow(clippy::unwrap_used)]
190 pattern: RegexBuilder::new(
191 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?"`(\S|\S.*?\S)`"\b{end-half}"#,
192 )
193 .dot_matches_new_line(true)
194 .build()
195 .unwrap(),
196 },
197 QuoteSub {
198 type_: QuoteType::SingleQuote,
200 scope: QuoteScope::Constrained,
201 #[allow(clippy::unwrap_used)]
202 pattern: RegexBuilder::new(
203 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?'`(\S|\S.*?\S)`'\b{end-half}"#,
204 )
205 .dot_matches_new_line(true)
206 .build()
207 .unwrap(),
208 },
209 QuoteSub {
210 type_: QuoteType::Monospaced,
212 scope: QuoteScope::Unconstrained,
213 #[allow(clippy::unwrap_used)]
214 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?``(.+?)``"#)
215 .dot_matches_new_line(true)
216 .build()
217 .unwrap(),
218 },
219 QuoteSub {
220 type_: QuoteType::Monospaced,
222 scope: QuoteScope::Constrained,
223 #[allow(clippy::unwrap_used)]
224 pattern: RegexBuilder::new(
225 r#"(^|[^\w&;:"'`}])(?:\[([^\[\]]+)\])?`(\S|\S.*?\S)`\b{end-half}"#,
226 )
230 .dot_matches_new_line(true)
231 .build()
232 .unwrap(),
233 },
234 QuoteSub {
235 type_: QuoteType::Emphasis,
237 scope: QuoteScope::Unconstrained,
238 #[allow(clippy::unwrap_used)]
239 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?__(.+?)__"#)
240 .dot_matches_new_line(true)
241 .build()
242 .unwrap(),
243 },
244 QuoteSub {
245 type_: QuoteType::Emphasis,
247 scope: QuoteScope::Constrained,
248 #[allow(clippy::unwrap_used)]
249 pattern: RegexBuilder::new(
250 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?_(\S|\S.*?\S)_\b{end-half}"#,
251 )
252 .dot_matches_new_line(true)
253 .build()
254 .unwrap(),
255 },
256 QuoteSub {
257 type_: QuoteType::Mark,
259 scope: QuoteScope::Unconstrained,
260 #[allow(clippy::unwrap_used)]
261 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?##(.+?)##"#)
262 .dot_matches_new_line(true)
263 .build()
264 .unwrap(),
265 },
266 QuoteSub {
267 type_: QuoteType::Mark,
269 scope: QuoteScope::Constrained,
270 #[allow(clippy::unwrap_used)]
271 pattern: RegexBuilder::new(
272 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?#(\S|\S.*?\S)#\b{end-half}"#,
273 )
274 .dot_matches_new_line(true)
275 .build()
276 .unwrap(),
277 },
278 QuoteSub {
279 type_: QuoteType::Superscript,
281 scope: QuoteScope::Unconstrained,
282 #[allow(clippy::unwrap_used)]
283 pattern: Regex::new(r#"\\?(?:\[([^\[\]]+)\])?\^(\S+?)\^"#).unwrap(),
284 },
285 QuoteSub {
286 type_: QuoteType::Subscript,
288 scope: QuoteScope::Unconstrained,
289 #[allow(clippy::unwrap_used)]
290 pattern: Regex::new(r#"\\?(?:\[([^\[\]]+)\])?~(\S+?)~"#).unwrap(),
291 },
292 ]
293});
294
295#[derive(Debug)]
296struct QuoteReplacer<'r> {
297 type_: QuoteType,
298 scope: QuoteScope,
299 parser: &'r Parser<'r>,
300}
301
302impl LookaheadReplacer for QuoteReplacer<'_> {
303 fn replace_append(
304 &mut self,
305 caps: &Captures<'_>,
306 dest: &mut String,
307 after: &str,
308 ) -> LookaheadResult {
309 if self.type_ == QuoteType::Monospaced
316 && self.scope == QuoteScope::Constrained
317 && after.starts_with(['"', '\'', '`'])
318 {
319 let skip_ahead = if caps[0].starts_with('\\') { 2 } else { 1 };
320 dest.push_str(&caps[0][0..skip_ahead]);
321 return LookaheadResult::SkipAheadAndRetry(skip_ahead);
322 }
323
324 let unescaped_attrs: Option<String> = if caps[0].starts_with('\\') {
325 let maybe_attrs = caps.get(2).map(|a| a.as_str());
326 if self.scope == QuoteScope::Constrained && maybe_attrs.is_some() {
327 Some(format!(
328 "[{attrs}]",
329 attrs = maybe_attrs.unwrap_or_default()
330 ))
331 } else {
332 dest.push_str(&caps[0][1..]);
333 return LookaheadResult::Continue;
334 }
335 } else {
336 None
337 };
338
339 match self.scope {
340 QuoteScope::Constrained => {
341 if let Some(attrs) = unescaped_attrs {
342 dest.push_str(&attrs);
343 self.parser.renderer.render_quoted_substitition(
344 self.type_, self.scope, None, None, &caps[3], dest,
345 );
346 } else {
347 let (attrlist, type_): (Option<Attrlist<'_>>, QuoteType) =
348 if let Some(attrlist) = caps.get(2) {
349 let type_ = if self.type_ == QuoteType::Mark {
350 QuoteType::Unquoted
351 } else {
352 self.type_
353 };
354
355 (
356 Some(
357 Attrlist::parse(
358 crate::Span::new(attrlist.as_str()),
359 self.parser,
360 AttrlistContext::Inline,
361 )
362 .item
363 .item,
364 ),
365 type_,
366 )
367 } else {
368 (None, self.type_)
369 };
370
371 if let Some(prefix) = caps.get(1) {
372 dest.push_str(prefix.as_str());
373 }
374
375 let id = attrlist
376 .as_ref()
377 .and_then(|a| a.id().map(|s| s.to_string()));
378
379 self.parser.renderer.render_quoted_substitition(
380 type_, self.scope, attrlist, id, &caps[3], dest,
381 );
382 }
383 }
384
385 QuoteScope::Unconstrained => {
386 let (attrlist, type_): (Option<Attrlist<'_>>, QuoteType) =
387 if let Some(attrlist) = caps.get(1) {
388 let type_ = if self.type_ == QuoteType::Mark {
389 QuoteType::Unquoted
390 } else {
391 self.type_
392 };
393
394 (
395 Some(
396 Attrlist::parse(
397 crate::Span::new(attrlist.as_str()),
398 self.parser,
399 AttrlistContext::Inline,
400 )
401 .item
402 .item,
403 ),
404 type_,
405 )
406 } else {
407 (None, self.type_)
408 };
409
410 let id = attrlist
411 .as_ref()
412 .and_then(|a| a.id().map(|s| s.to_string()));
413
414 self.parser
415 .renderer
416 .render_quoted_substitition(type_, self.scope, attrlist, id, &caps[2], dest);
417 }
418 }
419
420 LookaheadResult::Continue
421 }
422}
423
424fn apply_quotes(content: &mut Content<'_>, parser: &Parser) {
425 if !QUOTED_TEXT_SNIFF.is_match(content.rendered.as_ref()) {
426 return;
427 }
428
429 let mut result: Cow<'_, str> = content.rendered.to_string().into();
430
431 for sub in &*QUOTE_SUBS {
432 let replacer = QuoteReplacer {
433 type_: sub.type_,
434 scope: sub.scope,
435 parser,
436 };
437
438 if let Cow::Owned(new_result) = replace_with_lookahead(&sub.pattern, &result, replacer) {
439 result = new_result.into();
440 }
441 }
444
445 content.rendered = result.into();
446}
447
/// Matches an attribute reference `{name}`, optionally preceded by a
/// backslash. Group 1 is the name: an ASCII letter, digit, or `_`, followed by
/// any number of ASCII letters, digits, `_`, or `-`.
static ATTRIBUTE_REFERENCE: LazyLock<Regex> = LazyLock::new(|| {
    // The pattern is a fixed literal, so a failure here is a programming error.
    #[allow(clippy::unwrap_used)]
    Regex::new(r#"\\?\{([A-Za-z0-9_][A-Za-z0-9_-]*)\}"#).unwrap()
});
452
453#[derive(Debug)]
454struct AttributeReplacer<'p>(&'p Parser<'p>);
455
456impl Replacer for AttributeReplacer<'_> {
457 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
458 let attr_name = &caps[1];
459
460 if !self.0.has_attribute(attr_name) {
462 dest.push_str(&caps[0]);
463 return;
464 }
465
466 if caps[0].starts_with('\\') {
467 dest.push_str(&caps[0][1..]);
468 return;
469 }
470
471 if let InterpretedValue::Value(value) = self.0.attribute_value(attr_name) {
472 dest.push_str(value.as_ref());
473 }
474 }
477}
478
479fn apply_attributes(content: &mut Content<'_>, parser: &Parser) {
480 if !content.rendered.contains('{') {
481 return;
482 }
483
484 let mut result: Cow<'_, str> = content.rendered.to_string().into();
485
486 if let Cow::Owned(new_result) =
487 ATTRIBUTE_REFERENCE.replace_all(&result, AttributeReplacer(parser))
488 {
489 result = new_result.into();
490 }
491 content.rendered = result.into();
495}
496
497fn apply_character_replacements(
498 content: &mut Content<'_>,
499 renderer: &dyn InlineSubstitutionRenderer,
500) {
501 if !REPLACEABLE_TEXT_SNIFF.is_match(content.rendered.as_ref()) {
502 return;
503 }
504
505 let mut result: Cow<'_, str> = content.rendered.to_string().into();
506
507 for repl in &*REPLACEMENTS {
508 let replacer = CharacterReplacer {
509 type_: repl.type_.clone(),
510 renderer,
511 };
512
513 if let Cow::Owned(new_result) = repl.pattern.replace_all(&result, replacer) {
514 result = new_result.into();
515 }
516 }
519
520 content.rendered = result.into();
521}
522
/// One entry of the `REPLACEMENTS` table: a replacement type together with
/// the pattern that triggers it.
struct CharacterReplacement {
    // Replacement handed to the renderer when `pattern` matches.
    type_: CharacterReplacementType,
    // Compiled pattern; `CharacterReplacer` reads its capture groups.
    pattern: Regex,
}
527
/// Cheap pre-check for `apply_character_replacements`: matches `&`, `'`,
/// `--`, `...`, or one of `(C)`, `(R)`, `(T)`, `(CM)`, `(RM)`, `(TM)`.
///
/// NOTE(review): this has no alternative for the arrow patterns in
/// `REPLACEMENTS` (`->`, `=>`, `<-`, `<=`), so text whose only replaceable
/// content is an arrow is skipped unless it also matches something above.
/// Confirm whether the arrow patterns are meant to run on escaped text (which
/// would contain `&`) or whether this sniff should list them.
static REPLACEABLE_TEXT_SNIFF: LazyLock<Regex> = LazyLock::new(|| {
    // The pattern is a fixed literal, so a failure here is a programming error.
    #[allow(clippy::unwrap_used)]
    Regex::new(r#"[&']|--|\.\.\.|\([CRT]M?\)"#).unwrap()
});
532
533static REPLACEMENTS: LazyLock<Vec<CharacterReplacement>> = LazyLock::new(|| {
539 vec![
540 CharacterReplacement {
541 type_: CharacterReplacementType::Copyright,
543 #[allow(clippy::unwrap_used)]
544 pattern: Regex::new(r#"\\?\(C\)"#).unwrap(),
545 },
546 CharacterReplacement {
547 type_: CharacterReplacementType::Registered,
549 #[allow(clippy::unwrap_used)]
550 pattern: Regex::new(r#"\\?\(R\)"#).unwrap(),
551 },
552 CharacterReplacement {
553 type_: CharacterReplacementType::Trademark,
555 #[allow(clippy::unwrap_used)]
556 pattern: Regex::new(r#"\\?\(TM\)"#).unwrap(),
557 },
558 CharacterReplacement {
559 type_: CharacterReplacementType::EmDashSurroundedBySpaces,
561 #[allow(clippy::unwrap_used)]
562 pattern: Regex::new(r#"(?: |\n|^|\\)--(?: |\n|$)"#).unwrap(),
563 },
564 CharacterReplacement {
565 type_: CharacterReplacementType::EmDashWithoutSpace,
567 #[allow(clippy::unwrap_used)]
568 pattern: Regex::new(r#"(\w)\\?--\b{start-half}"#).unwrap(),
569 },
570 CharacterReplacement {
571 type_: CharacterReplacementType::Ellipsis,
573 #[allow(clippy::unwrap_used)]
574 pattern: Regex::new(r#"\\?\.\.\."#).unwrap(),
575 },
576 CharacterReplacement {
577 type_: CharacterReplacementType::TypographicApostrophe,
579 #[allow(clippy::unwrap_used)]
580 pattern: Regex::new(r#"\\?`'"#).unwrap(),
581 },
582 CharacterReplacement {
583 type_: CharacterReplacementType::TypographicApostrophe,
585 #[allow(clippy::unwrap_used)]
586 pattern: Regex::new(r#"([[:alnum:]])\\?'([[:alpha:]])"#).unwrap(),
587 },
588 CharacterReplacement {
589 type_: CharacterReplacementType::SingleRightArrow,
591 #[allow(clippy::unwrap_used)]
592 pattern: Regex::new(r#"\\?->"#).unwrap(),
593 },
594 CharacterReplacement {
595 type_: CharacterReplacementType::DoubleRightArrow,
597 #[allow(clippy::unwrap_used)]
598 pattern: Regex::new(r#"\\?=>"#).unwrap(),
599 },
600 CharacterReplacement {
601 type_: CharacterReplacementType::SingleLeftArrow,
603 #[allow(clippy::unwrap_used)]
604 pattern: Regex::new(r#"\\?<-"#).unwrap(),
605 },
606 CharacterReplacement {
607 type_: CharacterReplacementType::DoubleLeftArrow,
609 #[allow(clippy::unwrap_used)]
610 pattern: Regex::new(r#"\\?<="#).unwrap(),
611 },
612 CharacterReplacement {
613 type_: CharacterReplacementType::CharacterReference("".to_owned()),
615 #[allow(clippy::unwrap_used)]
616 pattern: Regex::new(r#"\\?&((?:[a-zA-Z][a-zA-Z]+\d{0,2}|#\d\d\d{0,4}|#x[\da-fA-F][\da-fA-F][\da-fA-F]{0,3}));"#).unwrap(),
617 },
618 ]
619});
620
621#[derive(Debug)]
622struct CharacterReplacer<'r> {
623 type_: CharacterReplacementType,
624 renderer: &'r dyn InlineSubstitutionRenderer,
625}
626
627impl Replacer for CharacterReplacer<'_> {
628 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
629 if caps[0].contains('\\') {
630 let unescaped = &caps[0].replace("\\", "");
632 dest.push_str(unescaped);
633 return;
634 }
635
636 match self.type_ {
637 CharacterReplacementType::Copyright
638 | CharacterReplacementType::Registered
639 | CharacterReplacementType::Trademark
640 | CharacterReplacementType::EmDashSurroundedBySpaces
641 | CharacterReplacementType::Ellipsis
642 | CharacterReplacementType::SingleLeftArrow
643 | CharacterReplacementType::DoubleLeftArrow
644 | CharacterReplacementType::SingleRightArrow
645 | CharacterReplacementType::DoubleRightArrow => {
646 self.renderer
647 .render_character_replacement(self.type_.clone(), dest);
648 }
649
650 CharacterReplacementType::EmDashWithoutSpace => {
651 dest.push_str(&caps[1]);
652 self.renderer.render_character_replacement(
653 CharacterReplacementType::EmDashWithoutSpace,
654 dest,
655 );
656 }
657
658 CharacterReplacementType::TypographicApostrophe => {
659 if let Some(before) = caps.get(1) {
660 dest.push_str(before.as_str());
661 }
662
663 self.renderer.render_character_replacement(
664 CharacterReplacementType::TypographicApostrophe,
665 dest,
666 );
667
668 if let Some(after) = caps.get(2) {
669 dest.push_str(after.as_str());
670 }
671 }
672
673 CharacterReplacementType::CharacterReference(_) => {
674 self.renderer.render_character_replacement(
675 CharacterReplacementType::CharacterReference(caps[1].to_string()),
676 dest,
677 );
678 }
679 }
680 }
681}
682
683fn apply_post_replacements(
684 content: &mut Content<'_>,
685 parser: &Parser,
686 attrlist: Option<&Attrlist<'_>>,
687) {
688 if attrlist.is_some_and(|attrlist| attrlist.has_option("hardbreaks")) {
691 let text = content.rendered.as_ref();
692 if !text.contains('\n') {
693 return;
694 }
695
696 let mut lines: Vec<&str> = content.rendered.as_ref().lines().collect();
697 let last = lines.pop().unwrap_or_default();
698
699 let mut lines: Vec<String> = lines
700 .iter()
701 .map(|line| {
702 let line = if line.ends_with(" +") {
703 &line[0..line.len() - 2]
704 } else {
705 *line
706 };
707
708 let mut line = line.to_owned();
709 parser.renderer.render_line_break(&mut line);
710 line
711 })
712 .collect();
713
714 lines.push(last.to_owned());
715
716 let new_result = lines.join("\n");
717 content.rendered = new_result.into();
718 } else {
719 let rendered = content.rendered.as_ref();
720 if !(rendered.contains('+') && rendered.contains('\n')) {
721 return;
722 }
723
724 let replacer = PostReplacementReplacer(parser.renderer);
725
726 if let Cow::Owned(new_result) = HARD_LINE_BREAK.replace_all(rendered, replacer) {
727 content.rendered = new_result.into();
728 }
729 }
730}
731
732#[derive(Debug)]
733struct PostReplacementReplacer<'r>(&'r dyn InlineSubstitutionRenderer);
734
735impl Replacer for PostReplacementReplacer<'_> {
736 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
737 dest.push_str(&caps[1]);
738 self.0.render_line_break(dest);
739 }
740}
741
/// Matches, in multi-line mode, a whole line that ends with a space followed
/// by `+`. Group 1 is the line's text before that marker.
static HARD_LINE_BREAK: LazyLock<Regex> = LazyLock::new(|| {
    // The pattern is a fixed literal, so a failure here is a programming error.
    #[allow(clippy::unwrap_used)]
    Regex::new(r#"(?m)^(.*) \+$"#).unwrap()
});