1use std::{borrow::Cow, sync::LazyLock};
2
3use regex::{Captures, Regex, RegexBuilder, Replacer};
4
5use crate::{
6 Parser,
7 attributes::{Attrlist, AttrlistContext},
8 content::Content,
9 document::InterpretedValue,
10 internal::{LookaheadReplacer, LookaheadResult, replace_with_lookahead},
11 parser::{
12 CharacterReplacementType, InlineSubstitutionRenderer, QuoteScope, QuoteType,
13 SpecialCharacter,
14 },
15};
16
/// One stage of the inline substitution pipeline that can be applied to a
/// block's content.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SubstitutionStep {
    /// Replaces the characters `<`, `>`, and `&` with the renderer's output
    /// for each special character.
    SpecialCharacters,

    /// Renders inline formatting marks (strong, emphasis, monospace, mark,
    /// curved quotes, superscript, and subscript).
    Quotes,

    /// Expands `{name}` references to the values of attributes known to the
    /// parser.
    AttributeReferences,

    /// Replaces textual shorthands such as `(C)`, `(R)`, `(TM)`, `--`, `...`,
    /// arrows, apostrophes, and character references.
    CharacterReplacements,

    /// Processes inline macros.
    Macros,

    /// Converts hard line break markers (a trailing ` +`, or every line end
    /// when the `hardbreaks` option is set) into rendered line breaks.
    PostReplacement,

    /// Callout processing. `apply` has no implementation for this step yet.
    Callouts,
}
49
50impl SubstitutionStep {
51 pub(crate) fn apply(
52 &self,
53 content: &mut Content<'_>,
54 parser: &Parser,
55 attrlist: Option<&Attrlist<'_>>,
56 ) {
57 match self {
58 Self::SpecialCharacters => {
59 apply_special_characters(content, parser.renderer);
60 }
61 Self::Quotes => {
62 apply_quotes(content, parser);
63 }
64 Self::AttributeReferences => {
65 apply_attributes(content, parser);
66 }
67 Self::CharacterReplacements => {
68 apply_character_replacements(content, parser.renderer);
69 }
70 Self::Macros => {
71 super::macros::apply_macros(content, parser);
72 }
73 Self::PostReplacement => {
74 apply_post_replacements(content, parser, attrlist);
75 }
76 _ => {
77 todo!("Implement apply for SubstitutionStep::{self:?}");
78 }
79 }
80 }
81}
82
83fn apply_special_characters(content: &mut Content<'_>, renderer: &dyn InlineSubstitutionRenderer) {
84 if !content.rendered.contains(['<', '>', '&']) {
85 return;
86 }
87
88 let mut result: Cow<'_, str> = content.rendered.to_string().into();
89 let replacer = SpecialCharacterReplacer { renderer };
90
91 if let Cow::Owned(new_result) = SPECIAL_CHARS.replace_all(&result, replacer) {
92 result = new_result.into();
93 }
94
95 content.rendered = result.into();
96}
97
98#[derive(Debug)]
99struct SpecialCharacterReplacer<'r> {
100 renderer: &'r dyn InlineSubstitutionRenderer,
101}
102
103impl Replacer for SpecialCharacterReplacer<'_> {
104 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
105 if let Some(which) = match caps[0].as_ref() {
106 "<" => Some(SpecialCharacter::Lt),
107 ">" => Some(SpecialCharacter::Gt),
108 "&" => Some(SpecialCharacter::Ampersand),
109 _ => None,
110 } {
111 self.renderer.render_special_character(which, dest);
112 } else {
113 dest.push_str(caps[0].as_ref());
114 }
115 }
116}
117
118static SPECIAL_CHARS: LazyLock<Regex> = LazyLock::new(|| {
119 #[allow(clippy::unwrap_used)]
120 Regex::new("[<>&]").unwrap()
121});
122
123static QUOTED_TEXT_SNIFF: LazyLock<Regex> = LazyLock::new(|| {
124 #[allow(clippy::unwrap_used)]
125 Regex::new("[*_`#^~]").unwrap()
126});
127
/// One entry in the quote substitution table: a pattern plus the kind of
/// quoted text it produces.
struct QuoteSub {
    // Kind of quoted text rendered when `pattern` matches.
    type_: QuoteType,
    // Whether `pattern` is the constrained or unconstrained form; this also
    // determines which capture groups hold the attribute list and the text.
    scope: QuoteScope,
    // Regular expression that recognizes the marked-up text.
    pattern: Regex,
}
133
134static QUOTE_SUBS: LazyLock<Vec<QuoteSub>> = LazyLock::new(|| {
155 vec![
156 QuoteSub {
157 type_: QuoteType::Strong,
159 scope: QuoteScope::Unconstrained,
160 #[allow(clippy::unwrap_used)]
161 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?\*\*(.+?)\*\*"#)
162 .dot_matches_new_line(true)
163 .build()
164 .unwrap(),
165 },
166 QuoteSub {
167 type_: QuoteType::Strong,
169 scope: QuoteScope::Constrained,
170 #[allow(clippy::unwrap_used)]
171 pattern: RegexBuilder::new(
172 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?\*(\S|\S.*?\S)\*\b{end-half}"#,
173 )
174 .dot_matches_new_line(true)
175 .build()
176 .unwrap(),
177 },
178 QuoteSub {
179 type_: QuoteType::DoubleQuote,
181 scope: QuoteScope::Constrained,
182 #[allow(clippy::unwrap_used)]
183 pattern: RegexBuilder::new(
184 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?"`(\S|\S.*?\S)`"\b{end-half}"#,
185 )
186 .dot_matches_new_line(true)
187 .build()
188 .unwrap(),
189 },
190 QuoteSub {
191 type_: QuoteType::SingleQuote,
193 scope: QuoteScope::Constrained,
194 #[allow(clippy::unwrap_used)]
195 pattern: RegexBuilder::new(
196 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?'`(\S|\S.*?\S)`'\b{end-half}"#,
197 )
198 .dot_matches_new_line(true)
199 .build()
200 .unwrap(),
201 },
202 QuoteSub {
203 type_: QuoteType::Monospaced,
205 scope: QuoteScope::Unconstrained,
206 #[allow(clippy::unwrap_used)]
207 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?``(.+?)``"#)
208 .dot_matches_new_line(true)
209 .build()
210 .unwrap(),
211 },
212 QuoteSub {
213 type_: QuoteType::Monospaced,
215 scope: QuoteScope::Constrained,
216 #[allow(clippy::unwrap_used)]
217 pattern: RegexBuilder::new(
218 r#"(^|[^\w&;:"'`}])(?:\[([^\[\]]+)\])?`(\S|\S.*?\S)`\b{end-half}"#,
219 )
223 .dot_matches_new_line(true)
224 .build()
225 .unwrap(),
226 },
227 QuoteSub {
228 type_: QuoteType::Emphasis,
230 scope: QuoteScope::Unconstrained,
231 #[allow(clippy::unwrap_used)]
232 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?__(.+?)__"#)
233 .dot_matches_new_line(true)
234 .build()
235 .unwrap(),
236 },
237 QuoteSub {
238 type_: QuoteType::Emphasis,
240 scope: QuoteScope::Constrained,
241 #[allow(clippy::unwrap_used)]
242 pattern: RegexBuilder::new(
243 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?_(\S|\S.*?\S)_\b{end-half}"#,
244 )
245 .dot_matches_new_line(true)
246 .build()
247 .unwrap(),
248 },
249 QuoteSub {
250 type_: QuoteType::Mark,
252 scope: QuoteScope::Unconstrained,
253 #[allow(clippy::unwrap_used)]
254 pattern: RegexBuilder::new(r#"\\?(?:\[([^\[\]]+)\])?##(.+?)##"#)
255 .dot_matches_new_line(true)
256 .build()
257 .unwrap(),
258 },
259 QuoteSub {
260 type_: QuoteType::Mark,
262 scope: QuoteScope::Constrained,
263 #[allow(clippy::unwrap_used)]
264 pattern: RegexBuilder::new(
265 r#"(^|[^\w&;:}])(?:\[([^\[\]]+)\])?#(\S|\S.*?\S)#\b{end-half}"#,
266 )
267 .dot_matches_new_line(true)
268 .build()
269 .unwrap(),
270 },
271 QuoteSub {
272 type_: QuoteType::Superscript,
274 scope: QuoteScope::Unconstrained,
275 #[allow(clippy::unwrap_used)]
276 pattern: Regex::new(r#"\\?(?:\[([^\[\]]+)\])?\^(\S+?)\^"#).unwrap(),
277 },
278 QuoteSub {
279 type_: QuoteType::Subscript,
281 scope: QuoteScope::Unconstrained,
282 #[allow(clippy::unwrap_used)]
283 pattern: Regex::new(r#"\\?(?:\[([^\[\]]+)\])?~(\S+?)~"#).unwrap(),
284 },
285 ]
286});
287
288#[derive(Debug)]
289struct QuoteReplacer<'r> {
290 type_: QuoteType,
291 scope: QuoteScope,
292 parser: &'r Parser<'r>,
293}
294
295impl LookaheadReplacer for QuoteReplacer<'_> {
296 fn replace_append(
297 &mut self,
298 caps: &Captures<'_>,
299 dest: &mut String,
300 after: &str,
301 ) -> LookaheadResult {
302 if self.type_ == QuoteType::Monospaced
309 && self.scope == QuoteScope::Constrained
310 && after.starts_with(['"', '\'', '`'])
311 {
312 let skip_ahead = if caps[0].starts_with('\\') { 2 } else { 1 };
313 dest.push_str(&caps[0][0..skip_ahead]);
314 return LookaheadResult::SkipAheadAndRetry(skip_ahead);
315 }
316
317 let unescaped_attrs: Option<String> = if caps[0].starts_with('\\') {
318 let maybe_attrs = caps.get(2).map(|a| a.as_str());
319 if self.scope == QuoteScope::Constrained && maybe_attrs.is_some() {
320 Some(format!(
321 "[{attrs}]",
322 attrs = maybe_attrs.unwrap_or_default()
323 ))
324 } else {
325 dest.push_str(&caps[0][1..]);
326 return LookaheadResult::Continue;
327 }
328 } else {
329 None
330 };
331
332 match self.scope {
333 QuoteScope::Constrained => {
334 if let Some(attrs) = unescaped_attrs {
335 dest.push_str(&attrs);
336 self.parser.renderer.render_quoted_substitition(
337 self.type_, self.scope, None, None, &caps[3], dest,
338 );
339 } else {
340 let (attrlist, type_): (Option<Attrlist<'_>>, QuoteType) =
341 if let Some(attrlist) = caps.get(2) {
342 let type_ = if self.type_ == QuoteType::Mark {
343 QuoteType::Unquoted
344 } else {
345 self.type_
346 };
347
348 (
349 Some(
350 Attrlist::parse(
351 crate::Span::new(attrlist.as_str()),
352 self.parser,
353 AttrlistContext::Inline,
354 )
355 .item
356 .item,
357 ),
358 type_,
359 )
360 } else {
361 (None, self.type_)
362 };
363
364 if let Some(prefix) = caps.get(1) {
365 dest.push_str(prefix.as_str());
366 }
367
368 let id = attrlist
369 .as_ref()
370 .and_then(|a| a.id().map(|s| s.to_string()));
371
372 self.parser.renderer.render_quoted_substitition(
373 type_, self.scope, attrlist, id, &caps[3], dest,
374 );
375 }
376 }
377
378 QuoteScope::Unconstrained => {
379 let (attrlist, type_): (Option<Attrlist<'_>>, QuoteType) =
380 if let Some(attrlist) = caps.get(1) {
381 let type_ = if self.type_ == QuoteType::Mark {
382 QuoteType::Unquoted
383 } else {
384 self.type_
385 };
386
387 (
388 Some(
389 Attrlist::parse(
390 crate::Span::new(attrlist.as_str()),
391 self.parser,
392 AttrlistContext::Inline,
393 )
394 .item
395 .item,
396 ),
397 type_,
398 )
399 } else {
400 (None, self.type_)
401 };
402
403 let id = attrlist
404 .as_ref()
405 .and_then(|a| a.id().map(|s| s.to_string()));
406
407 self.parser
408 .renderer
409 .render_quoted_substitition(type_, self.scope, attrlist, id, &caps[2], dest);
410 }
411 }
412
413 LookaheadResult::Continue
414 }
415}
416
417fn apply_quotes(content: &mut Content<'_>, parser: &Parser) {
418 if !QUOTED_TEXT_SNIFF.is_match(content.rendered.as_ref()) {
419 return;
420 }
421
422 let mut result: Cow<'_, str> = content.rendered.to_string().into();
423
424 for sub in &*QUOTE_SUBS {
425 let replacer = QuoteReplacer {
426 type_: sub.type_,
427 scope: sub.scope,
428 parser,
429 };
430
431 if let Cow::Owned(new_result) = replace_with_lookahead(&sub.pattern, &result, replacer) {
432 result = new_result.into();
433 }
434 }
437
438 content.rendered = result.into();
439}
440
441static ATTRIBUTE_REFERENCE: LazyLock<Regex> = LazyLock::new(|| {
442 #[allow(clippy::unwrap_used)]
443 Regex::new(r#"\\?\{([A-Za-z0-9_][A-Za-z0-9_-]*)\}"#).unwrap()
444});
445
446#[derive(Debug)]
447struct AttributeReplacer<'p>(&'p Parser<'p>);
448
449impl Replacer for AttributeReplacer<'_> {
450 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
451 let attr_name = &caps[1];
452
453 if !self.0.has_attribute(attr_name) {
455 dest.push_str(&caps[0]);
456 return;
457 }
458
459 if caps[0].starts_with('\\') {
460 dest.push_str(&caps[0][1..]);
461 return;
462 }
463
464 match self.0.attribute_value(attr_name) {
465 InterpretedValue::Value(value) => {
466 dest.push_str(value.as_ref());
467 }
468 _ => {
469 }
472 }
473 }
474}
475
476fn apply_attributes(content: &mut Content<'_>, parser: &Parser) {
477 if !content.rendered.contains('{') {
478 return;
479 }
480
481 let mut result: Cow<'_, str> = content.rendered.to_string().into();
482
483 if let Cow::Owned(new_result) =
484 ATTRIBUTE_REFERENCE.replace_all(&result, AttributeReplacer(parser))
485 {
486 result = new_result.into();
487 }
488 content.rendered = result.into();
492}
493
494fn apply_character_replacements(
495 content: &mut Content<'_>,
496 renderer: &dyn InlineSubstitutionRenderer,
497) {
498 if !REPLACEABLE_TEXT_SNIFF.is_match(content.rendered.as_ref()) {
499 return;
500 }
501
502 let mut result: Cow<'_, str> = content.rendered.to_string().into();
503
504 for repl in &*REPLACEMENTS {
505 let replacer = CharacterReplacer {
506 type_: repl.type_.clone(),
507 renderer,
508 };
509
510 if let Cow::Owned(new_result) = repl.pattern.replace_all(&result, replacer) {
511 result = new_result.into();
512 }
513 }
516
517 content.rendered = result.into();
518}
519
/// One entry in the character replacement table: a pattern plus the kind of
/// replacement rendered for each match.
struct CharacterReplacement {
    // Kind of replacement handed to the renderer when `pattern` matches.
    type_: CharacterReplacementType,
    // Regular expression that recognizes the replaceable text.
    pattern: Regex,
}
524
525static REPLACEABLE_TEXT_SNIFF: LazyLock<Regex> = LazyLock::new(|| {
526 #[allow(clippy::unwrap_used)]
527 Regex::new(r#"[&']|--|\.\.\.|\([CRT]M?\)"#).unwrap()
528});
529
530static REPLACEMENTS: LazyLock<Vec<CharacterReplacement>> = LazyLock::new(|| {
536 vec![
537 CharacterReplacement {
538 type_: CharacterReplacementType::Copyright,
540 #[allow(clippy::unwrap_used)]
541 pattern: Regex::new(r#"\\?\(C\)"#).unwrap(),
542 },
543 CharacterReplacement {
544 type_: CharacterReplacementType::Registered,
546 #[allow(clippy::unwrap_used)]
547 pattern: Regex::new(r#"\\?\(R\)"#).unwrap(),
548 },
549 CharacterReplacement {
550 type_: CharacterReplacementType::Trademark,
552 #[allow(clippy::unwrap_used)]
553 pattern: Regex::new(r#"\\?\(TM\)"#).unwrap(),
554 },
555 CharacterReplacement {
556 type_: CharacterReplacementType::EmDashSurroundedBySpaces,
558 #[allow(clippy::unwrap_used)]
559 pattern: Regex::new(r#"(?: |\n|^|\\)--(?: |\n|$)"#).unwrap(),
560 },
561 CharacterReplacement {
562 type_: CharacterReplacementType::EmDashWithoutSpace,
564 #[allow(clippy::unwrap_used)]
565 pattern: Regex::new(r#"(\w)\\?--\b{start-half}"#).unwrap(),
566 },
567 CharacterReplacement {
568 type_: CharacterReplacementType::Ellipsis,
570 #[allow(clippy::unwrap_used)]
571 pattern: Regex::new(r#"\\?\.\.\."#).unwrap(),
572 },
573 CharacterReplacement {
574 type_: CharacterReplacementType::TypographicApostrophe,
576 #[allow(clippy::unwrap_used)]
577 pattern: Regex::new(r#"\\?`'"#).unwrap(),
578 },
579 CharacterReplacement {
580 type_: CharacterReplacementType::TypographicApostrophe,
582 #[allow(clippy::unwrap_used)]
583 pattern: Regex::new(r#"([[:alnum:]])\\?'([[:alpha:]])"#).unwrap(),
584 },
585 CharacterReplacement {
586 type_: CharacterReplacementType::SingleRightArrow,
588 #[allow(clippy::unwrap_used)]
589 pattern: Regex::new(r#"\\?->"#).unwrap(),
590 },
591 CharacterReplacement {
592 type_: CharacterReplacementType::DoubleRightArrow,
594 #[allow(clippy::unwrap_used)]
595 pattern: Regex::new(r#"\\?=>"#).unwrap(),
596 },
597 CharacterReplacement {
598 type_: CharacterReplacementType::SingleLeftArrow,
600 #[allow(clippy::unwrap_used)]
601 pattern: Regex::new(r#"\\?<-"#).unwrap(),
602 },
603 CharacterReplacement {
604 type_: CharacterReplacementType::DoubleLeftArrow,
606 #[allow(clippy::unwrap_used)]
607 pattern: Regex::new(r#"\\?<="#).unwrap(),
608 },
609 CharacterReplacement {
610 type_: CharacterReplacementType::CharacterReference("".to_owned()),
612 #[allow(clippy::unwrap_used)]
613 pattern: Regex::new(r#"\\?&((?:[a-zA-Z][a-zA-Z]+\d{0,2}|#\d\d\d{0,4}|#x[\da-fA-F][\da-fA-F][\da-fA-F]{0,3}));"#).unwrap(),
614 },
615 ]
616});
617
618#[derive(Debug)]
619struct CharacterReplacer<'r> {
620 type_: CharacterReplacementType,
621 renderer: &'r dyn InlineSubstitutionRenderer,
622}
623
624impl Replacer for CharacterReplacer<'_> {
625 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
626 if caps[0].contains('\\') {
627 let unescaped = &caps[0].replace("\\", "");
629 dest.push_str(unescaped);
630 return;
631 }
632
633 match self.type_ {
634 CharacterReplacementType::Copyright
635 | CharacterReplacementType::Registered
636 | CharacterReplacementType::Trademark
637 | CharacterReplacementType::EmDashSurroundedBySpaces
638 | CharacterReplacementType::Ellipsis
639 | CharacterReplacementType::SingleLeftArrow
640 | CharacterReplacementType::DoubleLeftArrow
641 | CharacterReplacementType::SingleRightArrow
642 | CharacterReplacementType::DoubleRightArrow => {
643 self.renderer
644 .render_character_replacement(self.type_.clone(), dest);
645 }
646
647 CharacterReplacementType::EmDashWithoutSpace => {
648 dest.push_str(&caps[1]);
649 self.renderer.render_character_replacement(
650 CharacterReplacementType::EmDashWithoutSpace,
651 dest,
652 );
653 }
654
655 CharacterReplacementType::TypographicApostrophe => {
656 if let Some(before) = caps.get(1) {
657 dest.push_str(before.as_str());
658 }
659
660 self.renderer.render_character_replacement(
661 CharacterReplacementType::TypographicApostrophe,
662 dest,
663 );
664
665 if let Some(after) = caps.get(2) {
666 dest.push_str(after.as_str());
667 }
668 }
669
670 CharacterReplacementType::CharacterReference(_) => {
671 self.renderer.render_character_replacement(
672 CharacterReplacementType::CharacterReference(caps[1].to_string()),
673 dest,
674 );
675 }
676 }
677 }
678}
679
680fn apply_post_replacements(
681 content: &mut Content<'_>,
682 parser: &Parser,
683 attrlist: Option<&Attrlist<'_>>,
684) {
685 if attrlist.is_some_and(|attrlist| attrlist.has_option("hardbreaks")) {
688 let text = content.rendered.as_ref();
689 if !text.contains('\n') {
690 return;
691 }
692
693 let mut lines: Vec<&str> = content.rendered.as_ref().lines().collect();
694 let last = lines.pop().unwrap_or_default();
695
696 let mut lines: Vec<String> = lines
697 .iter()
698 .map(|line| {
699 let line = if line.ends_with(" +") {
700 &line[0..line.len() - 2]
701 } else {
702 *line
703 };
704
705 let mut line = line.to_owned();
706 parser.renderer.render_line_break(&mut line);
707 line
708 })
709 .collect();
710
711 lines.push(last.to_owned());
712
713 let new_result = lines.join("\n");
714 content.rendered = new_result.into();
715 } else {
716 let rendered = content.rendered.as_ref();
717 if !(rendered.contains('+') && rendered.contains('\n')) {
718 return;
719 }
720
721 let replacer = PostReplacementReplacer(parser.renderer);
722
723 if let Cow::Owned(new_result) = HARD_LINE_BREAK.replace_all(rendered, replacer) {
724 content.rendered = new_result.into();
725 }
726 }
727}
728
729#[derive(Debug)]
730struct PostReplacementReplacer<'r>(&'r dyn InlineSubstitutionRenderer);
731
732impl Replacer for PostReplacementReplacer<'_> {
733 fn replace_append(&mut self, caps: &Captures<'_>, dest: &mut String) {
734 dest.push_str(&caps[1]);
735 self.0.render_line_break(dest);
736 }
737}
738
739static HARD_LINE_BREAK: LazyLock<Regex> = LazyLock::new(|| {
740 #[allow(clippy::unwrap_used)]
741 Regex::new(r#"(?m)^(.*) \+$"#).unwrap()
742});