asciidoc_parser/parser/
inline_substitution_renderer.rs

1use std::{fmt::Debug, sync::LazyLock};
2
3use regex::Regex;
4
5use crate::{Parser, attributes::Attrlist};
6
7/// An implementation of `InlineSubstitutionRenderer` is used when converting
8/// the basic raw text of a simple block to the format which will ultimately be
9/// presented in the final converted output.
10///
11/// An implementation is provided for HTML output; alternative implementations
12/// (not provided in this crate) could support other output formats.
13pub trait InlineSubstitutionRenderer: Debug {
14    /// Renders the substitution for a special character.
15    ///
16    /// The renderer should write the appropriate rendering to `dest`.
17    fn render_special_character(&self, type_: SpecialCharacter, dest: &mut String);
18
19    /// Renders the content of a [quote substitution].
20    ///
21    /// The renderer should write the appropriate rendering to `dest`.
22    ///
23    /// [quote substitution]: https://docs.asciidoctor.org/asciidoc/latest/subs/quotes/
24    fn render_quoted_substitition(
25        &self,
26        type_: QuoteType,
27        scope: QuoteScope,
28        attrlist: Option<Attrlist<'_>>,
29        id: Option<String>,
30        body: &str,
31        dest: &mut String,
32    );
33
34    /// Renders the content of a [character replacement].
35    ///
36    /// The renderer should write the appropriate rendering to `dest`.
37    ///
38    /// [character replacement]: https://docs.asciidoctor.org/asciidoc/latest/subs/replacements/
39    fn render_character_replacement(&self, type_: CharacterReplacementType, dest: &mut String);
40
41    /// Renders a line break.
42    ///
43    /// The renderer should write an appropriate rendering of line break to
44    /// `dest`.
45    ///
46    /// This is used in the implementation of [post-replacement substitutions].
47    ///
48    /// [post-replacement substitutions]: https://docs.asciidoctor.org/asciidoc/latest/subs/post-replacements/
49    fn render_line_break(&self, dest: &mut String);
50
51    /// Renders an image.
52    ///
53    /// The renderer should write an appropriate rendering of the specified
54    /// image to `dest`.
55    fn render_image(&self, params: &ImageRenderParams, dest: &mut String);
56
57    /// Construct a URI reference or data URI to the target image.
58    ///
59    /// If the `target_image_path` is a URI reference, then leave it untouched.
60    ///
61    /// The `target_image_path` is resolved relative to the directory retrieved
62    /// from the specified document-scoped attribute key, if provided.
63    ///
64    /// NOT YET IMPLEMENTED:
65    /// If the `data-uri` attribute is set on the document, and the safe mode
66    /// level is less than `SafeMode::SECURE`, the image will be safely
67    /// converted to a data URI by reading it from the same directory. If
68    /// neither of these conditions are satisfied, a relative path (i.e., URL)
69    /// will be returned.
70    ///
71    /// ## Parameters
72    ///
73    /// * `target_image_path`: path to the target image
74    /// * `parser`: Current document parser state
75    /// * `asset_dir_key`: If provided, the attribute key used to look up the
76    ///   directory where the image is located. If not provided, `imagesdir` is
77    ///   used.
78    ///
79    /// ## Return
80    ///
81    /// Returns a string reference or data URI for the target image that can be
82    /// safely used in an image tag.
83    fn image_uri(
84        &self,
85        target_image_path: &str,
86        parser: &Parser,
87        asset_dir_key: Option<&str>,
88    ) -> String;
89
90    /// Renders an icon.
91    ///
92    /// The renderer should write an appropriate rendering of the specified
93    /// icon to `dest`.
94    fn render_icon(&self, params: &IconRenderParams, dest: &mut String);
95
96    /// Construct a reference or data URI to an icon image for the specified
97    /// icon name.
98    ///
99    /// If the `icon` attribute is set on this block, the name is ignored and
100    /// the value of this attribute is used as the target image path. Otherwise,
101    /// construct a target image path by concatenating the value of the
102    /// `iconsdir` attribute, the icon name, and the value of the `icontype`
103    /// attribute (defaulting to `png`).
104    ///
105    /// The target image path is then passed through the `image_uri()` method.
106    /// If the `data-uri` attribute is set on the document, the image will be
107    /// safely converted to a data URI.
108    ///
109    /// The return value of this method can be safely used in an image tag.
110    fn icon_uri(&self, name: &str, _attrlist: &Attrlist, parser: &Parser) -> String {
111        let icontype = parser
112            .attribute_value("icontype")
113            .as_maybe_str()
114            .unwrap_or("png")
115            .to_owned();
116
117        if false {
118            todo!(
119                "Enable this when doing block-related icon attributes: {}",
120                r#"
121                let icon = if let Some(icon) = attrlist.named_attribute("icon") {
122                    let icon_str = icon.value();
123                    if has_extname(icon_str) {
124                        icon_str.to_string()
125                    } else {
126                        format!("{icon_str}.{icontype}")
127                    }
128                } else {
129                    // This part is defaulted for now.
130                    format!("{name}.{icontype}")
131                };
132            "#
133            );
134        }
135
136        let icon = format!("{name}.{icontype}");
137
138        self.image_uri(&icon, parser, Some("iconsdir"))
139    }
140
141    /// Renders a link.
142    ///
143    /// The renderer should write an appropriate rendering of the specified
144    /// link, to `dest`.
145    fn render_link(&self, params: &LinkRenderParams, dest: &mut String);
146}
147
/// Specifies which special character is being replaced in a call to
/// [`InlineSubstitutionRenderer::render_special_character`].
///
/// The HTML renderer in this file maps each variant to the corresponding
/// character entity (`&lt;`, `&gt;`, `&amp;`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SpecialCharacter {
    /// Replace the `<` character.
    Lt,

    /// Replace the `>` character.
    Gt,

    /// Replace the `&` character.
    Ampersand,
}
161
/// Specifies which [quote type] is being rendered in a call to
/// [`InlineSubstitutionRenderer::render_quoted_substitition`].
///
/// [quote type]: https://docs.asciidoctor.org/asciidoc/latest/subs/quotes/
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum QuoteType {
    /// Strong (often bold) formatting.
    Strong,

    /// Word(s) surrounded by smart (curly) double quotes.
    DoubleQuote,

    /// Word(s) surrounded by smart (curly) single quotes.
    SingleQuote,

    /// Monospace (code) formatting.
    Monospaced,

    /// Emphasis (often italic) formatting.
    Emphasis,

    /// Text range (span) formatted with zero or more styles.
    Mark,

    /// Superscript formatting.
    Superscript,

    /// Subscript formatting.
    Subscript,

    /// Surrounds a block of text that may need a `<span>` or similar tag.
    Unquoted,
}
194
/// Specifies whether the quoted text passed to
/// [`InlineSubstitutionRenderer::render_quoted_substitition`] is aligned to
/// word boundaries or not.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum QuoteScope {
    /// The quoted section was aligned to word boundaries.
    Constrained,

    /// The quoted section may not have been aligned to word boundaries.
    Unconstrained,
}
204
/// Specifies which [character replacement] is being rendered in a call to
/// [`InlineSubstitutionRenderer::render_character_replacement`].
///
/// Each variant's documentation shows the source text that triggers the
/// replacement.
///
/// [character replacement]: https://docs.asciidoctor.org/asciidoc/latest/subs/replacements/
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum CharacterReplacementType {
    /// Copyright `(C)`.
    Copyright,

    /// Registered `(R)`.
    Registered,

    /// Trademark `(TM)`.
    Trademark,

    /// Em-dash surrounded by spaces ` -- `.
    EmDashSurroundedBySpaces,

    /// Em-dash without space `--`.
    EmDashWithoutSpace,

    /// Ellipsis `...`.
    Ellipsis,

    /// Single right arrow `->`.
    SingleRightArrow,

    /// Double right arrow `=>`.
    DoubleRightArrow,

    /// Single left arrow `<-`.
    SingleLeftArrow,

    /// Double left arrow `<=`.
    DoubleLeftArrow,

    /// Typographic apostrophe `'` within a word.
    TypographicApostrophe,

    /// Character reference `&___;`.
    ///
    /// The payload is the text between the `&` and the `;`; the HTML renderer
    /// in this file re-emits it wrapped in those two delimiters.
    CharacterReference(String),
}
246
/// Provides parsed parameters for an image to be rendered by
/// [`InlineSubstitutionRenderer::render_image`].
#[derive(Clone, Debug)]
pub struct ImageRenderParams<'a> {
    /// Target (the reference to the image).
    pub target: &'a str,

    /// Alt text (either explicitly set or defaulted).
    pub alt: String,

    /// Width. The data type is not checked; this may be any string.
    pub width: Option<&'a str>,

    /// Height. The data type is not checked; this may be any string.
    pub height: Option<&'a str>,

    /// Attribute list.
    pub attrlist: &'a Attrlist<'a>,

    /// Parser. The renderer may find document settings (such as an image
    /// directory) in the parser's document attributes.
    pub parser: &'a Parser<'a>,
}
269
/// Provides parsed parameters for an icon to be rendered by
/// [`InlineSubstitutionRenderer::render_icon`].
#[derive(Clone, Debug)]
pub struct IconRenderParams<'a> {
    /// Target (the name of the icon). The HTML renderer in this file passes
    /// this value to `icon_uri` as the icon name and also uses it to build
    /// the `fa-{target}` class in font-icon mode.
    pub target: &'a str,

    /// Alt text (either explicitly set or defaulted).
    pub alt: String,

    /// Size. The data type is not checked; this may be any string.
    pub size: Option<&'a str>,

    /// Attribute list.
    pub attrlist: &'a Attrlist<'a>,

    /// Parser. The renderer may find document settings (such as an icon
    /// directory) in the parser's document attributes.
    pub parser: &'a Parser<'a>,
}
289
/// Provides parsed parameters for a link to be rendered by
/// [`InlineSubstitutionRenderer::render_link`].
#[derive(Clone, Debug)]
pub struct LinkRenderParams<'a> {
    /// Target (the target of this link).
    pub target: String,

    /// Link text.
    pub link_text: String,

    /// Roles (CSS classes) for this link not specified in the attrlist.
    pub extra_roles: Vec<&'a str>,

    /// Target window selection. The HTML renderer in this file emits it as
    /// the `target` attribute of the `<a>` element, unless the attrlist
    /// supplies its own `window` attribute, which takes precedence.
    pub window: Option<&'static str>,

    /// What type of link is being rendered?
    pub type_: LinkRenderType,

    /// Attribute list.
    pub attrlist: &'a Attrlist<'a>,

    /// Parser. The renderer may find document settings in the parser's
    /// document attributes.
    pub parser: &'a Parser<'a>,
}
315
/// What type of link is being rendered?
///
/// Carried in [`LinkRenderParams::type_`].
#[derive(Clone, Debug)]
pub enum LinkRenderType {
    /// TEMPORARY: Placeholder variant; the different types of links have not
    /// been enumerated yet.
    Link,
}
322
/// Implementation of [`InlineSubstitutionRenderer`] that renders substitutions
/// for common HTML-based applications.
///
/// The type has no fields, so it carries no configuration or state of its
/// own; document settings are read from the parser handed to each call.
#[derive(Debug)]
pub struct HtmlSubstitutionRenderer {}
327
328impl InlineSubstitutionRenderer for HtmlSubstitutionRenderer {
329    fn render_special_character(&self, type_: SpecialCharacter, dest: &mut String) {
330        match type_ {
331            SpecialCharacter::Lt => {
332                dest.push_str("&lt;");
333            }
334            SpecialCharacter::Gt => {
335                dest.push_str("&gt;");
336            }
337            SpecialCharacter::Ampersand => {
338                dest.push_str("&amp;");
339            }
340        }
341    }
342
343    fn render_quoted_substitition(
344        &self,
345        type_: QuoteType,
346        _scope: QuoteScope,
347        attrlist: Option<Attrlist<'_>>,
348        mut id: Option<String>,
349        body: &str,
350        dest: &mut String,
351    ) {
352        let mut roles: Vec<&str> = attrlist.as_ref().map(|a| a.roles()).unwrap_or_default();
353
354        if let Some(block_style) = attrlist
355            .as_ref()
356            .and_then(|a| a.nth_attribute(1))
357            .and_then(|attr1| attr1.block_style())
358        {
359            roles.insert(0, block_style);
360        }
361
362        if id.is_none() {
363            id = attrlist
364                .as_ref()
365                .and_then(|a| a.nth_attribute(1))
366                .and_then(|attr1| attr1.id())
367                .map(|id| id.to_owned())
368        }
369
370        match type_ {
371            QuoteType::Strong => {
372                wrap_body_in_html_tag(attrlist.as_ref(), "strong", id, roles, body, dest);
373            }
374
375            QuoteType::DoubleQuote => {
376                dest.push_str("&#8220;");
377                dest.push_str(body);
378                dest.push_str("&#8221;");
379            }
380
381            QuoteType::SingleQuote => {
382                dest.push_str("&#8216;");
383                dest.push_str(body);
384                dest.push_str("&#8217;");
385            }
386
387            QuoteType::Monospaced => {
388                wrap_body_in_html_tag(attrlist.as_ref(), "code", id, roles, body, dest);
389            }
390
391            QuoteType::Emphasis => {
392                wrap_body_in_html_tag(attrlist.as_ref(), "em", id, roles, body, dest);
393            }
394
395            QuoteType::Mark => {
396                if roles.is_empty() && id.is_none() {
397                    wrap_body_in_html_tag(attrlist.as_ref(), "mark", id, roles, body, dest);
398                } else {
399                    wrap_body_in_html_tag(attrlist.as_ref(), "span", id, roles, body, dest);
400                }
401            }
402
403            QuoteType::Superscript => {
404                wrap_body_in_html_tag(attrlist.as_ref(), "sup", id, roles, body, dest);
405            }
406
407            QuoteType::Subscript => {
408                wrap_body_in_html_tag(attrlist.as_ref(), "sub", id, roles, body, dest);
409            }
410
411            QuoteType::Unquoted => {
412                if roles.is_empty() && id.is_none() {
413                    dest.push_str(body);
414                } else {
415                    wrap_body_in_html_tag(attrlist.as_ref(), "span", id, roles, body, dest);
416                }
417            }
418        }
419    }
420
421    fn render_character_replacement(&self, type_: CharacterReplacementType, dest: &mut String) {
422        match type_ {
423            CharacterReplacementType::Copyright => {
424                dest.push_str("&#169;");
425            }
426
427            CharacterReplacementType::Registered => {
428                dest.push_str("&#174;");
429            }
430
431            CharacterReplacementType::Trademark => {
432                dest.push_str("&#8482;");
433            }
434
435            CharacterReplacementType::EmDashSurroundedBySpaces => {
436                dest.push_str("&#8201;&#8212;&#8201;");
437            }
438
439            CharacterReplacementType::EmDashWithoutSpace => {
440                dest.push_str("&#8212;&#8203;");
441            }
442
443            CharacterReplacementType::Ellipsis => {
444                dest.push_str("&#8230;&#8203;");
445            }
446
447            CharacterReplacementType::SingleLeftArrow => {
448                dest.push_str("&#8592;");
449            }
450
451            CharacterReplacementType::DoubleLeftArrow => {
452                dest.push_str("&#8656;");
453            }
454
455            CharacterReplacementType::SingleRightArrow => {
456                dest.push_str("&#8594;");
457            }
458
459            CharacterReplacementType::DoubleRightArrow => {
460                dest.push_str("&#8658;");
461            }
462
463            CharacterReplacementType::TypographicApostrophe => {
464                dest.push_str("&#8217;");
465            }
466
467            CharacterReplacementType::CharacterReference(name) => {
468                dest.push('&');
469                dest.push_str(&name);
470                dest.push(';');
471            }
472        }
473    }
474
475    fn render_line_break(&self, dest: &mut String) {
476        dest.push_str("<br>");
477    }
478
479    fn render_image(&self, params: &ImageRenderParams, dest: &mut String) {
480        let src = self.image_uri(params.target, params.parser, None);
481
482        let mut attrs: Vec<String> = vec![
483            format!(r#"src="{src}""#),
484            format!(
485                r#"alt="{alt}""#,
486                alt = encode_attribute_value(params.alt.to_string())
487            ),
488        ];
489
490        if let Some(width) = params.width {
491            attrs.push(format!(r#"width="{width}""#));
492        }
493
494        if let Some(height) = params.height {
495            attrs.push(format!(r#"height="{height}""#));
496        }
497
498        if let Some(title) = params.attrlist.named_attribute("title") {
499            attrs.push(format!(
500                r#"title="{title}""#,
501                title = encode_attribute_value(title.value().to_owned())
502            ));
503        }
504
505        let format = params
506            .attrlist
507            .named_attribute("format")
508            .map(|format| format.value());
509
510        // TO DO (https://github.com/scouten/asciidoc-parser/issues/277):
511        // Enforce non-safe mode. Add this contraint to following `if` clause:
512        // `&& node.document.safe < SafeMode::SECURE`
513
514        let img = if format == Some("svg") || params.target.contains(".svg") {
515            // NOTE: In the SVG case we may have to ignore the attrs list.
516            if params.attrlist.has_option("inline") {
517                todo!(
518                    "Port this: {}",
519                    r#"img = (read_svg_contents node, target) || %(<span class="alt">#{node.alt}</span>)
520                    NOTE: The attrs list calculated above may not be usable.
521                    "#
522                );
523            } else if params.attrlist.has_option("interactive") {
524                todo!(
525                    "Port this: {}",
526                    r##"
527                        fallback = (node.attr? 'fallback') ? %(<img src="#{node.image_uri node.attr 'fallback'}" alt="#{encode_attribute_value node.alt}"#{attrs}#{@void_element_slash}>) : %(<span class="alt">#{node.alt}</span>)
528                        img = %(<object type="image/svg+xml" data="#{src = node.image_uri target}"#{attrs}>#{fallback}</object>)
529                        NOTE: The attrs list calculated above may not be usable.
530                    "##
531                );
532            } else {
533                format!(
534                    r#"<img {attrs}{void_element_slash}>"#,
535                    attrs = attrs.join(" "),
536                    void_element_slash = "",
537                )
538            }
539        } else {
540            format!(
541                r#"<img {attrs}{void_element_slash}>"#,
542                attrs = attrs.join(" "),
543                void_element_slash = "",
544                // img = %(<img src="#{src = node.image_uri target}"
545                // alt="#{encode_attribute_value node.alt}"#{attrs}#{@
546                // void_element_slash}>)
547            )
548        };
549
550        render_icon_or_image(params.attrlist, &img, &src, "image", dest);
551    }
552
553    fn image_uri(
554        &self,
555        target_image_path: &str,
556        parser: &Parser,
557        asset_dir_key: Option<&str>,
558    ) -> String {
559        let asset_dir_key = asset_dir_key.unwrap_or("imagesdir");
560
561        if false {
562            todo!(
563                // TO DO (https://github.com/scouten/asciidoc-parser/issues/277):
564                "Port this when implementing safe modes: {}",
565                r#"
566				if (doc = @document).safe < SafeMode::SECURE && (doc.attr? 'data-uri')
567				  if ((Helpers.uriish? target_image) && (target_image = Helpers.encode_spaces_in_uri target_image)) ||
568					  (asset_dir_key && (images_base = doc.attr asset_dir_key) && (Helpers.uriish? images_base) &&
569					  (target_image = normalize_web_path target_image, images_base, false))
570					(doc.attr? 'allow-uri-read') ? (generate_data_uri_from_uri target_image, (doc.attr? 'cache-uri')) : target_image
571				  else
572					generate_data_uri target_image, asset_dir_key
573				  end
574				else
575				  normalize_web_path target_image, (asset_dir_key ? (doc.attr asset_dir_key) : nil)
576				end
577            "#
578            );
579        } else {
580            let asset_dir = parser
581                .attribute_value(asset_dir_key)
582                .as_maybe_str()
583                .map(|s| s.to_string());
584
585            normalize_web_path(target_image_path, parser, asset_dir.as_deref(), true)
586        }
587    }
588
589    fn render_icon(&self, params: &IconRenderParams, dest: &mut String) {
590        let src = self.icon_uri(params.target, params.attrlist, params.parser);
591
592        let img = if params.parser.has_attribute("icons") {
593            let icons = params.parser.attribute_value("icons");
594            if let Some(icons) = icons.as_maybe_str()
595                && icons == "font"
596            {
597                let mut i_class_attrs: Vec<String> = vec![
598                    "fa".to_owned(),
599                    format!("fa-{target}", target = params.target),
600                ];
601
602                if let Some(size) = params.attrlist.named_or_positional_attribute("size", 1) {
603                    i_class_attrs.push(format!("fa-{size}", size = size.value()));
604                }
605
606                if let Some(flip) = params.attrlist.named_attribute("flip") {
607                    i_class_attrs.push(format!("fa-flip-{flip}", flip = flip.value()));
608                } else if let Some(rotate) = params.attrlist.named_attribute("rotate") {
609                    i_class_attrs.push(format!("fa-rotate-{rotate}", rotate = rotate.value()));
610                }
611
612                format!(
613                    r##"<i class="{i_class_attr_val}"{title_attr}></i>"##,
614                    i_class_attr_val = i_class_attrs.join(" "),
615                    title_attr = if let Some(title) = params.attrlist.named_attribute("title") {
616                        format!(r#" title="{title}""#, title = title.value())
617                    } else {
618                        "".to_owned()
619                    }
620                )
621            } else {
622                let mut attrs: Vec<String> = vec![
623                    format!(r#"src="{src}""#),
624                    format!(
625                        r#"alt="{alt}""#,
626                        alt = encode_attribute_value(params.alt.to_string())
627                    ),
628                ];
629
630                if let Some(width) = params.attrlist.named_attribute("width") {
631                    attrs.push(format!(r#"width="{width}""#, width = width.value()));
632                }
633
634                if let Some(height) = params.attrlist.named_attribute("height") {
635                    attrs.push(format!(r#"height="{height}""#, height = height.value()));
636                }
637
638                if let Some(title) = params.attrlist.named_attribute("title") {
639                    attrs.push(format!(r#"title="{title}""#, title = title.value()));
640                }
641
642                format!(
643                    "<img {attrs}{void_element_slash}>",
644                    attrs = attrs.join(" "),
645                    void_element_slash = "",
646                )
647            }
648        } else {
649            format!("[{alt}&#93;", alt = params.alt)
650        };
651
652        render_icon_or_image(params.attrlist, &img, &src, "icon", dest);
653    }
654
655    fn render_link(&self, params: &LinkRenderParams, dest: &mut String) {
656        let id = params.attrlist.id();
657
658        let mut roles = params.extra_roles.clone();
659        let mut attrlist_roles = params.attrlist.roles().clone();
660        roles.append(&mut attrlist_roles);
661
662        let link = format!(
663            r##"<a href="{target}"{id}{class}{link_constraint_attrs}>{link_text}</a>"##,
664            target = params.target,
665            id = if let Some(id) = id {
666                format!(r#" id="{id}""#)
667            } else {
668                "".to_owned()
669            },
670            class = if roles.is_empty() {
671                "".to_owned()
672            } else {
673                format!(r#" class="{roles}""#, roles = roles.join(" "))
674            },
675            // title = %( title="#{node.attr 'title'}") if node.attr? 'title'
676            // Haven't seen this in the wild yet.
677            link_constraint_attrs = link_constraint_attrs(params.attrlist, params.window),
678            link_text = params.link_text,
679        );
680
681        dest.push_str(&link);
682    }
683}
684
685fn wrap_body_in_html_tag(
686    _attrlist: Option<&Attrlist<'_>>,
687    tag: &'static str,
688    id: Option<String>,
689    roles: Vec<&str>,
690    body: &str,
691    dest: &mut String,
692) {
693    dest.push('<');
694    dest.push_str(tag);
695
696    if let Some(id) = id.as_ref() {
697        dest.push_str(" id=\"");
698        dest.push_str(id);
699        dest.push('"');
700    }
701
702    if !roles.is_empty() {
703        let roles = roles.join(" ");
704        dest.push_str(" class=\"");
705        dest.push_str(&roles);
706        dest.push('"');
707    }
708
709    dest.push('>');
710    dest.push_str(body);
711    dest.push_str("</");
712    dest.push_str(tag);
713    dest.push('>');
714}
715
716fn render_icon_or_image(
717    attrlist: &Attrlist,
718    img: &str,
719    src: &str,
720    type_: &'static str,
721    dest: &mut String,
722) {
723    let mut img = img.to_string();
724
725    if let Some(link) = attrlist.named_attribute("link") {
726        let mut link = link.value();
727        if link == "self" {
728            link = src;
729        }
730
731        img = format!(
732            r#"<a class="image" href="{link}"{link_constraint_attrs}>{img}</a>"#,
733            link_constraint_attrs = link_constraint_attrs(attrlist, None)
734        );
735    }
736
737    let mut roles: Vec<&str> = attrlist.roles();
738
739    if let Some(float) = attrlist.named_attribute("float") {
740        roles.insert(0, float.value());
741    }
742
743    roles.insert(0, type_);
744
745    dest.push_str(r#"<span class=""#);
746    dest.push_str(&roles.join(" "));
747    dest.push_str(r#"">"#);
748    dest.push_str(&img);
749    dest.push_str("</span>");
750}
751
/// Makes `value` safe to place inside a double-quoted HTML attribute by
/// replacing every `"` with `&quot;`. No other character is altered.
fn encode_attribute_value(value: String) -> String {
    if value.contains('"') {
        value.replace('"', "&quot;")
    } else {
        // Nothing to encode; hand the input back unchanged.
        value
    }
}
755
756fn normalize_web_path(
757    target: &str,
758    parser: &Parser,
759    start: Option<&str>,
760    preserve_uri_target: bool,
761) -> String {
762    if preserve_uri_target && is_uri_ish(target) {
763        encode_spaces_in_uri(target)
764    } else {
765        parser.path_resolver.web_path(target, start)
766    }
767}
768
769fn is_uri_ish(path: &str) -> bool {
770    path.contains(':') && URI_SNIFF.is_match(path)
771}
772
/// Returns a copy of `s` in which every space character is replaced by `%20`.
fn encode_spaces_in_uri(s: &str) -> String {
    let segments: Vec<&str> = s.split(' ').collect();
    segments.join("%20")
}
776
/// Detects strings that resemble URIs.
///
/// The pattern is anchored at the start of the string only: it requires a
/// letter, then at least one more letter, digit, `.`, `+` or `-`, then a
/// colon. The scheme part is therefore at least two characters long, which is
/// what keeps single-letter Windows drive prefixes from matching.
///
/// ## Examples
///
/// * `http://domain`
/// * `https://domain`
/// * `file:///path`
/// * `data:info`
///
/// ## Counter-examples (do not match)
///
/// * `c:/sample.adoc`
/// * `c:\sample.adoc`
static URI_SNIFF: LazyLock<Regex> = LazyLock::new(|| {
    // The pattern is a fixed literal, so a failure to compile it would be a
    // programming error rather than a runtime condition.
    #[allow(clippy::unwrap_used)]
    Regex::new(
        r#"(?x)
        \A                             # Anchor to start of string
        \p{Alphabetic}                 # First character must be a letter
        [\p{Alphabetic}\p{Nd}.+-]+     # Followed by one or more alphanum or . + -
        :                              # Literal colon
        /{0,2}                         # Zero to two slashes
    "#,
    )
    .unwrap()
});
803
804fn link_constraint_attrs(attrlist: &Attrlist<'_>, window: Option<&'static str>) -> String {
805    let rel = if attrlist.has_option("nofollow") {
806        Some("nofollow")
807    } else {
808        None
809    };
810
811    if let Some(window) = attrlist
812        .named_attribute("window")
813        .map(|a| a.value())
814        .or(window)
815    {
816        let rel_noopener = if window == "_blank" || attrlist.has_option("noopener") {
817            if let Some(rel) = rel {
818                format!(r#" rel="{rel}" noopener"#)
819            } else {
820                r#" rel="noopener""#.to_owned()
821            }
822        } else {
823            "".to_string()
824        };
825
826        format!(r#" target="{window}"{rel_noopener}"#)
827    } else if let Some(rel) = rel {
828        format!(r#" rel="{rel}""#)
829    } else {
830        "".to_string()
831    }
832}