tpnote_lib/
html.rs

1//! Helper functions dealing with HTML conversion.
2use crate::clone_ext::CloneExt;
3use crate::error::InputStreamError;
4use crate::filename::{NotePath, NotePathStr};
5use crate::{config::LocalLinkKind, error::NoteError};
6use html_escape;
7use parking_lot::RwLock;
8use parse_hyperlinks::parser::Link;
9use parse_hyperlinks_extras::iterator_html::HtmlLinkInlineImage;
10use percent_encoding::percent_decode_str;
11use std::path::MAIN_SEPARATOR_STR;
12use std::{
13    borrow::Cow,
14    collections::HashSet,
15    path::{Component, Path, PathBuf},
16    sync::Arc,
17};
18
/// Extension that `append_html_ext()` appends to local link destinations
/// pointing to a Tp-Note file.
pub(crate) const HTML_EXT: &str = ".html";

/// A local path can carry a format string at the end. This is the separator
/// character between the path and the format string.
const FORMAT_SEPARATOR: char = '?';

/// If it follows directly after `FORMAT_SEPARATOR` (i.e. `?#`), it selects
/// the sort-tag for further matching.
const FORMAT_ONLY_SORT_TAG: char = '#';

/// If it follows directly after `FORMAT_SEPARATOR` (i.e. `??`), it selects
/// the whole filename for further matching.
const FORMAT_COMPLETE_FILENAME: &str = "?";

/// A format string can be separated in a _from_ and a _to_ part. This
/// optional separator is placed after `FORMAT_SEPARATOR` and separates
/// the _from_ and the _to_ pattern.
const FORMAT_FROM_TO_SEPARATOR: char = ':';
37
/// Assembles the path of a local link from `root_path`, `docdir` and `dest`.
///
/// * `dest` is relative, `rewrite_rel_paths` and not `rewrite_abs_paths`:
///   returns `"/"` + `docdir` stripped from `root_path` + `dest`.
/// * `dest` is relative, `rewrite_rel_paths` and `rewrite_abs_paths`:
///   returns `docdir` + `dest`.
/// * `dest` is relative and not `rewrite_rel_paths`: returns `dest`.
/// * `dest` is absolute and not `rewrite_abs_paths`: returns `"/"` + `dest`.
/// * `dest` is absolute and `rewrite_abs_paths`: returns `root_path` + `dest`.
///
/// The `dest` portion of the output is always normalized lexically (`.` is
/// dropped, `..` is resolved where possible). The filesystem is not accessed.
///
/// Returns `None`:
/// * when `docdir` can not be stripped from `root_path`,
/// * when `..` in `dest` climbs above the root directory (underflow),
/// * when the assembled path is empty, or
/// * when `rewrite_abs_paths` is set and the assembled path, that was built
///   on top of `root_path` or `docdir`, does not start with `root_path`
///   any more.
///
/// Asserts in debug mode, that `docdir` is in `root_path`.
fn assemble_link(
    root_path: &Path,
    docdir: &Path,
    dest: &Path,
    rewrite_rel_paths: bool,
    rewrite_abs_paths: bool,
) -> Option<PathBuf> {
    /// True if the first or the second component of `path` is the root
    /// directory. Under Windows `.is_relative()` does not detect
    /// `Component::RootDir`, and a prefix may precede it.
    fn has_root_dir(path: &Path) -> bool {
        path.components().take(2).any(|c| c == Component::RootDir)
    }

    /// Concatenate `path` and `append`.
    /// The `append` portion of the output is if possible canonicalized.
    /// In case of underflow of an absolute link, the returned path is empty.
    fn append(path: &mut PathBuf, append: &Path) {
        for dir in append.components() {
            match dir {
                Component::ParentDir => {
                    // `PathBuf::pop()` succeeds on a path ending with `..`
                    // and would cancel the earlier, unresolved `..` out
                    // instead of climbing one level higher.
                    let ends_with_parent_dir = matches!(
                        path.components().next_back(),
                        Some(Component::ParentDir)
                    );
                    if ends_with_parent_dir {
                        path.push(Component::ParentDir.as_os_str());
                    } else if !path.pop() {
                        if has_root_dir(path) {
                            // Underflow: we can not climb above the root.
                            path.clear();
                            break;
                        }
                        // Nothing left to pop in a relative path: keep `..`.
                        path.push(Component::ParentDir.as_os_str());
                    }
                }
                Component::Normal(c) => path.push(c),
                // `.`, the root directory and prefixes contribute nothing.
                _ => {}
            }
        }
    }

    let dest_is_relative = !has_root_dir(dest);

    // Check if the link points into `root_path`, reject otherwise
    // (strip_prefix will not work).
    debug_assert!(docdir.starts_with(root_path));

    // Calculate the base of the output. The second tuple member tells, if
    // the base starts with `root_path` and the result must stay inside.
    let (mut link, confined_to_root) =
        match (rewrite_rel_paths, rewrite_abs_paths, dest_is_relative) {
            // *** Relative links.
            // Result: "/" + docdir.strip(root_path) + dest
            (true, false, true) => {
                let link = PathBuf::from(Component::RootDir.as_os_str());
                (link.join(docdir.strip_prefix(root_path).ok()?), false)
            }
            // Result: docdir + dest
            (true, true, true) => (docdir.to_path_buf(), true),
            // Result: dest
            (false, _, true) => (PathBuf::new(), false),
            // *** Absolute links.
            // Result: "/" + dest
            (_, false, false) => (PathBuf::from(Component::RootDir.as_os_str()), false),
            // Result: root_path + dest
            (_, true, false) => (root_path.to_path_buf(), true),
        };
    append(&mut link, dest);

    let escaped_root = confined_to_root && !link.starts_with(root_path);
    if link.as_os_str().is_empty() || escaped_root {
        None
    } else {
        Some(link)
    }
}
119
/// Operations on a parsed hyperlink or inline image, used to rewrite local
/// links before the link is rendered back to HTML.
trait Hyperlink {
    /// A helper function, that first HTML escape decodes all strings of the
    /// link. Then it percent decodes the link destination `dest` and the
    /// image source `src`.
    fn decode_ampersand_and_percent(&mut self);

    /// True if the value is a local link.
    #[allow(clippy::ptr_arg)]
    fn is_local_fn(value: &Cow<str>) -> bool;

    /// * `Link::Text2Dest`: strips a possible scheme in local `dest`.
    /// * `Link::Image2Dest`: strips a possible scheme in local `src` and
    ///   in local `dest`.
    /// * `Link::Image`: strips a possible scheme in local `src`.
    ///
    ///  No action if not local.
    fn strip_local_scheme(&mut self);

    /// Helper function that strips a possible scheme in `input`.
    fn strip_scheme_fn(input: &mut Cow<str>);

    /// True if the link is:
    /// * `Link::Text2Dest` and the link text equals the link destination, or
    /// * `Link::Image` and the links `alt` equals the link source.
    ///
    /// WARNING: place this test after `decode_ampersand_and_percent()`
    /// and before: `rebase_local_link`, `expand_shorthand_link`,
    /// `rewrite_autolink` and `apply_format_attribute`.
    fn is_autolink(&self) -> bool;

    /// A method that converts the relative URLs (local links) in `self`
    /// in place. If successful, it returns `Ok(())`, otherwise
    /// `Err(NoteError::InvalidLocalPath)`.
    /// If `self` contains an absolute URL, no conversion is performed and the
    /// return value is `Ok(())`.
    ///
    /// Conversion details:
    /// The base path for this conversion (usually where the HTML file resides),
    /// is `docdir`. If not `rewrite_rel_paths`, relative local links are not
    /// converted. Furthermore, if `rewrite_abs_paths`, all local links
    /// starting with `/` are prepended with `root_path`. All absolute URLs
    /// always remain untouched.
    ///
    /// Algorithm:
    /// 1. If `rewrite_abs_paths==true` and `dest` starts with `/`, concatenate
    ///    and return `root_path` and `dest`.
    /// 2. If `rewrite_rel_paths==false` and `dest` does not start with `/`,
    ///    return `dest`.
    ///
    /// Remark: This method never changes the _anchor's text property_ and it
    /// never appends `.html`. See `rewrite_autolink()`,
    /// `apply_format_attribute()` and `append_html_ext()` for this.
    ///
    /// Contracts:
    /// 1. `link` may have a scheme.
    /// 2. `link` is `Link::Text2Dest`, `Link::Image2Dest` or `Link::Image`.
    /// 3. `root_path` and `docdir` are absolute paths to directories.
    /// 4. `root_path` is never empty `""`. It can be `"/"`.
    fn rebase_local_link(
        &mut self,
        root_path: &Path,
        docdir: &Path,
        rewrite_rel_paths: bool,
        rewrite_abs_paths: bool,
    ) -> Result<(), NoteError>;

    /// If `dest` in `Link::Text2Dest` or `Link::Image2Dest` contains only a
    /// sort tag as filename, expand the latter to a full filename.
    /// Otherwise, no action.
    /// This method accesses the filesystem. Therefore sometimes `prepend_path`
    /// is needed as parameter and prepended.
    fn expand_shorthand_link(&mut self, prepend_path: Option<&Path>) -> Result<(), NoteError>;

    /// This removes a possible scheme in `text`.
    /// Call this method only when you are sure that this
    /// is an autolink by testing with `is_autolink()`.
    fn rewrite_autolink(&mut self);

    /// A formatting attribute is a format string starting with `?` followed
    /// by one or two patterns. It is appended to `dest` or `src`.
    /// Processing details:
    /// 1. Extract a possible formatting attribute string in `dest`
    ///    (`Link::Text2Dest`) or `src` (`Link::Image`) after `?`.
    /// 2. Extract the _path_ before `?` in `dest` or `src`.
    /// 3. Apply the formatting to _path_.
    /// 4. Store the result by overwriting `text` or `alt` and store _path_
    ///    in `dest` or `src`.
    fn apply_format_attribute(&mut self);

    /// If the link destination `dest` is a local path, return it without
    /// a possible URL fragment. Otherwise return `None`.
    /// Acts on `Link::Text2Dest` and `Link::Image2Dest` only.
    fn get_local_link_dest_path(&self) -> Option<&Path>;

    /// If `src` is a local path, return it.
    /// Otherwise return `None`.
    /// Acts on `Link::Image` and `Link::Image2Dest` only.
    fn get_local_link_src_path(&self) -> Option<&Path>;

    /// If the extension of a local path in `dest` is some Tp-Note
    /// extension, append `.html` to the path. Otherwise silently return.
    /// Acts on `Link::Text2Dest` and `Link::Image2Dest` only.
    fn append_html_ext(&mut self);

    /// Renders `Link::Text2Dest`, `Link::Image2Dest` and `Link::Image`
    /// to HTML. Some characters in `dest` or `src` might be HTML
    /// escape encoded. This does not percent encode at all, because
    /// we know, that the result will be inserted later in a UTF-8 template.
    fn to_html(&self) -> String;
}
230
231impl Hyperlink for Link<'_> {
232    #[inline]
233    fn decode_ampersand_and_percent(&mut self) {
234        // HTML escape decode value.
235        fn dec_amp(val: &mut Cow<str>) {
236            let decoded_text = html_escape::decode_html_entities(val);
237            if matches!(&decoded_text, Cow::Owned(..)) {
238                // Does nothing, but satisfying the borrow checker. Does not `clone()`.
239                let decoded_text = Cow::Owned(decoded_text.into_owned());
240                // Store result.
241                let _ = std::mem::replace(val, decoded_text);
242            }
243        }
244
245        // HTML escape decode and percent decode value.
246        fn dec_amp_percent(val: &mut Cow<str>) {
247            dec_amp(val);
248            let decoded_dest = percent_decode_str(val.as_ref()).decode_utf8().unwrap();
249            if matches!(&decoded_dest, Cow::Owned(..)) {
250                // Does nothing, but satisfying the borrow checker. Does not `clone()`.
251                let decoded_dest = Cow::Owned(decoded_dest.into_owned());
252                // Store result.
253                let _ = std::mem::replace(val, decoded_dest);
254            }
255        }
256
257        match self {
258            Link::Text2Dest(text1, dest, title) => {
259                dec_amp(text1);
260                dec_amp_percent(dest);
261                dec_amp(title);
262            }
263            Link::Image(alt, src) => {
264                dec_amp(alt);
265                dec_amp_percent(src);
266            }
267            Link::Image2Dest(text1, alt, src, text2, dest, title) => {
268                dec_amp(text1);
269                dec_amp(alt);
270                dec_amp_percent(src);
271                dec_amp(text2);
272                dec_amp_percent(dest);
273                dec_amp(title);
274            }
275            _ => unimplemented!(),
276        };
277    }
278
279    //
280    fn is_local_fn(dest: &Cow<str>) -> bool {
281        !((dest.contains("://") && !dest.contains(":///"))
282            || dest.starts_with("mailto:")
283            || dest.starts_with("tel:"))
284    }
285
286    //
287    fn strip_local_scheme(&mut self) {
288        fn strip(dest: &mut Cow<str>) {
289            if <Link<'_> as Hyperlink>::is_local_fn(dest) {
290                <Link<'_> as Hyperlink>::strip_scheme_fn(dest);
291            }
292        }
293
294        match self {
295            Link::Text2Dest(_, dest, _title) => strip(dest),
296            Link::Image2Dest(_, _, src, _, dest, _) => {
297                strip(src);
298                strip(dest);
299            }
300            Link::Image(_, src) => strip(src),
301            _ => {}
302        };
303    }
304
305    //
306    fn strip_scheme_fn(inout: &mut Cow<str>) {
307        let output = inout
308            .trim_start_matches("https://")
309            .trim_start_matches("https:")
310            .trim_start_matches("http://")
311            .trim_start_matches("http:")
312            .trim_start_matches("tpnote:")
313            .trim_start_matches("mailto:")
314            .trim_start_matches("tel:");
315        if output != inout.as_ref() {
316            let _ = std::mem::replace(inout, Cow::Owned(output.to_string()));
317        }
318    }
319
320    //
321    fn is_autolink(&self) -> bool {
322        let (text, dest) = match self {
323            Link::Text2Dest(text, dest, _title) => (text, dest),
324            Link::Image(alt, source) => (alt, source),
325            // `Link::Image2Dest` is never an autolink.
326            _ => return false,
327        };
328        text == dest
329    }
330
331    //
332    fn rebase_local_link(
333        &mut self,
334        root_path: &Path,
335        docdir: &Path,
336        rewrite_rel_paths: bool,
337        rewrite_abs_paths: bool,
338    ) -> Result<(), NoteError> {
339        let do_rebase = |path: &mut Cow<str>| -> Result<(), NoteError> {
340            if <Link as Hyperlink>::is_local_fn(path) {
341                let dest_out = assemble_link(
342                    root_path,
343                    docdir,
344                    Path::new(path.as_ref()),
345                    rewrite_rel_paths,
346                    rewrite_abs_paths,
347                )
348                .ok_or(NoteError::InvalidLocalPath {
349                    path: path.as_ref().to_string(),
350                })?;
351
352                // Store result.
353                let new_dest = Cow::Owned(dest_out.to_str().unwrap_or_default().to_string());
354                let _ = std::mem::replace(path, new_dest);
355            }
356            Ok(())
357        };
358
359        match self {
360            Link::Text2Dest(_, dest, _) => do_rebase(dest),
361            Link::Image2Dest(_, _, src, _, dest, _) => do_rebase(src).and_then(|_| do_rebase(dest)),
362            Link::Image(_, src) => do_rebase(src),
363            _ => unimplemented!(),
364        }
365    }
366
367    //
368    fn expand_shorthand_link(&mut self, prepend_path: Option<&Path>) -> Result<(), NoteError> {
369        let shorthand_link = match self {
370            Link::Text2Dest(_, dest, _) => dest,
371            Link::Image2Dest(_, _, _, _, dest, _) => dest,
372            _ => return Ok(()),
373        };
374
375        if !<Link as Hyperlink>::is_local_fn(shorthand_link) {
376            return Ok(());
377        }
378
379        let (shorthand_str, shorthand_format) = match shorthand_link.split_once(FORMAT_SEPARATOR) {
380            Some((path, fmt)) => (path, Some(fmt)),
381            None => (shorthand_link.as_ref(), None),
382        };
383
384        let shorthand_path = Path::new(shorthand_str);
385
386        if let Some(sort_tag) = shorthand_str.is_valid_sort_tag() {
387            let full_shorthand_path = if let Some(root_path) = prepend_path {
388                // Concatenate `root_path` and `shorthand_path`.
389                let shorthand_path = shorthand_path
390                    .strip_prefix(MAIN_SEPARATOR_STR)
391                    .unwrap_or(shorthand_path);
392                Cow::Owned(root_path.join(shorthand_path))
393            } else {
394                Cow::Borrowed(shorthand_path)
395            };
396
397            // Search for the file.
398            let found = full_shorthand_path
399                .parent()
400                .and_then(|dir| dir.find_file_with_sort_tag(sort_tag));
401
402            if let Some(path) = found {
403                // We prepended `root_path` before, we can safely strip it
404                // and unwrap.
405                let found_link = path
406                    .strip_prefix(prepend_path.unwrap_or(Path::new("")))
407                    .unwrap();
408                // Prepend `/`.
409                let mut found_link = Path::new(MAIN_SEPARATOR_STR)
410                    .join(found_link)
411                    .to_str()
412                    .unwrap_or_default()
413                    .to_string();
414
415                if let Some(fmt) = shorthand_format {
416                    found_link.push(FORMAT_SEPARATOR);
417                    found_link.push_str(fmt);
418                }
419
420                // Store result.
421                let _ = std::mem::replace(shorthand_link, Cow::Owned(found_link));
422            } else {
423                return Err(NoteError::CanNotExpandShorthandLink {
424                    path: full_shorthand_path.to_string_lossy().into_owned(),
425                });
426            }
427        }
428        Ok(())
429    }
430
431    //
432    fn rewrite_autolink(&mut self) {
433        let text = match self {
434            Link::Text2Dest(text, _, _) => text,
435            Link::Image(alt, _) => alt,
436            _ => return,
437        };
438
439        <Link as Hyperlink>::strip_scheme_fn(text);
440    }
441
442    //
443    fn apply_format_attribute(&mut self) {
444        // Is this an absolute URL?
445
446        let (text, dest) = match self {
447            Link::Text2Dest(text, dest, _) => (text, dest),
448            Link::Image(alt, source) => (alt, source),
449            _ => return,
450        };
451
452        if !<Link as Hyperlink>::is_local_fn(dest) {
453            return;
454        }
455
456        // We assume, that `dest` had been expanded already, so we can extract
457        // the full filename here.
458        // If ever it ends with a format string we apply it. Otherwise we quit
459        // the method and do nothing.
460        let (path, format) = match dest.split_once(FORMAT_SEPARATOR) {
461            Some(s) => s,
462            None => return,
463        };
464
465        let mut short_text = Path::new(path)
466            .file_name()
467            .unwrap_or_default()
468            .to_str()
469            .unwrap_or_default();
470
471        // Select what to match:
472        let format = if format.starts_with(FORMAT_COMPLETE_FILENAME) {
473            // Keep complete filename.
474            format
475                .strip_prefix(FORMAT_COMPLETE_FILENAME)
476                .unwrap_or(format)
477        } else if format.starts_with(FORMAT_ONLY_SORT_TAG) {
478            // Keep only format-tag.
479            short_text = Path::new(path).disassemble().0;
480            format.strip_prefix(FORMAT_ONLY_SORT_TAG).unwrap_or(format)
481        } else {
482            // Keep only stem.
483            short_text = Path::new(path).disassemble().2;
484            format
485        };
486
487        match format.split_once(FORMAT_FROM_TO_SEPARATOR) {
488            // No `:`
489            None => {
490                if !format.is_empty() {
491                    if let Some(idx) = short_text.find(format) {
492                        short_text = &short_text[..idx];
493                    };
494                }
495            }
496            // Some `:`
497            Some((from, to)) => {
498                if !from.is_empty() {
499                    if let Some(idx) = short_text.find(from) {
500                        short_text = &short_text[(idx + from.len())..];
501                    };
502                }
503                if !to.is_empty() {
504                    if let Some(idx) = short_text.find(to) {
505                        short_text = &short_text[..idx];
506                    };
507                }
508            }
509        }
510        // Store the result.
511        let _ = std::mem::replace(text, Cow::Owned(short_text.to_string()));
512        let _ = std::mem::replace(dest, Cow::Owned(path.to_string()));
513    }
514
515    //
516    fn get_local_link_dest_path(&self) -> Option<&Path> {
517        let dest = match self {
518            Link::Text2Dest(_, dest, _) => dest,
519            Link::Image2Dest(_, _, _, _, dest, _) => dest,
520            _ => return None,
521        };
522        if <Link as Hyperlink>::is_local_fn(dest) {
523            // Strip URL fragment.
524            match (dest.rfind('#'), dest.rfind(['/', '\\'])) {
525                (Some(n), sep) if sep.is_some_and(|sep| n > sep) || sep.is_none() => {
526                    Some(Path::new(&dest.as_ref()[..n]))
527                }
528                _ => Some(Path::new(dest.as_ref())),
529            }
530        } else {
531            None
532        }
533    }
534
535    //
536    fn get_local_link_src_path(&self) -> Option<&Path> {
537        let src = match self {
538            Link::Image2Dest(_, _, src, _, _, _) => src,
539            Link::Image(_, src) => src,
540            _ => return None,
541        };
542        if <Link as Hyperlink>::is_local_fn(src) {
543            Some(Path::new(src.as_ref()))
544        } else {
545            None
546        }
547    }
548
549    //
550    fn append_html_ext(&mut self) {
551        let dest = match self {
552            Link::Text2Dest(_, dest, _) => dest,
553            Link::Image2Dest(_, _, _, _, dest, _) => dest,
554            _ => return,
555        };
556        if <Link as Hyperlink>::is_local_fn(dest) {
557            let path = dest.as_ref();
558            if path.has_tpnote_ext() {
559                let mut newpath = path.to_string();
560                newpath.push_str(HTML_EXT);
561
562                let _ = std::mem::replace(dest, Cow::Owned(newpath));
563            }
564        }
565    }
566
567    //
568    fn to_html(&self) -> String {
569        // HTML escape encode double quoted attributes
570        fn enc_amp(val: Cow<str>) -> Cow<str> {
571            let s = html_escape::encode_double_quoted_attribute(val.as_ref());
572            if s == val {
573                val
574            } else {
575                // No cloning happens here, because we own `s` already.
576                Cow::Owned(s.into_owned())
577            }
578        }
579        // Replace Windows backslash, then HTML escape encode.
580        fn repl_backspace_enc_amp(val: Cow<str>) -> Cow<str> {
581            let val = if val.as_ref().contains('\\') {
582                Cow::Owned(val.to_string().replace('\\', "/"))
583            } else {
584                val
585            };
586            let s = html_escape::encode_double_quoted_attribute(val.as_ref());
587            if s == val {
588                val
589            } else {
590                // No cloning happens here, because we own `s` already.
591                Cow::Owned(s.into_owned())
592            }
593        }
594
595        match self {
596            Link::Text2Dest(text, dest, title) => {
597                // Format title.
598                let title_html = if !title.is_empty() {
599                    format!(" title=\"{}\"", enc_amp(title.shallow_clone()))
600                } else {
601                    "".to_string()
602                };
603
604                format!(
605                    "<a href=\"{}\"{}>{}</a>",
606                    repl_backspace_enc_amp(dest.shallow_clone()),
607                    title_html,
608                    text
609                )
610            }
611            Link::Image2Dest(text1, alt, src, text2, dest, title) => {
612                // Format title.
613                let title_html = if !title.is_empty() {
614                    format!(" title=\"{}\"", enc_amp(title.shallow_clone()))
615                } else {
616                    "".to_string()
617                };
618
619                format!(
620                    "<a href=\"{}\"{}>{}<img src=\"{}\" alt=\"{}\">{}</a>",
621                    repl_backspace_enc_amp(dest.shallow_clone()),
622                    title_html,
623                    text1,
624                    repl_backspace_enc_amp(src.shallow_clone()),
625                    enc_amp(alt.shallow_clone()),
626                    text2
627                )
628            }
629            Link::Image(alt, src) => {
630                format!(
631                    "<img src=\"{}\" alt=\"{}\">",
632                    repl_backspace_enc_amp(src.shallow_clone()),
633                    enc_amp(alt.shallow_clone())
634                )
635            }
636            _ => unimplemented!(),
637        }
638    }
639}
640
641#[inline]
642/// A helper function that scans the input HTML document in `html_input` for
643/// HTML hyperlinks. When it finds a relative URL (local link), it analyzes it's
644/// path. Depending on the `local_link_kind` configuration, relative local
645/// links are converted into absolute local links and eventually rebased.
646///
647/// In order to achieve this, the user must respect the following convention
648/// concerning absolute local links in Tp-Note documents:
649/// 1. When a document contains a local link with an absolute path (absolute
650///    local link), the base of this path is considered to be the directory
651///    where the marker file ‘.tpnote.toml’ resides (or ‘/’ in non exists). The
652///    marker file directory is `root_path`.
653/// 2. Furthermore, the parameter `docdir` contains the absolute path of the
654///    directory of the currently processed HTML document. The user guarantees
655///    that `docdir` is the base for all relative local links in the document.
656///    Note: `docdir` must always start with `root_path`.
657///
658/// If `LocalLinkKind::Off`, relative local links are not converted.
659/// If `LocalLinkKind::Short`, relative local links are converted into an
660/// absolute local links with `root_path` as base directory.
661/// If `LocalLinkKind::Long`, in addition to the above, the resulting absolute
662/// local link is prepended with `root_path`.
663///
664/// If `rewrite_ext` is true and a local link points to a known
665/// Tp-Note file extension, then `.html` is appended to the converted link.
666///
667/// Remark: The link's text property is never changed. However, there is
668/// one exception: when the link's text contains a string similar to URLs,
669/// starting with `http:` or `tpnote:`. In this case, the string is interpreted
670/// as URL and only the stem of the filename is displayed, e.g.
671/// `<a ...>http:dir/my file.md</a>` is replaced with `<a ...>my file</a>`.
672///
673/// Finally, before a converted local link is reinserted in the output HTML, a
674/// copy of that link is kept in `allowed_local_links` for further bookkeeping.
675///
676/// NB: All absolute URLs (starting with a domain) always remain untouched.
677///
678/// NB2: It is guaranteed, that the resulting HTML document contains only local
679/// links to other documents within `root_path`. Deviant links displayed as
680/// `INVALID LOCAL LINK` and URL is discarded.
681pub fn rewrite_links(
682    html_input: String,
683    root_path: &Path,
684    docdir: &Path,
685    local_link_kind: LocalLinkKind,
686    rewrite_ext: bool,
687    allowed_local_links: Arc<RwLock<HashSet<PathBuf>>>,
688) -> String {
689    let (rewrite_rel_paths, rewrite_abs_paths) = match local_link_kind {
690        LocalLinkKind::Off => (false, false),
691        LocalLinkKind::Short => (true, false),
692        LocalLinkKind::Long => (true, true),
693    };
694
695    // Search for hyperlinks and inline images in the HTML rendition
696    // of this note.
697    let mut rest = &*html_input;
698    let mut html_out = String::new();
699    for ((skipped, _consumed, remaining), mut link) in HtmlLinkInlineImage::new(&html_input) {
700        html_out.push_str(skipped);
701        rest = remaining;
702
703        // Check if `text` = `dest`.
704        let mut link_is_autolink = link.is_autolink();
705
706        // Percent decode link destination.
707        link.decode_ampersand_and_percent();
708
709        // Check again if `text` = `dest`.
710        link_is_autolink = link_is_autolink || link.is_autolink();
711
712        link.strip_local_scheme();
713
714        // Rewrite the local link.
715        match link
716            .rebase_local_link(root_path, docdir, rewrite_rel_paths, rewrite_abs_paths)
717            .and_then(|_| {
718                link.expand_shorthand_link(
719                    (matches!(local_link_kind, LocalLinkKind::Short)).then_some(root_path),
720                )
721            }) {
722            Ok(()) => {}
723            Err(e) => {
724                let e = e.to_string();
725                let e = html_escape::encode_text(&e);
726                html_out.push_str(&format!("<i>{}</i>", e));
727                continue;
728            }
729        };
730
731        if link_is_autolink {
732            link.rewrite_autolink();
733        }
734
735        link.apply_format_attribute();
736
737        if let Some(dest_path) = link.get_local_link_dest_path() {
738            allowed_local_links.write().insert(dest_path.to_path_buf());
739        };
740        if let Some(src_path) = link.get_local_link_src_path() {
741            allowed_local_links.write().insert(src_path.to_path_buf());
742        };
743
744        if rewrite_ext {
745            link.append_html_ext();
746        }
747        html_out.push_str(&link.to_html());
748    }
749    // Add the last `remaining`.
750    html_out.push_str(rest);
751
752    log::trace!(
753        "Viewer: referenced allowed local files: {}",
754        allowed_local_links
755            .read_recursive()
756            .iter()
757            .map(|p| {
758                let mut s = "\n    '".to_string();
759                s.push_str(&p.display().to_string());
760                s
761            })
762            .collect::<String>()
763    );
764
765    html_out
766    // The `RwLockWriteGuard` is released here.
767}
768
/// Heuristics for `&str` data that may contain tagged HTML.
///
/// All tests inspect only the first line of the input after leading
/// whitespace has been skipped; the comparison is ASCII case-insensitive.
pub trait HtmlStr {
    /// Lowercase pattern to check if this is a Doctype tag.
    const TAG_DOCTYPE_PAT: &'static str = "<!doctype";
    /// Lowercase pattern to check if this Doctype is HTML.
    const TAG_DOCTYPE_HTML_PAT: &'static str = "<!doctype html";
    /// Doctype HTML tag. This is inserted by
    /// `<HtmlString>.prepend_html_start_tag()`
    const TAG_DOCTYPE_HTML: &'static str = "<!DOCTYPE html>";
    /// Lowercase pattern to check if this is an HTML start tag.
    const START_TAG_HTML_PAT: &'static str = "<html";
    /// HTML end tag.
    const END_TAG_HTML: &'static str = "</html>";

    /// True when `self` is empty, or when its first line starts with
    /// `<!DOCTYPE html` and the first `>` is the last character of that
    /// line.
    fn is_empty_html(&self) -> bool;

    /// Same as `is_empty_html()` for `html`, but callable without pulling
    /// in additional trait bounds.
    fn is_empty_html2(html: &str) -> bool {
        <str as HtmlStr>::is_empty_html(html)
    }

    /// True when the first line starts with `<!DOCTYPE html`.
    fn has_html_start_tag(&self) -> bool;

    /// Same as `has_html_start_tag()` for `html`, but callable without
    /// pulling in additional trait bounds.
    fn has_html_start_tag2(html: &str) -> bool {
        <str as HtmlStr>::has_html_start_tag(html)
    }

    /// Some heuristics to guess if the input stream contains HTML.
    /// Current implementation:
    /// True if the first line
    ///
    /// * starts with `<!DOCTYPE html` and a `>` follows on that line, or
    /// * starts with `<html` and a `>` follows on that line.
    ///
    /// This function does not check if the recognized HTML is valid.
    fn is_html_unchecked(&self) -> bool;
}

impl HtmlStr for str {
    fn is_empty_html(&self) -> bool {
        if self.is_empty() {
            return true;
        }

        let first_line = match self.trim_start().lines().next() {
            Some(line) => line.to_ascii_lowercase(),
            None => String::new(),
        };

        // The first closing bracket must be in last position. The length
        // is only inspected after the (non-empty) pattern matched.
        first_line.starts_with(Self::TAG_DOCTYPE_HTML_PAT)
            && first_line.find('>') == Some(first_line.len() - 1)
    }

    fn has_html_start_tag(&self) -> bool {
        self.trim_start().lines().next().is_some_and(|line| {
            line.to_ascii_lowercase()
                .starts_with(Self::TAG_DOCTYPE_HTML_PAT)
        })
    }

    fn is_html_unchecked(&self) -> bool {
        let Some(first_line) = self.trim_start().lines().next() else {
            return false;
        };
        let first_line = first_line.to_ascii_lowercase();

        // One of the start patterns, followed by a closing bracket.
        [Self::TAG_DOCTYPE_HTML_PAT, Self::START_TAG_HTML_PAT]
            .iter()
            .any(|pat| {
                first_line
                    .strip_prefix(*pat)
                    .is_some_and(|tail| tail.contains('>'))
            })
    }
}
858
859/// This trait deals with tagged HTML `String` data.
860pub trait HtmlString: Sized {
861    /// If the input does not start with `<!DOCTYPE html`
862    /// (or lowercase variants), then insert `<!DOCTYPE html>`.
863    /// Returns `InputStreamError::NonHtmlDoctype` if there is another Doctype
864    /// already.
865    fn prepend_html_start_tag(self) -> Result<Self, InputStreamError>;
866}
867
868impl HtmlString for String {
869    fn prepend_html_start_tag(self) -> Result<Self, InputStreamError> {
870        // Bring `HtmlStr` methods into scope.
871        use crate::html::HtmlStr;
872
873        let html2 = self
874            .trim_start()
875            .lines()
876            .next()
877            .map(|l| l.to_ascii_lowercase())
878            .unwrap_or_default();
879
880        if html2.starts_with(<str as HtmlStr>::TAG_DOCTYPE_HTML_PAT) {
881            // Has a start tag already.
882            Ok(self)
883        } else if !html2.starts_with(<str as HtmlStr>::TAG_DOCTYPE_PAT) {
884            // Insert HTML Doctype tag.
885            let mut html = self;
886            html.insert_str(0, <str as HtmlStr>::TAG_DOCTYPE_HTML);
887            Ok(html)
888        } else {
889            // There is a Doctype other than HTML.
890            Err(InputStreamError::NonHtmlDoctype {
891                html: self.chars().take(25).collect::<String>(),
892            })
893        }
894    }
895}
896
897#[cfg(test)]
898mod tests {
899
900    use crate::error::InputStreamError;
901    use crate::error::NoteError;
902    use crate::html::Hyperlink;
903    use crate::html::assemble_link;
904    use crate::html::rewrite_links;
905    use parking_lot::RwLock;
906    use parse_hyperlinks::parser::Link;
907    use parse_hyperlinks_extras::parser::parse_html::take_link;
908    use std::borrow::Cow;
909    use std::{
910        collections::HashSet,
911        path::{Path, PathBuf},
912        sync::Arc,
913    };
914
915    #[test]
916    fn test_assemble_link() {
917        // `rewrite_rel_links=true`
918        let output = assemble_link(
919            Path::new("/my"),
920            Path::new("/my/doc/path"),
921            Path::new("../local/link to/note.md"),
922            true,
923            false,
924        )
925        .unwrap();
926        assert_eq!(output, Path::new("/doc/local/link to/note.md"));
927
928        // `rewrite_rel_links=false`
929        let output = assemble_link(
930            Path::new("/my"),
931            Path::new("/my/doc/path"),
932            Path::new("../local/link to/note.md"),
933            false,
934            false,
935        )
936        .unwrap();
937        assert_eq!(output, Path::new("../local/link to/note.md"));
938
939        // Absolute `dest`.
940        let output = assemble_link(
941            Path::new("/my"),
942            Path::new("/my/doc/path"),
943            Path::new("/test/../abs/local/link to/note.md"),
944            false,
945            false,
946        )
947        .unwrap();
948        assert_eq!(output, Path::new("/abs/local/link to/note.md"));
949
950        // Underflow.
951        let output = assemble_link(
952            Path::new("/my"),
953            Path::new("/my/doc/path"),
954            Path::new("/../local/link to/note.md"),
955            false,
956            false,
957        );
958        assert_eq!(output, None);
959
960        // Absolute `dest`, `rewrite_abs_links=true`.
961        let output = assemble_link(
962            Path::new("/my"),
963            Path::new("/my/doc/path"),
964            Path::new("/abs/local/link to/note.md"),
965            false,
966            true,
967        )
968        .unwrap();
969        assert_eq!(output, Path::new("/my/abs/local/link to/note.md"));
970
971        // Absolute `dest`, `rewrite_abs_links=false`.
972        let output = assemble_link(
973            Path::new("/my"),
974            Path::new("/my/doc/path"),
975            Path::new("/test/../abs/local/link to/note.md"),
976            false,
977            false,
978        )
979        .unwrap();
980        assert_eq!(output, Path::new("/abs/local/link to/note.md"));
981
982        // Absolute `dest`, `rewrite` both.
983        let output = assemble_link(
984            Path::new("/my"),
985            Path::new("/my/doc/path"),
986            Path::new("abs/local/link to/note.md"),
987            true,
988            true,
989        )
990        .unwrap();
991        assert_eq!(output, Path::new("/my/doc/path/abs/local/link to/note.md"));
992    }
993
994    #[test]
995    fn test_decode_html_escape_and_percent() {
996        //
997        let mut input = Link::Text2Dest(Cow::from("text"), Cow::from("dest"), Cow::from("title"));
998        let expected = Link::Text2Dest(Cow::from("text"), Cow::from("dest"), Cow::from("title"));
999        input.decode_ampersand_and_percent();
1000        let output = input;
1001        assert_eq!(output, expected);
1002
1003        //
1004        let mut input = Link::Text2Dest(
1005            Cow::from("te%20xt"),
1006            Cow::from("de%20st"),
1007            Cow::from("title"),
1008        );
1009        let expected =
1010            Link::Text2Dest(Cow::from("te%20xt"), Cow::from("de st"), Cow::from("title"));
1011        input.decode_ampersand_and_percent();
1012        let output = input;
1013        assert_eq!(output, expected);
1014
1015        //
1016        let mut input =
1017            Link::Text2Dest(Cow::from("text"), Cow::from("d:e%20st"), Cow::from("title"));
1018        let expected = Link::Text2Dest(Cow::from("text"), Cow::from("d:e st"), Cow::from("title"));
1019        input.decode_ampersand_and_percent();
1020        let output = input;
1021        assert_eq!(output, expected);
1022
1023        let mut input = Link::Text2Dest(
1024            Cow::from("a&amp;&quot;lt"),
1025            Cow::from("a&amp;&quot;lt"),
1026            Cow::from("a&amp;&quot;lt"),
1027        );
1028        let expected = Link::Text2Dest(
1029            Cow::from("a&\"lt"),
1030            Cow::from("a&\"lt"),
1031            Cow::from("a&\"lt"),
1032        );
1033        input.decode_ampersand_and_percent();
1034        let output = input;
1035        assert_eq!(output, expected);
1036
1037        //
1038        let mut input = Link::Image(Cow::from("al%20t"), Cow::from("de%20st"));
1039        let expected = Link::Image(Cow::from("al%20t"), Cow::from("de st"));
1040        input.decode_ampersand_and_percent();
1041        let output = input;
1042        assert_eq!(output, expected);
1043
1044        //
1045        let mut input = Link::Image(Cow::from("a\\lt"), Cow::from("d\\est"));
1046        let expected = Link::Image(Cow::from("a\\lt"), Cow::from("d\\est"));
1047        input.decode_ampersand_and_percent();
1048        let output = input;
1049        assert_eq!(output, expected);
1050
1051        //
1052        let mut input = Link::Image(Cow::from("a&amp;&quot;lt"), Cow::from("a&amp;&quot;lt"));
1053        let expected = Link::Image(Cow::from("a&\"lt"), Cow::from("a&\"lt"));
1054        input.decode_ampersand_and_percent();
1055        let output = input;
1056        assert_eq!(output, expected);
1057    }
1058
1059    #[test]
1060    fn test_is_local() {
1061        let input = Cow::from("/path/My doc.md");
1062        assert!(<Link as Hyperlink>::is_local_fn(&input));
1063
1064        let input = Cow::from("tpnote:path/My doc.md");
1065        assert!(<Link as Hyperlink>::is_local_fn(&input));
1066
1067        let input = Cow::from("tpnote:/path/My doc.md");
1068        assert!(<Link as Hyperlink>::is_local_fn(&input));
1069
1070        let input = Cow::from("https://getreu.net");
1071        assert!(!<Link as Hyperlink>::is_local_fn(&input));
1072    }
1073
1074    #[test]
1075    fn strip_local_scheme() {
1076        let mut input = Link::Text2Dest(
1077            Cow::from("xyz"),
1078            Cow::from("https://getreu.net"),
1079            Cow::from("xyz"),
1080        );
1081        let expected = input.clone();
1082        input.strip_local_scheme();
1083        assert_eq!(input, expected);
1084
1085        //
1086        let mut input = Link::Text2Dest(
1087            Cow::from("xyz"),
1088            Cow::from("tpnote:/dir/My doc.md"),
1089            Cow::from("xyz"),
1090        );
1091        let expected = Link::Text2Dest(
1092            Cow::from("xyz"),
1093            Cow::from("/dir/My doc.md"),
1094            Cow::from("xyz"),
1095        );
1096        input.strip_local_scheme();
1097        assert_eq!(input, expected);
1098    }
1099
1100    #[test]
1101    fn test_is_autolink() {
1102        let input = Link::Image(Cow::from("abc"), Cow::from("abc"));
1103        assert!(input.is_autolink());
1104
1105        //
1106        let input = Link::Text2Dest(Cow::from("abc"), Cow::from("abc"), Cow::from("xyz"));
1107        assert!(input.is_autolink());
1108
1109        //
1110        let input = Link::Image(Cow::from("abc"), Cow::from("abcd"));
1111        assert!(!input.is_autolink());
1112
1113        //
1114        let input = Link::Text2Dest(Cow::from("abc"), Cow::from("abcd"), Cow::from("xyz"));
1115        assert!(!input.is_autolink());
1116    }
1117
1118    #[test]
1119    fn test_rewrite_local_link() {
1120        let root_path = Path::new("/my/");
1121        let docdir = Path::new("/my/abs/note path/");
1122
1123        // Should panic: this is not a relative path.
1124        let mut input = take_link("<a href=\"ftp://getreu.net\">Blog</a>")
1125            .unwrap()
1126            .1
1127            .1;
1128        input
1129            .rebase_local_link(root_path, docdir, true, false)
1130            .unwrap();
1131        assert!(input.get_local_link_dest_path().is_none());
1132
1133        //
1134        let root_path = Path::new("/my/");
1135        let docdir = Path::new("/my/abs/note path/");
1136
1137        // Check relative path to image.
1138        let mut input = take_link("<img src=\"down/./down/../../t m p.jpg\" alt=\"Image\" />")
1139            .unwrap()
1140            .1
1141            .1;
1142        let expected = "<img src=\"/abs/note path/t m p.jpg\" \
1143            alt=\"Image\">";
1144        input
1145            .rebase_local_link(root_path, docdir, true, false)
1146            .unwrap();
1147        let outpath = input.get_local_link_src_path().unwrap();
1148        let output = input.to_html();
1149        assert_eq!(output, expected);
1150        assert_eq!(outpath, PathBuf::from("/abs/note path/t m p.jpg"));
1151
1152        // Check relative path to image. Canonicalized?
1153        let mut input = take_link("<img src=\"down/./../../t m p.jpg\" alt=\"Image\" />")
1154            .unwrap()
1155            .1
1156            .1;
1157        let expected = "<img src=\"/abs/t m p.jpg\" alt=\"Image\">";
1158        input
1159            .rebase_local_link(root_path, docdir, true, false)
1160            .unwrap();
1161        let outpath = input.get_local_link_src_path().unwrap();
1162        let output = input.to_html();
1163        assert_eq!(output, expected);
1164        assert_eq!(outpath, PathBuf::from("/abs/t m p.jpg"));
1165
1166        // Check relative path to note file.
1167        let mut input = take_link("<a href=\"./down/./../my note 1.md\">my note 1</a>")
1168            .unwrap()
1169            .1
1170            .1;
1171        let expected = "<a href=\"/abs/note path/my note 1.md\">my note 1</a>";
1172        input
1173            .rebase_local_link(root_path, docdir, true, false)
1174            .unwrap();
1175        let outpath = input.get_local_link_dest_path().unwrap();
1176        let output = input.to_html();
1177        assert_eq!(output, expected);
1178        assert_eq!(outpath, PathBuf::from("/abs/note path/my note 1.md"));
1179
1180        // Check absolute path to note file.
1181        let mut input = take_link("<a href=\"/dir/./down/../my note 1.md\">my note 1</a>")
1182            .unwrap()
1183            .1
1184            .1;
1185        let expected = "<a href=\"/dir/my note 1.md\">my note 1</a>";
1186        input
1187            .rebase_local_link(root_path, docdir, true, false)
1188            .unwrap();
1189        let outpath = input.get_local_link_dest_path().unwrap();
1190        let output = input.to_html();
1191        assert_eq!(output, expected);
1192        assert_eq!(outpath, PathBuf::from("/dir/my note 1.md"));
1193
1194        // Check relative path to note file. Canonicalized?
1195        let mut input = take_link("<a href=\"./down/./../dir/my note 1.md\">my note 1</a>")
1196            .unwrap()
1197            .1
1198            .1;
1199        let expected = "<a href=\"dir/my note 1.md\">my note 1</a>";
1200        input
1201            .rebase_local_link(root_path, docdir, false, false)
1202            .unwrap();
1203        let outpath = input.get_local_link_dest_path().unwrap();
1204        let output = input.to_html();
1205        assert_eq!(output, expected);
1206        assert_eq!(outpath, PathBuf::from("dir/my note 1.md"));
1207
1208        // Check relative link in input.
1209        let mut input = take_link("<a href=\"./down/./../dir/my note 1.md\">my note 1</a>")
1210            .unwrap()
1211            .1
1212            .1;
1213        let expected = "<a href=\"/path/dir/my note 1.md\">my note 1</a>";
1214        input
1215            .rebase_local_link(
1216                Path::new("/my/note/"),
1217                Path::new("/my/note/path/"),
1218                true,
1219                false,
1220            )
1221            .unwrap();
1222        let outpath = input.get_local_link_dest_path().unwrap();
1223        let output = input.to_html();
1224        assert_eq!(output, expected);
1225        assert_eq!(outpath, PathBuf::from("/path/dir/my note 1.md"));
1226
1227        // Check absolute link in input.
1228        let mut input = take_link("<a href=\"/down/./../dir/my note 1.md\">my note 1</a>")
1229            .unwrap()
1230            .1
1231            .1;
1232        let expected = "<a href=\"/dir/my note 1.md\">my note 1</a>";
1233        input
1234            .rebase_local_link(root_path, Path::new("/my/ignored/"), true, false)
1235            .unwrap();
1236        let outpath = input.get_local_link_dest_path().unwrap();
1237        let output = input.to_html();
1238        assert_eq!(output, expected);
1239        assert_eq!(outpath, PathBuf::from("/dir/my note 1.md"));
1240
1241        // Check absolute link in input, not in `root_path`.
1242        let mut input = take_link("<a href=\"/down/../../dir/my note 1.md\">my note 1</a>")
1243            .unwrap()
1244            .1
1245            .1;
1246        let output = input
1247            .rebase_local_link(root_path, Path::new("/my/notepath/"), true, false)
1248            .unwrap_err();
1249        assert!(matches!(output, NoteError::InvalidLocalPath { .. }));
1250
1251        // Check relative link in input, not in `root_path`.
1252        let mut input = take_link("<a href=\"../../dir/my note 1.md\">my note 1</a>")
1253            .unwrap()
1254            .1
1255            .1;
1256        let output = input
1257            .rebase_local_link(root_path, Path::new("/my/notepath/"), true, false)
1258            .unwrap_err();
1259        assert!(matches!(output, NoteError::InvalidLocalPath { .. }));
1260
1261        // Check relative link in input, with underflow.
1262        let root_path = Path::new("/");
1263        let mut input = take_link("<a href=\"../../dir/my note 1.md\">my note 1</a>")
1264            .unwrap()
1265            .1
1266            .1;
1267        let output = input
1268            .rebase_local_link(root_path, Path::new("/my/"), true, false)
1269            .unwrap_err();
1270        assert!(matches!(output, NoteError::InvalidLocalPath { .. }));
1271
1272        // Check relative link in input, not in `root_path`.
1273        let root_path = Path::new("/my");
1274        let mut input = take_link("<a href=\"../../dir/my note 1.md\">my note 1</a>")
1275            .unwrap()
1276            .1
1277            .1;
1278        let output = input
1279            .rebase_local_link(root_path, Path::new("/my/notepath"), true, false)
1280            .unwrap_err();
1281        assert!(matches!(output, NoteError::InvalidLocalPath { .. }));
1282
1283        // Test autolink.
1284        let root_path = Path::new("/my");
1285        let mut input =
1286            take_link("<a href=\"tpnote:dir/3.0-my note.md\">tpnote:dir/3.0-my note.md</a>")
1287                .unwrap()
1288                .1
1289                .1;
1290        input.strip_local_scheme();
1291        input
1292            .rebase_local_link(root_path, Path::new("/my/path"), true, false)
1293            .unwrap();
1294        input.rewrite_autolink();
1295        input.apply_format_attribute();
1296        let outpath = input.get_local_link_dest_path().unwrap();
1297        let output = input.to_html();
1298        let expected = "<a href=\"/path/dir/3.0-my note.md\">dir/3.0-my note.md</a>";
1299        assert_eq!(output, expected);
1300        assert_eq!(outpath, PathBuf::from("/path/dir/3.0-my note.md"));
1301
1302        // Test short autolink 1 with sort-tag only.
1303        let root_path = Path::new("/my");
1304        let mut input = take_link("<a href=\"tpnote:dir/3.0\">tpnote:dir/3.0</a>")
1305            .unwrap()
1306            .1
1307            .1;
1308        input.strip_local_scheme();
1309        input
1310            .rebase_local_link(root_path, Path::new("/my/path"), true, false)
1311            .unwrap();
1312        input.rewrite_autolink();
1313        input.apply_format_attribute();
1314        let outpath = input.get_local_link_dest_path().unwrap();
1315        let output = input.to_html();
1316        let expected = "<a href=\"/path/dir/3.0\">dir/3.0</a>";
1317        assert_eq!(output, expected);
1318        assert_eq!(outpath, PathBuf::from("/path/dir/3.0"));
1319
1320        // The link text contains inline content.
1321        let root_path = Path::new("/my");
1322        let mut input = take_link(
1323            "<a href=\
1324            \"/uri\">link <em>foo <strong>bar</strong> <code>#</code></em>\
1325            </a>",
1326        )
1327        .unwrap()
1328        .1
1329        .1;
1330        input.strip_local_scheme();
1331        input
1332            .rebase_local_link(root_path, Path::new("/my/path"), true, false)
1333            .unwrap();
1334        let outpath = input.get_local_link_dest_path().unwrap();
1335        let expected = "<a href=\"/uri\">link <em>foo <strong>bar\
1336            </strong> <code>#</code></em></a>";
1337
1338        let output = input.to_html();
1339        assert_eq!(output, expected);
1340        assert_eq!(outpath, PathBuf::from("/uri"));
1341    }
1342
1343    #[test]
1344    fn test_rewrite_autolink() {
1345        //
1346        let mut input = Link::Text2Dest(
1347            Cow::from("http://getreu.net"),
1348            Cow::from("http://getreu.net"),
1349            Cow::from("title"),
1350        );
1351        let expected = Link::Text2Dest(
1352            Cow::from("getreu.net"),
1353            Cow::from("http://getreu.net"),
1354            Cow::from("title"),
1355        );
1356        input.rewrite_autolink();
1357        let output = input;
1358        assert_eq!(output, expected);
1359
1360        //
1361        let mut input = Link::Text2Dest(
1362            Cow::from("/dir/3.0"),
1363            Cow::from("/dir/3.0-My note.md"),
1364            Cow::from("title"),
1365        );
1366        let expected = Link::Text2Dest(
1367            Cow::from("/dir/3.0"),
1368            Cow::from("/dir/3.0-My note.md"),
1369            Cow::from("title"),
1370        );
1371        input.rewrite_autolink();
1372        let output = input;
1373        assert_eq!(output, expected);
1374
1375        //
1376        let mut input = Link::Text2Dest(
1377            Cow::from("tpnote:/dir/3.0"),
1378            Cow::from("/dir/3.0-My note.md"),
1379            Cow::from("title"),
1380        );
1381        let expected = Link::Text2Dest(
1382            Cow::from("/dir/3.0"),
1383            Cow::from("/dir/3.0-My note.md"),
1384            Cow::from("title"),
1385        );
1386        input.rewrite_autolink();
1387        let output = input;
1388        assert_eq!(output, expected);
1389
1390        //
1391        let mut input = Link::Text2Dest(
1392            Cow::from("tpnote:/dir/3.0"),
1393            Cow::from("/dir/3.0-My note.md?"),
1394            Cow::from("title"),
1395        );
1396        let expected = Link::Text2Dest(
1397            Cow::from("/dir/3.0"),
1398            Cow::from("/dir/3.0-My note.md?"),
1399            Cow::from("title"),
1400        );
1401        input.rewrite_autolink();
1402        let output = input;
1403        assert_eq!(output, expected);
1404
1405        //
1406        let mut input = Link::Text2Dest(
1407            Cow::from("/dir/3.0-My note.md"),
1408            Cow::from("/dir/3.0-My note.md"),
1409            Cow::from("title"),
1410        );
1411        let expected = Link::Text2Dest(
1412            Cow::from("/dir/3.0-My note.md"),
1413            Cow::from("/dir/3.0-My note.md"),
1414            Cow::from("title"),
1415        );
1416        input.rewrite_autolink();
1417        let output = input;
1418        assert_eq!(output, expected);
1419    }
1420
1421    #[test]
1422    fn test_apply_format_attribute() {
1423        //
1424        let mut input = Link::Text2Dest(
1425            Cow::from("tpnote:/dir/3.0"),
1426            Cow::from("/dir/3.0-My note.md"),
1427            Cow::from("title"),
1428        );
1429        let expected = Link::Text2Dest(
1430            Cow::from("tpnote:/dir/3.0"),
1431            Cow::from("/dir/3.0-My note.md"),
1432            Cow::from("title"),
1433        );
1434        input.apply_format_attribute();
1435        let output = input;
1436        assert_eq!(output, expected);
1437
1438        //
1439        let mut input = Link::Text2Dest(
1440            Cow::from("does not matter"),
1441            Cow::from("/dir/3.0-My note.md?"),
1442            Cow::from("title"),
1443        );
1444        let expected = Link::Text2Dest(
1445            Cow::from("My note"),
1446            Cow::from("/dir/3.0-My note.md"),
1447            Cow::from("title"),
1448        );
1449        input.apply_format_attribute();
1450        let output = input;
1451        assert_eq!(output, expected);
1452
1453        let mut input = Link::Text2Dest(
1454            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1455            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1456            Cow::from("title"),
1457        );
1458        let expected = Link::Text2Dest(
1459            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1460            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1461            Cow::from("title"),
1462        );
1463        input.apply_format_attribute();
1464        let output = input;
1465        assert_eq!(output, expected);
1466
1467        //
1468        let mut input = Link::Text2Dest(
1469            Cow::from("does not matter"),
1470            Cow::from("/dir/3.0-My note--red_blue_green.jpg?"),
1471            Cow::from("title"),
1472        );
1473        let expected = Link::Text2Dest(
1474            Cow::from("My note--red_blue_green"),
1475            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1476            Cow::from("title"),
1477        );
1478        input.apply_format_attribute();
1479        let output = input;
1480        assert_eq!(output, expected);
1481
1482        //
1483        let mut input = Link::Text2Dest(
1484            Cow::from("does not matter"),
1485            Cow::from("/dir/3.0-My note--red_blue_green.jpg?--"),
1486            Cow::from("title"),
1487        );
1488        let expected = Link::Text2Dest(
1489            Cow::from("My note"),
1490            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1491            Cow::from("title"),
1492        );
1493        input.apply_format_attribute();
1494        let output = input;
1495        assert_eq!(output, expected);
1496
1497        //
1498        let mut input = Link::Text2Dest(
1499            Cow::from("does not matter"),
1500            Cow::from("/dir/3.0-My note--red_blue_green.jpg?_"),
1501            Cow::from("title"),
1502        );
1503        let expected = Link::Text2Dest(
1504            Cow::from("My note--red"),
1505            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1506            Cow::from("title"),
1507        );
1508        input.apply_format_attribute();
1509        let output = input;
1510        assert_eq!(output, expected);
1511
1512        //
1513        let mut input = Link::Text2Dest(
1514            Cow::from("does not matter"),
1515            Cow::from("/dir/3.0-My note--red_blue_green.jpg??"),
1516            Cow::from("title"),
1517        );
1518        let expected = Link::Text2Dest(
1519            Cow::from("3.0-My note--red_blue_green.jpg"),
1520            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1521            Cow::from("title"),
1522        );
1523        input.apply_format_attribute();
1524        let output = input;
1525        assert_eq!(output, expected);
1526
1527        //
1528        let mut input = Link::Text2Dest(
1529            Cow::from("does not matter"),
1530            Cow::from("/dir/3.0-My note--red_blue_green.jpg?#."),
1531            Cow::from("title"),
1532        );
1533        let expected = Link::Text2Dest(
1534            Cow::from("3"),
1535            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1536            Cow::from("title"),
1537        );
1538        input.apply_format_attribute();
1539        let output = input;
1540        assert_eq!(output, expected);
1541
1542        //
1543        let mut input = Link::Text2Dest(
1544            Cow::from("does not matter"),
1545            Cow::from("/dir/3.0-My note--red_blue_green.jpg??.:_"),
1546            Cow::from("title"),
1547        );
1548        let expected = Link::Text2Dest(
1549            Cow::from("0-My note--red"),
1550            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1551            Cow::from("title"),
1552        );
1553        input.apply_format_attribute();
1554        let output = input;
1555        assert_eq!(output, expected);
1556
1557        //
1558        let mut input = Link::Text2Dest(
1559            Cow::from("does not matter"),
1560            Cow::from("/dir/3.0-My note--red_blue_green.jpg?_:_"),
1561            Cow::from("title"),
1562        );
1563        let expected = Link::Text2Dest(
1564            Cow::from("blue"),
1565            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1566            Cow::from("title"),
1567        );
1568        input.apply_format_attribute();
1569        let output = input;
1570        assert_eq!(output, expected);
1571    }
1572
1573    #[test]
1574    fn get_local_link_dest_path() {
1575        //
1576        let input = Link::Text2Dest(Cow::from("xyz"), Cow::from("/dir/3.0"), Cow::from("title"));
1577        assert_eq!(
1578            input.get_local_link_dest_path(),
1579            Some(Path::new("/dir/3.0"))
1580        );
1581
1582        //
1583        let input = Link::Text2Dest(
1584            Cow::from("xyz"),
1585            Cow::from("http://getreu.net"),
1586            Cow::from("title"),
1587        );
1588        assert_eq!(input.get_local_link_dest_path(), None);
1589
1590        //
1591        let input = Link::Text2Dest(Cow::from("xyz"), Cow::from("dir/doc.md"), Cow::from("xyz"));
1592        let expected = Path::new("dir/doc.md");
1593        let res = input.get_local_link_dest_path().unwrap();
1594        assert_eq!(res, expected);
1595
1596        //
1597        let input = Link::Text2Dest(Cow::from("xyz"), Cow::from("d#ir/doc.md"), Cow::from("xyz"));
1598        let expected = Path::new("d#ir/doc.md");
1599        let res = input.get_local_link_dest_path().unwrap();
1600        assert_eq!(res, expected);
1601
1602        //
1603        let input = Link::Text2Dest(
1604            Cow::from("xyz"),
1605            Cow::from("dir/doc.md#1"),
1606            Cow::from("xyz"),
1607        );
1608        let expected = Path::new("dir/doc.md");
1609        let res = input.get_local_link_dest_path().unwrap();
1610        assert_eq!(res, expected);
1611    }
1612
1613    #[test]
1614    fn test_append_html_ext() {
1615        //
1616        let mut input = Link::Text2Dest(
1617            Cow::from("abc"),
1618            Cow::from("/dir/3.0-My note.md"),
1619            Cow::from("title"),
1620        );
1621        let expected = Link::Text2Dest(
1622            Cow::from("abc"),
1623            Cow::from("/dir/3.0-My note.md.html"),
1624            Cow::from("title"),
1625        );
1626        input.append_html_ext();
1627        let output = input;
1628        assert_eq!(output, expected);
1629    }
1630
#[test]
fn test_to_html() {
    // Each entry pairs a link with the HTML markup it is expected to render.
    let cases = [
        // Backslashes: the expected `href` carries `/` instead of `\`,
        // while title and link text keep the backslash.
        (
            Link::Text2Dest(
                Cow::from("te\\x/t"),
                Cow::from("de\\s/t"),
                Cow::from("ti\\t/le"),
            ),
            "<a href=\"de/s/t\" title=\"ti\\t/le\">te\\x/t</a>",
        ),
        // `&` and `>` are expected to be escaped in the `href` and `title`
        // attributes, but not in the link text.
        (
            Link::Text2Dest(
                Cow::from("te&> xt"),
                Cow::from("de&> st"),
                Cow::from("ti&> tle"),
            ),
            "<a href=\"de&amp;&gt; st\" title=\"ti&amp;&gt; tle\">te&> xt</a>",
        ),
        // Images: `src` and `alt` are both expected to be escaped.
        (
            Link::Image(Cow::from("al&t"), Cow::from("sr&c")),
            "<img src=\"sr&amp;c\" alt=\"al&amp;t\">",
        ),
        // An empty title is expected to produce no `title` attribute.
        (
            Link::Text2Dest(Cow::from("te&> xt"), Cow::from("de&> st"), Cow::from("")),
            "<a href=\"de&amp;&gt; st\">te&> xt</a>",
        ),
    ];

    for (link, expected) in cases {
        assert_eq!(link.to_html(), expected);
    }
}
1665
#[test]
fn test_rewrite_links() {
    use crate::config::LocalLinkKind;

    // Input document: two remote links, one inline image and several local
    // links. The last two local links point outside the root directory.
    let html = "abc<a href=\"ftp://getreu.net\">Blog</a>\
        def<a href=\"https://getreu.net\">https://getreu.net</a>\
        ghi<img src=\"t m p.jpg\" alt=\"test 1\" />\
        jkl<a href=\"down/../down/my note 1.md\">my note 1</a>\
        mno<a href=\"http:./down/../dir/my note.md\">http:./down/../dir/my note.md</a>\
        pqr<a href=\"http:/down/../dir/my note.md\">\
        http:/down/../dir/my note.md</a>\
        stu<a href=\"http:/../dir/underflow/my note.md\">\
        not allowed dir</a>\
        vwx<a href=\"http:../../../not allowed dir/my note.md\">\
        not allowed</a>"
        .to_string();

    // Expected document: local destinations are given relative to the root
    // directory `/my/`, and the two links leaving the root are replaced by
    // an `<i>&lt;INVALID: ...&gt;</i>` marker.
    let expected = "abc<a href=\"ftp://getreu.net\">Blog</a>\
        def<a href=\"https://getreu.net\">getreu.net</a>\
        ghi<img src=\"/abs/note path/t m p.jpg\" alt=\"test 1\">\
        jkl<a href=\"/abs/note path/down/my note 1.md\">my note 1</a>\
        mno<a href=\"/abs/note path/dir/my note.md\">./down/../dir/my note.md</a>\
        pqr<a href=\"/dir/my note.md\">/down/../dir/my note.md</a>\
        stu<i>&lt;INVALID: /../dir/underflow/my note.md&gt;</i>\
        vwx<i>&lt;INVALID: ../../../not allowed dir/my note.md&gt;</i>"
        .to_string();

    let allowed_urls = Arc::new(RwLock::new(HashSet::new()));
    let output = rewrite_links(
        html,
        Path::new("/my/"),
        Path::new("/my/abs/note path/"),
        LocalLinkKind::Short,
        false,
        allowed_urls.clone(),
    );

    // The rewritten local destinations must have been registered in the
    // shared set.
    let url = allowed_urls.read_recursive();
    for registered in [
        "/abs/note path/t m p.jpg",
        "/abs/note path/dir/my note.md",
        "/abs/note path/down/my note 1.md",
    ] {
        assert!(url.contains(&PathBuf::from(registered)));
    }
    assert_eq!(output, expected);
}
1710
#[test]
fn test_rewrite_links2() {
    use crate::config::LocalLinkKind;

    // A `tpnote:` link wrapping an inline image: the link destination is
    // expected to be rewritten, the enclosed `<img>` to come out unchanged.
    let html = "abd<a href=\"tpnote:dir/my note.md\">\
        <img src=\"/imagedir/favicon-32x32.png\" alt=\"logo\"></a>abd"
        .to_string();

    let allowed_urls = Arc::new(RwLock::new(HashSet::new()));
    let output = rewrite_links(
        html,
        Path::new("/my/"),
        Path::new("/my/abs/note path/"),
        LocalLinkKind::Short,
        false,
        allowed_urls.clone(),
    );

    let url = allowed_urls.read_recursive();
    // Dump the registered paths (only visible when the test fails or runs
    // with `--nocapture`).
    println!("{:?}", allowed_urls.read_recursive());
    assert!(url.contains(&PathBuf::from("/abs/note path/dir/my note.md")));
    assert_eq!(
        output,
        "abd<a href=\"/abs/note path/dir/my note.md\">\
        <img src=\"/imagedir/favicon-32x32.png\" alt=\"logo\"></a>abd"
    );
}
1736
#[test]
fn test_rewrite_links3() {
    use crate::config::LocalLinkKind;

    // A fragment-only destination (`#1`): the expected output prefixes it
    // with the document directory given relative to the root.
    let html = "abd<a href=\"#1\"></a>abd".to_string();

    let allowed_urls = Arc::new(RwLock::new(HashSet::new()));
    let output = rewrite_links(
        html,
        Path::new("/my/"),
        Path::new("/my/abs/note path/"),
        LocalLinkKind::Short,
        false,
        allowed_urls.clone(),
    );

    let url = allowed_urls.read_recursive();
    // Dump the registered paths (only visible when the test fails or runs
    // with `--nocapture`).
    println!("{:?}", allowed_urls.read_recursive());
    assert!(url.contains(&PathBuf::from("/abs/note path/")));
    assert_eq!(output, "abd<a href=\"/abs/note path/#1\"></a>abd");
}
1759
#[test]
fn test_is_empty_html() {
    // Bring new methods into scope.
    use crate::html::HtmlStr;

    // `(input, expected result of is_empty_html())`.
    // Convention: only `` or a lone doctype declaration count as empty.
    // See: [HTML doctype declaration](https://www.w3schools.com/tags/tag_doctype.ASP)
    let cases = [
        // A lone HTML5 doctype declaration.
        ("<!DOCTYPE html>", true),
        // A stray `>` after the doctype declaration.
        ("<!DOCTYPE html>>", false),
        // HTML 4.01 doctype declaration with a leading space.
        (
            " <!DOCTYPE HTML PUBLIC \
            \"-//W3C//DTD HTML 4.01 Transitional//EN\" \
            \"http://www.w3.org/TR/html4/loose.dtd\">",
            true,
        ),
        // XHTML 1.1 doctype declaration with a leading space.
        (
            " <!DOCTYPE html PUBLIC \
            \"-//W3C//DTD XHTML 1.1//EN\" \
            \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">",
            true,
        ),
        // Doctype declaration followed by content.
        ("<!DOCTYPE html>Some content", false),
        // The empty string.
        ("", true),
        // An `<html>` element without doctype declaration.
        ("<html></html>", false),
        // Doctype declaration followed by an `<html>` element.
        ("<!DOCTYPE html><html></html>", false),
    ];

    for (input, expected) in cases {
        assert_eq!(
            String::from(input).is_empty_html(),
            expected,
            "input: {:?}",
            input
        );
    }
}
1808
#[test]
fn test_has_html_start_tag() {
    // Bring new methods into scope.
    use crate::html::HtmlStr;

    // `(input, expected result of has_html_start_tag())`.
    let cases = [
        // Doctype declaration followed by content.
        ("<!DOCTYPE html>Some content", true),
        // Rejected: by convention `<!DOCTYPE html>` is required as first tag.
        ("<html>Some content</html>", false),
        // Rejected for the same reason.
        ("<HTML>", false),
        // Leading spaces and a lowercase `doctype`.
        ("  <!doctype html>Some content", true),
        // A non-HTML doctype.
        ("<!DOCTYPE other>", false),
        // The empty string.
        ("", false),
    ];

    for (input, expected) in cases {
        assert_eq!(
            String::from(input).has_html_start_tag(),
            expected,
            "input: {:?}",
            input
        );
    }
}
1834
#[test]
fn test_is_html_unchecked() {
    // Bring new methods into scope.
    use crate::html::HtmlStr;

    // `(input, expected result of is_html_unchecked())`.
    let cases = [
        // A lone lowercase doctype declaration.
        ("<!doctype html>", true),
        // Doctype declaration with extra tokens, followed by text.
        ("<!doctype html abc>def", true),
        // Doctype declaration without the closing `>`.
        ("<!doctype html", false),
        // `<html>` start tag without doctype declaration.
        ("<html><body></body></html>", true),
        // `<html>` start tag with an attribute, followed by text.
        ("<html abc>def", true),
        // `<html` start tag without the closing `>`.
        ("<html abc def", false),
        // Leading whitespace.
        ("   <!doctype html><html><body></body></html>", true),
        // A non-HTML doctype.
        ("<!DOCTYPE xml><root></root>", false),
        // A doctype declaration that names no document type.
        ("<!doctype>", false),
    ];

    for (input, expected) in cases {
        assert_eq!(input.is_html_unchecked(), expected, "input: {:?}", input);
    }
}
1876
#[test]
fn test_prepend_html_start_tag() {
    // Bring new methods into scope.
    use crate::html::HtmlString;

    // `(input, expected result of prepend_html_start_tag())`.
    let cases: [(&str, Result<String, InputStreamError>); 6] = [
        // Already starts with an HTML doctype: expected back unchanged.
        (
            "<!DOCTYPE html>Some content",
            Ok(String::from("<!DOCTYPE html>Some content")),
        ),
        // A lone HTML doctype: expected back unchanged.
        ("<!DOCTYPE html>", Ok(String::from("<!DOCTYPE html>"))),
        // No doctype, but an `<html>` tag: the doctype is prepended.
        (
            "<html>Some content",
            Ok(String::from("<!DOCTYPE html><html>Some content")),
        ),
        // A non-HTML doctype is reported as an error carrying the input.
        (
            "<!DOCTYPE other>",
            Err(InputStreamError::NonHtmlDoctype {
                html: "<!DOCTYPE other>".to_string(),
            }),
        ),
        // Plain text without any tag: the doctype is prepended.
        (
            "Some content",
            Ok(String::from("<!DOCTYPE html>Some content")),
        ),
        // The empty string becomes a lone doctype.
        ("", Ok(String::from("<!DOCTYPE html>"))),
    ];

    for (input, expected) in cases {
        assert_eq!(String::from(input).prepend_html_start_tag(), expected);
    }
}
1920}