Skip to main content

tpnote_lib/
html.rs

1//! Helper functions dealing with HTML conversion.
2use crate::clone_ext::CloneExt;
3use crate::error::InputStreamError;
4use crate::filename::{NotePath, NotePathStr};
5use crate::{config::LocalLinkKind, error::NoteError};
6use html_escape;
7use parking_lot::RwLock;
8use parse_hyperlinks::parser::Link;
9use parse_hyperlinks_extras::iterator_html::HtmlLinkInlineImage;
10use percent_encoding::percent_decode_str;
11use std::path::MAIN_SEPARATOR_STR;
12use std::{
13    borrow::Cow,
14    collections::HashSet,
15    path::{Component, Path, PathBuf},
16    sync::Arc,
17};
18
/// File extension appended to local links that point to Tp-Note files.
pub(crate) const HTML_EXT: &str = ".html";

/// Separator between a local path and an optional trailing format string.
const FORMAT_SEPARATOR: char = '?';

/// When this character follows `FORMAT_SEPARATOR` directly, the format
/// patterns are matched against the sort-tag of the filename.
const FORMAT_ONLY_SORT_TAG: char = '#';

/// When this string follows `FORMAT_SEPARATOR` directly, the format
/// patterns are matched against the complete filename.
const FORMAT_COMPLETE_FILENAME: &str = "?";

/// Optional separator inside a format string. It is placed somewhere after
/// `FORMAT_SEPARATOR` and splits the format string into a _from_ and a _to_
/// pattern.
const FORMAT_FROM_TO_SEPARATOR: char = ':';
37
/// Assembles the path of a local link from `root_path`, `docdir` and `dest`.
///
/// * If `rewrite_rel_paths` and `dest` is relative: with `rewrite_abs_paths`
///   the result is `docdir` + `dest`, without it the result is
///   `"/"` + `docdir` stripped by `root_path` + `dest`.
/// * If not `rewrite_rel_paths` and `dest` is relative, return `dest`.
/// * If `rewrite_abs_paths` and `dest` is absolute, return `root_path` +
///   `dest`.
/// * If not `rewrite_abs_paths` and `dest` is absolute, return `dest`.
///
/// The `dest` portion of the output is always normalized lexically, i.e.
/// `..` components are resolved without accessing the filesystem. Leading
/// `..` components of a relative result are preserved (`../../a` stays
/// `../../a`).
///
/// Returns `None` when `dest` climbs above the root directory of an absolute
/// result, when `docdir` is not located in `root_path` (relative rewrite
/// without `rewrite_abs_paths`), or when the assembled path is empty.
/// Asserts in debug mode that `docdir` is in `root_path`.
fn assemble_link(
    root_path: &Path,
    docdir: &Path,
    dest: &Path,
    rewrite_rel_paths: bool,
    rewrite_abs_paths: bool,
) -> Option<PathBuf> {
    /// True if one of the first two components of `path` is the root
    /// directory. Under Windows `.is_relative()` does not detect
    /// `Component::RootDir`, therefore the components are inspected.
    fn is_rooted(path: &Path) -> bool {
        path.components().take(2).any(|c| c == Component::RootDir)
    }

    /// Concatenates `path` and `append`.
    /// The `append` portion of the output is normalized where possible.
    /// In case of underflow of an absolute link, the returned path is empty.
    fn append(path: &mut PathBuf, append: &Path) {
        for dir in append.components() {
            match dir {
                Component::ParentDir => {
                    let rooted = is_rooted(path);
                    // `PathBuf::pop()` succeeds on a trailing `..` (the
                    // parent of `..` is the empty path). Popping there would
                    // cancel two consecutive `..` of a relative path, so we
                    // must climb one level further instead.
                    let ends_with_parent_dir =
                        path.components().next_back() == Some(Component::ParentDir);
                    if (!rooted && ends_with_parent_dir) || !path.pop() {
                        if rooted {
                            // Underflow: the link leaves the root directory.
                            path.clear();
                            break;
                        }
                        path.push(Component::ParentDir.as_os_str());
                    }
                }
                Component::Normal(c) => path.push(c),
                // Root, prefix and `.` components of `append` are dropped.
                _ => {}
            }
        }
    }

    let dest_is_relative = !is_rooted(dest);

    // The relative rewrite below relies on `strip_prefix`, which only works
    // when `docdir` is located in `root_path`.
    debug_assert!(docdir.starts_with(root_path));

    // Choose the base path; `dest` is appended to it afterwards.
    let mut link = match (rewrite_rel_paths, rewrite_abs_paths, dest_is_relative) {
        // *** Relative links.
        // Result: "/" + docdir.strip(root_path) + dest
        (true, false, true) => {
            let link = PathBuf::from(Component::RootDir.as_os_str());
            link.join(docdir.strip_prefix(root_path).ok()?)
        }
        // Result: docdir + dest
        (true, true, true) => docdir.to_path_buf(),
        // Result: dest
        (false, _, true) => PathBuf::new(),
        // *** Absolute links.
        // Result: "/" + dest
        (_, false, false) => PathBuf::from(Component::RootDir.as_os_str()),
        // Result: root_path + dest
        (_, true, false) => root_path.to_path_buf(),
    };
    append(&mut link, dest);

    if link.as_os_str().is_empty() {
        None
    } else {
        Some(link)
    }
}
119
120trait Hyperlink {
121    /// A helper function, that first HTML escape decodes all strings of the
122    /// link. Then it percent decodes the link destination (and the
123    /// link text in case of an autolink).
124    fn decode_ampersand_and_percent(&mut self);
125
126    /// True if the value is a local link.
127    #[allow(clippy::ptr_arg)]
128    fn is_local_fn(value: &Cow<str>) -> bool;
129
130    /// * `Link::Text2Dest`: strips a possible scheme in local `dest`.
131    /// * `Link::Image2Dest`: strip local scheme in `dest`.
132    /// * `Link::Image`: strip local scheme in `src`.
133    ///
134    ///  No action if not local.
135    fn strip_local_scheme(&mut self);
136
137    /// Helper function that strips a possible scheme in `input`.
138    fn strip_scheme_fn(input: &mut Cow<str>);
139
140    /// True if the link is:
141    /// * `Link::Text2Dest` and the link text equals the link destination, or
142    /// * `Link::Image` and the links `alt` equals the link source.
143    ///
144    /// WARNING: place this test after `decode_html_escape_and_percent()`
145    /// and before: `rebase_local_link`, `expand_shorthand_link`,
146    /// `rewrite_autolink` and `apply_format_attribute`.
147    fn is_autolink(&self) -> bool;
148
149    /// A method that converts the relative URLs (local links) in `self`.
150    /// If successful, it returns `Ok(Some(URL))`, otherwise
151    /// `Err(NoteError::InvalidLocalLink)`.
152    /// If `self` contains an absolute URL, no conversion is performed and the
153    /// return value is `Ok(())`.
154    ///
155    /// Conversion details:
156    /// The base path for this conversion (usually where the HTML file resides),
157    /// is `docdir`. If not `rewrite_rel_links`, relative local links are not
158    /// converted. Furthermore, all local links starting with `/` are prepended
159    /// with `root_path`. All absolute URLs always remain untouched.
160    ///
161    /// Algorithm:
162    /// 1. If `rewrite_abs_links==true` and `link` starts with `/`, concatenate
163    ///    and return `root_path` and `dest`.
164    /// 2. If `rewrite_abs_links==false` and `dest` does not start wit `/`,
165    ///    return `dest`.
166    /// 3. If `rewrite_ext==true` and the link points to a known Tp-Note file
167    ///    extension, then `.html` is appended to the converted link.
168    ///
169    /// Remark: The _anchor's text property_ is never changed. However, there
170    /// is one exception: when the text contains a URL starting with `http:` or
171    /// `https:`, only the file stem is kept. Example, the anchor text property:
172    /// `<a ...>http:dir/my file.md</a>` is rewritten into `<a ...>my file</a>`.
173    ///
174    /// Contracts:
175    /// 1. `link` may have a scheme.
176    /// 2. `link` is `Link::Text2Dest` or `Link::Image`
177    /// 3. `root_path` and `docdir` are absolute paths to directories.
178    /// 4. `root_path` is never empty `""`. It can be `"/"`.
179    fn rebase_local_link(
180        &mut self,
181        root_path: &Path,
182        docdir: &Path,
183        rewrite_rel_paths: bool,
184        rewrite_abs_paths: bool,
185    ) -> Result<(), NoteError>;
186
187    /// If `dest` in `Link::Text2Dest` contains only a sort
188    /// tag as filename, expand the latter to a full filename.
189    /// Otherwise, no action.
190    /// This method accesses the filesystem. Therefore sometimes `prepend_path`
191    /// is needed as parameter and prepended.
192    fn expand_shorthand_link(&mut self, prepend_path: Option<&Path>) -> Result<(), NoteError>;
193
194    /// This removes a possible scheme in `text`.
195    /// Call this method only when you sure that this
196    /// is an autolink by testing with `is_autolink()`.
197    fn rewrite_autolink(&mut self);
198
199    /// A formatting attribute is a format string starting with `?` followed
200    /// by one or two patterns. It is appended to `dest` or `src`.
201    /// Processing details:
202    /// 1. Extract some a possible formatting attribute string in `dest`
203    ///    (`Link::Text2Dest`) or `src` (`Link::Image`) after `?`.
204    /// 2. Extract the _path_ before `?` in `dest` or `src`.
205    /// 3. Apply the formatting to _path_.
206    /// 4. Store the result by overwriting `text` or `alt`.
207    fn apply_format_attribute(&mut self);
208
209    /// If the link destination `dest` is a local path, return it.
210    /// Otherwise return `None`.
211    /// Acts on `Link:Text2Dest` and `Link::Imgage2Dest` only.
212    fn get_local_link_dest_path(&self) -> Option<&Path>;
213
214    /// If `dest` or `src` is a local path, return it.
215    /// Otherwise return `None`.
216    /// Acts an `Link:Image` and `Link::Image2Dest` only.
217    fn get_local_link_src_path(&self) -> Option<&Path>;
218
219    /// If the extension of a local path in `dest` is some Tp-Note
220    /// extension, append `.html` to the path. Otherwise silently return.
221    /// Acts on `Link:Text2Dest` only.
222    fn append_html_ext(&mut self);
223
224    /// Renders `Link::Text2Dest`, `Link::Image2Dest` and `Link::Image`
225    /// to HTML. Some characters in `dest` or `src` might be HTML
226    /// escape encoded. This does not percent encode at all, because
227    /// we know, that the result will be inserted later in a UTF-8 template.
228    fn to_html(&self) -> String;
229}
230
231impl Hyperlink for Link<'_> {
232    #[inline]
233    fn decode_ampersand_and_percent(&mut self) {
234        // HTML escape decode value.
235        fn dec_amp(val: &mut Cow<str>) {
236            let decoded_text = html_escape::decode_html_entities(val);
237            if matches!(&decoded_text, Cow::Owned(..)) {
238                // Does nothing, but satisfying the borrow checker. Does not `clone()`.
239                let decoded_text = Cow::Owned(decoded_text.into_owned());
240                // Store result.
241                let _ = std::mem::replace(val, decoded_text);
242            }
243        }
244
245        // HTML escape decode and percent decode value.
246        fn dec_amp_percent(val: &mut Cow<str>) {
247            dec_amp(val);
248            let decoded_dest = percent_decode_str(val.as_ref()).decode_utf8().unwrap();
249            if matches!(&decoded_dest, Cow::Owned(..)) {
250                // Does nothing, but satisfying the borrow checker. Does not `clone()`.
251                let decoded_dest = Cow::Owned(decoded_dest.into_owned());
252                // Store result.
253                let _ = std::mem::replace(val, decoded_dest);
254            }
255        }
256
257        match self {
258            Link::Text2Dest(text1, dest, title) => {
259                dec_amp(text1);
260                dec_amp_percent(dest);
261                dec_amp(title);
262            }
263            Link::Image(alt, src) => {
264                dec_amp(alt);
265                dec_amp_percent(src);
266            }
267            Link::Image2Dest(text1, alt, src, text2, dest, title) => {
268                dec_amp(text1);
269                dec_amp(alt);
270                dec_amp_percent(src);
271                dec_amp(text2);
272                dec_amp_percent(dest);
273                dec_amp(title);
274            }
275            _ => unimplemented!(),
276        };
277    }
278
279    //
280    fn is_local_fn(dest: &Cow<str>) -> bool {
281        !((dest.contains("://") && !dest.contains(":///"))
282            || dest.starts_with("mailto:")
283            || dest.starts_with("tel:"))
284    }
285
286    //
287    fn strip_local_scheme(&mut self) {
288        fn strip(dest: &mut Cow<str>) {
289            if <Link<'_> as Hyperlink>::is_local_fn(dest) {
290                <Link<'_> as Hyperlink>::strip_scheme_fn(dest);
291            }
292        }
293
294        match self {
295            Link::Text2Dest(_, dest, _title) => strip(dest),
296            Link::Image2Dest(_, _, src, _, dest, _) => {
297                strip(src);
298                strip(dest);
299            }
300            Link::Image(_, src) => strip(src),
301            _ => {}
302        };
303    }
304
305    //
306    fn strip_scheme_fn(inout: &mut Cow<str>) {
307        let output = inout
308            .trim_start_matches("https://")
309            .trim_start_matches("https:")
310            .trim_start_matches("http://")
311            .trim_start_matches("http:")
312            .trim_start_matches("tpnote:")
313            .trim_start_matches("mailto:")
314            .trim_start_matches("tel:");
315        if output != inout.as_ref() {
316            let _ = std::mem::replace(inout, Cow::Owned(output.to_string()));
317        }
318    }
319
320    //
321    fn is_autolink(&self) -> bool {
322        let (text, dest) = match self {
323            Link::Text2Dest(text, dest, _title) => (text, dest),
324            Link::Image(alt, source) => (alt, source),
325            // `Link::Image2Dest` is never an autolink.
326            _ => return false,
327        };
328        text == dest
329    }
330
331    //
332    fn rebase_local_link(
333        &mut self,
334        root_path: &Path,
335        docdir: &Path,
336        rewrite_rel_paths: bool,
337        rewrite_abs_paths: bool,
338    ) -> Result<(), NoteError> {
339        let do_rebase = |path: &mut Cow<str>| -> Result<(), NoteError> {
340            if <Link as Hyperlink>::is_local_fn(path) {
341                let dest_out = assemble_link(
342                    root_path,
343                    docdir,
344                    Path::new(path.as_ref()),
345                    rewrite_rel_paths,
346                    rewrite_abs_paths,
347                )
348                .ok_or(NoteError::InvalidLocalPath {
349                    path: path.as_ref().to_string(),
350                })?;
351
352                // Store result.
353                let new_dest = Cow::Owned(dest_out.to_str().unwrap_or_default().to_string());
354                let _ = std::mem::replace(path, new_dest);
355            }
356            Ok(())
357        };
358
359        match self {
360            Link::Text2Dest(_, dest, _) => do_rebase(dest),
361            Link::Image2Dest(_, _, src, _, dest, _) => do_rebase(src).and_then(|_| do_rebase(dest)),
362            Link::Image(_, src) => do_rebase(src),
363            _ => unimplemented!(),
364        }
365    }
366
367    //
368    fn expand_shorthand_link(&mut self, prepend_path: Option<&Path>) -> Result<(), NoteError> {
369        let shorthand_link = match self {
370            Link::Text2Dest(_, dest, _) => dest,
371            Link::Image2Dest(_, _, _, _, dest, _) => dest,
372            _ => return Ok(()),
373        };
374
375        if !<Link as Hyperlink>::is_local_fn(shorthand_link) {
376            return Ok(());
377        }
378
379        let (shorthand_str, shorthand_format) = match shorthand_link.split_once(FORMAT_SEPARATOR) {
380            Some((path, fmt)) => (path, Some(fmt)),
381            None => (shorthand_link.as_ref(), None),
382        };
383
384        let shorthand_path = Path::new(shorthand_str);
385
386        if let Some(sort_tag) = shorthand_str.is_valid_sort_tag() {
387            let full_shorthand_path = if let Some(root_path) = prepend_path {
388                // Concatenate `root_path` and `shorthand_path`.
389                let shorthand_path = shorthand_path
390                    .strip_prefix(MAIN_SEPARATOR_STR)
391                    .unwrap_or(shorthand_path);
392                Cow::Owned(root_path.join(shorthand_path))
393            } else {
394                Cow::Borrowed(shorthand_path)
395            };
396
397            // Search for the file.
398            let found = full_shorthand_path
399                .parent()
400                .and_then(|dir| dir.find_file_with_sort_tag(sort_tag));
401
402            if let Some(path) = found {
403                // We prepended `root_path` before, we can safely strip it
404                // and unwrap.
405                let found_link = path
406                    .strip_prefix(prepend_path.unwrap_or(Path::new("")))
407                    .unwrap();
408                // Prepend `/`.
409                let mut found_link = Path::new(MAIN_SEPARATOR_STR)
410                    .join(found_link)
411                    .to_str()
412                    .unwrap_or_default()
413                    .to_string();
414
415                if let Some(fmt) = shorthand_format {
416                    found_link.push(FORMAT_SEPARATOR);
417                    found_link.push_str(fmt);
418                }
419
420                // Store result.
421                let _ = std::mem::replace(shorthand_link, Cow::Owned(found_link));
422            } else {
423                return Err(NoteError::CanNotExpandShorthandLink {
424                    path: full_shorthand_path.to_string_lossy().into_owned(),
425                });
426            }
427        }
428        Ok(())
429    }
430
431    //
432    fn rewrite_autolink(&mut self) {
433        let text = match self {
434            Link::Text2Dest(text, _, _) => text,
435            Link::Image(alt, _) => alt,
436            _ => return,
437        };
438
439        <Link as Hyperlink>::strip_scheme_fn(text);
440    }
441
442    //
443    fn apply_format_attribute(&mut self) {
444        // Is this an absolute URL?
445
446        let (text, dest) = match self {
447            Link::Text2Dest(text, dest, _) => (text, dest),
448            Link::Image(alt, source) => (alt, source),
449            _ => return,
450        };
451
452        if !<Link as Hyperlink>::is_local_fn(dest) {
453            return;
454        }
455
456        // We assume, that `dest` had been expanded already, so we can extract
457        // the full filename here.
458        // If ever it ends with a format string we apply it. Otherwise we quit
459        // the method and do nothing.
460        let (path, format) = match dest.split_once(FORMAT_SEPARATOR) {
461            Some(s) => s,
462            None => return,
463        };
464
465        let mut short_text = Path::new(path)
466            .file_name()
467            .unwrap_or_default()
468            .to_str()
469            .unwrap_or_default();
470
471        // Select what to match:
472        let format = if format.starts_with(FORMAT_COMPLETE_FILENAME) {
473            // Keep complete filename.
474            format
475                .strip_prefix(FORMAT_COMPLETE_FILENAME)
476                .unwrap_or(format)
477        } else if format.starts_with(FORMAT_ONLY_SORT_TAG) {
478            // Keep only format-tag.
479            short_text = Path::new(path).disassemble().0;
480            format.strip_prefix(FORMAT_ONLY_SORT_TAG).unwrap_or(format)
481        } else {
482            // Keep only stem.
483            short_text = Path::new(path).disassemble().2;
484            format
485        };
486
487        match format.split_once(FORMAT_FROM_TO_SEPARATOR) {
488            // No `:`
489            None => {
490                if !format.is_empty()
491                    && let Some(idx) = short_text.find(format) {
492                        short_text = &short_text[..idx];
493                    };
494            }
495            // Some `:`
496            Some((from, to)) => {
497                if !from.is_empty()
498                    && let Some(idx) = short_text.find(from) {
499                        short_text = &short_text[(idx + from.len())..];
500                    };
501                if !to.is_empty()
502                    && let Some(idx) = short_text.find(to) {
503                        short_text = &short_text[..idx];
504                    };
505            }
506        }
507        // Store the result.
508        let _ = std::mem::replace(text, Cow::Owned(short_text.to_string()));
509        let _ = std::mem::replace(dest, Cow::Owned(path.to_string()));
510    }
511
512    //
513    fn get_local_link_dest_path(&self) -> Option<&Path> {
514        let dest = match self {
515            Link::Text2Dest(_, dest, _) => dest,
516            Link::Image2Dest(_, _, _, _, dest, _) => dest,
517            _ => return None,
518        };
519        if <Link as Hyperlink>::is_local_fn(dest) {
520            // Strip URL fragment.
521            match (dest.rfind('#'), dest.rfind(['/', '\\'])) {
522                (Some(n), sep) if sep.is_some_and(|sep| n > sep) || sep.is_none() => {
523                    Some(Path::new(&dest.as_ref()[..n]))
524                }
525                _ => Some(Path::new(dest.as_ref())),
526            }
527        } else {
528            None
529        }
530    }
531
532    //
533    fn get_local_link_src_path(&self) -> Option<&Path> {
534        let src = match self {
535            Link::Image2Dest(_, _, src, _, _, _) => src,
536            Link::Image(_, src) => src,
537            _ => return None,
538        };
539        if <Link as Hyperlink>::is_local_fn(src) {
540            Some(Path::new(src.as_ref()))
541        } else {
542            None
543        }
544    }
545
546    //
547    fn append_html_ext(&mut self) {
548        let dest = match self {
549            Link::Text2Dest(_, dest, _) => dest,
550            Link::Image2Dest(_, _, _, _, dest, _) => dest,
551            _ => return,
552        };
553        if <Link as Hyperlink>::is_local_fn(dest) {
554            let path = dest.as_ref();
555            if path.has_tpnote_ext() {
556                let mut newpath = path.to_string();
557                newpath.push_str(HTML_EXT);
558
559                let _ = std::mem::replace(dest, Cow::Owned(newpath));
560            }
561        }
562    }
563
564    //
565    fn to_html(&self) -> String {
566        // HTML escape encode double quoted attributes
567        fn enc_amp(val: Cow<str>) -> Cow<str> {
568            let s = html_escape::encode_double_quoted_attribute(val.as_ref());
569            if s == val {
570                val
571            } else {
572                // No cloning happens here, because we own `s` already.
573                Cow::Owned(s.into_owned())
574            }
575        }
576        // Replace Windows backslash, then HTML escape encode.
577        fn repl_backspace_enc_amp(val: Cow<str>) -> Cow<str> {
578            let val = if val.as_ref().contains('\\') {
579                Cow::Owned(val.to_string().replace('\\', "/"))
580            } else {
581                val
582            };
583            let s = html_escape::encode_double_quoted_attribute(val.as_ref());
584            if s == val {
585                val
586            } else {
587                // No cloning happens here, because we own `s` already.
588                Cow::Owned(s.into_owned())
589            }
590        }
591
592        match self {
593            Link::Text2Dest(text, dest, title) => {
594                // Format title.
595                let title_html = if !title.is_empty() {
596                    format!(" title=\"{}\"", enc_amp(title.shallow_clone()))
597                } else {
598                    "".to_string()
599                };
600
601                format!(
602                    "<a href=\"{}\"{}>{}</a>",
603                    repl_backspace_enc_amp(dest.shallow_clone()),
604                    title_html,
605                    text
606                )
607            }
608            Link::Image2Dest(text1, alt, src, text2, dest, title) => {
609                // Format title.
610                let title_html = if !title.is_empty() {
611                    format!(" title=\"{}\"", enc_amp(title.shallow_clone()))
612                } else {
613                    "".to_string()
614                };
615
616                format!(
617                    "<a href=\"{}\"{}>{}<img src=\"{}\" alt=\"{}\">{}</a>",
618                    repl_backspace_enc_amp(dest.shallow_clone()),
619                    title_html,
620                    text1,
621                    repl_backspace_enc_amp(src.shallow_clone()),
622                    enc_amp(alt.shallow_clone()),
623                    text2
624                )
625            }
626            Link::Image(alt, src) => {
627                format!(
628                    "<img src=\"{}\" alt=\"{}\">",
629                    repl_backspace_enc_amp(src.shallow_clone()),
630                    enc_amp(alt.shallow_clone())
631                )
632            }
633            _ => unimplemented!(),
634        }
635    }
636}
637
638#[inline]
639/// A helper function that scans the input HTML document in `html_input` for
640/// HTML hyperlinks. When it finds a relative URL (local link), it analyzes it's
641/// path. Depending on the `local_link_kind` configuration, relative local
642/// links are converted into absolute local links and eventually rebased.
643///
644/// In order to achieve this, the user must respect the following convention
645/// concerning absolute local links in Tp-Note documents:
646/// 1. When a document contains a local link with an absolute path (absolute
647///    local link), the base of this path is considered to be the directory
648///    where the marker file ‘.tpnote.toml’ resides (or ‘/’ in non exists). The
649///    marker file directory is `root_path`.
650/// 2. Furthermore, the parameter `docdir` contains the absolute path of the
651///    directory of the currently processed HTML document. The user guarantees
652///    that `docdir` is the base for all relative local links in the document.
653///    Note: `docdir` must always start with `root_path`.
654///
655/// If `LocalLinkKind::Off`, relative local links are not converted.
656/// If `LocalLinkKind::Short`, relative local links are converted into an
657/// absolute local links with `root_path` as base directory.
658/// If `LocalLinkKind::Long`, in addition to the above, the resulting absolute
659/// local link is prepended with `root_path`.
660///
661/// If `rewrite_ext` is true and a local link points to a known
662/// Tp-Note file extension, then `.html` is appended to the converted link.
663///
664/// Remark: The link's text property is never changed. However, there is
665/// one exception: when the link's text contains a string similar to URLs,
666/// starting with `http:` or `tpnote:`. In this case, the string is interpreted
667/// as URL and only the stem of the filename is displayed, e.g.
668/// `<a ...>http:dir/my file.md</a>` is replaced with `<a ...>my file</a>`.
669///
670/// Finally, before a converted local link is reinserted in the output HTML, a
671/// copy of that link is kept in `allowed_local_links` for further bookkeeping.
672///
673/// NB: All absolute URLs (starting with a domain) always remain untouched.
674///
675/// NB2: It is guaranteed, that the resulting HTML document contains only local
676/// links to other documents within `root_path`. Deviant links displayed as
677/// `INVALID LOCAL LINK` and URL is discarded.
678pub fn rewrite_links(
679    html_input: String,
680    root_path: &Path,
681    docdir: &Path,
682    local_link_kind: LocalLinkKind,
683    rewrite_ext: bool,
684    allowed_local_links: Arc<RwLock<HashSet<PathBuf>>>,
685) -> String {
686    let (rewrite_rel_paths, rewrite_abs_paths) = match local_link_kind {
687        LocalLinkKind::Off => (false, false),
688        LocalLinkKind::Short => (true, false),
689        LocalLinkKind::Long => (true, true),
690    };
691
692    // Search for hyperlinks and inline images in the HTML rendition
693    // of this note.
694    let mut rest = &*html_input;
695    let mut html_out = String::new();
696    for ((skipped, _consumed, remaining), mut link) in HtmlLinkInlineImage::new(&html_input) {
697        html_out.push_str(skipped);
698        rest = remaining;
699
700        // Check if `text` = `dest`.
701        let mut link_is_autolink = link.is_autolink();
702
703        // Percent decode link destination.
704        link.decode_ampersand_and_percent();
705
706        // Check again if `text` = `dest`.
707        link_is_autolink = link_is_autolink || link.is_autolink();
708
709        link.strip_local_scheme();
710
711        // Rewrite the local link.
712        match link
713            .rebase_local_link(root_path, docdir, rewrite_rel_paths, rewrite_abs_paths)
714            .and_then(|_| {
715                link.expand_shorthand_link(
716                    (matches!(local_link_kind, LocalLinkKind::Short)).then_some(root_path),
717                )
718            }) {
719            Ok(()) => {}
720            Err(e) => {
721                let e = e.to_string();
722                let e = html_escape::encode_text(&e);
723                html_out.push_str(&format!("<i>{}</i>", e));
724                continue;
725            }
726        };
727
728        if link_is_autolink {
729            link.rewrite_autolink();
730        }
731
732        link.apply_format_attribute();
733
734        if let Some(dest_path) = link.get_local_link_dest_path() {
735            allowed_local_links.write().insert(dest_path.to_path_buf());
736        };
737        if let Some(src_path) = link.get_local_link_src_path() {
738            allowed_local_links.write().insert(src_path.to_path_buf());
739        };
740
741        if rewrite_ext {
742            link.append_html_ext();
743        }
744        html_out.push_str(&link.to_html());
745    }
746    // Add the last `remaining`.
747    html_out.push_str(rest);
748
749    log::trace!(
750        "Viewer: referenced allowed local files: {}",
751        allowed_local_links
752            .read_recursive()
753            .iter()
754            .map(|p| {
755                let mut s = "\n    '".to_string();
756                s.push_str(&p.display().to_string());
757                s
758            })
759            .collect::<String>()
760    );
761
762    html_out
763    // The `RwLockWriteGuard` is released here.
764}
765
/// This trait deals with tagged HTML `&str` data.
pub trait HtmlStr {
    /// Lowercase pattern to check if this is a Doctype tag.
    const TAG_DOCTYPE_PAT: &'static str = "<!doctype";
    /// Lowercase pattern to check if this Doctype is HTML.
    const TAG_DOCTYPE_HTML_PAT: &'static str = "<!doctype html";
    /// Doctype HTML tag. This is inserted by
    /// `<HtmlString>.prepend_html_start_tag()`.
    const TAG_DOCTYPE_HTML: &'static str = "<!DOCTYPE html>";
    /// Lowercase pattern to check if this is an HTML start tag.
    const START_TAG_HTML_PAT: &'static str = "<html";
    /// HTML end tag.
    const END_TAG_HTML: &'static str = "</html>";

    /// We consider `self` empty when it is the empty string, or when its
    /// first line (leading whitespace skipped, ASCII case ignored) is
    /// nothing but a `<!DOCTYPE html...>` tag.
    fn is_empty_html(&self) -> bool;

    /// Same as `is_empty_html()`, but callable as associated function on a
    /// `&str`, which does not pull in additional trait bounds.
    fn is_empty_html2(html: &str) -> bool {
        html.is_empty_html()
    }

    /// True if the first line of the stream starts with `<!DOCTYPE html`
    /// (leading whitespace skipped, ASCII case ignored).
    fn has_html_start_tag(&self) -> bool;

    /// Same as `has_html_start_tag()`, but callable as associated function
    /// on a `&str`, which does not pull in additional trait bounds.
    fn has_html_start_tag2(html: &str) -> bool {
        html.has_html_start_tag()
    }

    /// Some heuristics to guess if the input stream contains HTML.
    /// Current implementation:
    /// True if:
    ///
    /// * The stream starts with `<!DOCTYPE html ...>`, or
    /// * the stream starts with `<html ...>`
    ///
    /// This function does not check if the recognized HTML is valid.
    fn is_html_unchecked(&self) -> bool;
}

impl HtmlStr for str {
    fn is_empty_html(&self) -> bool {
        if self.is_empty() {
            return true;
        }

        // Only the first line behind the leading whitespace is inspected.
        let Some(first_line) = self.trim_start().lines().next() else {
            return false;
        };
        let first_line = first_line.to_ascii_lowercase();

        // The first closing bracket must be in last position.
        first_line.starts_with(Self::TAG_DOCTYPE_HTML_PAT)
            && first_line
                .find('>')
                .is_some_and(|pos| pos + 1 == first_line.len())
    }

    fn has_html_start_tag(&self) -> bool {
        self.trim_start().lines().next().is_some_and(|line| {
            line.to_ascii_lowercase()
                .starts_with(Self::TAG_DOCTYPE_HTML_PAT)
        })
    }

    fn is_html_unchecked(&self) -> bool {
        let Some(first_line) = self.trim_start().lines().next() else {
            return false;
        };
        let first_line = first_line.to_ascii_lowercase();

        // One of the start patterns must be followed by a closing bracket
        // on the same line.
        [Self::TAG_DOCTYPE_HTML_PAT, Self::START_TAG_HTML_PAT]
            .iter()
            .any(|pat| {
                first_line
                    .strip_prefix(pat)
                    .is_some_and(|tail| tail.contains('>'))
            })
    }
}
855
856/// This trait deals with tagged HTML `String` data.
857pub trait HtmlString: Sized {
858    /// If the input does not start with `<!DOCTYPE html`
859    /// (or lowercase variants), then insert `<!DOCTYPE html>`.
860    /// Returns `InputStreamError::NonHtmlDoctype` if there is another Doctype
861    /// already.
862    fn prepend_html_start_tag(self) -> Result<Self, InputStreamError>;
863}
864
865impl HtmlString for String {
866    fn prepend_html_start_tag(self) -> Result<Self, InputStreamError> {
867        // Bring `HtmlStr` methods into scope.
868        use crate::html::HtmlStr;
869
870        let html2 = self
871            .trim_start()
872            .lines()
873            .next()
874            .map(|l| l.to_ascii_lowercase())
875            .unwrap_or_default();
876
877        if html2.starts_with(<str as HtmlStr>::TAG_DOCTYPE_HTML_PAT) {
878            // Has a start tag already.
879            Ok(self)
880        } else if !html2.starts_with(<str as HtmlStr>::TAG_DOCTYPE_PAT) {
881            // Insert HTML Doctype tag.
882            let mut html = self;
883            html.insert_str(0, <str as HtmlStr>::TAG_DOCTYPE_HTML);
884            Ok(html)
885        } else {
886            // There is a Doctype other than HTML.
887            Err(InputStreamError::NonHtmlDoctype {
888                html: self.chars().take(25).collect::<String>(),
889            })
890        }
891    }
892}
893
894#[cfg(test)]
895mod tests {
896
897    use crate::error::InputStreamError;
898    use crate::error::NoteError;
899    use crate::html::Hyperlink;
900    use crate::html::assemble_link;
901    use crate::html::rewrite_links;
902    use parking_lot::RwLock;
903    use parse_hyperlinks::parser::Link;
904    use parse_hyperlinks_extras::parser::parse_html::take_link;
905    use std::borrow::Cow;
906    use std::{
907        collections::HashSet,
908        path::{Path, PathBuf},
909        sync::Arc,
910    };
911
912    #[test]
913    fn test_assemble_link() {
914        // `rewrite_rel_links=true`
915        let output = assemble_link(
916            Path::new("/my"),
917            Path::new("/my/doc/path"),
918            Path::new("../local/link to/note.md"),
919            true,
920            false,
921        )
922        .unwrap();
923        assert_eq!(output, Path::new("/doc/local/link to/note.md"));
924
925        // `rewrite_rel_links=false`
926        let output = assemble_link(
927            Path::new("/my"),
928            Path::new("/my/doc/path"),
929            Path::new("../local/link to/note.md"),
930            false,
931            false,
932        )
933        .unwrap();
934        assert_eq!(output, Path::new("../local/link to/note.md"));
935
936        // Absolute `dest`.
937        let output = assemble_link(
938            Path::new("/my"),
939            Path::new("/my/doc/path"),
940            Path::new("/test/../abs/local/link to/note.md"),
941            false,
942            false,
943        )
944        .unwrap();
945        assert_eq!(output, Path::new("/abs/local/link to/note.md"));
946
947        // Underflow.
948        let output = assemble_link(
949            Path::new("/my"),
950            Path::new("/my/doc/path"),
951            Path::new("/../local/link to/note.md"),
952            false,
953            false,
954        );
955        assert_eq!(output, None);
956
957        // Absolute `dest`, `rewrite_abs_links=true`.
958        let output = assemble_link(
959            Path::new("/my"),
960            Path::new("/my/doc/path"),
961            Path::new("/abs/local/link to/note.md"),
962            false,
963            true,
964        )
965        .unwrap();
966        assert_eq!(output, Path::new("/my/abs/local/link to/note.md"));
967
968        // Absolute `dest`, `rewrite_abs_links=false`.
969        let output = assemble_link(
970            Path::new("/my"),
971            Path::new("/my/doc/path"),
972            Path::new("/test/../abs/local/link to/note.md"),
973            false,
974            false,
975        )
976        .unwrap();
977        assert_eq!(output, Path::new("/abs/local/link to/note.md"));
978
979        // Absolute `dest`, `rewrite` both.
980        let output = assemble_link(
981            Path::new("/my"),
982            Path::new("/my/doc/path"),
983            Path::new("abs/local/link to/note.md"),
984            true,
985            true,
986        )
987        .unwrap();
988        assert_eq!(output, Path::new("/my/doc/path/abs/local/link to/note.md"));
989    }
990
991    #[test]
992    fn test_decode_html_escape_and_percent() {
993        //
994        let mut input = Link::Text2Dest(Cow::from("text"), Cow::from("dest"), Cow::from("title"));
995        let expected = Link::Text2Dest(Cow::from("text"), Cow::from("dest"), Cow::from("title"));
996        input.decode_ampersand_and_percent();
997        let output = input;
998        assert_eq!(output, expected);
999
1000        //
1001        let mut input = Link::Text2Dest(
1002            Cow::from("te%20xt"),
1003            Cow::from("de%20st"),
1004            Cow::from("title"),
1005        );
1006        let expected =
1007            Link::Text2Dest(Cow::from("te%20xt"), Cow::from("de st"), Cow::from("title"));
1008        input.decode_ampersand_and_percent();
1009        let output = input;
1010        assert_eq!(output, expected);
1011
1012        //
1013        let mut input =
1014            Link::Text2Dest(Cow::from("text"), Cow::from("d:e%20st"), Cow::from("title"));
1015        let expected = Link::Text2Dest(Cow::from("text"), Cow::from("d:e st"), Cow::from("title"));
1016        input.decode_ampersand_and_percent();
1017        let output = input;
1018        assert_eq!(output, expected);
1019
1020        let mut input = Link::Text2Dest(
1021            Cow::from("a&amp;&quot;lt"),
1022            Cow::from("a&amp;&quot;lt"),
1023            Cow::from("a&amp;&quot;lt"),
1024        );
1025        let expected = Link::Text2Dest(
1026            Cow::from("a&\"lt"),
1027            Cow::from("a&\"lt"),
1028            Cow::from("a&\"lt"),
1029        );
1030        input.decode_ampersand_and_percent();
1031        let output = input;
1032        assert_eq!(output, expected);
1033
1034        //
1035        let mut input = Link::Image(Cow::from("al%20t"), Cow::from("de%20st"));
1036        let expected = Link::Image(Cow::from("al%20t"), Cow::from("de st"));
1037        input.decode_ampersand_and_percent();
1038        let output = input;
1039        assert_eq!(output, expected);
1040
1041        //
1042        let mut input = Link::Image(Cow::from("a\\lt"), Cow::from("d\\est"));
1043        let expected = Link::Image(Cow::from("a\\lt"), Cow::from("d\\est"));
1044        input.decode_ampersand_and_percent();
1045        let output = input;
1046        assert_eq!(output, expected);
1047
1048        //
1049        let mut input = Link::Image(Cow::from("a&amp;&quot;lt"), Cow::from("a&amp;&quot;lt"));
1050        let expected = Link::Image(Cow::from("a&\"lt"), Cow::from("a&\"lt"));
1051        input.decode_ampersand_and_percent();
1052        let output = input;
1053        assert_eq!(output, expected);
1054    }
1055
1056    #[test]
1057    fn test_is_local() {
1058        let input = Cow::from("/path/My doc.md");
1059        assert!(<Link as Hyperlink>::is_local_fn(&input));
1060
1061        let input = Cow::from("tpnote:path/My doc.md");
1062        assert!(<Link as Hyperlink>::is_local_fn(&input));
1063
1064        let input = Cow::from("tpnote:/path/My doc.md");
1065        assert!(<Link as Hyperlink>::is_local_fn(&input));
1066
1067        let input = Cow::from("https://getreu.net");
1068        assert!(!<Link as Hyperlink>::is_local_fn(&input));
1069    }
1070
1071    #[test]
1072    fn strip_local_scheme() {
1073        let mut input = Link::Text2Dest(
1074            Cow::from("xyz"),
1075            Cow::from("https://getreu.net"),
1076            Cow::from("xyz"),
1077        );
1078        let expected = input.clone();
1079        input.strip_local_scheme();
1080        assert_eq!(input, expected);
1081
1082        //
1083        let mut input = Link::Text2Dest(
1084            Cow::from("xyz"),
1085            Cow::from("tpnote:/dir/My doc.md"),
1086            Cow::from("xyz"),
1087        );
1088        let expected = Link::Text2Dest(
1089            Cow::from("xyz"),
1090            Cow::from("/dir/My doc.md"),
1091            Cow::from("xyz"),
1092        );
1093        input.strip_local_scheme();
1094        assert_eq!(input, expected);
1095    }
1096
1097    #[test]
1098    fn test_is_autolink() {
1099        let input = Link::Image(Cow::from("abc"), Cow::from("abc"));
1100        assert!(input.is_autolink());
1101
1102        //
1103        let input = Link::Text2Dest(Cow::from("abc"), Cow::from("abc"), Cow::from("xyz"));
1104        assert!(input.is_autolink());
1105
1106        //
1107        let input = Link::Image(Cow::from("abc"), Cow::from("abcd"));
1108        assert!(!input.is_autolink());
1109
1110        //
1111        let input = Link::Text2Dest(Cow::from("abc"), Cow::from("abcd"), Cow::from("xyz"));
1112        assert!(!input.is_autolink());
1113    }
1114
1115    #[test]
1116    fn test_rewrite_local_link() {
1117        let root_path = Path::new("/my/");
1118        let docdir = Path::new("/my/abs/note path/");
1119
1120        // Should panic: this is not a relative path.
1121        let mut input = take_link("<a href=\"ftp://getreu.net\">Blog</a>")
1122            .unwrap()
1123            .1
1124            .1;
1125        input
1126            .rebase_local_link(root_path, docdir, true, false)
1127            .unwrap();
1128        assert!(input.get_local_link_dest_path().is_none());
1129
1130        //
1131        let root_path = Path::new("/my/");
1132        let docdir = Path::new("/my/abs/note path/");
1133
1134        // Check relative path to image.
1135        let mut input = take_link("<img src=\"down/./down/../../t m p.jpg\" alt=\"Image\" />")
1136            .unwrap()
1137            .1
1138            .1;
1139        let expected = "<img src=\"/abs/note path/t m p.jpg\" \
1140            alt=\"Image\">";
1141        input
1142            .rebase_local_link(root_path, docdir, true, false)
1143            .unwrap();
1144        let outpath = input.get_local_link_src_path().unwrap();
1145        let output = input.to_html();
1146        assert_eq!(output, expected);
1147        assert_eq!(outpath, PathBuf::from("/abs/note path/t m p.jpg"));
1148
1149        // Check relative path to image. Canonicalized?
1150        let mut input = take_link("<img src=\"down/./../../t m p.jpg\" alt=\"Image\" />")
1151            .unwrap()
1152            .1
1153            .1;
1154        let expected = "<img src=\"/abs/t m p.jpg\" alt=\"Image\">";
1155        input
1156            .rebase_local_link(root_path, docdir, true, false)
1157            .unwrap();
1158        let outpath = input.get_local_link_src_path().unwrap();
1159        let output = input.to_html();
1160        assert_eq!(output, expected);
1161        assert_eq!(outpath, PathBuf::from("/abs/t m p.jpg"));
1162
1163        // Check relative path to note file.
1164        let mut input = take_link("<a href=\"./down/./../my note 1.md\">my note 1</a>")
1165            .unwrap()
1166            .1
1167            .1;
1168        let expected = "<a href=\"/abs/note path/my note 1.md\">my note 1</a>";
1169        input
1170            .rebase_local_link(root_path, docdir, true, false)
1171            .unwrap();
1172        let outpath = input.get_local_link_dest_path().unwrap();
1173        let output = input.to_html();
1174        assert_eq!(output, expected);
1175        assert_eq!(outpath, PathBuf::from("/abs/note path/my note 1.md"));
1176
1177        // Check absolute path to note file.
1178        let mut input = take_link("<a href=\"/dir/./down/../my note 1.md\">my note 1</a>")
1179            .unwrap()
1180            .1
1181            .1;
1182        let expected = "<a href=\"/dir/my note 1.md\">my note 1</a>";
1183        input
1184            .rebase_local_link(root_path, docdir, true, false)
1185            .unwrap();
1186        let outpath = input.get_local_link_dest_path().unwrap();
1187        let output = input.to_html();
1188        assert_eq!(output, expected);
1189        assert_eq!(outpath, PathBuf::from("/dir/my note 1.md"));
1190
1191        // Check relative path to note file. Canonicalized?
1192        let mut input = take_link("<a href=\"./down/./../dir/my note 1.md\">my note 1</a>")
1193            .unwrap()
1194            .1
1195            .1;
1196        let expected = "<a href=\"dir/my note 1.md\">my note 1</a>";
1197        input
1198            .rebase_local_link(root_path, docdir, false, false)
1199            .unwrap();
1200        let outpath = input.get_local_link_dest_path().unwrap();
1201        let output = input.to_html();
1202        assert_eq!(output, expected);
1203        assert_eq!(outpath, PathBuf::from("dir/my note 1.md"));
1204
1205        // Check relative link in input.
1206        let mut input = take_link("<a href=\"./down/./../dir/my note 1.md\">my note 1</a>")
1207            .unwrap()
1208            .1
1209            .1;
1210        let expected = "<a href=\"/path/dir/my note 1.md\">my note 1</a>";
1211        input
1212            .rebase_local_link(
1213                Path::new("/my/note/"),
1214                Path::new("/my/note/path/"),
1215                true,
1216                false,
1217            )
1218            .unwrap();
1219        let outpath = input.get_local_link_dest_path().unwrap();
1220        let output = input.to_html();
1221        assert_eq!(output, expected);
1222        assert_eq!(outpath, PathBuf::from("/path/dir/my note 1.md"));
1223
1224        // Check absolute link in input.
1225        let mut input = take_link("<a href=\"/down/./../dir/my note 1.md\">my note 1</a>")
1226            .unwrap()
1227            .1
1228            .1;
1229        let expected = "<a href=\"/dir/my note 1.md\">my note 1</a>";
1230        input
1231            .rebase_local_link(root_path, Path::new("/my/ignored/"), true, false)
1232            .unwrap();
1233        let outpath = input.get_local_link_dest_path().unwrap();
1234        let output = input.to_html();
1235        assert_eq!(output, expected);
1236        assert_eq!(outpath, PathBuf::from("/dir/my note 1.md"));
1237
1238        // Check absolute link in input, not in `root_path`.
1239        let mut input = take_link("<a href=\"/down/../../dir/my note 1.md\">my note 1</a>")
1240            .unwrap()
1241            .1
1242            .1;
1243        let output = input
1244            .rebase_local_link(root_path, Path::new("/my/notepath/"), true, false)
1245            .unwrap_err();
1246        assert!(matches!(output, NoteError::InvalidLocalPath { .. }));
1247
1248        // Check relative link in input, not in `root_path`.
1249        let mut input = take_link("<a href=\"../../dir/my note 1.md\">my note 1</a>")
1250            .unwrap()
1251            .1
1252            .1;
1253        let output = input
1254            .rebase_local_link(root_path, Path::new("/my/notepath/"), true, false)
1255            .unwrap_err();
1256        assert!(matches!(output, NoteError::InvalidLocalPath { .. }));
1257
1258        // Check relative link in input, with underflow.
1259        let root_path = Path::new("/");
1260        let mut input = take_link("<a href=\"../../dir/my note 1.md\">my note 1</a>")
1261            .unwrap()
1262            .1
1263            .1;
1264        let output = input
1265            .rebase_local_link(root_path, Path::new("/my/"), true, false)
1266            .unwrap_err();
1267        assert!(matches!(output, NoteError::InvalidLocalPath { .. }));
1268
1269        // Check relative link in input, not in `root_path`.
1270        let root_path = Path::new("/my");
1271        let mut input = take_link("<a href=\"../../dir/my note 1.md\">my note 1</a>")
1272            .unwrap()
1273            .1
1274            .1;
1275        let output = input
1276            .rebase_local_link(root_path, Path::new("/my/notepath"), true, false)
1277            .unwrap_err();
1278        assert!(matches!(output, NoteError::InvalidLocalPath { .. }));
1279
1280        // Test autolink.
1281        let root_path = Path::new("/my");
1282        let mut input =
1283            take_link("<a href=\"tpnote:dir/3.0-my note.md\">tpnote:dir/3.0-my note.md</a>")
1284                .unwrap()
1285                .1
1286                .1;
1287        input.strip_local_scheme();
1288        input
1289            .rebase_local_link(root_path, Path::new("/my/path"), true, false)
1290            .unwrap();
1291        input.rewrite_autolink();
1292        input.apply_format_attribute();
1293        let outpath = input.get_local_link_dest_path().unwrap();
1294        let output = input.to_html();
1295        let expected = "<a href=\"/path/dir/3.0-my note.md\">dir/3.0-my note.md</a>";
1296        assert_eq!(output, expected);
1297        assert_eq!(outpath, PathBuf::from("/path/dir/3.0-my note.md"));
1298
1299        // Test short autolink 1 with sort-tag only.
1300        let root_path = Path::new("/my");
1301        let mut input = take_link("<a href=\"tpnote:dir/3.0\">tpnote:dir/3.0</a>")
1302            .unwrap()
1303            .1
1304            .1;
1305        input.strip_local_scheme();
1306        input
1307            .rebase_local_link(root_path, Path::new("/my/path"), true, false)
1308            .unwrap();
1309        input.rewrite_autolink();
1310        input.apply_format_attribute();
1311        let outpath = input.get_local_link_dest_path().unwrap();
1312        let output = input.to_html();
1313        let expected = "<a href=\"/path/dir/3.0\">dir/3.0</a>";
1314        assert_eq!(output, expected);
1315        assert_eq!(outpath, PathBuf::from("/path/dir/3.0"));
1316
1317        // The link text contains inline content.
1318        let root_path = Path::new("/my");
1319        let mut input = take_link(
1320            "<a href=\
1321            \"/uri\">link <em>foo <strong>bar</strong> <code>#</code></em>\
1322            </a>",
1323        )
1324        .unwrap()
1325        .1
1326        .1;
1327        input.strip_local_scheme();
1328        input
1329            .rebase_local_link(root_path, Path::new("/my/path"), true, false)
1330            .unwrap();
1331        let outpath = input.get_local_link_dest_path().unwrap();
1332        let expected = "<a href=\"/uri\">link <em>foo <strong>bar\
1333            </strong> <code>#</code></em></a>";
1334
1335        let output = input.to_html();
1336        assert_eq!(output, expected);
1337        assert_eq!(outpath, PathBuf::from("/uri"));
1338    }
1339
1340    #[test]
1341    fn test_rewrite_autolink() {
1342        //
1343        let mut input = Link::Text2Dest(
1344            Cow::from("http://getreu.net"),
1345            Cow::from("http://getreu.net"),
1346            Cow::from("title"),
1347        );
1348        let expected = Link::Text2Dest(
1349            Cow::from("getreu.net"),
1350            Cow::from("http://getreu.net"),
1351            Cow::from("title"),
1352        );
1353        input.rewrite_autolink();
1354        let output = input;
1355        assert_eq!(output, expected);
1356
1357        //
1358        let mut input = Link::Text2Dest(
1359            Cow::from("/dir/3.0"),
1360            Cow::from("/dir/3.0-My note.md"),
1361            Cow::from("title"),
1362        );
1363        let expected = Link::Text2Dest(
1364            Cow::from("/dir/3.0"),
1365            Cow::from("/dir/3.0-My note.md"),
1366            Cow::from("title"),
1367        );
1368        input.rewrite_autolink();
1369        let output = input;
1370        assert_eq!(output, expected);
1371
1372        //
1373        let mut input = Link::Text2Dest(
1374            Cow::from("tpnote:/dir/3.0"),
1375            Cow::from("/dir/3.0-My note.md"),
1376            Cow::from("title"),
1377        );
1378        let expected = Link::Text2Dest(
1379            Cow::from("/dir/3.0"),
1380            Cow::from("/dir/3.0-My note.md"),
1381            Cow::from("title"),
1382        );
1383        input.rewrite_autolink();
1384        let output = input;
1385        assert_eq!(output, expected);
1386
1387        //
1388        let mut input = Link::Text2Dest(
1389            Cow::from("tpnote:/dir/3.0"),
1390            Cow::from("/dir/3.0-My note.md?"),
1391            Cow::from("title"),
1392        );
1393        let expected = Link::Text2Dest(
1394            Cow::from("/dir/3.0"),
1395            Cow::from("/dir/3.0-My note.md?"),
1396            Cow::from("title"),
1397        );
1398        input.rewrite_autolink();
1399        let output = input;
1400        assert_eq!(output, expected);
1401
1402        //
1403        let mut input = Link::Text2Dest(
1404            Cow::from("/dir/3.0-My note.md"),
1405            Cow::from("/dir/3.0-My note.md"),
1406            Cow::from("title"),
1407        );
1408        let expected = Link::Text2Dest(
1409            Cow::from("/dir/3.0-My note.md"),
1410            Cow::from("/dir/3.0-My note.md"),
1411            Cow::from("title"),
1412        );
1413        input.rewrite_autolink();
1414        let output = input;
1415        assert_eq!(output, expected);
1416    }
1417
1418    #[test]
1419    fn test_apply_format_attribute() {
1420        //
1421        let mut input = Link::Text2Dest(
1422            Cow::from("tpnote:/dir/3.0"),
1423            Cow::from("/dir/3.0-My note.md"),
1424            Cow::from("title"),
1425        );
1426        let expected = Link::Text2Dest(
1427            Cow::from("tpnote:/dir/3.0"),
1428            Cow::from("/dir/3.0-My note.md"),
1429            Cow::from("title"),
1430        );
1431        input.apply_format_attribute();
1432        let output = input;
1433        assert_eq!(output, expected);
1434
1435        //
1436        let mut input = Link::Text2Dest(
1437            Cow::from("does not matter"),
1438            Cow::from("/dir/3.0-My note.md?"),
1439            Cow::from("title"),
1440        );
1441        let expected = Link::Text2Dest(
1442            Cow::from("My note"),
1443            Cow::from("/dir/3.0-My note.md"),
1444            Cow::from("title"),
1445        );
1446        input.apply_format_attribute();
1447        let output = input;
1448        assert_eq!(output, expected);
1449
1450        let mut input = Link::Text2Dest(
1451            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1452            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1453            Cow::from("title"),
1454        );
1455        let expected = Link::Text2Dest(
1456            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1457            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1458            Cow::from("title"),
1459        );
1460        input.apply_format_attribute();
1461        let output = input;
1462        assert_eq!(output, expected);
1463
1464        //
1465        let mut input = Link::Text2Dest(
1466            Cow::from("does not matter"),
1467            Cow::from("/dir/3.0-My note--red_blue_green.jpg?"),
1468            Cow::from("title"),
1469        );
1470        let expected = Link::Text2Dest(
1471            Cow::from("My note--red_blue_green"),
1472            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1473            Cow::from("title"),
1474        );
1475        input.apply_format_attribute();
1476        let output = input;
1477        assert_eq!(output, expected);
1478
1479        //
1480        let mut input = Link::Text2Dest(
1481            Cow::from("does not matter"),
1482            Cow::from("/dir/3.0-My note--red_blue_green.jpg?--"),
1483            Cow::from("title"),
1484        );
1485        let expected = Link::Text2Dest(
1486            Cow::from("My note"),
1487            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1488            Cow::from("title"),
1489        );
1490        input.apply_format_attribute();
1491        let output = input;
1492        assert_eq!(output, expected);
1493
1494        //
1495        let mut input = Link::Text2Dest(
1496            Cow::from("does not matter"),
1497            Cow::from("/dir/3.0-My note--red_blue_green.jpg?_"),
1498            Cow::from("title"),
1499        );
1500        let expected = Link::Text2Dest(
1501            Cow::from("My note--red"),
1502            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1503            Cow::from("title"),
1504        );
1505        input.apply_format_attribute();
1506        let output = input;
1507        assert_eq!(output, expected);
1508
1509        //
1510        let mut input = Link::Text2Dest(
1511            Cow::from("does not matter"),
1512            Cow::from("/dir/3.0-My note--red_blue_green.jpg??"),
1513            Cow::from("title"),
1514        );
1515        let expected = Link::Text2Dest(
1516            Cow::from("3.0-My note--red_blue_green.jpg"),
1517            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1518            Cow::from("title"),
1519        );
1520        input.apply_format_attribute();
1521        let output = input;
1522        assert_eq!(output, expected);
1523
1524        //
1525        let mut input = Link::Text2Dest(
1526            Cow::from("does not matter"),
1527            Cow::from("/dir/3.0-My note--red_blue_green.jpg?#."),
1528            Cow::from("title"),
1529        );
1530        let expected = Link::Text2Dest(
1531            Cow::from("3"),
1532            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1533            Cow::from("title"),
1534        );
1535        input.apply_format_attribute();
1536        let output = input;
1537        assert_eq!(output, expected);
1538
1539        //
1540        let mut input = Link::Text2Dest(
1541            Cow::from("does not matter"),
1542            Cow::from("/dir/3.0-My note--red_blue_green.jpg??.:_"),
1543            Cow::from("title"),
1544        );
1545        let expected = Link::Text2Dest(
1546            Cow::from("0-My note--red"),
1547            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1548            Cow::from("title"),
1549        );
1550        input.apply_format_attribute();
1551        let output = input;
1552        assert_eq!(output, expected);
1553
1554        //
1555        let mut input = Link::Text2Dest(
1556            Cow::from("does not matter"),
1557            Cow::from("/dir/3.0-My note--red_blue_green.jpg?_:_"),
1558            Cow::from("title"),
1559        );
1560        let expected = Link::Text2Dest(
1561            Cow::from("blue"),
1562            Cow::from("/dir/3.0-My note--red_blue_green.jpg"),
1563            Cow::from("title"),
1564        );
1565        input.apply_format_attribute();
1566        let output = input;
1567        assert_eq!(output, expected);
1568    }
1569
1570    #[test]
1571    fn get_local_link_dest_path() {
1572        //
1573        let input = Link::Text2Dest(Cow::from("xyz"), Cow::from("/dir/3.0"), Cow::from("title"));
1574        assert_eq!(
1575            input.get_local_link_dest_path(),
1576            Some(Path::new("/dir/3.0"))
1577        );
1578
1579        //
1580        let input = Link::Text2Dest(
1581            Cow::from("xyz"),
1582            Cow::from("http://getreu.net"),
1583            Cow::from("title"),
1584        );
1585        assert_eq!(input.get_local_link_dest_path(), None);
1586
1587        //
1588        let input = Link::Text2Dest(Cow::from("xyz"), Cow::from("dir/doc.md"), Cow::from("xyz"));
1589        let expected = Path::new("dir/doc.md");
1590        let res = input.get_local_link_dest_path().unwrap();
1591        assert_eq!(res, expected);
1592
1593        //
1594        let input = Link::Text2Dest(Cow::from("xyz"), Cow::from("d#ir/doc.md"), Cow::from("xyz"));
1595        let expected = Path::new("d#ir/doc.md");
1596        let res = input.get_local_link_dest_path().unwrap();
1597        assert_eq!(res, expected);
1598
1599        //
1600        let input = Link::Text2Dest(
1601            Cow::from("xyz"),
1602            Cow::from("dir/doc.md#1"),
1603            Cow::from("xyz"),
1604        );
1605        let expected = Path::new("dir/doc.md");
1606        let res = input.get_local_link_dest_path().unwrap();
1607        assert_eq!(res, expected);
1608    }
1609
1610    #[test]
1611    fn test_append_html_ext() {
1612        //
1613        let mut input = Link::Text2Dest(
1614            Cow::from("abc"),
1615            Cow::from("/dir/3.0-My note.md"),
1616            Cow::from("title"),
1617        );
1618        let expected = Link::Text2Dest(
1619            Cow::from("abc"),
1620            Cow::from("/dir/3.0-My note.md.html"),
1621            Cow::from("title"),
1622        );
1623        input.append_html_ext();
1624        let output = input;
1625        assert_eq!(output, expected);
1626    }
1627
1628    #[test]
1629    fn test_to_html() {
1630        //
1631        let input = Link::Text2Dest(
1632            Cow::from("te\\x/t"),
1633            Cow::from("de\\s/t"),
1634            Cow::from("ti\\t/le"),
1635        );
1636        let expected = "<a href=\"de/s/t\" title=\"ti\\t/le\">te\\x/t</a>";
1637        let output = input.to_html();
1638        assert_eq!(output, expected);
1639
1640        //
1641        let input = Link::Text2Dest(
1642            Cow::from("te&> xt"),
1643            Cow::from("de&> st"),
1644            Cow::from("ti&> tle"),
1645        );
1646        let expected = "<a href=\"de&amp;&gt; st\" title=\"ti&amp;&gt; tle\">te&> xt</a>";
1647        let output = input.to_html();
1648        assert_eq!(output, expected);
1649
1650        //
1651        let input = Link::Image(Cow::from("al&t"), Cow::from("sr&c"));
1652        let expected = "<img src=\"sr&amp;c\" alt=\"al&amp;t\">";
1653        let output = input.to_html();
1654        assert_eq!(output, expected);
1655
1656        //
1657        let input = Link::Text2Dest(Cow::from("te&> xt"), Cow::from("de&> st"), Cow::from(""));
1658        let expected = "<a href=\"de&amp;&gt; st\">te&> xt</a>";
1659        let output = input.to_html();
1660        assert_eq!(output, expected);
1661    }
1662
1663    #[test]
1664    fn test_rewrite_links() {
1665        use crate::config::LocalLinkKind;
1666
1667        let allowed_urls = Arc::new(RwLock::new(HashSet::new()));
1668        let input = "abc<a href=\"ftp://getreu.net\">Blog</a>\
1669            def<a href=\"https://getreu.net\">https://getreu.net</a>\
1670            ghi<img src=\"t m p.jpg\" alt=\"test 1\" />\
1671            jkl<a href=\"down/../down/my note 1.md\">my note 1</a>\
1672            mno<a href=\"http:./down/../dir/my note.md\">http:./down/../dir/my note.md</a>\
1673            pqr<a href=\"http:/down/../dir/my note.md\">\
1674            http:/down/../dir/my note.md</a>\
1675            stu<a href=\"http:/../dir/underflow/my note.md\">\
1676            not allowed dir</a>\
1677            vwx<a href=\"http:../../../not allowed dir/my note.md\">\
1678            not allowed</a>"
1679            .to_string();
1680        let expected = "abc<a href=\"ftp://getreu.net\">Blog</a>\
1681            def<a href=\"https://getreu.net\">getreu.net</a>\
1682            ghi<img src=\"/abs/note path/t m p.jpg\" alt=\"test 1\">\
1683            jkl<a href=\"/abs/note path/down/my note 1.md\">my note 1</a>\
1684            mno<a href=\"/abs/note path/dir/my note.md\">./down/../dir/my note.md</a>\
1685            pqr<a href=\"/dir/my note.md\">/down/../dir/my note.md</a>\
1686            stu<i>&lt;INVALID: /../dir/underflow/my note.md&gt;</i>\
1687            vwx<i>&lt;INVALID: ../../../not allowed dir/my note.md&gt;</i>"
1688            .to_string();
1689
1690        let root_path = Path::new("/my/");
1691        let docdir = Path::new("/my/abs/note path/");
1692        let output = rewrite_links(
1693            input,
1694            root_path,
1695            docdir,
1696            LocalLinkKind::Short,
1697            false,
1698            allowed_urls.clone(),
1699        );
1700        let url = allowed_urls.read_recursive();
1701
1702        assert!(url.contains(&PathBuf::from("/abs/note path/t m p.jpg")));
1703        assert!(url.contains(&PathBuf::from("/abs/note path/dir/my note.md")));
1704        assert!(url.contains(&PathBuf::from("/abs/note path/down/my note 1.md")));
1705        assert_eq!(output, expected);
1706    }
1707
1708    #[test]
1709    fn test_rewrite_links2() {
1710        use crate::config::LocalLinkKind;
1711
1712        let allowed_urls = Arc::new(RwLock::new(HashSet::new()));
1713        let input = "abd<a href=\"tpnote:dir/my note.md\">\
1714            <img src=\"/imagedir/favicon-32x32.png\" alt=\"logo\"></a>abd"
1715            .to_string();
1716        let expected = "abd<a href=\"/abs/note path/dir/my note.md\">\
1717            <img src=\"/imagedir/favicon-32x32.png\" alt=\"logo\"></a>abd";
1718        let root_path = Path::new("/my/");
1719        let docdir = Path::new("/my/abs/note path/");
1720        let output = rewrite_links(
1721            input,
1722            root_path,
1723            docdir,
1724            LocalLinkKind::Short,
1725            false,
1726            allowed_urls.clone(),
1727        );
1728        let url = allowed_urls.read_recursive();
1729        println!("{:?}", allowed_urls.read_recursive());
1730        assert!(url.contains(&PathBuf::from("/abs/note path/dir/my note.md")));
1731        assert_eq!(output, expected);
1732    }
1733
1734    #[test]
1735    fn test_rewrite_links3() {
1736        use crate::config::LocalLinkKind;
1737
1738        let allowed_urls = Arc::new(RwLock::new(HashSet::new()));
1739        let input = "abd<a href=\"#1\"></a>abd".to_string();
1740        let expected = "abd<a href=\"/abs/note path/#1\"></a>abd";
1741        let root_path = Path::new("/my/");
1742        let docdir = Path::new("/my/abs/note path/");
1743        let output = rewrite_links(
1744            input,
1745            root_path,
1746            docdir,
1747            LocalLinkKind::Short,
1748            false,
1749            allowed_urls.clone(),
1750        );
1751        let url = allowed_urls.read_recursive();
1752        println!("{:?}", allowed_urls.read_recursive());
1753        assert!(url.contains(&PathBuf::from("/abs/note path/")));
1754        assert_eq!(output, expected);
1755    }
1756
1757    #[test]
1758    fn test_is_empty_html() {
1759        // Bring new methods into scope.
1760        use crate::html::HtmlStr;
1761
1762        // Test where input is '<!DOCTYPE html>'
1763        // See: [HTML doctype declaration](https://www.w3schools.com/tags/tag_doctype.ASP)
1764        assert!(String::from("<!DOCTYPE html>").is_empty_html());
1765
1766        // This should fail:
1767        assert!(!String::from("<!DOCTYPE html>>").is_empty_html());
1768
1769        // Test where input is '<!DOCTYPE html>'
1770        // See: [HTML doctype declaration](https://www.w3schools.com/tags/tag_doctype.ASP)
1771        assert!(
1772            String::from(
1773                " <!DOCTYPE HTML PUBLIC \
1774            \"-//W3C//DTD HTML 4.01 Transitional//EN\" \
1775            \"http://www.w3.org/TR/html4/loose.dtd\">"
1776            )
1777            .is_empty_html()
1778        );
1779
1780        // Test where input is '<!DOCTYPE html>'
1781        // See: [HTML doctype declaration](https://www.w3schools.com/tags/tag_doctype.ASP)
1782        assert!(
1783            String::from(
1784                " <!DOCTYPE html PUBLIC \
1785            \"-//W3C//DTD XHTML 1.1//EN\" \
1786            \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"
1787            )
1788            .is_empty_html()
1789        );
1790
1791        // Test where input is '<!DOCTYPE html>Some content'
1792        assert!(!String::from("<!DOCTYPE html>Some content").is_empty_html());
1793
1794        // Test where input is an empty string
1795        assert!(String::from("").is_empty_html());
1796
1797        // Test where input is not empty HTML.
1798        // Convention: we consider empty only `` or `<!DOCTYPE html>`.
1799        assert!(!String::from("<html></html>").is_empty_html());
1800
1801        // Test where input is not empty HTML with doctype
1802        // Convention: we consider empty only `` or `<!DOCTYPE html>`.
1803        assert!(!String::from("<!DOCTYPE html><html></html>").is_empty_html());
1804    }
1805
1806    #[test]
1807    fn test_has_html_start_tag() {
1808        // Bring new methods into scope.
1809        use crate::html::HtmlStr;
1810
1811        // Test where input is '<!DOCTYPE html>Some content'
1812        assert!(String::from("<!DOCTYPE html>Some content").has_html_start_tag());
1813
1814        // This fails because we require be convention `<!DOCTYPE html>` as
1815        // first tag
1816        assert!(!String::from("<html>Some content</html>").has_html_start_tag());
1817
1818        // This fails because we require be convention `<!DOCTYPE html>` as
1819        // first tag
1820        assert!(!String::from("<HTML>").has_html_start_tag());
1821
1822        // Test where input starts with spaces
1823        assert!(String::from("  <!doctype html>Some content").has_html_start_tag());
1824
1825        // Test where input is a non-HTML doctype
1826        assert!(!String::from("<!DOCTYPE other>").has_html_start_tag());
1827
1828        // Test where input is an empty string
1829        assert!(!String::from("").has_html_start_tag());
1830    }
1831
1832    #[test]
1833    fn test_is_html_unchecked() {
1834        // Bring new methods into scope.
1835        use crate::html::HtmlStr;
1836
1837        // Test with `<!DOCTYPE html>` tag
1838        let html = "<!doctype html>";
1839        assert!(html.is_html_unchecked());
1840
1841        // Test with `<!DOCTYPE html>` tag
1842        let html = "<!doctype html abc>def";
1843        assert!(html.is_html_unchecked());
1844
1845        // Test with `<!DOCTYPE html>` tag
1846        let html = "<!doctype html";
1847        assert!(!html.is_html_unchecked());
1848
1849        // Test with `<html>` tag
1850        let html = "<html><body></body></html>";
1851        assert!(html.is_html_unchecked());
1852
1853        // Test with `<html>` tag
1854        let html = "<html abc>def";
1855        assert!(html.is_html_unchecked());
1856
1857        // Test with `<html>` tag
1858        let html = "<html abc def";
1859        assert!(!html.is_html_unchecked());
1860
1861        // Test with leading whitespace
1862        let html = "   <!doctype html><html><body></body></html>";
1863        assert!(html.is_html_unchecked());
1864
1865        // Test with non-html content
1866        let html = "<!DOCTYPE xml><root></root>";
1867        assert!(!html.is_html_unchecked());
1868
1869        // Test with partial `<!DOCTYPE>` tag
1870        let html = "<!doctype>";
1871        assert!(!html.is_html_unchecked());
1872    }
1873
1874    #[test]
1875    fn test_prepend_html_start_tag() {
1876        // Bring new methods into scope.
1877        use crate::html::HtmlString;
1878
1879        // Test where input already has doctype HTML
1880        assert_eq!(
1881            String::from("<!DOCTYPE html>Some content").prepend_html_start_tag(),
1882            Ok(String::from("<!DOCTYPE html>Some content"))
1883        );
1884
1885        // Test where input already has doctype HTML
1886        assert_eq!(
1887            String::from("<!DOCTYPE html>").prepend_html_start_tag(),
1888            Ok(String::from("<!DOCTYPE html>"))
1889        );
1890
1891        // Test where input has no HTML tag
1892        assert_eq!(
1893            String::from("<html>Some content").prepend_html_start_tag(),
1894            Ok(String::from("<!DOCTYPE html><html>Some content"))
1895        );
1896
1897        // Test where input has a non-HTML doctype
1898        assert_eq!(
1899            String::from("<!DOCTYPE other>").prepend_html_start_tag(),
1900            Err(InputStreamError::NonHtmlDoctype {
1901                html: "<!DOCTYPE other>".to_string()
1902            })
1903        );
1904
1905        // Test where input has no HTML tag
1906        assert_eq!(
1907            String::from("Some content").prepend_html_start_tag(),
1908            Ok(String::from("<!DOCTYPE html>Some content"))
1909        );
1910
1911        // Test where input is an empty string
1912        assert_eq!(
1913            String::from("").prepend_html_start_tag(),
1914            Ok(String::from("<!DOCTYPE html>"))
1915        );
1916    }
1917}