tpnote_lib/
markup_language.rs

1//! Helper functions dealing with markup languages.
2use crate::config::LIB_CFG;
3use crate::error::NoteError;
4#[cfg(feature = "renderer")]
5use crate::highlight::SyntaxPreprocessor;
6#[cfg(feature = "renderer")]
7use crate::html2md::convert_html_to_md;
8use crate::settings::SETTINGS;
9use parse_hyperlinks::renderer::text_links2html;
10use parse_hyperlinks::renderer::text_rawlinks2html;
11#[cfg(feature = "renderer")]
12use pulldown_cmark::{Options, Parser, html};
13#[cfg(feature = "renderer")]
14use rst_parser;
15#[cfg(feature = "renderer")]
16use rst_renderer;
17use serde::{Deserialize, Serialize};
18use std::path::Path;
19#[cfg(feature = "renderer")]
20use std::str::from_utf8;
21
/// Tag prefixes that `InputConverter::filter_tags()` removes from its input:
/// `<span...>`, `</span>`, `<div...>` and `</div>`.
/// The opening tags are listed without the closing `>` because the filter
/// compares with `starts_with()`, which lets tags carrying attributes match.
#[cfg(test)] // Currently the `filter_tags()` filter is not used in the code.
#[cfg(feature = "renderer")]
const FILTERED_TAGS: &[&str; 4] = &["<span", "</span>", "<div", "</div>"];
27
/// Available converters for HTML input received from standard input or the
/// clipboard. `InputConverter::build()` maps a variant to the function that
/// carries out the conversion.
#[non_exhaustive]
#[derive(Default, Debug, Hash, Clone, Eq, PartialEq, Deserialize, Serialize, Copy)]
pub enum InputConverter {
    /// Convert from HTML to Markdown.
    /// The conversion is only compiled in with the `renderer` feature;
    /// without it `build()` handles this variant like `PassThrough`.
    ToMarkdown,
    /// Do not convert, return an error instead. This is the default variant.
    #[default]
    Disabled,
    /// Do not convert, just pass through wrapped in `Ok()`.
    PassThrough,
}
41
42impl InputConverter {
43    /// Returns a function that implements the `InputConverter` looked up in
44    /// the `extensions` table in the `extension` line.
45    /// When `extension` is not found in `extensions`, the function returns
46    /// a `NoteError`.
47    #[inline]
48    pub(crate) fn build(extension: &str) -> fn(String) -> Result<String, NoteError> {
49        let settings = SETTINGS.read_recursive();
50        let scheme = &LIB_CFG.read_recursive().scheme[settings.current_scheme];
51
52        let mut input_converter = InputConverter::default();
53        for e in &scheme.filename.extensions {
54            if e.0 == *extension {
55                input_converter = e.1;
56                break;
57            }
58        }
59
60        match input_converter {
61            #[cfg(feature = "renderer")]
62            InputConverter::ToMarkdown => |s| convert_html_to_md(&s),
63
64            InputConverter::Disabled => {
65                |_: String| -> Result<String, NoteError> { Err(NoteError::HtmlToMarkupDisabled) }
66            }
67
68            _ => Ok,
69        }
70    }
71
    /// Removes the tags listed in `FILTERED_TAGS`, e.g. `<span...>`,
    /// `</span>`, `<div...>` and `</div>`, from `text`. Only the tags
    /// themselves are dropped; the text between them is kept.
    /// All other tags and stray `<` characters are copied unchanged.
    ///
    /// Contract: the input substring `...` does not contain the characters
    /// `>` or `\n`.
    ///
    /// Returns the filtered text. When `text` contains no `<` at all, the
    /// input `String` is handed back without copying.
    #[cfg(test)] // Currently the `filter_tags()` filter is not used in the code.
    #[cfg(feature = "renderer")]
    fn filter_tags(text: String) -> String {
        // Output buffer; only filled when at least one `<` is found.
        let mut res = String::new();
        // Byte offset in `text` up to which the input has been consumed.
        let mut i = 0;
        // `start` is the offset of the next `<`, relative to `i`.
        while let Some(mut start) = text[i..].find('<') {
            // `end` is the offset of the next `>`, relative to that `<`.
            if let Some(mut end) = text[i + start..].find('>') {
                // Make `end` the length of the candidate `<...>`, including `>`.
                end += 1;
                // Move on if there is another opening bracket.
                // The candidate is narrowed to begin at the last `<` before
                // the `>`; everything in front of it is treated as text.
                if let Some(new_start) = text[i + start + 1..i + start + end].rfind('<') {
                    start += new_start + 1;
                    end -= new_start + 1;
                }

                // Is this a tag listed in `FILTERED_TAGS`?
                let filter_tag = FILTERED_TAGS
                    .iter()
                    .any(|&pat| text[i + start..i + start + end].starts_with(pat));

                if filter_tag {
                    // Copy only the text in front of the tag, omit the tag.
                    res.push_str(&text[i..i + start]);
                } else {
                    // Copy the text in front of the tag and the tag itself.
                    res.push_str(&text[i..i + start + end]);
                };
                // Continue right after the closing `>`.
                i = i + start + end;
            } else {
                // No `>` follows: copy up to and including this `<` and
                // continue searching behind it.
                res.push_str(&text[i..i + start + 1]);
                i = i + start + 1;
            }
        }
        if i > 0 {
            // At least one `<` was processed: append the remaining text.
            res.push_str(&text[i..]);
            if res != text {
                log::trace!("`html_to_markup` filter: removed tags in \"{}\"", text);
            }
            res
        } else {
            // No `<` found, nothing was copied: return the input as it is.
            text
        }
    }
116}
117
/// The Markup language of the note content.
/// The variant is looked up by file extension (see the `From<&str>` and
/// `From<&Path>` implementations) and selects the renderer used by
/// `MarkupLanguage::render()`.
#[non_exhaustive]
#[derive(Default, Debug, Hash, Clone, Eq, PartialEq, Deserialize, Serialize, Copy)]
pub enum MarkupLanguage {
    /// The note content is Markdown.
    Markdown,
    /// The note content is ReStructuredText.
    ReStructuredText,
    /// The note content is HTML; `render()` returns it unmodified.
    Html,
    /// The note content is plain text.
    PlainText,
    /// The markup language is known, but the renderer is disabled.
    RendererDisabled,
    /// This is a Tp-Note file, but we are not able to determine the
    /// MarkupLanguage at this point.
    /// Note: the variant name is spelled `Unkown` (not `Unknown`).
    Unkown,
    /// This is not a Tp-Note file. This is the default variant.
    #[default]
    None,
}
135
136impl MarkupLanguage {
137    /// If `Self` is `None` return `rhs`, otherwise return `Self`.
138    pub fn or(self, rhs: Self) -> Self {
139        match self {
140            MarkupLanguage::None => rhs,
141            _ => self,
142        }
143    }
144
145    /// Returns the MIME type for all `Markup Languages.is_tpnote_file()==true`.
146    /// Otherwise, for `MarkupLanguage::None` this returns None.
147    pub fn mine_type(&self) -> Option<&'static str> {
148        match self {
149            Self::Markdown => Some("text/markodwn"),
150            Self::ReStructuredText => Some("x-rst"),
151            Self::Html => Some("text/html"),
152            Self::PlainText => Some("text/plain"),
153            Self::RendererDisabled => Some("text/plain"),
154            Self::Unkown => Some("text/plain"),
155            _ => None,
156        }
157    }
158
159    /// As we identify a markup language by the file's extension, we
160    /// can also tell, in case `Markuplanguage::from(ext).is_some()`,
161    /// that a file with the extension `ext` is a Tp-Note file.
162    pub fn is_some(&self) -> bool {
163        !matches!(self, Self::None)
164    }
165
166    /// As we identify a markup language by the file's extension, we
167    /// can also tell, in case `Markuplanguage::from(ext).is_none()`,
168    /// that a file with the extension `ext` is NOT a Tp-Note file.
169    pub fn is_none(&self) -> bool {
170        matches!(self, Self::None)
171    }
172
173    /// Every `MarkupLanguage` variant has an own internal HTML renderer:
174    /// * `Markdown` is rendered according the "CommonMark" standard.
175    /// * Currently only as small subset of ReStructuredText is rendered for
176    ///   `ReStructuredText`. This feature is experimental.
177    /// * The `Html` renderer simply forwards the input without modification.
178    /// * `PlainText` is rendered as raw text. Hyperlinks in Markdown,
179    ///   ReStructuredText, AsciiDoc and WikiText syntax are detected and
180    ///   are displayed in the rendition with their link text. All hyperlinks
181    ///   are clickable.
182    /// * `Unknown` is rendered like `PlainText`, hyperlinks are also
183    ///   clickable, but they are displayed as they appear in the input.
184    /// * For the variant `None` the result is always the empty string whatever
185    ///   the input may be.
186    pub fn render(&self, input: &str) -> String {
187        match self {
188            #[cfg(feature = "renderer")]
189            Self::Markdown => {
190                // Set up options and parser. Besides the CommonMark standard
191                // we enable some useful extras.
192
193                let options = Options::all();
194                let parser = Parser::new_ext(input, options);
195                let parser = SyntaxPreprocessor::new(parser);
196
197                // Write to String buffer.
198                let mut html_output: String = String::with_capacity(input.len() * 3 / 2);
199                html::push_html(&mut html_output, parser);
200                html_output
201            }
202
203            #[cfg(feature = "renderer")]
204            Self::ReStructuredText => {
205                // Note, that the current ReStructuredText renderer requires
206                // files to end with no new line.
207                let rest_input = input.trim();
208                // Write to String buffer.
209                let mut html_output: Vec<u8> = Vec::with_capacity(rest_input.len() * 3 / 2);
210                const STANDALONE: bool = false; // Don't wrap in `<!doctype html><html></html>`.
211                rst_parser::parse(rest_input.trim_start())
212                    .map(|doc| rst_renderer::render_html(&doc, &mut html_output, STANDALONE))
213                    .map_or_else(
214                        |e| NoteError::RstParse { msg: e.to_string() }.to_string(),
215                        |_| from_utf8(&html_output).unwrap_or_default().to_string(),
216                    )
217            }
218
219            Self::Html => input.to_string(),
220
221            Self::PlainText | Self::RendererDisabled => text_links2html(input),
222
223            Self::Unkown => text_rawlinks2html(input),
224
225            _ => String::new(),
226        }
227    }
228}
229
230impl From<&Path> for MarkupLanguage {
231    /// Is the file extension ` at the end of the given path listed in
232    /// `file.extensions`? Return the corresponding `MarkupLanguage`.
233    /// Only the extension of `Path` is considered here.
234    #[inline]
235    fn from(path: &Path) -> Self {
236        let file_extension = path
237            .extension()
238            .unwrap_or_default()
239            .to_str()
240            .unwrap_or_default();
241
242        Self::from(file_extension)
243    }
244}
245
246impl From<&str> for MarkupLanguage {
247    /// Is `file_extension` listed in `file.extensions`?
248    #[inline]
249    fn from(file_extension: &str) -> Self {
250        let scheme = &LIB_CFG.read_recursive().scheme[SETTINGS.read_recursive().current_scheme];
251
252        for e in &scheme.filename.extensions {
253            if e.0 == file_extension {
254                return e.2;
255            }
256        }
257
258        // Nothing was found.
259        MarkupLanguage::None
260    }
261}
262
// Unit tests for `InputConverter` and `MarkupLanguage`.
// The extension lookups go through the global `LIB_CFG` and `SETTINGS`, so
// the expected values presumably reflect the default configuration.
// NOTE(review): `test_filter_tags` calls `filter_tags()`, which is gated
// behind `cfg(feature = "renderer")`, but the test itself carries no such
// gate; the rendering and conversion tests likewise expect the renderer to
// be compiled in. Confirm the tests are only run with that feature enabled.
#[cfg(test)]
mod tests {

    use super::InputConverter;
    use super::MarkupLanguage;
    use std::path::Path;

    /// Checks the lookup of a `MarkupLanguage` by `&Path` and by `&str`.
    #[test]
    fn test_markuplanguage_from() {
        // A path: only its extension `md` is looked up.
        let path = Path::new("/dir/file.md");
        assert_eq!(MarkupLanguage::from(path), MarkupLanguage::Markdown);

        // A path without extension: here `md` is the file name.
        let path = Path::new("md");
        assert_eq!(MarkupLanguage::from(path), MarkupLanguage::None);
        // A `&str` is taken as the extension itself, so a whole path does
        // not match any entry.
        let ext = "/dir/file.md";
        assert_eq!(MarkupLanguage::from(ext), MarkupLanguage::None);

        // A bare extension given as `&str`.
        let ext = "md";
        assert_eq!(MarkupLanguage::from(ext), MarkupLanguage::Markdown);

        // Another bare extension given as `&str`.
        let ext = "rst";
        assert_eq!(MarkupLanguage::from(ext), MarkupLanguage::ReStructuredText);
    }

    /// Renders the same hyperlink from Markdown and from ReStructuredText.
    #[test]
    fn test_markuplanguage_render() {
        // Markdown
        let input = "[Link text](https://domain.invalid/)";
        let expected: &str = "<p><a href=\"https://domain.invalid/\">Link text</a></p>\n";

        let result = MarkupLanguage::Markdown.render(input);
        assert_eq!(result, expected);

        // ReStructuredText
        let input = "`Link text <https://domain.invalid/>`_";
        let expected: &str = "<p><a href=\"https://domain.invalid/\">Link text</a></p>";

        let result = MarkupLanguage::ReStructuredText.render(input);
        assert_eq!(result, expected);
    }

    /// Checks the HTML to Markdown converter built for the extension `md`.
    #[test]
    fn test_input_converter_md() {
        let ic = InputConverter::build("md");
        // `<div>` and `<span>` wrappers leave no trace in the Markdown.
        let input: &str =
            "<div id=\"videopodcast\">outside <span id=\"pills\">inside</span>\n</div>";
        let expected: &str = "outside inside";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        // A hyperlink becomes an inline link.
        let input: &str = r#"<p><a href="/my_uri">link</a></p>"#;
        let expected: &str = "[link](/my_uri)";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        // A link destination containing a space is enclosed in `<...>`.
        // [CommonMark: Example 489](https://spec.commonmark.org/0.31.2/#example-489)
        let input: &str = r#"<p><a href="/my uri">link</a></p>"#;
        let expected: &str = "[link](</my uri>)";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        // A percent encoded space (`%20`) gives the same result as a
        // literal space.
        // [CommonMark: Example 489](https://spec.commonmark.org/0.31.2/#example-489)
        let input: &str = r#"<p><a href="/my%20uri">link</a></p>"#;
        let expected: &str = "[link](</my uri>)";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        //
        // We want ATX style headers.
        let input: &str = r#"<p><h1>Title</h1></p>"#;
        let expected: &str = "# Title";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);
    }

    /// Checks that only the tags in `FILTERED_TAGS` are removed.
    #[test]
    fn test_filter_tags() {
        // `<div...>`, `<span...>` and their closing tags are removed, the
        // `<p>` tag and all text including the newline are kept.
        let input: &str =
            "A<div id=\"videopodcast\">out<p>side <span id=\"pills\">inside</span>\n</div>B";
        let expected: &str = "Aout<p>side inside\nB";

        let result = InputConverter::filter_tags(input.to_string());
        assert_eq!(result, expected);

        // Stray `<` characters and the empty tag `<>` are kept, only the
        // `<div>` is removed.
        let input: &str = "A<B<C <div>D<E<p>F<>G";
        let expected: &str = "A<B<C D<E<p>F<>G";

        let result = InputConverter::filter_tags(input.to_string());
        assert_eq!(result, expected);
    }
}
367// `rewrite_rel_links=true`