tpnote_lib/
markup_language.rs1use crate::config::LIB_CFG;
3use crate::error::NoteError;
4#[cfg(feature = "renderer")]
5use crate::highlight::SyntaxPreprocessor;
6#[cfg(feature = "renderer")]
7use crate::html2md::convert_html_to_md;
8use crate::settings::SETTINGS;
9use parse_hyperlinks::renderer::text_links2html;
10use parse_hyperlinks::renderer::text_rawlinks2html;
11#[cfg(feature = "renderer")]
12use pulldown_cmark::{html, Options, Parser};
13#[cfg(feature = "renderer")]
14use rst_parser;
15#[cfg(feature = "renderer")]
16use rst_renderer;
17use serde::{Deserialize, Serialize};
18use std::path::Path;
19#[cfg(feature = "renderer")]
20use std::str::from_utf8;
21
/// Tag prefixes that `InputConverter::filter_tags()` removes from its input:
/// opening and closing `span` and `div` tags. The opening patterns carry no
/// closing `>` so that tags with attributes (e.g. `<div id="x">`) match too.
///
/// Only compiled for test builds with the `renderer` feature enabled, the
/// same conditions under which `filter_tags()` itself is compiled.
#[cfg(test)] #[cfg(feature = "renderer")]
const FILTERED_TAGS: &[&str; 4] = &["<span", "</span>", "<div", "</div>"];
27
/// Selects how incoming (HTML) text is converted before it is stored as note
/// content. The variant for a given file extension is looked up in the
/// current scheme's `filename.extensions` table by `InputConverter::build()`.
#[non_exhaustive]
#[derive(Default, Debug, Hash, Clone, Eq, PartialEq, Deserialize, Serialize, Copy)]
pub enum InputConverter {
    /// Convert the HTML input to Markdown with `convert_html_to_md()`.
    /// Only effective when the `renderer` feature is enabled; without it
    /// `build()` treats this variant like `PassThrough`.
    ToMarkdown,
    /// Conversion is switched off: the function returned by `build()` always
    /// fails with `NoteError::HtmlToMarkupDisabled`. This is the default and
    /// is also used when the extension is not listed in the scheme.
    #[default]
    Disabled,
    /// Hand the input back unchanged.
    PassThrough,
}
41
42impl InputConverter {
43    #[inline]
48    pub(crate) fn build(extension: &str) -> fn(String) -> Result<String, NoteError> {
49        let settings = SETTINGS.read_recursive();
50        let scheme = &LIB_CFG.read_recursive().scheme[settings.current_scheme];
51
52        let mut input_converter = InputConverter::default();
53        for e in &scheme.filename.extensions {
54            if e.0 == *extension {
55                input_converter = e.1;
56                break;
57            }
58        }
59
60        match input_converter {
61            #[cfg(feature = "renderer")]
62            InputConverter::ToMarkdown => |s| convert_html_to_md(&s),
63
64            InputConverter::Disabled => {
65                |_: String| -> Result<String, NoteError> { Err(NoteError::HtmlToMarkupDisabled) }
66            }
67
68            _ => Ok,
69        }
70    }
71
72    #[cfg(test)] #[cfg(feature = "renderer")]
78    fn filter_tags(text: String) -> String {
79        let mut res = String::new();
80        let mut i = 0;
81        while let Some(mut start) = text[i..].find('<') {
82            if let Some(mut end) = text[i + start..].find('>') {
83                end += 1;
84                if let Some(new_start) = text[i + start + 1..i + start + end].rfind('<') {
86                    start += new_start + 1;
87                    end -= new_start + 1;
88                }
89
90                let filter_tag = FILTERED_TAGS
92                    .iter()
93                    .any(|&pat| text[i + start..i + start + end].starts_with(pat));
94
95                if filter_tag {
96                    res.push_str(&text[i..i + start]);
97                } else {
98                    res.push_str(&text[i..i + start + end]);
99                };
100                i = i + start + end;
101            } else {
102                res.push_str(&text[i..i + start + 1]);
103                i = i + start + 1;
104            }
105        }
106        if i > 0 {
107            res.push_str(&text[i..]);
108            if res != text {
109                log::trace!("`html_to_markup` filter: removed tags in \"{}\"", text);
110            }
111            res
112        } else {
113            text
114        }
115    }
116}
117
/// The markup language of a note's content. It decides how
/// `MarkupLanguage::render()` turns the content into HTML and which MIME type
/// `MarkupLanguage::mine_type()` reports.
#[non_exhaustive]
#[derive(Default, Debug, Hash, Clone, Eq, PartialEq, Deserialize, Serialize, Copy)]
pub enum MarkupLanguage {
    /// Markdown; rendered with `pulldown_cmark` when the `renderer` feature
    /// is enabled.
    Markdown,
    /// reStructuredText; rendered with `rst_parser`/`rst_renderer` when the
    /// `renderer` feature is enabled.
    ReStructuredText,
    /// HTML; `render()` returns the input unchanged.
    Html,
    /// Plain text; `render()` passes it through `text_links2html()`.
    PlainText,
    /// Rendered exactly like `PlainText`.
    RendererDisabled,
    /// Rendered with `text_rawlinks2html()`. The spelling (sic) is part of
    /// the public API and of the serialized representation.
    Unkown,
    /// No markup language assigned. This is the default, it is what the
    /// `From` conversions return for an extension that is not configured,
    /// and `render()` yields an empty string for it.
    #[default]
    None,
}
135
136impl MarkupLanguage {
137    pub fn or(self, rhs: Self) -> Self {
139        match self {
140            MarkupLanguage::None => rhs,
141            _ => self,
142        }
143    }
144
145    pub fn mine_type(&self) -> Option<&'static str> {
148        match self {
149            Self::Markdown => Some("text/markodwn"),
150            Self::ReStructuredText => Some("x-rst"),
151            Self::Html => Some("text/html"),
152            Self::PlainText => Some("text/plain"),
153            Self::RendererDisabled => Some("text/plain"),
154            Self::Unkown => Some("text/plain"),
155            _ => None,
156        }
157    }
158
159    pub fn is_some(&self) -> bool {
163        !matches!(self, Self::None)
164    }
165
166    pub fn is_none(&self) -> bool {
170        matches!(self, Self::None)
171    }
172
173    pub fn render(&self, input: &str) -> String {
187        match self {
188            #[cfg(feature = "renderer")]
189            Self::Markdown => {
190                let options = Options::all();
194                let parser = Parser::new_ext(input, options);
195                let parser = SyntaxPreprocessor::new(parser);
196
197                let mut html_output: String = String::with_capacity(input.len() * 3 / 2);
199                html::push_html(&mut html_output, parser);
200                html_output
201            }
202
203            #[cfg(feature = "renderer")]
204            Self::ReStructuredText => {
205                let rest_input = input.trim();
207                let mut html_output: Vec<u8> = Vec::with_capacity(rest_input.len() * 3 / 2);
209                const STANDALONE: bool = false; rst_parser::parse(rest_input.trim_start())
211                    .map(|doc| rst_renderer::render_html(&doc, &mut html_output, STANDALONE))
212                    .map_or_else(
213                        |e| NoteError::RstParse { msg: e.to_string() }.to_string(),
214                        |_| from_utf8(&html_output).unwrap_or_default().to_string(),
215                    )
216            }
217
218            Self::Html => input.to_string(),
219
220            Self::PlainText | Self::RendererDisabled => text_links2html(input),
221
222            Self::Unkown => text_rawlinks2html(input),
223
224            _ => String::new(),
225        }
226    }
227}
228
229impl From<&Path> for MarkupLanguage {
230    #[inline]
235    fn from(path: &Path) -> Self {
236        let file_extension = path
237            .extension()
238            .unwrap_or_default()
239            .to_str()
240            .unwrap_or_default();
241
242        Self::from(file_extension)
243    }
244}
245
246impl From<&str> for MarkupLanguage {
247    #[inline]
249    fn from(file_extension: &str) -> Self {
250        let scheme = &LIB_CFG.read_recursive().scheme[SETTINGS.read_recursive().current_scheme];
251
252        for e in &scheme.filename.extensions {
253            if e.0 == file_extension {
254                return e.2;
255            }
256        }
257
258        MarkupLanguage::None
260    }
261}
262
/// Unit tests. They rely on the extension table of the active scheme mapping
/// `md` to Markdown (with the `ToMarkdown` input converter) and `rst` to
/// reStructuredText. Tests that exercise rendering or HTML conversion are
/// compiled only with the `renderer` feature, because the code they call is
/// compiled only then.
#[cfg(test)]
mod tests {

    #[cfg(feature = "renderer")]
    use super::InputConverter;
    use super::MarkupLanguage;
    use std::path::Path;

    #[test]
    fn test_markuplanguage_from() {
        // A path with a known extension.
        let path = Path::new("/dir/file.md");
        assert_eq!(MarkupLanguage::from(path), MarkupLanguage::Markdown);

        // As a path, `md` is a file name without any extension.
        let path = Path::new("md");
        assert_eq!(MarkupLanguage::from(path), MarkupLanguage::None);

        // A `&str` is taken as the extension itself, not as a path.
        let ext = "/dir/file.md";
        assert_eq!(MarkupLanguage::from(ext), MarkupLanguage::None);

        let ext = "md";
        assert_eq!(MarkupLanguage::from(ext), MarkupLanguage::Markdown);

        let ext = "rst";
        assert_eq!(MarkupLanguage::from(ext), MarkupLanguage::ReStructuredText);
    }

    #[cfg(feature = "renderer")]
    #[test]
    fn test_markuplanguage_render() {
        // Markdown
        let input = "[Link text](https://domain.invalid/)";
        let expected: &str = "<p><a href=\"https://domain.invalid/\">Link text</a></p>\n";

        let result = MarkupLanguage::Markdown.render(input);
        assert_eq!(result, expected);

        // reStructuredText
        let input = "`Link text <https://domain.invalid/>`_";
        let expected: &str = "<p><a href=\"https://domain.invalid/\">Link text</a></p>\n";

        let result = MarkupLanguage::ReStructuredText.render(input);
        assert_eq!(result, expected);
    }

    #[cfg(feature = "renderer")]
    #[test]
    fn test_input_converter_md() {
        let ic = InputConverter::build("md");

        // `div` and `span` wrappers leave no trace in the output.
        let input: &str =
            "<div id=\"videopodcast\">outside <span id=\"pills\">inside</span>\n</div>";
        let expected: &str = "outside inside";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        // A plain link.
        let input: &str = r#"<p><a href="/my_uri">link</a></p>"#;
        let expected: &str = "[link](/my_uri)";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        // A destination with a space is wrapped in angle brackets.
        let input: &str = r#"<p><a href="/my uri">link</a></p>"#;
        let expected: &str = "[link](</my uri>)";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        // A percent encoded space is decoded and treated the same way.
        let input: &str = r#"<p><a href="/my%20uri">link</a></p>"#;
        let expected: &str = "[link](</my uri>)";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        // A heading.
        let input: &str = r#"<p><h1>Title</h1></p>"#;
        let expected: &str = "# Title";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);
    }

    #[cfg(feature = "renderer")]
    #[test]
    fn test_filter_tags() {
        // `div` and `span` tags are removed, `p` and the text are kept.
        let input: &str =
            "A<div id=\"videopodcast\">out<p>side <span id=\"pills\">inside</span>\n</div>B";
        let expected: &str = "Aout<p>side inside\nB";

        let result = InputConverter::filter_tags(input.to_string());
        assert_eq!(result, expected);

        // Stray `<` characters and the empty tag `<>` are copied verbatim.
        let input: &str = "A<B<C <div>D<E<p>F<>G";
        let expected: &str = "A<B<C D<E<p>F<>G";

        let result = InputConverter::filter_tags(input.to_string());
        assert_eq!(result, expected);
    }
}
367