tpnote_lib/
markup_language.rs1use crate::config::LIB_CFG;
3use crate::error::NoteError;
4#[cfg(feature = "renderer")]
5use crate::highlight::SyntaxPreprocessor;
6#[cfg(feature = "renderer")]
7use crate::html2md::convert_html_to_md;
8use crate::settings::SETTINGS;
9use parse_hyperlinks::renderer::text_links2html;
10use parse_hyperlinks::renderer::text_rawlinks2html;
11#[cfg(feature = "renderer")]
12use pulldown_cmark::{Options, Parser, html};
13#[cfg(feature = "renderer")]
14use rst_parser;
15#[cfg(feature = "renderer")]
16use rst_renderer;
17use serde::{Deserialize, Serialize};
18use std::path::Path;
19#[cfg(feature = "renderer")]
20use std::str::from_utf8;
21
/// Tag prefixes removed by `InputConverter::filter_tags()`: the opening and
/// closing forms of `span` and `div`. The opening forms carry no `>` so that
/// tags with attributes match as well.
///
/// Only compiled into test builds that have the `renderer` feature enabled.
#[cfg(all(test, feature = "renderer"))]
const FILTERED_TAGS: &[&str; 4] = &["<span", "</span>", "<div", "</div>"];
27
28#[non_exhaustive]
31#[derive(Default, Debug, Hash, Clone, Eq, PartialEq, Deserialize, Serialize, Copy)]
32pub enum InputConverter {
33 ToMarkdown,
35 #[default]
37 Disabled,
38 PassThrough,
40}
41
42impl InputConverter {
43 #[inline]
48 pub(crate) fn build(extension: &str) -> fn(String) -> Result<String, NoteError> {
49 let settings = SETTINGS.read_recursive();
50 let scheme = &LIB_CFG.read_recursive().scheme[settings.current_scheme];
51
52 let mut input_converter = InputConverter::default();
53 for e in &scheme.filename.extensions {
54 if e.0 == *extension {
55 input_converter = e.1;
56 break;
57 }
58 }
59
60 match input_converter {
61 #[cfg(feature = "renderer")]
62 InputConverter::ToMarkdown => |s| convert_html_to_md(&s),
63
64 InputConverter::Disabled => {
65 |_: String| -> Result<String, NoteError> { Err(NoteError::HtmlToMarkupDisabled) }
66 }
67
68 _ => Ok,
69 }
70 }
71
72 #[cfg(test)] #[cfg(feature = "renderer")]
78 fn filter_tags(text: String) -> String {
79 let mut res = String::new();
80 let mut i = 0;
81 while let Some(mut start) = text[i..].find('<') {
82 if let Some(mut end) = text[i + start..].find('>') {
83 end += 1;
84 if let Some(new_start) = text[i + start + 1..i + start + end].rfind('<') {
86 start += new_start + 1;
87 end -= new_start + 1;
88 }
89
90 let filter_tag = FILTERED_TAGS
92 .iter()
93 .any(|&pat| text[i + start..i + start + end].starts_with(pat));
94
95 if filter_tag {
96 res.push_str(&text[i..i + start]);
97 } else {
98 res.push_str(&text[i..i + start + end]);
99 };
100 i = i + start + end;
101 } else {
102 res.push_str(&text[i..i + start + 1]);
103 i = i + start + 1;
104 }
105 }
106 if i > 0 {
107 res.push_str(&text[i..]);
108 if res != text {
109 log::trace!("`html_to_markup` filter: removed tags in \"{}\"", text);
110 }
111 res
112 } else {
113 text
114 }
115 }
116}
117
118#[non_exhaustive]
120#[derive(Default, Debug, Hash, Clone, Eq, PartialEq, Deserialize, Serialize, Copy)]
121pub enum MarkupLanguage {
122 Markdown,
123 ReStructuredText,
124 Html,
125 PlainText,
126 RendererDisabled,
128 Unkown,
131 #[default]
133 None,
134}
135
136impl MarkupLanguage {
137 pub fn or(self, rhs: Self) -> Self {
139 match self {
140 MarkupLanguage::None => rhs,
141 _ => self,
142 }
143 }
144
145 pub fn mine_type(&self) -> Option<&'static str> {
148 match self {
149 Self::Markdown => Some("text/markodwn"),
150 Self::ReStructuredText => Some("x-rst"),
151 Self::Html => Some("text/html"),
152 Self::PlainText => Some("text/plain"),
153 Self::RendererDisabled => Some("text/plain"),
154 Self::Unkown => Some("text/plain"),
155 _ => None,
156 }
157 }
158
159 pub fn is_some(&self) -> bool {
163 !matches!(self, Self::None)
164 }
165
166 pub fn is_none(&self) -> bool {
170 matches!(self, Self::None)
171 }
172
173 pub fn render(&self, input: &str) -> String {
187 match self {
188 #[cfg(feature = "renderer")]
189 Self::Markdown => {
190 let options = Options::all();
194 let parser = Parser::new_ext(input, options);
195 let parser = SyntaxPreprocessor::new(parser);
196
197 let mut html_output: String = String::with_capacity(input.len() * 3 / 2);
199 html::push_html(&mut html_output, parser);
200 html_output
201 }
202
203 #[cfg(feature = "renderer")]
204 Self::ReStructuredText => {
205 let rest_input = input.trim();
208 let mut html_output: Vec<u8> = Vec::with_capacity(rest_input.len() * 3 / 2);
210 const STANDALONE: bool = false; rst_parser::parse(rest_input.trim_start())
212 .map(|doc| rst_renderer::render_html(&doc, &mut html_output, STANDALONE))
213 .map_or_else(
214 |e| NoteError::RstParse { msg: e.to_string() }.to_string(),
215 |_| from_utf8(&html_output).unwrap_or_default().to_string(),
216 )
217 }
218
219 Self::Html => input.to_string(),
220
221 Self::PlainText | Self::RendererDisabled => text_links2html(input),
222
223 Self::Unkown => text_rawlinks2html(input),
224
225 _ => String::new(),
226 }
227 }
228}
229
230impl From<&Path> for MarkupLanguage {
231 #[inline]
235 fn from(path: &Path) -> Self {
236 let file_extension = path
237 .extension()
238 .unwrap_or_default()
239 .to_str()
240 .unwrap_or_default();
241
242 Self::from(file_extension)
243 }
244}
245
246impl From<&str> for MarkupLanguage {
247 #[inline]
249 fn from(file_extension: &str) -> Self {
250 let scheme = &LIB_CFG.read_recursive().scheme[SETTINGS.read_recursive().current_scheme];
251
252 for e in &scheme.filename.extensions {
253 if e.0 == file_extension {
254 return e.2;
255 }
256 }
257
258 MarkupLanguage::None
260 }
261}
262
#[cfg(test)]
mod tests {

    // Only the tests that require the `renderer` feature use this type.
    #[cfg(feature = "renderer")]
    use super::InputConverter;
    use super::MarkupLanguage;
    use std::path::Path;

    /// Checks the lookup of the markup language by path and by extension.
    #[test]
    fn test_markuplanguage_from() {
        // A path with a known extension.
        let path = Path::new("/dir/file.md");
        assert_eq!(MarkupLanguage::from(path), MarkupLanguage::Markdown);

        // A path without extension: `md` is the file name here.
        let path = Path::new("md");
        assert_eq!(MarkupLanguage::from(path), MarkupLanguage::None);

        // A whole path is not an extension.
        let ext = "/dir/file.md";
        assert_eq!(MarkupLanguage::from(ext), MarkupLanguage::None);

        let ext = "md";
        assert_eq!(MarkupLanguage::from(ext), MarkupLanguage::Markdown);

        let ext = "rst";
        assert_eq!(MarkupLanguage::from(ext), MarkupLanguage::ReStructuredText);
    }

    /// Checks the HTML rendition of Markdown and ReStructuredText. Without
    /// the `renderer` feature both render to an empty string, therefore the
    /// test is only built when the feature is enabled.
    #[cfg(feature = "renderer")]
    #[test]
    fn test_markuplanguage_render() {
        // Markdown
        let input = "[Link text](https://domain.invalid/)";
        let expected: &str = "<p><a href=\"https://domain.invalid/\">Link text</a></p>\n";

        let result = MarkupLanguage::Markdown.render(input);
        assert_eq!(result, expected);

        // ReStructuredText
        let input = "`Link text <https://domain.invalid/>`_";
        let expected: &str = "<p><a href=\"https://domain.invalid/\">Link text</a></p>";

        let result = MarkupLanguage::ReStructuredText.render(input);
        assert_eq!(result, expected);
    }

    /// Checks the HTML to Markdown conversion. Without the `renderer`
    /// feature the converter falls back to pass-through, therefore the test
    /// is only built when the feature is enabled.
    #[cfg(feature = "renderer")]
    #[test]
    fn test_input_converter_md() {
        let ic = InputConverter::build("md");

        // `div` and `span` wrappers vanish.
        let input: &str =
            "<div id=\"videopodcast\">outside <span id=\"pills\">inside</span>\n</div>";
        let expected: &str = "outside inside";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        // A plain link.
        let input: &str = r#"<p><a href="/my_uri">link</a></p>"#;
        let expected: &str = "[link](/my_uri)";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        // A link destination containing a space.
        let input: &str = r#"<p><a href="/my uri">link</a></p>"#;
        let expected: &str = "[link](</my uri>)";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        // A link destination containing a percent encoded space.
        let input: &str = r#"<p><a href="/my%20uri">link</a></p>"#;
        let expected: &str = "[link](</my uri>)";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);

        // A heading.
        let input: &str = r#"<p><h1>Title</h1></p>"#;
        let expected: &str = "# Title";

        let result = ic(input.to_string());
        assert_eq!(result.unwrap(), expected);
    }

    /// Checks the tag filter. `InputConverter::filter_tags()` exists only
    /// with the `renderer` feature, therefore the test is gated the same
    /// way.
    #[cfg(feature = "renderer")]
    #[test]
    fn test_filter_tags() {
        // `div` and `span` are removed, `p` is kept.
        let input: &str =
            "A<div id=\"videopodcast\">out<p>side <span id=\"pills\">inside</span>\n</div>B";
        let expected: &str = "Aout<p>side inside\nB";

        let result = InputConverter::filter_tags(input.to_string());
        assert_eq!(result, expected);

        // Stray opening brackets are copied verbatim.
        let input: &str = "A<B<C <div>D<E<p>F<>G";
        let expected: &str = "A<B<C D<E<p>F<>G";

        let result = InputConverter::filter_tags(input.to_string());
        assert_eq!(result, expected);
    }
}
367