simple_markdown_parser/
utilities.rs

1use super::{parse, MarkdownElement, RawText};
2
3#[cfg(target_family = "wasm")]
4use wasm_bindgen::prelude::*;
5
6/// # Errors
7/// errors from markdown parsing
8#[allow(clippy::result_unit_err)]
9pub fn parse_with_header_information<'a>(
10    on: &'a str,
11    mut cb: impl for<'b> FnMut(&'b Vec<RawText<'a>>, MarkdownElement<'a>),
12) -> Result<(), ()> {
13    let mut header_chain = Vec::new();
14    parse(on, |element| {
15        if let MarkdownElement::Heading { level, text } = element {
16            let raw_level = level as usize - 1;
17            if header_chain.len() < raw_level {
18                header_chain.extend((header_chain.len()..raw_level).map(|_| RawText("")));
19            } else {
20                let _ = header_chain.drain(raw_level..);
21            }
22            cb(&header_chain, element);
23            header_chain.push(text);
24        } else {
25            cb(&header_chain, element);
26        }
27    })
28}
29
30/// # Errors
31/// errors from markdown parsing
32#[allow(clippy::result_unit_err)]
33pub fn parse_blocks<'a>(
34    on: &'a str,
35    mut cb: impl for<'b> FnMut(&'b Vec<RawText<'a>>, &'b [MarkdownElement<'a>]),
36) -> Result<(), ()> {
37    let mut header_chain = Vec::new();
38    let mut inner = Vec::new();
39
40    let result = parse(on, |element| {
41        if let MarkdownElement::Heading { level, text } = element {
42            // Run when next one begins
43            {
44                cb(&header_chain, &inner);
45                let _ = inner.drain(..);
46            }
47
48            let raw_level = level as usize - 1;
49            if header_chain.len() < raw_level {
50                header_chain.extend((header_chain.len()..raw_level).map(|_| RawText("")));
51            } else {
52                let _ = header_chain.drain(raw_level..);
53            }
54            header_chain.push(text);
55        } else {
56            inner.push(element);
57        }
58    });
59
60    if result.is_ok() {
61        cb(&header_chain, &inner);
62    }
63
64    result
65}
66
/// A code block extracted from a markdown document by `extract_code_blocks`, together with the
/// text collected from the same heading section.
///
/// Fields are public so that native callers can read the extracted data (on WASM the value is
/// serialised across the ABI instead).
#[derive(Default, Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    target_family = "wasm",
    derive(tsify::Tsify, serde::Serialize),
    tsify(into_wasm_abi)
)]
pub struct CodeBlock {
    /// Heading chain the block was found under
    pub location: Vec<String>,
    /// Language tag of the code block
    pub language: String,
    /// Content of the code block
    pub code: String,
    /// From quotes and content
    pub information: String,
    /// From list items
    pub items: Vec<String>,
}
82
83#[cfg(target_family = "wasm")]
84#[derive(tsify::Tsify, serde::Serialize)]
85#[tsify(into_wasm_abi)]
86pub struct VecCodeBlock(Vec<CodeBlock>);
87
88#[cfg(target_family = "wasm")]
89impl From<Vec<CodeBlock>> for VecCodeBlock {
90    fn from(blocks: Vec<CodeBlock>) -> Self {
91        Self(blocks)
92    }
93}
94
95#[cfg(not(target_family = "wasm"))]
96pub type VecCodeBlock = Vec<CodeBlock>;
97
98#[must_use]
99#[cfg_attr(target_family = "wasm", wasm_bindgen)]
100pub fn extract_code_blocks(on: &str) -> VecCodeBlock {
101    let mut header_chain: Vec<RawText> = Vec::new();
102    let mut blocks: Vec<CodeBlock> = Vec::new();
103    let mut current_block = CodeBlock::default();
104    // let mut blocks = on.split("\n").collect::<Vec<_>>();
105
106    let _result = parse(on, |element| {
107        if let MarkdownElement::Heading { level, text } = element {
108            let mut block = std::mem::take(&mut current_block);
109            if !block.code.is_empty() {
110                block.location = header_chain.iter().map(|link| link.0.to_owned()).collect();
111                blocks.push(block);
112            }
113
114            let raw_level = level as usize - 1;
115            if header_chain.len() < raw_level {
116                header_chain.extend((header_chain.len()..raw_level).map(|_| RawText("")));
117            } else {
118                let _ = header_chain.drain(raw_level..);
119            }
120            header_chain.push(text);
121        } else if let MarkdownElement::CodeBlock { language, code } = element {
122            language.clone_into(&mut current_block.language);
123            code.clone_into(&mut current_block.code);
124        } else if let MarkdownElement::Paragraph(content) = element {
125            current_block.information.push_str(content.0);
126        } else if let MarkdownElement::Quote(content) = element {
127            current_block.information.push_str(content.0);
128        } else if let MarkdownElement::ListItem { level: _, text } = element {
129            current_block.items.push(text.0.to_owned());
130        }
131    });
132
133    if !current_block.code.is_empty() {
134        blocks.push(current_block);
135    }
136
137    // .into for WASM fix
138    #[allow(clippy::useless_conversion)]
139    blocks.into()
140}
141
// Unfortunately the same definition has to be written twice because of
// <https://users.rust-lang.org/t/cant-use-cfg-attr-with-wasm-bindgen-skip/112072>

/// A slide produced by `extract_slides` (WASM variant: fields are private and read through the
/// getters on the `impl`).
#[cfg(target_family = "wasm")]
#[wasm_bindgen]
#[derive(Debug, Default)]
pub struct Slide {
    /// Heading chain associated with the slide
    location: Vec<String>,
    /// Raw markdown source of the slide
    markdown_content: String,
}

/// A slide produced by `extract_slides` (native variant: fields are public).
#[cfg(not(target_family = "wasm"))]
#[derive(Debug, Default)]
pub struct Slide {
    /// Heading chain associated with the slide
    pub location: Vec<String>,
    /// Raw markdown source of the slide
    pub markdown_content: String,
}
157
158#[cfg_attr(target_family = "wasm", wasm_bindgen)]
159impl Slide {
160    #[must_use]
161    #[cfg_attr(target_family = "wasm", wasm_bindgen(getter))]
162    pub fn location(&self) -> Vec<String> {
163        self.location.clone()
164    }
165
166    #[must_use]
167    #[cfg_attr(target_family = "wasm", wasm_bindgen(getter))]
168    pub fn markdown_content(&self) -> String {
169        self.markdown_content.clone()
170    }
171
172    #[must_use]
173    #[cfg(target_family = "wasm")]
174    #[wasm_bindgen]
175    pub fn to_html(&self, emitter: Option<crate::extras::emit::FeatureEmitterWASM>) -> String {
176        crate::extras::emit::markdown_to_html_string(&self.markdown_content, emitter)
177    }
178
179    #[must_use]
180    #[cfg(not(target_family = "wasm"))]
181    pub fn to_html(&self, emitter: &mut impl crate::extras::emit::FeatureEmitter) -> String {
182        let mut bytes: Vec<u8> = Vec::new();
183        let _ = crate::extras::emit::markdown_to_html(&self.markdown_content, &mut bytes, emitter);
184        match String::from_utf8(bytes) {
185            Ok(result) => result,
186            Err(_) => String::from("Non Utf8 output or markdown parser error"),
187        }
188    }
189}
190
191#[must_use]
192#[cfg_attr(target_family = "wasm", wasm_bindgen)]
193pub fn extract_slides(on: &str) -> Vec<Slide> {
194    let mut header_chain: Vec<RawText> = Vec::new();
195    let mut slides: Vec<Slide> = Vec::new();
196    let mut current_slide = Slide::default();
197    let mut start: usize = 0;
198
199    // TODO could just use `.lines` rather than whole `parse`
200    let _result = parse(on, |element| {
201        if let MarkdownElement::Heading { level, text } = element {
202            if level < 3 {
203                let mut slide = std::mem::take(&mut current_slide);
204                let end = text.0.as_ptr() as usize - on.as_ptr() as usize;
205                let content = &on[(start + level as usize)..end];
206                if !content.trim().is_empty() {
207                    content.clone_into(&mut current_slide.markdown_content);
208                    slide.location = header_chain.iter().map(|link| link.0.to_owned()).collect();
209                    slides.push(slide);
210                }
211                // TODO sub_ptr https://github.com/rust-lang/rust/issues/95892
212                start = (text.0.as_ptr() as usize - on.as_ptr() as usize) + text.0.len();
213            }
214
215            let raw_level = level as usize - 1;
216            if header_chain.len() < raw_level {
217                header_chain.extend((header_chain.len()..raw_level).map(|_| RawText("")));
218            } else {
219                let _ = header_chain.drain(raw_level..);
220            }
221            header_chain.push(text);
222        }
223    });
224
225    {
226        let content = &on[start..];
227        if !content.trim().is_empty() {
228            content.clone_into(&mut current_slide.markdown_content);
229            current_slide.location = header_chain.iter().map(|link| link.0.to_owned()).collect();
230            slides.push(current_slide);
231        }
232    }
233
234    slides
235}
236
237pub mod lexical_analysis {
238    use super::parse;
239
240    pub trait LexicalAnalyser {
241        fn word(&mut self, word: &str);
242
243        /// **WARNING** called with decoration
244        fn sentence(&mut self, sentence: &str);
245
246        /// **WARNING** called with decoration
247        fn paragraph(&mut self, paragraph: &str);
248    }
249
250    pub fn lexical_analysis(on: &str, analyser: &mut impl LexicalAnalyser) {
251        fn narrow_word(word: &str) -> &str {
252            let word = word.trim();
253            let word = word.strip_prefix('(').unwrap_or(word);
254            let word = word.strip_suffix('.').unwrap_or(word);
255            let word = word.strip_suffix(',').unwrap_or(word);
256            let word = word.strip_suffix(')').unwrap_or(word);
257            word
258        }
259
260        let _result = parse(on, |element| {
261            if let Some(text) = element.inner_paragraph_raw() {
262                analyser.paragraph(text);
263            }
264
265            if let Some(text) = element.parts_like() {
266                for sentence in text.0.split('.') {
267                    analyser.sentence(sentence);
268                }
269
270                for part in text.parts() {
271                    for word in part.no_decoration().split(&[' ', ',', '.', '!', '?']) {
272                        let word = narrow_word(word);
273                        if !word.is_empty() {
274                            analyser.word(word);
275                        }
276                    }
277                }
278            } else {
279                // Might be missing here
280            }
281        });
282    }
283}