simple_md_ul_parser/
lib.rs

1/* Notes:
2 * - RootNode is a separate struct to avoid accidental creation of NonRootNodes without a value.
3 * - Does not yet support checkmarks, but this is planned.
4 * - Assumes \n line endings.
5 * - Does not adhere to the CommonMark spec for lists and list items yet.
6 *   - Should parse **intended** types of lists acceptably well. */
7
8use std::fmt;
9
/// Error returned when text cannot be parsed as a supported unordered list.
///
/// The type carries no detail: every parse failure produces the same value.
#[derive(Debug, PartialEq, Eq)]
pub struct ParseError {}

impl ParseError {
    /// Creates the (only) parse error value.
    fn new() -> Self {
        ParseError {}
    }
}

impl std::fmt::Display for ParseError {
    /// Writes a fixed, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("text is not a supported markdown unordered list")
    }
}

// Lets callers use `?` into `Box<dyn Error>` and similar error plumbing.
impl std::error::Error for ParseError {}
18
19#[derive(Debug, PartialEq, Eq, Clone)]
20pub struct RootNode {
21    pub children: Vec<Box<NonRootNode>>,
22}
23
24pub type TreeIdentifierVec = Vec<usize>;
25
26impl RootNode {
27    pub fn new(children: Vec<NonRootNode>) -> Self {
28        RootNode {
29            children: children.into_iter().map(|n| Box::new(n)).collect(),
30        }
31    }
32
33    pub fn parse(text: &str) -> Result<Self, ParseError> {
34        if text == "" {
35            return Ok(RootNode::new(vec![]));
36        } else if text.starts_with("- ") {
37            let mut potential_list_items: Vec<_> = text.split("\n- ").collect();
38            potential_list_items[0] = potential_list_items[0]
39                .strip_prefix("- ")
40                .expect("Already checked for presence of \"- \" so should be okay to strip.");
41            /* Doing it procedurally instead of with map,
42             * because I don't think map can make use of `?` */
43            let mut children = vec![];
44            for item in potential_list_items {
45                let item = NonRootNode::parse(item, 2)?;
46                children.push(item);
47            }
48            return Ok(RootNode::new(children));
49        } else {
50            Err(ParseError {})
51        }
52    }
53
54    // TODO: want to take into account invalid TreeIdentifierVec, use Result
55    pub fn toggle(&mut self, mut identifier: TreeIdentifierVec) {
56        if !identifier.is_empty() {
57            let head = identifier[0];
58            identifier.swap_remove(0);
59            self.children[head].toggle(identifier);
60        }
61    }
62}
63
64impl fmt::Display for RootNode {
65    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66        let child_representations = self.children.iter().map(|c| c.format_with_indentation(0));
67        write!(
68            f,
69            "{}",
70            child_representations.collect::<Vec<_>>().join("\n")
71        )
72    }
73}
74
75#[derive(Debug, PartialEq, Eq, Clone, Copy)]
76pub enum CollapseState {
77    AlwaysOpen,
78    Open,
79    Closed,
80}
81
82#[derive(Debug, PartialEq, Eq, Clone)]
83pub struct NonRootNode {
84    pub value: String,
85    pub children: Vec<Box<NonRootNode>>,
86    pub collapse_state: CollapseState,
87}
88
89impl NonRootNode {
90    pub fn leaf(value: String, collapse_state: CollapseState) -> Self {
91        NonRootNode {
92            value,
93            children: vec![],
94            collapse_state,
95        }
96    }
97
98    pub fn internal(
99        value: String,
100        children: Vec<NonRootNode>,
101        collapse_state: CollapseState,
102    ) -> Self {
103        NonRootNode {
104            value,
105            children: children.into_iter().map(|n| Box::new(n)).collect(),
106            collapse_state,
107        }
108    }
109
110    /// Parses one list item, i.e. a non-root node.
111    ///
112    /// This assumes the initial "- " for this item has already been stripped.
113    /// Optional [ ] or [x] should still be present at top level.
114    /// Top-level indentation means actual content (after "- ") starts in this column.
115    pub fn parse(text: &str, top_level_indentation: usize) -> Result<Self, ParseError> {
116        // may want different name than top_level_indentation?
117        // e.g. contents_start_column ?
118        // or maybe collapse_state_begins_at_column
119        let child_prefix = format!("\n{}- ", " ".repeat(top_level_indentation));
120        let mut split_at_child_delim: Vec<_> = text.split(&child_prefix).collect();
121        let collapse_state = if split_at_child_delim[0].starts_with("[ ] ") {
122            CollapseState::Closed
123        } else if split_at_child_delim[0].starts_with("[x] ") {
124            CollapseState::Open
125        } else {
126            CollapseState::AlwaysOpen
127        };
128        match collapse_state {
129            CollapseState::Closed | CollapseState::Open => {
130                // drop [, space, ], space
131                split_at_child_delim[0] = &split_at_child_delim[0][4..]
132            }
133            _ => {}
134        };
135        let continuation_indentation = match collapse_state {
136            CollapseState::AlwaysOpen => top_level_indentation,
137            CollapseState::Closed | CollapseState::Open => top_level_indentation + 4,
138        };
139        // to deal with trailing newline in files, use split_terminator over split
140        let parent_components: Vec<_> = split_at_child_delim[0].split_terminator("\n").collect();
141        // for some reason, this includes a "" ?
142        let mut trimmed_components = vec![parent_components[0].trim()];
143        for component in &parent_components[1..] {
144            // using usizes, so subtracting is a risk
145            // underflow can even happen with well-formed lists
146            let sub_result = component.len().checked_sub(continuation_indentation);
147            if Some(component.trim_start_matches(" ").len()) != sub_result {
148                return Err(ParseError {});
149            }
150            trimmed_components.push(component.trim());
151        }
152        let parent = trimmed_components.join(" ");
153        let mut children = vec![];
154        for item in &split_at_child_delim[1..] {
155            let item = NonRootNode::parse(item, top_level_indentation + 2)?;
156            children.push(item);
157        }
158        Ok(NonRootNode::internal(
159            parent.to_string(),
160            children,
161            collapse_state,
162        ))
163    }
164
165    // TODO: want to take into account invalid TreeIdentifierVec, use Result
166    pub fn toggle(&mut self, mut identifier: TreeIdentifierVec) {
167        if identifier.is_empty() {
168            match self.collapse_state {
169                CollapseState::Open => self.collapse_state = CollapseState::Closed,
170                CollapseState::Closed => self.collapse_state = CollapseState::Open,
171                _ => {}
172            }
173        } else {
174            let head = identifier[0];
175            identifier.swap_remove(0);
176            self.children[head].toggle(identifier);
177        }
178    }
179
180    fn format_with_indentation(&self, indentation: usize) -> String {
181        let child_representations = self
182            .children
183            .iter()
184            .map(|c| c.format_with_indentation(indentation + 2))
185            .collect::<Vec<_>>();
186        let indentation = " ".repeat(indentation);
187        let mut child_block = String::new();
188        if !child_representations.is_empty() {
189            child_block.push_str("\n");
190        }
191        child_block.push_str(&child_representations.join("\n"));
192        let state_marker = match self.collapse_state {
193            CollapseState::Open => "[x] ",
194            CollapseState::Closed => "[ ] ",
195            CollapseState::AlwaysOpen => "",
196        };
197        format!("{}- {}{}{}", indentation, state_marker, self.value, child_block)
198    }
199}
200
#[cfg(test)]
mod tests {
    use super::CollapseState::{AlwaysOpen, Closed, Open};
    use super::*;

    /// Shorthand for a node without children.
    fn leaf(value: &str, state: CollapseState) -> NonRootNode {
        NonRootNode::leaf(value.to_string(), state)
    }

    /// Shorthand for a node with children.
    fn node(value: &str, children: Vec<NonRootNode>, state: CollapseState) -> NonRootNode {
        NonRootNode::internal(value.to_string(), children, state)
    }

    /// Parses `text`, panicking if it is rejected.
    fn parse_ok(text: &str) -> RootNode {
        RootNode::parse(text).expect("Structure should be okay.")
    }

    #[test]
    fn empty_list() {
        assert_eq!(RootNode::parse(""), Ok(RootNode::new(vec![])));
    }

    #[test]
    fn non_list_structure() {
        assert_eq!(RootNode::parse("abc"), Err(ParseError::new()));
    }

    #[test]
    fn unsupported_list_structure() {
        assert_eq!(RootNode::parse("* a\n* b"), Err(ParseError::new()));
    }

    #[test]
    fn flat_list_of_single_line_strings() {
        assert_eq!(
            RootNode::parse("- a\n- b\n- c"),
            Ok(RootNode::new(vec![
                leaf("a", AlwaysOpen),
                leaf("b", AlwaysOpen),
                leaf("c", AlwaysOpen),
            ]))
        );
    }

    #[test]
    fn flat_list_of_single_line_strings_three_types() {
        assert_eq!(
            RootNode::parse("- [ ] a\n- [x] b\n- c"),
            Ok(RootNode::new(vec![
                leaf("a", Closed),
                leaf("b", Open),
                leaf("c", AlwaysOpen),
            ]))
        );
    }

    #[test]
    fn nested_list_of_single_line_strings_various_types() {
        let text = "- a\n  - [ ] b\n    - [x] c\n      - [ ] d\n        - e";
        let d = node("d", vec![leaf("e", AlwaysOpen)], Closed);
        let c = node("c", vec![d], Open);
        let b = node("b", vec![c], Closed);
        let a = node("a", vec![b], AlwaysOpen);
        assert_eq!(RootNode::parse(text), Ok(RootNode::new(vec![a])));
    }

    #[test]
    fn nested_list_of_single_line_strings() {
        assert_eq!(
            RootNode::parse("- a\n  - b\n- c\n  - d"),
            Ok(RootNode::new(vec![
                node("a", vec![leaf("b", AlwaysOpen)], AlwaysOpen),
                node("c", vec![leaf("d", AlwaysOpen)], AlwaysOpen),
            ]))
        );
    }

    #[test]
    fn nested_list_of_single_line_strings_insufficient_spacing() {
        // one space before the final element instead of 2
        let text = "- a\n  - b\n- c\n - d";
        assert_eq!(RootNode::parse(text), Err(ParseError::new()));
    }

    #[test]
    fn nested_list_of_single_line_strings_excessive_spacing() {
        // three spaces before the final element instead of 2
        let text = "- a\n  - b\n- c\n   - d";
        assert_eq!(RootNode::parse(text), Err(ParseError::new()));
    }

    #[test]
    #[ignore] // currently out of scope
    fn nested_list_of_single_line_strings_bad_prefix() {
        // splitting on "\n(specific number of spaces)- "
        // does not split here
        let text = "- a\n  - b\n- c\n  -d";
        assert_eq!(RootNode::parse(text), Err(ParseError::new()));
    }

    #[test]
    fn flat_list_with_split_item() {
        assert_eq!(
            RootNode::parse("- abc\n  def"),
            Ok(RootNode::new(vec![leaf("abc def", AlwaysOpen)]))
        );
    }

    #[test]
    fn flat_list_with_split_items() {
        assert_eq!(
            RootNode::parse("- abc\n  def\n- ghi\n  jkl\n  mno"),
            Ok(RootNode::new(vec![
                leaf("abc def", AlwaysOpen),
                leaf("ghi jkl mno", AlwaysOpen),
            ]))
        );
    }

    #[test]
    fn nested_list_with_split_items() {
        assert_eq!(
            RootNode::parse("- abc\n  def\n- g\n  - hij\n    klm"),
            Ok(RootNode::new(vec![
                leaf("abc def", AlwaysOpen),
                node("g", vec![leaf("hij klm", AlwaysOpen)], AlwaysOpen),
            ]))
        );
    }

    #[test]
    fn from_file() {
        let text = std::fs::read_to_string("./instructions.md").expect("File should be present.");
        let innermost = leaf("this should be open at startup", AlwaysOpen);
        let middle = node("good luck!", vec![innermost], Open);
        let outer = node("*do your thing*", vec![middle], AlwaysOpen);
        assert_eq!(RootNode::parse(&text), Ok(RootNode::new(vec![outer])));
    }

    #[test]
    fn toggle_nested_list_of_single_line_strings_various_types() {
        let mut tree_before = parse_ok("- [ ] a\n  - [x] b\n- c\n  - [ ] d");
        tree_before.toggle(vec![0]);
        tree_before.toggle(vec![0, 0]);
        tree_before.toggle(vec![1]);
        tree_before.toggle(vec![1, 0]);
        let tree_after = parse_ok("- [x] a\n  - [ ] b\n- c\n  - [x] d");
        assert_eq!(tree_before, tree_after);
    }

    #[test]
    fn display_nested_list_without_branching() {
        let text = "- [ ] a\n  - [x] b\n    - c\n      - [ ] d";
        assert_eq!(parse_ok(text).to_string(), text);
    }

    #[test]
    fn display_nested_list_with_branching() {
        let text = "- [ ] a\n  - [x] b\n- c\n  - [ ] d";
        assert_eq!(parse_ok(text).to_string(), text);
    }
}
420}