// simple_yaml_parser/lib.rs

/// One segment of the path that leads to a parsed value.
///
/// The callbacks of [`parse`] and [`parse_with_exit_signal`] receive a slice of
/// these segments describing where in the document the value was found.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum YAMLKey<'a> {
    /// A mapping key: the trimmed text before a `:`, borrowed from the input.
    Slice(&'a str),
    /// The zero-based position of an item inside a list.
    Index(usize),
}
7#[derive(Debug, PartialEq, Eq)]
8pub enum RootYAMLValue<'a> {
9    String(&'a str),
10    MultilineString(MultilineString<'a>),
11    Number(&'a str),
12    True,
13    False,
14    // Null,
15}
/// Why parsing failed.
///
/// The `Debug` rendering of a variant is what [`YAMLParseError`]'s `Display`
/// output shows.
///
/// NOTE(review): no code visible in this file constructs any of these
/// variants, so their exact trigger conditions can only be read from the names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum YAMLParseErrorReason {
    ExpectedColon,
    ExpectedEndOfValue,
    ExpectedBracket,
    ExpectedTrueFalseNull,
    ExpectedValue,
}
26#[derive(Debug)]
27pub struct YAMLParseError {
28    pub at: usize,
29    pub reason: YAMLParseErrorReason,
30}
32impl std::error::Error for YAMLParseError {}
34impl std::fmt::Display for YAMLParseError {
35    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
36        f.write_fmt(format_args!(
37            "YAMLParseError: {:?} at {:?}",
38            self.reason, self.at
39        ))
40    }
41}
43/// If you want to return early (not parse the whole input) use [`parse_with_exit_signal`]
44///
45/// # Errors
46/// Returns an error if it tries to parse invalid YAML input
47pub fn parse<'a>(
48    on: &'a str,
49    mut cb: impl for<'b> FnMut(&'b [YAMLKey<'a>], RootYAMLValue<'a>),
50) -> Result<(), YAMLParseError> {
51    parse_with_exit_signal(
52        on,
53        |k, v| {
54            cb(k, v);
55            false
56        },
57        &ParseOptions::default(),
58    )
59}
/// For `|` and `>` based values
///
/// Holds the raw, unprocessed slice of the block scalar together with the
/// flags describing how it is meant to be rendered.
#[derive(Debug, PartialEq, Eq)]
pub struct MultilineString<'a> {
    /// The raw text of the block, borrowed from the input. As sliced by
    /// `parse_with_exit_signal` it starts at the newline that ends the
    /// `|` / `>` header line and still contains the indentation of every line.
    on: &'a str,
    /// replace new lines with spaces. Done using `>`
    collapse: bool,
    /// Intended for modifier forms such as `|+`.
    ///
    /// NOTE(review): the parser visible in this file only recognises a bare
    /// `|` or `>` and always sets this to `false`.
    preserve_leading_whitespace: bool,
}
/// Settings for [`parse_with_exit_signal`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseOptions {
    /// How many spaces make up one nesting level. A tab character counts as
    /// this many spaces. The `Default` impl uses `2`.
    pub indent_size: usize,
}
74
75impl Default for ParseOptions {
76    fn default() -> Self {
77        Self { indent_size: 2 }
78    }
79}
81/// # Errors
82/// Returns an error if it tries to parse invalid YAML input
83#[allow(clippy::too_many_lines)]
84pub fn parse_with_exit_signal<'a>(
85    on: &'a str,
86    mut cb: impl for<'b> FnMut(&'b [YAMLKey<'a>], RootYAMLValue<'a>) -> bool,
87    options: &ParseOptions,
88) -> Result<(), YAMLParseError> {
89    enum State {
90        Value,
91        Identifier,
92        ListItem,
93        Multiline {
94            collapse: bool,
95            preserve_leading_whitespace: bool,
96            indent: usize,
97        },
98        Skip,
99    }
100
101    let chars = on.char_indices();
102
103    let mut key_chain = Vec::new();
104    let mut state = State::Identifier;
105    let mut list_idx: usize = 0;
106    let mut indent = 0;
107    let mut start = 0;
108
109    for (idx, chr) in chars {
110        match state {
111            State::Value => {
112                let rest_of_line = on[start..idx].trim();
113                if let (true, '-') = (rest_of_line.is_empty(), chr) {
114                    state = State::ListItem;
115                    start = idx + '-'.len_utf8();
116                } else if let '\n' = chr {
117                    if rest_of_line.is_empty() {
118                        // ready for identifier
119                        state = State::Skip;
120                    } else {
121                        let modifier = match rest_of_line {
122                            "|" => Some((true, false)),
123                            ">" => Some((false, false)),
124                            _ => None,
125                        };
126                        if let Some((collapse, preserve_leading_whitespace)) = modifier {
127                            state = State::Multiline {
128                                collapse,
129                                preserve_leading_whitespace,
130                                indent,
131                            };
132                            start = idx;
133                        } else {
134                            let value = on[start..idx].trim();
135                            let value = match value {
136                                "true" => RootYAMLValue::True,
137                                "false" => RootYAMLValue::False,
138                                value => RootYAMLValue::String(value),
139                            };
140                            cb(&key_chain, value);
141                            key_chain.pop();
142                            state = State::Skip;
143                        }
144                    }
145                    indent = 0;
146                }
147            }
148            State::Multiline {
149                collapse,
150                preserve_leading_whitespace,
151                indent: current_indent,
152            } => {
153                if let '\n' = chr {
154                    let upcoming_line = &on[(idx + '\n'.len_utf8())..];
155                    let mut upcoming_indent = 0;
156                    let mut is_empty = false;
157                    for chr in upcoming_line.chars() {
158                        if let '\n' = chr {
159                            is_empty = true;
160                            break;
161                        }
162                        if let '\t' | ' ' = chr {
163                            upcoming_indent += 1;
164                        } else {
165                            break;
166                        }
167                    }
168                    if !is_empty && upcoming_indent <= current_indent {
169                        let multiline_string = MultilineString {
170                            on: &on[start..idx],
171                            collapse,
172                            preserve_leading_whitespace,
173                        };
174                        cb(&key_chain, RootYAMLValue::MultilineString(multiline_string));
175                        key_chain.pop();
176                        state = State::Skip;
177                        indent = 0;
178                    }
179                }
180            }
181            State::Identifier => {
182                if let ':' = chr {
183                    let key = YAMLKey::Slice(on[start..idx].trim());
184                    let current_level = indent / options.indent_size;
185                    let keys = key_chain
186                        .iter()
187                        .filter(|key| matches!(key, YAMLKey::Slice(_)))
188                        .count();
189                    if current_level < keys {
190                        drop(key_chain.drain(current_level..));
191                        match key_chain.last() {
192                            Some(YAMLKey::Index(idx)) => {
193                                list_idx = *idx;
194                            }
195                            _ => {
196                                list_idx = 0;
197                            }
198                        }
199                    }
200                    key_chain.push(key);
201                    state = State::Value;
202                    start = idx + ':'.len_utf8();
203                }
204                // TODO whitespace warning etc...?
205            }
206            State::ListItem => {
207                if let ':' = chr {
208                    let current_level = indent / options.indent_size;
209                    if current_level < key_chain.len() {
210                        drop(key_chain.drain((current_level + 1)..));
211                    }
212                    key_chain.push(YAMLKey::Index(list_idx));
213                    key_chain.push(YAMLKey::Slice(on[start..idx].trim()));
214                    state = State::Value;
215                    start = idx + ':'.len_utf8();
216                    list_idx += 1;
217                }
218                if let '\n' = chr {
219                    key_chain.push(YAMLKey::Index(list_idx));
220                    let value = on[start..idx].trim();
221                    let value = match value {
222                        "true" => RootYAMLValue::True,
223                        "false" => RootYAMLValue::False,
224                        value => RootYAMLValue::String(value),
225                    };
226                    cb(&key_chain, value);
227                    key_chain.pop();
228                    list_idx += 1;
229                    state = State::Skip;
230                    indent = 0;
231                }
232            }
233            State::Skip => {
234                if let '-' = chr {
235                    state = State::ListItem;
236                    start = idx + '-'.len_utf8();
237                } else if let '\t' = chr {
238                    indent += options.indent_size;
239                } else if let ' ' = chr {
240                    indent += 1;
241                } else if !chr.is_whitespace() {
242                    state = State::Identifier;
243                    start = idx;
244                }
245            }
246        }
247    }
248
249    // TODO left over stuff here
250
251    Ok(())
252}