1use nom::{
2 branch::alt,
3 bytes::complete::{take_while, take_while1},
4 character::complete::{char, line_ending},
5 combinator::eof,
6 multi::{many0, many1},
7 sequence::{preceded, terminated},
8 IResult, Parser,
9};
10use std::cell::RefCell;
11
/// A node of the parsed link tree.
///
/// A link optionally carries an identifier, an ordered list of value links,
/// and a list of child links.
#[derive(Debug, Clone, PartialEq)]
pub struct Link {
    /// Identifier of the link, when it has one.
    pub id: Option<String>,
    /// Links listed as the values of this link.
    pub values: Vec<Link>,
    /// Links attached to this link as children.
    pub children: Vec<Link>,
    /// Set by [`Link::new_indented_id`]; every other constructor leaves it `false`.
    pub is_indented_id: bool,
}

impl Link {
    /// Builds a link that has only an identifier: no values and no children.
    pub fn new_singlet(id: String) -> Self {
        Self::new_link(Some(id), Vec::new())
    }

    /// Builds an identifier-only link flagged with `is_indented_id = true`.
    pub fn new_indented_id(id: String) -> Self {
        Self {
            is_indented_id: true,
            ..Self::new_singlet(id)
        }
    }

    /// Builds an anonymous link (no identifier) that wraps `values`.
    pub fn new_value(values: Vec<Link>) -> Self {
        Self::new_link(None, values)
    }

    /// Builds a link from an optional identifier and its values.
    ///
    /// The result has no children and is not flagged as an indented id.
    pub fn new_link(id: Option<String>, values: Vec<Link>) -> Self {
        Self {
            id,
            values,
            children: Vec::new(),
            is_indented_id: false,
        }
    }

    /// Returns this link with its children replaced by `children`.
    pub fn with_children(self, children: Vec<Link>) -> Self {
        Self { children, ..self }
    }
}
62
/// Indentation bookkeeping shared by the parser functions.
///
/// Both fields sit behind `RefCell`, so they can be updated through a shared
/// `&ParserState`.
pub struct ParserState {
    /// Stack of active indentation levels; created holding a single `0`.
    indentation_stack: RefCell<Vec<usize>>,
    /// Indentation recorded by the first `set_base_indentation` call, if any.
    base_indentation: RefCell<Option<usize>>,
}

impl Default for ParserState {
    /// Equivalent to [`ParserState::new`].
    fn default() -> Self {
        ParserState::new()
    }
}

impl ParserState {
    /// Creates a state with no base indentation and a stack containing only level `0`.
    pub fn new() -> Self {
        Self {
            base_indentation: RefCell::new(None),
            indentation_stack: RefCell::new(vec![0]),
        }
    }

    /// Records `indent` as the base indentation unless one is already recorded.
    pub fn set_base_indentation(&self, indent: usize) {
        // Only the first call has an effect; later values are ignored.
        self.base_indentation.borrow_mut().get_or_insert(indent);
    }

    /// Returns the recorded base indentation, or `0` when none has been set.
    pub fn get_base_indentation(&self) -> usize {
        let recorded: Option<usize> = *self.base_indentation.borrow();
        recorded.unwrap_or(0)
    }

    /// Expresses `indent` relative to the base indentation, clamping at zero.
    pub fn normalize_indentation(&self, indent: usize) -> usize {
        indent.saturating_sub(self.get_base_indentation())
    }

    /// Pushes `indent` as the new innermost indentation level.
    pub fn push_indentation(&self, indent: usize) {
        let mut levels = self.indentation_stack.borrow_mut();
        levels.push(indent);
    }

    /// Drops the innermost indentation level, always keeping the bottom entry.
    pub fn pop_indentation(&self) {
        let mut levels = self.indentation_stack.borrow_mut();
        if levels.len() <= 1 {
            return;
        }
        levels.pop();
    }

    /// Returns the innermost indentation level (`0` if the stack were empty).
    pub fn current_indentation(&self) -> usize {
        self.indentation_stack.borrow().last().copied().unwrap_or(0)
    }

    /// Returns `true` when `indent` is at least the innermost indentation level.
    pub fn check_indentation(&self, indent: usize) -> bool {
        self.current_indentation() <= indent
    }
}
117
/// Returns `true` for a space, tab, line feed, or carriage return.
fn is_whitespace_char(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\r')
}
121
/// Returns `true` for a space or a tab, i.e. whitespace that stays on the line.
fn is_horizontal_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
125
126fn is_reference_char(c: char) -> bool {
127 !is_whitespace_char(c) && c != '(' && c != ':' && c != ')'
128}
129
130fn horizontal_whitespace(input: &str) -> IResult<&str, &str> {
131 take_while(is_horizontal_whitespace)(input)
132}
133
134fn whitespace(input: &str) -> IResult<&str, &str> {
135 take_while(is_whitespace_char)(input)
136}
137
138fn simple_reference(input: &str) -> IResult<&str, String> {
139 take_while1(is_reference_char)
140 .map(|s: &str| s.to_string())
141 .parse(input)
142}
143
144fn parse_multi_quote_string(
147 input: &str,
148 quote_char: char,
149 quote_count: usize,
150) -> IResult<&str, String> {
151 let open_close = quote_char.to_string().repeat(quote_count);
152 let escape_seq = quote_char.to_string().repeat(quote_count * 2);
153 let escape_val = quote_char.to_string().repeat(quote_count);
154
155 if !input.starts_with(&open_close) {
157 return Err(nom::Err::Error(nom::error::Error::new(
158 input,
159 nom::error::ErrorKind::Tag,
160 )));
161 }
162
163 let mut remaining = &input[open_close.len()..];
164 let mut content = String::new();
165
166 loop {
167 if remaining.is_empty() {
168 return Err(nom::Err::Error(nom::error::Error::new(
169 input,
170 nom::error::ErrorKind::Tag,
171 )));
172 }
173
174 if remaining.starts_with(&escape_seq) {
176 content.push_str(&escape_val);
177 remaining = &remaining[escape_seq.len()..];
178 continue;
179 }
180
181 if remaining.starts_with(&open_close) {
183 let after_close = &remaining[open_close.len()..];
184 if after_close.is_empty() || !after_close.starts_with(quote_char) {
186 return Ok((after_close, content));
187 }
188 }
189
190 let c = remaining.chars().next().unwrap();
192 content.push(c);
193 remaining = &remaining[c.len_utf8()..];
194 }
195}
196
197fn parse_dynamic_quote_string(input: &str, quote_char: char) -> IResult<&str, String> {
200 let quote_count = input.chars().take_while(|&c| c == quote_char).count();
202
203 if quote_count == 0 {
204 return Err(nom::Err::Error(nom::error::Error::new(
205 input,
206 nom::error::ErrorKind::Tag,
207 )));
208 }
209
210 parse_multi_quote_string(input, quote_char, quote_count)
211}
212
213fn double_quoted_dynamic(input: &str) -> IResult<&str, String> {
214 parse_dynamic_quote_string(input, '"')
215}
216
217fn single_quoted_dynamic(input: &str) -> IResult<&str, String> {
218 parse_dynamic_quote_string(input, '\'')
219}
220
221fn backtick_quoted_dynamic(input: &str) -> IResult<&str, String> {
222 parse_dynamic_quote_string(input, '`')
223}
224
225fn reference(input: &str) -> IResult<&str, String> {
226 alt((
229 double_quoted_dynamic,
230 single_quoted_dynamic,
231 backtick_quoted_dynamic,
232 simple_reference,
233 ))
234 .parse(input)
235}
236
237fn eol(input: &str) -> IResult<&str, &str> {
238 alt((
239 preceded(horizontal_whitespace, line_ending),
240 preceded(horizontal_whitespace, eof),
241 ))
242 .parse(input)
243}
244
245fn reference_or_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
246 alt((
247 |i| multi_line_any_link(i, state),
248 reference.map(Link::new_singlet),
249 ))
250 .parse(input)
251}
252
253fn multi_line_value_and_whitespace<'a>(
254 input: &'a str,
255 state: &ParserState,
256) -> IResult<&'a str, Link> {
257 terminated(|i| reference_or_link(i, state), whitespace).parse(input)
258}
259
260fn multi_line_values<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
261 preceded(
262 whitespace,
263 many0(|i| multi_line_value_and_whitespace(i, state)),
264 )
265 .parse(input)
266}
267
268fn single_line_value_and_whitespace<'a>(
269 input: &'a str,
270 state: &ParserState,
271) -> IResult<&'a str, Link> {
272 preceded(horizontal_whitespace, |i| reference_or_link(i, state)).parse(input)
273}
274
275fn single_line_values<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
276 many1(|i| single_line_value_and_whitespace(i, state)).parse(input)
277}
278
279fn single_line_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
280 (
281 horizontal_whitespace,
282 reference,
283 horizontal_whitespace,
284 char(':'),
285 |i| single_line_values(i, state),
286 )
287 .map(|(_, id, _, _, values)| Link::new_link(Some(id), values))
288 .parse(input)
289}
290
291fn multi_line_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
292 (
293 char('('),
294 whitespace,
295 reference,
296 whitespace,
297 char(':'),
298 |i| multi_line_values(i, state),
299 whitespace,
300 char(')'),
301 )
302 .map(|(_, _, id, _, _, values, _, _)| Link::new_link(Some(id), values))
303 .parse(input)
304}
305
306fn single_line_value_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
307 (|i| single_line_values(i, state))
308 .map(|values| {
309 if values.len() == 1
310 && values[0].id.is_some()
311 && values[0].values.is_empty()
312 && values[0].children.is_empty()
313 {
314 Link::new_singlet(values[0].id.clone().unwrap())
315 } else {
316 Link::new_value(values)
317 }
318 })
319 .parse(input)
320}
321
322fn indented_id_link<'a>(input: &'a str, _state: &ParserState) -> IResult<&'a str, Link> {
323 (reference, horizontal_whitespace, char(':'), eol)
324 .map(|(id, _, _, _)| Link::new_indented_id(id))
325 .parse(input)
326}
327
328fn multi_line_value_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
329 (
330 char('('),
331 |i| multi_line_values(i, state),
332 whitespace,
333 char(')'),
334 )
335 .map(|(_, values, _, _)| {
336 if values.len() == 1
337 && values[0].id.is_some()
338 && values[0].values.is_empty()
339 && values[0].children.is_empty()
340 {
341 Link::new_singlet(values[0].id.clone().unwrap())
342 } else {
343 Link::new_value(values)
344 }
345 })
346 .parse(input)
347}
348
349fn multi_line_any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
350 alt((
351 |i| multi_line_value_link(i, state),
352 |i| multi_line_link(i, state),
353 ))
354 .parse(input)
355}
356
357fn single_line_any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
358 alt((
359 terminated(|i| single_line_link(i, state), eol),
360 terminated(|i| single_line_value_link(i, state), eol),
361 ))
362 .parse(input)
363}
364
365fn any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
366 alt((
367 terminated(|i| multi_line_any_link(i, state), eol),
368 |i| indented_id_link(i, state),
369 |i| single_line_any_link(i, state),
370 ))
371 .parse(input)
372}
373
374fn count_indentation(input: &str) -> IResult<&str, usize> {
375 take_while(|c| c == ' ').map(|s: &str| s.len()).parse(input)
376}
377
378fn push_indentation<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, ()> {
379 let (input, spaces) = count_indentation(input)?;
380 let normalized_spaces = state.normalize_indentation(spaces);
381 let current = state.current_indentation();
382
383 if normalized_spaces > current {
384 state.push_indentation(normalized_spaces);
385 Ok((input, ()))
386 } else {
387 Err(nom::Err::Error(nom::error::Error::new(
388 input,
389 nom::error::ErrorKind::Verify,
390 )))
391 }
392}
393
394fn check_indentation<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, ()> {
395 let (input, spaces) = count_indentation(input)?;
396 let normalized_spaces = state.normalize_indentation(spaces);
397
398 if state.check_indentation(normalized_spaces) {
399 Ok((input, ()))
400 } else {
401 Err(nom::Err::Error(nom::error::Error::new(
402 input,
403 nom::error::ErrorKind::Verify,
404 )))
405 }
406}
407
408fn element<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
409 let (input, link) = any_link(input, state)?;
410
411 if let Ok((input, _)) = push_indentation(input, state) {
412 let (input, children) = links(input, state)?;
413 Ok((input, link.with_children(children)))
414 } else {
415 Ok((input, link))
416 }
417}
418
419fn first_line<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
420 let (_, spaces) = count_indentation(input)?;
422 state.set_base_indentation(spaces);
423 element(input, state)
424}
425
426fn line<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
427 preceded(|i| check_indentation(i, state), |i| element(i, state)).parse(input)
428}
429
430fn links<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
431 let (input, first) = first_line(input, state)?;
432 let (input, rest) = many0(|i| line(i, state)).parse(input)?;
433
434 state.pop_indentation();
435
436 let mut result = vec![first];
437 result.extend(rest);
438 Ok((input, result))
439}
440
441pub fn parse_document(input: &str) -> IResult<&str, Vec<Link>> {
442 let state = ParserState::new();
443
444 let input = input.trim_start_matches(['\n', '\r']);
446
447 if input.trim().is_empty() {
449 return Ok(("", vec![]));
450 }
451
452 let (input, result) = links(input, &state)?;
453 let (input, _) = whitespace(input)?;
454 let (input, _) = eof(input)?;
455
456 Ok((input, result))
457}