links_notation/
parser.rs

1use nom::{
2    branch::alt,
3    bytes::complete::{take_while, take_while1},
4    character::complete::{char, line_ending},
5    combinator::eof,
6    multi::{many0, many1},
7    sequence::{preceded, terminated},
8    IResult, Parser,
9};
10use std::cell::RefCell;
11
/// A parsed link: an optional identifier, its inline values and its nested children.
#[derive(Debug, Clone, PartialEq)]
pub struct Link {
    /// Identifier of the link, when it has one.
    pub id: Option<String>,
    /// Links listed as the values of this link.
    pub values: Vec<Link>,
    /// Links attached through [`Link::with_children`].
    pub children: Vec<Link>,
    /// `true` only for links built by [`Link::new_indented_id`].
    pub is_indented_id: bool,
}

impl Link {
    /// Shared constructor: every public constructor starts with no children.
    fn childless(id: Option<String>, values: Vec<Link>, is_indented_id: bool) -> Self {
        Link {
            id,
            values,
            children: Vec::new(),
            is_indented_id,
        }
    }

    /// Builds a link that carries only an identifier.
    pub fn new_singlet(id: String) -> Self {
        Self::childless(Some(id), Vec::new(), false)
    }

    /// Builds an identifier-only link flagged with `is_indented_id`.
    pub fn new_indented_id(id: String) -> Self {
        Self::childless(Some(id), Vec::new(), true)
    }

    /// Builds a link without identifier that holds the given values.
    pub fn new_value(values: Vec<Link>) -> Self {
        Self::childless(None, values, false)
    }

    /// Builds a link from an optional identifier and its values.
    pub fn new_link(id: Option<String>, values: Vec<Link>) -> Self {
        Self::childless(id, values, false)
    }

    /// Consumes the link and returns it with `children` replacing any previous children.
    pub fn with_children(self, children: Vec<Link>) -> Self {
        Link { children, ..self }
    }
}
62
/// Indentation bookkeeping shared by the parsing functions.
///
/// Both fields sit behind `RefCell` so the state can be updated through `&self`.
pub struct ParserState {
    // Stack of indentation levels; created with a single `0` entry that is never popped.
    indentation_stack: RefCell<Vec<usize>>,
    // Base indentation; `None` until `set_base_indentation` is first called.
    base_indentation: RefCell<Option<usize>>,
}

impl Default for ParserState {
    /// Same as [`ParserState::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl ParserState {
    /// Creates a state whose stack holds only level `0` and whose base indentation is unset.
    pub fn new() -> Self {
        Self {
            indentation_stack: RefCell::new(vec![0]),
            base_indentation: RefCell::new(None),
        }
    }

    /// Records `indent` as the base indentation; later calls leave the stored value untouched.
    pub fn set_base_indentation(&self, indent: usize) {
        let mut slot = self.base_indentation.borrow_mut();
        *slot = slot.or(Some(indent));
    }

    /// Returns the base indentation, or `0` when none has been recorded.
    pub fn get_base_indentation(&self) -> usize {
        match *self.base_indentation.borrow() {
            Some(base) => base,
            None => 0,
        }
    }

    /// Subtracts the base indentation from `indent`, clamping the result at zero.
    pub fn normalize_indentation(&self, indent: usize) -> usize {
        indent.saturating_sub(self.get_base_indentation())
    }

    /// Pushes `indent` as the new current indentation level.
    pub fn push_indentation(&self, indent: usize) {
        let mut levels = self.indentation_stack.borrow_mut();
        levels.push(indent);
    }

    /// Drops the current indentation level; the last remaining entry is always kept.
    pub fn pop_indentation(&self) {
        let mut levels = self.indentation_stack.borrow_mut();
        if levels.len() <= 1 {
            return;
        }
        levels.pop();
    }

    /// Returns the level on top of the stack (`0` if the stack were empty).
    pub fn current_indentation(&self) -> usize {
        self.indentation_stack.borrow().last().copied().unwrap_or(0)
    }

    /// Returns `true` when `indent` is at least the current indentation level.
    pub fn check_indentation(&self, indent: usize) -> bool {
        self.current_indentation() <= indent
    }
}
117
/// Returns `true` for a space, tab, line feed or carriage return.
fn is_whitespace_char(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\r')
}
121
/// Returns `true` for a space or a tab (whitespace that does not end a line).
fn is_horizontal_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
125
126fn is_reference_char(c: char) -> bool {
127    !is_whitespace_char(c) && c != '(' && c != ':' && c != ')'
128}
129
130fn horizontal_whitespace(input: &str) -> IResult<&str, &str> {
131    take_while(is_horizontal_whitespace)(input)
132}
133
134fn whitespace(input: &str) -> IResult<&str, &str> {
135    take_while(is_whitespace_char)(input)
136}
137
138fn simple_reference(input: &str) -> IResult<&str, String> {
139    take_while1(is_reference_char)
140        .map(|s: &str| s.to_string())
141        .parse(input)
142}
143
144/// Parse a multi-quote string with a given quote character and count.
145/// For N quotes: opening = N quotes, closing = N quotes, escape = 2*N quotes -> N quotes
146fn parse_multi_quote_string(
147    input: &str,
148    quote_char: char,
149    quote_count: usize,
150) -> IResult<&str, String> {
151    let open_close = quote_char.to_string().repeat(quote_count);
152    let escape_seq = quote_char.to_string().repeat(quote_count * 2);
153    let escape_val = quote_char.to_string().repeat(quote_count);
154
155    // Check for opening quotes
156    if !input.starts_with(&open_close) {
157        return Err(nom::Err::Error(nom::error::Error::new(
158            input,
159            nom::error::ErrorKind::Tag,
160        )));
161    }
162
163    let mut remaining = &input[open_close.len()..];
164    let mut content = String::new();
165
166    loop {
167        if remaining.is_empty() {
168            return Err(nom::Err::Error(nom::error::Error::new(
169                input,
170                nom::error::ErrorKind::Tag,
171            )));
172        }
173
174        // Check for escape sequence (2*N quotes)
175        if remaining.starts_with(&escape_seq) {
176            content.push_str(&escape_val);
177            remaining = &remaining[escape_seq.len()..];
178            continue;
179        }
180
181        // Check for closing quotes (N quotes not followed by more quotes)
182        if remaining.starts_with(&open_close) {
183            let after_close = &remaining[open_close.len()..];
184            // Make sure this is exactly N quotes (not more)
185            if after_close.is_empty() || !after_close.starts_with(quote_char) {
186                return Ok((after_close, content));
187            }
188        }
189
190        // Take the next character
191        let c = remaining.chars().next().unwrap();
192        content.push(c);
193        remaining = &remaining[c.len_utf8()..];
194    }
195}
196
197/// Parse a quoted string with dynamically detected quote count.
198/// Counts opening quotes and uses that count for parsing.
199fn parse_dynamic_quote_string(input: &str, quote_char: char) -> IResult<&str, String> {
200    // Count opening quotes
201    let quote_count = input.chars().take_while(|&c| c == quote_char).count();
202
203    if quote_count == 0 {
204        return Err(nom::Err::Error(nom::error::Error::new(
205            input,
206            nom::error::ErrorKind::Tag,
207        )));
208    }
209
210    parse_multi_quote_string(input, quote_char, quote_count)
211}
212
213fn double_quoted_dynamic(input: &str) -> IResult<&str, String> {
214    parse_dynamic_quote_string(input, '"')
215}
216
217fn single_quoted_dynamic(input: &str) -> IResult<&str, String> {
218    parse_dynamic_quote_string(input, '\'')
219}
220
221fn backtick_quoted_dynamic(input: &str) -> IResult<&str, String> {
222    parse_dynamic_quote_string(input, '`')
223}
224
225fn reference(input: &str) -> IResult<&str, String> {
226    // Try quoted strings with dynamic quote detection (supports any N quotes)
227    // Then fall back to simple unquoted reference
228    alt((
229        double_quoted_dynamic,
230        single_quoted_dynamic,
231        backtick_quoted_dynamic,
232        simple_reference,
233    ))
234    .parse(input)
235}
236
237fn eol(input: &str) -> IResult<&str, &str> {
238    alt((
239        preceded(horizontal_whitespace, line_ending),
240        preceded(horizontal_whitespace, eof),
241    ))
242    .parse(input)
243}
244
245fn reference_or_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
246    alt((
247        |i| multi_line_any_link(i, state),
248        reference.map(Link::new_singlet),
249    ))
250    .parse(input)
251}
252
253fn multi_line_value_and_whitespace<'a>(
254    input: &'a str,
255    state: &ParserState,
256) -> IResult<&'a str, Link> {
257    terminated(|i| reference_or_link(i, state), whitespace).parse(input)
258}
259
260fn multi_line_values<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
261    preceded(
262        whitespace,
263        many0(|i| multi_line_value_and_whitespace(i, state)),
264    )
265    .parse(input)
266}
267
268fn single_line_value_and_whitespace<'a>(
269    input: &'a str,
270    state: &ParserState,
271) -> IResult<&'a str, Link> {
272    preceded(horizontal_whitespace, |i| reference_or_link(i, state)).parse(input)
273}
274
275fn single_line_values<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
276    many1(|i| single_line_value_and_whitespace(i, state)).parse(input)
277}
278
279fn single_line_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
280    (
281        horizontal_whitespace,
282        reference,
283        horizontal_whitespace,
284        char(':'),
285        |i| single_line_values(i, state),
286    )
287        .map(|(_, id, _, _, values)| Link::new_link(Some(id), values))
288        .parse(input)
289}
290
291fn multi_line_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
292    (
293        char('('),
294        whitespace,
295        reference,
296        whitespace,
297        char(':'),
298        |i| multi_line_values(i, state),
299        whitespace,
300        char(')'),
301    )
302        .map(|(_, _, id, _, _, values, _, _)| Link::new_link(Some(id), values))
303        .parse(input)
304}
305
306fn single_line_value_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
307    (|i| single_line_values(i, state))
308        .map(|values| {
309            if values.len() == 1
310                && values[0].id.is_some()
311                && values[0].values.is_empty()
312                && values[0].children.is_empty()
313            {
314                Link::new_singlet(values[0].id.clone().unwrap())
315            } else {
316                Link::new_value(values)
317            }
318        })
319        .parse(input)
320}
321
322fn indented_id_link<'a>(input: &'a str, _state: &ParserState) -> IResult<&'a str, Link> {
323    (reference, horizontal_whitespace, char(':'), eol)
324        .map(|(id, _, _, _)| Link::new_indented_id(id))
325        .parse(input)
326}
327
328fn multi_line_value_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
329    (
330        char('('),
331        |i| multi_line_values(i, state),
332        whitespace,
333        char(')'),
334    )
335        .map(|(_, values, _, _)| {
336            if values.len() == 1
337                && values[0].id.is_some()
338                && values[0].values.is_empty()
339                && values[0].children.is_empty()
340            {
341                Link::new_singlet(values[0].id.clone().unwrap())
342            } else {
343                Link::new_value(values)
344            }
345        })
346        .parse(input)
347}
348
349fn multi_line_any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
350    alt((
351        |i| multi_line_value_link(i, state),
352        |i| multi_line_link(i, state),
353    ))
354    .parse(input)
355}
356
357fn single_line_any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
358    alt((
359        terminated(|i| single_line_link(i, state), eol),
360        terminated(|i| single_line_value_link(i, state), eol),
361    ))
362    .parse(input)
363}
364
365fn any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
366    alt((
367        terminated(|i| multi_line_any_link(i, state), eol),
368        |i| indented_id_link(i, state),
369        |i| single_line_any_link(i, state),
370    ))
371    .parse(input)
372}
373
374fn count_indentation(input: &str) -> IResult<&str, usize> {
375    take_while(|c| c == ' ').map(|s: &str| s.len()).parse(input)
376}
377
378fn push_indentation<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, ()> {
379    let (input, spaces) = count_indentation(input)?;
380    let normalized_spaces = state.normalize_indentation(spaces);
381    let current = state.current_indentation();
382
383    if normalized_spaces > current {
384        state.push_indentation(normalized_spaces);
385        Ok((input, ()))
386    } else {
387        Err(nom::Err::Error(nom::error::Error::new(
388            input,
389            nom::error::ErrorKind::Verify,
390        )))
391    }
392}
393
394fn check_indentation<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, ()> {
395    let (input, spaces) = count_indentation(input)?;
396    let normalized_spaces = state.normalize_indentation(spaces);
397
398    if state.check_indentation(normalized_spaces) {
399        Ok((input, ()))
400    } else {
401        Err(nom::Err::Error(nom::error::Error::new(
402            input,
403            nom::error::ErrorKind::Verify,
404        )))
405    }
406}
407
408fn element<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
409    let (input, link) = any_link(input, state)?;
410
411    if let Ok((input, _)) = push_indentation(input, state) {
412        let (input, children) = links(input, state)?;
413        Ok((input, link.with_children(children)))
414    } else {
415        Ok((input, link))
416    }
417}
418
419fn first_line<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
420    // Set base indentation from the first line
421    let (_, spaces) = count_indentation(input)?;
422    state.set_base_indentation(spaces);
423    element(input, state)
424}
425
426fn line<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
427    preceded(|i| check_indentation(i, state), |i| element(i, state)).parse(input)
428}
429
430fn links<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
431    let (input, first) = first_line(input, state)?;
432    let (input, rest) = many0(|i| line(i, state)).parse(input)?;
433
434    state.pop_indentation();
435
436    let mut result = vec![first];
437    result.extend(rest);
438    Ok((input, result))
439}
440
441pub fn parse_document(input: &str) -> IResult<&str, Vec<Link>> {
442    let state = ParserState::new();
443
444    // Skip leading whitespace but preserve the line structure
445    let input = input.trim_start_matches(['\n', '\r']);
446
447    // Handle empty or whitespace-only documents
448    if input.trim().is_empty() {
449        return Ok(("", vec![]));
450    }
451
452    let (input, result) = links(input, &state)?;
453    let (input, _) = whitespace(input)?;
454    let (input, _) = eof(input)?;
455
456    Ok((input, result))
457}