links_notation/
parser.rs

1use nom::{
2    IResult,
3    branch::alt,
4    bytes::complete::{take_while, take_while1, is_not},
5    character::complete::{char, line_ending},
6    combinator::eof,
7    multi::{many0, many1},
8    sequence::{preceded, terminated, delimited},
9    Parser,
10};
11use std::cell::RefCell;
12
/// A node in the parsed tree of links.
///
/// A link may carry an identifier, a list of value links, and a list of child
/// links attached through [`Link::with_children`].
#[derive(Debug, Clone, PartialEq)]
pub struct Link {
    /// Identifier of the link; `None` for links built with [`Link::new_value`].
    pub id: Option<String>,
    /// Links listed as the values of this link.
    pub values: Vec<Link>,
    /// Links attached as children of this link.
    pub children: Vec<Link>,
    /// `true` only for links built with [`Link::new_indented_id`].
    pub is_indented_id: bool,
}

impl Link {
    /// Creates a link that has only an identifier: no values, no children.
    pub fn new_singlet(id: String) -> Self {
        Self::new_link(Some(id), Vec::new())
    }

    /// Creates an identifier-only link flagged as an indented id.
    pub fn new_indented_id(id: String) -> Self {
        Self {
            is_indented_id: true,
            ..Self::new_singlet(id)
        }
    }

    /// Creates an anonymous link (no identifier) holding `values`.
    pub fn new_value(values: Vec<Link>) -> Self {
        Self::new_link(None, values)
    }

    /// Creates a link from an optional identifier and its values.
    ///
    /// The result has no children and is not flagged as an indented id.
    pub fn new_link(id: Option<String>, values: Vec<Link>) -> Self {
        Self {
            id,
            values,
            children: Vec::new(),
            is_indented_id: false,
        }
    }

    /// Consumes the link and returns it with its children replaced by `children`.
    pub fn with_children(self, children: Vec<Link>) -> Self {
        Self { children, ..self }
    }
}
63
/// Mutable bookkeeping shared by the parser functions.
///
/// Holds a stack of indentation widths. The stack always keeps its base entry
/// `0`, so there is always a current indentation level. Interior mutability
/// (`RefCell`) lets the parser functions share the state through `&ParserState`.
#[derive(Debug)]
pub struct ParserState {
    indentation_stack: RefCell<Vec<usize>>,
}

impl ParserState {
    /// Creates a state whose indentation stack contains only the base level `0`.
    pub fn new() -> Self {
        ParserState {
            indentation_stack: RefCell::new(vec![0]),
        }
    }

    /// Pushes `indent` as the new current indentation level.
    pub fn push_indentation(&self, indent: usize) {
        self.indentation_stack.borrow_mut().push(indent);
    }

    /// Pops the current indentation level; the base level is never removed.
    pub fn pop_indentation(&self) {
        let mut stack = self.indentation_stack.borrow_mut();
        if stack.len() > 1 {
            stack.pop();
        }
    }

    /// Returns the indentation level on top of the stack (`0` if it were empty).
    pub fn current_indentation(&self) -> usize {
        self.indentation_stack.borrow().last().copied().unwrap_or(0)
    }

    /// Returns `true` when `indent` is at least the current indentation level.
    pub fn check_indentation(&self, indent: usize) -> bool {
        indent >= self.current_indentation()
    }
}

impl Default for ParserState {
    /// Equivalent to [`ParserState::new`]: the stack starts with the base level `0`.
    fn default() -> Self {
        Self::new()
    }
}
94
/// Returns `true` for a space, tab, line feed or carriage return.
fn is_whitespace_char(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\r')
}
98
/// Returns `true` for a space or a tab, i.e. whitespace that does not end a line.
fn is_horizontal_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
102
103fn is_reference_char(c: char) -> bool {
104    !is_whitespace_char(c) && c != '(' && c != ':' && c != ')'
105}
106
107fn horizontal_whitespace(input: &str) -> IResult<&str, &str> {
108    take_while(is_horizontal_whitespace)(input)
109}
110
111fn whitespace(input: &str) -> IResult<&str, &str> {
112    take_while(is_whitespace_char)(input)
113}
114
115fn simple_reference(input: &str) -> IResult<&str, String> {
116    take_while1(is_reference_char)
117        .map(|s: &str| s.to_string())
118        .parse(input)
119}
120
121fn double_quoted_reference(input: &str) -> IResult<&str, String> {
122    delimited(
123        char('"'),
124        is_not("\""),
125        char('"')
126    )
127    .map(|s: &str| s.to_string())
128    .parse(input)
129}
130
131fn single_quoted_reference(input: &str) -> IResult<&str, String> {
132    delimited(
133        char('\''),
134        is_not("'"),
135        char('\'')
136    )
137    .map(|s: &str| s.to_string())
138    .parse(input)
139}
140
141fn reference(input: &str) -> IResult<&str, String> {
142    alt((
143        double_quoted_reference,
144        single_quoted_reference,
145        simple_reference,
146    )).parse(input)
147}
148
149fn eol(input: &str) -> IResult<&str, &str> {
150    alt((
151        preceded(horizontal_whitespace, line_ending),
152        preceded(horizontal_whitespace, eof),
153    )).parse(input)
154}
155
156
157
158fn reference_or_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
159    alt((
160        |i| multi_line_any_link(i, state),
161        reference.map(Link::new_singlet),
162    )).parse(input)
163}
164
165fn multi_line_value_and_whitespace<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
166    terminated(
167        |i| reference_or_link(i, state),
168        whitespace
169    ).parse(input)
170}
171
172fn multi_line_values<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
173    preceded(
174        whitespace,
175        many0(|i| multi_line_value_and_whitespace(i, state))
176    ).parse(input)
177}
178
179fn single_line_value_and_whitespace<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
180    preceded(
181        horizontal_whitespace,
182        |i| reference_or_link(i, state)
183    ).parse(input)
184}
185
186fn single_line_values<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
187    many1(|i| single_line_value_and_whitespace(i, state)).parse(input)
188}
189
190fn single_line_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
191    (
192        horizontal_whitespace,
193        reference,
194        horizontal_whitespace,
195        char(':'),
196        |i| single_line_values(i, state)
197    ).map(|(_, id, _, _, values)| Link::new_link(Some(id), values))
198    .parse(input)
199}
200
201fn multi_line_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
202    (
203        char('('),
204        whitespace,
205        reference,
206        whitespace,
207        char(':'),
208        |i| multi_line_values(i, state),
209        whitespace,
210        char(')')
211    ).map(|(_, _, id, _, _, values, _, _)| Link::new_link(Some(id), values))
212    .parse(input)
213}
214
215fn single_line_value_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
216    (|i| single_line_values(i, state))
217        .map(|values| {
218            if values.len() == 1 && values[0].id.is_some() && values[0].values.is_empty() && values[0].children.is_empty() {
219                Link::new_singlet(values[0].id.clone().unwrap())
220            } else {
221                Link::new_value(values)
222            }
223        })
224        .parse(input)
225}
226
227fn indented_id_link<'a>(input: &'a str, _state: &ParserState) -> IResult<&'a str, Link> {
228    (
229        reference,
230        horizontal_whitespace,
231        char(':'),
232        eol
233    ).map(|(id, _, _, _)| Link::new_indented_id(id))
234    .parse(input)
235}
236
237fn multi_line_value_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
238    (
239        char('('),
240        |i| multi_line_values(i, state),
241        whitespace,
242        char(')')
243    ).map(|(_, values, _, _)| {
244        if values.len() == 1 && values[0].id.is_some() && values[0].values.is_empty() && values[0].children.is_empty() {
245            Link::new_singlet(values[0].id.clone().unwrap())
246        } else {
247            Link::new_value(values)
248        }
249    })
250    .parse(input)
251}
252
253fn multi_line_any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
254    alt((
255        |i| multi_line_value_link(i, state),
256        |i| multi_line_link(i, state),
257    )).parse(input)
258}
259
260fn single_line_any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
261    alt((
262        terminated(|i| single_line_link(i, state), eol),
263        terminated(|i| single_line_value_link(i, state), eol),
264    )).parse(input)
265}
266
267fn any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
268    alt((
269        terminated(|i| multi_line_any_link(i, state), eol),
270        |i| indented_id_link(i, state),
271        |i| single_line_any_link(i, state),
272    )).parse(input)
273}
274
275fn count_indentation(input: &str) -> IResult<&str, usize> {
276    take_while(|c| c == ' ')
277        .map(|s: &str| s.len())
278        .parse(input)
279}
280
281fn push_indentation<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, ()> {
282    let (input, spaces) = count_indentation(input)?;
283    let current = state.current_indentation();
284    
285    if spaces > current {
286        state.push_indentation(spaces);
287        Ok((input, ()))
288    } else {
289        Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Verify)))
290    }
291}
292
293fn check_indentation<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, ()> {
294    let (input, spaces) = count_indentation(input)?;
295    
296    if state.check_indentation(spaces) {
297        Ok((input, ()))
298    } else {
299        Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Verify)))
300    }
301}
302
303fn element<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
304    let (input, link) = any_link(input, state)?;
305    
306    if let Ok((input, _)) = push_indentation(input, state) {
307        let (input, children) = links(input, state)?;
308        Ok((input, link.with_children(children)))
309    } else {
310        Ok((input, link))
311    }
312}
313
314fn first_line<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
315    element(input, state)
316}
317
318fn line<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
319    preceded(
320        |i| check_indentation(i, state),
321        |i| element(i, state)
322    ).parse(input)
323}
324
325fn links<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
326    let (input, first) = first_line(input, state)?;
327    let (input, rest) = many0(|i| line(i, state)).parse(input)?;
328    
329    state.pop_indentation();
330    
331    let mut result = vec![first];
332    result.extend(rest);
333    Ok((input, result))
334}
335
336pub fn parse_document(input: &str) -> IResult<&str, Vec<Link>> {
337    let state = ParserState::new();
338    
339    // Skip leading whitespace but preserve the line structure
340    let input = input.trim_start_matches(|c: char| c == '\n' || c == '\r');
341    
342    // Handle empty or whitespace-only documents
343    if input.trim().is_empty() {
344        return Ok(("", vec![]));
345    }
346    
347    let (input, result) = links(input, &state)?;
348    let (input, _) = whitespace(input)?;
349    let (input, _) = eof(input)?;
350    
351    Ok((input, result))
352}
353