links_notation/
parser.rs

1use nom::{
2    IResult,
3    branch::alt,
4    bytes::complete::{take_while, take_while1, is_not},
5    character::complete::{char, line_ending},
6    combinator::eof,
7    multi::{many0, many1},
8    sequence::{preceded, terminated, delimited},
9    Parser,
10};
11use std::cell::RefCell;
12
/// A node of the parsed tree.
///
/// A node may carry an optional identifier, an ordered list of value nodes,
/// and an ordered list of child nodes attached via [`Link::with_children`].
#[derive(Debug, Clone, PartialEq)]
pub struct Link {
    /// Identifier of this node, if it has one.
    pub id: Option<String>,
    /// Value nodes listed for this node.
    pub values: Vec<Link>,
    /// Child nodes attached with [`Link::with_children`].
    pub children: Vec<Link>,
    /// `true` only for nodes built through [`Link::new_indented_id`].
    pub is_indented_id: bool,
}

impl Link {
    /// Builds a node that has only an identifier: no values, no children.
    pub fn new_singlet(id: String) -> Self {
        Self::new_link(Some(id), Vec::new())
    }

    /// Builds an identifier-only node with `is_indented_id` set to `true`.
    pub fn new_indented_id(id: String) -> Self {
        Self {
            is_indented_id: true,
            ..Self::new_singlet(id)
        }
    }

    /// Builds a node without an identifier that holds the given values.
    pub fn new_value(values: Vec<Link>) -> Self {
        Self::new_link(None, values)
    }

    /// Builds a node from an optional identifier and its values.
    ///
    /// The node starts without children and with `is_indented_id == false`.
    pub fn new_link(id: Option<String>, values: Vec<Link>) -> Self {
        Self {
            id,
            values,
            children: Vec::new(),
            is_indented_id: false,
        }
    }

    /// Consumes the node and returns it with `children` replacing whatever
    /// children it had before; every other field is kept.
    pub fn with_children(self, children: Vec<Link>) -> Self {
        Self { children, ..self }
    }
}
63
/// Indentation bookkeeping shared by the parsing functions.
///
/// The fields use `RefCell` so the state can be updated through a shared
/// reference.
#[derive(Debug)]
pub struct ParserState {
    /// Stack of indentation levels; starts as `[0]` and never becomes empty.
    indentation_stack: RefCell<Vec<usize>>,
    /// Indentation recorded by the first call to `set_base_indentation`.
    base_indentation: RefCell<Option<usize>>,
}

impl Default for ParserState {
    /// Same as [`ParserState::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl ParserState {
    /// Creates a state whose stack holds the single level `0` and whose base
    /// indentation is not yet set.
    pub fn new() -> Self {
        ParserState {
            indentation_stack: RefCell::new(vec![0]),
            base_indentation: RefCell::new(None),
        }
    }

    /// Records `indent` as the base indentation.
    ///
    /// Only the first call has an effect; later calls are ignored.
    pub fn set_base_indentation(&self, indent: usize) {
        let mut base = self.base_indentation.borrow_mut();
        if base.is_none() {
            *base = Some(indent);
        }
    }

    /// Returns the recorded base indentation, or `0` if none was recorded.
    pub fn get_base_indentation(&self) -> usize {
        self.base_indentation.borrow().unwrap_or(0)
    }

    /// Returns `indent` relative to the base indentation, clamped at `0` when
    /// `indent` is smaller than the base.
    pub fn normalize_indentation(&self, indent: usize) -> usize {
        indent.saturating_sub(self.get_base_indentation())
    }

    /// Pushes `indent` as the new current indentation level.
    pub fn push_indentation(&self, indent: usize) {
        self.indentation_stack.borrow_mut().push(indent);
    }

    /// Drops the current indentation level.
    ///
    /// The bottom entry of the stack is never removed.
    pub fn pop_indentation(&self) {
        let mut stack = self.indentation_stack.borrow_mut();
        if stack.len() > 1 {
            stack.pop();
        }
    }

    /// Returns the level on top of the stack (`0` if the stack were empty).
    pub fn current_indentation(&self) -> usize {
        self.indentation_stack.borrow().last().copied().unwrap_or(0)
    }

    /// Returns `true` when `indent` is at least the current indentation level.
    pub fn check_indentation(&self, indent: usize) -> bool {
        indent >= self.current_indentation()
    }
}
116
/// Returns `true` for a space, tab, line feed or carriage return.
fn is_whitespace_char(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\r')
}
120
/// Returns `true` for a space or a tab (whitespace that stays on the line).
fn is_horizontal_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
124
125fn is_reference_char(c: char) -> bool {
126    !is_whitespace_char(c) && c != '(' && c != ':' && c != ')'
127}
128
129fn horizontal_whitespace(input: &str) -> IResult<&str, &str> {
130    take_while(is_horizontal_whitespace)(input)
131}
132
133fn whitespace(input: &str) -> IResult<&str, &str> {
134    take_while(is_whitespace_char)(input)
135}
136
137fn simple_reference(input: &str) -> IResult<&str, String> {
138    take_while1(is_reference_char)
139        .map(|s: &str| s.to_string())
140        .parse(input)
141}
142
143fn double_quoted_reference(input: &str) -> IResult<&str, String> {
144    delimited(
145        char('"'),
146        is_not("\""),
147        char('"')
148    )
149    .map(|s: &str| s.to_string())
150    .parse(input)
151}
152
153fn single_quoted_reference(input: &str) -> IResult<&str, String> {
154    delimited(
155        char('\''),
156        is_not("'"),
157        char('\'')
158    )
159    .map(|s: &str| s.to_string())
160    .parse(input)
161}
162
163fn reference(input: &str) -> IResult<&str, String> {
164    alt((
165        double_quoted_reference,
166        single_quoted_reference,
167        simple_reference,
168    )).parse(input)
169}
170
171fn eol(input: &str) -> IResult<&str, &str> {
172    alt((
173        preceded(horizontal_whitespace, line_ending),
174        preceded(horizontal_whitespace, eof),
175    )).parse(input)
176}
177
178
179
180fn reference_or_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
181    alt((
182        |i| multi_line_any_link(i, state),
183        reference.map(Link::new_singlet),
184    )).parse(input)
185}
186
187fn multi_line_value_and_whitespace<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
188    terminated(
189        |i| reference_or_link(i, state),
190        whitespace
191    ).parse(input)
192}
193
194fn multi_line_values<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
195    preceded(
196        whitespace,
197        many0(|i| multi_line_value_and_whitespace(i, state))
198    ).parse(input)
199}
200
201fn single_line_value_and_whitespace<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
202    preceded(
203        horizontal_whitespace,
204        |i| reference_or_link(i, state)
205    ).parse(input)
206}
207
208fn single_line_values<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
209    many1(|i| single_line_value_and_whitespace(i, state)).parse(input)
210}
211
212fn single_line_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
213    (
214        horizontal_whitespace,
215        reference,
216        horizontal_whitespace,
217        char(':'),
218        |i| single_line_values(i, state)
219    ).map(|(_, id, _, _, values)| Link::new_link(Some(id), values))
220    .parse(input)
221}
222
223fn multi_line_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
224    (
225        char('('),
226        whitespace,
227        reference,
228        whitespace,
229        char(':'),
230        |i| multi_line_values(i, state),
231        whitespace,
232        char(')')
233    ).map(|(_, _, id, _, _, values, _, _)| Link::new_link(Some(id), values))
234    .parse(input)
235}
236
237fn single_line_value_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
238    (|i| single_line_values(i, state))
239        .map(|values| {
240            if values.len() == 1 && values[0].id.is_some() && values[0].values.is_empty() && values[0].children.is_empty() {
241                Link::new_singlet(values[0].id.clone().unwrap())
242            } else {
243                Link::new_value(values)
244            }
245        })
246        .parse(input)
247}
248
249fn indented_id_link<'a>(input: &'a str, _state: &ParserState) -> IResult<&'a str, Link> {
250    (
251        reference,
252        horizontal_whitespace,
253        char(':'),
254        eol
255    ).map(|(id, _, _, _)| Link::new_indented_id(id))
256    .parse(input)
257}
258
259fn multi_line_value_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
260    (
261        char('('),
262        |i| multi_line_values(i, state),
263        whitespace,
264        char(')')
265    ).map(|(_, values, _, _)| {
266        if values.len() == 1 && values[0].id.is_some() && values[0].values.is_empty() && values[0].children.is_empty() {
267            Link::new_singlet(values[0].id.clone().unwrap())
268        } else {
269            Link::new_value(values)
270        }
271    })
272    .parse(input)
273}
274
275fn multi_line_any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
276    alt((
277        |i| multi_line_value_link(i, state),
278        |i| multi_line_link(i, state),
279    )).parse(input)
280}
281
282fn single_line_any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
283    alt((
284        terminated(|i| single_line_link(i, state), eol),
285        terminated(|i| single_line_value_link(i, state), eol),
286    )).parse(input)
287}
288
289fn any_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
290    alt((
291        terminated(|i| multi_line_any_link(i, state), eol),
292        |i| indented_id_link(i, state),
293        |i| single_line_any_link(i, state),
294    )).parse(input)
295}
296
297fn count_indentation(input: &str) -> IResult<&str, usize> {
298    take_while(|c| c == ' ')
299        .map(|s: &str| s.len())
300        .parse(input)
301}
302
303fn push_indentation<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, ()> {
304    let (input, spaces) = count_indentation(input)?;
305    let normalized_spaces = state.normalize_indentation(spaces);
306    let current = state.current_indentation();
307
308    if normalized_spaces > current {
309        state.push_indentation(normalized_spaces);
310        Ok((input, ()))
311    } else {
312        Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Verify)))
313    }
314}
315
316fn check_indentation<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, ()> {
317    let (input, spaces) = count_indentation(input)?;
318    let normalized_spaces = state.normalize_indentation(spaces);
319
320    if state.check_indentation(normalized_spaces) {
321        Ok((input, ()))
322    } else {
323        Err(nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Verify)))
324    }
325}
326
327fn element<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
328    let (input, link) = any_link(input, state)?;
329    
330    if let Ok((input, _)) = push_indentation(input, state) {
331        let (input, children) = links(input, state)?;
332        Ok((input, link.with_children(children)))
333    } else {
334        Ok((input, link))
335    }
336}
337
338fn first_line<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
339    // Set base indentation from the first line
340    let (_, spaces) = count_indentation(input)?;
341    state.set_base_indentation(spaces);
342    element(input, state)
343}
344
345fn line<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> {
346    preceded(
347        |i| check_indentation(i, state),
348        |i| element(i, state)
349    ).parse(input)
350}
351
352fn links<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Vec<Link>> {
353    let (input, first) = first_line(input, state)?;
354    let (input, rest) = many0(|i| line(i, state)).parse(input)?;
355    
356    state.pop_indentation();
357    
358    let mut result = vec![first];
359    result.extend(rest);
360    Ok((input, result))
361}
362
363pub fn parse_document(input: &str) -> IResult<&str, Vec<Link>> {
364    let state = ParserState::new();
365
366    // Skip leading whitespace but preserve the line structure
367    let input = input.trim_start_matches(|c: char| c == '\n' || c == '\r');
368
369    // Handle empty or whitespace-only documents
370    if input.trim().is_empty() {
371        return Ok(("", vec![]));
372    }
373
374    let (input, result) = links(input, &state)?;
375    let (input, _) = whitespace(input)?;
376    let (input, _) = eof(input)?;
377
378    Ok((input, result))
379}
380