diskplan_schema/
text.rs

1use nom::{
2    branch::alt,
3    bytes::complete::{is_a, is_not, tag},
4    character::complete::{alpha1, alphanumeric1, char, line_ending, space0, space1},
5    combinator::{all_consuming, consumed, eof, map, opt, recognize, value},
6    error::{context, VerboseError, VerboseErrorKind},
7    multi::{count, many0, many1},
8    sequence::{delimited, pair, preceded, terminated, tuple},
9    IResult, Parser,
10};
11use tracing::{span, Level};
12
13use super::{Binding, SchemaNode};
14use crate::{Expression, Identifier, Special, Token};
15
16type Res<T, U> = IResult<T, U, VerboseError<T>>;
17
18mod builder;
19use builder::SchemaNodeBuilder;
20
21mod error;
22pub use error::ParseError;
23
/// The kind of filesystem entry that a schema node describes
///
/// The type is a plain two-state marker, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeType {
    /// The node describes a directory
    Directory,
    /// The node describes a file
    File,
}
29
30/// Parses the given text representation into a tree of [`SchemaNode`]s
31pub fn parse_schema(text: &str) -> std::result::Result<SchemaNode, ParseError> {
32    let span = span!(Level::INFO, "parse_schema");
33    let _enter = span.enter();
34
35    // Strip several levels of initial indentation to help with indented literal schemas
36    let any_indent = |s| {
37        opt(alt((
38            many1(operator(0)),
39            many1(operator(1)),
40            many1(operator(2)),
41            many1(operator(3)),
42            many1(operator(4)),
43        )))(s)
44    };
45    // Parse and process entire schema and handle any errors that arise
46    let (_, ops) = all_consuming(preceded(many0(blank_line), any_indent))(text).map_err(|e| {
47        let e = match e {
48            nom::Err::Error(e) | nom::Err::Failure(e) => e,
49            nom::Err::Incomplete(_) => unreachable!(),
50        };
51        let mut error = None;
52        for (r, e) in e.errors.iter().rev() {
53            error = Some(ParseError::new(
54                match e {
55                    VerboseErrorKind::Nom(p) => {
56                        format!("Invalid token while looking for: {p:?}")
57                    }
58                    _ => format!("Error parsing {e:?}"),
59                },
60                text,
61                r,
62                error.map(Box::new),
63            ));
64        }
65        error.unwrap()
66    })?;
67    let ops = ops.unwrap_or_default();
68    let schema_node = schema_node("root", text, text, false, NodeType::Directory, None, ops)?;
69    if schema_node.match_pattern.is_some() {
70        return Err(ParseError::new(
71            "Top level :match is not allowed".into(),
72            // TODO: Or is it? (Could alternatively say is_def=true)
73            text,
74            text.find("\n:match")
75                .map(|pos| &text[pos + 1..pos + 7])
76                .unwrap_or(text),
77            None,
78        ));
79    }
80    Ok(schema_node)
81}
82
83fn schema_node<'t>(
84    line: &'t str,
85    whole: &'t str,
86    part: &'t str,
87    is_def: bool,
88    item_type: NodeType,
89    symlink: Option<Expression<'t>>,
90    ops: Vec<(&'t str, Operator<'t>)>,
91) -> std::result::Result<SchemaNode<'t>, ParseError<'t>> {
92    let part_parse_error = |e: anyhow::Error| ParseError::new(e.to_string(), whole, part, None);
93    let mut builder = SchemaNodeBuilder::new(
94        line,
95        is_def,
96        match item_type {
97            NodeType::Directory => NodeType::Directory,
98            NodeType::File => NodeType::File,
99        },
100        symlink,
101    );
102    for (span, op) in ops {
103        match op {
104            // Operators that affect the parent (when looking up this item)
105            Operator::Match(expr) => builder.match_pattern(expr),
106            Operator::Avoid(expr) => builder.avoid_pattern(expr),
107
108            // Operators that apply to this item
109            Operator::Use { name } => builder.use_definition(name),
110            Operator::Mode(mode) => builder.mode(mode),
111            Operator::Owner(owner) => builder.owner(owner),
112            Operator::Group(group) => builder.group(group),
113            Operator::Source(source) => builder.source(source),
114
115            // Operators that apply to child items
116            Operator::Let { name, expr } => builder.let_var(name, expr),
117            Operator::Item {
118                line,
119                binding,
120                is_directory,
121                link,
122                children,
123            } => {
124                let sub_item_type = match is_directory {
125                    false => NodeType::File,
126                    true => NodeType::Directory,
127                };
128                let item_node =
129                    schema_node(line, whole, span, false, sub_item_type, link, children).map_err(
130                        |e| {
131                            ParseError::new(
132                                format!(r#"Problem within "{binding}""#),
133                                whole,
134                                span,
135                                Some(Box::new(e)),
136                            )
137                        },
138                    )?;
139                builder.add_entry(binding, item_node)
140            }
141            Operator::Def {
142                line,
143                name,
144                is_directory,
145                link,
146                children,
147            } => {
148                if let NodeType::File = item_type {
149                    return Err(ParseError::new(
150                        "Files cannot have child items".to_string(),
151                        whole,
152                        span,
153                        None,
154                    ));
155                }
156                let sub_item_type = match is_directory {
157                    false => NodeType::File,
158                    true => NodeType::Directory,
159                };
160                let properties =
161                    schema_node(line, whole, span, true, sub_item_type, link, children).map_err(
162                        |e| {
163                            ParseError::new(
164                                format!(r#"Error within definition "{name}""#),
165                                whole,
166                                span,
167                                Some(Box::new(e)),
168                            )
169                        },
170                    )?;
171
172                if properties.match_pattern.is_some() {
173                    return Err(ParseError::new(
174                        ":def has own :match".to_owned(),
175                        whole,
176                        span,
177                        None,
178                    ));
179                }
180                builder.define(name, properties)
181            }
182        }
183        .map_err(|s| ParseError::new(s.to_string(), whole, span, None))?
184    }
185    // TODO: Handle error spans, child errors?, etc.
186    builder.build().map_err(part_parse_error)
187}
188
189fn indentation(level: usize) -> impl Fn(&str) -> Res<&str, &str> {
190    move |s: &str| recognize(count(tag("    "), level))(s)
191}
192
193fn operator(level: usize) -> impl Fn(&str) -> Res<&str, (&str, Operator)> {
194    // This is really just to make the op definitions tidier
195    fn op<'a, O, P>(op: &'static str, second: P) -> impl FnMut(&'a str) -> Res<&'a str, O>
196    where
197        P: Parser<&'a str, O, VerboseError<&'a str>>,
198    {
199        context("op", preceded(tuple((tag(op), space1)), second))
200    }
201
202    move |s: &str| {
203        let sep = |ch, second| preceded(delimited(space0, char(ch), space0), second);
204
205        let let_op = tuple((op("let", identifier), sep('=', expression)));
206        let use_op = op("use", identifier);
207        let match_op = op("match", expression);
208        let avoid_op = op("avoid", expression);
209        let mode_op = op("mode", octal);
210        let owner_op = op("owner", expression);
211        let group_op = op("group", expression);
212        let source_op = op("source", expression);
213
214        consumed(alt((
215            delimited(
216                tuple((indentation(level), char(':'))),
217                alt((
218                    map(let_op, |(name, expr)| Operator::Let { name, expr }),
219                    map(use_op, |name| Operator::Use { name }),
220                    map(match_op, Operator::Match),
221                    map(avoid_op, Operator::Avoid),
222                    map(mode_op, Operator::Mode),
223                    map(owner_op, Operator::Owner),
224                    map(group_op, Operator::Group),
225                    map(source_op, Operator::Source),
226                )),
227                end_of_lines,
228            ),
229            map(
230                // $binding/ -> link
231                //     children...
232                tuple((
233                    delimited(indentation(level), consumed(item_header), end_of_lines),
234                    many0(operator(level + 1)),
235                )),
236                |((line, (binding, is_directory, link)), children)| Operator::Item {
237                    line,
238                    binding,
239                    is_directory,
240                    link,
241                    children,
242                },
243            ),
244            map(
245                tuple((
246                    delimited(indentation(level), consumed(def_header), end_of_lines),
247                    many0(operator(level + 1)),
248                )),
249                |((line, (name, is_directory, link)), children)| Operator::Def {
250                    line,
251                    name,
252                    is_directory,
253                    link,
254                    children,
255                },
256            ),
257        )))(s)
258    }
259}
260
261#[derive(Debug, Clone, PartialEq)]
262enum Operator<'t> {
263    Item {
264        line: &'t str,
265        binding: Binding<'t>,
266        is_directory: bool,
267        link: Option<Expression<'t>>,
268        children: Vec<(&'t str, Operator<'t>)>,
269    },
270    Let {
271        name: Identifier<'t>,
272        expr: Expression<'t>,
273    },
274    Def {
275        line: &'t str,
276        name: Identifier<'t>,
277        is_directory: bool,
278        link: Option<Expression<'t>>,
279        children: Vec<(&'t str, Operator<'t>)>,
280    },
281    Use {
282        name: Identifier<'t>,
283    },
284    Match(Expression<'t>),
285    Avoid(Expression<'t>),
286    Mode(u16),
287    Owner(Expression<'t>),
288    Group(Expression<'t>),
289    Source(Expression<'t>),
290}
291
292fn blank_line(s: &str) -> Res<&str, &str> {
293    alt((
294        recognize(tuple((space0, line_ending))),
295        recognize(tuple((space1, eof))),
296        recognize(tuple((space0, comment, line_ending))),
297        recognize(tuple((space0, comment, eof))),
298    ))(s)
299}
300
301fn comment(s: &str) -> Res<&str, &str> {
302    alt((
303        recognize(tuple((tag("# "), is_not("\r\n")))),
304        terminated(tag("#"), eof),
305    ))(s)
306}
307
308/// Match and consume line endings and any following blank lines, or EOF
309fn end_of_lines(s: &str) -> Res<&str, &str> {
310    alt((recognize(tuple((line_ending, many0(blank_line)))), eof))(s)
311}
312
313fn binding(s: &str) -> Res<&str, Binding<'_>> {
314    alt((
315        map(preceded(char('$'), identifier), Binding::Dynamic),
316        map(filename, Binding::Static),
317    ))(s)
318}
319
320fn filename(s: &str) -> Res<&str, &str> {
321    recognize(many1(alt((alphanumeric1, is_a("_-.@^+%=")))))(s)
322}
323
324// $name/ -> link
325// name
326fn item_header(s: &str) -> Res<&str, (Binding, bool, Option<Expression>)> {
327    tuple((
328        binding,
329        map(opt(char('/')), |o| o.is_some()),
330        opt(preceded(tuple((space1, tag("->"), space1)), expression)),
331    ))(s)
332}
333
334// :def name/
335// :def name -> link
336fn def_header(s: &str) -> Res<&str, (Identifier, bool, Option<Expression>)> {
337    preceded(
338        tuple((tag(":def"), space1)),
339        tuple((
340            identifier,
341            map(opt(char('/')), |o| o.is_some()),
342            opt(preceded(tuple((space0, tag("->"), space0)), expression)),
343        )),
344    )(s)
345}
346
347fn octal(s: &str) -> Res<&str, u16> {
348    map(is_a("01234567"), |mode| {
349        u16::from_str_radix(mode, 8).unwrap()
350    })(s)
351}
352
353fn identifier(s: &str) -> Res<&str, Identifier> {
354    map(
355        recognize(pair(
356            alt((alpha1, tag("_"))),
357            many0(alt((alphanumeric1, tag("_")))),
358        )),
359        Identifier::new,
360    )(s)
361}
362
363/// Expression, such as "static/$varA/${varB}v2/${NAME}"
364fn expression(s: &str) -> Res<&str, Expression> {
365    map(many1(alt((non_variable, variable))), |tokens| {
366        Expression::from(tokens)
367    })(s)
368}
369
370/// A sequence of characters that are not part of any variable
371fn non_variable(s: &str) -> Res<&str, Token> {
372    map(is_not("$\n"), Token::Text)(s)
373}
374
375/// A variable name, optionally braced, prefixed by a dollar sign, such as `${example}`
376fn variable(s: &str) -> Res<&str, Token> {
377    let braced = |parser| alt((delimited(char('{'), parser, char('}')), parser));
378    let vars = |s| {
379        alt((
380            value(
381                Token::Special(Special::PathRelative),
382                tag(Special::SAME_PATH_RELATIVE),
383            ),
384            value(
385                Token::Special(Special::PathAbsolute),
386                tag(Special::SAME_PATH_ABSOLUTE),
387            ),
388            value(
389                Token::Special(Special::PathNameOnly),
390                tag(Special::SAME_PATH_NAME),
391            ),
392            value(
393                Token::Special(Special::ParentRelative),
394                tag(Special::PARENT_PATH_RELATIVE),
395            ),
396            value(
397                Token::Special(Special::ParentAbsolute),
398                tag(Special::PARENT_PATH_ABSOLUTE),
399            ),
400            value(
401                Token::Special(Special::ParentNameOnly),
402                tag(Special::PARENT_PATH_NAME),
403            ),
404            value(Token::Special(Special::RootPath), tag(Special::ROOT_PATH)),
405            map(identifier, Token::Variable),
406        ))(s)
407    };
408    preceded(char('$'), braced(vars))(s)
409}
410
411#[cfg(test)]
412mod tests;