Skip to main content

bel_format/
parser.rs

1use std::rc::Rc;
2use nom::{
3    IResult,
4    bytes::complete::{tag, take_while_m_n},
5    combinator::map_res,
6    sequence::tuple,
7    sequence::delimited,
8    character::complete::char,
9    bytes::complete::is_not,
10    error::ParseError,
11    character::complete::multispace0,
12    combinator::recognize,
13    sequence::pair,
14    branch::alt,
15    character::complete::{alpha1},
16    character::complete::alphanumeric1,
17    combinator::{cut, map, opt},
18    error::{context, VerboseError},
19    multi::{many0, many1},
20    sequence::{preceded, terminated},
21    character::complete::{digit1, multispace1, one_of},
22    multi::separated_list1,
23    Parser,
24};
25
26///////////////////////////////////////////////////////////////////////////////
27// PARSER DATA TYPES
28///////////////////////////////////////////////////////////////////////////////
29
/// A namespaced argument of the form `prefix:value`, optionally followed by
/// a `! label` suffix.
///
/// Derives `PartialEq`/`Eq` so parsed values can be compared structurally
/// (e.g. in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NsArg {
    /// Text to the left of the `:` separator.
    pub prefix: String,
    /// Text to the right of the `:` separator.
    pub value: String,
    /// Optional text that follows a `!` after the value.
    pub label: Option<String>,
}

/// # Parser AST
///
/// This AST implementation is deliberately simple and tolerant, so that it
/// can absorb differences between BEL versions (or whatever else is causing
/// the inconsistencies seen in real-world input).
///
/// At some point a higher-level AST may be introduced on top of this one,
/// akin to the staged representations of a compiler pipeline.
///
/// Derives `PartialEq`/`Eq` so parsed trees can be compared structurally
/// (e.g. in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Ast {
    /// A namespaced argument such as `prefix:value`.
    NsArg(NsArg),
    /// A bare symbol (a short code or a run of digits).
    Symbol(String),
    /// A function call: its name and its argument list.
    Function(String, Vec<Ast>),
    /// An infix relation: left term, relation name, right term.
    Relation(Box<Ast>, String, Box<Ast>),
    /// Some functions contain a single string argument for some reason.
    String(String),
}
53
54impl std::fmt::Display for NsArg {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        if let Some(label) = self.label.as_ref() {
57            write!(f, "{}:{} ! {}", self.prefix, self.value, label)
58        } else {
59            write!(f, "{}:{}", self.prefix, self.value)
60        }
61    }
62}
63
64impl std::fmt::Display for Ast {
65    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
66        match self {
67            Ast::NsArg(ns) => {
68                write!(f, "{}", ns)
69            }
70            Ast::Symbol(sym) => {
71                write!(f, "{}", sym)
72            }
73            Ast::Function(name, args) => {
74                let args = args
75                    .iter()
76                    .map(|x| format!("{}", x))
77                    .collect::<Vec<String>>()
78                    .join(", ");
79                write!(f, "{}({})", name, args)
80            }
81            Ast::Relation(left, infix_op, right) => {
82                write!(f, "{} {} {}", left, infix_op, right)
83            }
84            Ast::String(string) => {
85                write!(f, "{:?}", string)
86            }
87        }
88    }
89}
90
91
92///////////////////////////////////////////////////////////////////////////////
93// INTERNAL PARSER UTILS
94///////////////////////////////////////////////////////////////////////////////
95
96fn parens<'a, F: 'a, O, E: ParseError<&'a str>>(
97    inner: F
98) -> impl FnMut(&'a str) -> IResult<&'a str, O, E>
99    where F: Fn(&'a str) -> IResult<&'a str, O, E>
100{
101    delimited(char('('), inner, char(')'))
102}
103
104fn identifier(source: &str) -> Result<(&str, String), nom::Err<nom::error::Error<&str>>> {
105    let (source, ident) = recognize(
106        pair(
107            alt((alphanumeric1, tag("_"))),
108            many0(alt((alphanumeric1, tag("_"))))
109        )
110    )(source)?;
111    Ok((source, ident.to_owned()))
112}
113
114fn ws<'a, F: 'a, O, E: ParseError<&'a str>>(
115    inner: F
116) -> impl FnMut(&'a str) -> IResult<&'a str, O, E>
117    where F: Fn(&'a str) -> IResult<&'a str, O, E>
118{
119  delimited(
120    multispace0,
121    inner,
122    multispace0,
123  )
124}
125
126fn parse_ns_arg(
127    source: &str
128) -> Result<(&str, NsArg), nom::Err<nom::error::Error<&str>>>
129{
130    use crate::parser_utils::string::parse_string;
131    fn parse_text(
132        source: &str
133    ) -> Result<(&str, String), nom::Err<nom::error::Error<&str>>> {
134        alt((parse_string, identifier))(source)
135    }
136    fn parse_ident(
137        source: &str,
138    ) -> Result<(&str, (String, String)), nom::Err<nom::error::Error<&str>>> {
139        let (source, prefix) = parse_text(source)?;
140        let (source, _) = tag(":")(source)?;
141        let (source, value) = parse_text(source)?;
142        Ok((source, (prefix, value)))
143    }
144    fn parse_label(
145        source: &str
146    ) -> Result<(&str, String), nom::Err<nom::error::Error<&str>>> {
147        let (source, _) = ws(tag("!"))(source)?;
148        let (source, value) = parse_text(source)?;
149        Ok((source, value))
150    }
151    fn default_parser(
152        source: &str
153    ) -> Result<(&str, NsArg), nom::Err<nom::error::Error<&str>>> {
154        let (source, (prefix, value)) = parse_ident(source)?;
155        let (source, label) = opt(parse_label)(source)?;
156        let ast = NsArg{prefix, value, label};
157        Ok((source, ast))
158    }
159    default_parser(source)
160}
161
162fn parse_ns(
163    source: &str
164) -> Result<(&str, Ast), nom::Err<nom::error::Error<&str>>>
165{
166    let (rest, ns_arg) = parse_ns_arg(source)?;
167    Ok((rest, Ast::NsArg(ns_arg)))
168}
169
170fn parse_symbol_to_ast(
171    source: &str
172) -> Result<(&str, Ast), nom::Err<nom::error::Error<&str>>>
173{
174    let single = alt((
175        tag("A"),
176        tag("R"),
177        tag("N"),
178        tag("D"),
179        tag("C"),
180        tag("E"),
181        tag("Q"),
182        tag("G"),
183        tag("H"),
184        tag("I"),
185        tag("L"),
186        tag("K"),
187        tag("M"),
188        tag("F"),
189        tag("P"),
190        tag("S"),
191        tag("T"),
192        tag("W"),
193        tag("Y"),
194        tag("V"),
195        tag("U"),
196    ));
197    let long = alt((
198        tag("Ala"),
199        tag("Arg"),
200        tag("Asn"),
201        tag("Asp"),
202        tag("Cys"),
203        tag("Glu"),
204        tag("Gln"),
205        tag("Gly"),
206        tag("His"),
207        tag("Ile"),
208        tag("Leu"),
209        tag("Lys"),
210        tag("Met"),
211        tag("Phe"),
212        tag("Pro"),
213        tag("Ser"),
214        tag("Thr"),
215        tag("Trp"),
216        tag("Tyr"),
217        tag("Val"),
218    ));
219    let (rest, sym) = alt((single, long, digit1))(source)?;
220    Ok((rest, Ast::Symbol(sym.to_owned())))
221}
222
223fn parse_string_to_ast(
224    source: &str
225) -> Result<(&str, Ast), nom::Err<nom::error::Error<&str>>>
226{
227    use crate::parser_utils::string::parse_string;
228    let (rest, value) = parse_string(source)?;
229    Ok((rest, Ast::String(value)))
230}
231
232fn parse_function(
233    source: &str
234) -> Result<(&str, Ast), nom::Err<nom::error::Error<&str>>>
235{
236    fn term_parser(
237        source: &str
238    ) -> Result<(&str, Ast), nom::Err<nom::error::Error<&str>>> {
239        alt((
240            parse_arg_relation,
241            parse_ns,
242            parse_function,
243            parse_symbol_to_ast,
244            parse_string_to_ast,
245        ))(source)
246    }
247    fn function_arguments(
248        source: &str
249    ) -> Result<(&str, Vec<Ast>), nom::Err<nom::error::Error<&str>>> {
250        let (source, _) = tag("(")(source)?;
251        let (source, args) = separated_list1(ws(char(',')), ws(term_parser))(source)?;
252        let (source, _) = tag(")")(source)?;
253        Ok((source, args))
254    }
255    let (source, name) = ws(identifier)(source)?;
256    let (source, args) = ws(function_arguments)(source)?;
257    let ast = Ast::Function(
258        name,
259        args,
260    );
261    Ok((source, ast))
262}
263
264fn top_level_parser(
265    source: &str
266) -> Result<(&str, Ast), nom::Err<nom::error::Error<&str>>> {
267    parse_relation(source, true)
268}
269
270fn parse_arg_relation(
271    source: &str,
272) -> Result<(&str, Ast), nom::Err<nom::error::Error<&str>>> {
273    parse_relation(source, false)
274}
275
276fn parse_relation(
277    source: &str,
278    top_level: bool,
279) -> Result<(&str, Ast), nom::Err<nom::error::Error<&str>>> {
280    fn term_parser(
281        source: &str,
282    ) -> Result<(&str, Ast), nom::Err<nom::error::Error<&str>>> {
283        alt((parse_function, parse_ns))(source)
284    }
285    let (source, left) = ws(term_parser)(source)?;
286    let (source, relation) = ws(identifier)(source)?;
287    let (source, right) = ws(term_parser)(source)?;
288    let ast = Ast::Relation(
289        Box::new(left),
290        relation,
291        Box::new(right),
292    );
293    Ok((source, ast))
294}
295
296///////////////////////////////////////////////////////////////////////////////
297// PARSER ENTRYPOINT
298///////////////////////////////////////////////////////////////////////////////
299
/// The reason a line could not be (fully) parsed.
///
/// Derives `PartialEq`/`Eq` so that reports can be compared and asserted on.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParserError {
    /// Parsing succeeded but left this trailing input unconsumed.
    Unparsed(String),
    /// Parsing failed; carries the rendered parser error message.
    ParserError(String),
}

/// A parse problem together with the input line it was found on.
///
/// Derives `PartialEq`/`Eq` so that reports can be compared and asserted on.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ErrorReport {
    /// What went wrong.
    pub error: ParserError,
    /// The full source line that triggered the problem.
    pub line: String,
}
311
312impl std::fmt::Display for ErrorReport {
313    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
314        let lines = vec![
315            format!("[crate bel-format] given line: {}", &self.line),
316            match &self.error {
317                ParserError::Unparsed(rest) => {
318                    format!("\t☞ [unparsed input]: {:?}\n", rest)
319                }
320                ParserError::ParserError(msg) => {
321                    format!("\t☞ [parser error]: {}\n", msg)
322                }
323            }
324        ];
325        write!(f, "{}", lines.join("\n"))
326    }
327}
328
329
330/// `log_errors` will print error messgaes to stdout while parsing.
331pub(crate) fn parse_lines(
332    source: &str,
333    log_errors: bool,
334) -> (Vec<Ast>, Vec<ErrorReport>) {
335    let (ast, errors) = source
336        .lines()
337        .filter(|line| {
338            !line.starts_with("#")
339        })
340        .filter(|line| {
341            !line.starts_with("DEFINE")
342        })
343        .filter(|line| {
344            !line.starts_with("SET")
345        })
346        .filter(|line| {
347            !line.starts_with("UNSET")
348        })
349        .filter(|line| {
350            !line.trim().is_empty()
351        })
352        .map(|line| match top_level_parser(line) {
353            Ok((rest, xs)) if !rest.is_empty() => {
354                let error = ParserError::Unparsed(rest.to_owned());
355                let report = ErrorReport {
356                    error,
357                    line: line.to_owned(),
358                };
359                if log_errors {
360                    eprintln!("{}", report);
361                }
362                (Some(xs), Some(report))
363            }
364            Ok((_, xs)) => (Some(xs), None),
365            Err(msg) => {
366                let error = ParserError::ParserError(format!("{}", msg));
367                let report = ErrorReport {
368                    error,
369                    line: line.to_owned(),
370                };
371                if log_errors {
372                    eprintln!("{}", report);
373                }
374                (None, Some(report))
375            }
376        })
377        .unzip::<_, _, Vec<_>, Vec<_>>();
378    let ast = ast
379        .into_iter()
380        .filter_map(|x| x)
381        .collect::<Vec<_>>();
382    let errors = errors
383        .into_iter()
384        .filter_map(|x| x)
385        .collect::<Vec<_>>();
386    (ast, errors)
387}