Skip to main content

beancount_parser_lima/
lib.rs

1// TODO remove suppression for dead code warning
2#![allow(dead_code)]
3#![recursion_limit = "256"]
4#![doc = include_str!("../README.md")]
5
6//! # Examples
7//!
8//! This example generates the output as shown above.
9//! The supporting function `parse` is required in order to avoid lifetime problems.
10//!
11//!```
12//! # use rust_decimal::Decimal;
13//! # use std::io::{self, Write};
14//! # use std::path::PathBuf;
15//!
16//!use beancount_parser_lima::{
17//!    BeancountParser, BeancountSources, DirectiveVariant, ParseError, ParseSuccess,
18//!};
19//!
20//!fn main() {
21//!    let sources = BeancountSources::try_from(PathBuf::from("examples/data/error-post-balancing.beancount")).unwrap();
22//!    let parser = BeancountParser::new(&sources);
23//!
24//!    parse(&sources, &parser, &mut io::stderr());
25//!}
26//!
27//!fn parse<W>(sources: &BeancountSources, parser: &BeancountParser, error_w: &mut W)
28//!where
29//!    W: Write,
30//!{
31//!    match parser.parse() {
32//!        Ok(ParseSuccess {
33//!            directives,
34//!            options: _,
35//!            plugins: _,
36//!            mut warnings,
37//!        }) => {
38//!            let mut errors = Vec::new();
39//!
40//!            for directive in directives {
41//!                if let DirectiveVariant::Transaction(transaction) = directive.variant() {
42//!                    let mut postings = transaction.postings().collect::<Vec<_>>();
43//!                    let n_postings = postings.len();
44//!                    let n_amounts = itertools::partition(&mut postings, |p| p.amount().is_some());
45//!
46//!                    if postings.is_empty() {
47//!                        warnings.push(directive.warning("no postings"));
48//!                    } else if n_amounts + 1 < n_postings {
49//!                        errors.push(
50//!                            directive
51//!                                .error("multiple postings without amount specified")
52//!                                .related_to_all(postings[n_amounts..].iter().copied()),
53//!                        );
54//!                    } else if n_amounts == n_postings {
55//!                        let total: Decimal =
56//!                            postings.iter().map(|p| p.amount().unwrap().value()).sum();
57//!
58//!                        if total != Decimal::ZERO {
59//!                            let last_amount = postings.pop().unwrap().amount().unwrap();
60//!                            let other_amounts = postings.iter().map(|p| p.amount().unwrap());
61//!
62//!                            errors.push(
63//!                                last_amount
64//!                                    .error(format!("sum is {}, expected zero", total))
65//!                                    .related_to_all(other_amounts)
66//!                                    .in_context(&directive),
67//!                            )
68//!                        }
69//!                    }
70//!                }
71//!            }
72//!
73//!            sources.write_errors_or_warnings(error_w, errors).unwrap();
74//!            sources.write_errors_or_warnings(error_w, warnings).unwrap();
75//!        }
76//!
77//!        Err(ParseError { errors, warnings }) => {
78//!            sources.write_errors_or_warnings(error_w, errors).unwrap();
79//!            sources.write_errors_or_warnings(error_w, warnings).unwrap();
80//!        }
81//!    }
82//!}
83//!```
84
85use chumsky::prelude::{Input, Parser};
86use lexer::{lex, Token};
87use parsers::{file, ParserState};
88use sort::SortIteratorAdaptor;
89use std::{
90    collections::{HashMap, VecDeque},
91    path::{Path, PathBuf},
92};
93
94use crate::{parsers::includes, sources::resolve_included_path};
95pub use crate::{trim::trim_trailing_whitespace, types::*};
96
/// Outcome of expanding the glob named by a single `include` pragma.
#[derive(Clone, Debug)]
enum IncludedGlob {
    /// The paths the glob expanded to; an empty list is later reported as
    /// an "include failed" error.
    Expanded(Vec<PathBuf>),
    /// The message to report when the glob could not be expanded.
    Error(String),
}
102
103// get all includes, discarding errors
104fn get_includes(content: &str, source_id: SourceId) -> Vec<String> {
105    fn get_includes_for_tokens(
106        tokens: Vec<(Token, Span_)>,
107        source_id: SourceId,
108        end_of_input: Span_,
109    ) -> Vec<String> {
110        let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
111
112        let spanned_tokens = tokens
113            .map(end_of_input, |(t, s)| (t, s))
114            .with_context(source_id);
115
116        // ignore any errors in parsing, we'll pick them up in the next pass
117        includes()
118            .parse_with_state(spanned_tokens, &mut parser_state)
119            .into_output()
120            .unwrap_or_default()
121    }
122
123    let tokens = lex_with_source(source_id, content);
124    get_includes_for_tokens(tokens, source_id, end_of_input(source_id, content))
125}
126
127pub fn lex_with_source<'a>(source_id: SourceId, s: &'a str) -> Vec<(Token<'a>, Span_)> {
128    lex(s)
129        .map(|(tok, span)| (tok, chumsky::span::Span::new(source_id, span)))
130        .collect::<Vec<_>>()
131}
132
/// A lexed token paired with its span.
type SpannedToken<'t> = (Token<'t>, Span_);
134
/// The Beancount parser itself, which tokenizes and parses the source files
/// contained in `BeancountSources`.
///
/// # Examples
/// ```
/// # use std::io::{self, Write};
/// # use std::path::PathBuf;
///
/// use beancount_parser_lima::{BeancountParser, BeancountSources, ParseError, ParseSuccess};
///
/// fn main() {
///     let sources = BeancountSources::try_from(PathBuf::from("examples/data/full.beancount")).unwrap();
///     let parser = BeancountParser::new(&sources);
///
///     parse(&sources, &parser, &mut io::stderr());
/// }
///
/// fn parse<W>(sources: &BeancountSources, parser: &BeancountParser, error_w: &mut W)
/// where
///     W: Write,
/// {
///     match parser.parse() {
///         Ok(ParseSuccess {
///             directives,
///             options: _,
///             plugins: _,
///             warnings,
///         }) => {
///             for directive in directives {
///                 println!("{}\n", &directive);
///             }
///
///             sources.write_errors_or_warnings(error_w, warnings).unwrap();
///         }
///         Err(ParseError { errors, warnings }) => {
///             sources.write_errors_or_warnings(error_w, errors).unwrap();
///             sources.write_errors_or_warnings(error_w, warnings).unwrap();
///         }
///     }
/// }
/// ```
pub struct BeancountParser<'s> {
    // the sources being parsed, borrowed for the lifetime of the parser
    sources: &'s BeancountSources,
    // indexed by source_id as per sources
    tokenized_sources: Vec<Vec<SpannedToken<'s>>>,
}
181
/// The result of successfully parsing all the files, containing date-ordered `Directive`s, `Options`, `Plugin`s, and any `Warning`s.
#[derive(Debug)]
pub struct ParseSuccess<'t> {
    /// The directives from all sources, sorted by date.
    pub directives: Vec<Spanned<Directive<'t>>>,
    /// The options in effect after all sources have been processed.
    pub options: Options<'t>,
    /// The plugins declared by `plugin` pragmas.
    pub plugins: Vec<Plugin<'t>>,
    /// Warnings raised while parsing, even though parsing succeeded.
    pub warnings: Vec<Warning>,
}
190
/// The value returned when parsing fails.
#[derive(Debug)]
pub struct ParseError {
    /// The errors which caused parsing to fail; `BeancountParser::parse` only
    /// returns this value when there is at least one.
    pub errors: Vec<Error>,
    /// Warnings raised alongside the errors.
    pub warnings: Vec<Warning>,
}
197
// result of parse_declarations, in order:
// - the declarations parsed from each source, keyed by the path of that source
// - the options collected while parsing
// - the parse errors from all sources
// - the warnings accumulated in the parser state
type ParseDeclarationsResult<'t> = (
    HashMap<Option<&'t Path>, Vec<Spanned<Declaration<'t>>>>,
    Options<'t>,
    Vec<Error>,
    Vec<Warning>,
);
205
206impl<'s> BeancountParser<'s> {
207    /// Create a `BeancountParser` from `BeancountSources` read from all input files.
208    pub fn new(sources: &'s BeancountSources) -> Self {
209        // `content_iter()` walks a `HashMap`, so iteration order is not guaranteed.
210        // We must index by `SourceId` rather than relying on iteration order.
211        let mut tokenized_sources = vec![Vec::new(); sources.num_sources()];
212
213        for (source_id, _path, content) in sources.content_iter() {
214            let i_source: usize = source_id.into();
215            tokenized_sources[i_source] = lex_with_source(source_id, content);
216        }
217
218        BeancountParser {
219            sources,
220            tokenized_sources,
221        }
222    }
223
224    /// Parse the sources, returning date-sorted directives and options, or errors, along with warnings in both cases.
225    pub fn parse<'a>(&'a self) -> Result<ParseSuccess<'a>, ParseError> {
226        let (parsed_sources, options, mut errors, mut warnings) = self.parse_declarations();
227        let mut p = PragmaProcessor::new(
228            self.root_path(),
229            parsed_sources,
230            self.sources.included_globs(),
231            self.sources.error_paths(),
232            options,
233        );
234
235        // directives are stable-sorted by date, with a secondary key enforcing the stream
236        // invariants from https://beancount.github.io/docs/beancount_design_doc.html#stream-invariants:
237        // Open sorts before Transaction; Balance sorts before Transaction.
238        let directives = p
239            .by_ref()
240            .sort(|d| {
241                (
242                    *d.item().date().item(),
243                    match d.variant() {
244                        DirectiveVariant::Open(_) => 0u8,
245                        DirectiveVariant::Balance(_) => 1u8,
246                        _ => 2u8,
247                    },
248                )
249            })
250            .collect::<Vec<_>>();
251        let (options, plugins, mut pragma_errors, mut pragma_warnings) = p.result();
252        errors.append(&mut pragma_errors);
253        warnings.append(&mut pragma_warnings);
254
255        if errors.is_empty() {
256            Ok(ParseSuccess {
257                directives,
258                options,
259                plugins,
260                warnings,
261            })
262        } else {
263            Err(ParseError { errors, warnings })
264        }
265    }
266
267    fn root_path(&self) -> Option<&'s Path> {
268        self.sources.root_path()
269    }
270
271    /// Parse the sources, returning declarations and any errors.
272    /// The declarations are indexed by SourceId
273    fn parse_declarations<'a>(&'a self) -> ParseDeclarationsResult<'a> {
274        let mut all_outputs = HashMap::new();
275        let mut all_errors = Vec::new();
276        let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
277
278        for (source_id, source_path, content) in self.sources.content_iter() {
279            let i_source: usize = source_id.into();
280            let tokens = &self.tokenized_sources[i_source];
281
282            let spanned_tokens = tokens
283                .map(end_of_input(source_id, content), |(t, s)| (t, s))
284                .with_context(source_id);
285
286            let (output, errors) = file(source_path)
287                .parse_with_state(spanned_tokens, &mut parser_state)
288                .into_output_errors();
289
290            all_outputs.insert(source_path, output.unwrap_or(Vec::new()));
291            all_errors.extend(errors);
292        }
293
294        let ParserState { options, warnings } = parser_state.0;
295
296        (
297            all_outputs,
298            Options::new(options),
299            all_errors.into_iter().map(Error::from).collect(),
300            warnings,
301        )
302    }
303}
304
/// Iterator which applies pragmas to the sequence of `Directive`s.
///
/// When the iterator is exhausted, any errors should be collected by the caller.
#[derive(Debug)]
struct PragmaProcessor<'s> {
    // path of the source currently being consumed, against which relative includes are resolved
    current_path: Option<PathBuf>,
    // declarations of the current source which have not yet been consumed
    current_declarations: VecDeque<Spanned<Declaration<'s>>>,
    // sources suspended by an include, most recently suspended at the front
    stacked: VecDeque<(Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>)>,
    // parsed sources which have not yet been included, by path
    remaining: HashMap<Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>>,
    // result of expanding each include glob, looked up by the resolved glob path
    included_globs: &'s HashMap<PathBuf, IncludedGlob>,
    // message for each path reported as "can't read file" when included
    error_paths: HashMap<Option<PathBuf>, String>,
    // context recorded when a file is included, by canonical path, for checking duplicate includes
    include_by_canonical_path: HashMap<PathBuf, IncludeContext<'s>>,
    // tags and meta key/values for pragma push/pop
    // for tags, the key is the first push and the value holds any further pushes of the same tag
    tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
    // for meta, every pushed value is held along with the span of the key which pushed it
    meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
    options: Options<'s>,
    plugins: Vec<Plugin<'s>>,
    // errors and warnings, for collection when the iterator is exhausted
    errors: Vec<Error>,
    warnings: Vec<Warning>,
}
326
/// Snapshot of the pushed tags and meta key/values in force when a file was
/// included, together with the span of the include.
#[derive(Debug)]
struct IncludeContext<'s> {
    // tags pushed at the point of the include
    tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
    // meta key/values pushed at the point of the include
    meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
    // span of the include pragma, for relating a later duplicate include error to it
    span: Span,
}
333
334fn fmt_include_context<'s>(
335    tags: &HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
336    meta_key_values: &HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
337) -> String {
338    let tags_s = itertools::intersperse(
339        tags.keys().map(|tag| tag.item().to_string()),
340        " ".to_string(),
341    )
342    .collect::<String>();
343    let meta_s = itertools::intersperse(
344        meta_key_values.iter().map(|(k, v)| {
345            format!(
346                "{}: [{}]",
347                k.item(),
348                itertools::intersperse(
349                    v.iter().map(|(_, v)| v.item().to_string()),
350                    " ".to_string()
351                )
352                .collect::<String>()
353            )
354        }),
355        " ".to_string(),
356    )
357    .collect::<String>();
358    format!("{} {}", tags_s, meta_s)
359}
360
361impl<'s> PragmaProcessor<'s> {
362    fn new(
363        root_path: Option<&Path>,
364        parsed_sources: HashMap<Option<&Path>, Vec<Spanned<Declaration<'s>>>>,
365        included_globs: &'s HashMap<PathBuf, IncludedGlob>,
366        error_paths: HashMap<Option<&Path>, String>,
367        options: Options<'s>,
368    ) -> Self {
369        let mut remaining = parsed_sources
370            .into_iter()
371            .map(|(path, declarations)| {
372                (path.map(|p| p.to_path_buf()), VecDeque::from(declarations))
373            })
374            .collect::<HashMap<_, _>>();
375        let error_paths = error_paths
376            .into_iter()
377            .map(|(path, e)| (path.map(|p| p.to_path_buf()), e))
378            .collect::<HashMap<_, _>>();
379
380        let current_path = root_path.map(|p| p.to_path_buf());
381        let current_declarations = remaining.remove(&current_path).unwrap();
382
383        PragmaProcessor {
384            current_path,
385            current_declarations,
386            stacked: VecDeque::new(),
387            remaining,
388            included_globs,
389            error_paths,
390            include_by_canonical_path: HashMap::default(),
391            tags: HashMap::new(),
392            meta_key_values: HashMap::new(),
393            options,
394            plugins: Vec::new(),
395            errors: Vec::new(),
396            warnings: Vec::new(),
397        }
398    }
399
400    fn result(self) -> (Options<'s>, Vec<Plugin<'s>>, Vec<Error>, Vec<Warning>) {
401        // any leftover tags or key/values is an error
402        let mut errors = self.errors;
403        let warnings = self.warnings;
404
405        for (key, _value) in self.meta_key_values {
406            let e = Error::new(
407                "invalid pushmeta",
408                "missing corresponding popmeta",
409                key.span,
410            );
411            errors.push(e);
412        }
413
414        for (tag, others) in self.tags {
415            let e = Error::new("invalid pushtag", "missing corresponding poptag", tag.span);
416            errors.push(e);
417            for other in others {
418                let e = Error::new(
419                    "invalid pushtag",
420                    "missing corresponding poptag",
421                    other.span,
422                );
423                errors.push(e);
424            }
425        }
426
427        (self.options, self.plugins, errors, warnings)
428    }
429}
430
431impl<'s> Iterator for PragmaProcessor<'s> {
432    type Item = Spanned<Directive<'s>>;
433
434    fn next(&mut self) -> Option<Self::Item> {
435        match self.current_declarations.pop_front() {
436            Some(declaration) => {
437                match declaration.item {
438                    Declaration::Directive(mut directive) => {
439                        directive.metadata.augment_tags(&self.tags);
440                        directive.metadata.augment_key_values(&self.meta_key_values);
441
442                        Some(spanned(directive, declaration.span))
443                    }
444
445                    Declaration::Pragma(pragma) => {
446                        use Pragma::*;
447
448                        match pragma {
449                            Pushtag(tag) => match self.tags.get_mut(&tag) {
450                                Some(others) => {
451                                    others.push(tag);
452                                }
453                                None => {
454                                    self.tags.insert(tag, Vec::default());
455                                }
456                            },
457                            Poptag(tag) => {
458                                let mut last_tag = false;
459
460                                match self.tags.get_mut(&tag) {
461                                    Some(others) => {
462                                        if others.is_empty() {
463                                            last_tag = true;
464                                            // need to remove later because of borrowing
465                                        } else {
466                                            others.pop();
467                                        }
468                                    }
469                                    None => {
470                                        let e = Error::new(
471                                            "invalid poptag",
472                                            "missing corresponding pushtag",
473                                            tag.span,
474                                        );
475                                        self.errors.push(e);
476                                    }
477                                }
478
479                                if last_tag {
480                                    self.tags.remove(&tag);
481                                }
482                            }
483                            Pushmeta(meta) => match self.meta_key_values.get_mut(&meta.key) {
484                                Some(values) => {
485                                    values.push((meta.key.span, meta.value));
486                                }
487                                None => {
488                                    self.meta_key_values
489                                        .insert(meta.key, vec![(meta.key.span, meta.value)]);
490                                }
491                            },
492                            Popmeta(meta) => {
493                                let mut last_meta = false;
494
495                                match self.meta_key_values.get_mut(&meta) {
496                                    Some(values) => {
497                                        values.pop();
498                                        if values.is_empty() {
499                                            last_meta = true;
500                                            // need to remove later because of borrowing
501                                        }
502                                    }
503                                    None => {
504                                        let e = Error::new(
505                                            "invalid popmeta",
506                                            "missing corresponding pushmeta",
507                                            meta.span,
508                                        );
509                                        self.errors.push(e);
510                                    }
511                                }
512
513                                if last_meta {
514                                    self.meta_key_values.remove(&meta);
515                                }
516                            }
517                            Include(rel_glob) => {
518                                let (abs_glob, span) = (
519                                    resolve_included_path(
520                                        self.current_path.as_ref(),
521                                        AsRef::<Path>::as_ref(*rel_glob.item()),
522                                    ),
523                                    *rel_glob.span(),
524                                );
525
526                                match self.included_globs.get(&abs_glob) {
527                                    None => panic!("impossible, I hope"),
528                                    Some(IncludedGlob::Expanded(paths)) => {
529                                        if paths.is_empty() {
530                                            // this is an error rather than a warning to catch plain paths whic fail to match
531                                            let e =
532                                                Error::new("include failed", "no such file", span);
533
534                                            self.errors.push(e)
535                                        }
536
537                                        for included in paths {
538                                            let included = Some(included.clone());
539
540                                            match self.remaining.remove_entry(&included) {
541                                                Some((included_path, included_declarations)) => {
542                                                    let stacked_path = std::mem::replace(
543                                                        &mut self.current_path,
544                                                        included_path,
545                                                    );
546                                                    let stacked_declarations = std::mem::replace(
547                                                        &mut self.current_declarations,
548                                                        included_declarations,
549                                                    );
550                                                    self.stacked.push_front((
551                                                        stacked_path,
552                                                        stacked_declarations,
553                                                    ));
554
555                                                    // record the span in case of a duplicate include error later
556                                                    if let Ok(canonical_path) = self
557                                                        .current_path
558                                                        .as_ref()
559                                                        .unwrap()
560                                                        .canonicalize()
561                                                    {
562                                                        self.include_by_canonical_path.insert(
563                                                            canonical_path,
564                                                            IncludeContext {
565                                                                tags: self.tags.clone(),
566                                                                meta_key_values: self
567                                                                    .meta_key_values
568                                                                    .clone(),
569                                                                span,
570                                                            },
571                                                        );
572                                                    }
573                                                }
574
575                                                None => {
576                                                    // either a known error path or a duplicate include
577                                                    if let Some(e) = self.error_paths.get(&included)
578                                                    {
579                                                        self.errors.push(Error::new(
580                                                            "can't read file",
581                                                            e.to_string(),
582                                                            span,
583                                                        ));
584                                                    } else {
585                                                        // duplicate include, only allowed if the include context is the same
586
587                                                        let e = Error::new(
588                                                            "duplicate include",
589                                                            format!(
590                                                                "context {}",
591                                                                fmt_include_context(
592                                                                    &self.tags,
593                                                                    &self.meta_key_values
594                                                                )
595                                                            ),
596                                                            span,
597                                                        );
598
599                                                        // relate the error to the first include if we can
600                                                        let e = if let Some(canonical_path) =
601                                                            included.and_then(|included| {
602                                                                included.canonicalize().ok()
603                                                            }) {
604                                                            if let Some(include_context) = self
605                                                                .include_by_canonical_path
606                                                                .get(&canonical_path)
607                                                            {
608                                                                if include_context.tags == self.tags
609                                                                    && include_context
610                                                                        .meta_key_values
611                                                                        == self.meta_key_values
612                                                                {
613                                                                    // include context is identical, so the include is harmless
614                                                                    // and we ignore it
615                                                                    None
616                                                                } else {
617                                                                    Some(e.related_to_named_span(
618                                                                        format!("context {}", fmt_include_context(&include_context.tags, &include_context.meta_key_values)),
619                                                                        include_context.span,
620                                                                    ))
621                                                                }
622                                                            } else {
623                                                                Some(e)
624                                                            }
625                                                        } else {
626                                                            Some(e)
627                                                        };
628
629                                                        if let Some(e) = e {
630                                                            self.errors.push(e);
631                                                        }
632                                                    }
633                                                }
634                                            }
635                                        }
636                                    }
637                                    Some(IncludedGlob::Error(e)) => {
638                                        let e = Error::new("can't expand glob", e, span);
639                                        self.errors.push(e);
640                                    }
641                                }
642                            }
643
644                            Option(opt) => {
645                                if let Err(e) = self.options.assimilate(opt) {
646                                    self.errors.push(e);
647                                }
648                            }
649
650                            Plugin(plugin) => self.plugins.push(plugin),
651                        }
652
653                        // having silently consumed a pragma, go on to the next declaration
654                        self.next()
655                    }
656                }
657            }
658            None => match self.stacked.pop_front() {
659                Some((path, declarations)) => {
660                    self.current_path = path;
661                    self.current_declarations = declarations;
662                    self.next()
663                }
664                None => None,
665            },
666        }
667    }
668}
669
670fn end_of_input(source_id: SourceId, s: &str) -> Span_ {
671    chumsky::span::Span::new(source_id, s.len()..s.len())
672}
673
674#[cfg(test)]
675pub use lexer::bare_lex;
676mod format;
677mod lexer;
678pub use options::Options;
679pub(crate) mod options;
680mod parsers;
681mod sort;
682mod sources;
683pub use sources::{BeancountSources, SyntheticSources};
684mod trim;
685pub mod types;