Skip to main content

beancount_parser_lima/
lib.rs

1// TODO remove suppression for dead code warning
2#![allow(dead_code)]
3#![recursion_limit = "256"]
4#![doc = include_str!("../README.md")]
5
6//! # Examples
7//!
8//! This example generates the output as shown above.
9//! The supporting function `parse` is required in order to avoid lifetime problems.
10//!
11//!```
12//! # use rust_decimal::Decimal;
13//! # use std::io::{self, Write};
14//! # use std::path::PathBuf;
15//!
16//!use beancount_parser_lima::{
17//!    BeancountParser, BeancountSources, DirectiveVariant, ParseError, ParseSuccess,
18//!};
19//!
20//!fn main() {
21//!    let sources = BeancountSources::try_from(PathBuf::from("examples/data/error-post-balancing.beancount")).unwrap();
22//!    let parser = BeancountParser::new(&sources);
23//!
24//!    parse(&sources, &parser, &mut io::stderr());
25//!}
26//!
27//!fn parse<W>(sources: &BeancountSources, parser: &BeancountParser, error_w: &mut W)
28//!where
29//!    W: Write,
30//!{
31//!    match parser.parse() {
32//!        Ok(ParseSuccess {
33//!            directives,
34//!            options: _,
35//!            plugins: _,
36//!            mut warnings,
37//!        }) => {
38//!            let mut errors = Vec::new();
39//!
40//!            for directive in directives {
41//!                if let DirectiveVariant::Transaction(transaction) = directive.variant() {
42//!                    let mut postings = transaction.postings().collect::<Vec<_>>();
43//!                    let n_postings = postings.len();
44//!                    let n_amounts = itertools::partition(&mut postings, |p| p.amount().is_some());
45//!
46//!                    if postings.is_empty() {
47//!                        warnings.push(directive.warning("no postings"));
48//!                    } else if n_amounts + 1 < n_postings {
49//!                        errors.push(
50//!                            directive
51//!                                .error("multiple postings without amount specified")
52//!                                .related_to_all(postings[n_amounts..].iter().copied()),
53//!                        );
54//!                    } else if n_amounts == n_postings {
55//!                        let total: Decimal =
56//!                            postings.iter().map(|p| p.amount().unwrap().value()).sum();
57//!
58//!                        if total != Decimal::ZERO {
59//!                            let last_amount = postings.pop().unwrap().amount().unwrap();
60//!                            let other_amounts = postings.iter().map(|p| p.amount().unwrap());
61//!
62//!                            errors.push(
63//!                                last_amount
64//!                                    .error(format!("sum is {}, expected zero", total))
65//!                                    .related_to_all(other_amounts)
66//!                                    .in_context(&directive),
67//!                            )
68//!                        }
69//!                    }
70//!                }
71//!            }
72//!
73//!            sources.write_errors_or_warnings(error_w, errors).unwrap();
74//!            sources.write_errors_or_warnings(error_w, warnings).unwrap();
75//!        }
76//!
77//!        Err(ParseError { errors, warnings }) => {
78//!            sources.write_errors_or_warnings(error_w, errors).unwrap();
79//!            sources.write_errors_or_warnings(error_w, warnings).unwrap();
80//!        }
81//!    }
82//!}
83//!```
84
85use chumsky::prelude::{Input, Parser};
86use lexer::{lex, Token};
87use parsers::{file, ParserState};
88use sort::SortIteratorAdaptor;
89use std::{
90    collections::{HashMap, VecDeque},
91    path::{Path, PathBuf},
92};
93
94use crate::{parsers::includes, sources::resolve_included_path};
95pub use crate::{trim::trim_trailing_whitespace, types::*};
96
/// Outcome of expanding the glob of an `include` pragma.
#[derive(Clone, Debug)]
enum IncludedGlob {
    /// The paths the glob expanded to; empty when nothing matched.
    Expanded(Vec<PathBuf>),
    /// Message describing why the glob could not be expanded.
    Error(String),
}
102
103// get all includes, discarding errors
104fn get_includes(content: &str, source_id: SourceId) -> Vec<String> {
105    fn get_includes_for_tokens(
106        tokens: Vec<(Token, Span_)>,
107        source_id: SourceId,
108        end_of_input: Span_,
109    ) -> Vec<String> {
110        let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
111
112        let spanned_tokens = tokens
113            .map(end_of_input, |(t, s)| (t, s))
114            .with_context(source_id);
115
116        // ignore any errors in parsing, we'll pick them up in the next pass
117        includes()
118            .parse_with_state(spanned_tokens, &mut parser_state)
119            .into_output()
120            .unwrap_or_default()
121    }
122
123    let tokens = lex_with_source(source_id, content);
124    get_includes_for_tokens(tokens, source_id, end_of_input(source_id, content))
125}
126
127pub fn lex_with_source<'a>(source_id: SourceId, s: &'a str) -> Vec<(Token<'a>, Span_)> {
128    lex(s)
129        .map(|(tok, span)| (tok, chumsky::span::Span::new(source_id, span)))
130        .collect::<Vec<_>>()
131}
132
/// A lexed token paired with its span.
type SpannedToken<'t> = (Token<'t>, Span_);
134
/// The Beancount parser itself, which tokenizes and parses the source files
/// contained in `BeancountSources`.
///
/// # Examples
/// ```
/// # use std::io::{self, Write};
/// # use std::path::PathBuf;
///
/// use beancount_parser_lima::{BeancountParser, BeancountSources, ParseError, ParseSuccess};
///
/// fn main() {
///     let sources = BeancountSources::try_from(PathBuf::from("examples/data/full.beancount")).unwrap();
///     let parser = BeancountParser::new(&sources);
///
///     parse(&sources, &parser, &mut io::stderr());
/// }
///
/// fn parse<W>(sources: &BeancountSources, parser: &BeancountParser, error_w: &mut W)
/// where
///     W: Write,
/// {
///     match parser.parse() {
///         Ok(ParseSuccess {
///             directives,
///             options: _,
///             plugins: _,
///             warnings,
///         }) => {
///             for directive in directives {
///                 println!("{}\n", &directive);
///             }
///
///             sources.write_errors_or_warnings(error_w, warnings).unwrap();
///         }
///         Err(ParseError { errors, warnings }) => {
///             sources.write_errors_or_warnings(error_w, errors).unwrap();
///             sources.write_errors_or_warnings(error_w, warnings).unwrap();
///         }
///     }
/// }
/// ```
pub struct BeancountParser<'s> {
    // the sources being parsed, borrowed for the lifetime of the parser
    sources: &'s BeancountSources,
    // tokens of each source, indexed by source_id as per sources
    tokenized_sources: Vec<Vec<SpannedToken<'s>>>,
}
181
/// The result of successfully parsing all the files, containing date-ordered `Directive`s, `Options`, `Plugin`s, and any `Warning`s.
#[derive(Debug)]
pub struct ParseSuccess<'t> {
    /// Directives sorted by date, with balance directives ahead of the others for the same day.
    pub directives: Vec<Spanned<Directive<'t>>>,
    /// Options after all `option` pragmas have been applied.
    pub options: Options<'t>,
    /// Plugins in the order their pragmas were processed.
    pub plugins: Vec<Plugin<'t>>,
    /// Warnings raised while parsing and while processing pragmas.
    pub warnings: Vec<Warning>,
}
190
/// The value returned when parsing fails, that is, when at least one error was raised.
#[derive(Debug)]
pub struct ParseError {
    /// Errors raised while parsing and while processing pragmas.
    pub errors: Vec<Error>,
    /// Warnings raised alongside the errors.
    pub warnings: Vec<Warning>,
}
197
// Result of `parse_declarations`, in order:
// - the declarations of each source, keyed by the path of that source
// - the options accumulated in the parser state
// - the parse errors
// - the warnings accumulated in the parser state
type ParseDeclarationsResult<'t> = (
    HashMap<Option<&'t Path>, Vec<Spanned<Declaration<'t>>>>,
    Options<'t>,
    Vec<Error>,
    Vec<Warning>,
);
205
206impl<'s> BeancountParser<'s> {
207    /// Create a `BeancountParser` from `BeancountSources` read from all input files.
208    pub fn new(sources: &'s BeancountSources) -> Self {
209        // `content_iter()` walks a `HashMap`, so iteration order is not guaranteed.
210        // We must index by `SourceId` rather than relying on iteration order.
211        let mut tokenized_sources = vec![Vec::new(); sources.num_sources()];
212
213        for (source_id, _path, content) in sources.content_iter() {
214            let i_source: usize = source_id.into();
215            tokenized_sources[i_source] = lex_with_source(source_id, content);
216        }
217
218        BeancountParser {
219            sources,
220            tokenized_sources,
221        }
222    }
223
224    /// Parse the sources, returning date-sorted directives and options, or errors, along with warnings in both cases.
225    pub fn parse<'a>(&'a self) -> Result<ParseSuccess<'a>, ParseError> {
226        let (parsed_sources, options, mut errors, mut warnings) = self.parse_declarations();
227        let mut p = PragmaProcessor::new(
228            self.root_path(),
229            parsed_sources,
230            self.sources.included_globs(),
231            self.sources.error_paths(),
232            options,
233        );
234
235        // directives are stable-sorted by date, where balance directives sort ahead of the other directives for that day
236        // as per https://beancount.github.io/docs/beancount_design_doc.html#stream-invariants
237        let directives = p
238            .by_ref()
239            .sort(|d| {
240                (
241                    *d.item().date().item(),
242                    // secondary sort of Balance ahead of others
243                    !matches!(d.variant(), DirectiveVariant::Balance(_)),
244                )
245            })
246            .collect::<Vec<_>>();
247        let (options, plugins, mut pragma_errors, mut pragma_warnings) = p.result();
248        errors.append(&mut pragma_errors);
249        warnings.append(&mut pragma_warnings);
250
251        if errors.is_empty() {
252            Ok(ParseSuccess {
253                directives,
254                options,
255                plugins,
256                warnings,
257            })
258        } else {
259            Err(ParseError { errors, warnings })
260        }
261    }
262
263    fn root_path(&self) -> Option<&'s Path> {
264        self.sources.root_path()
265    }
266
267    /// Parse the sources, returning declarations and any errors.
268    /// The declarations are indexed by SourceId
269    fn parse_declarations<'a>(&'a self) -> ParseDeclarationsResult<'a> {
270        let mut all_outputs = HashMap::new();
271        let mut all_errors = Vec::new();
272        let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
273
274        for (source_id, source_path, content) in self.sources.content_iter() {
275            let i_source: usize = source_id.into();
276            let tokens = &self.tokenized_sources[i_source];
277
278            let spanned_tokens = tokens
279                .map(end_of_input(source_id, content), |(t, s)| (t, s))
280                .with_context(source_id);
281
282            let (output, errors) = file(source_path)
283                .parse_with_state(spanned_tokens, &mut parser_state)
284                .into_output_errors();
285
286            all_outputs.insert(source_path, output.unwrap_or(Vec::new()));
287            all_errors.extend(errors);
288        }
289
290        let ParserState { options, warnings } = parser_state.0;
291
292        (
293            all_outputs,
294            Options::new(options),
295            all_errors.into_iter().map(Error::from).collect(),
296            warnings,
297        )
298    }
299}
300
/// Iterator which applies pragmas to the sequence of `Directive`s.
///
/// When the iterator is exhausted, any errors should be collected by the caller.
#[derive(Debug)]
struct PragmaProcessor<'s> {
    // path of the file currently being processed, used to resolve relative includes
    current_path: Option<PathBuf>,
    // declarations of the current file which are yet to be processed
    current_declarations: VecDeque<Spanned<Declaration<'s>>>,
    // files suspended while a file they include is processed, most recent at the front
    stacked: VecDeque<(Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>)>,
    // parsed files which have not yet been included
    remaining: HashMap<Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>>,
    // expansion of each include glob, keyed by the resolved glob
    included_globs: &'s HashMap<PathBuf, IncludedGlob>,
    // message for each path reported as "can't read file" when it is included
    error_paths: HashMap<Option<PathBuf>, String>,
    // context in which each file was included, keyed by canonical path
    include_by_canonical_path: HashMap<PathBuf, IncludeContext<'s>>,
    // tags and meta key/values for pragma push/pop
    // a tag maps to any further pushes of that same tag
    tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
    // a key maps to the stack of pushed values, each with the span of the key which pushed it
    meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
    options: Options<'s>,
    plugins: Vec<Plugin<'s>>,
    // errors and warnings, for collection when the iterator is exhausted
    errors: Vec<Error>,
    warnings: Vec<Warning>,
}
322
/// Snapshot of the pushed tags and meta key/values at the point a file was included,
/// used to decide whether a later duplicate include of that file is harmless.
#[derive(Debug)]
struct IncludeContext<'s> {
    // tags which were pushed when the include was processed
    tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
    // meta key/values which were pushed when the include was processed
    meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
    // span of the include pragma
    span: Span,
}
329
330fn fmt_include_context<'s>(
331    tags: &HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
332    meta_key_values: &HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
333) -> String {
334    let tags_s = itertools::intersperse(
335        tags.keys().map(|tag| tag.item().to_string()),
336        " ".to_string(),
337    )
338    .collect::<String>();
339    let meta_s = itertools::intersperse(
340        meta_key_values.iter().map(|(k, v)| {
341            format!(
342                "{}: [{}]",
343                k.item(),
344                itertools::intersperse(
345                    v.iter().map(|(_, v)| v.item().to_string()),
346                    " ".to_string()
347                )
348                .collect::<String>()
349            )
350        }),
351        " ".to_string(),
352    )
353    .collect::<String>();
354    format!("{} {}", tags_s, meta_s)
355}
356
357impl<'s> PragmaProcessor<'s> {
358    fn new(
359        root_path: Option<&Path>,
360        parsed_sources: HashMap<Option<&Path>, Vec<Spanned<Declaration<'s>>>>,
361        included_globs: &'s HashMap<PathBuf, IncludedGlob>,
362        error_paths: HashMap<Option<&Path>, String>,
363        options: Options<'s>,
364    ) -> Self {
365        let mut remaining = parsed_sources
366            .into_iter()
367            .map(|(path, declarations)| {
368                (path.map(|p| p.to_path_buf()), VecDeque::from(declarations))
369            })
370            .collect::<HashMap<_, _>>();
371        let error_paths = error_paths
372            .into_iter()
373            .map(|(path, e)| (path.map(|p| p.to_path_buf()), e))
374            .collect::<HashMap<_, _>>();
375
376        let current_path = root_path.map(|p| p.to_path_buf());
377        let current_declarations = remaining.remove(&current_path).unwrap();
378
379        PragmaProcessor {
380            current_path,
381            current_declarations,
382            stacked: VecDeque::new(),
383            remaining,
384            included_globs,
385            error_paths,
386            include_by_canonical_path: HashMap::default(),
387            tags: HashMap::new(),
388            meta_key_values: HashMap::new(),
389            options,
390            plugins: Vec::new(),
391            errors: Vec::new(),
392            warnings: Vec::new(),
393        }
394    }
395
396    fn result(self) -> (Options<'s>, Vec<Plugin<'s>>, Vec<Error>, Vec<Warning>) {
397        // any leftover tags or key/values is an error
398        let mut errors = self.errors;
399        let warnings = self.warnings;
400
401        for (key, _value) in self.meta_key_values {
402            let e = Error::new(
403                "invalid pushmeta",
404                "missing corresponding popmeta",
405                key.span,
406            );
407            errors.push(e);
408        }
409
410        for (tag, others) in self.tags {
411            let e = Error::new("invalid pushtag", "missing corresponding poptag", tag.span);
412            errors.push(e);
413            for other in others {
414                let e = Error::new(
415                    "invalid pushtag",
416                    "missing corresponding poptag",
417                    other.span,
418                );
419                errors.push(e);
420            }
421        }
422
423        (self.options, self.plugins, errors, warnings)
424    }
425}
426
427impl<'s> Iterator for PragmaProcessor<'s> {
428    type Item = Spanned<Directive<'s>>;
429
430    fn next(&mut self) -> Option<Self::Item> {
431        match self.current_declarations.pop_front() {
432            Some(declaration) => {
433                match declaration.item {
434                    Declaration::Directive(mut directive) => {
435                        directive.metadata.augment_tags(&self.tags);
436                        directive.metadata.augment_key_values(&self.meta_key_values);
437
438                        Some(spanned(directive, declaration.span))
439                    }
440
441                    Declaration::Pragma(pragma) => {
442                        use Pragma::*;
443
444                        match pragma {
445                            Pushtag(tag) => match self.tags.get_mut(&tag) {
446                                Some(others) => {
447                                    others.push(tag);
448                                }
449                                None => {
450                                    self.tags.insert(tag, Vec::default());
451                                }
452                            },
453                            Poptag(tag) => {
454                                let mut last_tag = false;
455
456                                match self.tags.get_mut(&tag) {
457                                    Some(others) => {
458                                        if others.is_empty() {
459                                            last_tag = true;
460                                            // need to remove later because of borrowing
461                                        } else {
462                                            others.pop();
463                                        }
464                                    }
465                                    None => {
466                                        let e = Error::new(
467                                            "invalid poptag",
468                                            "missing corresponding pushtag",
469                                            tag.span,
470                                        );
471                                        self.errors.push(e);
472                                    }
473                                }
474
475                                if last_tag {
476                                    self.tags.remove(&tag);
477                                }
478                            }
479                            Pushmeta(meta) => match self.meta_key_values.get_mut(&meta.key) {
480                                Some(values) => {
481                                    values.push((meta.key.span, meta.value));
482                                }
483                                None => {
484                                    self.meta_key_values
485                                        .insert(meta.key, vec![(meta.key.span, meta.value)]);
486                                }
487                            },
488                            Popmeta(meta) => {
489                                let mut last_meta = false;
490
491                                match self.meta_key_values.get_mut(&meta) {
492                                    Some(values) => {
493                                        values.pop();
494                                        if values.is_empty() {
495                                            last_meta = true;
496                                            // need to remove later because of borrowing
497                                        }
498                                    }
499                                    None => {
500                                        let e = Error::new(
501                                            "invalid popmeta",
502                                            "missing corresponding pushmeta",
503                                            meta.span,
504                                        );
505                                        self.errors.push(e);
506                                    }
507                                }
508
509                                if last_meta {
510                                    self.meta_key_values.remove(&meta);
511                                }
512                            }
513                            Include(rel_glob) => {
514                                let (abs_glob, span) = (
515                                    resolve_included_path(
516                                        self.current_path.as_ref(),
517                                        AsRef::<Path>::as_ref(*rel_glob.item()),
518                                    ),
519                                    *rel_glob.span(),
520                                );
521
522                                match self.included_globs.get(&abs_glob) {
523                                    None => panic!("impossible, I hope"),
524                                    Some(IncludedGlob::Expanded(paths)) => {
525                                        if paths.is_empty() {
526                                            // this is an error rather than a warning to catch plain paths whic fail to match
527                                            let e =
528                                                Error::new("include failed", "no such file", span);
529
530                                            self.errors.push(e)
531                                        }
532
533                                        for included in paths {
534                                            let included = Some(included.clone());
535
536                                            match self.remaining.remove_entry(&included) {
537                                                Some((included_path, included_declarations)) => {
538                                                    let stacked_path = std::mem::replace(
539                                                        &mut self.current_path,
540                                                        included_path,
541                                                    );
542                                                    let stacked_declarations = std::mem::replace(
543                                                        &mut self.current_declarations,
544                                                        included_declarations,
545                                                    );
546                                                    self.stacked.push_front((
547                                                        stacked_path,
548                                                        stacked_declarations,
549                                                    ));
550
551                                                    // record the span in case of a duplicate include error later
552                                                    if let Ok(canonical_path) = self
553                                                        .current_path
554                                                        .as_ref()
555                                                        .unwrap()
556                                                        .canonicalize()
557                                                    {
558                                                        self.include_by_canonical_path.insert(
559                                                            canonical_path,
560                                                            IncludeContext {
561                                                                tags: self.tags.clone(),
562                                                                meta_key_values: self
563                                                                    .meta_key_values
564                                                                    .clone(),
565                                                                span,
566                                                            },
567                                                        );
568                                                    }
569                                                }
570
571                                                None => {
572                                                    // either a known error path or a duplicate include
573                                                    if let Some(e) = self.error_paths.get(&included)
574                                                    {
575                                                        self.errors.push(Error::new(
576                                                            "can't read file",
577                                                            e.to_string(),
578                                                            span,
579                                                        ));
580                                                    } else {
581                                                        // duplicate include, only allowed if the include context is the same
582
583                                                        let e = Error::new(
584                                                            "duplicate include",
585                                                            format!(
586                                                                "context {}",
587                                                                fmt_include_context(
588                                                                    &self.tags,
589                                                                    &self.meta_key_values
590                                                                )
591                                                            ),
592                                                            span,
593                                                        );
594
595                                                        // relate the error to the first include if we can
596                                                        let e = if let Some(canonical_path) =
597                                                            included.and_then(|included| {
598                                                                included.canonicalize().ok()
599                                                            }) {
600                                                            if let Some(include_context) = self
601                                                                .include_by_canonical_path
602                                                                .get(&canonical_path)
603                                                            {
604                                                                if include_context.tags == self.tags
605                                                                    && include_context
606                                                                        .meta_key_values
607                                                                        == self.meta_key_values
608                                                                {
609                                                                    // include context is identical, so the include is harmless
610                                                                    // and we ignore it
611                                                                    None
612                                                                } else {
613                                                                    Some(e.related_to_named_span(
614                                                                        format!("context {}", fmt_include_context(&include_context.tags, &include_context.meta_key_values)),
615                                                                        include_context.span,
616                                                                    ))
617                                                                }
618                                                            } else {
619                                                                Some(e)
620                                                            }
621                                                        } else {
622                                                            Some(e)
623                                                        };
624
625                                                        if let Some(e) = e {
626                                                            self.errors.push(e);
627                                                        }
628                                                    }
629                                                }
630                                            }
631                                        }
632                                    }
633                                    Some(IncludedGlob::Error(e)) => {
634                                        let e = Error::new("can't expand glob", e, span);
635                                        self.errors.push(e);
636                                    }
637                                }
638                            }
639
640                            Option(opt) => {
641                                if let Err(e) = self.options.assimilate(opt) {
642                                    self.errors.push(e);
643                                }
644                            }
645
646                            Plugin(plugin) => self.plugins.push(plugin),
647                        }
648
649                        // having silently consumed a pragma, go on to the next declaration
650                        self.next()
651                    }
652                }
653            }
654            None => match self.stacked.pop_front() {
655                Some((path, declarations)) => {
656                    self.current_path = path;
657                    self.current_declarations = declarations;
658                    self.next()
659                }
660                None => None,
661            },
662        }
663    }
664}
665
666fn end_of_input(source_id: SourceId, s: &str) -> Span_ {
667    chumsky::span::Span::new(source_id, s.len()..s.len())
668}
669
670#[cfg(test)]
671pub use lexer::bare_lex;
672mod format;
673mod lexer;
674pub use options::Options;
675pub(crate) mod options;
676mod parsers;
677mod sort;
678mod sources;
679pub use sources::{BeancountSources, SyntheticSources};
680mod trim;
681pub mod types;