beancount_parser_lima/lib.rs
1// TODO remove suppression for dead code warning
2#![allow(dead_code)]
3#![recursion_limit = "256"]
4#![doc = include_str!("../README.md")]
5
6//! # Examples
7//!
8//! This example generates the output as shown above.
9//! The supporting function `parse` is required in order to avoid lifetime problems.
10//!
11//!```
12//! # use rust_decimal::Decimal;
13//! # use std::io::{self, Write};
14//! # use std::path::PathBuf;
15//!
16//!use beancount_parser_lima::{
17//! BeancountParser, BeancountSources, DirectiveVariant, ParseError, ParseSuccess,
18//!};
19//!
20//!fn main() {
21//! let sources = BeancountSources::try_from(PathBuf::from("examples/data/error-post-balancing.beancount")).unwrap();
22//! let parser = BeancountParser::new(&sources);
23//!
24//! parse(&sources, &parser, &mut io::stderr());
25//!}
26//!
27//!fn parse<W>(sources: &BeancountSources, parser: &BeancountParser, error_w: &mut W)
28//!where
29//! W: Write,
30//!{
31//! match parser.parse() {
32//! Ok(ParseSuccess {
33//! directives,
34//! options: _,
35//! plugins: _,
36//! mut warnings,
37//! }) => {
38//! let mut errors = Vec::new();
39//!
40//! for directive in directives {
41//! if let DirectiveVariant::Transaction(transaction) = directive.variant() {
42//! let mut postings = transaction.postings().collect::<Vec<_>>();
43//! let n_postings = postings.len();
44//! let n_amounts = itertools::partition(&mut postings, |p| p.amount().is_some());
45//!
46//! if postings.is_empty() {
47//! warnings.push(directive.warning("no postings"));
48//! } else if n_amounts + 1 < n_postings {
49//! errors.push(
50//! directive
51//! .error("multiple postings without amount specified")
52//! .related_to_all(postings[n_amounts..].iter().copied()),
53//! );
54//! } else if n_amounts == n_postings {
55//! let total: Decimal =
56//! postings.iter().map(|p| p.amount().unwrap().value()).sum();
57//!
58//! if total != Decimal::ZERO {
59//! let last_amount = postings.pop().unwrap().amount().unwrap();
60//! let other_amounts = postings.iter().map(|p| p.amount().unwrap());
61//!
62//! errors.push(
63//! last_amount
64//! .error(format!("sum is {}, expected zero", total))
65//! .related_to_all(other_amounts)
66//! .in_context(&directive),
67//! )
68//! }
69//! }
70//! }
71//! }
72//!
73//! sources.write_errors_or_warnings(error_w, errors).unwrap();
74//! sources.write_errors_or_warnings(error_w, warnings).unwrap();
75//! }
76//!
77//! Err(ParseError { errors, warnings }) => {
78//! sources.write_errors_or_warnings(error_w, errors).unwrap();
79//! sources.write_errors_or_warnings(error_w, warnings).unwrap();
80//! }
81//! }
82//!}
83//!```
84
85use chumsky::prelude::{Input, Parser};
86use lexer::{lex, Token};
87use parsers::{file, ParserState};
88use sort::SortIteratorAdaptor;
89use std::{
90 collections::{HashMap, VecDeque},
91 path::{Path, PathBuf},
92};
93
94use crate::{parsers::includes, sources::resolve_included_path};
95pub use crate::{trim::trim_trailing_whitespace, types::*};
96
/// Outcome of expanding the glob of an `include` pragma.
#[derive(Clone, Debug)]
enum IncludedGlob {
    /// The paths the glob expanded to; an empty list is reported as "no such file".
    Expanded(Vec<PathBuf>),
    /// The message reported when the glob could not be expanded.
    Error(String),
}
102
103// get all includes, discarding errors
104fn get_includes(content: &str, source_id: SourceId) -> Vec<String> {
105 fn get_includes_for_tokens(
106 tokens: Vec<(Token, Span_)>,
107 source_id: SourceId,
108 end_of_input: Span_,
109 ) -> Vec<String> {
110 let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
111
112 let spanned_tokens = tokens
113 .map(end_of_input, |(t, s)| (t, s))
114 .with_context(source_id);
115
116 // ignore any errors in parsing, we'll pick them up in the next pass
117 includes()
118 .parse_with_state(spanned_tokens, &mut parser_state)
119 .into_output()
120 .unwrap_or_default()
121 }
122
123 let tokens = lex_with_source(source_id, content);
124 get_includes_for_tokens(tokens, source_id, end_of_input(source_id, content))
125}
126
127pub fn lex_with_source<'a>(source_id: SourceId, s: &'a str) -> Vec<(Token<'a>, Span_)> {
128 lex(s)
129 .map(|(tok, span)| (tok, chumsky::span::Span::new(source_id, span)))
130 .collect::<Vec<_>>()
131}
132
// a lexed token paired with its span
type SpannedToken<'t> = (Token<'t>, Span_);
134
/// The Beancount parser itself, which tokenizes and parses the source files
/// contained in `BeancountSources`.
///
/// # Examples
/// ```
/// # use std::io::{self, Write};
/// # use std::path::PathBuf;
///
/// use beancount_parser_lima::{BeancountParser, BeancountSources, ParseError, ParseSuccess};
///
/// fn main() {
///     let sources = BeancountSources::try_from(PathBuf::from("examples/data/full.beancount")).unwrap();
///     let parser = BeancountParser::new(&sources);
///
///     parse(&sources, &parser, &mut io::stderr());
/// }
///
/// fn parse<W>(sources: &BeancountSources, parser: &BeancountParser, error_w: &mut W)
/// where
///     W: Write,
/// {
///     match parser.parse() {
///         Ok(ParseSuccess {
///             directives,
///             options: _,
///             plugins: _,
///             warnings,
///         }) => {
///             for directive in directives {
///                 println!("{}\n", &directive);
///             }
///
///             sources.write_errors_or_warnings(error_w, warnings).unwrap();
///         }
///         Err(ParseError { errors, warnings }) => {
///             sources.write_errors_or_warnings(error_w, errors).unwrap();
///             sources.write_errors_or_warnings(error_w, warnings).unwrap();
///         }
///     }
/// }
/// ```
pub struct BeancountParser<'s> {
    // the sources being parsed, which the tokens borrow from
    sources: &'s BeancountSources,
    // indexed by source_id as per sources
    tokenized_sources: Vec<Vec<SpannedToken<'s>>>,
}
181
/// A successful parse of all the files, containing date-ordered `Directive`s, `Options`, `Plugin`s, and any `Warning`s.
#[derive(Debug)]
pub struct ParseSuccess<'t> {
    /// All directives from all sources, sorted by date.
    pub directives: Vec<Spanned<Directive<'t>>>,
    /// The options resulting from the parse.
    pub options: Options<'t>,
    /// The plugins declared in the sources.
    pub plugins: Vec<Plugin<'t>>,
    /// Warnings raised while parsing, which did not prevent success.
    pub warnings: Vec<Warning>,
}
190
/// The value returned when parsing fails.
#[derive(Debug)]
pub struct ParseError {
    /// The errors which caused the parse to fail; never empty when returned by `BeancountParser::parse`.
    pub errors: Vec<Error>,
    /// Warnings raised alongside the errors.
    pub warnings: Vec<Warning>,
}
197
// Result of parse_declarations, being:
// - the declarations of each source, keyed by the path of that source
// - the options
// - the errors from all sources
// - the warnings from all sources
type ParseDeclarationsResult<'t> = (
    HashMap<Option<&'t Path>, Vec<Spanned<Declaration<'t>>>>,
    Options<'t>,
    Vec<Error>,
    Vec<Warning>,
);
205
206impl<'s> BeancountParser<'s> {
207 /// Create a `BeancountParser` from `BeancountSources` read from all input files.
208 pub fn new(sources: &'s BeancountSources) -> Self {
209 // `content_iter()` walks a `HashMap`, so iteration order is not guaranteed.
210 // We must index by `SourceId` rather than relying on iteration order.
211 let mut tokenized_sources = vec![Vec::new(); sources.num_sources()];
212
213 for (source_id, _path, content) in sources.content_iter() {
214 let i_source: usize = source_id.into();
215 tokenized_sources[i_source] = lex_with_source(source_id, content);
216 }
217
218 BeancountParser {
219 sources,
220 tokenized_sources,
221 }
222 }
223
224 /// Parse the sources, returning date-sorted directives and options, or errors, along with warnings in both cases.
225 pub fn parse<'a>(&'a self) -> Result<ParseSuccess<'a>, ParseError> {
226 let (parsed_sources, options, mut errors, mut warnings) = self.parse_declarations();
227 let mut p = PragmaProcessor::new(
228 self.root_path(),
229 parsed_sources,
230 self.sources.included_globs(),
231 self.sources.error_paths(),
232 options,
233 );
234
235 // directives are stable-sorted by date, where balance directives sort ahead of the other directives for that day
236 // as per https://beancount.github.io/docs/beancount_design_doc.html#stream-invariants
237 let directives = p
238 .by_ref()
239 .sort(|d| {
240 (
241 *d.item().date().item(),
242 // secondary sort of Balance ahead of others
243 !matches!(d.variant(), DirectiveVariant::Balance(_)),
244 )
245 })
246 .collect::<Vec<_>>();
247 let (options, plugins, mut pragma_errors, mut pragma_warnings) = p.result();
248 errors.append(&mut pragma_errors);
249 warnings.append(&mut pragma_warnings);
250
251 if errors.is_empty() {
252 Ok(ParseSuccess {
253 directives,
254 options,
255 plugins,
256 warnings,
257 })
258 } else {
259 Err(ParseError { errors, warnings })
260 }
261 }
262
263 fn root_path(&self) -> Option<&'s Path> {
264 self.sources.root_path()
265 }
266
267 /// Parse the sources, returning declarations and any errors.
268 /// The declarations are indexed by SourceId
269 fn parse_declarations<'a>(&'a self) -> ParseDeclarationsResult<'a> {
270 let mut all_outputs = HashMap::new();
271 let mut all_errors = Vec::new();
272 let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
273
274 for (source_id, source_path, content) in self.sources.content_iter() {
275 let i_source: usize = source_id.into();
276 let tokens = &self.tokenized_sources[i_source];
277
278 let spanned_tokens = tokens
279 .map(end_of_input(source_id, content), |(t, s)| (t, s))
280 .with_context(source_id);
281
282 let (output, errors) = file(source_path)
283 .parse_with_state(spanned_tokens, &mut parser_state)
284 .into_output_errors();
285
286 all_outputs.insert(source_path, output.unwrap_or(Vec::new()));
287 all_errors.extend(errors);
288 }
289
290 let ParserState { options, warnings } = parser_state.0;
291
292 (
293 all_outputs,
294 Options::new(options),
295 all_errors.into_iter().map(Error::from).collect(),
296 warnings,
297 )
298 }
299}
300
/// Iterator which applies pragmas to the sequence of `Directive`s.
///
/// When the iterator is exhausted, any errors should be collected by the caller.
#[derive(Debug)]
struct PragmaProcessor<'s> {
    // path of the source whose declarations are currently being processed
    current_path: Option<PathBuf>,
    // declarations of the current source which are yet to be processed
    current_declarations: VecDeque<Spanned<Declaration<'s>>>,
    // sources suspended while an include is processed, most recently suspended at the front
    stacked: VecDeque<(Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>)>,
    // parsed sources which have not yet been included, by path
    remaining: HashMap<Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>>,
    // result of expanding each include glob, looked up by the resolved path of the glob
    included_globs: &'s HashMap<PathBuf, IncludedGlob>,
    // message for each path which is reported as "can't read file" when included
    error_paths: HashMap<Option<PathBuf>, String>,
    // context in which each file was first included, for detecting conflicting duplicate includes
    include_by_canonical_path: HashMap<PathBuf, IncludeContext<'s>>,
    // tags and meta key/values for pragma push/pop
    tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
    meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
    // options, updated by each option pragma
    options: Options<'s>,
    // plugins, in the order their pragmas were processed
    plugins: Vec<Plugin<'s>>,
    // errors and warnings, for collection when the iterator is exhausted
    errors: Vec<Error>,
    warnings: Vec<Warning>,
}
322
/// The pushed tags and meta key/values in force when a file was included,
/// together with the span of the include pragma itself.
#[derive(Debug)]
struct IncludeContext<'s> {
    // copy of the processor's pushed tags at the time of the include
    tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
    // copy of the processor's pushed meta key/values at the time of the include
    meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
    // span of the include pragma, for relating a later duplicate include error to it
    span: Span,
}
329
330fn fmt_include_context<'s>(
331 tags: &HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
332 meta_key_values: &HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
333) -> String {
334 let tags_s = itertools::intersperse(
335 tags.keys().map(|tag| tag.item().to_string()),
336 " ".to_string(),
337 )
338 .collect::<String>();
339 let meta_s = itertools::intersperse(
340 meta_key_values.iter().map(|(k, v)| {
341 format!(
342 "{}: [{}]",
343 k.item(),
344 itertools::intersperse(
345 v.iter().map(|(_, v)| v.item().to_string()),
346 " ".to_string()
347 )
348 .collect::<String>()
349 )
350 }),
351 " ".to_string(),
352 )
353 .collect::<String>();
354 format!("{} {}", tags_s, meta_s)
355}
356
357impl<'s> PragmaProcessor<'s> {
358 fn new(
359 root_path: Option<&Path>,
360 parsed_sources: HashMap<Option<&Path>, Vec<Spanned<Declaration<'s>>>>,
361 included_globs: &'s HashMap<PathBuf, IncludedGlob>,
362 error_paths: HashMap<Option<&Path>, String>,
363 options: Options<'s>,
364 ) -> Self {
365 let mut remaining = parsed_sources
366 .into_iter()
367 .map(|(path, declarations)| {
368 (path.map(|p| p.to_path_buf()), VecDeque::from(declarations))
369 })
370 .collect::<HashMap<_, _>>();
371 let error_paths = error_paths
372 .into_iter()
373 .map(|(path, e)| (path.map(|p| p.to_path_buf()), e))
374 .collect::<HashMap<_, _>>();
375
376 let current_path = root_path.map(|p| p.to_path_buf());
377 let current_declarations = remaining.remove(¤t_path).unwrap();
378
379 PragmaProcessor {
380 current_path,
381 current_declarations,
382 stacked: VecDeque::new(),
383 remaining,
384 included_globs,
385 error_paths,
386 include_by_canonical_path: HashMap::default(),
387 tags: HashMap::new(),
388 meta_key_values: HashMap::new(),
389 options,
390 plugins: Vec::new(),
391 errors: Vec::new(),
392 warnings: Vec::new(),
393 }
394 }
395
396 fn result(self) -> (Options<'s>, Vec<Plugin<'s>>, Vec<Error>, Vec<Warning>) {
397 // any leftover tags or key/values is an error
398 let mut errors = self.errors;
399 let warnings = self.warnings;
400
401 for (key, _value) in self.meta_key_values {
402 let e = Error::new(
403 "invalid pushmeta",
404 "missing corresponding popmeta",
405 key.span,
406 );
407 errors.push(e);
408 }
409
410 for (tag, others) in self.tags {
411 let e = Error::new("invalid pushtag", "missing corresponding poptag", tag.span);
412 errors.push(e);
413 for other in others {
414 let e = Error::new(
415 "invalid pushtag",
416 "missing corresponding poptag",
417 other.span,
418 );
419 errors.push(e);
420 }
421 }
422
423 (self.options, self.plugins, errors, warnings)
424 }
425}
426
427impl<'s> Iterator for PragmaProcessor<'s> {
428 type Item = Spanned<Directive<'s>>;
429
430 fn next(&mut self) -> Option<Self::Item> {
431 match self.current_declarations.pop_front() {
432 Some(declaration) => {
433 match declaration.item {
434 Declaration::Directive(mut directive) => {
435 directive.metadata.augment_tags(&self.tags);
436 directive.metadata.augment_key_values(&self.meta_key_values);
437
438 Some(spanned(directive, declaration.span))
439 }
440
441 Declaration::Pragma(pragma) => {
442 use Pragma::*;
443
444 match pragma {
445 Pushtag(tag) => match self.tags.get_mut(&tag) {
446 Some(others) => {
447 others.push(tag);
448 }
449 None => {
450 self.tags.insert(tag, Vec::default());
451 }
452 },
453 Poptag(tag) => {
454 let mut last_tag = false;
455
456 match self.tags.get_mut(&tag) {
457 Some(others) => {
458 if others.is_empty() {
459 last_tag = true;
460 // need to remove later because of borrowing
461 } else {
462 others.pop();
463 }
464 }
465 None => {
466 let e = Error::new(
467 "invalid poptag",
468 "missing corresponding pushtag",
469 tag.span,
470 );
471 self.errors.push(e);
472 }
473 }
474
475 if last_tag {
476 self.tags.remove(&tag);
477 }
478 }
479 Pushmeta(meta) => match self.meta_key_values.get_mut(&meta.key) {
480 Some(values) => {
481 values.push((meta.key.span, meta.value));
482 }
483 None => {
484 self.meta_key_values
485 .insert(meta.key, vec![(meta.key.span, meta.value)]);
486 }
487 },
488 Popmeta(meta) => {
489 let mut last_meta = false;
490
491 match self.meta_key_values.get_mut(&meta) {
492 Some(values) => {
493 values.pop();
494 if values.is_empty() {
495 last_meta = true;
496 // need to remove later because of borrowing
497 }
498 }
499 None => {
500 let e = Error::new(
501 "invalid popmeta",
502 "missing corresponding pushmeta",
503 meta.span,
504 );
505 self.errors.push(e);
506 }
507 }
508
509 if last_meta {
510 self.meta_key_values.remove(&meta);
511 }
512 }
513 Include(rel_glob) => {
514 let (abs_glob, span) = (
515 resolve_included_path(
516 self.current_path.as_ref(),
517 AsRef::<Path>::as_ref(*rel_glob.item()),
518 ),
519 *rel_glob.span(),
520 );
521
522 match self.included_globs.get(&abs_glob) {
523 None => panic!("impossible, I hope"),
524 Some(IncludedGlob::Expanded(paths)) => {
525 if paths.is_empty() {
526 // this is an error rather than a warning to catch plain paths whic fail to match
527 let e =
528 Error::new("include failed", "no such file", span);
529
530 self.errors.push(e)
531 }
532
533 for included in paths {
534 let included = Some(included.clone());
535
536 match self.remaining.remove_entry(&included) {
537 Some((included_path, included_declarations)) => {
538 let stacked_path = std::mem::replace(
539 &mut self.current_path,
540 included_path,
541 );
542 let stacked_declarations = std::mem::replace(
543 &mut self.current_declarations,
544 included_declarations,
545 );
546 self.stacked.push_front((
547 stacked_path,
548 stacked_declarations,
549 ));
550
551 // record the span in case of a duplicate include error later
552 if let Ok(canonical_path) = self
553 .current_path
554 .as_ref()
555 .unwrap()
556 .canonicalize()
557 {
558 self.include_by_canonical_path.insert(
559 canonical_path,
560 IncludeContext {
561 tags: self.tags.clone(),
562 meta_key_values: self
563 .meta_key_values
564 .clone(),
565 span,
566 },
567 );
568 }
569 }
570
571 None => {
572 // either a known error path or a duplicate include
573 if let Some(e) = self.error_paths.get(&included)
574 {
575 self.errors.push(Error::new(
576 "can't read file",
577 e.to_string(),
578 span,
579 ));
580 } else {
581 // duplicate include, only allowed if the include context is the same
582
583 let e = Error::new(
584 "duplicate include",
585 format!(
586 "context {}",
587 fmt_include_context(
588 &self.tags,
589 &self.meta_key_values
590 )
591 ),
592 span,
593 );
594
595 // relate the error to the first include if we can
596 let e = if let Some(canonical_path) =
597 included.and_then(|included| {
598 included.canonicalize().ok()
599 }) {
600 if let Some(include_context) = self
601 .include_by_canonical_path
602 .get(&canonical_path)
603 {
604 if include_context.tags == self.tags
605 && include_context
606 .meta_key_values
607 == self.meta_key_values
608 {
609 // include context is identical, so the include is harmless
610 // and we ignore it
611 None
612 } else {
613 Some(e.related_to_named_span(
614 format!("context {}", fmt_include_context(&include_context.tags, &include_context.meta_key_values)),
615 include_context.span,
616 ))
617 }
618 } else {
619 Some(e)
620 }
621 } else {
622 Some(e)
623 };
624
625 if let Some(e) = e {
626 self.errors.push(e);
627 }
628 }
629 }
630 }
631 }
632 }
633 Some(IncludedGlob::Error(e)) => {
634 let e = Error::new("can't expand glob", e, span);
635 self.errors.push(e);
636 }
637 }
638 }
639
640 Option(opt) => {
641 if let Err(e) = self.options.assimilate(opt) {
642 self.errors.push(e);
643 }
644 }
645
646 Plugin(plugin) => self.plugins.push(plugin),
647 }
648
649 // having silently consumed a pragma, go on to the next declaration
650 self.next()
651 }
652 }
653 }
654 None => match self.stacked.pop_front() {
655 Some((path, declarations)) => {
656 self.current_path = path;
657 self.current_declarations = declarations;
658 self.next()
659 }
660 None => None,
661 },
662 }
663 }
664}
665
666fn end_of_input(source_id: SourceId, s: &str) -> Span_ {
667 chumsky::span::Span::new(source_id, s.len()..s.len())
668}
669
670#[cfg(test)]
671pub use lexer::bare_lex;
672mod format;
673mod lexer;
674pub use options::Options;
675pub(crate) mod options;
676mod parsers;
677mod sort;
678mod sources;
679pub use sources::{BeancountSources, SyntheticSources};
680mod trim;
681pub mod types;