beancount_parser_lima/lib.rs
1// TODO remove suppression for dead code warning
2#![allow(dead_code)]
3#![recursion_limit = "256"]
4#![doc = include_str!("../README.md")]
5
6//! # Examples
7//!
8//! This example generates the output as shown above.
9//! The supporting function `parse` is required in order to avoid lifetime problems.
10//!
11//!```
12//! # use rust_decimal::Decimal;
13//! # use std::io::{self, Write};
14//! # use std::path::PathBuf;
15//!
16//!use beancount_parser_lima::{
17//! BeancountParser, BeancountSources, DirectiveVariant, ParseError, ParseSuccess,
18//!};
19//!
20//!fn main() {
21//! let sources = BeancountSources::try_from(PathBuf::from("examples/data/error-post-balancing.beancount")).unwrap();
22//! let parser = BeancountParser::new(&sources);
23//!
24//! parse(&sources, &parser, &mut io::stderr());
25//!}
26//!
27//!fn parse<W>(sources: &BeancountSources, parser: &BeancountParser, error_w: &mut W)
28//!where
29//! W: Write,
30//!{
31//! match parser.parse() {
32//! Ok(ParseSuccess {
33//! directives,
34//! options: _,
35//! plugins: _,
36//! mut warnings,
37//! }) => {
38//! let mut errors = Vec::new();
39//!
40//! for directive in directives {
41//! if let DirectiveVariant::Transaction(transaction) = directive.variant() {
42//! let mut postings = transaction.postings().collect::<Vec<_>>();
43//! let n_postings = postings.len();
44//! let n_amounts = itertools::partition(&mut postings, |p| p.amount().is_some());
45//!
46//! if postings.is_empty() {
47//! warnings.push(directive.warning("no postings"));
48//! } else if n_amounts + 1 < n_postings {
49//! errors.push(
50//! directive
51//! .error("multiple postings without amount specified")
52//! .related_to_all(postings[n_amounts..].iter().copied()),
53//! );
54//! } else if n_amounts == n_postings {
55//! let total: Decimal =
56//! postings.iter().map(|p| p.amount().unwrap().value()).sum();
57//!
58//! if total != Decimal::ZERO {
59//! let last_amount = postings.pop().unwrap().amount().unwrap();
60//! let other_amounts = postings.iter().map(|p| p.amount().unwrap());
61//!
62//! errors.push(
63//! last_amount
64//! .error(format!("sum is {}, expected zero", total))
65//! .related_to_all(other_amounts)
66//! .in_context(&directive),
67//! )
68//! }
69//! }
70//! }
71//! }
72//!
73//! sources.write_errors_or_warnings(error_w, errors).unwrap();
74//! sources.write_errors_or_warnings(error_w, warnings).unwrap();
75//! }
76//!
77//! Err(ParseError { errors, warnings }) => {
78//! sources.write_errors_or_warnings(error_w, errors).unwrap();
79//! sources.write_errors_or_warnings(error_w, warnings).unwrap();
80//! }
81//! }
82//!}
83//!```
84
85use chumsky::prelude::{Input, Parser};
86use lexer::{lex, Token};
87use parsers::{file, ParserState};
88use sort::SortIteratorAdaptor;
89use std::{
90 collections::{HashMap, VecDeque},
91 path::{Path, PathBuf},
92};
93
94use crate::{parsers::includes, sources::resolve_included_path};
95pub use crate::{trim::trim_trailing_whitespace, types::*};
96
/// Outcome of expanding the glob given in an `include` pragma.
#[derive(Clone, Debug)]
enum IncludedGlob {
    /// The paths the glob expanded to; an empty list is reported as an
    /// "include failed" error when the include is processed.
    Expanded(Vec<PathBuf>),
    /// The glob could not be expanded; holds the message that is reported.
    Error(String),
}
102
103// get all includes, discarding errors
104fn get_includes(content: &str, source_id: SourceId) -> Vec<String> {
105 fn get_includes_for_tokens(
106 tokens: Vec<(Token, Span_)>,
107 source_id: SourceId,
108 end_of_input: Span_,
109 ) -> Vec<String> {
110 let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
111
112 let spanned_tokens = tokens
113 .map(end_of_input, |(t, s)| (t, s))
114 .with_context(source_id);
115
116 // ignore any errors in parsing, we'll pick them up in the next pass
117 includes()
118 .parse_with_state(spanned_tokens, &mut parser_state)
119 .into_output()
120 .unwrap_or_default()
121 }
122
123 let tokens = lex_with_source(source_id, content);
124 get_includes_for_tokens(tokens, source_id, end_of_input(source_id, content))
125}
126
127pub fn lex_with_source<'a>(source_id: SourceId, s: &'a str) -> Vec<(Token<'a>, Span_)> {
128 lex(s)
129 .map(|(tok, span)| (tok, chumsky::span::Span::new(source_id, span)))
130 .collect::<Vec<_>>()
131}
132
// a lexed token paired with its span, as produced by `lex_with_source`
type SpannedToken<'t> = (Token<'t>, Span_);
134
/// The Beancount parser itself, which tokenizes and parses the source files
/// contained in `BeancountSources`.
///
/// # Examples
/// ```
/// # use std::io::{self, Write};
/// # use std::path::PathBuf;
///
/// use beancount_parser_lima::{BeancountParser, BeancountSources, ParseError, ParseSuccess};
///
/// fn main() {
///     let sources = BeancountSources::try_from(PathBuf::from("examples/data/full.beancount")).unwrap();
///     let parser = BeancountParser::new(&sources);
///
///     parse(&sources, &parser, &mut io::stderr());
/// }
///
/// fn parse<W>(sources: &BeancountSources, parser: &BeancountParser, error_w: &mut W)
/// where
///     W: Write,
/// {
///     match parser.parse() {
///         Ok(ParseSuccess {
///             directives,
///             options: _,
///             plugins: _,
///             warnings,
///         }) => {
///             for directive in directives {
///                 println!("{}\n", &directive);
///             }
///
///             sources.write_errors_or_warnings(error_w, warnings).unwrap();
///         }
///         Err(ParseError { errors, warnings }) => {
///             sources.write_errors_or_warnings(error_w, errors).unwrap();
///             sources.write_errors_or_warnings(error_w, warnings).unwrap();
///         }
///     }
/// }
/// ```
pub struct BeancountParser<'s> {
    // the sources being parsed, borrowed for the lifetime of the parser
    sources: &'s BeancountSources,
    // indexed by source_id as per sources
    tokenized_sources: Vec<Vec<SpannedToken<'s>>>,
}
181
/// The result of successfully parsing all the files, containing date-ordered `Directive`s,
/// `Options`, `Plugin`s, and any `Warning`s.
#[derive(Debug)]
pub struct ParseSuccess<'t> {
    /// All directives from all sources, sorted by date.
    pub directives: Vec<Spanned<Directive<'t>>>,
    /// The options accumulated while parsing.
    pub options: Options<'t>,
    /// The plugins declared by `plugin` pragmas.
    pub plugins: Vec<Plugin<'t>>,
    /// Warnings raised while parsing; these do not cause parsing to fail.
    pub warnings: Vec<Warning>,
}
190
/// The value returned when parsing fails.
#[derive(Debug)]
pub struct ParseError {
    /// The errors which caused parsing to fail; never empty when returned by
    /// `BeancountParser::parse`.
    pub errors: Vec<Error>,
    /// Any warnings raised alongside the errors.
    pub warnings: Vec<Warning>,
}
197
// result of parse_declarations, in order:
// - the declarations of each source, keyed by its path
// - the options gathered by the parser state
// - all parse errors
// - all warnings gathered by the parser state
type ParseDeclarationsResult<'t> = (
    HashMap<Option<&'t Path>, Vec<Spanned<Declaration<'t>>>>,
    Options<'t>,
    Vec<Error>,
    Vec<Warning>,
);
205
206impl<'s> BeancountParser<'s> {
207 /// Create a `BeancountParser` from `BeancountSources` read from all input files.
208 pub fn new(sources: &'s BeancountSources) -> Self {
209 // `content_iter()` walks a `HashMap`, so iteration order is not guaranteed.
210 // We must index by `SourceId` rather than relying on iteration order.
211 let mut tokenized_sources = vec![Vec::new(); sources.num_sources()];
212
213 for (source_id, _path, content) in sources.content_iter() {
214 let i_source: usize = source_id.into();
215 tokenized_sources[i_source] = lex_with_source(source_id, content);
216 }
217
218 BeancountParser {
219 sources,
220 tokenized_sources,
221 }
222 }
223
224 /// Parse the sources, returning date-sorted directives and options, or errors, along with warnings in both cases.
225 pub fn parse<'a>(&'a self) -> Result<ParseSuccess<'a>, ParseError> {
226 let (parsed_sources, options, mut errors, mut warnings) = self.parse_declarations();
227 let mut p = PragmaProcessor::new(
228 self.root_path(),
229 parsed_sources,
230 self.sources.included_globs(),
231 self.sources.error_paths(),
232 options,
233 );
234
235 // directives are stable-sorted by date, with a secondary key enforcing the stream
236 // invariants from https://beancount.github.io/docs/beancount_design_doc.html#stream-invariants:
237 // Open sorts before Transaction; Balance sorts before Transaction.
238 let directives = p
239 .by_ref()
240 .sort(|d| {
241 (
242 *d.item().date().item(),
243 match d.variant() {
244 DirectiveVariant::Open(_) => 0u8,
245 DirectiveVariant::Balance(_) => 1u8,
246 _ => 2u8,
247 },
248 )
249 })
250 .collect::<Vec<_>>();
251 let (options, plugins, mut pragma_errors, mut pragma_warnings) = p.result();
252 errors.append(&mut pragma_errors);
253 warnings.append(&mut pragma_warnings);
254
255 if errors.is_empty() {
256 Ok(ParseSuccess {
257 directives,
258 options,
259 plugins,
260 warnings,
261 })
262 } else {
263 Err(ParseError { errors, warnings })
264 }
265 }
266
267 fn root_path(&self) -> Option<&'s Path> {
268 self.sources.root_path()
269 }
270
271 /// Parse the sources, returning declarations and any errors.
272 /// The declarations are indexed by SourceId
273 fn parse_declarations<'a>(&'a self) -> ParseDeclarationsResult<'a> {
274 let mut all_outputs = HashMap::new();
275 let mut all_errors = Vec::new();
276 let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
277
278 for (source_id, source_path, content) in self.sources.content_iter() {
279 let i_source: usize = source_id.into();
280 let tokens = &self.tokenized_sources[i_source];
281
282 let spanned_tokens = tokens
283 .map(end_of_input(source_id, content), |(t, s)| (t, s))
284 .with_context(source_id);
285
286 let (output, errors) = file(source_path)
287 .parse_with_state(spanned_tokens, &mut parser_state)
288 .into_output_errors();
289
290 all_outputs.insert(source_path, output.unwrap_or(Vec::new()));
291 all_errors.extend(errors);
292 }
293
294 let ParserState { options, warnings } = parser_state.0;
295
296 (
297 all_outputs,
298 Options::new(options),
299 all_errors.into_iter().map(Error::from).collect(),
300 warnings,
301 )
302 }
303}
304
/// Iterator which applies pragmas to the sequence of `Directive`s.
///
/// When the iterator is exhausted, any errors should be collected by the caller.
#[derive(Debug)]
struct PragmaProcessor<'s> {
    // path of the source whose declarations are currently being consumed
    current_path: Option<PathBuf>,
    // declarations of the current source which are yet to be consumed
    current_declarations: VecDeque<Spanned<Declaration<'s>>>,
    // sources suspended by an include, most recently suspended at the front,
    // each with its path and its remaining declarations
    stacked: VecDeque<(Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>)>,
    // parsed sources which have not yet been included, keyed by path
    remaining: HashMap<Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>>,
    // result of expanding each include glob, looked up by the resolved glob path
    included_globs: &'s HashMap<PathBuf, IncludedGlob>,
    // paths which could not be read, with the message reported as "can't read file"
    error_paths: HashMap<Option<PathBuf>, String>,
    // context in force when a source was included, keyed by its canonical path,
    // used to decide whether a duplicate include is harmless
    include_by_canonical_path: HashMap<PathBuf, IncludeContext<'s>>,
    // tags and meta key/values for pragma push/pop
    // the key is the first pushtag of a tag, the Vec holds further pushes of the same tag
    tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
    // each pushmeta for a key appends to its Vec, each popmeta removes the last entry
    meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
    // options, updated by option pragmas as they are encountered
    options: Options<'s>,
    // plugins, in the order their pragmas are encountered
    plugins: Vec<Plugin<'s>>,
    // errors and warnings, for collection when the iterator is exhausted
    errors: Vec<Error>,
    warnings: Vec<Warning>,
}
326
/// Snapshot of the pushed tags and meta key/values taken when a source is included,
/// along with the span of the include pragma.
#[derive(Debug)]
struct IncludeContext<'s> {
    // pushed tags in force at the include
    tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
    // pushed meta key/values in force at the include
    meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
    // span of the include pragma, for relating a later duplicate include error to it
    span: Span,
}
333
334fn fmt_include_context<'s>(
335 tags: &HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
336 meta_key_values: &HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
337) -> String {
338 let tags_s = itertools::intersperse(
339 tags.keys().map(|tag| tag.item().to_string()),
340 " ".to_string(),
341 )
342 .collect::<String>();
343 let meta_s = itertools::intersperse(
344 meta_key_values.iter().map(|(k, v)| {
345 format!(
346 "{}: [{}]",
347 k.item(),
348 itertools::intersperse(
349 v.iter().map(|(_, v)| v.item().to_string()),
350 " ".to_string()
351 )
352 .collect::<String>()
353 )
354 }),
355 " ".to_string(),
356 )
357 .collect::<String>();
358 format!("{} {}", tags_s, meta_s)
359}
360
361impl<'s> PragmaProcessor<'s> {
362 fn new(
363 root_path: Option<&Path>,
364 parsed_sources: HashMap<Option<&Path>, Vec<Spanned<Declaration<'s>>>>,
365 included_globs: &'s HashMap<PathBuf, IncludedGlob>,
366 error_paths: HashMap<Option<&Path>, String>,
367 options: Options<'s>,
368 ) -> Self {
369 let mut remaining = parsed_sources
370 .into_iter()
371 .map(|(path, declarations)| {
372 (path.map(|p| p.to_path_buf()), VecDeque::from(declarations))
373 })
374 .collect::<HashMap<_, _>>();
375 let error_paths = error_paths
376 .into_iter()
377 .map(|(path, e)| (path.map(|p| p.to_path_buf()), e))
378 .collect::<HashMap<_, _>>();
379
380 let current_path = root_path.map(|p| p.to_path_buf());
381 let current_declarations = remaining.remove(¤t_path).unwrap();
382
383 PragmaProcessor {
384 current_path,
385 current_declarations,
386 stacked: VecDeque::new(),
387 remaining,
388 included_globs,
389 error_paths,
390 include_by_canonical_path: HashMap::default(),
391 tags: HashMap::new(),
392 meta_key_values: HashMap::new(),
393 options,
394 plugins: Vec::new(),
395 errors: Vec::new(),
396 warnings: Vec::new(),
397 }
398 }
399
400 fn result(self) -> (Options<'s>, Vec<Plugin<'s>>, Vec<Error>, Vec<Warning>) {
401 // any leftover tags or key/values is an error
402 let mut errors = self.errors;
403 let warnings = self.warnings;
404
405 for (key, _value) in self.meta_key_values {
406 let e = Error::new(
407 "invalid pushmeta",
408 "missing corresponding popmeta",
409 key.span,
410 );
411 errors.push(e);
412 }
413
414 for (tag, others) in self.tags {
415 let e = Error::new("invalid pushtag", "missing corresponding poptag", tag.span);
416 errors.push(e);
417 for other in others {
418 let e = Error::new(
419 "invalid pushtag",
420 "missing corresponding poptag",
421 other.span,
422 );
423 errors.push(e);
424 }
425 }
426
427 (self.options, self.plugins, errors, warnings)
428 }
429}
430
431impl<'s> Iterator for PragmaProcessor<'s> {
432 type Item = Spanned<Directive<'s>>;
433
434 fn next(&mut self) -> Option<Self::Item> {
435 match self.current_declarations.pop_front() {
436 Some(declaration) => {
437 match declaration.item {
438 Declaration::Directive(mut directive) => {
439 directive.metadata.augment_tags(&self.tags);
440 directive.metadata.augment_key_values(&self.meta_key_values);
441
442 Some(spanned(directive, declaration.span))
443 }
444
445 Declaration::Pragma(pragma) => {
446 use Pragma::*;
447
448 match pragma {
449 Pushtag(tag) => match self.tags.get_mut(&tag) {
450 Some(others) => {
451 others.push(tag);
452 }
453 None => {
454 self.tags.insert(tag, Vec::default());
455 }
456 },
457 Poptag(tag) => {
458 let mut last_tag = false;
459
460 match self.tags.get_mut(&tag) {
461 Some(others) => {
462 if others.is_empty() {
463 last_tag = true;
464 // need to remove later because of borrowing
465 } else {
466 others.pop();
467 }
468 }
469 None => {
470 let e = Error::new(
471 "invalid poptag",
472 "missing corresponding pushtag",
473 tag.span,
474 );
475 self.errors.push(e);
476 }
477 }
478
479 if last_tag {
480 self.tags.remove(&tag);
481 }
482 }
483 Pushmeta(meta) => match self.meta_key_values.get_mut(&meta.key) {
484 Some(values) => {
485 values.push((meta.key.span, meta.value));
486 }
487 None => {
488 self.meta_key_values
489 .insert(meta.key, vec![(meta.key.span, meta.value)]);
490 }
491 },
492 Popmeta(meta) => {
493 let mut last_meta = false;
494
495 match self.meta_key_values.get_mut(&meta) {
496 Some(values) => {
497 values.pop();
498 if values.is_empty() {
499 last_meta = true;
500 // need to remove later because of borrowing
501 }
502 }
503 None => {
504 let e = Error::new(
505 "invalid popmeta",
506 "missing corresponding pushmeta",
507 meta.span,
508 );
509 self.errors.push(e);
510 }
511 }
512
513 if last_meta {
514 self.meta_key_values.remove(&meta);
515 }
516 }
517 Include(rel_glob) => {
518 let (abs_glob, span) = (
519 resolve_included_path(
520 self.current_path.as_ref(),
521 AsRef::<Path>::as_ref(*rel_glob.item()),
522 ),
523 *rel_glob.span(),
524 );
525
526 match self.included_globs.get(&abs_glob) {
527 None => panic!("impossible, I hope"),
528 Some(IncludedGlob::Expanded(paths)) => {
529 if paths.is_empty() {
530 // this is an error rather than a warning to catch plain paths whic fail to match
531 let e =
532 Error::new("include failed", "no such file", span);
533
534 self.errors.push(e)
535 }
536
537 for included in paths {
538 let included = Some(included.clone());
539
540 match self.remaining.remove_entry(&included) {
541 Some((included_path, included_declarations)) => {
542 let stacked_path = std::mem::replace(
543 &mut self.current_path,
544 included_path,
545 );
546 let stacked_declarations = std::mem::replace(
547 &mut self.current_declarations,
548 included_declarations,
549 );
550 self.stacked.push_front((
551 stacked_path,
552 stacked_declarations,
553 ));
554
555 // record the span in case of a duplicate include error later
556 if let Ok(canonical_path) = self
557 .current_path
558 .as_ref()
559 .unwrap()
560 .canonicalize()
561 {
562 self.include_by_canonical_path.insert(
563 canonical_path,
564 IncludeContext {
565 tags: self.tags.clone(),
566 meta_key_values: self
567 .meta_key_values
568 .clone(),
569 span,
570 },
571 );
572 }
573 }
574
575 None => {
576 // either a known error path or a duplicate include
577 if let Some(e) = self.error_paths.get(&included)
578 {
579 self.errors.push(Error::new(
580 "can't read file",
581 e.to_string(),
582 span,
583 ));
584 } else {
585 // duplicate include, only allowed if the include context is the same
586
587 let e = Error::new(
588 "duplicate include",
589 format!(
590 "context {}",
591 fmt_include_context(
592 &self.tags,
593 &self.meta_key_values
594 )
595 ),
596 span,
597 );
598
599 // relate the error to the first include if we can
600 let e = if let Some(canonical_path) =
601 included.and_then(|included| {
602 included.canonicalize().ok()
603 }) {
604 if let Some(include_context) = self
605 .include_by_canonical_path
606 .get(&canonical_path)
607 {
608 if include_context.tags == self.tags
609 && include_context
610 .meta_key_values
611 == self.meta_key_values
612 {
613 // include context is identical, so the include is harmless
614 // and we ignore it
615 None
616 } else {
617 Some(e.related_to_named_span(
618 format!("context {}", fmt_include_context(&include_context.tags, &include_context.meta_key_values)),
619 include_context.span,
620 ))
621 }
622 } else {
623 Some(e)
624 }
625 } else {
626 Some(e)
627 };
628
629 if let Some(e) = e {
630 self.errors.push(e);
631 }
632 }
633 }
634 }
635 }
636 }
637 Some(IncludedGlob::Error(e)) => {
638 let e = Error::new("can't expand glob", e, span);
639 self.errors.push(e);
640 }
641 }
642 }
643
644 Option(opt) => {
645 if let Err(e) = self.options.assimilate(opt) {
646 self.errors.push(e);
647 }
648 }
649
650 Plugin(plugin) => self.plugins.push(plugin),
651 }
652
653 // having silently consumed a pragma, go on to the next declaration
654 self.next()
655 }
656 }
657 }
658 None => match self.stacked.pop_front() {
659 Some((path, declarations)) => {
660 self.current_path = path;
661 self.current_declarations = declarations;
662 self.next()
663 }
664 None => None,
665 },
666 }
667 }
668}
669
670fn end_of_input(source_id: SourceId, s: &str) -> Span_ {
671 chumsky::span::Span::new(source_id, s.len()..s.len())
672}
673
674#[cfg(test)]
675pub use lexer::bare_lex;
676mod format;
677mod lexer;
678pub use options::Options;
679pub(crate) mod options;
680mod parsers;
681mod sort;
682mod sources;
683pub use sources::{BeancountSources, SyntheticSources};
684mod trim;
685pub mod types;