Skip to main content

beancount_parser/
lib.rs

1//! A parsing library for the [beancount language](https://beancount.github.io/docs/beancount_language_syntax.html)
2//!
3//! # Usage
4//!
5//! Use [`parse`] to get an instance of [`BeancountFile`].
6//!
7//! This is generic over the decimal type. The examples use `f64` as a decimal type.
8//! You may also use `Decimal` from the [rust_decimal crate].
9//!
10//! [rust_decimal crate]: https://docs.rs/rust_decimal
11//!
12//! ```
13//! use beancount_parser::{BeancountFile, DirectiveContent};
14//!
15//! # fn main() -> Result<(), beancount_parser::Error> {
16//! let input = r#"
17//! 2023-05-20 * "Coffee beans"
18//!   Expenses:Groceries   10 CHF
19//!   Assets:Checking
20//! "#;
21//!
22//! // Parse into the `BeancountFile` struct:
23//! let beancount: BeancountFile<f64> = input.parse()?;
24//!
25//! let directive = &beancount.directives[0];
26//! assert_eq!(directive.date.year, 2023);
27//! assert_eq!(directive.date.month, 5);
28//! assert_eq!(directive.date.day, 20);
29//!
30//! let DirectiveContent::Transaction(trx) = &directive.content else {
31//!     panic!("was not a transaction")
32//! };
33//! assert_eq!(trx.narration.as_deref(), Some("Coffee beans"));
34//! assert_eq!(trx.postings[0].account.as_str(), "Expenses:Groceries");
35//! assert_eq!(trx.postings[0].amount.as_ref().unwrap().value, 10.0);
36//! assert_eq!(trx.postings[0].amount.as_ref().unwrap().currency.as_str(), "CHF");
37//! assert_eq!(trx.postings[1].account.as_str(), "Assets:Checking");
38//! assert_eq!(trx.postings[1].amount, None);
39//! # Ok(()) }
40//! ```
41
42use std::{
43    collections::HashSet,
44    fs::File,
45    io::Read,
46    path::{Path, PathBuf},
47    str::FromStr,
48};
49
50use nom::{
51    branch::alt,
52    bytes::complete::{tag, take_while},
53    character::complete::{char, line_ending, not_line_ending, space0, space1},
54    combinator::{all_consuming, cut, eof, iterator, map, not, opt, value},
55    sequence::{delimited, preceded, terminated},
56    Finish, Parser,
57};
58use nom_locate::position;
59
60pub use crate::{
61    account::{Account, Balance, Close, Open, Pad},
62    amount::{Amount, Currency, Decimal, Price},
63    date::Date,
64    error::{ConversionError, Error},
65    event::Event,
66    transaction::{Cost, Link, Posting, PostingPrice, Tag, Transaction},
67};
68use crate::{
69    error::{ReadFileErrorContent, ReadFileErrorV2},
70    iterator::Iter,
71};
72
// Deprecated alias of `metadata::Value`, hidden from the generated documentation.
#[deprecated(note = "use `metadata::Value` instead", since = "1.0.0-beta.3")]
#[doc(hidden)]
pub type MetadataValue<D> = metadata::Value<D>;
76
77mod account;
78mod amount;
79mod date;
80mod error;
81mod event;
82mod iterator;
83pub mod metadata;
84mod transaction;
85
86/// Parse the input beancount file and return an instance of [`BeancountFile`] on success
87///
88/// It is generic over the [`Decimal`] type `D`.
89///
90/// See the root crate documentation for an example.
91///
92/// # Errors
93///
94/// Returns an [`Error`] in case of invalid beancount syntax found.
95pub fn parse<D: Decimal>(input: &str) -> Result<BeancountFile<D>, Error> {
96    input.parse()
97}
98
99/// Parse the beancount file and return an iterator over `Result<Entry<D>, Result>`
100///
101/// It is generic over the [`Decimal`] type `D`.
102///
103/// See [`Entry`]
104///
105/// # Errors
106///
107/// The iterator will emit an [`Error`] in case of invalid beancount syntax found.
108pub fn parse_iter<'a, D: Decimal + 'a>(
109    input: &'a str,
110) -> impl Iterator<Item = Result<Entry<D>, Error>> + 'a {
111    Iter::new(input, iterator(Span::new(input), entry::<D>))
112}
113
114impl<D: Decimal> FromStr for BeancountFile<D> {
115    type Err = Error;
116    fn from_str(input: &str) -> Result<Self, Self::Err> {
117        parse_iter(input).collect()
118    }
119}
120
121/// Read the files from disk and parse their content. Invokes `on_entry` for each entries found.
122///
123/// It follows the `include` directives found.
124///
125/// # Errors
126///
127/// Returns an error if any file could not be read (IO error)
128/// or if there is a beancount syntax error in any file read
129#[allow(deprecated)]
130#[deprecated(
131    since = "2.4.0",
132    note = "use `read_files_v2` or `read_files_to_vec` instead"
133)]
134pub fn read_files<D: Decimal, F: FnMut(Entry<D>)>(
135    files: impl IntoIterator<Item = PathBuf>,
136    on_entry: F,
137) -> Result<(), error::ReadFileError> {
138    read_files_v2(files, on_entry).map_err(|err| match err.error {
139        ReadFileErrorContent::Io(err) => error::ReadFileError::Io(err),
140        ReadFileErrorContent::Syntax(err) => error::ReadFileError::Syntax(err),
141    })
142}
143
144/// Read the files from disk and parse their content. Returns a [`Vec`] of each entries found.
145///
146/// It follows the `include` directives found.
147///
148/// # Errors
149///
150/// Returns an error if any file could not be read (IO error)
151/// or if there is a beancount syntax error in any file read
152pub fn read_files_to_vec<D: Decimal>(
153    files: impl IntoIterator<Item = PathBuf>,
154) -> Result<Vec<Entry<D>>, ReadFileErrorV2> {
155    let mut vec = Vec::new();
156    read_files_v2(files, |entry| vec.push(entry))?;
157    Ok(vec)
158}
159
160/// Read the files from disk and parse their content.
161///
162/// It follows the `include` directives found.
163///
164/// # Errors
165///
166/// Returns an error if any file could not be read (IO error)
167/// or if there is a beancount syntax error in any file read
168pub fn read_files_v2<D: Decimal, F: FnMut(Entry<D>)>(
169    files: impl IntoIterator<Item = PathBuf>,
170    mut on_entry: F,
171) -> Result<(), ReadFileErrorV2> {
172    let mut loaded: HashSet<PathBuf> = HashSet::new();
173    let mut pending: Vec<PathBuf> = files
174        .into_iter()
175        .map(|p| {
176            p.canonicalize()
177                .map_err(|err| ReadFileErrorV2::from_io(p, err))
178        })
179        .collect::<Result<_, _>>()?;
180    let mut buffer = String::new();
181    while let Some(path) = pending.pop() {
182        if loaded.contains(&path) {
183            continue;
184        }
185        loaded.insert(path.clone());
186        buffer.clear();
187        File::open(&path)
188            .and_then(|mut f| f.read_to_string(&mut buffer))
189            .map_err(|err| ReadFileErrorV2::from_io(path.clone(), err))?;
190        for result in parse_iter::<D>(&buffer) {
191            let entry = match result {
192                Ok(entry) => entry,
193                Err(err) => return Err(ReadFileErrorV2::from_syntax(path, err)),
194            };
195            match entry {
196                Entry::Include(include) => {
197                    let path = if include.is_relative() {
198                        let Some(parent) = path.parent() else {
199                            unreachable!("there must be a parent if the file was valid")
200                        };
201                        parent.join(&include)
202                    } else {
203                        include
204                    };
205                    let path = path
206                        .canonicalize()
207                        .map_err(|err| ReadFileErrorV2::from_io(path, err))?;
208                    if !loaded.contains(&path) {
209                        pending.push(path.clone());
210                    }
211                    on_entry(Entry::Include(path));
212                }
213                entry => on_entry(entry),
214            }
215        }
216    }
217    Ok(())
218}
219
/// Main struct representing a parsed beancount file.
///
/// It stores the parsed entries grouped by kind: options, include paths and directives.
///
/// To get an instance of this, use [`parse`].
///
/// For an example, look at the root crate documentation.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct BeancountFile<D> {
    /// List of beancount options
    ///
    /// See: <https://beancount.github.io/docs/beancount_language_syntax.html#options>
    pub options: Vec<BeanOption>,
    /// Paths of include directives
    ///
    /// See: <https://beancount.github.io/docs/beancount_language_syntax.html#includes>
    pub includes: Vec<PathBuf>,
    /// List of [`Directive`] found in the file
    pub directives: Vec<Directive<D>>,
}
239
240impl<D> Default for BeancountFile<D> {
241    fn default() -> Self {
242        Self {
243            options: Vec::new(),
244            includes: Vec::new(),
245            directives: Vec::new(),
246        }
247    }
248}
249
250impl<D> BeancountFile<D> {
251    /// Returns the first value found for the option
252    ///
253    /// If the option is declared multiple times, this function returns the first one found.
254    ///
255    /// See [`Self::options`] to get all declared options.
256    ///
257    /// Syntax: <https://beancount.github.io/docs/beancount_language_syntax.html#options>
258    ///
259    /// # Example
260    ///
261    /// ```
262    /// use beancount_parser::BeancountFile;
263    /// let input = r#"
264    /// option "favorite_color" "blue"
265    /// option "operating_currency" "CHF"
266    /// option "operating_currency" "PLN"
267    /// "#;
268    /// let beancount: BeancountFile<f64> = input.parse().unwrap();
269    /// assert_eq!(beancount.option("favorite_color"), Some("blue"));
270    /// assert_eq!(beancount.option("operating_currency"), Some("CHF"));
271    /// assert_eq!(beancount.option("foo"), None);
272    /// ```
273    #[must_use]
274    pub fn option(&self, key: &str) -> Option<&str> {
275        self.options
276            .iter()
277            .find(|opt| opt.name == key)
278            .map(|opt| &opt.value[..])
279    }
280}
281
282impl<D: Decimal> BeancountFile<D> {
283    /// Read from files
284    ///
285    /// # Errors
286    ///
287    /// Return an error if any of the files cannot be read or contains a syntax error
288    pub fn read_files(files: impl IntoIterator<Item = PathBuf>) -> Result<Self, ReadFileErrorV2> {
289        let mut file = BeancountFile::default();
290        read_files_v2(files, |entry| {
291            file.extend(std::iter::once(entry));
292        })?;
293        Ok(file)
294    }
295}
296
297impl<D> Extend<Entry<D>> for BeancountFile<D> {
298    fn extend<T: IntoIterator<Item = Entry<D>>>(&mut self, iter: T) {
299        for entry in iter {
300            match entry {
301                Entry::Directive(d) => self.directives.push(d),
302                Entry::Option(o) => self.options.push(o),
303                Entry::Include(p) => self.includes.push(p),
304            }
305        }
306    }
307}
308
309impl<D> FromIterator<Entry<D>> for BeancountFile<D> {
310    fn from_iter<T: IntoIterator<Item = Entry<D>>>(iter: T) -> Self {
311        let mut file = BeancountFile::default();
312        file.extend(iter);
313        file
314    }
315}
316
/// A beancount "directive"
///
/// It has fields common to all directives (date, metadata, line number), and a
/// [`Directive::content`] field with a different content for each directive type.
///
/// ```
/// # use beancount_parser::{BeancountFile, DirectiveContent};
/// let input = r#"
/// 2022-01-01 open Assets:Cash
/// 2022-01-01 * "Grocery shopping"
///   Expenses:Groceries  10 CHF
///   Assets:Cash
/// "#;
/// let beancount: BeancountFile<f64> = input.parse().unwrap();
/// assert_eq!(beancount.directives.len(), 2);
/// for directive in beancount.directives {
///    println!("line: {}", directive.line_number);
///    println!("metadata: {:#?}", directive.metadata);
///    match directive.content {
///       DirectiveContent::Open(open) => println!("open account directive: {open:?}"),
///       DirectiveContent::Transaction(trx) => println!("transaction: {trx:?}"),
///       other => println!("unknown directive: {other:?}"),
///    }
/// }
/// ```
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct Directive<D> {
    /// Date of the directive
    pub date: Date,
    /// Content of the directive that is specific to each directive type
    pub content: DirectiveContent<D>,
    /// Metadata associated to the directive
    ///
    /// See the [`metadata`] module for more
    pub metadata: metadata::Map<D>,
    /// Line number where the directive was found in the input file
    ///
    /// This is the line on which the date of the directive starts.
    pub line_number: u32,
}
356
357impl<D: Decimal> FromStr for Directive<D> {
358    type Err = Error;
359    fn from_str(s: &str) -> Result<Self, Self::Err> {
360        match all_consuming(directive).parse(Span::new(s)).finish() {
361            Ok((_, d)) => Ok(d),
362            Err(err) => Err(Error::new(s, err.input)),
363        }
364    }
365}
366
/// Directive specific content
///
/// Each variant corresponds to one kind of directive recognized by the parser.
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum DirectiveContent<D> {
    /// A transaction
    Transaction(Transaction<D>),
    /// Content of a `price` directive
    Price(Price<D>),
    /// Content of a `balance` directive
    Balance(Balance<D>),
    /// Content of an `open` directive
    Open(Open),
    /// Content of a `close` directive
    Close(Close),
    /// Content of a `pad` directive
    Pad(Pad),
    /// Currency declared by a `commodity` directive
    Commodity(Currency),
    /// Content of an `event` directive
    Event(Event),
}
381
382impl<D> DirectiveContent<D> {
383    /// Returns `Some` if the directive content is a transaction
384    pub fn as_transaction(&self) -> Option<&Transaction<D>> {
385        match self {
386            DirectiveContent::Transaction(transaction) => Some(transaction),
387            _ => None,
388        }
389    }
390
391    /// Returns `Some` if the directive content is a price
392    pub fn as_price(&self) -> Option<&Price<D>> {
393        match self {
394            DirectiveContent::Price(price) => Some(price),
395            _ => None,
396        }
397    }
398
399    /// Returns `Some` if the directive content is a balance
400    pub fn as_balance(&self) -> Option<&Balance<D>> {
401        match self {
402            DirectiveContent::Balance(balance) => Some(balance),
403            _ => None,
404        }
405    }
406
407    /// Returns `Some` if the directive content is an open
408    pub fn as_open(&self) -> Option<&Open> {
409        match self {
410            DirectiveContent::Open(open) => Some(open),
411            _ => None,
412        }
413    }
414
415    /// Returns `Some` if the directive content is a close
416    pub fn as_close(&self) -> Option<&Close> {
417        match self {
418            DirectiveContent::Close(close) => Some(close),
419            _ => None,
420        }
421    }
422
423    /// Returns `Some` if the directive content is a pad
424    pub fn as_pad(&self) -> Option<&Pad> {
425        match self {
426            DirectiveContent::Pad(pad) => Some(pad),
427            _ => None,
428        }
429    }
430
431    /// Returns `Some` if the directive content is a commodity
432    pub fn as_commodity(&self) -> Option<&Currency> {
433        match self {
434            DirectiveContent::Commodity(currency) => Some(currency),
435            _ => None,
436        }
437    }
438
439    /// Returns `Some` if the directive content is an event
440    pub fn as_event(&self) -> Option<&Event> {
441        match self {
442            DirectiveContent::Event(event) => Some(event),
443            _ => None,
444        }
445    }
446}
447
// Input type of the parsers: a string slice wrapped by `nom_locate` to keep track of the position.
type Span<'a> = nom_locate::LocatedSpan<&'a str>;
// Result type of the parsers, using `Span` as input and nom's default error type.
type IResult<'a, O> = nom::IResult<Span<'a>, O>;
450
/// Entry in the beancount syntax
///
/// It is more general than [`Directive`], as an entry can also be an option or an include.
#[allow(missing_docs)]
#[non_exhaustive]
#[derive(Debug, Clone)]
pub enum Entry<D> {
    /// A dated directive
    Directive(Directive<D>),
    /// An option declaration
    Option(BeanOption),
    /// Path of an `include` directive
    Include(PathBuf),
}
462impl<D> Entry<D> {
463    /// Returns `Some` if the entry is a directive
464    pub fn as_directive(&self) -> Option<&Directive<D>> {
465        match self {
466            Entry::Directive(directive) => Some(directive),
467            _ => None,
468        }
469    }
470
471    /// Returns `Some` if the entry is an option
472    pub fn as_option(&self) -> Option<&BeanOption> {
473        match self {
474            Entry::Option(option) => Some(option),
475            _ => None,
476        }
477    }
478
479    /// Returns `Some` if the entry is an include
480    pub fn as_include(&self) -> Option<&Path> {
481        match self {
482            Entry::Include(include) => Some(include),
483            _ => None,
484        }
485    }
486}
487
// Output of the `entry` parser.
//
// Besides the three kinds that exist in the public `Entry` enum, it also represents tag stack
// operations and lines that are not recognized as any entry.
// NOTE(review): presumably turned into `Entry` values by `iterator::Iter` — confirm in that module.
enum RawEntry<D> {
    Directive(Directive<D>),
    Option(BeanOption),
    Include(PathBuf),
    // Produced by a `pushtag` line
    PushTag(Tag),
    // Produced by a `poptag` line
    PopTag(Tag),
    // Produced for any line that none of the other parsers recognized
    Comment,
}
496
/// A beancount option
///
/// An option is declared with the `option` keyword followed by two quoted strings:
/// the name and the value.
///
/// See: <https://beancount.github.io/docs/beancount_language_syntax.html#options>
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct BeanOption {
    /// Name of the option
    pub name: String,
    /// Value of the option
    pub value: String,
}
508
509fn entry<D: Decimal>(input: Span<'_>) -> IResult<'_, RawEntry<D>> {
510    alt((
511        directive.map(RawEntry::Directive),
512        option.map(|(name, value)| RawEntry::Option(BeanOption { name, value })),
513        include.map(|p| RawEntry::Include(p)),
514        tag_stack_operation,
515        line.map(|()| RawEntry::Comment),
516    ))
517    .parse(input)
518}
519
520fn directive<D: Decimal>(input: Span<'_>) -> IResult<'_, Directive<D>> {
521    let (input, position) = position(input)?;
522    let (input, date) = date::parse(input)?;
523    let (input, _) = cut(space1).parse(input)?;
524    let (input, (content, metadata)) = alt((
525        map(transaction::parse, |(t, m)| {
526            (DirectiveContent::Transaction(t), m)
527        }),
528        (
529            terminated(
530                alt((
531                    map(
532                        preceded(tag("price"), cut(preceded(space1, amount::price))),
533                        DirectiveContent::Price,
534                    ),
535                    map(
536                        preceded(tag("balance"), cut(preceded(space1, account::balance))),
537                        DirectiveContent::Balance,
538                    ),
539                    map(
540                        preceded(tag("open"), cut(preceded(space1, account::open))),
541                        DirectiveContent::Open,
542                    ),
543                    map(
544                        preceded(tag("close"), cut(preceded(space1, account::close))),
545                        DirectiveContent::Close,
546                    ),
547                    map(
548                        preceded(tag("pad"), cut(preceded(space1, account::pad))),
549                        DirectiveContent::Pad,
550                    ),
551                    map(
552                        preceded(tag("commodity"), cut(preceded(space1, amount::currency))),
553                        DirectiveContent::Commodity,
554                    ),
555                    map(
556                        preceded(tag("event"), cut(preceded(space1, event::parse))),
557                        DirectiveContent::Event,
558                    ),
559                )),
560                end_of_line,
561            ),
562            metadata::parse,
563        ),
564    ))
565    .parse(input)?;
566    Ok((
567        input,
568        Directive {
569            date,
570            content,
571            metadata,
572            line_number: position.location_line(),
573        },
574    ))
575}
576
577fn option(input: Span<'_>) -> IResult<'_, (String, String)> {
578    let (input, _) = tag("option")(input)?;
579    let (input, key) = preceded(space1, string).parse(input)?;
580    let (input, value) = preceded(space1, string).parse(input)?;
581    let (input, ()) = end_of_line(input)?;
582    Ok((input, (key, value)))
583}
584
585fn include(input: Span<'_>) -> IResult<'_, PathBuf> {
586    let (input, _) = tag("include")(input)?;
587    let (input, path) = cut(delimited(space1, string, end_of_line)).parse(input)?;
588    Ok((input, path.into()))
589}
590
591fn tag_stack_operation<D>(input: Span<'_>) -> IResult<'_, RawEntry<D>> {
592    alt((
593        preceded((tag("pushtag"), space1), transaction::parse_tag).map(RawEntry::PushTag),
594        preceded((tag("poptag"), space1), transaction::parse_tag).map(RawEntry::PopTag),
595    ))
596    .parse(input)
597}
598
599fn end_of_line(input: Span<'_>) -> IResult<'_, ()> {
600    let (input, _) = space0(input)?;
601    let (input, _) = opt(comment).parse(input)?;
602    let (input, _) = alt((line_ending, eof)).parse(input)?;
603    Ok((input, ()))
604}
605
606fn comment(input: Span<'_>) -> IResult<'_, ()> {
607    let (input, _) = char(';')(input)?;
608    let (input, _) = not_line_ending(input)?;
609    Ok((input, ()))
610}
611
612fn line(input: Span<'_>) -> IResult<'_, ()> {
613    let (input, _) = not_line_ending(input)?;
614    let (input, _) = line_ending(input)?;
615    Ok((input, ()))
616}
617
618fn empty_line(input: Span<'_>) -> IResult<'_, ()> {
619    let (input, ()) = not(eof).parse(input)?;
620    end_of_line(input)
621}
622
623fn string(input: Span<'_>) -> IResult<'_, String> {
624    let (input, _) = char('"')(input)?;
625    let mut string = String::new();
626    let mut take_data = take_while(|c: char| c != '"' && c != '\\');
627    let (mut input, mut part) = take_data.parse(input)?;
628    while !part.fragment().is_empty() {
629        string.push_str(part.fragment());
630        let (new_input, escaped) =
631            opt(alt((value('"', tag("\\\"")), value('\\', tag("\\\\"))))).parse_complete(input)?;
632        let Some(escaped) = escaped else { break };
633        string.push(escaped);
634        let (new_input, new_part) = take_data.parse(new_input)?;
635        input = new_input;
636        part = new_part;
637    }
638    let (input, _) = char('"')(input)?;
639    Ok((input, string))
640}