Skip to main content

beancount_parser_lima/
sources.rs

1use ariadne::{Color, Label};
2use glob::{self, glob_with};
3use lazy_format::lazy_format;
4use std::{
5    collections::{HashMap, HashSet, VecDeque},
6    ffi::OsStr,
7    fmt::{self, Formatter},
8    fs::File,
9    io::{self, Read, Write},
10    iter::once,
11    path::{Path, PathBuf},
12};
13
14pub use crate::{trim::trim_trailing_whitespace, types::*};
15
16use crate::{IncludedGlob, SourceId, get_includes};
17
18/// Contains the content of the Beancount source file, and the content of
19/// the transitive closure of all the include'd source files.
20///
21/// Zero-copy parsing means that all string values are returned as references into these strings.
22///
23/// # Examples
24/// ```
25/// # use std::path::PathBuf;
26/// use beancount_parser_lima::{BeancountParser, BeancountSources};
27///
28/// let sources = BeancountSources::try_from(PathBuf::from("examples/data/full.beancount")).unwrap();
29/// let beancount_parser = BeancountParser::new(&sources);
30///
31/// let result = beancount_parser.parse();
32/// ```
33#[derive(Clone)]
34pub struct BeancountSources {
    // path of the root file; `None` when the sources were built from an inline string
35    root_path: Option<PathBuf>,
    // id of the root source, taken from `SourceId::default()`
36    root_source_id: SourceId,
    // full text of the root source
37    root_content: String,
    // byte offset of each char in `root_content`, used to map byte spans to char spans
38    root_content_char_indices: Vec<usize>,
    // outcome of glob-expanding each resolved include path, keyed by that path
39    included_globs: HashMap<PathBuf, IncludedGlob>,
    // outcome of loading each path yielded by glob expansion, keyed by that path
40    included_content: HashMap<PathBuf, IncludedSource>,
    // display name per source: the root's path (or "inline") first, then each included path
41    source_id_strings: Vec<String>, // indexed by SourceId
42}
43
// Outcome of loading a single path produced by expanding an include.
44#[derive(Clone, Debug)]
45enum IncludedSource {
    // read successfully: source id, file text, and the byte offset of each char in that text
46    Content(SourceId, String, Vec<usize>), // the content and its char indices
    // iterating the glob or reading the file failed; holds the rendered error message
47    Error(String),
    // the path canonicalizes to a file that has already been loaded
48    Duplicate,
49}
50
51impl BeancountSources {
52    fn try_read_with_includes(root_path: PathBuf) -> io::Result<Self> {
53        let root_content = read(&root_path)?;
54        Ok(Self::read_with_includes(Some(root_path), root_content))
55    }
56
57    fn read_with_includes(root_path: Option<PathBuf>, root_content: String) -> Self {
58        let root_source_id = SourceId::default();
59        let root_source_id_string = root_path
60            .as_ref()
61            .map(|p| p.to_string_lossy().into())
62            .unwrap_or("inline".to_string());
63        let mut source_id_strings = Vec::from([root_source_id_string]);
64
65        let mut pending_includes = get_includes(&root_content, root_source_id)
66            .into_iter()
67            .map(|included| resolve_included_path(root_path.as_ref(), included.as_ref()))
68            .collect::<VecDeque<_>>();
69
70        let mut included_globs = HashMap::new();
71        let mut included_content: HashMap<PathBuf, IncludedSource> = HashMap::new();
72
73        // for duplicate detection
74        let mut canonical_paths =
75            if let Some(canonical_root) = root_path.as_ref().and_then(|p| p.canonicalize().ok()) {
76                HashSet::from([canonical_root])
77            } else {
78                HashSet::default()
79            };
80
81        while !pending_includes.is_empty() {
82            let included = pending_includes.pop_front().unwrap();
83            let included_str = included.to_string_lossy();
84            let included_str = included_str.as_ref();
85
86            match glob_with(
87                included_str,
88                glob::MatchOptions {
89                    case_sensitive: true,
90                    require_literal_separator: true,
91                    require_literal_leading_dot: true,
92                },
93            ) {
94                Err(e) => {
95                    included_globs.insert(included, IncludedGlob::Error(e.to_string()));
96                }
97                Ok(globbed_includes) => {
98                    let mut glob_expansions = Vec::default();
99
100                    for globbed_include in globbed_includes {
101                        match globbed_include {
102                            Err(e) => {
103                                let path = e.path().to_path_buf();
104                                glob_expansions.push(path.clone());
105                                included_content.insert(path, IncludedSource::Error(e.to_string()));
106                            }
107                            Ok(globbed_include) => {
108                                glob_expansions.push(globbed_include.clone());
109
110                                if let Ok(canonical_path) = globbed_include.canonicalize() {
111                                    if canonical_paths.contains(&canonical_path) {
112                                        // don't overwrite existing content
113                                        included_content
114                                            .entry(globbed_include)
115                                            .or_insert(IncludedSource::Duplicate);
116                                    } else {
117                                        canonical_paths.insert(canonical_path);
118
119                                        let source_id = SourceId::from(source_id_strings.len());
120                                        source_id_strings
121                                            .push(globbed_include.to_string_lossy().into());
122
123                                        let included_source = read(&globbed_include).map_or_else(
124                                            |e| {
125                                                IncludedSource::Error(format!(
126                                                    "{}: {}",
127                                                    globbed_include.to_string_lossy(),
128                                                    e
129                                                ))
130                                            },
131                                            |c| {
132                                                // find the char indices for the content
133                                                // needed for mapping byte indices to char indices, to convert Chumsky spans to Ariadne spans
134                                                // see https://github.com/zesterer/chumsky/issues/65#issuecomment-1689216633
135                                                let char_indices = c
136                                                    .char_indices()
137                                                    .map(|(i, _)| i)
138                                                    .collect::<Vec<_>>();
139                                                IncludedSource::Content(source_id, c, char_indices)
140                                            },
141                                        );
142
143                                        // stabilisation of VacantEntry::insert_entry() would enable us to avoid cloning the path here
144                                        // and doing an immediate lookup
145                                        included_content
146                                            .insert(globbed_include.clone(), included_source);
147                                        let included_source =
148                                            included_content.get(&globbed_include).unwrap();
149
150                                        if let IncludedSource::Content(_, content, _) =
151                                            included_source
152                                        {
153                                            let mut includes = get_includes(content, source_id)
154                                                .into_iter()
155                                                .map(|included_path| {
156                                                    resolve_included_path(
157                                                        Some(&globbed_include),
158                                                        included_path.as_ref(),
159                                                    )
160                                                })
161                                                .collect::<VecDeque<_>>();
162                                            pending_includes.append(&mut includes);
163                                        }
164                                    }
165                                }
166                            }
167                        }
168                    }
169
170                    included_globs.insert(included, IncludedGlob::Expanded(glob_expansions));
171                }
172            }
173        }
174
175        let root_content_char_indices = root_content
176            .char_indices()
177            .map(|(i, _)| i)
178            .collect::<Vec<_>>();
179
180        Self {
181            root_path,
182            root_source_id,
183            root_content,
184            root_content_char_indices,
185            included_globs,
186            included_content,
187            source_id_strings,
188        }
189    }
190
191    #[deprecated(since = "0.12.0", note = "Use `write_errors_or_warnings` instead")]
192    pub fn write<W, E, K>(&self, w: &mut W, errors_or_warnings: Vec<E>) -> io::Result<()>
193    where
194        W: Write,
195        E: Into<AnnotatedErrorOrWarning<K>>,
196        K: ErrorOrWarningKind,
197    {
198        self.write_errors_or_warnings(w, errors_or_warnings)
199    }
200
201    /// Write human-readable error reports.
202    pub fn write_errors_or_warnings<W, E, K>(
203        &self,
204        w: &mut W,
205        errors_or_warnings: Vec<E>,
206    ) -> io::Result<()>
207    where
208        W: Write,
209        E: Into<AnnotatedErrorOrWarning<K>>,
210        K: ErrorOrWarningKind,
211    {
212        for error_or_warning in errors_or_warnings.into_iter() {
213            let AnnotatedErrorOrWarning {
214                error_or_warning,
215                annotation,
216            } = error_or_warning.into();
217
218            self.write_report::<W, K, ErrorOrWarning<K>>(w, &error_or_warning)?;
219
220            if let Some(annotation) = annotation {
221                // clippy thinks this is better than write! 🤷
222                w.write_fmt(core::format_args!("{}\n", &annotation))?;
223            }
224        }
225        Ok(())
226    }
227
228    /// Write human-readable error or warning report.
229    pub fn write_report<W, K, R>(&self, w: &mut W, report: &R) -> io::Result<()>
230    where
231        W: Write,
232        K: ErrorOrWarningKind,
233        R: Report,
234    {
235        write_report::<W, K, R, _>(
236            w,
237            report,
238            &|span| self.get_adjusted_source(span),
239            self.sources(),
240        )
241    }
242
243    /// Resolve the span into filename, line number range, and spanned content.
244    /// Filename will be present unless the sources were created from an inline string.
245    pub fn resolve_span<'a>(&'a self, span: &Span) -> SpannedSource<'a> {
246        let (source_content, source_id_str, byte_span, rune_span) = self.get_adjusted_source(*span);
247
248        let file_name = if Into::<SourceId>::into(span.source) == self.root_source_id {
249            self.root_path.as_ref().and(Some(source_id_str))
250        } else {
251            Some(source_id_str)
252        };
253
254        let mut source_chars = source_content.chars();
255        let start_line = source_chars
256            .by_ref()
257            .take(rune_span.start)
258            .filter(|c| *c == '\n')
259            .count()
260            + 1;
261        let lines_spanned = source_chars
262            .by_ref()
263            .take(rune_span.end - rune_span.start)
264            .filter(|c| *c == '\n')
265            .count();
266        let end_line = start_line + lines_spanned;
267
268        SpannedSource {
269            file_name,
270            start_line,
271            end_line,
272            content: source_content
273                .get(byte_span.start..byte_span.end)
274                .unwrap_or(""),
275        }
276    }
277
278    fn byte_to_rune(&self, char_indices: &[usize], byte_span: Span) -> Span {
279        let mut rune_span = byte_span;
280        rune_span.start = char_indices.partition_point(|&i| i < byte_span.start);
281        rune_span.end = char_indices.partition_point(|&i| i < byte_span.end);
282        rune_span
283    }
284
285    pub fn error_source_text<'a, K>(&'a self, error_or_warning: &ErrorOrWarning<K>) -> &'a str
286    where
287        K: ErrorOrWarningKind,
288    {
289        let (source_content, _, byte_span, _rune_span) =
290            self.get_adjusted_source(error_or_warning.0.span);
291        &source_content[byte_span.start..byte_span.end]
292    }
293
294    fn get_adjusted_source(&self, span: Span) -> (&str, &str, Span, Span) {
295        let safe_span = if span.source >= self.source_id_strings.len() {
296            // bad source collapses down to empty span,
297            // because we don't really have a good way to reject that
298            // and at least we mustn't panic
299            Span {
300                source: self.root_source_id.into(),
301                start: 0,
302                end: 0,
303            }
304        } else {
305            span
306        };
307        let source_id = safe_span.source.into();
308        let source_id_str = self.source_id_string(source_id);
309        let empty_char_indices = Vec::default();
310        let (source_content, source_content_char_indices) = if source_id == self.root_source_id {
311            (self.root_content.as_str(), &self.root_content_char_indices)
312        } else if let IncludedSource::Content(_, content, content_char_indices) =
313            self.included_content.get(Path::new(source_id_str)).unwrap()
314        {
315            (content.as_str(), content_char_indices)
316        } else {
317            ("", &empty_char_indices)
318        };
319
320        let byte_span = trimmed_span(source_content, safe_span);
321        let rune_span = byte_to_rune(source_content_char_indices, byte_span);
322
323        (source_content, source_id_str, byte_span, rune_span)
324    }
325
326    fn source_id_string(&self, source_id: SourceId) -> &str {
327        self.source_id_strings[Into::<usize>::into(source_id)].as_str()
328    }
329
330    fn sources(&self) -> Vec<(String, &str)> {
331        once((
332            self.source_id_string(self.root_source_id).to_string(),
333            self.root_content.as_str(),
334        ))
335        .chain(
336            self.included_content
337                .iter()
338                .filter_map(|(_, included_source)| {
339                    if let IncludedSource::Content(source_id, content, _) = included_source {
340                        Some((
341                            self.source_id_string(*source_id).to_string(),
342                            content.as_str(),
343                        ))
344                    } else {
345                        None
346                    }
347                }),
348        )
349        .collect()
350    }
351
352    pub(crate) fn content_iter(&self) -> impl Iterator<Item = (SourceId, Option<&Path>, &str)> {
353        once((
354            self.root_source_id,
355            self.root_path.as_deref(),
356            self.root_content.as_str(),
357        ))
358        .chain(
359            self.included_content
360                .iter()
361                .filter_map(|(pathbuf, included_source)| {
362                    if let IncludedSource::Content(source_id, content, _) = included_source {
363                        Some((*source_id, Some(pathbuf.as_path()), content.as_str()))
364                    } else {
365                        None
366                    }
367                }),
368        )
369    }
370
371    /// Number of real sources, excluding synthetic ones
372    pub(crate) fn num_sources(&self) -> usize {
373        self.source_id_strings.len()
374    }
375
376    pub(crate) fn root_path(&self) -> Option<&Path> {
377        self.root_path.as_deref()
378    }
379
380    pub(crate) fn included_globs(&self) -> &HashMap<PathBuf, IncludedGlob> {
381        &self.included_globs
382    }
383
384    pub(crate) fn error_paths(&self) -> HashMap<Option<&Path>, String> {
385        self.included_content
386            .iter()
387            .filter_map(|(pathbuf, included_source)| {
388                if let IncludedSource::Error(e) = included_source {
389                    Some((Some(pathbuf.as_path()), e.clone()))
390                } else {
391                    None
392                }
393            })
394            .collect::<HashMap<_, _>>()
395    }
396}
397
398impl TryFrom<PathBuf> for BeancountSources {
399    type Error = io::Error;
400
401    fn try_from(source_path: PathBuf) -> io::Result<Self> {
402        Self::try_read_with_includes(source_path)
403    }
404}
405
406impl TryFrom<&Path> for BeancountSources {
407    type Error = io::Error;
408
409    fn try_from(source_path: &Path) -> io::Result<Self> {
410        Self::try_read_with_includes(source_path.to_owned())
411    }
412}
413
414impl From<String> for BeancountSources {
415    fn from(source_string: String) -> Self {
416        Self::read_with_includes(None, source_string)
417    }
418}
419
420impl From<&str> for BeancountSources {
421    fn from(source_string: &str) -> Self {
422        Self::read_with_includes(None, source_string.to_owned())
423    }
424}
425
426impl std::fmt::Debug for BeancountSources {
427    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
428        writeln!(f, "BeancountSources(",)?;
429
430        for (path, included_source) in &self.included_content {
431            match included_source {
432                IncludedSource::Content(source_id, content, _) => writeln!(
433                    f,
434                    "    {} ok len {},",
435                    self.source_id_string(*source_id),
436                    content.len()
437                )?,
438                IncludedSource::Error(e) => writeln!(f, "    {:?} err {},", path, e)?,
439                IncludedSource::Duplicate => writeln!(f, "    {:?} duplicate include", path)?,
440            }
441        }
442
443        writeln!(f, ")",)
444    }
445}
446
// Named content fragments layered over a `BeancountSources`, so that spans which
// refer to text outside the original sources can still be resolved and reported.
447#[derive(Clone)]
448pub struct SyntheticSources<'a> {
    // the real sources; spans outside the synthetic id range are resolved against these
449    sources: &'a BeancountSources,
    // first synthetic source id, set from `sources.num_sources()` on construction
450    base_id: usize,
451    content: HashMap<String, (SourceId, String, Vec<usize>)>, // content and char indices, indexed by source name
452    source_id_strings: Vec<String>,                           // indexed by SourceId - base_id
453}
454
455impl<'a> SyntheticSources<'a> {
456    pub fn new(sources: &'a BeancountSources) -> Self {
457        SyntheticSources {
458            sources,
459            base_id: sources.num_sources(),
460            content: HashMap::default(),
461            source_id_strings: Vec::default(),
462        }
463    }
464}
465
466impl<'a> SyntheticSources<'a> {
467    fn sources(&self) -> Vec<(String, &str)> {
468        let mut sources = self.sources.sources();
469        sources.extend(
470            self.content.iter().map(|(source_id_str, (_, content, _))| {
471                (source_id_str.to_string(), content.as_str())
472            }),
473        );
474        sources
475    }
476
477    /// A synthetic span is a content fragment which doesn't occur in the original sources, but
478    /// may be referred to in error reports.  Multiple fragments may share the same source name.
479    pub fn create_synthetic_span(&mut self, source_name: &str, content_fragment: &str) -> Span {
480        if let Some((source_id, content, char_indices)) = self.content.get_mut(source_name) {
481            let start = content.len();
482            let end = start + content_fragment.len();
483            let span = Span {
484                source: (*source_id).into(),
485                start,
486                end,
487            };
488
489            let original_len = char_indices.len();
490            char_indices.extend(
491                content_fragment
492                    .char_indices()
493                    .map(|(i, _)| i + original_len),
494            );
495            content.push_str(content_fragment);
496
497            span
498        } else {
499            let source = self.source_id_strings.len() + self.base_id;
500            self.source_id_strings.push(source_name.to_string());
501
502            let span = Span {
503                source,
504                start: 0,
505                end: content_fragment.len(),
506            };
507
508            let content = content_fragment.to_string();
509            let char_indices = content.char_indices().map(|(i, _)| i).collect::<Vec<_>>();
510            self.content.insert(
511                source_name.to_string(),
512                (source.into(), content, char_indices),
513            );
514
515            span
516        }
517    }
518
519    /// Write human-readable error reports.
520    pub fn write_errors_or_warnings<W, E, K>(
521        &self,
522        w: &mut W,
523        errors_or_warnings: Vec<E>,
524    ) -> io::Result<()>
525    where
526        W: Write,
527        E: Into<AnnotatedErrorOrWarning<K>>,
528        K: ErrorOrWarningKind,
529    {
530        for error_or_warning in errors_or_warnings.into_iter() {
531            let AnnotatedErrorOrWarning {
532                error_or_warning,
533                annotation,
534            } = error_or_warning.into();
535
536            self.write_report::<W, K, ErrorOrWarning<K>>(w, &error_or_warning)?;
537
538            if let Some(annotation) = annotation {
539                // clippy thinks this is better than write! 🤷
540                w.write_fmt(core::format_args!("{}\n", &annotation))?;
541            }
542        }
543        Ok(())
544    }
545
546    /// Write human-readable error or warning report.
547    pub fn write_report<W, K, R>(&self, w: &mut W, report: &R) -> io::Result<()>
548    where
549        W: Write,
550        K: ErrorOrWarningKind,
551        R: Report,
552    {
553        write_report::<W, K, R, _>(
554            w,
555            report,
556            &|span| self.get_adjusted_source(span),
557            self.sources(),
558        )
559    }
560
561    /// Resolve the span into filename, line number range, and spanned content.
562    /// Filename will be present unless the sources were created from an inline string.
563    pub fn resolve_span<'s>(&'s self, span: &Span) -> SpannedSource<'s> {
564        resolve_span(*span, &|span| self.get_adjusted_source(span), true)
565    }
566
567    pub fn error_source_text<'s, K>(&'s self, error_or_warning: &ErrorOrWarning<K>) -> &'s str
568    where
569        K: ErrorOrWarningKind,
570    {
571        let (source_content, _, byte_span, _rune_span) =
572            self.get_adjusted_source(error_or_warning.0.span);
573        &source_content[byte_span.start..byte_span.end]
574    }
575
576    fn get_adjusted_source(&self, span: Span) -> (&str, &str, Span, Span) {
577        if span.source >= self.base_id && span.source < self.base_id + self.source_id_strings.len()
578        {
579            let source_id_str = self.source_id_strings[span.source - self.base_id].as_str();
580
581            let (_, content, content_char_indices) = self.content.get(source_id_str).unwrap();
582            let content = content.as_str();
583
584            let byte_span = trimmed_span(content, span);
585            let rune_span = byte_to_rune(content_char_indices, byte_span);
586
587            (content, source_id_str, byte_span, rune_span)
588        } else {
589            self.sources.get_adjusted_source(span)
590        }
591    }
592}
593
594// get included path relative to including path
595pub(crate) fn resolve_included_path(
596    including_path: Option<&PathBuf>,
597    included_path: &Path,
598) -> PathBuf {
599    match including_path.and_then(|p| path_dir(p.as_ref())) {
600        Some(p) => p.join(included_path),
601        None => included_path.to_path_buf(),
602    }
603}
604
// Directory part of a path, or None if the path has no parent or only an empty one
// (as for a bare file name).
fn path_dir(p: &Path) -> Option<&Path> {
    p.parent().filter(|dir| !dir.as_os_str().is_empty())
}
615
/// Read the whole of the file at `file_path` into a string.
///
/// Fails if the file cannot be opened or read, or if its content is not valid UTF-8.
fn read<P>(file_path: P) -> io::Result<String>
where
    P: AsRef<Path>,
{
    // the standard library helper opens the file and sizes the buffer up front
    std::fs::read_to_string(file_path)
}
627
628fn write_report<'a, W, K, R, F>(
629    w: &mut W,
630    report: &R,
631    get_adjusted_source: &F,
632    sources: Vec<(String, &str)>,
633) -> io::Result<()>
634where
635    W: Write,
636    K: ErrorOrWarningKind,
637    R: Report,
638    F: Fn(Span) -> (&'a str, &'a str, Span, Span),
639{
640    let (src_id, span) =
641        source_id_string_and_adjusted_rune_span(report.span(), get_adjusted_source);
642    let color = K::color();
643    let report_kind = K::report_kind();
644
645    ariadne::Report::build(report_kind, (src_id.clone(), (span.start..span.end)))
646        .with_message(report.message())
647        .with_labels(Some(
648            Label::new((src_id, (span.start..span.end)))
649                .with_message(report.reason())
650                .with_color(color),
651        ))
652        .with_labels(report.contexts().map(|(label, span)| {
653            let (src_id, span) = source_id_string_and_adjusted_rune_span(span, get_adjusted_source);
654            Label::new((src_id, (span.start..span.end)))
655                .with_message(lazy_format!("in this {}", label))
656                .with_color(Color::Yellow)
657        }))
658        .with_labels(report.related().map(|(label, span)| {
659            let (src_id, span) = source_id_string_and_adjusted_rune_span(span, get_adjusted_source);
660            Label::new((src_id, (span.start..span.end)))
661                .with_message(lazy_format!("{}", label))
662                .with_color(Color::Yellow)
663        }))
664        .finish()
665        .write(ariadne::sources(sources), w)
666}
667
668/// Resolve the span into filename, line number range, and spanned content.
669/// Filename will be present unless the sources were created from an inline string.
670fn resolve_span<'a, F>(
671    span: Span,
672    get_adjusted_source: &F,
673    source_id_is_file_name: bool,
674) -> SpannedSource<'a>
675where
676    F: Fn(Span) -> (&'a str, &'a str, Span, Span),
677{
678    let (source_content, source_id_str, byte_span, rune_span) = get_adjusted_source(span);
679
680    let mut source_chars = source_content.chars();
681    let start_line = source_chars
682        .by_ref()
683        .take(rune_span.start)
684        .filter(|c| *c == '\n')
685        .count()
686        + 1;
687    let lines_spanned = source_chars
688        .by_ref()
689        .take(rune_span.end - rune_span.start)
690        .filter(|c| *c == '\n')
691        .count();
692    let end_line = start_line + lines_spanned;
693
694    SpannedSource {
695        file_name: source_id_is_file_name.then_some(source_id_str),
696        start_line,
697        end_line,
698        content: source_content
699            .get(byte_span.start..byte_span.end)
700            .unwrap_or(""),
701    }
702}
703
704fn source_id_string_and_adjusted_rune_span<'a, F>(
705    span: Span,
706    get_adjusted_source: &F,
707) -> (String, Span)
708where
709    F: Fn(Span) -> (&'a str, &'a str, Span, Span),
710{
711    let (_, source_id, _byte_span, rune_span) = get_adjusted_source(span);
712    (source_id.to_string(), rune_span)
713}
714
715fn trimmed_span(source: &str, span: Span) -> Span {
716    let mut trimmed = span;
717
718    // invalid spans fall back to nothing
719    if source.get(span.start..span.end).is_none() {
720        trimmed.start = 0;
721        trimmed.end = 0;
722    } else {
723        trimmed.end = trim_trailing_whitespace(source, span.start, span.end);
724    }
725    trimmed
726}
727
728fn byte_to_rune(char_indices: &[usize], byte_span: Span) -> Span {
729    let mut rune_span = byte_span;
730    rune_span.start = char_indices.partition_point(|&i| i < byte_span.start);
731    rune_span.end = char_indices.partition_point(|&i| i < byte_span.end);
732    rune_span
733}