#![allow(dead_code)]
#![recursion_limit = "256"]
#![doc = include_str!("../README.md")]

use ariadne::{Color, Label, Report};
use chumsky::prelude::{Input, Parser};
use lazy_format::lazy_format;
use lexer::{lex, Token};
use parsers::{file, includes, ParserState};
use sort::SortIteratorAdaptor;
use std::{
    collections::{HashMap, HashSet, VecDeque},
    ffi::OsStr,
    fmt::{self, Formatter},
    fs::File,
    io::{self, Read, Write},
    iter::once,
    path::{Path, PathBuf},
};

pub use crate::{trim::trim_trailing_whitespace, types::*};

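/// All the source files for a ledger: the root file (or an inline string) together with the
/// contents of every transitively included file, read eagerly up front.
///
/// Each source also keeps a table of the byte offsets at which its characters start, so that
/// byte spans from the lexer can be mapped onto character spans when rendering reports.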
#[derive(Clone)]
pub struct BeancountSources {
    root_path: Option<PathBuf>,
    root_source_id: SourceId,
    root_content: String,
    root_content_char_indices: Vec<usize>,
    included_content: HashMap<PathBuf, IncludedSource>,
    source_id_strings: Vec<String>,
}

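/// How an included file was resolved: read successfully (with its character-offset table),
/// failed to read (carrying the I/O error text), or skipped as a duplicate of a file that was
/// already included.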
#[derive(Clone, Debug)]
enum IncludedSource {
    Content(SourceId, String, Vec<usize>),
    IoError(String),
    Duplicate,
}

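/// Lexes `content` and runs the `includes()` parser over the tokens, returning the raw,
/// as-yet-unresolved paths of any `include` directives it finds.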
fn get_includes(content: &str, source_id: SourceId) -> Vec<String> {
    fn get_includes_for_tokens(
        tokens: Vec<(Token, Span)>,
        source_id: SourceId,
        end_of_input: Span,
    ) -> Vec<String> {
        let mut parser_state = chumsky::extra::SimpleState(ParserState::default());

        let spanned_tokens = tokens
            .map(end_of_input, |(t, s)| (t, s))
            .with_context(source_id);

        includes()
            .parse_with_state(spanned_tokens, &mut parser_state)
            .into_output()
            .unwrap_or_default()
    }

    let tokens = lex_with_source(source_id, content);
    get_includes_for_tokens(tokens, source_id, end_of_input(source_id, content))
}

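/// The parent directory of `p`, or `None` if that parent is empty (i.e. `p` is a bare file name).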
fn path_dir(p: &Path) -> Option<&Path> {
    p.parent().and_then(|p| {
        if !AsRef::<OsStr>::as_ref(&p).is_empty() {
            Some(p)
        } else {
            None
        }
    })
}

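/// Resolves an included path relative to the directory of the including file, or returns it
/// unchanged when there is no including file or that file has no parent directory.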
fn resolve_included_path(including_path: Option<&PathBuf>, included_path: &Path) -> PathBuf {
    match including_path.and_then(|p| path_dir(p.as_ref())) {
        Some(p) => p.join(included_path),
        None => included_path.to_path_buf(),
    }
}

impl BeancountSources {
    fn try_read_with_includes(root_path: PathBuf) -> io::Result<Self> {
        let root_content = read(&root_path)?;
        Ok(Self::read_with_includes(Some(root_path), root_content))
    }

    fn read_with_includes(root_path: Option<PathBuf>, root_content: String) -> Self {
        let root_source_id = SourceId::default();
        let root_source_id_string = root_path
            .as_ref()
            .map(|p| p.to_string_lossy().into())
            .unwrap_or("inline".to_string());
        let mut source_id_strings = Vec::from([root_source_id_string]);

        let mut pending_paths = get_includes(&root_content, root_source_id)
            .into_iter()
            .map(|included_path| resolve_included_path(root_path.as_ref(), included_path.as_ref()))
            .collect::<VecDeque<_>>();

        let mut included_content: HashMap<PathBuf, IncludedSource> = HashMap::new();

        let mut canonical_paths =
            HashSet::from([root_path.as_ref().and_then(|p| p.canonicalize().ok())]);

        while !pending_paths.is_empty() {
            let path = pending_paths.pop_front().unwrap();
            let canonical_path = path.canonicalize().ok();

            if canonical_paths.contains(&canonical_path) {
                included_content
                    .entry(path)
                    .or_insert(IncludedSource::Duplicate);
            } else {
                canonical_paths.insert(canonical_path);

                let source_id = SourceId::from(source_id_strings.len());
                source_id_strings.push(path.to_string_lossy().into());

                let included_source = read(&path).map_or_else(
                    |e| IncludedSource::IoError(e.to_string()),
                    |c| {
                        let char_indices = c.char_indices().map(|(i, _)| i).collect::<Vec<_>>();
                        IncludedSource::Content(source_id, c, char_indices)
                    },
                );

                included_content.insert(path.clone(), included_source);
                let included_source = included_content.get(&path).unwrap();

                if let IncludedSource::Content(_, content, _) = included_source {
                    let mut includes = get_includes(content, source_id)
                        .into_iter()
                        .map(|included_path| {
                            resolve_included_path(Some(&path), included_path.as_ref())
                        })
                        .collect::<VecDeque<_>>();
                    pending_paths.append(&mut includes);
                }
            }
        }

        let root_content_char_indices = root_content
            .char_indices()
            .map(|(i, _)| i)
            .collect::<Vec<_>>();

        Self {
            root_path,
            root_source_id,
            root_content,
            root_content_char_indices,
            included_content,
            source_id_strings,
        }
    }

    #[deprecated(since = "0.12.0", note = "Use `write_errors_or_warnings` instead")]
    pub fn write<W, E, K>(&self, w: W, errors_or_warnings: Vec<E>) -> io::Result<()>
    where
        W: Write + Copy,
        E: Into<AnnotatedErrorOrWarning<K>>,
        K: ErrorOrWarningKind,
    {
        self.write_errors_or_warnings(w, errors_or_warnings)
    }

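    /// Writes each error or warning to `w` as an ariadne report: a primary label at its span,
    /// secondary labels for any associated context and related spans, and finally its annotation
    /// on a separate line if one is attached.
    ///
    /// `W` must be `Copy`, so pass a copyable writer handle, for example `&io::stderr()`.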
    pub fn write_errors_or_warnings<W, E, K>(
        &self,
        mut w: W,
        errors_or_warnings: Vec<E>,
    ) -> io::Result<()>
    where
        W: Write + Copy,
        E: Into<AnnotatedErrorOrWarning<K>>,
        K: ErrorOrWarningKind,
    {
        for error_or_warning in errors_or_warnings.into_iter() {
            use chumsky::span::Span;
            let AnnotatedErrorOrWarning {
                error_or_warning,
                annotation,
            } = error_or_warning.into();
            let error_or_warning = *(error_or_warning.0);
            let (src_id, span) =
                self.source_id_string_and_adjusted_rune_span(&error_or_warning.span);
            let color = error_or_warning.color();
            let report_kind = error_or_warning.report_kind();

            Report::build(report_kind, (src_id.clone(), (span.start()..span.end())))
                .with_message(error_or_warning.message)
                .with_labels(Some(
                    Label::new((src_id, (span.start()..span.end())))
                        .with_message(error_or_warning.reason)
                        .with_color(color),
                ))
                .with_labels(error_or_warning.contexts.into_iter().map(|(label, span)| {
                    let (src_id, span) = self.source_id_string_and_adjusted_rune_span(&span);
                    Label::new((src_id, (span.start()..span.end())))
                        .with_message(lazy_format!("in this {}", label))
                        .with_color(Color::Yellow)
                }))
                .with_labels(error_or_warning.related.into_iter().map(|(label, span)| {
                    let (src_id, span) = self.source_id_string_and_adjusted_rune_span(&span);
                    Label::new((src_id, (span.start()..span.end())))
                        .with_message(lazy_format!("{}", label))
                        .with_color(Color::Yellow)
                }))
                .finish()
                .write(ariadne::sources(self.sources()), w)?;

            if let Some(annotation) = annotation {
                w.write_fmt(core::format_args!("{}\n", &annotation))?;
            }
        }
        Ok(())
    }

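    /// Converts a byte-offset span into a character-offset span by binary-searching the table of
    /// byte indices at which each character of the source starts.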
    fn byte_to_rune(&self, char_indices: &[usize], byte_span: Span) -> Span {
        let mut rune_span = byte_span;
        rune_span.start = char_indices.partition_point(|&i| i < byte_span.start);
        rune_span.end = char_indices.partition_point(|&i| i < byte_span.end);
        rune_span
    }

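    /// The source text that the given error or warning refers to, with trailing whitespace
    /// trimmed from the end of its span.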
    pub fn error_source_text<'a, K>(&'a self, error_or_warning: &ErrorOrWarning<K>) -> &'a str
    where
        K: ErrorOrWarningKind,
    {
        let (source_content, _, byte_span, _rune_span) =
            self.get_adjusted_source(&error_or_warning.0.span);
        &source_content[byte_span.start..byte_span.end]
    }

    fn source_id_string_and_adjusted_rune_span(&self, span: &Span) -> (String, Span) {
        let (_, source_id, _byte_span, rune_span) = self.get_adjusted_source(span);
        (source_id.to_string(), rune_span)
    }

    fn get_adjusted_source(&self, span: &Span) -> (&str, &str, Span, Span) {
        use chumsky::span::Span;
        let source_id = span.context();
        let source_id_str = self.source_id_string(source_id);
        let empty_char_indices = Vec::default();
        let (source_content, source_content_char_indices) = if source_id == self.root_source_id {
            (self.root_content.as_str(), &self.root_content_char_indices)
        } else if let IncludedSource::Content(_, content, content_char_indices) =
            self.included_content.get(Path::new(source_id_str)).unwrap()
        {
            (content.as_str(), content_char_indices)
        } else {
            ("", &empty_char_indices)
        };

        let byte_span = trimmed_span(source_content, span);
        let rune_span = self.byte_to_rune(source_content_char_indices, byte_span);

        (source_content, source_id_str, byte_span, rune_span)
    }

    fn source_id_string(&self, source_id: SourceId) -> &str {
        self.source_id_strings[Into::<usize>::into(source_id)].as_str()
    }

    fn sources(&self) -> Vec<(String, &str)> {
        once((
            self.source_id_string(self.root_source_id).to_string(),
            self.root_content.as_str(),
        ))
        .chain(
            self.included_content
                .iter()
                .filter_map(|(_, included_source)| {
                    if let IncludedSource::Content(source_id, content, _) = included_source {
                        Some((
                            self.source_id_string(*source_id).to_string(),
                            content.as_str(),
                        ))
                    } else {
                        None
                    }
                }),
        )
        .collect()
    }

    fn content_iter(&self) -> impl Iterator<Item = (SourceId, Option<&Path>, &str)> {
        once((
            self.root_source_id,
            self.root_path.as_deref(),
            self.root_content.as_str(),
        ))
        .chain(
            self.included_content
                .iter()
                .filter_map(|(pathbuf, included_source)| {
                    if let IncludedSource::Content(source_id, content, _) = included_source {
                        Some((*source_id, Some(pathbuf.as_path()), content.as_str()))
                    } else {
                        None
                    }
                }),
        )
    }

    fn error_path_iter(&self) -> impl Iterator<Item = (Option<&Path>, String)> {
        self.included_content
            .iter()
            .filter_map(|(pathbuf, included_source)| {
                if let IncludedSource::IoError(e) = included_source {
                    Some((Some(pathbuf.as_path()), e.clone()))
                } else {
                    None
                }
            })
    }
}

impl TryFrom<PathBuf> for BeancountSources {
    type Error = io::Error;

    fn try_from(source_path: PathBuf) -> io::Result<Self> {
        Self::try_read_with_includes(source_path)
    }
}

impl TryFrom<&Path> for BeancountSources {
    type Error = io::Error;

    fn try_from(source_path: &Path) -> io::Result<Self> {
        Self::try_read_with_includes(source_path.to_owned())
    }
}

impl From<String> for BeancountSources {
    fn from(source_string: String) -> Self {
        Self::read_with_includes(None, source_string)
    }
}

impl From<&str> for BeancountSources {
    fn from(source_string: &str) -> Self {
        Self::read_with_includes(None, source_string.to_owned())
    }
}

impl std::fmt::Debug for BeancountSources {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        writeln!(f, "BeancountSources(")?;

        for (path, included_source) in &self.included_content {
            match included_source {
                IncludedSource::Content(source_id, content, _) => writeln!(
                    f,
                    " {} ok len {},",
                    self.source_id_string(*source_id),
                    content.len()
                )?,
                IncludedSource::IoError(e) => writeln!(f, " {:?} err {},", path, e)?,
                IncludedSource::Duplicate => writeln!(f, " {:?} duplicate include", path)?,
            }
        }

        writeln!(f, ")")
    }
}

pub fn lex_with_source<'a>(source_id: SourceId, s: &'a str) -> Vec<(Token<'a>, Span)> {
    lex(s)
        .map(|(tok, span)| (tok, chumsky::span::Span::new(source_id, span)))
        .collect::<Vec<_>>()
}

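/// Reads the entire contents of the file at `file_path` into a `String`.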
fn read<P>(file_path: P) -> io::Result<String>
where
    P: AsRef<Path>,
{
    let mut f = File::open(&file_path)?;
    let mut file_content = String::new();

    f.read_to_string(&mut file_content)?;
    Ok(file_content)
}

type SpannedToken<'t> = (Token<'t>, Span);

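/// A parser for a set of [`BeancountSources`], which it borrows for its lifetime.
///
/// A minimal usage sketch (assuming the crate is consumed as `beancount_parser_lima`; adjust the
/// crate name and error handling to suit):
///
/// ```ignore
/// use beancount_parser_lima::{BeancountParser, BeancountSources, ParseError, ParseSuccess};
///
/// let sources = BeancountSources::from("2024-01-01 commodity GBP\n");
/// let parser = BeancountParser::new(&sources);
///
/// match parser.parse() {
///     Ok(ParseSuccess { directives, .. }) => println!("parsed {} directives", directives.len()),
///     Err(ParseError { errors, .. }) => sources
///         .write_errors_or_warnings(&std::io::stderr(), errors)
///         .unwrap(),
/// }
/// ```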
pub struct BeancountParser<'s> {
    sources: &'s BeancountSources,
    tokenized_sources: Vec<Vec<SpannedToken<'s>>>,
}

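/// The output of a successful parse: directives sorted by date (with balance assertions ordered
/// first within each date), together with the options and plugins that were declared, and any
/// warnings.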
#[derive(Debug)]
pub struct ParseSuccess<'t> {
    pub directives: Vec<Spanned<Directive<'t>>>,
    pub options: Options<'t>,
    pub plugins: Vec<Plugin<'t>>,
    pub warnings: Vec<Warning>,
}

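/// The output of a failed parse: the errors that caused the failure, plus any warnings gathered
/// along the way.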
#[derive(Debug)]
pub struct ParseError {
    pub errors: Vec<Error>,
    pub warnings: Vec<Warning>,
}

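/// The raw output of parsing every source: declarations grouped by source path, the accumulated
/// options, and any errors and warnings.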
type ParseDeclarationsResult<'t> = (
    HashMap<Option<&'t Path>, Vec<Spanned<Declaration<'t>>>>,
    Options<'t>,
    Vec<Error>,
    Vec<Warning>,
);

impl<'s> BeancountParser<'s> {
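    /// Creates a parser over the given sources, lexing each of them up front.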
    pub fn new(sources: &'s BeancountSources) -> Self {
        let mut tokenized_sources = Vec::new();

        for (source_id, _path, content) in sources.content_iter() {
            tokenized_sources.push(lex_with_source(source_id, content));
        }

        BeancountParser {
            sources,
            tokenized_sources,
        }
    }

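    /// Parses all sources, processing pragmas and includes as it goes, and returns the
    /// date-sorted directives on success or the collected errors otherwise; warnings are
    /// returned in both cases.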
    pub fn parse<'a>(&'a self) -> Result<ParseSuccess<'a>, ParseError> {
        let (parsed_sources, options, mut errors, warnings) = self.parse_declarations();
        let error_paths = self.sources.error_path_iter().collect::<HashMap<_, _>>();
        let mut p = PragmaProcessor::new(self.root_path(), parsed_sources, error_paths, options);

        let directives = p
            .by_ref()
            .sort(|d| {
                (
                    *d.item().date().item(),
                    !matches!(d.variant(), DirectiveVariant::Balance(_)),
                )
            })
            .collect::<Vec<_>>();
        let (options, plugins, mut pragma_errors) = p.result();
        errors.append(&mut pragma_errors);

        if errors.is_empty() {
            Ok(ParseSuccess {
                directives,
                options,
                plugins,
                warnings,
            })
        } else {
            Err(ParseError { errors, warnings })
        }
    }

    fn root_path(&self) -> Option<&'s Path> {
        self.sources.root_path.as_deref()
    }

    fn parse_declarations<'a>(&'a self) -> ParseDeclarationsResult<'a> {
        let mut all_outputs = HashMap::new();
        let mut all_errors = Vec::new();
        let mut parser_state = chumsky::extra::SimpleState(ParserState::default());

        for (source_id, source_path, content) in self.sources.content_iter() {
            let i_source: usize = source_id.into();
            let tokens = &self.tokenized_sources[i_source];

            let spanned_tokens = tokens
                .map(end_of_input(source_id, content), |(t, s)| (t, s))
                .with_context(source_id);

            let (output, errors) = file(source_path)
                .parse_with_state(spanned_tokens, &mut parser_state)
                .into_output_errors();

            all_outputs.insert(source_path, output.unwrap_or(Vec::new()));
            all_errors.extend(errors);
        }

        let ParserState { options, warnings } = parser_state.0;

        (
            all_outputs,
            Options::new(options),
            all_errors.into_iter().map(Error::from).collect(),
            warnings,
        )
    }
}

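/// An iterator over parsed declarations that interprets pragmas as it goes: it maintains the
/// pushtag/pushmeta stacks, splices the declarations of included files in at their point of
/// inclusion, and accumulates options, plugins, and pragma-related errors, yielding only plain
/// directives augmented with whatever tags and metadata are currently pushed.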
#[derive(Debug)]
struct PragmaProcessor<'s> {
    current_path: Option<PathBuf>,
    current_declarations: VecDeque<Spanned<Declaration<'s>>>,
    stacked: VecDeque<(Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>)>,
    remaining: HashMap<Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>>,
    error_paths: HashMap<Option<PathBuf>, String>,
    include_span_by_canonical_path: HashMap<PathBuf, Span>,
    tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
    meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
    options: Options<'s>,
    plugins: Vec<Plugin<'s>>,
    errors: Vec<Error>,
}

impl<'s> PragmaProcessor<'s> {
    fn new(
        root_path: Option<&Path>,
        parsed_sources: HashMap<Option<&Path>, Vec<Spanned<Declaration<'s>>>>,
        error_paths: HashMap<Option<&Path>, String>,
        options: Options<'s>,
    ) -> Self {
        let mut remaining = parsed_sources
            .into_iter()
            .map(|(path, declarations)| {
                (path.map(|p| p.to_path_buf()), VecDeque::from(declarations))
            })
            .collect::<HashMap<_, _>>();
        let error_paths = error_paths
            .into_iter()
            .map(|(path, e)| (path.map(|p| p.to_path_buf()), e))
            .collect::<HashMap<_, _>>();

        let current_path = root_path.map(|p| p.to_path_buf());
        let current_declarations = remaining.remove(&current_path).unwrap();

        PragmaProcessor {
            current_path,
            current_declarations,
            stacked: VecDeque::new(),
            remaining,
            error_paths,
            include_span_by_canonical_path: HashMap::default(),
            tags: HashMap::new(),
            meta_key_values: HashMap::new(),
            options,
            plugins: Vec::new(),
            errors: Vec::new(),
        }
    }

    fn result(self) -> (Options<'s>, Vec<Plugin<'s>>, Vec<Error>) {
        let mut errors = self.errors;

        for (key, _value) in self.meta_key_values {
            let e = Error::new(
                "invalid pushmeta",
                "missing corresponding popmeta",
                key.span,
            );
            errors.push(e);
        }

        for (tag, others) in self.tags {
            let e = Error::new("invalid pushtag", "missing corresponding poptag", tag.span);
            errors.push(e);
            for other in others {
                let e = Error::new(
                    "invalid pushtag",
                    "missing corresponding poptag",
                    other.span,
                );
                errors.push(e);
            }
        }

        (self.options, self.plugins, errors)
    }
}

impl<'s> Iterator for PragmaProcessor<'s> {
    type Item = Spanned<Directive<'s>>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.current_declarations.pop_front() {
            Some(declaration) => {
                match declaration.item {
                    Declaration::Directive(mut directive) => {
                        directive.metadata.augment_tags(&self.tags);
                        directive.metadata.augment_key_values(&self.meta_key_values);

                        Some(spanned(directive, declaration.span))
                    }

                    Declaration::Pragma(pragma) => {
                        use Pragma::*;

                        match pragma {
                            Pushtag(tag) => match self.tags.get_mut(&tag) {
                                Some(others) => {
                                    others.push(tag);
                                }
                                None => {
                                    self.tags.insert(tag, Vec::default());
                                }
                            },
                            Poptag(tag) => {
                                let mut last_tag = false;

                                match self.tags.get_mut(&tag) {
                                    Some(others) => {
                                        if others.is_empty() {
                                            last_tag = true;
                                        } else {
                                            others.pop();
                                        }
                                    }
                                    None => {
                                        let e = Error::new(
                                            "invalid poptag",
                                            "missing corresponding pushtag",
                                            tag.span,
                                        );
                                        self.errors.push(e);
                                    }
                                }

                                if last_tag {
                                    self.tags.remove(&tag);
                                }
                            }
                            Pushmeta(meta) => match self.meta_key_values.get_mut(&meta.key) {
                                Some(values) => {
                                    values.push((meta.key.span, meta.value));
                                }
                                None => {
                                    self.meta_key_values
                                        .insert(meta.key, vec![(meta.key.span, meta.value)]);
                                }
                            },
                            Popmeta(meta) => {
                                let mut last_meta = false;

                                match self.meta_key_values.get_mut(&meta) {
                                    Some(values) => {
                                        values.pop();
                                        if values.is_empty() {
                                            last_meta = true;
                                        }
                                    }
                                    None => {
                                        let e = Error::new(
                                            "invalid popmeta",
                                            "missing corresponding pushmeta",
                                            meta.span,
                                        );
                                        self.errors.push(e);
                                    }
                                }

                                if last_meta {
                                    self.meta_key_values.remove(&meta);
                                }
                            }
                            Include(relpath) => {
                                let (path, span) = (
                                    Some(resolve_included_path(
                                        self.current_path.as_ref(),
                                        AsRef::<Path>::as_ref(*relpath.item()),
                                    )),
                                    *relpath.span(),
                                );
                                let canonical_path =
                                    path.as_ref().and_then(|p| p.canonicalize().ok());

                                match self.remaining.remove_entry(&path) {
                                    Some((included_path, included_declarations)) => {
                                        let stacked_path = std::mem::replace(
                                            &mut self.current_path,
                                            included_path,
                                        );
                                        let stacked_declarations = std::mem::replace(
                                            &mut self.current_declarations,
                                            included_declarations,
                                        );
                                        self.stacked
                                            .push_front((stacked_path, stacked_declarations));

                                        if let Some(canonical_path) = canonical_path {
                                            self.include_span_by_canonical_path
                                                .insert(canonical_path, span);
                                        }
                                    }

                                    None => {
                                        let e = match self.error_paths.get(&path) {
                                            Some(e) => {
                                                Error::new("can't read file", e.to_string(), span)
                                            }
                                            None => {
                                                let e = Error::new(
                                                    "duplicate include",
                                                    "file already included",
                                                    span,
                                                );

                                                if let Some(span) = canonical_path.and_then(|p| {
                                                    self.include_span_by_canonical_path.get(&p)
                                                }) {
                                                    e.related_to_named_span("file", *span)
                                                } else {
                                                    e
                                                }
                                            }
                                        };
                                        self.errors.push(e);
                                    }
                                }
                            }

                            Option(opt) => {
                                if let Err(e) = self.options.assimilate(opt) {
                                    self.errors.push(e);
                                }
                            }

                            Plugin(plugin) => self.plugins.push(plugin),
                        }

                        self.next()
                    }
                }
            }
            None => match self.stacked.pop_front() {
                Some((path, declarations)) => {
                    self.current_path = path;
                    self.current_declarations = declarations;
                    self.next()
                }
                None => None,
            },
        }
    }
}

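/// A zero-length span at the end of `s`, used as the end-of-input marker for a token stream.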
fn end_of_input(source_id: SourceId, s: &str) -> Span {
    chumsky::span::Span::new(source_id, s.len()..s.len())
}

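/// A copy of `span` with its end pulled back past any trailing whitespace in `source`.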
fn trimmed_span(source: &str, span: &Span) -> Span {
    let mut trimmed = *span;
    trimmed.end = trim_trailing_whitespace(source, span.start, span.end);
    trimmed
}

#[cfg(test)]
pub use lexer::bare_lex;
mod format;
mod lexer;
pub use options::Options;
pub(crate) mod options;
mod parsers;
mod sort;
mod trim;
pub mod types;