#![allow(dead_code)]
#![recursion_limit = "256"]
#![doc = include_str!("../README.md")]

use ariadne::{Color, Label, Report};
use chumsky::prelude::{Input, Parser};
use lazy_format::lazy_format;
use lexer::{lex, Token};
use parsers::{file, includes, ParserState};
use sort::SortIteratorAdaptor;
use std::{
    collections::{HashMap, HashSet, VecDeque},
    ffi::OsStr,
    fmt::{self, Formatter},
    fs::File,
    io::{self, Read, Write},
    iter::once,
    path::{Path, PathBuf},
};

pub use crate::{trim::trim_trailing_whitespace, types::*};

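/// The root Beancount source plus the content of every file it transitively
/// includes, read up front so that errors and warnings can be reported with
/// labels into any of the files involved.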
#[derive(Clone)]
pub struct BeancountSources {
    root_path: Option<PathBuf>,
    root_source_id: SourceId,
    root_content: String,
    root_content_char_indices: Vec<usize>,
    included_content: HashMap<PathBuf, IncludedSource>,
    source_id_strings: Vec<String>,
}

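/// What became of one included file: its content together with cached char
/// indices, the I/O error that prevented reading it, or a marker that it had
/// already been included via another path.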
#[derive(Clone, Debug)]
enum IncludedSource {
    Content(SourceId, String, Vec<usize>),
    IoError(String),
    Duplicate,
}

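/// Lexes `content` and runs only the include-pragma parser over it, returning
/// the included paths exactly as written in the source.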
fn get_includes(content: &str, source_id: SourceId) -> Vec<String> {
    fn get_includes_for_tokens(
        tokens: Vec<(Token, Span)>,
        source_id: SourceId,
        end_of_input: Span,
    ) -> Vec<String> {
        let mut parser_state = chumsky::extra::SimpleState(ParserState::default());

        let spanned_tokens = tokens
            .map(end_of_input, |(t, s)| (t, s))
            .with_context(source_id);

        includes()
            .parse_with_state(spanned_tokens, &mut parser_state)
            .into_output()
            .unwrap_or_default()
    }

    let tokens = lex_with_source(source_id, content);
    get_includes_for_tokens(tokens, source_id, end_of_input(source_id, content))
}

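/// The parent directory of `p`, or `None` when that parent is empty, i.e. when
/// `p` is a bare filename.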
fn path_dir(p: &Path) -> Option<&Path> {
    p.parent().and_then(|p| {
        if !AsRef::<OsStr>::as_ref(&p).is_empty() {
            Some(p)
        } else {
            None
        }
    })
}

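/// Resolves an included path against the directory of the including file,
/// falling back to the path as written when there is no such directory.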
fn resolve_included_path(including_path: Option<&PathBuf>, included_path: &Path) -> PathBuf {
    match including_path.and_then(|p| path_dir(p.as_ref())) {
        Some(p) => p.join(included_path),
        None => included_path.to_path_buf(),
    }
}

impl BeancountSources {
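    /// Reads the root file and everything it includes; only a failure to read
    /// the root file itself is an error here, per-include failures are
    /// recorded and reported later.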
    fn try_read_with_includes(root_path: PathBuf) -> io::Result<Self> {
        let root_content = read(&root_path)?;
        Ok(Self::read_with_includes(Some(root_path), root_content))
    }

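    /// Builds the full source set from already-read root content, following
    /// `include` pragmas breadth-first and de-duplicating files by canonical
    /// path.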
    fn read_with_includes(root_path: Option<PathBuf>, root_content: String) -> Self {
        let root_source_id = SourceId::default();
        let root_source_id_string = root_path
            .as_ref()
            .map(|p| p.to_string_lossy().into())
            .unwrap_or("inline".to_string());
        let mut source_id_strings = Vec::from([root_source_id_string]);

        let mut pending_paths = get_includes(&root_content, root_source_id)
            .into_iter()
            .map(|included_path| resolve_included_path(root_path.as_ref(), included_path.as_ref()))
            .collect::<VecDeque<_>>();

        let mut included_content: HashMap<PathBuf, IncludedSource> = HashMap::new();

        let mut canonical_paths =
            HashSet::from([root_path.as_ref().and_then(|p| p.canonicalize().ok())]);

        while !pending_paths.is_empty() {
            let path = pending_paths.pop_front().unwrap();
            let canonical_path = path.canonicalize().ok();

            if canonical_paths.contains(&canonical_path) {
                included_content
                    .entry(path)
                    .or_insert(IncludedSource::Duplicate);
            } else {
                canonical_paths.insert(canonical_path);

                let source_id = SourceId::from(source_id_strings.len());
                source_id_strings.push(path.to_string_lossy().into());

                let included_source = read(&path).map_or_else(
                    |e| IncludedSource::IoError(e.to_string()),
                    |c| {
                        let char_indices = c.char_indices().map(|(i, _)| i).collect::<Vec<_>>();
                        IncludedSource::Content(source_id, c, char_indices)
                    },
                );

                included_content.insert(path.clone(), included_source);
                let included_source = included_content.get(&path).unwrap();

                if let IncludedSource::Content(_, content, _) = included_source {
                    let mut includes = get_includes(content, source_id)
                        .into_iter()
                        .map(|included_path| {
                            resolve_included_path(Some(&path), included_path.as_ref())
                        })
                        .collect::<VecDeque<_>>();
                    pending_paths.append(&mut includes);
                }
            }
        }

        let root_content_char_indices = root_content
            .char_indices()
            .map(|(i, _)| i)
            .collect::<Vec<_>>();

        Self {
            root_path,
            root_source_id,
            root_content,
            root_content_char_indices,
            included_content,
            source_id_strings,
        }
    }

    #[deprecated(since = "0.12.0", note = "Use `write_errors_or_warnings` instead")]
    pub fn write<W, E, K>(&self, w: W, errors_or_warnings: Vec<E>) -> io::Result<()>
    where
        W: Write + Copy,
        E: Into<AnnotatedErrorOrWarning<K>>,
        K: ErrorOrWarningKind,
    {
        self.write_errors_or_warnings(w, errors_or_warnings)
    }

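    /// Writes an `ariadne` report for each error or warning to `w`, resolving
    /// every span to its source file and char offsets, followed by any
    /// annotation attached to it.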
    pub fn write_errors_or_warnings<W, E, K>(
        &self,
        mut w: W,
        errors_or_warnings: Vec<E>,
    ) -> io::Result<()>
    where
        W: Write + Copy,
        E: Into<AnnotatedErrorOrWarning<K>>,
        K: ErrorOrWarningKind,
    {
        for error_or_warning in errors_or_warnings.into_iter() {
            use chumsky::span::Span;
            let AnnotatedErrorOrWarning {
                error_or_warning,
                annotation,
            } = error_or_warning.into();
            let (src_id, span) =
                self.source_id_string_and_adjusted_rune_span(&error_or_warning.span);
            let color = error_or_warning.color();
            let report_kind = error_or_warning.report_kind();

            Report::build(report_kind, (src_id.clone(), (span.start()..span.end())))
                .with_message(error_or_warning.message)
                .with_labels(Some(
                    Label::new((src_id, (span.start()..span.end())))
                        .with_message(error_or_warning.reason)
                        .with_color(color),
                ))
                .with_labels(error_or_warning.contexts.into_iter().map(|(label, span)| {
                    let (src_id, span) = self.source_id_string_and_adjusted_rune_span(&span);
                    Label::new((src_id, (span.start()..span.end())))
                        .with_message(lazy_format!("in this {}", label))
                        .with_color(Color::Yellow)
                }))
                .with_labels(error_or_warning.related.into_iter().map(|(label, span)| {
                    let (src_id, span) = self.source_id_string_and_adjusted_rune_span(&span);
                    Label::new((src_id, (span.start()..span.end())))
                        .with_message(lazy_format!("{}", label))
                        .with_color(Color::Yellow)
                }))
                .finish()
                .write(ariadne::sources(self.sources()), w)?;

            if let Some(annotation) = annotation {
                w.write_fmt(core::format_args!("{}\n", &annotation))?;
            }
        }
        Ok(())
    }

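    /// Converts a byte-offset span into a char-offset ("rune") span using the
    /// cached char indices of the relevant source.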
    fn byte_to_rune(&self, char_indices: &[usize], byte_span: Span) -> Span {
        let mut rune_span = byte_span;
        rune_span.start = char_indices.partition_point(|&i| i < byte_span.start);
        rune_span.end = char_indices.partition_point(|&i| i < byte_span.end);
        rune_span
    }

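    /// The source text covered by the span of `error_or_warning`, with
    /// trailing whitespace trimmed.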
    pub fn error_source_text<'a, K>(&'a self, error_or_warning: &ErrorOrWarning<K>) -> &'a str
    where
        K: ErrorOrWarningKind,
    {
        let (source_content, _, byte_span, _rune_span) =
            self.get_adjusted_source(&error_or_warning.span);
        &source_content[byte_span.start..byte_span.end]
    }

    fn source_id_string_and_adjusted_rune_span(&self, span: &Span) -> (String, Span) {
        let (_, source_id, _byte_span, rune_span) = self.get_adjusted_source(span);
        (source_id.to_string(), rune_span)
    }

    fn get_adjusted_source(&self, span: &Span) -> (&str, &str, Span, Span) {
        use chumsky::span::Span;
        let source_id = span.context();
        let source_id_str = self.source_id_string(source_id);
        let empty_char_indices = Vec::default();
        let (source_content, source_content_char_indices) = if source_id == self.root_source_id {
            (self.root_content.as_str(), &self.root_content_char_indices)
        } else if let IncludedSource::Content(_, content, content_char_indices) =
            self.included_content.get(Path::new(source_id_str)).unwrap()
        {
            (content.as_str(), content_char_indices)
        } else {
            ("", &empty_char_indices)
        };

        let byte_span = trimmed_span(source_content, span);
        let rune_span = self.byte_to_rune(source_content_char_indices, byte_span);

        (source_content, source_id_str, byte_span, rune_span)
    }

    fn source_id_string(&self, source_id: SourceId) -> &str {
        self.source_id_strings[Into::<usize>::into(source_id)].as_str()
    }

    fn sources(&self) -> Vec<(String, &str)> {
        once((
            self.source_id_string(self.root_source_id).to_string(),
            self.root_content.as_str(),
        ))
        .chain(
            self.included_content
                .iter()
                .filter_map(|(_, included_source)| {
                    if let IncludedSource::Content(source_id, content, _) = included_source {
                        Some((
                            self.source_id_string(*source_id).to_string(),
                            content.as_str(),
                        ))
                    } else {
                        None
                    }
                }),
        )
        .collect()
    }

    fn content_iter(&self) -> impl Iterator<Item = (SourceId, Option<&Path>, &str)> {
        once((
            self.root_source_id,
            self.root_path.as_deref(),
            self.root_content.as_str(),
        ))
        .chain(
            self.included_content
                .iter()
                .filter_map(|(pathbuf, included_source)| {
                    if let IncludedSource::Content(source_id, content, _) = included_source {
                        Some((*source_id, Some(pathbuf.as_path()), content.as_str()))
                    } else {
                        None
                    }
                }),
        )
    }

    fn error_path_iter(&self) -> impl Iterator<Item = (Option<&Path>, String)> {
        self.included_content
            .iter()
            .filter_map(|(pathbuf, included_source)| {
                if let IncludedSource::IoError(e) = included_source {
                    Some((Some(pathbuf.as_path()), e.clone()))
                } else {
                    None
                }
            })
    }
}

impl TryFrom<PathBuf> for BeancountSources {
    type Error = io::Error;

    fn try_from(source_path: PathBuf) -> io::Result<Self> {
        Self::try_read_with_includes(source_path)
    }
}

impl TryFrom<&Path> for BeancountSources {
    type Error = io::Error;

    fn try_from(source_path: &Path) -> io::Result<Self> {
        Self::try_read_with_includes(source_path.to_owned())
    }
}

impl From<String> for BeancountSources {
    fn from(source_string: String) -> Self {
        Self::read_with_includes(None, source_string)
    }
}

impl From<&str> for BeancountSources {
    fn from(source_string: &str) -> Self {
        Self::read_with_includes(None, source_string.to_owned())
    }
}

impl std::fmt::Debug for BeancountSources {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        writeln!(f, "BeancountSources(")?;

        for (path, included_source) in &self.included_content {
            match included_source {
                IncludedSource::Content(source_id, content, _) => writeln!(
                    f,
                    " {} ok len {},",
                    self.source_id_string(*source_id),
                    content.len()
                )?,
                IncludedSource::IoError(e) => writeln!(f, " {:?} err {},", path, e)?,
                IncludedSource::Duplicate => writeln!(f, " {:?} duplicate include", path)?,
            }
        }

        writeln!(f, ")")
    }
}

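/// Lexes `s`, tagging each token's span with `source_id` so spans from
/// different files remain distinguishable.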
pub fn lex_with_source(source_id: SourceId, s: &str) -> Vec<(Token, Span)> {
    lex(s)
        .map(|(tok, span)| (tok, chumsky::span::Span::new(source_id, span)))
        .collect::<Vec<_>>()
}

fn read<P>(file_path: P) -> io::Result<String>
where
    P: AsRef<Path>,
{
    let mut f = File::open(&file_path)?;
    let mut file_content = String::new();

    f.read_to_string(&mut file_content)?;
    Ok(file_content)
}

type SpannedToken<'t> = (Token<'t>, Span);

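/// Parser over a `BeancountSources`, holding the tokenized form of every
/// source so that parse results can borrow from it.
///
/// A minimal usage sketch (illustrative only, not compiled as a doctest; the
/// ledger text is a placeholder):
///
/// ```ignore
/// let sources = BeancountSources::from("2024-01-01 open Assets:Bank\n");
/// let parser = BeancountParser::new(&sources);
/// match parser.parse() {
///     Ok(ParseSuccess { directives, .. }) => println!("{} directives", directives.len()),
///     Err(ParseError { errors, .. }) => eprintln!("{} errors", errors.len()),
/// }
/// ```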
pub struct BeancountParser<'s> {
    sources: &'s BeancountSources,
    tokenized_sources: Vec<Vec<SpannedToken<'s>>>,
}

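/// The result of a successful parse: date-ordered directives, accumulated
/// options and plugins, and any non-fatal warnings.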
#[derive(Debug)]
pub struct ParseSuccess<'t> {
    pub directives: Vec<Spanned<Directive<'t>>>,
    pub options: Options<'t>,
    pub plugins: Vec<Plugin<'t>>,
    pub warnings: Vec<Warning>,
}

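/// The result of a failed parse: the errors that caused it, plus any warnings
/// gathered before the failure.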
#[derive(Debug)]
pub struct ParseError {
    pub errors: Vec<Error>,
    pub warnings: Vec<Warning>,
}

type ParseDeclarationsResult<'t> = (
    HashMap<Option<&'t Path>, Vec<Spanned<Declaration<'t>>>>,
    Options<'t>,
    Vec<Error>,
    Vec<Warning>,
);

impl<'s> BeancountParser<'s> {
    pub fn new(sources: &'s BeancountSources) -> Self {
        let mut tokenized_sources = Vec::new();

        for (source_id, _path, content) in sources.content_iter() {
            tokenized_sources.push(lex_with_source(source_id, content));
        }

        BeancountParser {
            sources,
            tokenized_sources,
        }
    }

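    /// Parses every source, expands include/pushtag/pushmeta pragmas, and
    /// returns the directives sorted by date (balance directives first within
    /// a date), or the accumulated errors.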
    pub fn parse(&self) -> Result<ParseSuccess, ParseError> {
        let (parsed_sources, options, mut errors, warnings) = self.parse_declarations();
        let error_paths = self.sources.error_path_iter().collect::<HashMap<_, _>>();
        let mut p = PragmaProcessor::new(self.root_path(), parsed_sources, error_paths, options);

        let directives = p
            .by_ref()
            .sort(|d| {
                (
                    *d.item().date().item(),
                    !matches!(d.variant(), DirectiveVariant::Balance(_)),
                )
            })
            .collect::<Vec<_>>();
        let (options, plugins, mut pragma_errors) = p.result();
        errors.append(&mut pragma_errors);

        if errors.is_empty() {
            Ok(ParseSuccess {
                directives,
                options,
                plugins,
                warnings,
            })
        } else {
            Err(ParseError { errors, warnings })
        }
    }

    fn root_path(&self) -> Option<&'s Path> {
        self.sources.root_path.as_deref()
    }

    fn parse_declarations(&self) -> ParseDeclarationsResult {
        let mut all_outputs = HashMap::new();
        let mut all_errors = Vec::new();
        let mut parser_state = chumsky::extra::SimpleState(ParserState::default());

        for (source_id, source_path, content) in self.sources.content_iter() {
            let i_source: usize = source_id.into();
            let tokens = &self.tokenized_sources[i_source];

            let spanned_tokens = tokens
                .map(end_of_input(source_id, content), |(t, s)| (t, s))
                .with_context(source_id);

            let (output, errors) = file(source_path)
                .parse_with_state(spanned_tokens, &mut parser_state)
                .into_output_errors();

            all_outputs.insert(source_path, output.unwrap_or(Vec::new()));
            all_errors.extend(errors);
        }

        let ParserState { options, warnings } = parser_state.0;

        (
            all_outputs,
            Options::new(options),
            all_errors.into_iter().map(Error::from).collect(),
            warnings,
        )
    }
}

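/// Iterator that walks parsed declarations in include order, switching files
/// when it meets an `include` pragma, applying pushed tags and metadata to the
/// directives it yields, and collecting options, plugins, and pragma errors.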
#[derive(Debug)]
struct PragmaProcessor<'s> {
    current_path: Option<PathBuf>,
    current_declarations: VecDeque<Spanned<Declaration<'s>>>,
    stacked: VecDeque<(Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>)>,
    remaining: HashMap<Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>>,
    error_paths: HashMap<Option<PathBuf>, String>,
    include_span_by_canonical_path: HashMap<PathBuf, Span>,
    tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
    meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
    options: Options<'s>,
    plugins: Vec<Plugin<'s>>,
    errors: Vec<Error>,
}

impl<'s> PragmaProcessor<'s> {
    fn new(
        root_path: Option<&Path>,
        parsed_sources: HashMap<Option<&Path>, Vec<Spanned<Declaration<'s>>>>,
        error_paths: HashMap<Option<&Path>, String>,
        options: Options<'s>,
    ) -> Self {
        let mut remaining = parsed_sources
            .into_iter()
            .map(|(path, declarations)| {
                (path.map(|p| p.to_path_buf()), VecDeque::from(declarations))
            })
            .collect::<HashMap<_, _>>();
        let error_paths = error_paths
            .into_iter()
            .map(|(path, e)| (path.map(|p| p.to_path_buf()), e))
            .collect::<HashMap<_, _>>();

        let current_path = root_path.map(|p| p.to_path_buf());
        let current_declarations = remaining.remove(&current_path).unwrap();

        PragmaProcessor {
            current_path,
            current_declarations,
            stacked: VecDeque::new(),
            remaining,
            error_paths,
            include_span_by_canonical_path: HashMap::default(),
            tags: HashMap::new(),
            meta_key_values: HashMap::new(),
            options,
            plugins: Vec::new(),
            errors: Vec::new(),
        }
    }

    fn result(self) -> (Options<'s>, Vec<Plugin<'s>>, Vec<Error>) {
        let mut errors = self.errors;

        for (key, _value) in self.meta_key_values {
            let e = Error::new(
                "invalid pushmeta",
                "missing corresponding popmeta",
                key.span,
            );
            errors.push(e);
        }

        for (tag, others) in self.tags {
            let e = Error::new("invalid pushtag", "missing corresponding poptag", tag.span);
            errors.push(e);
            for other in others {
                let e = Error::new(
                    "invalid pushtag",
                    "missing corresponding poptag",
                    other.span,
                );
                errors.push(e);
            }
        }

        (self.options, self.plugins, errors)
    }
}

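// Only directives are yielded; pragmas are consumed as side effects (tag and
// metadata stacks, includes, options, plugins) and iteration then continues
// with the next declaration.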
impl<'s> Iterator for PragmaProcessor<'s> {
    type Item = Spanned<Directive<'s>>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.current_declarations.pop_front() {
            Some(declaration) => {
                match declaration.item {
                    Declaration::Directive(mut directive) => {
                        directive.metadata.augment_tags(&self.tags);
                        directive.metadata.augment_key_values(&self.meta_key_values);

                        Some(spanned(directive, declaration.span))
                    }

                    Declaration::Pragma(pragma) => {
                        use Pragma::*;

                        match pragma {
                            Pushtag(tag) => match self.tags.get_mut(&tag) {
                                Some(others) => {
                                    others.push(tag);
                                }
                                None => {
                                    self.tags.insert(tag, Vec::default());
                                }
                            },
                            Poptag(tag) => {
                                let mut last_tag = false;

                                match self.tags.get_mut(&tag) {
                                    Some(others) => {
                                        if others.is_empty() {
                                            last_tag = true;
                                        } else {
                                            others.pop();
                                        }
                                    }
                                    None => {
                                        let e = Error::new(
                                            "invalid poptag",
                                            "missing corresponding pushtag",
                                            tag.span,
                                        );
                                        self.errors.push(e);
                                    }
                                }

                                if last_tag {
                                    self.tags.remove(&tag);
                                }
                            }
                            Pushmeta(meta) => match self.meta_key_values.get_mut(&meta.key) {
                                Some(values) => {
                                    values.push((meta.key.span, meta.value));
                                }
                                None => {
                                    self.meta_key_values
                                        .insert(meta.key, vec![(meta.key.span, meta.value)]);
                                }
                            },
                            Popmeta(meta) => {
                                let mut last_meta = false;

                                match self.meta_key_values.get_mut(&meta) {
                                    Some(values) => {
                                        values.pop();
                                        if values.is_empty() {
                                            last_meta = true;
                                        }
                                    }
                                    None => {
                                        let e = Error::new(
                                            "invalid popmeta",
                                            "missing corresponding pushmeta",
                                            meta.span,
                                        );
                                        self.errors.push(e);
                                    }
                                }

                                if last_meta {
                                    self.meta_key_values.remove(&meta);
                                }
                            }
                            Include(relpath) => {
                                let (path, span) = (
                                    Some(resolve_included_path(
                                        self.current_path.as_ref(),
                                        AsRef::<Path>::as_ref(*relpath.item()),
                                    )),
                                    *relpath.span(),
                                );
                                let canonical_path =
                                    path.as_ref().and_then(|p| p.canonicalize().ok());

                                match self.remaining.remove_entry(&path) {
                                    Some((included_path, included_declarations)) => {
                                        let stacked_path = std::mem::replace(
                                            &mut self.current_path,
                                            included_path,
                                        );
                                        let stacked_declarations = std::mem::replace(
                                            &mut self.current_declarations,
                                            included_declarations,
                                        );
                                        self.stacked
                                            .push_front((stacked_path, stacked_declarations));

                                        if let Some(canonical_path) = canonical_path {
                                            self.include_span_by_canonical_path
                                                .insert(canonical_path, span);
                                        }
                                    }

                                    None => {
                                        let e = match self.error_paths.get(&path) {
                                            Some(e) => {
                                                Error::new("can't read file", e.to_string(), span)
                                            }
                                            None => {
                                                let e = Error::new(
                                                    "duplicate include",
                                                    "file already included",
                                                    span,
                                                );

                                                if let Some(span) = canonical_path.and_then(|p| {
                                                    self.include_span_by_canonical_path.get(&p)
                                                }) {
                                                    e.related_to_named_span("file", *span)
                                                } else {
                                                    e
                                                }
                                            }
                                        };
                                        self.errors.push(e);
                                    }
                                }
                            }

                            Option(opt) => {
                                if let Err(e) = self.options.assimilate(opt) {
                                    self.errors.push(e);
                                }
                            }

                            Plugin(plugin) => self.plugins.push(plugin),
                        }

                        self.next()
                    }
                }
            }
            None => match self.stacked.pop_front() {
                Some((path, declarations)) => {
                    self.current_path = path;
                    self.current_declarations = declarations;
                    self.next()
                }
                None => None,
            },
        }
    }
}

fn end_of_input(source_id: SourceId, s: &str) -> Span {
    chumsky::span::Span::new(source_id, s.len()..s.len())
}

fn trimmed_span(source: &str, span: &Span) -> Span {
    let mut trimmed = *span;
    trimmed.end = trim_trailing_whitespace(source, span.start, span.end);
    trimmed
}

#[cfg(test)]
pub use lexer::bare_lex;
mod format;
mod lexer;
pub use options::Options;
mod options;
mod parsers;
mod sort;
mod trim;
pub mod types;