1#![allow(dead_code)]
3#![recursion_limit = "256"]
4#![doc = include_str!("../README.md")]
5
6use ariadne::{Color, Label, Report};
86use chumsky::prelude::{Input, Parser};
87use lazy_format::lazy_format;
88use lexer::{lex, Token};
89use parsers::{file, includes, ParserState};
90use sort::SortIteratorAdaptor;
91use std::{
92 collections::{HashMap, HashSet, VecDeque},
93 ffi::OsStr,
94 fmt::{self, Formatter},
95 fs::File,
96 io::{self, Read, Write},
97 iter::once,
98 path::{Path, PathBuf},
99};
100
101pub use crate::{trim::trim_trailing_whitespace, types::*};
102
103#[derive(Clone)]
119pub struct BeancountSources {
120 root_path: Option<PathBuf>,
121 root_source_id: SourceId,
122 root_content: String,
123 root_content_char_indices: Vec<usize>,
124 included_content: HashMap<PathBuf, IncludedSource>,
125 source_id_strings: Vec<String>, }
127
128#[derive(Clone, Debug)]
129enum IncludedSource {
130 Content(SourceId, String, Vec<usize>), IoError(String),
132 Duplicate,
133}
134
135fn get_includes(content: &str, source_id: SourceId) -> Vec<String> {
137 fn get_includes_for_tokens(
138 tokens: Vec<(Token, Span)>,
139 source_id: SourceId,
140 end_of_input: Span,
141 ) -> Vec<String> {
142 let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
143
144 let spanned_tokens = tokens
145 .map(end_of_input, |(t, s)| (t, s))
146 .with_context(source_id);
147
148 includes()
150 .parse_with_state(spanned_tokens, &mut parser_state)
151 .into_output()
152 .unwrap_or_default()
153 }
154
155 let tokens = lex_with_source(source_id, content);
156 get_includes_for_tokens(tokens, source_id, end_of_input(source_id, content))
157}
158
/// Return the directory component of `p`.
///
/// Yields `None` both when `p` has no parent at all and when the parent is
/// the empty path, as it is for a bare file name.
fn path_dir(p: &Path) -> Option<&Path> {
    let dir = p.parent()?;
    if dir.as_os_str().is_empty() {
        None
    } else {
        Some(dir)
    }
}
169
170fn resolve_included_path(including_path: Option<&PathBuf>, included_path: &Path) -> PathBuf {
172 match including_path.and_then(|p| path_dir(p.as_ref())) {
173 Some(p) => p.join(included_path),
174 None => included_path.to_path_buf(),
175 }
176}
177
178impl BeancountSources {
179 fn try_read_with_includes(root_path: PathBuf) -> io::Result<Self> {
180 let root_content = read(&root_path)?;
181 Ok(Self::read_with_includes(Some(root_path), root_content))
182 }
183
184 fn read_with_includes(root_path: Option<PathBuf>, root_content: String) -> Self {
185 let root_source_id = SourceId::default();
186 let root_source_id_string = root_path
187 .as_ref()
188 .map(|p| p.to_string_lossy().into())
189 .unwrap_or("inline".to_string());
190 let mut source_id_strings = Vec::from([root_source_id_string]);
191
192 let mut pending_paths = get_includes(&root_content, root_source_id)
193 .into_iter()
194 .map(|included_path| resolve_included_path(root_path.as_ref(), included_path.as_ref()))
195 .collect::<VecDeque<_>>();
196
197 let mut included_content: HashMap<PathBuf, IncludedSource> = HashMap::new();
198
199 let mut canonical_paths =
201 HashSet::from([root_path.as_ref().and_then(|p| p.canonicalize().ok())]);
202
203 while !pending_paths.is_empty() {
204 let path = pending_paths.pop_front().unwrap();
205 let canonical_path = path.canonicalize().ok();
206
207 if canonical_paths.contains(&canonical_path) {
208 included_content
210 .entry(path)
211 .or_insert(IncludedSource::Duplicate);
212 } else {
213 canonical_paths.insert(canonical_path);
214
215 let source_id = SourceId::from(source_id_strings.len());
216 source_id_strings.push(path.to_string_lossy().into());
217
218 let included_source = read(&path).map_or_else(
219 |e| IncludedSource::IoError(e.to_string()),
220 |c| {
221 let char_indices = c.char_indices().map(|(i, _)| i).collect::<Vec<_>>();
225 IncludedSource::Content(source_id, c, char_indices)
226 },
227 );
228
229 included_content.insert(path.clone(), included_source);
232 let included_source = included_content.get(&path).unwrap();
233
234 if let IncludedSource::Content(_, content, _) = included_source {
235 let mut includes = get_includes(content, source_id)
236 .into_iter()
237 .map(|included_path| {
238 resolve_included_path(Some(&path), included_path.as_ref())
239 })
240 .collect::<VecDeque<_>>();
241 pending_paths.append(&mut includes);
242 }
243 }
244 }
245
246 let root_content_char_indices = root_content
247 .char_indices()
248 .map(|(i, _)| i)
249 .collect::<Vec<_>>();
250
251 Self {
252 root_path,
253 root_source_id,
254 root_content,
255 root_content_char_indices,
256 included_content,
257 source_id_strings,
258 }
259 }
260
261 #[deprecated(since = "0.12.0", note = "Use `write_errors_or_warnings` instead")]
262 pub fn write<W, E, K>(&self, w: W, errors_or_warnings: Vec<E>) -> io::Result<()>
263 where
264 W: Write + Copy,
265 E: Into<AnnotatedErrorOrWarning<K>>,
266 K: ErrorOrWarningKind,
267 {
268 self.write_errors_or_warnings(w, errors_or_warnings)
269 }
270
271 pub fn write_errors_or_warnings<W, E, K>(
273 &self,
274 mut w: W,
275 errors_or_warnings: Vec<E>,
276 ) -> io::Result<()>
277 where
278 W: Write + Copy,
279 E: Into<AnnotatedErrorOrWarning<K>>,
280 K: ErrorOrWarningKind,
281 {
282 for error_or_warning in errors_or_warnings.into_iter() {
283 use chumsky::span::Span;
284 let AnnotatedErrorOrWarning {
285 error_or_warning,
286 annotation,
287 } = error_or_warning.into();
288 let error_or_warning = *(error_or_warning.0);
289 let (src_id, span) =
290 self.source_id_string_and_adjusted_rune_span(&error_or_warning.span);
291 let color = error_or_warning.color();
292 let report_kind = error_or_warning.report_kind();
293
294 Report::build(report_kind, (src_id.clone(), (span.start()..span.end())))
295 .with_message(error_or_warning.message)
296 .with_labels(Some(
297 Label::new((src_id, (span.start()..span.end())))
298 .with_message(error_or_warning.reason)
299 .with_color(color),
300 ))
301 .with_labels(error_or_warning.contexts.into_iter().map(|(label, span)| {
302 let (src_id, span) = self.source_id_string_and_adjusted_rune_span(&span);
303 Label::new((src_id, (span.start()..span.end())))
304 .with_message(lazy_format!("in this {}", label))
305 .with_color(Color::Yellow)
306 }))
307 .with_labels(error_or_warning.related.into_iter().map(|(label, span)| {
308 let (src_id, span) = self.source_id_string_and_adjusted_rune_span(&span);
309 Label::new((src_id, (span.start()..span.end())))
310 .with_message(lazy_format!("{}", label))
311 .with_color(Color::Yellow)
312 }))
313 .finish()
314 .write(ariadne::sources(self.sources()), w)?;
315
316 if let Some(annotation) = annotation {
317 w.write_fmt(core::format_args!("{}\n", &annotation))?;
319 }
320 }
321 Ok(())
322 }
323
324 fn byte_to_rune(&self, char_indices: &[usize], byte_span: Span) -> Span {
325 let mut rune_span = byte_span;
326 rune_span.start = char_indices.partition_point(|&i| i < byte_span.start);
327 rune_span.end = char_indices.partition_point(|&i| i < byte_span.end);
328 rune_span
329 }
330
331 pub fn error_source_text<'a, K>(&'a self, error_or_warning: &ErrorOrWarning<K>) -> &'a str
332 where
333 K: ErrorOrWarningKind,
334 {
335 let (source_content, _, byte_span, _rune_span) =
336 self.get_adjusted_source(&error_or_warning.0.span);
337 &source_content[byte_span.start..byte_span.end]
338 }
339
340 fn source_id_string_and_adjusted_rune_span(&self, span: &Span) -> (String, Span) {
341 let (_, source_id, _byte_span, rune_span) = self.get_adjusted_source(span);
342 (source_id.to_string(), rune_span)
343 }
344
345 fn get_adjusted_source(&self, span: &Span) -> (&str, &str, Span, Span) {
346 use chumsky::span::Span;
347 let source_id = span.context();
348 let source_id_str = self.source_id_string(source_id);
349 let empty_char_indices = Vec::default();
350 let (source_content, source_content_char_indices) = if source_id == self.root_source_id {
351 (self.root_content.as_str(), &self.root_content_char_indices)
352 } else if let IncludedSource::Content(_, content, content_char_indices) =
353 self.included_content.get(Path::new(source_id_str)).unwrap()
354 {
355 (content.as_str(), content_char_indices)
356 } else {
357 ("", &empty_char_indices)
358 };
359
360 let byte_span = trimmed_span(source_content, span);
361 let rune_span = self.byte_to_rune(source_content_char_indices, byte_span);
362
363 (source_content, source_id_str, byte_span, rune_span)
364 }
365
366 fn source_id_string(&self, source_id: SourceId) -> &str {
367 self.source_id_strings[Into::<usize>::into(source_id)].as_str()
368 }
369
370 fn sources(&self) -> Vec<(String, &str)> {
371 once((
372 self.source_id_string(self.root_source_id).to_string(),
373 self.root_content.as_str(),
374 ))
375 .chain(
376 self.included_content
377 .iter()
378 .filter_map(|(_, included_source)| {
379 if let IncludedSource::Content(source_id, content, _) = included_source {
380 Some((
381 self.source_id_string(*source_id).to_string(),
382 content.as_str(),
383 ))
384 } else {
385 None
386 }
387 }),
388 )
389 .collect()
390 }
391
392 fn content_iter(&self) -> impl Iterator<Item = (SourceId, Option<&Path>, &str)> {
393 once((
394 self.root_source_id,
395 self.root_path.as_deref(),
396 self.root_content.as_str(),
397 ))
398 .chain(
399 self.included_content
400 .iter()
401 .filter_map(|(pathbuf, included_source)| {
402 if let IncludedSource::Content(source_id, content, _) = included_source {
403 Some((*source_id, Some(pathbuf.as_path()), content.as_str()))
404 } else {
405 None
406 }
407 }),
408 )
409 }
410
411 fn error_path_iter(&self) -> impl Iterator<Item = (Option<&Path>, String)> {
412 self.included_content
413 .iter()
414 .filter_map(|(pathbuf, included_source)| {
415 if let IncludedSource::IoError(e) = included_source {
416 Some((Some(pathbuf.as_path()), e.clone()))
417 } else {
418 None
419 }
420 })
421 }
422}
423
424impl TryFrom<PathBuf> for BeancountSources {
425 type Error = io::Error;
426
427 fn try_from(source_path: PathBuf) -> io::Result<Self> {
428 Self::try_read_with_includes(source_path)
429 }
430}
431
432impl TryFrom<&Path> for BeancountSources {
433 type Error = io::Error;
434
435 fn try_from(source_path: &Path) -> io::Result<Self> {
436 Self::try_read_with_includes(source_path.to_owned())
437 }
438}
439
440impl From<String> for BeancountSources {
441 fn from(source_string: String) -> Self {
442 Self::read_with_includes(None, source_string)
443 }
444}
445
446impl From<&str> for BeancountSources {
447 fn from(source_string: &str) -> Self {
448 Self::read_with_includes(None, source_string.to_owned())
449 }
450}
451
452impl std::fmt::Debug for BeancountSources {
453 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
454 writeln!(f, "BeancountSources(",)?;
455
456 for (path, included_source) in &self.included_content {
457 match included_source {
458 IncludedSource::Content(source_id, content, _) => writeln!(
459 f,
460 " {} ok len {},",
461 self.source_id_string(*source_id),
462 content.len()
463 )?,
464 IncludedSource::IoError(e) => writeln!(f, " {:?} err {},", path, e)?,
465 IncludedSource::Duplicate => writeln!(f, " {:?} duplicate include", path)?,
466 }
467 }
468
469 writeln!(f, ")",)
470 }
471}
472
473pub fn lex_with_source<'a>(source_id: SourceId, s: &'a str) -> Vec<(Token<'a>, Span)> {
474 lex(s)
475 .map(|(tok, span)| (tok, chumsky::span::Span::new(source_id, span)))
476 .collect::<Vec<_>>()
477}
478
/// Read the whole file at `file_path` into a `String`.
///
/// # Errors
///
/// Returns the I/O error if the file cannot be opened or read, or if its
/// content is not valid UTF-8.
fn read<P>(file_path: P) -> io::Result<String>
where
    P: AsRef<Path>,
{
    std::fs::read_to_string(file_path)
}
490
491type SpannedToken<'t> = (Token<'t>, Span);
492
493pub struct BeancountParser<'s> {
535 sources: &'s BeancountSources,
536 tokenized_sources: Vec<Vec<SpannedToken<'s>>>,
538}
539
540#[derive(Debug)]
542pub struct ParseSuccess<'t> {
543 pub directives: Vec<Spanned<Directive<'t>>>,
544 pub options: Options<'t>,
545 pub plugins: Vec<Plugin<'t>>,
546 pub warnings: Vec<Warning>,
547}
548
549#[derive(Debug)]
551pub struct ParseError {
552 pub errors: Vec<Error>,
553 pub warnings: Vec<Warning>,
554}
555
556type ParseDeclarationsResult<'t> = (
558 HashMap<Option<&'t Path>, Vec<Spanned<Declaration<'t>>>>,
559 Options<'t>,
560 Vec<Error>,
561 Vec<Warning>,
562);
563
564impl<'s> BeancountParser<'s> {
565 pub fn new(sources: &'s BeancountSources) -> Self {
567 let mut tokenized_sources = vec![Vec::new(); sources.source_id_strings.len()];
570
571 for (source_id, _path, content) in sources.content_iter() {
572 let i_source: usize = source_id.into();
573 tokenized_sources[i_source] = lex_with_source(source_id, content);
574 }
575
576 BeancountParser {
577 sources,
578 tokenized_sources,
579 }
580 }
581
582 pub fn parse<'a>(&'a self) -> Result<ParseSuccess<'a>, ParseError> {
584 let (parsed_sources, options, mut errors, warnings) = self.parse_declarations();
585 let error_paths = self.sources.error_path_iter().collect::<HashMap<_, _>>();
586 let mut p = PragmaProcessor::new(self.root_path(), parsed_sources, error_paths, options);
587
588 let directives = p
591 .by_ref()
592 .sort(|d| {
593 (
594 *d.item().date().item(),
595 !matches!(d.variant(), DirectiveVariant::Balance(_)),
597 )
598 })
599 .collect::<Vec<_>>();
600 let (options, plugins, mut pragma_errors) = p.result();
601 errors.append(&mut pragma_errors);
602
603 if errors.is_empty() {
604 Ok(ParseSuccess {
605 directives,
606 options,
607 plugins,
608 warnings,
609 })
610 } else {
611 Err(ParseError { errors, warnings })
612 }
613 }
614
615 fn root_path(&self) -> Option<&'s Path> {
616 self.sources.root_path.as_deref()
617 }
618
619 fn parse_declarations<'a>(&'a self) -> ParseDeclarationsResult<'a> {
622 let mut all_outputs = HashMap::new();
623 let mut all_errors = Vec::new();
624 let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
625
626 for (source_id, source_path, content) in self.sources.content_iter() {
627 let i_source: usize = source_id.into();
628 let tokens = &self.tokenized_sources[i_source];
629
630 let spanned_tokens = tokens
631 .map(end_of_input(source_id, content), |(t, s)| (t, s))
632 .with_context(source_id);
633
634 let (output, errors) = file(source_path)
635 .parse_with_state(spanned_tokens, &mut parser_state)
636 .into_output_errors();
637
638 all_outputs.insert(source_path, output.unwrap_or(Vec::new()));
639 all_errors.extend(errors);
640 }
641
642 let ParserState { options, warnings } = parser_state.0;
643
644 (
645 all_outputs,
646 Options::new(options),
647 all_errors.into_iter().map(Error::from).collect(),
648 warnings,
649 )
650 }
651}
652
653#[derive(Debug)]
657struct PragmaProcessor<'s> {
658 current_path: Option<PathBuf>,
659 current_declarations: VecDeque<Spanned<Declaration<'s>>>,
660 stacked: VecDeque<(Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>)>,
661 remaining: HashMap<Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>>,
662 error_paths: HashMap<Option<PathBuf>, String>,
663 include_span_by_canonical_path: HashMap<PathBuf, Span>,
664 tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
666 meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
667 options: Options<'s>,
668 plugins: Vec<Plugin<'s>>,
669 errors: Vec<Error>,
671}
672
673impl<'s> PragmaProcessor<'s> {
674 fn new(
675 root_path: Option<&Path>,
676 parsed_sources: HashMap<Option<&Path>, Vec<Spanned<Declaration<'s>>>>,
677 error_paths: HashMap<Option<&Path>, String>,
678 options: Options<'s>,
679 ) -> Self {
680 let mut remaining = parsed_sources
681 .into_iter()
682 .map(|(path, declarations)| {
683 (path.map(|p| p.to_path_buf()), VecDeque::from(declarations))
684 })
685 .collect::<HashMap<_, _>>();
686 let error_paths = error_paths
687 .into_iter()
688 .map(|(path, e)| (path.map(|p| p.to_path_buf()), e))
689 .collect::<HashMap<_, _>>();
690
691 let current_path = root_path.map(|p| p.to_path_buf());
692 let current_declarations = remaining.remove(¤t_path).unwrap();
693
694 PragmaProcessor {
695 current_path,
696 current_declarations,
697 stacked: VecDeque::new(),
698 remaining,
699 error_paths,
700 include_span_by_canonical_path: HashMap::default(),
701 tags: HashMap::new(),
702 meta_key_values: HashMap::new(),
703 options,
704 plugins: Vec::new(),
705 errors: Vec::new(),
706 }
707 }
708
709 fn result(self) -> (Options<'s>, Vec<Plugin<'s>>, Vec<Error>) {
710 let mut errors = self.errors;
712
713 for (key, _value) in self.meta_key_values {
714 let e = Error::new(
715 "invalid pushmeta",
716 "missing corresponding popmeta",
717 key.span,
718 );
719 errors.push(e);
720 }
721
722 for (tag, others) in self.tags {
723 let e = Error::new("invalid pushtag", "missing corresponding poptag", tag.span);
724 errors.push(e);
725 for other in others {
726 let e = Error::new(
727 "invalid pushtag",
728 "missing corresponding poptag",
729 other.span,
730 );
731 errors.push(e);
732 }
733 }
734
735 (self.options, self.plugins, errors)
736 }
737}
738
739impl<'s> Iterator for PragmaProcessor<'s> {
740 type Item = Spanned<Directive<'s>>;
741
742 fn next(&mut self) -> Option<Self::Item> {
743 match self.current_declarations.pop_front() {
744 Some(declaration) => {
745 match declaration.item {
746 Declaration::Directive(mut directive) => {
747 directive.metadata.augment_tags(&self.tags);
748 directive.metadata.augment_key_values(&self.meta_key_values);
749
750 Some(spanned(directive, declaration.span))
751 }
752
753 Declaration::Pragma(pragma) => {
754 use Pragma::*;
755
756 match pragma {
757 Pushtag(tag) => match self.tags.get_mut(&tag) {
758 Some(others) => {
759 others.push(tag);
760 }
761 None => {
762 self.tags.insert(tag, Vec::default());
763 }
764 },
765 Poptag(tag) => {
766 let mut last_tag = false;
767
768 match self.tags.get_mut(&tag) {
769 Some(others) => {
770 if others.is_empty() {
771 last_tag = true;
772 } else {
774 others.pop();
775 }
776 }
777 None => {
778 let e = Error::new(
779 "invalid poptag",
780 "missing corresponding pushtag",
781 tag.span,
782 );
783 self.errors.push(e);
784 }
785 }
786
787 if last_tag {
788 self.tags.remove(&tag);
789 }
790 }
791 Pushmeta(meta) => match self.meta_key_values.get_mut(&meta.key) {
792 Some(values) => {
793 values.push((meta.key.span, meta.value));
794 }
795 None => {
796 self.meta_key_values
797 .insert(meta.key, vec![(meta.key.span, meta.value)]);
798 }
799 },
800 Popmeta(meta) => {
801 let mut last_meta = false;
802
803 match self.meta_key_values.get_mut(&meta) {
804 Some(values) => {
805 values.pop();
806 if values.is_empty() {
807 last_meta = true;
808 }
810 }
811 None => {
812 let e = Error::new(
813 "invalid popmeta",
814 "missing corresponding pushmeta",
815 meta.span,
816 );
817 self.errors.push(e);
818 }
819 }
820
821 if last_meta {
822 self.meta_key_values.remove(&meta);
823 }
824 }
825 Include(relpath) => {
826 let (path, span) = (
827 Some(resolve_included_path(
828 self.current_path.as_ref(),
829 AsRef::<Path>::as_ref(*relpath.item()),
830 )),
831 *relpath.span(),
832 );
833 let canonical_path =
834 path.as_ref().and_then(|p| p.canonicalize().ok());
835
836 match self.remaining.remove_entry(&path) {
837 Some((included_path, included_declarations)) => {
838 let stacked_path = std::mem::replace(
839 &mut self.current_path,
840 included_path,
841 );
842 let stacked_declarations = std::mem::replace(
843 &mut self.current_declarations,
844 included_declarations,
845 );
846 self.stacked
847 .push_front((stacked_path, stacked_declarations));
848
849 if let Some(canonical_path) = canonical_path {
851 self.include_span_by_canonical_path
852 .insert(canonical_path, span);
853 }
854 }
855
856 None => {
857 let e = match self.error_paths.get(&path) {
859 Some(e) => {
860 Error::new("can't read file", e.to_string(), span)
861 }
862 None => {
863 let e = Error::new(
864 "duplicate include",
865 "file already included",
866 span,
867 );
868
869 if let Some(span) = canonical_path.and_then(|p| {
871 self.include_span_by_canonical_path.get(&p)
872 }) {
873 e.related_to_named_span("file", *span)
874 } else {
875 e
876 }
877 }
878 };
879 self.errors.push(e);
880 }
881 }
882 }
883
884 Option(opt) => {
885 if let Err(e) = self.options.assimilate(opt) {
886 self.errors.push(e);
887 }
888 }
889
890 Plugin(plugin) => self.plugins.push(plugin),
891 }
892
893 self.next()
895 }
896 }
897 }
898 None => match self.stacked.pop_front() {
899 Some((path, declarations)) => {
900 self.current_path = path;
901 self.current_declarations = declarations;
902 self.next()
903 }
904 None => None,
905 },
906 }
907 }
908}
909
910fn end_of_input(source_id: SourceId, s: &str) -> Span {
911 chumsky::span::Span::new(source_id, s.len()..s.len())
912}
913
914fn trimmed_span(source: &str, span: &Span) -> Span {
915 let mut trimmed = *span;
916 trimmed.end = trim_trailing_whitespace(source, span.start, span.end);
917 trimmed
918}
919
920#[cfg(test)]
921pub use lexer::bare_lex;
922mod format;
923mod lexer;
924pub use options::Options;
925pub(crate) mod options;
926mod parsers;
927mod sort;
928mod trim;
929pub mod types;