1use std::{
42 cell::RefCell,
43 path::{Path, PathBuf},
44 rc::Rc,
45 string::ToString,
46};
47
48use tracing::instrument;
49
50mod blocks;
51mod constants;
52mod error;
53pub(crate) mod grammar;
54mod model;
55mod options;
56mod parsed;
57mod preprocessor;
58mod safe_mode;
59mod warning;
60
61pub(crate) use grammar::{InlinePreprocessorParserState, ProcessedContent, inline_preprocessing};
62use preprocessor::Preprocessor;
63
64pub use error::{Error, Positioning, SourceLocation};
65pub use grammar::parse_text_for_quotes;
66pub use model::{
67 Admonition, AdmonitionVariant, Anchor, AttributeName, AttributeValue, Attribution, Audio,
68 Author, Autolink, Block, BlockMetadata, Bold, Button, CalloutList, CalloutListItem, CalloutRef,
69 CalloutRefKind, CiteTitle, ColumnFormat, ColumnStyle, ColumnWidth, Comment, CrossReference,
70 CurvedApostrophe, CurvedQuotation, DelimitedBlock, DelimitedBlockType, DescriptionList,
71 DescriptionListItem, DiscreteHeader, Document, DocumentAttribute, DocumentAttributes,
72 ElementAttributes, Footnote, Form, HEADER, Header, Highlight, HorizontalAlignment, ICON_SIZES,
73 Icon, Image, IndexTerm, IndexTermKind, InlineMacro, InlineNode, Italic, Keyboard, LineBreak,
74 Link, ListItem, ListItemCheckedStatus, Location, MAX_SECTION_LEVELS, MAX_TOC_LEVELS, Mailto,
75 Menu, Monospace, NORMAL, OrderedList, PageBreak, Paragraph, Pass, PassthroughKind, Plain,
76 Position, Raw, Role, Section, Source, SourceUrl, StandaloneCurvedApostrophe, Stem, StemContent,
77 StemNotation, Subscript, Substitution, SubstitutionOp, SubstitutionSpec, Subtitle, Superscript,
78 Table, TableColumn, TableOfContents, TableRow, ThematicBreak, Title, TocEntry,
79 UNNUMBERED_SECTION_STYLES, UnorderedList, Url, VERBATIM, Verbatim, VerticalAlignment, Video,
80 inlines_to_string, strip_quotes, substitute,
81};
82pub use options::{Options, OptionsBuilder, SafeMode};
83pub use parsed::{OwnedSource, ParseInlineResult, ParseResult};
84pub use warning::{Warning, WarningKind};
85
/// Builder-style entry point that pairs an input string with the [`Options`]
/// used to parse it.
///
/// Create one with [`Parser::new`], optionally swap in custom options with
/// [`Parser::with_options`], then consume it with [`Parser::parse`] or
/// [`Parser::parse_inline`].
#[derive(Debug)]
pub struct Parser<'input> {
    /// Borrowed source text that is handed to the parse functions.
    input: &'input str,
    /// Options forwarded to the parse functions.
    options: Options<'input>,
}
134
135impl<'input> Parser<'input> {
136 #[must_use]
150 pub fn new(input: &'input str) -> Self {
151 Self {
152 input,
153 options: Options::default(),
154 }
155 }
156
157 #[must_use]
175 pub fn with_options(mut self, options: Options<'input>) -> Self {
176 self.options = options;
177 self
178 }
179
180 pub fn parse(self) -> Result<ParseResult, Error> {
195 parse(self.input, &self.options)
196 }
197
198 pub fn parse_inline(self) -> Result<ParseInlineResult, Error> {
216 parse_inline(self.input, &self.options)
217 }
218}
219
220#[instrument(skip(reader))]
240pub fn parse_from_reader<R: std::io::Read>(
241 reader: R,
242 options: &Options<'_>,
243) -> Result<ParseResult, Error> {
244 let warnings_handle: Rc<RefCell<Vec<Warning>>> = Rc::new(RefCell::new(Vec::new()));
247 let result = {
248 let _span = tracing::info_span!("preprocess").entered();
249 Preprocessor::process_reader(reader, options, Rc::clone(&warnings_handle))?
250 };
251 let text: Box<str> = result.text.into_owned().into_boxed_str();
252 let _span = tracing::info_span!("grammar_parse", input_len = text.len()).entered();
253 parse_input(
254 text,
255 options.clone(),
256 None,
257 result.leveloffset_ranges,
258 result.source_ranges,
259 warnings_handle,
260 )
261}
262
263#[instrument]
282pub fn parse(input: &str, options: &Options<'_>) -> Result<ParseResult, Error> {
283 let warnings_handle: Rc<RefCell<Vec<Warning>>> = Rc::new(RefCell::new(Vec::new()));
284 let result = {
285 let _span = tracing::info_span!("preprocess").entered();
286 Preprocessor::process(input, options, Rc::clone(&warnings_handle))?
287 };
288 let text: Box<str> = result.text.into_owned().into_boxed_str();
289 let _span = tracing::info_span!("grammar_parse", input_len = text.len()).entered();
290 parse_input(
291 text,
292 options.clone(),
293 None,
294 result.leveloffset_ranges,
295 result.source_ranges,
296 warnings_handle,
297 )
298}
299
300#[instrument(skip(file_path))]
320pub fn parse_file<P: AsRef<Path>>(
321 file_path: P,
322 options: &Options<'_>,
323) -> Result<ParseResult, Error> {
324 let path = file_path.as_ref().to_path_buf();
325 let raw = preprocessor::read_and_decode_file(file_path.as_ref(), None)?;
326 let warnings_handle: Rc<RefCell<Vec<Warning>>> = Rc::new(RefCell::new(Vec::new()));
327 let result = {
328 let _span = tracing::info_span!("preprocess").entered();
329 Preprocessor::process_with_file(
330 &raw,
331 file_path.as_ref(),
332 options,
333 Rc::clone(&warnings_handle),
334 )?
335 };
336 let text: Box<str> = result.text.into_owned().into_boxed_str();
337 let _span = tracing::info_span!("grammar_parse", input_len = text.len()).entered();
338 parse_input(
339 text,
340 options.clone(),
341 Some(path),
342 result.leveloffset_ranges,
343 result.source_ranges,
344 warnings_handle,
345 )
346}
347
348fn peg_error_to_source_location(
351 error: &peg::error::ParseError<peg::str::LineCol>,
352 state: &grammar::ParserState,
353) -> SourceLocation {
354 let offset = error.location.offset;
355 if let Some(range) = state
356 .source_ranges
357 .iter()
358 .rev()
359 .find(|r| r.contains(offset))
360 {
361 let line_in_file = state
362 .input
363 .get(range.start_offset..offset)
364 .map_or(0, |s| s.matches('\n').count());
365 SourceLocation {
366 file: Some(range.file.clone()),
367 positioning: Positioning::Position(Position {
368 line: range.start_line + line_in_file,
369 column: error.location.column,
370 }),
371 }
372 } else {
373 SourceLocation {
374 file: state.current_file.clone(),
375 positioning: Positioning::Position(Position {
376 line: error.location.line,
377 column: error.location.column,
378 }),
379 }
380 }
381}
382
383#[instrument(skip_all)]
384fn parse_input(
385 input: Box<str>,
386 options: Options<'_>,
387 file_path: Option<PathBuf>,
388 leveloffset_ranges: Vec<model::LeveloffsetRange>,
389 source_ranges: Vec<model::SourceRange>,
390 warnings_handle: Rc<RefCell<Vec<Warning>>>,
391) -> Result<ParseResult, Error> {
392 tracing::trace!(?input, "post preprocessor");
393 let owner = parsed::OwnedInput::new(input);
400 let options_owned = options.into_static();
401 let warnings_for_state = Rc::clone(&warnings_handle);
408
409 ParseResult::try_new(owner, warnings_handle, move |owner| {
410 let mut state = grammar::ParserState::new(&owner.source, &owner.arena);
411 state.document_attributes = Rc::new(options_owned.document_attributes.clone());
412 state.options = Rc::new(options_owned);
413 state.current_file = file_path;
414 state.leveloffset_ranges = leveloffset_ranges;
415 state.source_ranges = source_ranges;
416 state.warnings = warnings_for_state;
417 let result = match grammar::document_parser::document(&owner.source, &mut state) {
418 Ok(Ok(doc)) => Ok(doc),
419 Ok(Err(e)) => Err(e),
420 Err(error) => {
421 tracing::error!(?error, "error parsing document content");
422 let source_location = peg_error_to_source_location(&error, &state);
423 Err(Error::Parse(Box::new(source_location), error.to_string()))
424 }
425 };
426 state.emit_warnings();
427 result
428 })
429}
430
431#[instrument]
456pub fn parse_inline(input: &str, options: &Options<'_>) -> Result<ParseInlineResult, Error> {
457 tracing::trace!(?input, "post preprocessor");
458 let owner = parsed::OwnedInput::new(input.into());
459 let options_owned = options.clone().into_static();
460 let warnings_handle: Rc<RefCell<Vec<Warning>>> = Rc::new(RefCell::new(Vec::new()));
461 let warnings_for_state = Rc::clone(&warnings_handle);
462
463 ParseInlineResult::try_new(owner, warnings_handle, move |owner| {
464 let mut state = grammar::ParserState::new(&owner.source, &owner.arena);
465 state.document_attributes = Rc::new(options_owned.document_attributes.clone());
466 state.options = Rc::new(options_owned);
467 state.warnings = warnings_for_state;
468 let result = match grammar::inline_parser::inlines(&owner.source, &mut state) {
469 Ok(inlines) => Ok(inlines),
470 Err(error) => {
471 tracing::error!(?error, "error parsing inline content");
472 Err(Error::Parse(
473 Box::new(peg_error_to_source_location(&error, &state)),
474 error.to_string(),
475 ))
476 }
477 };
478 state.emit_warnings();
479 result
480 })
481}
482
483#[cfg(test)]
484mod proptests;
485
486#[cfg(test)]
487#[allow(clippy::unwrap_used)]
488#[allow(clippy::panic)]
489#[allow(clippy::expect_used)]
490mod tests {
491 use std::{fs, path::PathBuf};
492
493 use pretty_assertions::assert_eq;
494
495 use super::*;
496
497 fn read_file_contents_with_extension(path: &PathBuf, ext: &str) -> Result<String, Error> {
498 let test_file_path = path.with_extension(ext);
499 let file_contents = fs::read_to_string(&test_file_path).inspect_err(
500 |e| tracing::warn!(?path, ?test_file_path, error = %e, "test file not found"),
501 )?;
502 Ok(file_contents)
503 }
504
505 #[rstest::rstest]
506 #[tracing_test::traced_test]
507 fn test_with_fixtures(#[files("fixtures/tests/**/*.adoc")] path: PathBuf) -> Result<(), Error> {
508 let options = Options::builder().with_safe_mode(SafeMode::Unsafe).build();
509
510 match parse_file(&path, &options) {
511 Ok(result) => {
512 let expected = read_file_contents_with_extension(&path, "json")?;
513 let actual = serde_json::to_string_pretty(result.document())
514 .expect("could not serialize result");
515 assert_eq!(expected, actual);
516 }
517 Err(e) => {
518 let file_contents = read_file_contents_with_extension(&path, "error")?;
519 let expected = file_contents.trim();
521 assert_eq!(expected, e.to_string());
522 }
523 }
524 Ok(())
525 }
526
527 #[cfg(test)]
528 mod empty_document_tests {
529 use crate::{Options, parse};
530
531 #[test]
532 fn test_whitespace_only_documents() {
533 let test_cases = vec![
534 "\n", "\n\n", "\t", " \n\t\n ", " ",
535 "\n\n\t",
536 ];
537
538 for input in test_cases {
539 let options = Options::default();
540 let result = parse(input, &options);
541
542 match result {
543 Ok(parsed) => {
544 let doc = parsed.document();
545 assert!(
547 doc.location.absolute_start <= doc.location.absolute_end,
548 "Failed for input {input:?}: absolute_start {} > absolute_end {}",
549 doc.location.absolute_start,
550 doc.location.absolute_end
551 );
552
553 doc.location.validate(input).unwrap_or_else(|e| {
555 panic!("Location validation failed for {input:?}: {e}")
556 });
557 }
558 Err(e) => {
559 panic!("Failed to parse {input:?}: {e}");
560 }
561 }
562 }
563 }
564
565 #[test]
566 fn test_document_with_content_after_whitespace() {
567 let test_cases = vec!["\n\nHello", "\t\tWorld", " \n = Title"];
568
569 for input in test_cases {
570 let options = Options::default();
571 let parsed =
572 parse(input, &options).unwrap_or_else(|_| panic!("Should parse {input:?}"));
573 let doc = parsed.document();
574
575 assert!(
576 doc.location.absolute_start <= doc.location.absolute_end,
577 "Failed for input {input:?}: absolute_start {} > absolute_end {}",
578 doc.location.absolute_start,
579 doc.location.absolute_end
580 );
581
582 doc.location
584 .validate(input)
585 .unwrap_or_else(|e| panic!("Location validation failed for {input:?}: {e}"));
586 }
587 }
588
589 #[test]
590 fn test_unicode_characters() {
591 let test_cases = vec![
593 "π", "Χ", "Hello δΈη", "\u{200b}", ];
598
599 for input in test_cases {
600 let options = Options::default();
601 let result = parse(input, &options);
602
603 match result {
604 Ok(parsed) => {
605 let doc = parsed.document();
606 assert!(
608 input.is_char_boundary(doc.location.absolute_start),
609 "Absolute start {} not on UTF-8 boundary for {input:?}",
610 doc.location.absolute_start,
611 );
612 assert!(
613 input.is_char_boundary(doc.location.absolute_end),
614 "Absolute end {} not on UTF-8 boundary for {input:?}",
615 doc.location.absolute_end,
616 );
617
618 doc.location.validate(input).unwrap_or_else(|e| {
620 panic!("Location validation failed for {input:?}: {e}");
621 });
622 }
623 Err(e) => {
624 println!("Failed to parse {input:?}: {e} (this might be expected)");
627 }
628 }
629 }
630 }
631 }
632
633 mod warning_deduplication_tests {
640 use crate::{Options, parse};
641
642 #[test]
643 #[tracing_test::traced_test]
644 fn counter_reference_peg_backtracking_does_not_duplicate() {
645 let input = "= Title\n\n{counter:hits} then {counter:hits} again";
649 let options = Options::default();
650 let result = parse(input, &options).expect("should parse");
651 let counter_warnings = result
652 .warnings()
653 .iter()
654 .filter(|w| {
655 w.kind
656 .to_string()
657 .contains("not supported and will be removed")
658 })
659 .count();
660 assert_eq!(
661 counter_warnings,
662 2,
663 "expected 2 counter warnings (one per position), got {counter_warnings}: {:?}",
664 result.warnings(),
665 );
666 assert!(
668 result
669 .warnings()
670 .iter()
671 .all(|w| w.source_location().is_some()),
672 "counter warnings must carry locations",
673 );
674 }
675
676 #[test]
677 #[tracing_test::traced_test]
678 fn distinct_warnings_all_emitted() {
679 let input = "= Title\n\n{counter:a} and {counter2:b}";
681 let options = Options::default();
682 let _doc = parse(input, &options).expect("should parse");
683 assert!(logs_contain(
684 "Counters ({counter:a}) are not supported and will be removed from output"
685 ));
686 assert!(logs_contain(
687 "Counters ({counter2:b}) are not supported and will be removed from output"
688 ));
689 }
690 }
691
692 mod parse_result_tests {
693 use crate::{Options, WarningKind, parse, parse_file};
694
695 #[test]
701 fn missing_include_warning_surfaces_on_parse_result() {
702 use std::io::Write;
703 let tmp = std::env::temp_dir().join("acdc_test_missing_include.adoc");
705 let mut f = std::fs::File::create(&tmp).expect("create tmp");
706 writeln!(
707 f,
708 "= Doc Title\n\ninclude::definitely-missing-{}.adoc[]\n",
709 std::process::id()
710 )
711 .expect("write tmp");
712 drop(f);
713
714 let options = Options::default();
715 let result = parse_file(&tmp, &options).expect("should parse");
716 let _ = std::fs::remove_file(&tmp);
717
718 let has_missing_include = result
719 .warnings()
720 .iter()
721 .any(|w| w.kind.to_string().contains("file is missing"));
722 assert!(
723 has_missing_include,
724 "expected missing-include warning, got: {:?}",
725 result.warnings(),
726 );
727 }
728
729 #[test]
733 fn section_level_out_of_sequence_surfaces_on_parse_result() {
734 let input = "= Doc Title\n\n=== Starts at level 2\n\nContent\n";
735 let options = Options::default();
736 let result = parse(input, &options).expect("document should parse");
737
738 assert_eq!(
739 result.warnings().len(),
740 1,
741 "expected exactly one warning, got: {:?}",
742 result.warnings(),
743 );
744 let warning = result.warnings().first().expect("asserted non-empty");
745 assert!(
746 matches!(
747 &warning.kind,
748 WarningKind::SectionLevelOutOfSequence { got: 2, .. },
749 ),
750 "unexpected warning kind: {:?}",
751 warning.kind,
752 );
753 assert!(
754 warning.source_location().is_some(),
755 "warning should carry a source location",
756 );
757 }
758
759 #[test]
762 fn valid_document_has_no_warnings() {
763 let input = "= Doc Title\n\n== First\n\nContent\n";
764 let options = Options::default();
765 let result = parse(input, &options).expect("document should parse");
766 assert!(
767 result.warnings().is_empty(),
768 "expected no warnings, got: {:?}",
769 result.warnings(),
770 );
771 }
772 }
773
774 mod attribute_resolution_tests {
775 use std::borrow::Cow;
776
777 use crate::{AttributeValue, Options, parse};
778
779 #[test]
780 fn test_definition_time_resolution_bar_defined_first() {
781 let input = r":bar: resolved-bar
783:foo: {bar}
784
785{foo}
786";
787 let options = Options::default();
788 let parsed = parse(input, &options).expect("should parse");
789 let doc = parsed.document();
790
791 assert_eq!(
793 doc.attributes.get("foo"),
794 Some(&AttributeValue::String(Cow::Borrowed("resolved-bar")))
795 );
796 }
797
798 #[test]
799 fn test_definition_time_resolution_bar_defined_after() {
800 let input = r":foo: {bar}
802:bar: resolved-bar
803
804{foo}
805";
806 let options = Options::default();
807 let parsed = parse(input, &options).expect("should parse");
808 let doc = parsed.document();
809
810 assert_eq!(
812 doc.attributes.get("foo"),
813 Some(&AttributeValue::String(Cow::Borrowed("{bar}")))
814 );
815 }
816
817 #[test]
818 fn test_chained_attribute_resolution() {
819 let input = r":c: final-value
822:b: {c}
823:a: {b}
824
825{a}
826";
827 let options = Options::default();
828 let parsed = parse(input, &options).expect("should parse");
829 let doc = parsed.document();
830
831 assert_eq!(
833 doc.attributes.get("c"),
834 Some(&AttributeValue::String(Cow::Borrowed("final-value")))
835 );
836 assert_eq!(
837 doc.attributes.get("b"),
838 Some(&AttributeValue::String(Cow::Borrowed("final-value")))
839 );
840 assert_eq!(
841 doc.attributes.get("a"),
842 Some(&AttributeValue::String(Cow::Borrowed("final-value")))
843 );
844 }
845 }
846}