1#![deny(clippy::pedantic)]
2#![warn(clippy::all)]
3use std::{
33 path::{Path, PathBuf},
34 string::ToString,
35};
36
37use tracing::instrument;
38
39mod blocks;
40mod constants;
41mod error;
42pub(crate) mod grammar;
43mod model;
44mod options;
45mod preprocessor;
46mod safe_mode;
47
48pub(crate) use grammar::{InlinePreprocessorParserState, ProcessedContent, inline_preprocessing};
49use preprocessor::Preprocessor;
50
51pub use error::{Error, Positioning, SourceLocation};
52pub use grammar::parse_text_for_quotes;
53pub use model::{
54 Admonition, AdmonitionVariant, Anchor, AttributeName, AttributeValue, Attribution, Audio,
55 Author, Autolink, Block, BlockMetadata, Bold, Button, CalloutList, CalloutListItem, CalloutRef,
56 CalloutRefKind, CiteTitle, ColumnFormat, ColumnStyle, ColumnWidth, Comment, CrossReference,
57 CurvedApostrophe, CurvedQuotation, DelimitedBlock, DelimitedBlockType, DescriptionList,
58 DescriptionListItem, DiscreteHeader, Document, DocumentAttribute, DocumentAttributes,
59 ElementAttributes, Footnote, Form, HEADER, Header, Highlight, HorizontalAlignment, ICON_SIZES,
60 Icon, Image, IndexTerm, IndexTermKind, InlineMacro, InlineNode, Italic, Keyboard, LineBreak,
61 Link, ListItem, ListItemCheckedStatus, Location, MAX_SECTION_LEVELS, MAX_TOC_LEVELS, Mailto,
62 Menu, Monospace, NORMAL, OrderedList, PageBreak, Paragraph, Pass, PassthroughKind, Plain,
63 Position, Raw, Role, Section, Source, SourceUrl, StandaloneCurvedApostrophe, Stem, StemContent,
64 StemNotation, Subscript, Substitution, SubstitutionOp, SubstitutionSpec, Subtitle, Superscript,
65 Table, TableColumn, TableOfContents, TableRow, ThematicBreak, Title, TocEntry,
66 UNNUMBERED_SECTION_STYLES, UnorderedList, Url, VERBATIM, Verbatim, VerticalAlignment, Video,
67 inlines_to_string, substitute,
68};
69pub use options::{Options, OptionsBuilder, SafeMode};
70
71#[derive(Debug)]
115pub struct Parser<'input> {
116 input: &'input str,
117 options: Options,
118}
119
120impl<'input> Parser<'input> {
121 #[must_use]
135 pub fn new(input: &'input str) -> Self {
136 Self {
137 input,
138 options: Options::default(),
139 }
140 }
141
142 #[must_use]
160 pub fn with_options(mut self, options: Options) -> Self {
161 self.options = options;
162 self
163 }
164
165 pub fn parse(self) -> Result<Document, Error> {
180 parse(self.input, &self.options)
181 }
182
183 pub fn parse_inline(self) -> Result<Vec<InlineNode>, Error> {
201 parse_inline(self.input, &self.options)
202 }
203}
204
205#[instrument(skip(reader))]
225pub fn parse_from_reader<R: std::io::Read>(
226 reader: R,
227 options: &Options,
228) -> Result<Document, Error> {
229 let result = Preprocessor.process_reader(reader, options)?;
230 parse_input(
231 &result.text,
232 options,
233 None,
234 result.leveloffset_ranges,
235 result.source_ranges,
236 )
237}
238
239#[instrument]
258pub fn parse(input: &str, options: &Options) -> Result<Document, Error> {
259 let result = Preprocessor.process(input, options)?;
260 parse_input(
261 &result.text,
262 options,
263 None,
264 result.leveloffset_ranges,
265 result.source_ranges,
266 )
267}
268
269#[instrument(skip(file_path))]
289pub fn parse_file<P: AsRef<Path>>(file_path: P, options: &Options) -> Result<Document, Error> {
290 let path = file_path.as_ref().to_path_buf();
291 let result = Preprocessor.process_file(file_path, options)?;
292 parse_input(
293 &result.text,
294 options,
295 Some(path),
296 result.leveloffset_ranges,
297 result.source_ranges,
298 )
299}
300
301fn peg_error_to_source_location(
304 error: &peg::error::ParseError<peg::str::LineCol>,
305 state: &grammar::ParserState,
306) -> SourceLocation {
307 let offset = error.location.offset;
308 if let Some(range) = state
309 .source_ranges
310 .iter()
311 .rev()
312 .find(|r| r.contains(offset))
313 {
314 let line_in_file = state
315 .input
316 .get(range.start_offset..offset)
317 .map_or(0, |s| s.matches('\n').count());
318 SourceLocation {
319 file: Some(range.file.clone()),
320 positioning: Positioning::Position(Position {
321 line: range.start_line + line_in_file,
322 column: error.location.column,
323 }),
324 }
325 } else {
326 SourceLocation {
327 file: state.current_file.clone(),
328 positioning: Positioning::Position(Position {
329 line: error.location.line,
330 column: error.location.column,
331 }),
332 }
333 }
334}
335
336#[instrument]
337fn parse_input(
338 input: &str,
339 options: &Options,
340 file_path: Option<PathBuf>,
341 leveloffset_ranges: Vec<model::LeveloffsetRange>,
342 source_ranges: Vec<model::SourceRange>,
343) -> Result<Document, Error> {
344 tracing::trace!(?input, "post preprocessor");
345 let mut state = grammar::ParserState::new(input);
346 state.document_attributes = options.document_attributes.clone();
347 state.options = options.clone();
348 state.current_file = file_path;
349 state.leveloffset_ranges = leveloffset_ranges;
350 state.source_ranges = source_ranges;
351 let result = match grammar::document_parser::document(input, &mut state) {
352 Ok(doc) => doc,
353 Err(error) => {
354 tracing::error!(?error, "error parsing document content");
355 let source_location = peg_error_to_source_location(&error, &state);
356 Err(Error::Parse(Box::new(source_location), error.to_string()))
357 }
358 };
359 state.emit_warnings();
360 result
361}
362
363#[instrument]
388pub fn parse_inline(input: &str, options: &Options) -> Result<Vec<InlineNode>, Error> {
389 tracing::trace!(?input, "post preprocessor");
390 let mut state = grammar::ParserState::new(input);
391 state.document_attributes = options.document_attributes.clone();
392 state.options = options.clone();
393 let result = match grammar::document_parser::inlines(
394 input,
395 &mut state,
396 0,
397 &grammar::BlockParsingMetadata::default(),
398 ) {
399 Ok(inlines) => Ok(inlines),
400 Err(error) => {
401 tracing::error!(?error, "error parsing inline content");
402 Err(Error::Parse(
403 Box::new(peg_error_to_source_location(&error, &state)),
404 error.to_string(),
405 ))
406 }
407 };
408 state.emit_warnings();
409 result
410}
411
412#[cfg(test)]
413mod proptests;
414
415#[cfg(test)]
416#[allow(clippy::unwrap_used)]
417#[allow(clippy::panic)]
418#[allow(clippy::expect_used)]
419mod tests {
420 use super::*;
421 use pretty_assertions::assert_eq;
422
423 fn read_file_contents_with_extension(
424 path: &std::path::PathBuf,
425 ext: &str,
426 ) -> Result<String, Error> {
427 let test_file_path = path.with_extension(ext);
428 let file_contents = std::fs::read_to_string(&test_file_path).inspect_err(
429 |e| tracing::warn!(?path, ?test_file_path, error = %e, "test file not found"),
430 )?;
431 Ok(file_contents)
432 }
433
434 #[rstest::rstest]
435 #[tracing_test::traced_test]
436 fn test_with_fixtures(
437 #[files("fixtures/tests/**/*.adoc")] path: std::path::PathBuf,
438 ) -> Result<(), Error> {
439 let options = Options::builder().with_safe_mode(SafeMode::Unsafe).build();
440
441 match parse_file(&path, &options) {
442 Ok(result) => {
443 let expected = read_file_contents_with_extension(&path, "json")?;
444 let actual =
445 serde_json::to_string_pretty(&result).expect("could not serialize result");
446 assert_eq!(expected, actual);
447 }
448 Err(e) => {
449 let file_contents = read_file_contents_with_extension(&path, "error")?;
450 let expected = file_contents.trim();
452 assert_eq!(expected, e.to_string());
453 }
454 }
455 Ok(())
456 }
457
458 #[cfg(test)]
459 mod empty_document_tests {
460 use crate::{Options, parse};
461
462 #[test]
463 fn test_whitespace_only_documents() {
464 let test_cases = vec![
465 "\n", "\n\n", "\t", " \n\t\n ", " ",
466 "\n\n\t",
467 ];
468
469 for input in test_cases {
470 let options = Options::default();
471 let result = parse(input, &options);
472
473 match result {
474 Ok(doc) => {
475 assert!(
477 doc.location.absolute_start <= doc.location.absolute_end,
478 "Failed for input {input:?}: absolute_start {} > absolute_end {}",
479 doc.location.absolute_start,
480 doc.location.absolute_end
481 );
482
483 doc.location.validate(input).unwrap_or_else(|e| {
485 panic!("Location validation failed for {input:?}: {e}")
486 });
487 }
488 Err(e) => {
489 panic!("Failed to parse {input:?}: {e}");
490 }
491 }
492 }
493 }
494
495 #[test]
496 fn test_document_with_content_after_whitespace() {
497 let test_cases = vec!["\n\nHello", "\t\tWorld", " \n = Title"];
498
499 for input in test_cases {
500 let options = Options::default();
501 let doc =
502 parse(input, &options).unwrap_or_else(|_| panic!("Should parse {input:?}"));
503
504 assert!(
505 doc.location.absolute_start <= doc.location.absolute_end,
506 "Failed for input {input:?}: absolute_start {} > absolute_end {}",
507 doc.location.absolute_start,
508 doc.location.absolute_end
509 );
510
511 doc.location
513 .validate(input)
514 .unwrap_or_else(|e| panic!("Location validation failed for {input:?}: {e}"));
515 }
516 }
517
518 #[test]
519 fn test_unicode_characters() {
520 let test_cases = vec![
522 "π", "Χ", "Hello δΈη", "\u{200b}", ];
527
528 for input in test_cases {
529 let options = Options::default();
530 let result = parse(input, &options);
531
532 match result {
533 Ok(doc) => {
534 assert!(
536 input.is_char_boundary(doc.location.absolute_start),
537 "Absolute start {} not on UTF-8 boundary for {input:?}",
538 doc.location.absolute_start,
539 );
540 assert!(
541 input.is_char_boundary(doc.location.absolute_end),
542 "Absolute end {} not on UTF-8 boundary for {input:?}",
543 doc.location.absolute_end,
544 );
545
546 doc.location.validate(input).unwrap_or_else(|e| {
548 panic!("Location validation failed for {input:?}: {e}");
549 });
550 }
551 Err(e) => {
552 println!("Failed to parse {input:?}: {e} (this might be expected)",);
555 }
556 }
557 }
558 }
559 }
560
561 mod warning_deduplication_tests {
568 use crate::{Options, parse};
569
570 #[test]
571 #[tracing_test::traced_test]
572 fn counter_reference_emits_single_warning() {
573 let input = "= Title\n\n{counter:hits} then {counter:hits} again";
576 let options = Options::default();
577 let _doc = parse(input, &options).expect("should parse");
578 assert!(logs_contain("Counters"));
579 logs_assert(|lines: &[&str]| {
580 let count = lines
581 .iter()
582 .filter(|l| l.contains("not supported and will be removed"))
583 .count();
584 if count == 1 {
585 Ok(())
586 } else {
587 Err(format!("expected exactly 1 counter warning, got {count}"))
588 }
589 });
590 }
591
592 #[test]
593 #[tracing_test::traced_test]
594 fn distinct_warnings_all_emitted() {
595 let input = "= Title\n\n{counter:a} and {counter2:b}";
597 let options = Options::default();
598 let _doc = parse(input, &options).expect("should parse");
599 assert!(logs_contain(
600 "Counters ({counter:a}) are not supported and will be removed from output"
601 ));
602 assert!(logs_contain(
603 "Counters ({counter2:b}) are not supported and will be removed from output"
604 ));
605 }
606 }
607
608 mod attribute_resolution_tests {
609 use crate::{AttributeValue, Options, parse};
610
611 #[test]
612 fn test_definition_time_resolution_bar_defined_first() {
613 let input = r":bar: resolved-bar
615:foo: {bar}
616
617{foo}
618";
619 let options = Options::default();
620 let doc = parse(input, &options).expect("should parse");
621
622 assert_eq!(
624 doc.attributes.get("foo"),
625 Some(&AttributeValue::String("resolved-bar".to_string()))
626 );
627 }
628
629 #[test]
630 fn test_definition_time_resolution_bar_defined_after() {
631 let input = r":foo: {bar}
633:bar: resolved-bar
634
635{foo}
636";
637 let options = Options::default();
638 let doc = parse(input, &options).expect("should parse");
639
640 assert_eq!(
642 doc.attributes.get("foo"),
643 Some(&AttributeValue::String("{bar}".to_string()))
644 );
645 }
646
647 #[test]
648 fn test_chained_attribute_resolution() {
649 let input = r":c: final-value
652:b: {c}
653:a: {b}
654
655{a}
656";
657 let options = Options::default();
658 let doc = parse(input, &options).expect("should parse");
659
660 assert_eq!(
662 doc.attributes.get("c"),
663 Some(&AttributeValue::String("final-value".to_string()))
664 );
665 assert_eq!(
666 doc.attributes.get("b"),
667 Some(&AttributeValue::String("final-value".to_string()))
668 );
669 assert_eq!(
670 doc.attributes.get("a"),
671 Some(&AttributeValue::String("final-value".to_string()))
672 );
673 }
674 }
675}