markdown_ppp/parser/mod.rs
1//! Markdown parser for CommonMark + GitHub Flavored Markdown (GFM)
2//!
3//! This module provides a comprehensive parser for Markdown documents following the
4//! CommonMark specification with GitHub Flavored Markdown extensions. The parser
5//! converts raw Markdown text into a fully-typed Abstract Syntax Tree (AST).
6//!
7//! # Features
8//!
//! - **CommonMark compliance**: Full support for the CommonMark specification
10//! - **GitHub extensions**: Tables, task lists, strikethrough, autolinks, footnotes, alerts
11//! - **Configurable parsing**: Control which elements to parse, skip, or transform
12//! - **Custom parsers**: Register custom block and inline element parsers
13//! - **Error handling**: Comprehensive error reporting with nom-based parsing
14//!
15//! # Basic Usage
16//!
17//! ```rust
18//! use markdown_ppp::parser::{parse_markdown, MarkdownParserState};
19//!
20//! let state = MarkdownParserState::new();
21//! let input = "# Hello World\n\nThis is **bold** text.";
22//!
23//! match parse_markdown(state, input) {
24//! Ok(document) => {
25//! println!("Parsed {} blocks", document.blocks.len());
26//! }
27//! Err(err) => {
28//! eprintln!("Parse error: {:?}", err);
29//! }
30//! }
31//! ```
32//!
33//! # Configuration
34//!
35//! The parser behavior can be extensively customized using configuration:
36//!
37//! ```rust
//! use markdown_ppp::parser::MarkdownParserState;
//! use markdown_ppp::parser::config::*;
39//!
40//! let config = MarkdownParserConfig::default()
41//! .with_block_thematic_break_behavior(ElementBehavior::Skip)
42//! .with_inline_emphasis_behavior(ElementBehavior::Parse);
43//!
44//! let state = MarkdownParserState::with_config(config);
45//! ```
46
47mod blocks;
48
49/// Configuration options for Markdown parsing behavior.
50pub mod config;
51mod inline;
52mod link_util;
53mod util;
54
55use crate::ast::Document;
56use crate::parser::config::MarkdownParserConfig;
57use nom::{
58 branch::alt,
59 character::complete::{line_ending, space1},
60 combinator::eof,
61 multi::many0,
62 sequence::terminated,
63 Parser,
64};
65use std::rc::Rc;
66
67/// Parser state containing configuration and shared context
68///
69/// This structure holds the parser configuration and provides shared state
70/// during the parsing process. It's designed to be cloned cheaply using
71/// reference counting for the configuration.
72///
73/// # Examples
74///
75/// ```rust
76/// use markdown_ppp::parser::{MarkdownParserState, config::MarkdownParserConfig};
77///
78/// // Create with default configuration
79/// let state = MarkdownParserState::new();
80///
81/// // Create with custom configuration
82/// let config = MarkdownParserConfig::default();
83/// let state = MarkdownParserState::with_config(config);
84/// ```
85pub struct MarkdownParserState {
86 /// The parser configuration (reference-counted for efficient cloning)
87 pub config: Rc<MarkdownParserConfig>,
88}
89
90impl MarkdownParserState {
91 /// Create a new parser state with default configuration
92 ///
93 /// # Examples
94 ///
95 /// ```rust
96 /// use markdown_ppp::parser::MarkdownParserState;
97 ///
98 /// let state = MarkdownParserState::new();
99 /// ```
100 pub fn new() -> Self {
101 Self::default()
102 }
103
104 /// Create a new parser state with the given configuration
105 ///
106 /// # Arguments
107 ///
108 /// * `config` - The parser configuration to use
109 ///
110 /// # Examples
111 ///
112 /// ```rust
113 /// use markdown_ppp::parser::{MarkdownParserState, config::MarkdownParserConfig};
114 ///
115 /// let config = MarkdownParserConfig::default();
116 /// let state = MarkdownParserState::with_config(config);
117 /// ```
118 pub fn with_config(config: MarkdownParserConfig) -> Self {
119 Self {
120 config: Rc::new(config),
121 }
122 }
123}
124
125impl Default for MarkdownParserState {
126 fn default() -> Self {
127 Self::with_config(MarkdownParserConfig::default())
128 }
129}
130
131/// Parse a Markdown string into an Abstract Syntax Tree (AST)
132///
133/// This is the main entry point for parsing Markdown text. It processes the input
134/// according to the CommonMark specification with GitHub Flavored Markdown extensions,
135/// returning a fully-typed AST that can be manipulated, analyzed, or rendered.
136///
137/// # Arguments
138///
139/// * `state` - Parser state containing configuration options
140/// * `input` - The Markdown text to parse
141///
142/// # Returns
143///
144/// Returns a `Result` containing either:
145/// - `Ok(Document)` - Successfully parsed AST document
146/// - `Err(nom::Err)` - Parse error with position and context information
147///
148/// # Examples
149///
150/// Basic parsing:
151/// ```rust
152/// use markdown_ppp::parser::{parse_markdown, MarkdownParserState};
153///
154/// let state = MarkdownParserState::new();
155/// let result = parse_markdown(state, "# Hello\n\nWorld!");
156///
157/// match result {
158/// Ok(doc) => println!("Parsed {} blocks", doc.blocks.len()),
159/// Err(e) => eprintln!("Parse error: {:?}", e),
160/// }
161/// ```
162///
163/// With custom configuration:
164/// ```rust
165/// use markdown_ppp::parser::{parse_markdown, MarkdownParserState};
166/// use markdown_ppp::parser::config::*;
167///
168/// let config = MarkdownParserConfig::default()
169/// .with_block_thematic_break_behavior(ElementBehavior::Skip);
170/// let state = MarkdownParserState::with_config(config);
171///
172/// let doc = parse_markdown(state, "---\n\nContent").unwrap();
173/// ```
174///
175/// # Errors
176///
177/// Returns a parse error if the input contains invalid Markdown syntax
178/// that cannot be recovered from. Most malformed Markdown is handled
179/// gracefully according to CommonMark's error handling rules.
180pub fn parse_markdown(
181 state: MarkdownParserState,
182 input: &str,
183) -> Result<Document, nom::Err<nom::error::Error<&str>>> {
184 let empty_lines = many0(alt((space1, line_ending)));
185 let mut parser = terminated(
186 many0(crate::parser::blocks::block(Rc::new(state))),
187 (empty_lines, eof),
188 );
189 let (_, blocks) = parser.parse(input)?;
190
191 let blocks = blocks.into_iter().flatten().collect();
192
193 Ok(Document { blocks })
194}