markdown_ppp/parser/
mod.rs

1//! Markdown parser for CommonMark + GitHub Flavored Markdown (GFM)
2//!
3//! This module provides a comprehensive parser for Markdown documents following the
4//! CommonMark specification with GitHub Flavored Markdown extensions. The parser
5//! converts raw Markdown text into a fully-typed Abstract Syntax Tree (AST).
6//!
7//! # Features
8//!
//! - **CommonMark compliance**: Full support for the CommonMark specification
10//! - **GitHub extensions**: Tables, task lists, strikethrough, autolinks, footnotes, alerts
11//! - **Configurable parsing**: Control which elements to parse, skip, or transform
12//! - **Custom parsers**: Register custom block and inline element parsers
13//! - **Error handling**: Comprehensive error reporting with nom-based parsing
14//!
15//! # Basic Usage
16//!
17//! ```rust
18//! use markdown_ppp::parser::{parse_markdown, MarkdownParserState};
19//!
20//! let state = MarkdownParserState::new();
21//! let input = "# Hello World\n\nThis is **bold** text.";
22//!
23//! match parse_markdown(state, input) {
24//!     Ok(document) => {
25//!         println!("Parsed {} blocks", document.blocks.len());
26//!     }
27//!     Err(err) => {
28//!         eprintln!("Parse error: {:?}", err);
29//!     }
30//! }
31//! ```
32//!
33//! # Configuration
34//!
35//! The parser behavior can be extensively customized using configuration:
36//!
37//! ```rust
38//! use markdown_ppp::parser::{MarkdownParserState, config::*};
39//!
40//! let config = MarkdownParserConfig::default()
41//!     .with_block_thematic_break_behavior(ElementBehavior::Skip)
42//!     .with_inline_emphasis_behavior(ElementBehavior::Parse);
43//!
44//! let state = MarkdownParserState::with_config(config);
45//! ```
46
47mod blocks;
48
49/// Configuration options for Markdown parsing behavior.
50pub mod config;
51mod inline;
52mod link_util;
53mod util;
54
55use crate::ast::Document;
56use crate::parser::config::MarkdownParserConfig;
57use nom::{
58    branch::alt,
59    character::complete::{line_ending, space1},
60    combinator::eof,
61    multi::many0,
62    sequence::terminated,
63    Parser,
64};
65use std::rc::Rc;
66
67/// Parser state containing configuration and shared context
68///
69/// This structure holds the parser configuration and provides shared state
70/// during the parsing process. It's designed to be cloned cheaply using
71/// reference counting for the configuration.
72///
73/// # Examples
74///
75/// ```rust
76/// use markdown_ppp::parser::{MarkdownParserState, config::MarkdownParserConfig};
77///
78/// // Create with default configuration
79/// let state = MarkdownParserState::new();
80///
81/// // Create with custom configuration
82/// let config = MarkdownParserConfig::default();
83/// let state = MarkdownParserState::with_config(config);
84/// ```
85/// Note: This struct is marked `#[non_exhaustive]` to allow adding new fields
86/// in future versions without breaking existing code.
87#[non_exhaustive]
88pub struct MarkdownParserState {
89    /// The parser configuration (reference-counted for efficient cloning)
90    pub config: Rc<MarkdownParserConfig>,
91    /// Whether we are parsing content extracted from a container block (list item, blockquote, etc.)
92    /// When true, fenced code blocks should not strip additional indentation from their content.
93    /// This field is for internal use only.
94    pub(crate) is_nested_block_context: bool,
95}
96
97impl MarkdownParserState {
98    /// Create a new parser state with default configuration
99    ///
100    /// # Examples
101    ///
102    /// ```rust
103    /// use markdown_ppp::parser::MarkdownParserState;
104    ///
105    /// let state = MarkdownParserState::new();
106    /// ```
107    pub fn new() -> Self {
108        Self::default()
109    }
110
111    /// Create a new parser state with the given configuration
112    ///
113    /// # Arguments
114    ///
115    /// * `config` - The parser configuration to use
116    ///
117    /// # Examples
118    ///
119    /// ```rust
120    /// use markdown_ppp::parser::{MarkdownParserState, config::MarkdownParserConfig};
121    ///
122    /// let config = MarkdownParserConfig::default();
123    /// let state = MarkdownParserState::with_config(config);
124    /// ```
125    pub fn with_config(config: MarkdownParserConfig) -> Self {
126        Self {
127            config: Rc::new(config),
128            is_nested_block_context: false,
129        }
130    }
131
132    /// Create a nested parser state for parsing content extracted from container blocks
133    ///
134    /// This method creates a new state that shares the same configuration but marks
135    /// the parsing context as nested. This prevents double-stripping of indentation
136    /// when parsing fenced code blocks inside list items, blockquotes, etc.
137    pub(crate) fn nested(&self) -> Self {
138        Self {
139            config: self.config.clone(),
140            is_nested_block_context: true,
141        }
142    }
143}
144
145impl Default for MarkdownParserState {
146    fn default() -> Self {
147        Self::with_config(MarkdownParserConfig::default())
148    }
149}
150
151/// Parse a Markdown string into an Abstract Syntax Tree (AST)
152///
153/// This is the main entry point for parsing Markdown text. It processes the input
154/// according to the CommonMark specification with GitHub Flavored Markdown extensions,
155/// returning a fully-typed AST that can be manipulated, analyzed, or rendered.
156///
157/// # Arguments
158///
159/// * `state` - Parser state containing configuration options
160/// * `input` - The Markdown text to parse
161///
162/// # Returns
163///
164/// Returns a `Result` containing either:
165/// - `Ok(Document)` - Successfully parsed AST document
166/// - `Err(nom::Err)` - Parse error with position and context information
167///
168/// # Examples
169///
170/// Basic parsing:
171/// ```rust
172/// use markdown_ppp::parser::{parse_markdown, MarkdownParserState};
173///
174/// let state = MarkdownParserState::new();
175/// let result = parse_markdown(state, "# Hello\n\nWorld!");
176///
177/// match result {
178///     Ok(doc) => println!("Parsed {} blocks", doc.blocks.len()),
179///     Err(e) => eprintln!("Parse error: {:?}", e),
180/// }
181/// ```
182///
183/// With custom configuration:
184/// ```rust
185/// use markdown_ppp::parser::{parse_markdown, MarkdownParserState};
186/// use markdown_ppp::parser::config::*;
187///
188/// let config = MarkdownParserConfig::default()
189///     .with_block_thematic_break_behavior(ElementBehavior::Skip);
190/// let state = MarkdownParserState::with_config(config);
191///
192/// let doc = parse_markdown(state, "---\n\nContent").unwrap();
193/// ```
194///
195/// # Errors
196///
197/// Returns a parse error if the input contains invalid Markdown syntax
198/// that cannot be recovered from. Most malformed Markdown is handled
199/// gracefully according to CommonMark's error handling rules.
200pub fn parse_markdown(
201    state: MarkdownParserState,
202    input: &str,
203) -> Result<Document, nom::Err<nom::error::Error<&str>>> {
204    let empty_lines = many0(alt((space1, line_ending)));
205    let mut parser = terminated(
206        many0(crate::parser::blocks::block(Rc::new(state))),
207        (empty_lines, eof),
208    );
209    let (_, blocks) = parser.parse(input)?;
210
211    let blocks = blocks.into_iter().flatten().collect();
212
213    Ok(Document { blocks })
214}