markdown_ppp/parser/
mod.rs

1//! Markdown parser for CommonMark + GitHub Flavored Markdown (GFM)
2//!
3//! This module provides a comprehensive parser for Markdown documents following the
4//! CommonMark specification with GitHub Flavored Markdown extensions. The parser
5//! converts raw Markdown text into a fully-typed Abstract Syntax Tree (AST).
6//!
7//! # Features
8//!
9//! - **CommonMark compliance**: Full support for CommonMark 1.0 specification
10//! - **GitHub extensions**: Tables, task lists, strikethrough, autolinks, footnotes, alerts
11//! - **Configurable parsing**: Control which elements to parse, skip, or transform
12//! - **Custom parsers**: Register custom block and inline element parsers
13//! - **Error handling**: Comprehensive error reporting with nom-based parsing
14//!
15//! # Basic Usage
16//!
17//! ```rust
18//! use markdown_ppp::parser::{parse_markdown, MarkdownParserState};
19//!
20//! let state = MarkdownParserState::new();
21//! let input = "# Hello World\n\nThis is **bold** text.";
22//!
23//! match parse_markdown(state, input) {
24//!     Ok(document) => {
25//!         println!("Parsed {} blocks", document.blocks.len());
26//!     }
27//!     Err(err) => {
28//!         eprintln!("Parse error: {:?}", err);
29//!     }
30//! }
31//! ```
32//!
33//! # Configuration
34//!
35//! The parser behavior can be extensively customized using [`MarkdownParserConfig`]:
36//!
//! ```rust
//! use markdown_ppp::parser::config::*;
//! use markdown_ppp::parser::MarkdownParserState;
//!
//! let config = MarkdownParserConfig::default()
//!     .with_block_thematic_break_behavior(ElementBehavior::Skip)
//!     .with_inline_emphasis_behavior(ElementBehavior::Parse);
//!
//! let state = MarkdownParserState::with_config(config);
//! ```
46
47mod blocks;
48pub mod config;
49mod inline;
50mod link_util;
51mod util;
52
53use crate::ast::Document;
54use crate::parser::config::MarkdownParserConfig;
55use nom::{
56    branch::alt,
57    character::complete::{line_ending, space1},
58    combinator::eof,
59    multi::many0,
60    sequence::terminated,
61    Parser,
62};
63use std::rc::Rc;
64
65/// Parser state containing configuration and shared context
66///
67/// This structure holds the parser configuration and provides shared state
68/// during the parsing process. It's designed to be cloned cheaply using
69/// reference counting for the configuration.
70///
71/// # Examples
72///
73/// ```rust
74/// use markdown_ppp::parser::{MarkdownParserState, config::MarkdownParserConfig};
75///
76/// // Create with default configuration
77/// let state = MarkdownParserState::new();
78///
79/// // Create with custom configuration
80/// let config = MarkdownParserConfig::default();
81/// let state = MarkdownParserState::with_config(config);
82/// ```
83pub struct MarkdownParserState {
84    /// The parser configuration (reference-counted for efficient cloning)
85    pub config: Rc<MarkdownParserConfig>,
86}
87
88impl MarkdownParserState {
89    /// Create a new parser state with default configuration
90    ///
91    /// # Examples
92    ///
93    /// ```rust
94    /// use markdown_ppp::parser::MarkdownParserState;
95    ///
96    /// let state = MarkdownParserState::new();
97    /// ```
98    pub fn new() -> Self {
99        Self::default()
100    }
101
102    /// Create a new parser state with the given configuration
103    ///
104    /// # Arguments
105    ///
106    /// * `config` - The parser configuration to use
107    ///
108    /// # Examples
109    ///
110    /// ```rust
111    /// use markdown_ppp::parser::{MarkdownParserState, config::MarkdownParserConfig};
112    ///
113    /// let config = MarkdownParserConfig::default();
114    /// let state = MarkdownParserState::with_config(config);
115    /// ```
116    pub fn with_config(config: MarkdownParserConfig) -> Self {
117        Self {
118            config: Rc::new(config),
119        }
120    }
121}
122
123impl Default for MarkdownParserState {
124    fn default() -> Self {
125        Self::with_config(MarkdownParserConfig::default())
126    }
127}
128
129/// Parse a Markdown string into an Abstract Syntax Tree (AST)
130///
131/// This is the main entry point for parsing Markdown text. It processes the input
132/// according to the CommonMark specification with GitHub Flavored Markdown extensions,
133/// returning a fully-typed AST that can be manipulated, analyzed, or rendered.
134///
135/// # Arguments
136///
137/// * `state` - Parser state containing configuration options
138/// * `input` - The Markdown text to parse
139///
140/// # Returns
141///
142/// Returns a `Result` containing either:
143/// - `Ok(Document)` - Successfully parsed AST document
144/// - `Err(nom::Err)` - Parse error with position and context information
145///
146/// # Examples
147///
148/// Basic parsing:
149/// ```rust
150/// use markdown_ppp::parser::{parse_markdown, MarkdownParserState};
151///
152/// let state = MarkdownParserState::new();
153/// let result = parse_markdown(state, "# Hello\n\nWorld!");
154///
155/// match result {
156///     Ok(doc) => println!("Parsed {} blocks", doc.blocks.len()),
157///     Err(e) => eprintln!("Parse error: {:?}", e),
158/// }
159/// ```
160///
161/// With custom configuration:
162/// ```rust
163/// use markdown_ppp::parser::{parse_markdown, MarkdownParserState};
164/// use markdown_ppp::parser::config::*;
165///
166/// let config = MarkdownParserConfig::default()
167///     .with_block_thematic_break_behavior(ElementBehavior::Skip);
168/// let state = MarkdownParserState::with_config(config);
169///
170/// let doc = parse_markdown(state, "---\n\nContent").unwrap();
171/// ```
172///
173/// # Errors
174///
175/// Returns a parse error if the input contains invalid Markdown syntax
176/// that cannot be recovered from. Most malformed Markdown is handled
177/// gracefully according to CommonMark's error handling rules.
178pub fn parse_markdown(
179    state: MarkdownParserState,
180    input: &str,
181) -> Result<Document, nom::Err<nom::error::Error<&str>>> {
182    let empty_lines = many0(alt((space1, line_ending)));
183    let mut parser = terminated(
184        many0(crate::parser::blocks::block(Rc::new(state))),
185        (empty_lines, eof),
186    );
187    let (_, blocks) = parser.parse(input)?;
188
189    let blocks = blocks.into_iter().flatten().collect();
190
191    Ok(Document { blocks })
192}