pulldown_cmark_frontmatter/
lib.rs1#![doc = include_str!("../README.md")]
2#![forbid(unsafe_code)]
3#![warn(
4 clippy::cargo,
5 missing_docs,
6 clippy::pedantic,
7 future_incompatible,
8 rust_2018_idioms
9)]
10#![allow(
11 clippy::option_if_let_else,
12 clippy::module_name_repetitions,
13 clippy::missing_errors_doc
14)]
15
16use std::vec;
17
18use pulldown_cmark::{CodeBlockKind, CowStr, Event, DefaultBrokenLinkCallback};
19
20pub struct FrontmatterExtractor<'a, T>
36where
37 T: Iterator<Item = Event<'a>>,
38{
39 pub frontmatter: Option<Frontmatter<'a>>,
41 source: T,
42 state: DocumentAttributeParserState<'a>,
43}
44
45impl<'a, T> FrontmatterExtractor<'a, T>
46where
47 T: Iterator<Item = Event<'a>>,
48{
49 pub fn new(parser: T) -> Self {
52 Self {
53 source: parser,
54 frontmatter: None,
55 state: DocumentAttributeParserState::Parsing,
56 }
57 }
58
59 fn frontmater_mut(&mut self) -> &mut Frontmatter<'a> {
60 if self.frontmatter.is_none() {
61 self.frontmatter = Some(Frontmatter {
62 title: None,
63 code_block: None,
64 });
65 }
66
67 self.frontmatter.as_mut().expect("always initialized")
68 }
69
70 pub fn extract(mut self) -> Option<Frontmatter<'a>> {
76 while let Some(_) = self.next() {
77 if matches!(self.state, DocumentAttributeParserState::InDocument) {
78 break;
79 }
80 }
81
82 self.frontmatter
83 }
84
85 pub fn extract_buffered(&mut self) -> Option<&Frontmatter<'a>> {
92 let mut buffered_events = Vec::new();
93 while let Some(event) = self.next() {
94 buffered_events.push(event);
95 if self.extracted() {
96 break;
97 }
98 }
99
100 self.state = DocumentAttributeParserState::InDocumentBuffered(buffered_events.into_iter());
101
102 self.frontmatter.as_ref()
103 }
104
105 #[must_use]
110 pub const fn extracted(&self) -> bool {
111 matches!(self.state, DocumentAttributeParserState::InDocument)
112 }
113}
114
115impl<'a> FrontmatterExtractor<'a, pulldown_cmark::Parser<'a, DefaultBrokenLinkCallback>> {
116 #[must_use]
119 pub fn from_markdown(markdown: &'a str) -> Self {
120 Self::new(pulldown_cmark::Parser::new(markdown))
121 }
122}
123
124impl<'a, T> Iterator for FrontmatterExtractor<'a, T>
125where
126 T: Iterator<Item = Event<'a>>,
127{
128 type Item = Event<'a>;
129
130 fn next(&mut self) -> Option<Self::Item> {
131 match &mut self.state {
132 DocumentAttributeParserState::InDocumentBuffered(buffered) => {
133 if let Some(event) = buffered.next() {
134 return Some(event);
135 }
136
137 self.state = DocumentAttributeParserState::InDocument;
138 return self.source.next();
139 }
140 DocumentAttributeParserState::InDocument => return self.source.next(),
141 _ => {}
142 }
143
144 loop {
145 match self.source.next()? {
146 Event::Text(text) if self.state.in_document_title() => {
147 self.frontmater_mut().title_mut().push_str(&text);
148 return Some(Event::Text(text));
149 }
150 Event::Text(text) if self.state.in_code() => {
151 let language = match self.state.code_block_kind() {
152 CodeBlockKind::Indented => None,
153 CodeBlockKind::Fenced(language) => Some(language),
154 };
155 let frontmatter = self.frontmater_mut();
156 frontmatter.code_block = Some(CodeBlock {
157 source: text,
158 language,
159 });
160 }
161 Event::Start(pulldown_cmark::Tag::Heading {
162 level: pulldown_cmark::HeadingLevel::H1,
163 id,
164 classes,
165 attrs,
166 }) if !self.state.in_document() => {
167 self.state = DocumentAttributeParserState::InTitle;
168 return Some(Event::Start(pulldown_cmark::Tag::Heading {
169 level: pulldown_cmark::HeadingLevel::H1,
170 id,
171 classes,
172 attrs,
173 }));
174 }
175 Event::End(pulldown_cmark::TagEnd::Heading (
176 pulldown_cmark::HeadingLevel::H1,
177 )) if !self.state.in_document() => {
178 self.state = DocumentAttributeParserState::Parsing;
179 return Some(Event::End(pulldown_cmark::TagEnd::Heading (
180 pulldown_cmark::HeadingLevel::H1,
181 )));
182 }
183 Event::Start(pulldown_cmark::Tag::CodeBlock(kind)) if !self.state.in_document() => {
184 self.state = DocumentAttributeParserState::InAttributeCodeBlock(kind);
185 }
186 Event::End(pulldown_cmark::TagEnd::CodeBlock) if !self.state.in_document() => {
187 self.state = DocumentAttributeParserState::InDocument;
188 }
189 other => {
190 if !self.state.in_document_title() {
191 self.state = DocumentAttributeParserState::InDocument;
192 }
193
194 return Some(other);
195 }
196 }
197 }
198 }
199}
200
201enum DocumentAttributeParserState<'a> {
202 Parsing,
203 InTitle,
204 InAttributeCodeBlock(CodeBlockKind<'a>),
205 InDocumentBuffered(vec::IntoIter<Event<'a>>),
206 InDocument,
207}
208
209impl<'a> DocumentAttributeParserState<'a> {
210 pub fn in_document(&self) -> bool {
211 matches!(self, Self::InDocument)
212 }
213
214 pub fn in_code(&self) -> bool {
215 matches!(self, Self::InAttributeCodeBlock(_))
216 }
217
218 pub fn code_block_kind(&self) -> CodeBlockKind<'a> {
219 if let Self::InAttributeCodeBlock(kind) = self {
220 kind.clone()
221 } else {
222 CodeBlockKind::Indented
223 }
224 }
225
226 pub fn in_document_title(&self) -> bool {
227 matches!(self, Self::InTitle)
228 }
229}
230
231#[derive(Debug, Clone)]
233pub struct Frontmatter<'a> {
234 pub title: Option<String>,
237 pub code_block: Option<CodeBlock<'a>>,
239}
240
241impl<'a> Frontmatter<'a> {
242 fn title_mut(&mut self) -> &mut String {
243 if self.title.is_none() {
244 self.title = Some(String::new());
245 }
246
247 self.title.as_mut().expect("always initialized")
248 }
249}
250
251#[derive(Clone, Debug)]
253pub struct CodeBlock<'a> {
254 pub source: CowStr<'a>,
256 pub language: Option<CowStr<'a>>,
259}
260
/// A title followed by a fenced `toml` block is recorded as frontmatter, and
/// the block is left out of the rendered HTML.
#[test]
fn attribute_parser_test() {
    #[derive(serde::Serialize, serde::Deserialize, Debug)]
    struct Attributes {
        hello: String,
    }

    let source = r#"# My **Document**

```toml
hello = "world"
```

This is regular text
"#;

    let mut extractor = FrontmatterExtractor::from_markdown(source);
    let mut rendered = String::new();
    pulldown_cmark::html::push_html(&mut rendered, &mut extractor);
    assert_eq!(
        rendered,
        "<h1>My <strong>Document</strong></h1>\n<p>This is regular text</p>\n"
    );

    let frontmatter = extractor.frontmatter.expect("frontmatter not detected");
    assert_eq!(frontmatter.title.as_deref(), Some("My Document"));

    let block = frontmatter.code_block.expect("code block not detected");
    assert_eq!(block.language, Some(CowStr::from("toml")));

    let attributes: Attributes = toml::from_str(&block.source).unwrap();
    assert_eq!(attributes.hello, "world");
}
293
/// `extract_buffered` detects the frontmatter up front, and rendering the
/// extractor afterwards still yields the complete document minus the
/// frontmatter code block.
#[test]
fn extract_buffered() {
    // The code line must be indented by four spaces: with fewer, Markdown
    // treats it as a paragraph and no frontmatter code block is detected.
    let mut parser =
        FrontmatterExtractor::from_markdown("# Heading\n\n    hello world\n\nBody");
    let frontmatter = parser.extract_buffered().unwrap();
    assert_eq!(frontmatter.title.as_deref(), Some("Heading"));
    assert_eq!(
        frontmatter.code_block.as_ref().unwrap().source.as_ref(),
        "hello world\n"
    );

    let mut html = String::new();
    pulldown_cmark::html::push_html(&mut html, parser);
    assert_eq!(html, "<h1>Heading</h1>\n<p>Body</p>\n");
}
307
/// A title followed by an indented code block is recorded as frontmatter
/// with no language, and the block is left out of the rendered HTML.
#[test]
fn indented_parse_test() {
    #[derive(serde::Serialize, serde::Deserialize, Debug)]
    struct Attributes {
        hello: String,
    }

    // The attribute line must be indented by four spaces: with fewer,
    // Markdown treats it as a paragraph rather than an indented code block.
    let source = r#"# My **Document**

    hello = "world"

This is regular text
"#;

    let mut parser = FrontmatterExtractor::from_markdown(source);
    let mut html = String::new();
    pulldown_cmark::html::push_html(&mut html, &mut parser);
    assert_eq!(
        html,
        "<h1>My <strong>Document</strong></h1>\n<p>This is regular text</p>\n"
    );

    let frontmatter = parser.frontmatter.expect("frontmatter not detected");
    assert_eq!(frontmatter.title.as_deref(), Some("My Document"));

    let code_block = frontmatter.code_block.expect("code block not detected");
    assert_eq!(code_block.language, None);

    let deserialized: Attributes = toml::from_str(&code_block.source).unwrap();
    assert_eq!(deserialized.hello, "world");
}