comrak/parser/
mod.rs

1mod autolink;
2mod inlines;
3#[cfg(feature = "shortcodes")]
4pub mod shortcodes;
5mod table;
6
7pub mod alert;
8pub mod math;
9pub mod multiline_block_quote;
10
11use crate::adapters::SyntaxHighlighterAdapter;
12use crate::arena_tree::Node;
13use crate::ctype::{isdigit, isspace};
14use crate::entity;
15use crate::nodes::{self, NodeFootnoteDefinition, Sourcepos};
16use crate::nodes::{
17    Ast, AstNode, ListDelimType, ListType, NodeCodeBlock, NodeDescriptionItem, NodeHeading,
18    NodeHtmlBlock, NodeList, NodeValue,
19};
20use crate::scanners::{self, SetextChar};
21use crate::strings::{self, split_off_front_matter, Case};
22use std::cell::RefCell;
23use std::cmp::{min, Ordering};
24use std::collections::{HashMap, VecDeque};
25use std::fmt::{self, Debug, Formatter};
26use std::mem;
27use std::panic::RefUnwindSafe;
28use std::str;
29use std::sync::Arc;
30use typed_arena::Arena;
31
32use crate::adapters::HeadingAdapter;
33use crate::parser::alert::{AlertType, NodeAlert};
34use crate::parser::multiline_block_quote::NodeMultilineBlockQuote;
35
36#[cfg(feature = "bon")]
37use bon::Builder;
38
39use self::inlines::RefMap;
40
// NOTE(review): presumably the column interval between tab stops used when
// tabs are expanded during block parsing — confirm at the use sites.
const TAB_STOP: usize = 4;
// NOTE(review): presumably the indentation (in columns) from which a line is
// treated as indented code — confirm at the use sites.
const CODE_INDENT: usize = 4;

// Very deeply nested lists can cause quadratic performance issues.
// This constant is used in open_new_blocks() to limit the nesting
// depth. It is unlikely that a non-contrived markdown document will
// be nested this deeply.
const MAX_LIST_DEPTH: usize = 100;
49
50macro_rules! node_matches {
51    ($node:expr, $( $pat:pat )|+) => {{
52        matches!(
53            $node.data.borrow().value,
54            $( $pat )|+
55        )
56    }};
57}
58
59/// Parse a Markdown document to an AST.
60///
61/// See the documentation of the crate root for an example.
62pub fn parse_document<'a>(
63    arena: &'a Arena<AstNode<'a>>,
64    buffer: &str,
65    options: &Options,
66) -> &'a AstNode<'a> {
67    let root: &'a AstNode<'a> = arena.alloc(Node::new(RefCell::new(Ast {
68        value: NodeValue::Document,
69        content: String::new(),
70        sourcepos: (1, 1, 1, 1).into(),
71        internal_offset: 0,
72        open: true,
73        last_line_blank: false,
74        table_visited: false,
75        line_offsets: Vec::with_capacity(0),
76    })));
77    let mut parser = Parser::new(arena, root, options);
78    let mut linebuf = Vec::with_capacity(buffer.len());
79    parser.feed(&mut linebuf, buffer, true);
80    parser.finish(linebuf)
81}
82
83/// Parse a Markdown document to an AST, specifying
84/// [`ParseOptions::broken_link_callback`].
85#[deprecated(
86    since = "0.25.0",
87    note = "The broken link callback has been moved into ParseOptions."
88)]
89pub fn parse_document_with_broken_link_callback<'a, 'c>(
90    arena: &'a Arena<AstNode<'a>>,
91    buffer: &str,
92    options: &Options,
93    callback: Arc<dyn BrokenLinkCallback + 'c>,
94) -> &'a AstNode<'a> {
95    let mut options_with_callback = options.clone();
96    options_with_callback.parse.broken_link_callback = Some(callback);
97    parse_document(arena, buffer, &options_with_callback)
98}
99
/// The type of the callback used when a reference link is encountered with no
/// matching reference.
///
/// The details of the broken reference are passed in the
/// [`BrokenLinkReference`] argument. If a [`ResolvedReference`] is returned, it
/// is used as the link; otherwise, no link is made and the reference text is
/// preserved in its entirety.
///
/// Any function or closure of type
/// `Fn(BrokenLinkReference) -> Option<ResolvedReference>` that is also
/// `RefUnwindSafe + Send + Sync` implements this trait through a blanket
/// implementation.
pub trait BrokenLinkCallback: RefUnwindSafe + Send + Sync {
    /// Potentially resolve a single broken link reference.
    ///
    /// Return `Some` to turn the reference into a link, or `None` to leave the
    /// reference text as it is.
    fn resolve(&self, broken_link_reference: BrokenLinkReference) -> Option<ResolvedReference>;
}
111
112impl<'c> Debug for dyn BrokenLinkCallback + 'c {
113    fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), fmt::Error> {
114        formatter.write_str("<dyn BrokenLinkCallback>")
115    }
116}
117
118impl<F> BrokenLinkCallback for F
119where
120    F: Fn(BrokenLinkReference) -> Option<ResolvedReference>,
121    F: RefUnwindSafe + Send + Sync,
122{
123    fn resolve(&self, broken_link_reference: BrokenLinkReference) -> Option<ResolvedReference> {
124        self(broken_link_reference)
125    }
126}
127
/// Argument passed to the broken link callback, containing details on the
/// link reference which failed to find a match.
#[derive(Debug)]
pub struct BrokenLinkReference<'l> {
    /// The normalized reference link label. Unicode case folding is applied;
    /// see <https://github.com/commonmark/commonmark-spec/issues/695> for a
    /// discussion on the details of what this exactly means.
    pub normalized: &'l str,

    /// The original text in the link label.
    pub original: &'l str,
}
140
/// State carried while a document is being parsed.
///
/// [`parse_document`] constructs one with `Parser::new`, supplies the input
/// through `feed`, and obtains the finished tree from `finish`.
pub struct Parser<'a, 'o, 'c> {
    // Arena in which AST nodes are allocated.
    arena: &'a Arena<AstNode<'a>>,
    // NOTE(review): presumably the link reference definitions collected so
    // far — confirm against `inlines::RefMap`.
    refmap: RefMap,
    // Root node of the tree being built.
    root: &'a AstNode<'a>,
    // NOTE(review): presumably the innermost block currently being filled —
    // confirm.
    current: &'a AstNode<'a>,
    // NOTE(review): the fields below look like per-line scanning state
    // (position within the current line, indentation, blank-line flag) and
    // bookkeeping about previously seen input — confirm at the use sites.
    line_number: usize,
    offset: usize,
    column: usize,
    thematic_break_kill_pos: usize,
    first_nonspace: usize,
    first_nonspace_column: usize,
    indent: usize,
    blank: bool,
    partially_consumed_tab: bool,
    curline_len: usize,
    curline_end_col: usize,
    last_line_length: usize,
    last_buffer_ended_with_cr: bool,
    total_size: usize,
    // Options the parser was created with.
    options: &'o Options<'c>,
}
162
#[derive(Default, Debug, Clone)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
/// Umbrella options struct.
///
/// Groups the extension, parse-time and render-time options so that they can
/// be passed around as a single value.
pub struct Options<'c> {
    /// Enable CommonMark extensions.
    pub extension: ExtensionOptions<'c>,

    /// Configure parse-time options.
    pub parse: ParseOptions<'c>,

    /// Configure render-time options.
    pub render: RenderOptions,
}
176
/// Trait for link and image URL rewrite extensions.
///
/// Any function or closure of type `Fn(&str) -> String` that is also
/// `RefUnwindSafe + Send + Sync` implements this trait through a blanket
/// implementation.
pub trait URLRewriter: RefUnwindSafe + Send + Sync {
    /// Converts the given URL from Markdown to its representation when output as HTML.
    fn to_html(&self, url: &str) -> String;
}
182
183impl<'c> Debug for dyn URLRewriter + 'c {
184    fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
185        formatter.write_str("<dyn URLRewriter>")
186    }
187}
188
189impl<F> URLRewriter for F
190where
191    F: for<'a> Fn(&'a str) -> String,
192    F: RefUnwindSafe + Send + Sync,
193{
194    fn to_html(&self, url: &str) -> String {
195        self(url)
196    }
197}
198
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
/// Selects between wikilinks with the title first or the URL first.
pub enum WikiLinksMode {
    /// Indicates that the URL precedes the title. For example: `[[http://example.com|link
    /// title]]`.
    ///
    /// Selected by [`ExtensionOptions::wikilinks_title_after_pipe`].
    UrlFirst,

    /// Indicates that the title precedes the URL. For example: `[[link title|http://example.com]]`.
    ///
    /// Selected by [`ExtensionOptions::wikilinks_title_before_pipe`].
    TitleFirst,
}
211
#[derive(Default, Debug, Clone)]
#[cfg_attr(feature = "bon", derive(Builder))]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
/// Options to select extensions.
///
/// In the `Default` value every boolean extension is disabled and every
/// optional setting is `None`.
pub struct ExtensionOptions<'c> {
    /// Enables the
    /// [strikethrough extension](https://github.github.com/gfm/#strikethrough-extension-)
    /// from the GFM spec.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.strikethrough = true;
    /// assert_eq!(markdown_to_html("Hello ~world~ there.\n", &options),
    ///            "<p>Hello <del>world</del> there.</p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub strikethrough: bool,

    /// Enables the
    /// [tagfilter extension](https://github.github.com/gfm/#disallowed-raw-html-extension-)
    /// from the GFM spec.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.tagfilter = true;
    /// options.render.unsafe_ = true;
    /// assert_eq!(markdown_to_html("Hello <xmp>.\n\n<xmp>", &options),
    ///            "<p>Hello &lt;xmp>.</p>\n&lt;xmp>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub tagfilter: bool,

    /// Enables the [table extension](https://github.github.com/gfm/#tables-extension-)
    /// from the GFM spec.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.table = true;
    /// assert_eq!(markdown_to_html("| a | b |\n|---|---|\n| c | d |\n", &options),
    ///            "<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n</tr>\n</thead>\n\
    ///             <tbody>\n<tr>\n<td>c</td>\n<td>d</td>\n</tr>\n</tbody>\n</table>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub table: bool,

    /// Enables the [autolink extension](https://github.github.com/gfm/#autolinks-extension-)
    /// from the GFM spec.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.autolink = true;
    /// assert_eq!(markdown_to_html("Hello www.github.com.\n", &options),
    ///            "<p>Hello <a href=\"http://www.github.com\">www.github.com</a>.</p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub autolink: bool,

    /// Enables the
    /// [task list items extension](https://github.github.com/gfm/#task-list-items-extension-)
    /// from the GFM spec.
    ///
    /// Note that the spec does not define the precise output, so only the bare essentials are
    /// rendered.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.tasklist = true;
    /// options.render.unsafe_ = true;
    /// assert_eq!(markdown_to_html("* [x] Done\n* [ ] Not done\n", &options),
    ///            "<ul>\n<li><input type=\"checkbox\" checked=\"\" disabled=\"\" /> Done</li>\n\
    ///            <li><input type=\"checkbox\" disabled=\"\" /> Not done</li>\n</ul>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub tasklist: bool,

    /// Enables the superscript Comrak extension.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.superscript = true;
    /// assert_eq!(markdown_to_html("e = mc^2^.\n", &options),
    ///            "<p>e = mc<sup>2</sup>.</p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub superscript: bool,

    /// Enables the header IDs Comrak extension.
    ///
    /// The given string is used as a prefix for the generated `id` attributes,
    /// as the example below shows.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.header_ids = Some("user-content-".to_string());
    /// assert_eq!(markdown_to_html("# README\n", &options),
    ///            "<h1><a href=\"#readme\" aria-hidden=\"true\" class=\"anchor\" id=\"user-content-readme\"></a>README</h1>\n");
    /// ```
    pub header_ids: Option<String>,

    /// Enables the footnotes extension per `cmark-gfm`.
    ///
    /// For usage, see `src/tests.rs`.  The extension is modelled after
    /// [Kramdown](https://kramdown.gettalong.org/syntax.html#footnotes).
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.footnotes = true;
    /// assert_eq!(markdown_to_html("Hi[^x].\n\n[^x]: A greeting.\n", &options),
    ///            "<p>Hi<sup class=\"footnote-ref\"><a href=\"#fn-x\" id=\"fnref-x\" data-footnote-ref>1</a></sup>.</p>\n<section class=\"footnotes\" data-footnotes>\n<ol>\n<li id=\"fn-x\">\n<p>A greeting. <a href=\"#fnref-x\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1\" aria-label=\"Back to reference 1\">↩</a></p>\n</li>\n</ol>\n</section>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub footnotes: bool,

    /// Enables the description lists extension.
    ///
    /// Each term must be defined in one paragraph, followed by a blank line,
    /// and then by the details.  Details begin with a colon.
    ///
    /// Not (yet) compatible with render.sourcepos.
    ///
    /// ``` md
    /// First term
    ///
    /// : Details for the **first term**
    ///
    /// Second term
    ///
    /// : Details for the **second term**
    ///
    ///     More details in second paragraph.
    /// ```
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.description_lists = true;
    /// assert_eq!(markdown_to_html("Term\n\n: Definition", &options),
    ///            "<dl>\n<dt>Term</dt>\n<dd>\n<p>Definition</p>\n</dd>\n</dl>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub description_lists: bool,

    /// Enables the front matter extension.
    ///
    /// Front matter, which begins with the delimiter string at the beginning of the file and ends
    /// at the end of the next line that contains only the delimiter, is passed through unchanged
    /// in markdown output and omitted from HTML output.
    ///
    /// ``` md
    /// ---
    /// layout: post
    /// title: Formatting Markdown with Comrak
    /// ---
    ///
    /// # Shorter Title
    ///
    /// etc.
    /// ```
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.front_matter_delimiter = Some("---".to_owned());
    /// assert_eq!(
    ///     markdown_to_html("---\nlayout: post\n---\nText\n", &options),
    ///     markdown_to_html("Text\n", &Options::default()));
    /// ```
    ///
    /// ```
    /// # use comrak::{format_commonmark, Arena, Options};
    /// use comrak::parse_document;
    /// let mut options = Options::default();
    /// options.extension.front_matter_delimiter = Some("---".to_owned());
    /// let arena = Arena::new();
    /// let input ="---\nlayout: post\n---\nText\n";
    /// let root = parse_document(&arena, input, &options);
    /// let mut buf = Vec::new();
    /// format_commonmark(&root, &options, &mut buf);
    /// assert_eq!(&String::from_utf8(buf).unwrap(), input);
    /// ```
    pub front_matter_delimiter: Option<String>,

    /// Enables the multiline block quote extension.
    ///
    /// Place `>>>` before and after text to make it into
    /// a block quote.
    ///
    /// ``` md
    /// Paragraph one
    ///
    /// >>>
    /// Paragraph two
    ///
    /// - one
    /// - two
    /// >>>
    /// ```
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.multiline_block_quotes = true;
    /// assert_eq!(markdown_to_html(">>>\nparagraph\n>>>", &options),
    ///            "<blockquote>\n<p>paragraph</p>\n</blockquote>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub multiline_block_quotes: bool,

    /// Enables GitHub-style alerts.
    ///
    /// ```md
    /// > [!note]
    /// > Something of note
    /// ```
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.alerts = true;
    /// assert_eq!(markdown_to_html("> [!note]\n> Something of note", &options),
    ///            "<div class=\"markdown-alert markdown-alert-note\">\n<p class=\"markdown-alert-title\">Note</p>\n<p>Something of note</p>\n</div>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub alerts: bool,

    /// Enables math using dollar syntax.
    ///
    /// ``` md
    /// Inline math $1 + 2$ and display math $$x + y$$
    ///
    /// $$
    /// x^2
    /// $$
    /// ```
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.math_dollars = true;
    /// assert_eq!(markdown_to_html("$1 + 2$ and $$x = y$$", &options),
    ///            "<p><span data-math-style=\"inline\">1 + 2</span> and <span data-math-style=\"display\">x = y</span></p>\n");
    /// assert_eq!(markdown_to_html("$$\nx^2\n$$\n", &options),
    ///            "<p><span data-math-style=\"display\">\nx^2\n</span></p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub math_dollars: bool,

    /// Enables math using code syntax.
    ///
    /// ```` md
    /// Inline math $`1 + 2`$
    ///
    /// ```math
    /// x^2
    /// ```
    /// ````
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.math_code = true;
    /// assert_eq!(markdown_to_html("$`1 + 2`$", &options),
    ///            "<p><code data-math-style=\"inline\">1 + 2</code></p>\n");
    /// assert_eq!(markdown_to_html("```math\nx^2\n```\n", &options),
    ///            "<pre><code class=\"language-math\" data-math-style=\"display\">x^2\n</code></pre>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub math_code: bool,

    #[cfg(feature = "shortcodes")]
    #[cfg_attr(docsrs, doc(cfg(feature = "shortcodes")))]
    /// Phrases wrapped inside of ':' blocks will be replaced with emojis.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// assert_eq!(markdown_to_html("Happy Friday! :smile:", &options),
    ///            "<p>Happy Friday! :smile:</p>\n");
    ///
    /// options.extension.shortcodes = true;
    /// assert_eq!(markdown_to_html("Happy Friday! :smile:", &options),
    ///            "<p>Happy Friday! 😄</p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub shortcodes: bool,

    /// Enables wikilinks using title after pipe syntax.
    ///
    /// ```` md
    /// [[url|link label]]
    /// ````
    ///
    /// When both this option and [`wikilinks_title_before_pipe`][0] are enabled, this option takes
    /// precedence.
    ///
    /// [0]: Self::wikilinks_title_before_pipe
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.wikilinks_title_after_pipe = true;
    /// assert_eq!(markdown_to_html("[[url|link label]]", &options),
    ///            "<p><a href=\"url\" data-wikilink=\"true\">link label</a></p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub wikilinks_title_after_pipe: bool,

    /// Enables wikilinks using title before pipe syntax.
    ///
    /// ```` md
    /// [[link label|url]]
    /// ````
    ///
    /// When both this option and [`wikilinks_title_after_pipe`][0] are enabled,
    /// [`wikilinks_title_after_pipe`][0] takes precedence.
    ///
    /// [0]: Self::wikilinks_title_after_pipe
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.wikilinks_title_before_pipe = true;
    /// assert_eq!(markdown_to_html("[[link label|url]]", &options),
    ///            "<p><a href=\"url\" data-wikilink=\"true\">link label</a></p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub wikilinks_title_before_pipe: bool,

    /// Enables underlines using double underscores.
    ///
    /// ```md
    /// __underlined text__
    /// ```
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.underline = true;
    ///
    /// assert_eq!(markdown_to_html("__underlined text__", &options),
    ///            "<p><u>underlined text</u></p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub underline: bool,

    /// Enables subscript text using single tildes.
    ///
    /// If the strikethrough option is also enabled, this overrides the single
    /// tilde case to output subscript text.
    ///
    /// ```md
    /// H~2~O
    /// ```
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.subscript = true;
    ///
    /// assert_eq!(markdown_to_html("H~2~O", &options),
    ///            "<p>H<sub>2</sub>O</p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub subscript: bool,

    /// Enables spoilers using double vertical bars.
    ///
    /// ```md
    /// Darth Vader is ||Luke's father||
    /// ```
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.spoiler = true;
    ///
    /// assert_eq!(markdown_to_html("Darth Vader is ||Luke's father||", &options),
    ///            "<p>Darth Vader is <span class=\"spoiler\">Luke's father</span></p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub spoiler: bool,

    /// Requires at least one space after a `>` character to generate a blockquote,
    /// and restarts blockquote nesting across unique lines of input.
    ///
    /// ```md
    /// >implying implications
    ///
    /// > one
    /// > > two
    /// > three
    /// ```
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.greentext = true;
    ///
    /// assert_eq!(markdown_to_html(">implying implications", &options),
    ///            "<p>&gt;implying implications</p>\n");
    ///
    /// assert_eq!(markdown_to_html("> one\n> > two\n> three", &options),
    ///            concat!(
    ///             "<blockquote>\n",
    ///             "<p>one</p>\n",
    ///             "<blockquote>\n<p>two</p>\n</blockquote>\n",
    ///             "<p>three</p>\n",
    ///             "</blockquote>\n"));
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub greentext: bool,

    /// Wraps embedded image URLs using a function or custom trait object.
    ///
    /// ```
    /// # use std::sync::Arc;
    /// # use comrak::{markdown_to_html, ComrakOptions};
    /// let mut options = ComrakOptions::default();
    ///
    /// options.extension.image_url_rewriter = Some(Arc::new(
    ///     |url: &str| format!("https://safe.example.com?url={}", url)
    /// ));
    ///
    /// assert_eq!(markdown_to_html("![](http://unsafe.example.com/bad.png)", &options),
    ///            "<p><img src=\"https://safe.example.com?url=http://unsafe.example.com/bad.png\" alt=\"\" /></p>\n");
    /// ```
    #[cfg_attr(feature = "arbitrary", arbitrary(value = None))]
    pub image_url_rewriter: Option<Arc<dyn URLRewriter + 'c>>,

    /// Wraps link URLs using a function or custom trait object.
    ///
    /// ```
    /// # use std::sync::Arc;
    /// # use comrak::{markdown_to_html, ComrakOptions};
    /// let mut options = ComrakOptions::default();
    ///
    /// options.extension.link_url_rewriter = Some(Arc::new(
    ///     |url: &str| format!("https://safe.example.com/norefer?url={}", url)
    /// ));
    ///
    /// assert_eq!(markdown_to_html("[my link](http://unsafe.example.com/bad)", &options),
    ///            "<p><a href=\"https://safe.example.com/norefer?url=http://unsafe.example.com/bad\">my link</a></p>\n");
    /// ```
    #[cfg_attr(feature = "arbitrary", arbitrary(value = None))]
    pub link_url_rewriter: Option<Arc<dyn URLRewriter + 'c>>,

    /// Recognizes many emphasis sequences that appear in CJK contexts but are
    /// not recognized by plain CommonMark.
    ///
    /// ```md
    /// **この文は重要です。**但这句话并不重要。
    /// ```
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.cjk_friendly_emphasis = true;
    ///
    /// assert_eq!(markdown_to_html("**この文は重要です。**但这句话并不重要。", &options),
    ///            "<p><strong>この文は重要です。</strong>但这句话并不重要。</p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub cjk_friendly_emphasis: bool,
}
679
680impl<'c> ExtensionOptions<'c> {
681    pub(crate) fn wikilinks(&self) -> Option<WikiLinksMode> {
682        match (
683            self.wikilinks_title_before_pipe,
684            self.wikilinks_title_after_pipe,
685        ) {
686            (false, false) => None,
687            (true, false) => Some(WikiLinksMode::TitleFirst),
688            (_, _) => Some(WikiLinksMode::UrlFirst),
689        }
690    }
691}
692
#[derive(Default, Clone, Debug)]
#[cfg_attr(feature = "bon", derive(Builder))]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
/// Options for parser functions.
pub struct ParseOptions<'c> {
    /// Punctuation (quotes, full-stops and hyphens) are converted into 'smart' punctuation.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// assert_eq!(markdown_to_html("'Hello,' \"world\" ...", &options),
    ///            "<p>'Hello,' &quot;world&quot; ...</p>\n");
    ///
    /// options.parse.smart = true;
    /// assert_eq!(markdown_to_html("'Hello,' \"world\" ...", &options),
    ///            "<p>‘Hello,’ “world” …</p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub smart: bool,

    /// The default info string for fenced code blocks.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// assert_eq!(markdown_to_html("```\nfn hello();\n```\n", &options),
    ///            "<pre><code>fn hello();\n</code></pre>\n");
    ///
    /// options.parse.default_info_string = Some("rust".into());
    /// assert_eq!(markdown_to_html("```\nfn hello();\n```\n", &options),
    ///            "<pre><code class=\"language-rust\">fn hello();\n</code></pre>\n");
    /// ```
    pub default_info_string: Option<String>,

    /// Whether only a simple `x` or `X` is accepted in a tasklist checkbox, or any other
    /// symbol is allowed as well.
    #[cfg_attr(feature = "bon", builder(default))]
    pub relaxed_tasklist_matching: bool,

    /// Relax parsing of autolinks, allow links to be detected inside brackets
    /// and allow all url schemes. It is intended to allow a very specific type of autolink
    /// detection, such as `[this http://and.com that]` or `{http://foo.com}`, on a
    /// best-effort basis.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.autolink = true;
    /// assert_eq!(markdown_to_html("[https://foo.com]", &options),
    ///            "<p>[https://foo.com]</p>\n");
    ///
    /// options.parse.relaxed_autolinks = true;
    /// assert_eq!(markdown_to_html("[https://foo.com]", &options),
    ///            "<p>[<a href=\"https://foo.com\">https://foo.com</a>]</p>\n");
    /// ```
    #[cfg_attr(feature = "bon", builder(default))]
    pub relaxed_autolinks: bool,

    /// In case the parser encounters any potential links that have a broken
    /// reference (e.g `[foo]` when there is no `[foo]: url` entry at the
    /// bottom) the provided callback will be called with the reference name,
    /// both in normalized form and unmodified, and the returned pair will be
    /// used as the link destination and title if not [`None`].
    ///
    /// ```
    /// # use std::{str, sync::Arc};
    /// # use comrak::{markdown_to_html, BrokenLinkReference, Options, ResolvedReference};
    /// let cb = |link_ref: BrokenLinkReference| match link_ref.normalized {
    ///     "foo" => Some(ResolvedReference {
    ///         url: "https://www.rust-lang.org/".to_string(),
    ///         title: "The Rust Language".to_string(),
    ///     }),
    ///     _ => None,
    /// };
    ///
    /// let mut options = Options::default();
    /// options.parse.broken_link_callback = Some(Arc::new(cb));
    ///
    /// let output = markdown_to_html(
    ///     "# Cool input!\nWow look at this cool [link][foo]. A [broken link] renders as text.",
    ///     &options,
    /// );
    ///
    /// assert_eq!(output,
    ///            "<h1>Cool input!</h1>\n<p>Wow look at this cool \
    ///            <a href=\"https://www.rust-lang.org/\" title=\"The Rust Language\">link</a>. \
    ///            A [broken link] renders as text.</p>\n");
    /// ```
    #[cfg_attr(feature = "arbitrary", arbitrary(default))]
    pub broken_link_callback: Option<Arc<dyn BrokenLinkCallback + 'c>>,
}
781
782#[derive(Default, Debug, Clone, Copy)]
783#[cfg_attr(feature = "bon", derive(Builder))]
784#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
785/// Options for formatter functions.
786pub struct RenderOptions {
787    /// [Soft line breaks](http://spec.commonmark.org/0.27/#soft-line-breaks) in the input
788    /// translate into hard line breaks in the output.
789    ///
790    /// ```
791    /// # use comrak::{markdown_to_html, Options};
792    /// let mut options = Options::default();
793    /// assert_eq!(markdown_to_html("Hello.\nWorld.\n", &options),
794    ///            "<p>Hello.\nWorld.</p>\n");
795    ///
796    /// options.render.hardbreaks = true;
797    /// assert_eq!(markdown_to_html("Hello.\nWorld.\n", &options),
798    ///            "<p>Hello.<br />\nWorld.</p>\n");
799    /// ```
800    #[cfg_attr(feature = "bon", builder(default))]
801    pub hardbreaks: bool,
802
803    /// GitHub-style `<pre lang="xyz">` is used for fenced code blocks with info tags.
804    ///
805    /// ```
806    /// # use comrak::{markdown_to_html, Options};
807    /// let mut options = Options::default();
808    /// assert_eq!(markdown_to_html("``` rust\nfn hello();\n```\n", &options),
809    ///            "<pre><code class=\"language-rust\">fn hello();\n</code></pre>\n");
810    ///
811    /// options.render.github_pre_lang = true;
812    /// assert_eq!(markdown_to_html("``` rust\nfn hello();\n```\n", &options),
813    ///            "<pre lang=\"rust\"><code>fn hello();\n</code></pre>\n");
814    /// ```
815    #[cfg_attr(feature = "bon", builder(default))]
816    pub github_pre_lang: bool,
817
818    /// Enable full info strings for code blocks
819    ///
820    /// ```
821    /// # use comrak::{markdown_to_html, Options};
822    /// let mut options = Options::default();
823    /// assert_eq!(markdown_to_html("``` rust extra info\nfn hello();\n```\n", &options),
824    ///            "<pre><code class=\"language-rust\">fn hello();\n</code></pre>\n");
825    ///
826    /// options.render.full_info_string = true;
827    /// let html = markdown_to_html("``` rust extra info\nfn hello();\n```\n", &options);
828    /// assert!(html.contains(r#"data-meta="extra info""#));
829    /// ```
830    #[cfg_attr(feature = "bon", builder(default))]
831    pub full_info_string: bool,
832
833    /// The wrap column when outputting CommonMark.
834    ///
835    /// ```
836    /// # use comrak::{parse_document, Options, format_commonmark};
837    /// # fn main() {
838    /// # let arena = typed_arena::Arena::new();
839    /// let mut options = Options::default();
840    /// let node = parse_document(&arena, "hello hello hello hello hello hello", &options);
841    /// let mut output = vec![];
842    /// format_commonmark(node, &options, &mut output).unwrap();
843    /// assert_eq!(String::from_utf8(output).unwrap(),
844    ///            "hello hello hello hello hello hello\n");
845    ///
846    /// options.render.width = 20;
847    /// let mut output = vec![];
848    /// format_commonmark(node, &options, &mut output).unwrap();
849    /// assert_eq!(String::from_utf8(output).unwrap(),
850    ///            "hello hello hello\nhello hello hello\n");
851    /// # }
852    /// ```
853    #[cfg_attr(feature = "bon", builder(default))]
854    pub width: usize,
855
856    /// Allow rendering of raw HTML and potentially dangerous links.
857    ///
858    /// ```
859    /// # use comrak::{markdown_to_html, Options};
860    /// let mut options = Options::default();
861    /// let input = "<script>\nalert('xyz');\n</script>\n\n\
862    ///              Possibly <marquee>annoying</marquee>.\n\n\
863    ///              [Dangerous](javascript:alert(document.cookie)).\n\n\
864    ///              [Safe](http://commonmark.org).\n";
865    ///
866    /// assert_eq!(markdown_to_html(input, &options),
867    ///            "<!-- raw HTML omitted -->\n\
868    ///             <p>Possibly <!-- raw HTML omitted -->annoying<!-- raw HTML omitted -->.</p>\n\
869    ///             <p><a href=\"\">Dangerous</a>.</p>\n\
870    ///             <p><a href=\"http://commonmark.org\">Safe</a>.</p>\n");
871    ///
872    /// options.render.unsafe_ = true;
873    /// assert_eq!(markdown_to_html(input, &options),
874    ///            "<script>\nalert(\'xyz\');\n</script>\n\
875    ///             <p>Possibly <marquee>annoying</marquee>.</p>\n\
876    ///             <p><a href=\"javascript:alert(document.cookie)\">Dangerous</a>.</p>\n\
877    ///             <p><a href=\"http://commonmark.org\">Safe</a>.</p>\n");
878    /// ```
879    #[cfg_attr(feature = "bon", builder(default))]
880    pub unsafe_: bool,
881
882    /// Escape raw HTML instead of clobbering it.
883    /// ```
884    /// # use comrak::{markdown_to_html, Options};
885    /// let mut options = Options::default();
886    /// let input = "<i>italic text</i>";
887    ///
888    /// assert_eq!(markdown_to_html(input, &options),
889    ///            "<p><!-- raw HTML omitted -->italic text<!-- raw HTML omitted --></p>\n");
890    ///
891    /// options.render.escape = true;
892    /// assert_eq!(markdown_to_html(input, &options),
893    ///            "<p>&lt;i&gt;italic text&lt;/i&gt;</p>\n");
894    /// ```
895    #[cfg_attr(feature = "bon", builder(default))]
896    pub escape: bool,
897
898    /// Set the type of [bullet list marker](https://spec.commonmark.org/0.30/#bullet-list-marker) to use. Options are:
899    ///
900    /// * [`ListStyleType::Dash`] to use `-` (default)
901    /// * [`ListStyleType::Plus`] to use `+`
902    /// * [`ListStyleType::Star`] to use `*`
903    ///
904    /// ```rust
905    /// # use comrak::{markdown_to_commonmark, Options, ListStyleType};
906    /// let mut options = Options::default();
907    /// let input = "- one\n- two\n- three";
908    /// assert_eq!(markdown_to_commonmark(input, &options),
909    ///            "- one\n- two\n- three\n"); // default is Dash
910    ///
911    /// options.render.list_style = ListStyleType::Plus;
912    /// assert_eq!(markdown_to_commonmark(input, &options),
913    ///            "+ one\n+ two\n+ three\n");
914    ///
915    /// options.render.list_style = ListStyleType::Star;
916    /// assert_eq!(markdown_to_commonmark(input, &options),
917    ///            "* one\n* two\n* three\n");
918    /// ```
919    #[cfg_attr(feature = "bon", builder(default))]
920    pub list_style: ListStyleType,
921
922    /// Include source position attributes in HTML and XML output.
923    ///
924    /// Sourcepos information is reliable for core block items excluding
925    /// lists and list items, all inlines, and most extensions.
926    /// The description lists extension still has issues; see
927    /// <https://github.com/kivikakk/comrak/blob/3bb6d4ce/src/tests/description_
928    /// lists.rs#L60-L125>.
929    ///
930    ///
931    /// ```rust
932    /// # use comrak::{markdown_to_html, Options};
933    /// let mut options = Options::default();
934    /// options.render.sourcepos = true;
935    /// let input = "Hello *world*!";
936    /// assert_eq!(markdown_to_html(input, &options),
937    ///            "<p data-sourcepos=\"1:1-1:14\">Hello <em data-sourcepos=\"1:7-1:13\">world</em>!</p>\n");
938    /// ```
939    #[cfg_attr(feature = "bon", builder(default))]
940    pub sourcepos: bool,
941
942    /// Wrap escaped characters in a `<span>` to allow any
943    /// post-processing to recognize them.
944    ///
945    /// ```rust
946    /// # use comrak::{markdown_to_html, Options};
947    /// let mut options = Options::default();
948    /// let input = "Notify user \\@example";
949    ///
950    /// assert_eq!(markdown_to_html(input, &options),
951    ///            "<p>Notify user @example</p>\n");
952    ///
953    /// options.render.escaped_char_spans = true;
954    /// assert_eq!(markdown_to_html(input, &options),
955    ///            "<p>Notify user <span data-escaped-char>@</span>example</p>\n");
956    /// ```
957    #[cfg_attr(feature = "bon", builder(default))]
958    pub escaped_char_spans: bool,
959
960    /// Ignore setext headings in input.
961    ///
962    /// ```rust
963    /// # use comrak::{markdown_to_html, Options};
964    /// let mut options = Options::default();
965    /// let input = "setext heading\n---";
966    ///
967    /// assert_eq!(markdown_to_html(input, &options),
968    ///            "<h2>setext heading</h2>\n");
969    ///
970    /// options.render.ignore_setext = true;
971    /// assert_eq!(markdown_to_html(input, &options),
972    ///            "<p>setext heading</p>\n<hr />\n");
973    /// ```
974    #[cfg_attr(feature = "bon", builder(default))]
975    pub ignore_setext: bool,
976
977    /// Ignore empty links in input.
978    ///
979    /// ```rust
980    /// # use comrak::{markdown_to_html, Options};
981    /// let mut options = Options::default();
982    /// let input = "[]()";
983    ///
984    /// assert_eq!(markdown_to_html(input, &options),
985    ///            "<p><a href=\"\"></a></p>\n");
986    ///
987    /// options.render.ignore_empty_links = true;
988    /// assert_eq!(markdown_to_html(input, &options), "<p>[]()</p>\n");
989    /// ```
990    #[cfg_attr(feature = "bon", builder(default))]
991    pub ignore_empty_links: bool,
992
993    /// Enables GFM quirks in HTML output which break CommonMark compatibility.
994    ///
995    /// ```rust
996    /// # use comrak::{markdown_to_html, Options};
997    /// let mut options = Options::default();
998    /// let input = "****abcd**** *_foo_*";
999    ///
1000    /// assert_eq!(markdown_to_html(input, &options),
1001    ///            "<p><strong><strong>abcd</strong></strong> <em><em>foo</em></em></p>\n");
1002    ///
1003    /// options.render.gfm_quirks = true;
1004    /// assert_eq!(markdown_to_html(input, &options),
1005    ///            "<p><strong>abcd</strong> <em><em>foo</em></em></p>\n");
1006    /// ```
1007    #[cfg_attr(feature = "bon", builder(default))]
1008    pub gfm_quirks: bool,
1009
1010    /// Prefer fenced code blocks when outputting CommonMark.
1011    ///
1012    /// ```rust
1013    /// # use std::str;
1014    /// # use comrak::{Arena, Options, format_commonmark, parse_document};
1015    /// let arena = Arena::new();
1016    /// let mut options = Options::default();
1017    /// let input = "```\nhello\n```\n";
1018    /// let root = parse_document(&arena, input, &options);
1019    ///
1020    /// let mut buf = Vec::new();
1021    /// format_commonmark(&root, &options, &mut buf);
1022    /// assert_eq!(str::from_utf8(&buf).unwrap(), "    hello\n");
1023    ///
1024    /// buf.clear();
1025    /// options.render.prefer_fenced = true;
1026    /// format_commonmark(&root, &options, &mut buf);
1027    /// assert_eq!(str::from_utf8(&buf).unwrap(), "```\nhello\n```\n");
1028    /// ```
1029    #[cfg_attr(feature = "bon", builder(default))]
1030    pub prefer_fenced: bool,
1031
1032    /// Render the image as a figure element with the title as its caption.
1033    ///
1034    /// ```rust
1035    /// # use comrak::{markdown_to_html, Options};
1036    /// let mut options = Options::default();
1037    /// let input = "![image](https://example.com/image.png \"this is an image\")";
1038    ///
1039    /// assert_eq!(markdown_to_html(input, &options),
1040    ///            "<p><img src=\"https://example.com/image.png\" alt=\"image\" title=\"this is an image\" /></p>\n");
1041    ///
1042    /// options.render.figure_with_caption = true;
1043    /// assert_eq!(markdown_to_html(input, &options),
1044    ///            "<p><figure><img src=\"https://example.com/image.png\" alt=\"image\" title=\"this is an image\" /><figcaption>this is an image</figcaption></figure></p>\n");
1045    /// ```
1046    #[cfg_attr(feature = "bon", builder(default))]
1047    pub figure_with_caption: bool,
1048
1049    /// Add classes to the output of the tasklist extension. This allows tasklists to be styled.
1050    ///
1051    /// ```rust
1052    /// # use comrak::{markdown_to_html, Options};
1053    /// let mut options = Options::default();
1054    /// options.extension.tasklist = true;
1055    /// let input = "- [ ] Foo";
1056    ///
1057    /// assert_eq!(markdown_to_html(input, &options),
1058    ///            "<ul>\n<li><input type=\"checkbox\" disabled=\"\" /> Foo</li>\n</ul>\n");
1059    ///
1060    /// options.render.tasklist_classes = true;
1061    /// assert_eq!(markdown_to_html(input, &options),
1062    ///            "<ul class=\"contains-task-list\">\n<li class=\"task-list-item\"><input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled=\"\" /> Foo</li>\n</ul>\n");
1063    /// ```
1064    #[cfg_attr(feature = "bon", builder(default))]
1065    pub tasklist_classes: bool,
1066
1067    /// Render ordered list with a minimum marker width.
1068    /// Having a width lower than 3 doesn't do anything.
1069    ///
1070    /// ```rust
1071    /// # use comrak::{markdown_to_commonmark, Options};
1072    /// let mut options = Options::default();
1073    /// let input = "1. Something";
1074    ///
1075    /// assert_eq!(markdown_to_commonmark(input, &options),
1076    ///            "1. Something\n");
1077    ///
1078    /// options.render.ol_width = 5;
1079    /// assert_eq!(markdown_to_commonmark(input, &options),
1080    ///            "1.   Something\n");
1081    /// ```
1082    #[cfg_attr(feature = "bon", builder(default))]
1083    pub ol_width: usize,
1084
1085    /// Minimise escapes used in CommonMark output (`-t commonmark`) by removing
1086    /// each individually and seeing if the resulting document roundtrips.
1087    /// Brute-force and expensive, but produces nicer output.  Note that the
1088    /// result may not in fact be minimal.
1089    ///
1090    /// ```rust
1091    /// # use comrak::{markdown_to_commonmark, Options};
1092    /// let mut options = Options::default();
1093    /// let input = "__hi";
1094    ///
1095    /// assert_eq!(markdown_to_commonmark(input, &options),
1096    ///            "\\_\\_hi\n");
1097    ///
1098    /// options.render.experimental_minimize_commonmark = true;
1099    /// assert_eq!(markdown_to_commonmark(input, &options),
1100    ///            "__hi\n");
1101    /// ```
1102    #[cfg_attr(feature = "bon", builder(default))]
1103    pub experimental_minimize_commonmark: bool,
1104}
1105
1106#[derive(Default, Debug, Clone)]
1107#[cfg_attr(feature = "bon", derive(Builder))]
1108/// Umbrella plugins struct.
1109pub struct Plugins<'p> {
1110    /// Configure render-time plugins.
1111    #[cfg_attr(feature = "bon", builder(default))]
1112    pub render: RenderPlugins<'p>,
1113}
1114
1115#[derive(Default, Clone)]
1116#[cfg_attr(feature = "bon", derive(Builder))]
1117/// Plugins for alternative rendering.
1118pub struct RenderPlugins<'p> {
1119    /// Provide a syntax highlighter adapter implementation for syntax
1120    /// highlighting of codefence blocks.
1121    /// ```
1122    /// # use comrak::{markdown_to_html, Options, Plugins, markdown_to_html_with_plugins};
1123    /// # use comrak::adapters::SyntaxHighlighterAdapter;
1124    /// use std::collections::HashMap;
1125    /// use std::io::{self, Write};
1126    /// let options = Options::default();
1127    /// let mut plugins = Plugins::default();
1128    /// let input = "```rust\nfn main<'a>();\n```";
1129    ///
1130    /// assert_eq!(markdown_to_html_with_plugins(input, &options, &plugins),
1131    ///            "<pre><code class=\"language-rust\">fn main&lt;'a&gt;();\n</code></pre>\n");
1132    ///
1133    /// pub struct MockAdapter {}
1134    /// impl SyntaxHighlighterAdapter for MockAdapter {
1135    ///     fn write_highlighted(&self, output: &mut dyn Write, lang: Option<&str>, code: &str) -> io::Result<()> {
1136    ///         write!(output, "<span class=\"lang-{}\">{}</span>", lang.unwrap(), code)
1137    ///     }
1138    ///
1139    ///     fn write_pre_tag(&self, output: &mut dyn Write, _attributes: HashMap<String, String>) -> io::Result<()> {
1140    ///         output.write_all(b"<pre lang=\"rust\">")
1141    ///     }
1142    ///
1143    ///     fn write_code_tag(&self, output: &mut dyn Write, _attributes: HashMap<String, String>) -> io::Result<()> {
1144    ///         output.write_all(b"<code class=\"language-rust\">")
1145    ///     }
1146    /// }
1147    ///
1148    /// let adapter = MockAdapter {};
1149    /// plugins.render.codefence_syntax_highlighter = Some(&adapter);
1150    ///
1151    /// assert_eq!(markdown_to_html_with_plugins(input, &options, &plugins),
1152    ///            "<pre lang=\"rust\"><code class=\"language-rust\"><span class=\"lang-rust\">fn main<'a>();\n</span></code></pre>\n");
1153    /// ```
1154    pub codefence_syntax_highlighter: Option<&'p dyn SyntaxHighlighterAdapter>,
1155
1156    /// Optional heading adapter
1157    pub heading_adapter: Option<&'p dyn HeadingAdapter>,
1158}
1159
1160impl Debug for RenderPlugins<'_> {
1161    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1162        f.debug_struct("RenderPlugins")
1163            .field(
1164                "codefence_syntax_highlighter",
1165                &"impl SyntaxHighlighterAdapter",
1166            )
1167            .finish()
1168    }
1169}
1170
/// A reference link's resolved details.
///
/// Both parts are owned strings, so a value can be cloned and kept
/// independently of whatever produced it.
#[derive(Clone, Debug)]
pub struct ResolvedReference {
    /// The destination URL of the reference link.
    pub url: String,

    /// The text of the link.
    pub title: String,
}
1180
1181struct FootnoteDefinition<'a> {
1182    ix: Option<u32>,
1183    node: &'a AstNode<'a>,
1184    name: String,
1185    total_references: u32,
1186}
1187
1188impl<'a, 'o, 'c> Parser<'a, 'o, 'c>
1189where
1190    'c: 'o,
1191{
1192    fn new(arena: &'a Arena<AstNode<'a>>, root: &'a AstNode<'a>, options: &'o Options<'c>) -> Self {
1193        Parser {
1194            arena,
1195            refmap: RefMap::new(),
1196            root,
1197            current: root,
1198            line_number: 0,
1199            offset: 0,
1200            column: 0,
1201            thematic_break_kill_pos: 0,
1202            first_nonspace: 0,
1203            first_nonspace_column: 0,
1204            indent: 0,
1205            blank: false,
1206            partially_consumed_tab: false,
1207            curline_len: 0,
1208            curline_end_col: 0,
1209            last_line_length: 0,
1210            last_buffer_ended_with_cr: false,
1211            total_size: 0,
1212            options,
1213        }
1214    }
1215
1216    fn feed(&mut self, linebuf: &mut Vec<u8>, mut s: &str, eof: bool) {
1217        if let (0, Some(delimiter)) = (
1218            self.total_size,
1219            &self.options.extension.front_matter_delimiter,
1220        ) {
1221            if let Some((front_matter, rest)) = split_off_front_matter(s, delimiter) {
1222                let lines = front_matter
1223                    .as_bytes()
1224                    .iter()
1225                    .filter(|b| **b == b'\n')
1226                    .count();
1227
1228                let mut stripped_front_matter = front_matter.to_string();
1229                strings::remove_trailing_blank_lines(&mut stripped_front_matter);
1230                let stripped_lines = stripped_front_matter
1231                    .as_bytes()
1232                    .iter()
1233                    .filter(|b| **b == b'\n')
1234                    .count();
1235
1236                let node = self.add_child(
1237                    self.root,
1238                    NodeValue::FrontMatter(front_matter.to_string()),
1239                    1,
1240                );
1241                s = rest;
1242                self.finalize(node).unwrap();
1243
1244                node.data.borrow_mut().sourcepos = Sourcepos {
1245                    start: nodes::LineColumn { line: 1, column: 1 },
1246                    end: nodes::LineColumn {
1247                        line: 1 + stripped_lines,
1248                        column: delimiter.len(),
1249                    },
1250                };
1251                self.line_number += lines;
1252            }
1253        }
1254
1255        let s = s.as_bytes();
1256
1257        if s.len() > usize::MAX - self.total_size {
1258            self.total_size = usize::MAX;
1259        } else {
1260            self.total_size += s.len();
1261        }
1262
1263        let mut buffer = 0;
1264        if self.last_buffer_ended_with_cr && !s.is_empty() && s[0] == b'\n' {
1265            buffer += 1;
1266        }
1267        self.last_buffer_ended_with_cr = false;
1268
1269        let end = s.len();
1270
1271        while buffer < end {
1272            let mut process = false;
1273            let mut eol = buffer;
1274            while eol < end {
1275                if strings::is_line_end_char(s[eol]) {
1276                    process = true;
1277                    break;
1278                }
1279                if s[eol] == 0 {
1280                    break;
1281                }
1282                eol += 1;
1283            }
1284
1285            if eol >= end && eof {
1286                process = true;
1287            }
1288
1289            if process {
1290                if !linebuf.is_empty() {
1291                    linebuf.extend_from_slice(&s[buffer..eol]);
1292                    self.process_line(linebuf);
1293                    linebuf.truncate(0);
1294                } else {
1295                    self.process_line(&s[buffer..eol]);
1296                }
1297            } else if eol < end && s[eol] == b'\0' {
1298                linebuf.extend_from_slice(&s[buffer..eol]);
1299                linebuf.extend_from_slice(&"\u{fffd}".to_string().into_bytes());
1300            } else {
1301                linebuf.extend_from_slice(&s[buffer..eol]);
1302            }
1303
1304            buffer = eol;
1305            if buffer < end {
1306                if s[buffer] == b'\0' {
1307                    buffer += 1;
1308                } else {
1309                    if s[buffer] == b'\r' {
1310                        buffer += 1;
1311                        if buffer == end {
1312                            self.last_buffer_ended_with_cr = true;
1313                        }
1314                    }
1315                    if buffer < end && s[buffer] == b'\n' {
1316                        buffer += 1;
1317                    }
1318                }
1319            }
1320        }
1321    }
1322
1323    fn scan_thematic_break_inner(&mut self, line: &[u8]) -> (usize, bool) {
1324        let mut i = self.first_nonspace;
1325
1326        if i >= line.len() {
1327            return (i, false);
1328        }
1329
1330        let c = line[i];
1331        if c != b'*' && c != b'_' && c != b'-' {
1332            return (i, false);
1333        }
1334
1335        let mut count = 1;
1336        let mut nextc;
1337        loop {
1338            i += 1;
1339            if i >= line.len() {
1340                return (i, false);
1341            }
1342            nextc = line[i];
1343
1344            if nextc == c {
1345                count += 1;
1346            } else if nextc != b' ' && nextc != b'\t' {
1347                break;
1348            }
1349        }
1350
1351        if count >= 3 && (nextc == b'\r' || nextc == b'\n') {
1352            ((i - self.first_nonspace) + 1, true)
1353        } else {
1354            (i, false)
1355        }
1356    }
1357
1358    fn scan_thematic_break(&mut self, line: &[u8]) -> Option<usize> {
1359        let (offset, found) = self.scan_thematic_break_inner(line);
1360        if !found {
1361            self.thematic_break_kill_pos = offset;
1362            None
1363        } else {
1364            Some(offset)
1365        }
1366    }
1367
1368    fn find_first_nonspace(&mut self, line: &[u8]) {
1369        let mut chars_to_tab = TAB_STOP - (self.column % TAB_STOP);
1370
1371        if self.first_nonspace <= self.offset {
1372            self.first_nonspace = self.offset;
1373            self.first_nonspace_column = self.column;
1374
1375            loop {
1376                if self.first_nonspace >= line.len() {
1377                    break;
1378                }
1379                match line[self.first_nonspace] {
1380                    32 => {
1381                        self.first_nonspace += 1;
1382                        self.first_nonspace_column += 1;
1383                        chars_to_tab -= 1;
1384                        if chars_to_tab == 0 {
1385                            chars_to_tab = TAB_STOP;
1386                        }
1387                    }
1388                    9 => {
1389                        self.first_nonspace += 1;
1390                        self.first_nonspace_column += chars_to_tab;
1391                        chars_to_tab = TAB_STOP;
1392                    }
1393                    _ => break,
1394                }
1395            }
1396        }
1397
1398        self.indent = self.first_nonspace_column - self.column;
1399        self.blank = self.first_nonspace < line.len()
1400            && strings::is_line_end_char(line[self.first_nonspace]);
1401    }
1402
1403    fn process_line(&mut self, line: &[u8]) {
1404        let mut new_line: Vec<u8>;
1405        let line = if line.is_empty() || !strings::is_line_end_char(*line.last().unwrap()) {
1406            new_line = line.into();
1407            new_line.push(b'\n');
1408            &new_line
1409        } else {
1410            line
1411        };
1412
1413        self.curline_len = line.len();
1414        self.curline_end_col = line.len();
1415        if self.curline_end_col > 0 && line[self.curline_end_col - 1] == b'\n' {
1416            self.curline_end_col -= 1;
1417        }
1418        if self.curline_end_col > 0 && line[self.curline_end_col - 1] == b'\r' {
1419            self.curline_end_col -= 1;
1420        }
1421
1422        self.offset = 0;
1423        self.column = 0;
1424        self.first_nonspace = 0;
1425        self.first_nonspace_column = 0;
1426        self.indent = 0;
1427        self.thematic_break_kill_pos = 0;
1428        self.blank = false;
1429        self.partially_consumed_tab = false;
1430
1431        if self.line_number == 0
1432            && line.len() >= 3
1433            && unsafe { str::from_utf8_unchecked(line) }.starts_with('\u{feff}')
1434        {
1435            self.offset += 3;
1436        }
1437
1438        self.line_number += 1;
1439
1440        let mut all_matched = true;
1441        if let Some(last_matched_container) = self.check_open_blocks(line, &mut all_matched) {
1442            let mut container = last_matched_container;
1443            let current = self.current;
1444            self.open_new_blocks(&mut container, line, all_matched);
1445
1446            if current.same_node(self.current) {
1447                self.add_text_to_container(container, last_matched_container, line);
1448            }
1449        }
1450
1451        self.last_line_length = self.curline_end_col;
1452
1453        self.curline_len = 0;
1454        self.curline_end_col = 0;
1455    }
1456
1457    fn check_open_blocks(
1458        &mut self,
1459        line: &[u8],
1460        all_matched: &mut bool,
1461    ) -> Option<&'a AstNode<'a>> {
1462        let (new_all_matched, mut container, should_continue) =
1463            self.check_open_blocks_inner(self.root, line);
1464
1465        *all_matched = new_all_matched;
1466        if !*all_matched {
1467            container = container.parent().unwrap();
1468        }
1469
1470        if !should_continue {
1471            None
1472        } else {
1473            Some(container)
1474        }
1475    }
1476
1477    fn check_open_blocks_inner(
1478        &mut self,
1479        mut container: &'a AstNode<'a>,
1480        line: &[u8],
1481    ) -> (bool, &'a AstNode<'a>, bool) {
1482        let mut should_continue = true;
1483
1484        while nodes::last_child_is_open(container) {
1485            container = container.last_child().unwrap();
1486            let ast = &mut *container.data.borrow_mut();
1487
1488            self.find_first_nonspace(line);
1489
1490            match ast.value {
1491                NodeValue::BlockQuote => {
1492                    if !self.parse_block_quote_prefix(line) {
1493                        return (false, container, should_continue);
1494                    }
1495                }
1496                NodeValue::Item(ref nl) => {
1497                    if !self.parse_node_item_prefix(line, container, nl) {
1498                        return (false, container, should_continue);
1499                    }
1500                }
1501                NodeValue::DescriptionItem(ref di) => {
1502                    if !self.parse_description_item_prefix(line, container, di) {
1503                        return (false, container, should_continue);
1504                    }
1505                }
1506                NodeValue::CodeBlock(..) => {
1507                    if !self.parse_code_block_prefix(line, container, ast, &mut should_continue) {
1508                        return (false, container, should_continue);
1509                    }
1510                }
1511                NodeValue::HtmlBlock(ref nhb) => {
1512                    if !self.parse_html_block_prefix(nhb.block_type) {
1513                        return (false, container, should_continue);
1514                    }
1515                }
1516                NodeValue::Paragraph => {
1517                    if self.blank {
1518                        return (false, container, should_continue);
1519                    }
1520                }
1521                NodeValue::Table(..) => {
1522                    if !table::matches(&line[self.first_nonspace..], self.options.extension.spoiler)
1523                    {
1524                        return (false, container, should_continue);
1525                    }
1526                    continue;
1527                }
1528                NodeValue::Heading(..) | NodeValue::TableRow(..) | NodeValue::TableCell => {
1529                    return (false, container, should_continue);
1530                }
1531                NodeValue::FootnoteDefinition(..) => {
1532                    if !self.parse_footnote_definition_block_prefix(line) {
1533                        return (false, container, should_continue);
1534                    }
1535                }
1536                NodeValue::MultilineBlockQuote(..) => {
1537                    if !self.parse_multiline_block_quote_prefix(
1538                        line,
1539                        container,
1540                        ast,
1541                        &mut should_continue,
1542                    ) {
1543                        return (false, container, should_continue);
1544                    }
1545                }
1546                NodeValue::Alert(ref alert) => {
1547                    if alert.multiline {
1548                        if !self.parse_multiline_block_quote_prefix(
1549                            line,
1550                            container,
1551                            ast,
1552                            &mut should_continue,
1553                        ) {
1554                            return (false, container, should_continue);
1555                        }
1556                    } else if !self.parse_block_quote_prefix(line) {
1557                        return (false, container, should_continue);
1558                    }
1559                }
1560                _ => {}
1561            }
1562        }
1563
1564        (true, container, should_continue)
1565    }
1566
1567    fn is_not_greentext(&mut self, line: &[u8]) -> bool {
1568        !self.options.extension.greentext || strings::is_space_or_tab(line[self.first_nonspace + 1])
1569    }
1570
1571    fn setext_heading_line(&mut self, s: &[u8]) -> Option<SetextChar> {
1572        match self.options.render.ignore_setext {
1573            false => scanners::setext_heading_line(s),
1574            true => None,
1575        }
1576    }
1577
1578    fn detect_multiline_blockquote(
1579        &mut self,
1580        line: &[u8],
1581        indented: bool,
1582        matched: &mut usize,
1583    ) -> bool {
1584        !indented
1585            && self.options.extension.multiline_block_quotes
1586            && unwrap_into(
1587                scanners::open_multiline_block_quote_fence(&line[self.first_nonspace..]),
1588                matched,
1589            )
1590    }
1591
1592    fn handle_multiline_blockquote(
1593        &mut self,
1594        container: &mut &'a Node<'a, RefCell<Ast>>,
1595        line: &[u8],
1596        indented: bool,
1597        matched: &mut usize,
1598    ) -> bool {
1599        if !self.detect_multiline_blockquote(line, indented, matched) {
1600            return false;
1601        }
1602
1603        let first_nonspace = self.first_nonspace;
1604        let offset = self.offset;
1605        let nmbc = NodeMultilineBlockQuote {
1606            fence_length: *matched,
1607            fence_offset: first_nonspace - offset,
1608        };
1609
1610        *container = self.add_child(
1611            container,
1612            NodeValue::MultilineBlockQuote(nmbc),
1613            self.first_nonspace + 1,
1614        );
1615
1616        self.advance_offset(line, first_nonspace + *matched - offset, false);
1617
1618        true
1619    }
1620
1621    fn detect_blockquote(&mut self, line: &[u8], indented: bool) -> bool {
1622        !indented && line[self.first_nonspace] == b'>' && self.is_not_greentext(line)
1623    }
1624
1625    fn handle_blockquote(
1626        &mut self,
1627        container: &mut &'a Node<'a, RefCell<Ast>>,
1628        line: &[u8],
1629        indented: bool,
1630    ) -> bool {
1631        if !self.detect_blockquote(line, indented) {
1632            return false;
1633        }
1634
1635        let blockquote_startpos = self.first_nonspace;
1636
1637        let offset = self.first_nonspace + 1 - self.offset;
1638        self.advance_offset(line, offset, false);
1639        if strings::is_space_or_tab(line[self.offset]) {
1640            self.advance_offset(line, 1, true);
1641        }
1642        *container = self.add_child(container, NodeValue::BlockQuote, blockquote_startpos + 1);
1643
1644        true
1645    }
1646
1647    fn detect_atx_heading(&mut self, line: &[u8], indented: bool, matched: &mut usize) -> bool {
1648        !indented
1649            && unwrap_into(
1650                scanners::atx_heading_start(&line[self.first_nonspace..]),
1651                matched,
1652            )
1653    }
1654
1655    fn handle_atx_heading(
1656        &mut self,
1657        container: &mut &'a Node<'a, RefCell<Ast>>,
1658        line: &[u8],
1659        indented: bool,
1660        matched: &mut usize,
1661    ) -> bool {
1662        if !self.detect_atx_heading(line, indented, matched) {
1663            return false;
1664        }
1665
1666        let heading_startpos = self.first_nonspace;
1667        let offset = self.offset;
1668        self.advance_offset(line, heading_startpos + *matched - offset, false);
1669        *container = self.add_child(
1670            container,
1671            NodeValue::Heading(NodeHeading::default()),
1672            heading_startpos + 1,
1673        );
1674
1675        let mut hashpos = line[self.first_nonspace..]
1676            .iter()
1677            .position(|&c| c == b'#')
1678            .unwrap()
1679            + self.first_nonspace;
1680        let mut level = 0;
1681        while line[hashpos] == b'#' {
1682            level += 1;
1683            hashpos += 1;
1684        }
1685
1686        let container_ast = &mut container.data.borrow_mut();
1687        container_ast.value = NodeValue::Heading(NodeHeading {
1688            level,
1689            setext: false,
1690        });
1691        container_ast.internal_offset = *matched;
1692
1693        true
1694    }
1695
1696    fn detect_code_fence(&mut self, line: &[u8], indented: bool, matched: &mut usize) -> bool {
1697        !indented
1698            && unwrap_into(
1699                scanners::open_code_fence(&line[self.first_nonspace..]),
1700                matched,
1701            )
1702    }
1703
1704    fn handle_code_fence(
1705        &mut self,
1706        container: &mut &'a Node<'a, RefCell<Ast>>,
1707        line: &[u8],
1708        indented: bool,
1709        matched: &mut usize,
1710    ) -> bool {
1711        if !self.detect_code_fence(line, indented, matched) {
1712            return false;
1713        }
1714
1715        let first_nonspace = self.first_nonspace;
1716        let offset = self.offset;
1717        let ncb = NodeCodeBlock {
1718            fenced: true,
1719            fence_char: line[first_nonspace],
1720            fence_length: *matched,
1721            fence_offset: first_nonspace - offset,
1722            info: String::with_capacity(10),
1723            literal: String::new(),
1724        };
1725        *container = self.add_child(
1726            container,
1727            NodeValue::CodeBlock(ncb),
1728            self.first_nonspace + 1,
1729        );
1730        self.advance_offset(line, first_nonspace + *matched - offset, false);
1731
1732        true
1733    }
1734
1735    fn detect_html_block(
1736        &mut self,
1737        container: &AstNode,
1738        line: &[u8],
1739        indented: bool,
1740        matched: &mut usize,
1741    ) -> bool {
1742        !indented
1743            && (unwrap_into(
1744                scanners::html_block_start(&line[self.first_nonspace..]),
1745                matched,
1746            ) || (!node_matches!(container, NodeValue::Paragraph)
1747                && unwrap_into(
1748                    scanners::html_block_start_7(&line[self.first_nonspace..]),
1749                    matched,
1750                )))
1751    }
1752
1753    fn handle_html_block(
1754        &mut self,
1755        container: &mut &'a Node<'a, RefCell<Ast>>,
1756        line: &[u8],
1757        indented: bool,
1758        matched: &mut usize,
1759    ) -> bool {
1760        if !self.detect_html_block(container, line, indented, matched) {
1761            return false;
1762        }
1763
1764        let nhb = NodeHtmlBlock {
1765            block_type: *matched as u8,
1766            literal: String::new(),
1767        };
1768
1769        *container = self.add_child(
1770            container,
1771            NodeValue::HtmlBlock(nhb),
1772            self.first_nonspace + 1,
1773        );
1774
1775        true
1776    }
1777
1778    fn detect_setext_heading(
1779        &mut self,
1780        container: &AstNode,
1781        line: &[u8],
1782        indented: bool,
1783        sc: &mut scanners::SetextChar,
1784    ) -> bool {
1785        !indented
1786            && node_matches!(container, NodeValue::Paragraph)
1787            && unwrap_into(self.setext_heading_line(&line[self.first_nonspace..]), sc)
1788    }
1789
1790    fn handle_setext_heading(
1791        &mut self,
1792        container: &mut &'a Node<'a, RefCell<Ast>>,
1793        line: &[u8],
1794        indented: bool,
1795        sc: &mut scanners::SetextChar,
1796    ) -> bool {
1797        if !self.detect_setext_heading(container, line, indented, sc) {
1798            return false;
1799        }
1800
1801        let has_content = {
1802            let mut ast = container.data.borrow_mut();
1803            self.resolve_reference_link_definitions(&mut ast.content)
1804        };
1805        if has_content {
1806            container.data.borrow_mut().value = NodeValue::Heading(NodeHeading {
1807                level: match sc {
1808                    scanners::SetextChar::Equals => 1,
1809                    scanners::SetextChar::Hyphen => 2,
1810                },
1811                setext: true,
1812            });
1813            let adv = line.len() - 1 - self.offset;
1814            self.advance_offset(line, adv, false);
1815        }
1816
1817        true
1818    }
1819
1820    fn detect_thematic_break(
1821        &mut self,
1822        container: &AstNode,
1823        line: &[u8],
1824        indented: bool,
1825        matched: &mut usize,
1826        all_matched: bool,
1827    ) -> bool {
1828        !indented
1829            && !matches!(
1830                (&container.data.borrow().value, all_matched),
1831                (&NodeValue::Paragraph, false)
1832            )
1833            && self.thematic_break_kill_pos <= self.first_nonspace
1834            && unwrap_into(self.scan_thematic_break(line), matched)
1835    }
1836
1837    fn handle_thematic_break(
1838        &mut self,
1839        container: &mut &'a Node<'a, RefCell<Ast>>,
1840        line: &[u8],
1841        indented: bool,
1842        matched: &mut usize,
1843        all_matched: bool,
1844    ) -> bool {
1845        if !self.detect_thematic_break(container, line, indented, matched, all_matched) {
1846            return false;
1847        }
1848
1849        *container = self.add_child(container, NodeValue::ThematicBreak, self.first_nonspace + 1);
1850
1851        let adv = line.len() - 1 - self.offset;
1852        container.data.borrow_mut().sourcepos.end = (self.line_number, adv).into();
1853        self.advance_offset(line, adv, false);
1854
1855        true
1856    }
1857
1858    fn detect_footnote(
1859        &mut self,
1860        line: &[u8],
1861        indented: bool,
1862        matched: &mut usize,
1863        depth: usize,
1864    ) -> bool {
1865        !indented
1866            && self.options.extension.footnotes
1867            && depth < MAX_LIST_DEPTH
1868            && unwrap_into(
1869                scanners::footnote_definition(&line[self.first_nonspace..]),
1870                matched,
1871            )
1872    }
1873
1874    fn handle_footnote(
1875        &mut self,
1876        container: &mut &'a Node<'a, RefCell<Ast>>,
1877        line: &[u8],
1878        indented: bool,
1879        matched: &mut usize,
1880        depth: usize,
1881    ) -> bool {
1882        if !self.detect_footnote(line, indented, matched, depth) {
1883            return false;
1884        }
1885
1886        let mut c = &line[self.first_nonspace + 2..self.first_nonspace + *matched];
1887        c = c.split(|&e| e == b']').next().unwrap();
1888        let offset = self.first_nonspace + *matched - self.offset;
1889        self.advance_offset(line, offset, false);
1890        *container = self.add_child(
1891            container,
1892            NodeValue::FootnoteDefinition(NodeFootnoteDefinition {
1893                name: str::from_utf8(c).unwrap().to_string(),
1894                total_references: 0,
1895            }),
1896            self.first_nonspace + 1,
1897        );
1898        container.data.borrow_mut().internal_offset = *matched;
1899
1900        true
1901    }
1902
1903    fn detect_description_list(
1904        &mut self,
1905        container: &mut &'a Node<'a, RefCell<Ast>>,
1906        line: &[u8],
1907        indented: bool,
1908        matched: &mut usize,
1909    ) -> bool {
1910        !indented
1911            && self.options.extension.description_lists
1912            && unwrap_into(
1913                scanners::description_item_start(&line[self.first_nonspace..]),
1914                matched,
1915            )
1916            && self.parse_desc_list_details(container, *matched)
1917    }
1918
1919    fn handle_description_list(
1920        &mut self,
1921        container: &mut &'a Node<'a, RefCell<Ast>>,
1922        line: &[u8],
1923        indented: bool,
1924        matched: &mut usize,
1925    ) -> bool {
1926        if !self.detect_description_list(container, line, indented, matched) {
1927            return false;
1928        }
1929
1930        let offset = self.first_nonspace + *matched - self.offset;
1931        self.advance_offset(line, offset, false);
1932        if strings::is_space_or_tab(line[self.offset]) {
1933            self.advance_offset(line, 1, true);
1934        }
1935
1936        true
1937    }
1938
1939    fn detect_list(
1940        &mut self,
1941        container: &AstNode,
1942        line: &[u8],
1943        indented: bool,
1944        matched: &mut usize,
1945        depth: usize,
1946        nl: &mut NodeList,
1947    ) -> bool {
1948        (!indented || node_matches!(container, NodeValue::List(..)))
1949            && self.indent < 4
1950            && depth < MAX_LIST_DEPTH
1951            && unwrap_into_2(
1952                parse_list_marker(
1953                    line,
1954                    self.first_nonspace,
1955                    node_matches!(container, NodeValue::Paragraph),
1956                ),
1957                matched,
1958                nl,
1959            )
1960    }
1961
    /// Tries to open a list item (and, when needed, its enclosing list) at
    /// the current position.
    ///
    /// Returns `false` when `detect_list` rejects the line. Otherwise it
    /// advances past the marker, works out the item's padding, appends the
    /// new node(s) via `add_child`, points `container` at the new item and
    /// returns `true`.
    fn handle_list(
        &mut self,
        container: &mut &'a Node<'a, RefCell<Ast>>,
        line: &[u8],
        indented: bool,
        matched: &mut usize,
        depth: usize,
        nl: &mut NodeList,
    ) -> bool {
        if !self.detect_list(container, line, indented, matched, depth, nl) {
            return false;
        }

        // Step over leading whitespace and the marker (`*matched` bytes from
        // the first non-space position).
        let offset = self.first_nonspace + *matched - self.offset;
        self.advance_offset(line, offset, false);
        // Snapshot the position just after the marker so it can be restored.
        let (save_partially_consumed_tab, save_offset, save_column) =
            (self.partially_consumed_tab, self.offset, self.column);

        // Consume spaces/tabs one column at a time while at most 5 columns
        // have been consumed since the marker.
        while self.column - save_column <= 5 && strings::is_space_or_tab(line[self.offset]) {
            self.advance_offset(line, 1, true);
        }

        // Number of whitespace columns found after the marker.
        let i = self.column - save_column;
        if !(1..5).contains(&i) || strings::is_line_end_char(line[self.offset]) {
            // Zero or five-plus columns of whitespace, or nothing but
            // whitespace up to the line end: padding is the marker width plus
            // one, and only a single column (if any) is consumed.
            nl.padding = *matched + 1;
            self.offset = save_offset;
            self.column = save_column;
            self.partially_consumed_tab = save_partially_consumed_tab;
            if i > 0 {
                self.advance_offset(line, 1, true);
            }
        } else {
            // 1 to 4 columns of whitespace all count as padding.
            nl.padding = *matched + i;
        }

        nl.marker_offset = self.indent;

        // Start a new `List` unless the container is already a list that
        // `lists_match` considers compatible with this marker.
        if match container.data.borrow().value {
            NodeValue::List(ref mnl) => !lists_match(nl, mnl),
            _ => true,
        } {
            *container = self.add_child(container, NodeValue::List(*nl), self.first_nonspace + 1);
        }

        // The item itself always becomes the new container.
        *container = self.add_child(container, NodeValue::Item(*nl), self.first_nonspace + 1);

        true
    }
2010
2011    fn detect_code_block(&mut self, indented: bool, maybe_lazy: bool) -> bool {
2012        indented && !maybe_lazy && !self.blank
2013    }
2014
2015    fn handle_code_block(
2016        &mut self,
2017        container: &mut &'a Node<'a, RefCell<Ast>>,
2018        line: &[u8],
2019        indented: bool,
2020        maybe_lazy: bool,
2021    ) -> bool {
2022        if !self.detect_code_block(indented, maybe_lazy) {
2023            return false;
2024        }
2025
2026        self.advance_offset(line, CODE_INDENT, true);
2027        let ncb = NodeCodeBlock {
2028            fenced: false,
2029            fence_char: 0,
2030            fence_length: 0,
2031            fence_offset: 0,
2032            info: String::new(),
2033            literal: String::new(),
2034        };
2035        *container = self.add_child(container, NodeValue::CodeBlock(ncb), self.offset + 1);
2036
2037        true
2038    }
2039
2040    fn detect_alert(&mut self, line: &[u8], indented: bool, alert_type: &mut AlertType) -> bool {
2041        !indented
2042            && self.options.extension.alerts
2043            && line[self.first_nonspace] == b'>'
2044            && unwrap_into(
2045                scanners::alert_start(&line[self.first_nonspace..]),
2046                alert_type,
2047            )
2048    }
2049
2050    fn handle_alert(
2051        &mut self,
2052        container: &mut &'a Node<'a, RefCell<Ast>>,
2053        line: &[u8],
2054        indented: bool,
2055    ) -> bool {
2056        let mut alert_type: AlertType = Default::default();
2057
2058        if !self.detect_alert(line, indented, &mut alert_type) {
2059            return false;
2060        }
2061
2062        let alert_startpos = self.first_nonspace;
2063        let mut title_startpos = self.first_nonspace;
2064        let mut fence_length = 0;
2065
2066        while line[title_startpos] != b']' {
2067            if line[title_startpos] == b'>' {
2068                fence_length += 1
2069            }
2070            title_startpos += 1;
2071        }
2072        title_startpos += 1;
2073
2074        if fence_length == 2
2075            || (fence_length >= 3 && !self.options.extension.multiline_block_quotes)
2076        {
2077            return false;
2078        }
2079
2080        // anything remaining on this line is considered an alert title
2081        let mut tmp = entity::unescape_html(&line[title_startpos..]);
2082        strings::trim(&mut tmp);
2083        strings::unescape(&mut tmp);
2084
2085        let na = NodeAlert {
2086            alert_type,
2087            multiline: fence_length >= 3,
2088            fence_length,
2089            fence_offset: self.first_nonspace - self.offset,
2090            title: if tmp.is_empty() {
2091                None
2092            } else {
2093                Some(String::from_utf8(tmp).unwrap())
2094            },
2095        };
2096
2097        let offset = self.curline_len - self.offset - 1;
2098        self.advance_offset(line, offset, false);
2099
2100        *container = self.add_child(container, NodeValue::Alert(na), alert_startpos + 1);
2101
2102        true
2103    }
2104
    /// Repeatedly tries to open new block-level nodes at the current position
    /// of `line`, descending into each newly opened block.
    ///
    /// `container` is updated in place to the innermost block opened.
    /// `all_matched` is forwarded to thematic-break detection. The loop stops
    /// when the container is a code or HTML block, when nothing could be
    /// opened, or when the container's value reports `accepts_lines()`.
    fn open_new_blocks(&mut self, container: &mut &'a AstNode<'a>, line: &[u8], all_matched: bool) {
        // Scratch outputs shared by the detect/handle helpers.
        let mut matched: usize = 0;
        let mut nl: NodeList = NodeList::default();
        let mut sc: scanners::SetextChar = scanners::SetextChar::Equals;
        // Only the first iteration can be a lazy paragraph continuation; the
        // flag is cleared at the bottom of the loop.
        let mut maybe_lazy = node_matches!(self.current, NodeValue::Paragraph);
        // Counts loop iterations; footnote and list detection compare it
        // against MAX_LIST_DEPTH.
        let mut depth = 0;

        while !node_matches!(
            container,
            NodeValue::CodeBlock(..) | NodeValue::HtmlBlock(..)
        ) {
            depth += 1;
            self.find_first_nonspace(line);
            let indented = self.indent >= CODE_INDENT;

            // The handlers are tried in this fixed order; `||` short-circuits
            // on the first one that opens a block.
            if self.handle_alert(container, line, indented)
                || self.handle_multiline_blockquote(container, line, indented, &mut matched)
                || self.handle_blockquote(container, line, indented)
                || self.handle_atx_heading(container, line, indented, &mut matched)
                || self.handle_code_fence(container, line, indented, &mut matched)
                || self.handle_html_block(container, line, indented, &mut matched)
                || self.handle_setext_heading(container, line, indented, &mut sc)
                || self.handle_thematic_break(container, line, indented, &mut matched, all_matched)
                || self.handle_footnote(container, line, indented, &mut matched, depth)
                || self.handle_description_list(container, line, indented, &mut matched)
                || self.handle_list(container, line, indented, &mut matched, depth, &mut nl)
                || self.handle_code_block(container, line, indented, maybe_lazy)
            {
                // block handled
            } else {
                // Last resort: the table extension, for non-indented lines only.
                let new_container = if !indented && self.options.extension.table {
                    table::try_opening_block(self, container, line)
                } else {
                    None
                };

                match new_container {
                    Some((new_container, replace, mark_visited)) => {
                        if replace {
                            // Swap the current container for the new node in
                            // the tree.
                            container.insert_after(new_container);
                            container.detach();
                            *container = new_container;
                        } else {
                            *container = new_container;
                        }
                        if mark_visited {
                            container.data.borrow_mut().table_visited = true;
                        }
                    }
                    // Nothing could be opened on this line.
                    _ => break,
                }
            }

            // Stop once the container's value reports that it accepts lines.
            if container.data.borrow().value.accepts_lines() {
                break;
            }

            maybe_lazy = false;
        }
    }
2165
2166    fn advance_offset(&mut self, line: &[u8], mut count: usize, columns: bool) {
2167        while count > 0 {
2168            match line[self.offset] {
2169                9 => {
2170                    let chars_to_tab = TAB_STOP - (self.column % TAB_STOP);
2171                    if columns {
2172                        self.partially_consumed_tab = chars_to_tab > count;
2173                        let chars_to_advance = min(count, chars_to_tab);
2174                        self.column += chars_to_advance;
2175                        self.offset += if self.partially_consumed_tab { 0 } else { 1 };
2176                        count -= chars_to_advance;
2177                    } else {
2178                        self.partially_consumed_tab = false;
2179                        self.column += chars_to_tab;
2180                        self.offset += 1;
2181                        count -= 1;
2182                    }
2183                }
2184                _ => {
2185                    self.partially_consumed_tab = false;
2186                    self.offset += 1;
2187                    self.column += 1;
2188                    count -= 1;
2189                }
2190            }
2191        }
2192    }
2193
2194    fn parse_block_quote_prefix(&mut self, line: &[u8]) -> bool {
2195        let indent = self.indent;
2196        if indent <= 3 && line[self.first_nonspace] == b'>' && self.is_not_greentext(line) {
2197            self.advance_offset(line, indent + 1, true);
2198
2199            if strings::is_space_or_tab(line[self.offset]) {
2200                self.advance_offset(line, 1, true);
2201            }
2202
2203            return true;
2204        }
2205
2206        false
2207    }
2208
2209    fn parse_footnote_definition_block_prefix(&mut self, line: &[u8]) -> bool {
2210        if self.indent >= 4 {
2211            self.advance_offset(line, 4, true);
2212            true
2213        } else {
2214            line == b"\n" || line == b"\r\n"
2215        }
2216    }
2217
2218    fn parse_node_item_prefix(
2219        &mut self,
2220        line: &[u8],
2221        container: &'a AstNode<'a>,
2222        nl: &NodeList,
2223    ) -> bool {
2224        if self.indent >= nl.marker_offset + nl.padding {
2225            self.advance_offset(line, nl.marker_offset + nl.padding, true);
2226            true
2227        } else if self.blank && container.first_child().is_some() {
2228            let offset = self.first_nonspace - self.offset;
2229            self.advance_offset(line, offset, false);
2230            true
2231        } else {
2232            false
2233        }
2234    }
2235
2236    fn parse_description_item_prefix(
2237        &mut self,
2238        line: &[u8],
2239        container: &'a AstNode<'a>,
2240        di: &NodeDescriptionItem,
2241    ) -> bool {
2242        if self.indent >= di.marker_offset + di.padding {
2243            self.advance_offset(line, di.marker_offset + di.padding, true);
2244            true
2245        } else if self.blank && container.first_child().is_some() {
2246            let offset = self.first_nonspace - self.offset;
2247            self.advance_offset(line, offset, false);
2248            true
2249        } else {
2250            false
2251        }
2252    }
2253
2254    fn parse_code_block_prefix(
2255        &mut self,
2256        line: &[u8],
2257        container: &'a AstNode<'a>,
2258        ast: &mut Ast,
2259        should_continue: &mut bool,
2260    ) -> bool {
2261        let (fenced, fence_char, fence_length, fence_offset) = match ast.value {
2262            NodeValue::CodeBlock(ref ncb) => (
2263                ncb.fenced,
2264                ncb.fence_char,
2265                ncb.fence_length,
2266                ncb.fence_offset,
2267            ),
2268            _ => unreachable!(),
2269        };
2270
2271        if !fenced {
2272            if self.indent >= CODE_INDENT {
2273                self.advance_offset(line, CODE_INDENT, true);
2274                return true;
2275            } else if self.blank {
2276                let offset = self.first_nonspace - self.offset;
2277                self.advance_offset(line, offset, false);
2278                return true;
2279            }
2280            return false;
2281        }
2282
2283        let matched = if self.indent <= 3 && line[self.first_nonspace] == fence_char {
2284            scanners::close_code_fence(&line[self.first_nonspace..]).unwrap_or(0)
2285        } else {
2286            0
2287        };
2288
2289        if matched >= fence_length {
2290            *should_continue = false;
2291            self.advance_offset(line, matched, false);
2292            self.current = self.finalize_borrowed(container, ast).unwrap();
2293            return false;
2294        }
2295
2296        let mut i = fence_offset;
2297        while i > 0 && strings::is_space_or_tab(line[self.offset]) {
2298            self.advance_offset(line, 1, true);
2299            i -= 1;
2300        }
2301        true
2302    }
2303
2304    fn parse_html_block_prefix(&mut self, t: u8) -> bool {
2305        match t {
2306            1..=5 => true,
2307            6 | 7 => !self.blank,
2308            _ => unreachable!(),
2309        }
2310    }
2311
    /// Restructures the tree for a description list details line.
    ///
    /// Looks at the last child of `container` (or, when `container` is a
    /// childless paragraph, at the last child of its parent). If that node is
    /// a paragraph, it becomes the term of a new description item; if it is a
    /// description item, a further item holding only details is added to the
    /// same parent. In both cases `container` is pointed at the new
    /// `DescriptionDetails` node and `true` is returned; otherwise `false`.
    ///
    /// `matched` is stored as the new item's `padding`.
    fn parse_desc_list_details(&mut self, container: &mut &'a AstNode<'a>, matched: usize) -> bool {
        // Set when the details line directly follows the term paragraph.
        let mut tight = false;
        let last_child = match container.last_child() {
            Some(lc) => lc,
            None => {
                // Happens when the detail line is directly after the term,
                // without a blank line between.
                if !node_matches!(container, NodeValue::Paragraph) {
                    // If the container is not a paragraph, then this can't
                    // be a description list item.
                    return false;
                }

                let parent = container.parent();
                if parent.is_none() {
                    return false;
                }

                tight = true;
                // Continue from the paragraph's parent. Note that `container`
                // is repointed here, before the final verdict is known.
                *container = parent.unwrap();
                container.last_child().unwrap()
            }
        };

        if node_matches!(last_child, NodeValue::Paragraph) {
            // We have found the details after the paragraph for the term.
            //
            // This paragraph is moved as a child of a new DescriptionTerm node.
            //
            // If the node before the paragraph is a description list, the item
            // is added to it. If not, create a new list.

            last_child.detach();
            let last_child_sourcepos = last_child.data.borrow().sourcepos;

            // TODO: description list sourcepos has issues.
            //
            // DescriptionItem:
            //   For all but the last, the end line/col is wrong.
            //   Where it should be l:c, it gives (l+1):0.
            //
            // DescriptionTerm:
            //   All are incorrect; they all give the start line/col of
            //   the DescriptionDetails, and the end line/col is completely off.
            //
            // DescriptionDetails:
            //   Same as the DescriptionItem.  All but last, the end line/col
            //   is (l+1):0.
            //
            // See crate::tests::description_lists::sourcepos.
            let list = match container.last_child() {
                Some(lc) if node_matches!(lc, NodeValue::DescriptionList) => {
                    // Reuse the preceding description list.
                    reopen_ast_nodes(lc);
                    lc
                }
                _ => {
                    let list = self.add_child(
                        container,
                        NodeValue::DescriptionList,
                        self.first_nonspace + 1,
                    );
                    // The list starts where the term paragraph started.
                    list.data.borrow_mut().sourcepos.start = last_child_sourcepos.start;
                    list
                }
            };

            let metadata = NodeDescriptionItem {
                marker_offset: self.indent,
                padding: matched,
                tight,
            };

            let item = self.add_child(
                list,
                NodeValue::DescriptionItem(metadata),
                self.first_nonspace + 1,
            );
            item.data.borrow_mut().sourcepos.start = last_child_sourcepos.start;
            let term = self.add_child(item, NodeValue::DescriptionTerm, self.first_nonspace + 1);
            let details =
                self.add_child(item, NodeValue::DescriptionDetails, self.first_nonspace + 1);

            // Re-attach the detached paragraph as the term's content.
            term.append(last_child);

            *container = details;

            true
        } else if node_matches!(last_child, NodeValue::DescriptionItem(..)) {
            // A further details line after an existing item: add a new item
            // with details only, inheriting the previous item's tightness.
            let parent = last_child.parent().unwrap();
            let tight = match last_child.data.borrow().value {
                NodeValue::DescriptionItem(ref ndi) => ndi.tight,
                _ => false,
            };

            let metadata = NodeDescriptionItem {
                marker_offset: self.indent,
                padding: matched,
                tight,
            };

            let item = self.add_child(
                parent,
                NodeValue::DescriptionItem(metadata),
                self.first_nonspace + 1,
            );

            let details =
                self.add_child(item, NodeValue::DescriptionDetails, self.first_nonspace + 1);

            *container = details;

            true
        } else {
            false
        }
    }
2428
2429    fn parse_multiline_block_quote_prefix(
2430        &mut self,
2431        line: &[u8],
2432        container: &'a AstNode<'a>,
2433        ast: &mut Ast,
2434        should_continue: &mut bool,
2435    ) -> bool {
2436        let (fence_length, fence_offset) = match ast.value {
2437            NodeValue::MultilineBlockQuote(ref node_value) => {
2438                (node_value.fence_length, node_value.fence_offset)
2439            }
2440            NodeValue::Alert(ref node_value) => (node_value.fence_length, node_value.fence_offset),
2441            _ => unreachable!(),
2442        };
2443
2444        let matched = if self.indent <= 3 && line[self.first_nonspace] == b'>' {
2445            scanners::close_multiline_block_quote_fence(&line[self.first_nonspace..]).unwrap_or(0)
2446        } else {
2447            0
2448        };
2449
2450        if matched >= fence_length {
2451            *should_continue = false;
2452            self.advance_offset(line, matched, false);
2453
2454            // The last child, like an indented codeblock, could be left open.
2455            // Make sure it's finalized.
2456            if nodes::last_child_is_open(container) {
2457                let child = container.last_child().unwrap();
2458                let child_ast = &mut *child.data.borrow_mut();
2459
2460                self.finalize_borrowed(child, child_ast).unwrap();
2461            }
2462
2463            self.current = self.finalize_borrowed(container, ast).unwrap();
2464            return false;
2465        }
2466
2467        let mut i = fence_offset;
2468        while i > 0 && strings::is_space_or_tab(line[self.offset]) {
2469            self.advance_offset(line, 1, true);
2470            i -= 1;
2471        }
2472        true
2473    }
2474
2475    fn add_child(
2476        &mut self,
2477        mut parent: &'a AstNode<'a>,
2478        value: NodeValue,
2479        start_column: usize,
2480    ) -> &'a AstNode<'a> {
2481        while !nodes::can_contain_type(parent, &value) {
2482            parent = self.finalize(parent).unwrap();
2483        }
2484
2485        assert!(start_column > 0);
2486
2487        let child = Ast::new(value, (self.line_number, start_column).into());
2488        let node = self.arena.alloc(Node::new(RefCell::new(child)));
2489        parent.append(node);
2490        node
2491    }
2492
    /// Routes the remainder of `line` into the block tree.
    ///
    /// `container` is the node considered for receiving the line, and
    /// `last_matched_container` is compared against both `container` and
    /// `self.current` to choose between appending to the paragraph in
    /// `self.current` and first finalizing blocks up to
    /// `last_matched_container`. Besides adding text, this updates the
    /// `last_line_blank` flags on `container`, its last child and its
    /// ancestors, and (in the second case) reassigns `self.current`.
    fn add_text_to_container(
        &mut self,
        mut container: &'a AstNode<'a>,
        last_matched_container: &'a AstNode<'a>,
        line: &[u8],
    ) {
        self.find_first_nonspace(line);

        // A blank line marks the container's most recent child as being
        // followed by a blank line.
        if self.blank {
            if let Some(last_child) = container.last_child() {
                last_child.data.borrow_mut().last_line_blank = true;
            }
        }

        // The container itself only records a trailing blank line for some
        // node types. The right-hand side (which borrows the node immutably)
        // is evaluated before the mutable borrow on the left is taken.
        container.data.borrow_mut().last_line_blank = self.blank
            && match container.data.borrow().value {
                NodeValue::BlockQuote | NodeValue::Heading(..) | NodeValue::ThematicBreak => false,
                NodeValue::CodeBlock(ref ncb) => !ncb.fenced,
                NodeValue::Item(..) => {
                    // An item with no children that started on this very
                    // line does not count the line as a trailing blank.
                    container.first_child().is_some()
                        || container.data.borrow().sourcepos.start.line != self.line_number
                }
                NodeValue::MultilineBlockQuote(..) => false,
                NodeValue::Alert(..) => false,
                _ => true,
            };

        // Every ancestor of the container has the flag cleared.
        let mut tmp = container;
        while let Some(parent) = tmp.parent() {
            parent.data.borrow_mut().last_line_blank = false;
            tmp = parent;
        }

        // Case 1: `self.current` is an open paragraph that was not itself
        // matched, no new block was opened (`container` is still the last
        // matched container) and the line is not blank: the line is appended
        // to that paragraph. With the greentext extension enabled this is
        // skipped when the container is a block quote or the document.
        if !self.current.same_node(last_matched_container)
            && container.same_node(last_matched_container)
            && !self.blank
            && (!self.options.extension.greentext
                || !matches!(
                    container.data.borrow().value,
                    NodeValue::BlockQuote | NodeValue::Document
                ))
            && node_matches!(self.current, NodeValue::Paragraph)
        {
            self.add_line(self.current, line);
        } else {
            // Case 2: finalize everything between `self.current` and the last
            // matched container, then hand the line to `container`.
            while !self.current.same_node(last_matched_container) {
                self.current = self.finalize(self.current).unwrap();
            }

            // Classify first so the borrow of `container` is released before
            // any of the branches below mutate it.
            let add_text_result = match container.data.borrow().value {
                NodeValue::CodeBlock(..) => AddTextResult::LiteralText,
                NodeValue::HtmlBlock(ref nhb) => AddTextResult::HtmlBlock(nhb.block_type),
                _ => AddTextResult::Otherwise,
            };

            match add_text_result {
                AddTextResult::LiteralText => {
                    self.add_line(container, line);
                }
                AddTextResult::HtmlBlock(block_type) => {
                    self.add_line(container, line);

                    // Block types 1-5 have a scanner for their end condition;
                    // any other type never matches here.
                    let matches_end_condition = match block_type {
                        1 => scanners::html_block_end_1(&line[self.first_nonspace..]),
                        2 => scanners::html_block_end_2(&line[self.first_nonspace..]),
                        3 => scanners::html_block_end_3(&line[self.first_nonspace..]),
                        4 => scanners::html_block_end_4(&line[self.first_nonspace..]),
                        5 => scanners::html_block_end_5(&line[self.first_nonspace..]),
                        _ => false,
                    };

                    // The line that satisfies the end condition is part of
                    // the block; the block is closed right after it.
                    if matches_end_condition {
                        container = self.finalize(container).unwrap();
                    }
                }
                _ => {
                    if self.blank {
                        // do nothing
                    } else if container.data.borrow().value.accepts_lines() {
                        // Work on a copy so trailing hashes can be chopped
                        // from non-setext headings.
                        let mut line: Vec<u8> = line.into();
                        if let NodeValue::Heading(ref nh) = container.data.borrow().value {
                            if !nh.setext {
                                strings::chop_trailing_hashtags(&mut line);
                            }
                        };
                        let count = self.first_nonspace - self.offset;

                        // In a rare case the above `chop` operation can leave
                        // the line shorter than the recorded `first_nonspace`
                        // This happens with ATX headers containing no header
                        // text, multiple spaces and trailing hashes, e.g
                        //
                        // ###     ###
                        //
                        // In this case `first_nonspace` indexes into the second
                        // set of hashes, while `chop_trailing_hashtags` truncates
                        // `line` to just `###` (the first three hashes).
                        // In this case there's no text to add, and no further
                        // processing to be done.
                        let have_line_text = self.first_nonspace <= line.len();

                        if have_line_text {
                            self.advance_offset(&line, count, false);
                            self.add_line(container, &line);
                        }
                    } else {
                        // The container does not accept lines directly: open
                        // a new paragraph at the first non-space column
                        // (1-based) and put the line there.
                        container = self.add_child(
                            container,
                            NodeValue::Paragraph,
                            self.first_nonspace + 1,
                        );
                        let count = self.first_nonspace - self.offset;
                        self.advance_offset(line, count, false);
                        self.add_line(container, line);
                    }
                }
            }

            self.current = container;
        }
    }
2614
2615    fn add_line(&mut self, node: &'a AstNode<'a>, line: &[u8]) {
2616        let mut ast = node.data.borrow_mut();
2617        assert!(ast.open);
2618        if self.partially_consumed_tab {
2619            self.offset += 1;
2620            let chars_to_tab = TAB_STOP - (self.column % TAB_STOP);
2621            for _ in 0..chars_to_tab {
2622                ast.content.push(' ');
2623            }
2624        }
2625        if self.offset < line.len() {
2626            // since whitespace is stripped off the beginning of lines, we need to keep
2627            // track of how much was stripped off. This allows us to properly calculate
2628            // inline sourcepos during inline processing.
2629            ast.line_offsets.push(self.offset);
2630
2631            ast.content
2632                .push_str(str::from_utf8(&line[self.offset..]).unwrap());
2633        }
2634    }
2635
2636    fn finish(&mut self, remaining: Vec<u8>) -> &'a AstNode<'a> {
2637        if !remaining.is_empty() {
2638            self.process_line(&remaining);
2639        }
2640
2641        self.finalize_document();
2642        self.postprocess_text_nodes(self.root);
2643        self.root
2644    }
2645
2646    fn finalize_document(&mut self) {
2647        while !self.current.same_node(self.root) {
2648            self.current = self.finalize(self.current).unwrap();
2649        }
2650
2651        self.finalize(self.root);
2652
2653        self.refmap.max_ref_size = if self.total_size > 100000 {
2654            self.total_size
2655        } else {
2656            100000
2657        };
2658
2659        self.process_inlines();
2660        if self.options.extension.footnotes {
2661            self.process_footnotes();
2662        }
2663    }
2664
2665    fn finalize(&mut self, node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> {
2666        self.finalize_borrowed(node, &mut node.data.borrow_mut())
2667    }
2668
2669    fn resolve_reference_link_definitions(&mut self, content: &mut String) -> bool {
2670        let mut seeked = 0;
2671        {
2672            let mut pos = 0;
2673            let mut seek: &[u8] = content.as_bytes();
2674            while !seek.is_empty()
2675                && seek[0] == b'['
2676                && unwrap_into(self.parse_reference_inline(seek), &mut pos)
2677            {
2678                seek = &seek[pos..];
2679                seeked += pos;
2680            }
2681        }
2682
2683        if seeked != 0 {
2684            *content = content[seeked..].to_string();
2685        }
2686
2687        !strings::is_blank(content.as_bytes())
2688    }
2689
    /// Closes `node`, whose AST data the caller has already borrowed as `ast`.
    ///
    /// Marks the node as no longer open, sets the end of its source position,
    /// and performs the type-specific work of moving or trimming accumulated
    /// `content`. Returns the node's parent as it was before any detaching
    /// done here (`None` if it has none).
    ///
    /// # Panics
    ///
    /// Panics if the node is not open, or if a fenced code block's content
    /// contains no line end character.
    fn finalize_borrowed(
        &mut self,
        node: &'a AstNode<'a>,
        ast: &mut Ast,
    ) -> Option<&'a AstNode<'a>> {
        assert!(ast.open);
        ast.open = false;

        let content = &mut ast.content;
        // Captured up front: a paragraph may detach itself below.
        let parent = node.parent();

        // Decide where the node ends in the source.
        if self.curline_len == 0 {
            ast.sourcepos.end = (self.line_number, self.last_line_length).into();
        } else if match ast.value {
            NodeValue::Document => true,
            NodeValue::CodeBlock(ref ncb) => ncb.fenced,
            NodeValue::MultilineBlockQuote(..) => true,
            _ => false,
        } {
            // These node types end on the current line.
            ast.sourcepos.end = (self.line_number, self.curline_end_col).into();
        } else if matches!(ast.value, NodeValue::ThematicBreak) {
            // sourcepos.end set during opening.
        } else {
            // Everything else ends on the line before the current one.
            ast.sourcepos.end = (self.line_number - 1, self.last_line_length).into();
        }

        match ast.value {
            NodeValue::Paragraph => {
                // Leading link reference definitions are consumed from the
                // content; a paragraph with nothing else left is removed.
                let has_content = self.resolve_reference_link_definitions(content);
                if !has_content {
                    node.detach();
                }
            }
            NodeValue::CodeBlock(ref mut ncb) => {
                if !ncb.fenced {
                    // Non-fenced code: drop trailing blank lines and end the
                    // literal with exactly one added newline.
                    strings::remove_trailing_blank_lines(content);
                    content.push('\n');
                } else {
                    // Fenced code: the first line of the content is the info
                    // string. Find where that line ends.
                    let mut pos = 0;
                    while pos < content.len() {
                        if strings::is_line_end_char(content.as_bytes()[pos]) {
                            break;
                        }
                        pos += 1;
                    }
                    assert!(pos < content.len());

                    let mut tmp = entity::unescape_html(&content.as_bytes()[..pos]);
                    strings::trim(&mut tmp);
                    strings::unescape(&mut tmp);
                    if tmp.is_empty() {
                        // No info string given: fall back to the configured
                        // default, or to an empty string.
                        ncb.info = self
                            .options
                            .parse
                            .default_info_string
                            .as_ref()
                            .map_or(String::new(), |s| s.clone());
                    } else {
                        ncb.info = String::from_utf8(tmp).unwrap();
                    }

                    // Step over the line ending of the info line (`\r`, `\n`
                    // or `\r\n`) and remove the whole line from the content.
                    if content.as_bytes()[pos] == b'\r' {
                        pos += 1;
                    }
                    if content.as_bytes()[pos] == b'\n' {
                        pos += 1;
                    }

                    content.drain(..pos);
                }
                // Exchange the node's literal with the accumulated content.
                mem::swap(&mut ncb.literal, content);
            }
            NodeValue::HtmlBlock(ref mut nhb) => {
                // Exchange the node's literal with the accumulated content.
                mem::swap(&mut nhb.literal, content);
            }
            NodeValue::List(ref mut nl) => {
                // A list starts out tight and becomes loose as soon as a
                // qualifying blank line is found among its items.
                nl.tight = true;
                let mut ch = node.first_child();

                while let Some(item) = ch {
                    // A blank line after an item that is not the last one.
                    if item.data.borrow().last_line_blank && item.next_sibling().is_some() {
                        nl.tight = false;
                        break;
                    }

                    // A child of the item that ends with a blank line, unless
                    // it is the last child of the last item.
                    let mut subch = item.first_child();
                    while let Some(subitem) = subch {
                        if (item.next_sibling().is_some() || subitem.next_sibling().is_some())
                            && nodes::ends_with_blank_line(subitem)
                        {
                            nl.tight = false;
                            break;
                        }
                        subch = subitem.next_sibling();
                    }

                    if !nl.tight {
                        break;
                    }

                    ch = item.next_sibling();
                }
            }
            _ => (),
        }

        parent
    }
2798
2799    fn process_inlines(&mut self) {
2800        self.process_inlines_node(self.root);
2801    }
2802
2803    fn process_inlines_node(&mut self, node: &'a AstNode<'a>) {
2804        for node in node.descendants() {
2805            if node.data.borrow().value.contains_inlines() {
2806                self.parse_inlines(node);
2807            }
2808        }
2809    }
2810
2811    fn parse_inlines(&mut self, node: &'a AstNode<'a>) {
2812        let delimiter_arena = Arena::new();
2813        let node_data = node.data.borrow();
2814        let content = strings::rtrim_slice(node_data.content.as_bytes());
2815        let mut subj = inlines::Subject::new(
2816            self.arena,
2817            self.options,
2818            content,
2819            node_data.sourcepos.start.line,
2820            &mut self.refmap,
2821            &delimiter_arena,
2822        );
2823
2824        while subj.parse_inline(node) {}
2825
2826        subj.process_emphasis(0);
2827
2828        while subj.pop_bracket() {}
2829    }
2830
2831    fn process_footnotes(&mut self) {
2832        let mut map = HashMap::new();
2833        Self::find_footnote_definitions(self.root, &mut map);
2834
2835        let mut ix = 0;
2836        Self::find_footnote_references(self.root, &mut map, &mut ix);
2837
2838        if !map.is_empty() {
2839            // In order for references to be found inside footnote definitions,
2840            // such as `[^1]: another reference[^2]`,
2841            // the node needed to remain in the AST. Now we can remove them.
2842            Self::cleanup_footnote_definitions(self.root);
2843        }
2844
2845        if ix > 0 {
2846            let mut v = map.into_values().collect::<Vec<_>>();
2847            v.sort_unstable_by(|a, b| a.ix.cmp(&b.ix));
2848            for f in v {
2849                if f.ix.is_some() {
2850                    match f.node.data.borrow_mut().value {
2851                        NodeValue::FootnoteDefinition(ref mut nfd) => {
2852                            nfd.name = f.name.to_string();
2853                            nfd.total_references = f.total_references;
2854                        }
2855                        _ => unreachable!(),
2856                    }
2857                    self.root.append(f.node);
2858                }
2859            }
2860        }
2861    }
2862
2863    fn find_footnote_definitions(
2864        node: &'a AstNode<'a>,
2865        map: &mut HashMap<String, FootnoteDefinition<'a>>,
2866    ) {
2867        match node.data.borrow().value {
2868            NodeValue::FootnoteDefinition(ref nfd) => {
2869                map.insert(
2870                    strings::normalize_label(&nfd.name, Case::Fold),
2871                    FootnoteDefinition {
2872                        ix: None,
2873                        node,
2874                        name: strings::normalize_label(&nfd.name, Case::Preserve),
2875                        total_references: 0,
2876                    },
2877                );
2878            }
2879            _ => {
2880                for n in node.children() {
2881                    Self::find_footnote_definitions(n, map);
2882                }
2883            }
2884        }
2885    }
2886
2887    fn find_footnote_references(
2888        node: &'a AstNode<'a>,
2889        map: &mut HashMap<String, FootnoteDefinition>,
2890        ixp: &mut u32,
2891    ) {
2892        let mut ast = node.data.borrow_mut();
2893        let mut replace = None;
2894        match ast.value {
2895            NodeValue::FootnoteReference(ref mut nfr) => {
2896                let normalized = strings::normalize_label(&nfr.name, Case::Fold);
2897                if let Some(ref mut footnote) = map.get_mut(&normalized) {
2898                    let ix = match footnote.ix {
2899                        Some(ix) => ix,
2900                        None => {
2901                            *ixp += 1;
2902                            footnote.ix = Some(*ixp);
2903                            *ixp
2904                        }
2905                    };
2906                    footnote.total_references += 1;
2907                    nfr.ref_num = footnote.total_references;
2908                    nfr.ix = ix;
2909                    nfr.name = strings::normalize_label(&footnote.name, Case::Preserve);
2910                } else {
2911                    replace = Some(nfr.name.clone());
2912                }
2913            }
2914            _ => {
2915                for n in node.children() {
2916                    Self::find_footnote_references(n, map, ixp);
2917                }
2918            }
2919        }
2920
2921        if let Some(mut label) = replace {
2922            label.insert_str(0, "[^");
2923            label.push(']');
2924            ast.value = NodeValue::Text(label);
2925        }
2926    }
2927
2928    fn cleanup_footnote_definitions(node: &'a AstNode<'a>) {
2929        match node.data.borrow().value {
2930            NodeValue::FootnoteDefinition(_) => {
2931                node.detach();
2932            }
2933            _ => {
2934                for n in node.children() {
2935                    Self::cleanup_footnote_definitions(n);
2936                }
2937            }
2938        }
2939    }
2940
2941    fn postprocess_text_nodes(&mut self, node: &'a AstNode<'a>) {
2942        self.postprocess_text_nodes_with_context(node, false);
2943    }
2944
    /// Walks the tree below `node` with an explicit work stack, merging runs
    /// of adjacent text nodes and post-processing each merged text node.
    ///
    /// `in_bracket_context` is the initial context flag; it becomes `true`
    /// for everything beneath a link, image or wikilink and is passed on to
    /// the per-node post-processing. Text nodes left empty by
    /// post-processing are detached.
    fn postprocess_text_nodes_with_context(
        &mut self,
        node: &'a AstNode<'a>,
        in_bracket_context: bool,
    ) {
        let mut stack = vec![(node, in_bracket_context)];
        // Scratch list of the current node's surviving children, reused
        // across iterations.
        let mut children = vec![];

        while let Some((node, in_bracket_context)) = stack.pop() {
            let mut nch = node.first_child();

            while let Some(n) = nch {
                let mut child_in_bracket_context = in_bracket_context;
                let mut emptied = false;
                let n_ast = &mut n.data.borrow_mut();
                // Work on a copy of the sourcepos; it is written back below.
                let mut sourcepos = n_ast.sourcepos;

                match n_ast.value {
                    NodeValue::Text(ref mut root) => {
                        // Join adjacent text nodes together, then post-process.
                        // Record the original list of sourcepos and bytecounts
                        // for the post-processing step.
                        let mut spxv = VecDeque::new();
                        spxv.push_back((sourcepos, root.len()));
                        while let Some(ns) = n.next_sibling() {
                            match ns.data.borrow().value {
                                NodeValue::Text(ref adj) => {
                                    // Absorb the sibling's text, extend the
                                    // end column to cover it, and remove the
                                    // sibling from the tree.
                                    root.push_str(adj);
                                    let sp = ns.data.borrow().sourcepos;
                                    spxv.push_back((sp, adj.len()));
                                    sourcepos.end.column = sp.end.column;
                                    ns.detach();
                                }
                                _ => break,
                            }
                        }

                        self.postprocess_text_node_with_context(
                            n,
                            root,
                            &mut sourcepos,
                            spxv,
                            in_bracket_context,
                        );
                        emptied = root.is_empty();
                    }
                    NodeValue::Link(..) | NodeValue::Image(..) | NodeValue::WikiLink(..) => {
                        // Recurse into links, images, and wikilinks to join adjacent text nodes,
                        // but mark the context so autolinks won't be generated within them.
                        child_in_bracket_context = true;
                    }
                    _ => {}
                }

                n_ast.sourcepos = sourcepos;

                if !emptied {
                    children.push((n, child_in_bracket_context));
                }

                // Fetch the next sibling before a possible detach of `n`.
                nch = n.next_sibling();

                if emptied {
                    n.detach();
                }
            }

            // Push children onto work stack in reverse order so they are
            // traversed in order
            stack.extend(children.drain(..).rev());
        }
    }
3017
3018    fn postprocess_text_node_with_context(
3019        &mut self,
3020        node: &'a AstNode<'a>,
3021        text: &mut String,
3022        sourcepos: &mut Sourcepos,
3023        spxv: VecDeque<(Sourcepos, usize)>,
3024        in_bracket_context: bool,
3025    ) {
3026        let mut spx = Spx(spxv);
3027        if self.options.extension.tasklist {
3028            self.process_tasklist(node, text, sourcepos, &mut spx);
3029        }
3030
3031        if self.options.extension.autolink && !in_bracket_context {
3032            autolink::process_email_autolinks(
3033                self.arena,
3034                node,
3035                text,
3036                self.options.parse.relaxed_autolinks,
3037                sourcepos,
3038                &mut spx,
3039            );
3040        }
3041    }
3042
3043    fn process_tasklist(
3044        &mut self,
3045        node: &'a AstNode<'a>,
3046        text: &mut String,
3047        sourcepos: &mut Sourcepos,
3048        spx: &mut Spx,
3049    ) {
3050        let (end, symbol) = match scanners::tasklist(text.as_bytes()) {
3051            Some(p) => p,
3052            None => return,
3053        };
3054
3055        let symbol = symbol as char;
3056
3057        if !self.options.parse.relaxed_tasklist_matching && !matches!(symbol, ' ' | 'x' | 'X') {
3058            return;
3059        }
3060
3061        let parent = node.parent().unwrap();
3062        if node.previous_sibling().is_some() || parent.previous_sibling().is_some() {
3063            return;
3064        }
3065
3066        if !node_matches!(parent, NodeValue::Paragraph) {
3067            return;
3068        }
3069
3070        let grandparent = parent.parent().unwrap();
3071        if !node_matches!(grandparent, NodeValue::Item(..)) {
3072            return;
3073        }
3074
3075        let great_grandparent = grandparent.parent().unwrap();
3076        if !node_matches!(great_grandparent, NodeValue::List(..)) {
3077            return;
3078        }
3079
3080        // These are sound only because the exact text that we've matched and
3081        // the count thereof (i.e. "end") will precisely map to characters in
3082        // the source document.
3083        text.drain(..end);
3084
3085        let adjust = spx.consume(end) + 1;
3086        assert_eq!(
3087            sourcepos.start.column,
3088            parent.data.borrow().sourcepos.start.column
3089        );
3090
3091        // See tests::fuzz::echaw9. The paragraph doesn't exist in the source,
3092        // so we remove it.
3093        if sourcepos.end.column < adjust && node.next_sibling().is_none() {
3094            parent.detach();
3095        } else {
3096            sourcepos.start.column = adjust;
3097            parent.data.borrow_mut().sourcepos.start.column = adjust;
3098        }
3099
3100        grandparent.data.borrow_mut().value =
3101            NodeValue::TaskItem(if symbol == ' ' { None } else { Some(symbol) });
3102
3103        if let NodeValue::List(ref mut list) = &mut great_grandparent.data.borrow_mut().value {
3104            list.is_task_list = true;
3105        }
3106    }
3107
3108    fn parse_reference_inline(&mut self, content: &[u8]) -> Option<usize> {
3109        // In this case reference inlines rarely have delimiters
3110        // so we often just need the minimal case
3111        let delimiter_arena = Arena::with_capacity(0);
3112        let mut subj = inlines::Subject::new(
3113            self.arena,
3114            self.options,
3115            content,
3116            0, // XXX -1 in upstream; never used?
3117            &mut self.refmap,
3118            &delimiter_arena,
3119        );
3120
3121        let mut lab: String = match subj.link_label() {
3122            Some(lab) if !lab.is_empty() => lab.to_string(),
3123            _ => return None,
3124        };
3125
3126        if subj.peek_char() != Some(&(b':')) {
3127            return None;
3128        }
3129
3130        subj.pos += 1;
3131        subj.spnl();
3132        let (url, matchlen) = match inlines::manual_scan_link_url(&subj.input[subj.pos..]) {
3133            Some((url, matchlen)) => (url, matchlen),
3134            None => return None,
3135        };
3136        subj.pos += matchlen;
3137
3138        let beforetitle = subj.pos;
3139        subj.spnl();
3140        let title_search = if subj.pos == beforetitle {
3141            None
3142        } else {
3143            scanners::link_title(&subj.input[subj.pos..])
3144        };
3145        let title = match title_search {
3146            Some(matchlen) => {
3147                let t = &subj.input[subj.pos..subj.pos + matchlen];
3148                subj.pos += matchlen;
3149                t.to_vec()
3150            }
3151            _ => {
3152                subj.pos = beforetitle;
3153                vec![]
3154            }
3155        };
3156
3157        subj.skip_spaces();
3158        if !subj.skip_line_end() {
3159            if !title.is_empty() {
3160                subj.pos = beforetitle;
3161                subj.skip_spaces();
3162                if !subj.skip_line_end() {
3163                    return None;
3164                }
3165            } else {
3166                return None;
3167            }
3168        }
3169
3170        lab = strings::normalize_label(&lab, Case::Fold);
3171        if !lab.is_empty() {
3172            subj.refmap.map.entry(lab).or_insert(ResolvedReference {
3173                url: String::from_utf8(strings::clean_url(url)).unwrap(),
3174                title: String::from_utf8(strings::clean_title(&title)).unwrap(),
3175            });
3176        }
3177        Some(subj.pos)
3178    }
3179}
3180
/// How `add_text_to_container` treats the current line, chosen from the kind
/// of container that receives it.
enum AddTextResult {
    /// The container is a code block: the line is added as-is.
    LiteralText,
    /// The container is an HTML block. Carries the block's `block_type`,
    /// which selects the end-condition scanner to run on the line.
    HtmlBlock(u8),
    /// Any other kind of container.
    Otherwise,
}
3186
3187fn parse_list_marker(
3188    line: &[u8],
3189    mut pos: usize,
3190    interrupts_paragraph: bool,
3191) -> Option<(usize, NodeList)> {
3192    let mut c = line[pos];
3193    let startpos = pos;
3194
3195    if c == b'*' || c == b'-' || c == b'+' {
3196        pos += 1;
3197        if !isspace(line[pos]) {
3198            return None;
3199        }
3200
3201        if interrupts_paragraph {
3202            let mut i = pos;
3203            while strings::is_space_or_tab(line[i]) {
3204                i += 1;
3205            }
3206            if line[i] == b'\n' {
3207                return None;
3208            }
3209        }
3210
3211        return Some((
3212            pos - startpos,
3213            NodeList {
3214                list_type: ListType::Bullet,
3215                marker_offset: 0,
3216                padding: 0,
3217                start: 1,
3218                delimiter: ListDelimType::Period,
3219                bullet_char: c,
3220                tight: false,
3221                is_task_list: false,
3222            },
3223        ));
3224    } else if isdigit(c) {
3225        let mut start: usize = 0;
3226        let mut digits = 0;
3227
3228        loop {
3229            start = (10 * start) + (line[pos] - b'0') as usize;
3230            pos += 1;
3231            digits += 1;
3232
3233            if !(digits < 9 && isdigit(line[pos])) {
3234                break;
3235            }
3236        }
3237
3238        if interrupts_paragraph && start != 1 {
3239            return None;
3240        }
3241
3242        c = line[pos];
3243        if c != b'.' && c != b')' {
3244            return None;
3245        }
3246
3247        pos += 1;
3248
3249        if !isspace(line[pos]) {
3250            return None;
3251        }
3252
3253        if interrupts_paragraph {
3254            let mut i = pos;
3255            while strings::is_space_or_tab(line[i]) {
3256                i += 1;
3257            }
3258            if strings::is_line_end_char(line[i]) {
3259                return None;
3260            }
3261        }
3262
3263        return Some((
3264            pos - startpos,
3265            NodeList {
3266                list_type: ListType::Ordered,
3267                marker_offset: 0,
3268                padding: 0,
3269                start,
3270                delimiter: if c == b'.' {
3271                    ListDelimType::Period
3272                } else {
3273                    ListDelimType::Paren
3274                },
3275                bullet_char: 0,
3276                tight: false,
3277                is_task_list: false,
3278            },
3279        ));
3280    }
3281
3282    None
3283}
3284
/// Moves the value inside `t` into `out` and reports whether there was one.
///
/// `out` is left untouched when `t` is `None`.
pub fn unwrap_into<T>(t: Option<T>, out: &mut T) -> bool {
    if let Some(value) = t {
        *out = value;
        return true;
    }
    false
}
3294
/// Copies the value referenced by `t` into `out` and reports whether there
/// was one.
///
/// `out` is left untouched when `t` is `None`.
pub fn unwrap_into_copy<T: Copy>(t: Option<&T>, out: &mut T) -> bool {
    t.map(|&value| *out = value).is_some()
}
3304
/// Moves both halves of the pair inside `tu` into `out_t` and `out_u` and
/// reports whether there was a pair.
///
/// Neither output is touched when `tu` is `None`.
fn unwrap_into_2<T, U>(tu: Option<(T, U)>, out_t: &mut T, out_u: &mut U) -> bool {
    if let Some((first, second)) = tu {
        *out_t = first;
        *out_u = second;
        return true;
    }
    false
}
3315
3316fn lists_match(list_data: &NodeList, item_data: &NodeList) -> bool {
3317    list_data.list_type == item_data.list_type
3318        && list_data.delimiter == item_data.delimiter
3319        && list_data.bullet_char == item_data.bullet_char
3320}
3321
3322fn reopen_ast_nodes<'a>(mut ast: &'a AstNode<'a>) {
3323    loop {
3324        ast.data.borrow_mut().open = true;
3325        ast = match ast.parent() {
3326            Some(p) => p,
3327            None => return,
3328        }
3329    }
3330}
3331
/// Distinguishes the two kinds of autolink: URI and email.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AutolinkType {
    /// A URI autolink.
    Uri,
    /// An email autolink.
    Email,
}
3337
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
/// Options for bulleted list rendering in markdown. See `list_style` in [`RenderOptions`] for more details.
///
/// Each variant's discriminant is the ASCII code of its bullet character.
pub enum ListStyleType {
    /// The `-` character
    #[default]
    Dash = 45,
    /// The `+` character
    Plus = 43,
    /// The `*` character
    Star = 42,
}
3350
3351pub(crate) struct Spx(VecDeque<(Sourcepos, usize)>);
3352
3353impl Spx {
3354    // Sourcepos end column `e` of a node determined by advancing through `spx`
3355    // until `i` bytes of input are seen.
3356    //
3357    // For each element `(sp, x)` in `spx`:
3358    // - if remaining `i` is greater than the byte count `x`,
3359    //     set `i -= x` and continue.
3360    // - if remaining `i` is equal to the byte count `x`,
3361    //     set `e = sp.end.column` and finish.
3362    // - if remaining `i` is less than the byte count `x`,
3363    //     assert `sp.end.column - sp.start.column + 1 == x || i == 0` (1),
3364    //     set `e = sp.start.column + i - 1` and finish.
3365    //
3366    // (1) If `x` doesn't equal the range covered between the start and end column,
3367    //     there's no way to determine sourcepos within the range. This is a bug if
3368    //     it happens; it suggests we've matched an email autolink with some smart
3369    //     punctuation in it, or worse.
3370    //
3371    //     The one exception is if `i == 0`. Given nothing to consume, we can
3372    //     happily restore what we popped, returning `sp.start.column - 1` for the
3373    //     end column of the original node.
3374    pub(crate) fn consume(&mut self, mut rem: usize) -> usize {
3375        while let Some((sp, x)) = self.0.pop_front() {
3376            match rem.cmp(&x) {
3377                Ordering::Greater => rem -= x,
3378                Ordering::Equal => return sp.end.column,
3379                Ordering::Less => {
3380                    assert!((sp.end.column - sp.start.column + 1 == x) || rem == 0);
3381                    self.0.push_front((
3382                        (
3383                            sp.start.line,
3384                            sp.start.column + rem,
3385                            sp.end.line,
3386                            sp.end.column,
3387                        )
3388                            .into(),
3389                        x - rem,
3390                    ));
3391                    return sp.start.column + rem - 1;
3392                }
3393            }
3394        }
3395        unreachable!();
3396    }
3397}