markdown/construct/
code_indented.rs

1//! Code (indented) occurs in the [flow][] content type.
2//!
3//! ## Grammar
4//!
5//! Code (indented) forms with the following BNF
6//! (<small>see [construct][crate::construct] for character groups</small>):
7//!
8//! ```bnf
9//! code_indented ::= filled_line *( eol *( blank_line eol ) filled_line )
10//!
11//! ; Restriction: at least one `line` byte must be `text`.
12//! filled_line ::= 4(space_or_tab) *line
13//! blank_line ::= *space_or_tab
14//! ```
15//!
16//! As this construct occurs in flow, like all flow constructs, it must be
17//! followed by an eol (line ending) or eof (end of file).
18//!
19//! In markdown, it is also possible to use [code (text)][raw_text] in the
20//! [text][] content type.
21//! It is also possible to create code with the [code (fenced)][raw_flow]
22//! construct.
23//!
24//! ## HTML
25//!
26//! Code (indented) relates to both the `<pre>` and the `<code>` elements in
27//! HTML.
28//! See [*§ 4.4.3 The `pre` element*][html_pre] and the [*§ 4.5.15 The `code`
29//! element*][html_code] in the HTML spec for more info.
30//!
31//! ## Recommendation
32//!
33//! It is recommended to use code (fenced) instead of code (indented).
34//! Code (fenced) is more explicit, similar to code (text), and has support
35//! for specifying the programming language.
36//!
37//! ## Tokens
38//!
39//! * [`CodeIndented`][Name::CodeIndented]
40//! * [`CodeFlowChunk`][Name::CodeFlowChunk]
41//! * [`LineEnding`][Name::LineEnding]
42//! * [`SpaceOrTab`][Name::SpaceOrTab]
43//!
44//! ## References
45//!
46//! * [`code-indented.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-indented.js)
47//! * [*§ 4.4 Indented code blocks* in `CommonMark`](https://spec.commonmark.org/0.31/#indented-code-blocks)
48//!
49//! [flow]: crate::construct::flow
50//! [text]: crate::construct::text
51//! [raw_flow]: crate::construct::raw_flow
52//! [raw_text]: crate::construct::raw_text
53//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
54//! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
55
56use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
57use crate::event::Name;
58use crate::state::{Name as StateName, State};
59use crate::tokenizer::Tokenizer;
60use crate::util::constant::TAB_SIZE;
61
62/// Start of code (indented).
63///
64/// > **Parsing note**: it is not needed to check if this first line is a
65/// > filled line (that it has a non-whitespace character), because blank lines
66/// > are parsed already, so we never run into that.
67///
68/// ```markdown
69/// > |     aaa
70///     ^
71/// ```
72pub fn start(tokenizer: &mut Tokenizer) -> State {
73    // Do not interrupt paragraphs.
74    if !tokenizer.interrupt
75        && tokenizer.parse_state.options.constructs.code_indented
76        && matches!(tokenizer.current, Some(b'\t' | b' '))
77    {
78        tokenizer.enter(Name::CodeIndented);
79        tokenizer.attempt(State::Next(StateName::CodeIndentedAtBreak), State::Nok);
80        State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
81    } else {
82        State::Nok
83    }
84}
85
86/// At a break.
87///
88/// ```markdown
89/// > |     aaa
90///         ^  ^
91/// ```
92pub fn at_break(tokenizer: &mut Tokenizer) -> State {
93    match tokenizer.current {
94        None => State::Retry(StateName::CodeIndentedAfter),
95        Some(b'\n') => {
96            tokenizer.attempt(
97                State::Next(StateName::CodeIndentedAtBreak),
98                State::Next(StateName::CodeIndentedAfter),
99            );
100            State::Retry(StateName::CodeIndentedFurtherStart)
101        }
102        _ => {
103            tokenizer.enter(Name::CodeFlowChunk);
104            State::Retry(StateName::CodeIndentedInside)
105        }
106    }
107}
108
109/// In code content.
110///
111/// ```markdown
112/// > |     aaa
113///         ^^^^
114/// ```
115pub fn inside(tokenizer: &mut Tokenizer) -> State {
116    match tokenizer.current {
117        None | Some(b'\n') => {
118            tokenizer.exit(Name::CodeFlowChunk);
119            State::Retry(StateName::CodeIndentedAtBreak)
120        }
121        _ => {
122            tokenizer.consume();
123            State::Next(StateName::CodeIndentedInside)
124        }
125    }
126}
127
128/// After indented code.
129///
130/// ```markdown
131/// > |     aaa
132///            ^
133/// ```
134pub fn after(tokenizer: &mut Tokenizer) -> State {
135    tokenizer.exit(Name::CodeIndented);
136    // Feel free to interrupt.
137    tokenizer.interrupt = false;
138    State::Ok
139}
140
141/// At eol, trying to parse another indent.
142///
143/// ```markdown
144/// > |     aaa
145///            ^
146///   |     bbb
147/// ```
148pub fn further_start(tokenizer: &mut Tokenizer) -> State {
149    if tokenizer.lazy || tokenizer.pierce {
150        return State::Nok;
151    }
152
153    if tokenizer.current == Some(b'\n') {
154        tokenizer.enter(Name::LineEnding);
155        tokenizer.consume();
156        tokenizer.exit(Name::LineEnding);
157        State::Next(StateName::CodeIndentedFurtherStart)
158    } else {
159        tokenizer.attempt(State::Ok, State::Next(StateName::CodeIndentedFurtherBegin));
160        State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
161    }
162}
163
164/// At the beginning of a line that is not indented enough.
165///
166/// ```markdown
167///   |     aaa
168/// > |   bbb
169///     ^
170/// ```
171pub fn further_begin(tokenizer: &mut Tokenizer) -> State {
172    if matches!(tokenizer.current, Some(b'\t' | b' ')) {
173        tokenizer.attempt(State::Next(StateName::CodeIndentedFurtherAfter), State::Nok);
174        State::Retry(space_or_tab(tokenizer))
175    } else {
176        State::Nok
177    }
178}
179
180/// After whitespace, not indented enough.
181///
182/// ```markdown
183///   |     aaa
184/// > |   bbb
185///       ^
186/// ```
187pub fn further_after(tokenizer: &mut Tokenizer) -> State {
188    match tokenizer.current {
189        Some(b'\n') => State::Retry(StateName::CodeIndentedFurtherStart),
190        _ => State::Nok,
191    }
192}