markdown/construct/code_indented.rs
1//! Code (indented) occurs in the [flow][] content type.
2//!
3//! ## Grammar
4//!
5//! Code (indented) forms with the following BNF
6//! (<small>see [construct][crate::construct] for character groups</small>):
7//!
8//! ```bnf
9//! code_indented ::= filled_line *( eol *( blank_line eol ) filled_line )
10//!
11//! ; Restriction: at least one `line` byte must be `text`.
12//! filled_line ::= 4(space_or_tab) *line
13//! blank_line ::= *space_or_tab
14//! ```
15//!
16//! As this construct occurs in flow, like all flow constructs, it must be
17//! followed by an eol (line ending) or eof (end of file).
18//!
19//! In markdown, it is also possible to use [code (text)][raw_text] in the
20//! [text][] content type.
21//! It is also possible to create code with the [code (fenced)][raw_flow]
22//! construct.
23//!
24//! ## HTML
25//!
26//! Code (indented) relates to both the `<pre>` and the `<code>` elements in
27//! HTML.
28//! See [*§ 4.4.3 The `pre` element*][html_pre] and the [*§ 4.5.15 The `code`
29//! element*][html_code] in the HTML spec for more info.
30//!
31//! ## Recommendation
32//!
33//! It is recommended to use code (fenced) instead of code (indented).
34//! Code (fenced) is more explicit, similar to code (text), and has support
35//! for specifying the programming language.
36//!
37//! ## Tokens
38//!
39//! * [`CodeIndented`][Name::CodeIndented]
40//! * [`CodeFlowChunk`][Name::CodeFlowChunk]
41//! * [`LineEnding`][Name::LineEnding]
42//! * [`SpaceOrTab`][Name::SpaceOrTab]
43//!
44//! ## References
45//!
46//! * [`code-indented.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-indented.js)
47//! * [*§ 4.4 Indented code blocks* in `CommonMark`](https://spec.commonmark.org/0.31/#indented-code-blocks)
48//!
49//! [flow]: crate::construct::flow
50//! [text]: crate::construct::text
51//! [raw_flow]: crate::construct::raw_flow
52//! [raw_text]: crate::construct::raw_text
53//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
54//! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
55
56use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
57use crate::event::Name;
58use crate::state::{Name as StateName, State};
59use crate::tokenizer::Tokenizer;
60use crate::util::constant::TAB_SIZE;
61
62/// Start of code (indented).
63///
64/// > **Parsing note**: it is not needed to check if this first line is a
65/// > filled line (that it has a non-whitespace character), because blank lines
66/// > are parsed already, so we never run into that.
67///
68/// ```markdown
69/// > | aaa
70/// ^
71/// ```
72pub fn start(tokenizer: &mut Tokenizer) -> State {
73 // Do not interrupt paragraphs.
74 if !tokenizer.interrupt
75 && tokenizer.parse_state.options.constructs.code_indented
76 && matches!(tokenizer.current, Some(b'\t' | b' '))
77 {
78 tokenizer.enter(Name::CodeIndented);
79 tokenizer.attempt(State::Next(StateName::CodeIndentedAtBreak), State::Nok);
80 State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
81 } else {
82 State::Nok
83 }
84}
85
86/// At a break.
87///
88/// ```markdown
89/// > | aaa
90/// ^ ^
91/// ```
92pub fn at_break(tokenizer: &mut Tokenizer) -> State {
93 match tokenizer.current {
94 None => State::Retry(StateName::CodeIndentedAfter),
95 Some(b'\n') => {
96 tokenizer.attempt(
97 State::Next(StateName::CodeIndentedAtBreak),
98 State::Next(StateName::CodeIndentedAfter),
99 );
100 State::Retry(StateName::CodeIndentedFurtherStart)
101 }
102 _ => {
103 tokenizer.enter(Name::CodeFlowChunk);
104 State::Retry(StateName::CodeIndentedInside)
105 }
106 }
107}
108
109/// In code content.
110///
111/// ```markdown
112/// > | aaa
113/// ^^^^
114/// ```
115pub fn inside(tokenizer: &mut Tokenizer) -> State {
116 match tokenizer.current {
117 None | Some(b'\n') => {
118 tokenizer.exit(Name::CodeFlowChunk);
119 State::Retry(StateName::CodeIndentedAtBreak)
120 }
121 _ => {
122 tokenizer.consume();
123 State::Next(StateName::CodeIndentedInside)
124 }
125 }
126}
127
128/// After indented code.
129///
130/// ```markdown
131/// > | aaa
132/// ^
133/// ```
134pub fn after(tokenizer: &mut Tokenizer) -> State {
135 tokenizer.exit(Name::CodeIndented);
136 // Feel free to interrupt.
137 tokenizer.interrupt = false;
138 State::Ok
139}
140
141/// At eol, trying to parse another indent.
142///
143/// ```markdown
144/// > | aaa
145/// ^
146/// | bbb
147/// ```
148pub fn further_start(tokenizer: &mut Tokenizer) -> State {
149 if tokenizer.lazy || tokenizer.pierce {
150 return State::Nok;
151 }
152
153 if tokenizer.current == Some(b'\n') {
154 tokenizer.enter(Name::LineEnding);
155 tokenizer.consume();
156 tokenizer.exit(Name::LineEnding);
157 State::Next(StateName::CodeIndentedFurtherStart)
158 } else {
159 tokenizer.attempt(State::Ok, State::Next(StateName::CodeIndentedFurtherBegin));
160 State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
161 }
162}
163
164/// At the beginning of a line that is not indented enough.
165///
166/// ```markdown
167/// | aaa
168/// > | bbb
169/// ^
170/// ```
171pub fn further_begin(tokenizer: &mut Tokenizer) -> State {
172 if matches!(tokenizer.current, Some(b'\t' | b' ')) {
173 tokenizer.attempt(State::Next(StateName::CodeIndentedFurtherAfter), State::Nok);
174 State::Retry(space_or_tab(tokenizer))
175 } else {
176 State::Nok
177 }
178}
179
180/// After whitespace, not indented enough.
181///
182/// ```markdown
183/// | aaa
184/// > | bbb
185/// ^
186/// ```
187pub fn further_after(tokenizer: &mut Tokenizer) -> State {
188 match tokenizer.current {
189 Some(b'\n') => State::Retry(StateName::CodeIndentedFurtherStart),
190 _ => State::Nok,
191 }
192}