Skip to main content

oak_markdown/lexer/
block.rs

1use crate::lexer::{MarkdownLexer, State, token_type::MarkdownTokenType};
2use oak_core::Source;
3
4impl<'config> MarkdownLexer<'config> {
5    /// Handles headings.
6    pub fn lex_heading<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
7        let start_pos = state.get_position();
8
9        if start_pos > 0 {
10            if let Some(prev_char) = state.source().get_char_at(start_pos - 1) {
11                if prev_char != '\n' && prev_char != '\r' {
12                    return false;
13                }
14            }
15        }
16
17        if let Some('#') = state.peek() {
18            let mut level = 0;
19            let mut pos = start_pos;
20
21            while let Some('#') = state.source().get_char_at(pos) {
22                level += 1;
23                pos += 1;
24                if level > 6 {
25                    return false;
26                }
27            }
28
29            if let Some(ch) = state.source().get_char_at(pos) {
30                if ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' {
31                    return false;
32                }
33            }
34
35            state.advance(level);
36
37            let heading_kind = match level {
38                1 => MarkdownTokenType::Heading1,
39                2 => MarkdownTokenType::Heading2,
40                3 => MarkdownTokenType::Heading3,
41                4 => MarkdownTokenType::Heading4,
42                5 => MarkdownTokenType::Heading5,
43                6 => MarkdownTokenType::Heading6,
44                _ => return false,
45            };
46
47            state.add_token(heading_kind, start_pos, state.get_position());
48            true
49        }
50        else {
51            false
52        }
53    }
54
55    /// Handles code blocks.
56    pub fn lex_code_block<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
57        let start_pos = state.get_position();
58
59        if start_pos > 0 {
60            if let Some(prev_char) = state.source().get_char_at(start_pos - 1) {
61                if prev_char != '\n' && prev_char != '\r' {
62                    return false;
63                }
64            }
65        }
66
67        let fence_char = if let Some('`') = state.peek() {
68            '`'
69        }
70        else if let Some('~') = state.peek() {
71            '~'
72        }
73        else {
74            return false;
75        };
76
77        let mut fence_count = 0;
78        let mut pos = start_pos;
79
80        while let Some(ch) = state.source().get_char_at(pos) {
81            if ch == fence_char {
82                fence_count += 1;
83                pos += 1;
84            }
85            else {
86                break;
87            }
88        }
89
90        if fence_count < 3 {
91            return false;
92        }
93
94        state.advance(fence_count);
95        state.add_token(MarkdownTokenType::CodeFence, start_pos, state.get_position());
96
97        let lang_start = state.get_position();
98        while let Some(ch) = state.peek() {
99            if ch == '\n' || ch == '\r' {
100                break;
101            }
102            else if ch != ' ' && ch != '\t' {
103                state.advance(ch.len_utf8());
104            }
105            else {
106                break;
107            }
108        }
109
110        if state.get_position() > lang_start {
111            state.add_token(MarkdownTokenType::CodeLanguage, lang_start, state.get_position());
112        }
113
114        true
115    }
116
117    /// Lexes blockquotes.
118    pub fn lex_blockquote<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
119        let start_pos = state.get_position();
120
121        let mut check_pos = start_pos;
122        while check_pos > 0 {
123            check_pos -= 1;
124            if let Some(ch) = state.source().get_char_at(check_pos) {
125                if ch == '\n' || ch == '\r' {
126                    break;
127                }
128                else if ch != ' ' && ch != '\t' {
129                    return false;
130                }
131            }
132        }
133
134        if let Some('>') = state.peek() {
135            state.advance(1);
136            state.add_token(MarkdownTokenType::BlockquoteMarker, start_pos, state.get_position());
137            true
138        }
139        else {
140            false
141        }
142    }
143
144    /// Lexes horizontal rules.
145    pub fn lex_horizontal_rule<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
146        let start_pos = state.get_position();
147
148        let mut check_pos = start_pos;
149        while check_pos > 0 {
150            check_pos -= 1;
151            if let Some(ch) = state.source().get_char_at(check_pos) {
152                if ch == '\n' || ch == '\r' {
153                    break;
154                }
155                else if ch != ' ' && ch != '\t' {
156                    return false;
157                }
158            }
159        }
160
161        if let Some(ch) = state.peek() {
162            if ch == '-' || ch == '*' || ch == '_' {
163                let rule_char = ch;
164                let mut count = 0;
165                let mut pos = start_pos;
166
167                while let Some(current_ch) = state.source().get_char_at(pos) {
168                    if current_ch == rule_char {
169                        count += 1;
170                        pos += 1
171                    }
172                    else if current_ch == ' ' || current_ch == '\t' {
173                        pos += 1;
174                    }
175                    else {
176                        break;
177                    }
178                }
179
180                if count >= 3 {
181                    while let Some(current_ch) = state.source().get_char_at(pos) {
182                        if current_ch == '\n' || current_ch == '\r' {
183                            break;
184                        }
185                        else if current_ch == ' ' || current_ch == '\t' {
186                            pos += 1
187                        }
188                        else {
189                            return false;
190                        }
191                    }
192
193                    state.set_position(pos);
194                    state.add_token(MarkdownTokenType::HorizontalRule, start_pos, state.get_position());
195                    return true;
196                }
197            }
198        }
199        false
200    }
201
202    /// Lexes front matter.
203    pub fn lex_front_matter<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
204        let start_pos = state.get_position();
205
206        if start_pos != 0 {
207            return false;
208        }
209
210        if state.peek() == Some('-') && state.source().get_char_at(1) == Some('-') && state.source().get_char_at(2) == Some('-') {
211            state.advance(3);
212            let mut found_end = false;
213            while state.not_at_end() {
214                if state.peek() == Some('\n') || state.peek() == Some('\r') {
215                    state.advance(1);
216                    if state.peek() == Some('\n') {
217                        state.advance(1)
218                    }
219                    if state.peek() == Some('-') && state.source().get_char_at(state.get_position() + 1) == Some('-') && state.source().get_char_at(state.get_position() + 2) == Some('-') {
220                        state.advance(3);
221                        found_end = true;
222                        break;
223                    }
224                }
225                else {
226                    state.advance(1)
227                }
228            }
229
230            if found_end {
231                state.add_token(MarkdownTokenType::FrontMatter, start_pos, state.get_position());
232                true
233            }
234            else {
235                state.set_position(start_pos);
236                false
237            }
238        }
239        else {
240            false
241        }
242    }
243
244    /// Handles indented code blocks.
245    pub fn lex_indented_code_block<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
246        let start_pos = state.get_position();
247
248        if start_pos > 0 {
249            if let Some(prev_char) = state.source().get_char_at(start_pos - 1) {
250                if prev_char != '\n' && prev_char != '\r' {
251                    return false;
252                }
253            }
254        }
255
256        let mut indent_count = 0;
257        let mut pos = start_pos;
258        while let Some(ch) = state.source().get_char_at(pos) {
259            if ch == ' ' {
260                indent_count += 1;
261                pos += 1;
262                if indent_count == 4 {
263                    break;
264                }
265            }
266            else if ch == '\t' {
267                indent_count = 4;
268                pos += 1;
269                break;
270            }
271            else {
272                break;
273            }
274        }
275
276        if indent_count >= 4 {
277            state.set_position(pos);
278            state.add_token(MarkdownTokenType::CodeBlock, start_pos, state.get_position());
279            true
280        }
281        else {
282            false
283        }
284    }
285}