1use crate::{Arena, Document, RopeSliceExt, Section, SectionData};
2
3use ropey::{Rope, RopeSlice};
4
5pub fn lex_level(line: &RopeSlice) -> u16 {
6 headline_level(line, 0)
7}
8
9pub fn lex_level_str(line: &str) -> u16 {
10 headline_level_str(line, 0)
11}
12
13pub fn headline_level(input: &RopeSlice, offset: usize) -> u16 {
20 for (i, c) in input.bytes_at(offset).enumerate() {
21 match c {
22 b'*' => {}
23 b' ' if i > 0 => return i as u16,
24 _ => return 0,
25 }
26 }
27 0
28}
29
/// Returns the headline level found at byte `offset` of `input`.
///
/// A headline is a run of one or more `*` bytes immediately followed by a
/// space; its level is the length of that run. Anything else yields `0`,
/// including a run of stars that reaches the end of the input without a
/// following space.
///
/// `offset` is a plain byte offset. It does not have to fall on a UTF-8
/// character boundary: a continuation byte is simply "not a star" and gives
/// level `0`.
///
/// The star count is narrowed to `u16` with `as`.
///
/// # Panics
///
/// Panics if `offset` is greater than `input.len()`.
pub fn headline_level_str(input: &str, offset: usize) -> u16 {
    // Slice the byte view rather than the `str`: only ASCII bytes are
    // inspected, and slicing the `str` would panic on an offset that lands
    // inside a multi-byte character.
    let tail = &input.as_bytes()[offset..];
    for (i, &byte) in tail.iter().enumerate() {
        match byte {
            b'*' => {}
            // `i` equals the number of stars seen, since any other byte returns.
            b' ' if i > 0 => return i as u16,
            _ => return 0,
        }
    }
    // Ran out of input before seeing the terminating space.
    0
}
40
41pub fn line<'a>(input: &'a RopeSlice<'a>) -> (RopeSlice<'a>, RopeSlice<'a>) {
45 let split = next_line(input, 0);
46 (input.slice_bytes(..split), input.slice_bytes(split..))
47}
48
49pub fn consuming_line<'a>(input: &'a RopeSlice<'a>) -> (RopeSlice<'a>, RopeSlice<'a>) {
52 let split = next_line(input, 0);
53 let line = input.slice_bytes(..split);
54 let rest = input.slice_bytes(split..);
55 match rest.get_char(0) {
56 Some('\n') => (line, rest.slice(1..)),
57 _ => (line, rest),
58 }
59}
60
61pub fn next_line(input: &RopeSlice, offset: usize) -> usize {
63 input.memchr(b'\n', offset)
64}
65
66pub(crate) fn parse_document(arena: &mut Arena, input: &RopeSlice) -> Document {
67 let mut offset = 0;
68
69 if input.is_empty() {
71 let root_id = arena.arena.new_node(SectionData {
72 level: 0,
73 text: Rope::default(),
74 });
75
76 return Document {
77 root: Section { id: root_id },
78 terminal_newline: false,
79 empty_root_section: true,
80 };
81 }
82
83 let (new_offset, end) = parse_section(input, offset);
84 let empty_root_section = new_offset == end && offset == end;
85 let root_id = arena.arena.new_node(SectionData {
86 level: 0,
87 text: Rope::from(input.slice_bytes(offset..end)),
88 });
89 offset = new_offset;
90
91 let mut stack = vec![root_id];
92
93 let mut level = headline_level(input, offset);
94 while level > 0 {
95 let (new_offset, end) = parse_section(input, next_line(input, offset));
96 let section = SectionData {
97 text: Rope::from(input.slice_bytes(offset..end)),
98 level,
99 };
100 offset = new_offset;
101
102 while level
103 <= arena.arena[*stack.last().expect("stack never empty")]
104 .get()
105 .level
106 {
107 stack.pop().expect("stack never empty");
108 }
109
110 let node_id = arena.arena.new_node(section);
111
112 stack
113 .last()
114 .expect("stack never empty")
115 .append(node_id, &mut arena.arena);
116
117 stack.push(node_id);
118
119 level = headline_level(input, offset);
120 }
121
122 assert_eq!(input.len_bytes(), offset);
123
124 let re = regex::Regex::new("(^|.*\n)\\*\\** .*").expect("failed to assemble headline regex");
126
127 fn checker(re: ®ex::Regex, node: Section, arena: &Arena, input: &RopeSlice) {
129 let level = node.level(&arena);
130 let text = node.text(&arena);
131 let lexed_level = lex_level(&text.slice(..));
132 if lexed_level != level
133 || text.len_bytes() >= level as usize
134 && re.is_match(&text.to_contiguous()[(level as usize)..])
135 {
136 panic!("Check failed");
141 }
142 assert_eq!(lexed_level, level);
143 for node in node.children(&arena) {
144 checker(re, node, arena, input);
145 }
146 }
147
148 checker(&re, Section { id: root_id }, &arena, input);
150
151 Document {
152 root: Section { id: root_id },
153 terminal_newline: input.bytes().last() == Some(b'\n'),
154 empty_root_section,
155 }
156}
157
158fn parse_section(input: &RopeSlice, offset: usize) -> (usize, usize) {
162 let mut last = offset;
164 while last < input.len_bytes() {
165 let i = input.memchr(b'\n', last);
166 if i >= input.len_bytes() || input.byte(last) == b'*' && headline_level(input, last) != 0 {
168 break;
169 }
170 last = i + 1;
171 }
172
173 let last = if last < input.len_bytes() && headline_level(input, last) == 0 {
174 input.len_bytes()
175 } else {
176 last
177 };
178 if last > offset && last <= input.len_bytes() && input.byte(last - 1) == b'\n' {
179 (last, last - 1)
180 } else {
181 (last, last)
182 }
183}
184
// Unit tests for the line-splitting and section-scanning helpers above.
#[cfg(test)]
mod tests {
    use super::*;

    // Byte-slice convenience wrapper around the module's `next_line`: builds a
    // temporary rope from `s` and forwards the call. Inside this module it
    // shadows the glob-imported function of the same name.
    fn next_line(s: &[u8], offset: usize) -> usize {
        let r = Rope::from(std::str::from_utf8(s).unwrap());
        let r = r.slice(..);
        crate::parser::structure::next_line(&r, offset)
    }

    // Byte-slice convenience wrapper around the module's `parse_section`.
    fn parse_section(s: &[u8], offset: usize) -> (usize, usize) {
        let r = Rope::from(std::str::from_utf8(s).unwrap());
        let r = r.slice(..);
        crate::parser::structure::parse_section(&r, offset)
    }

    // Byte-slice convenience wrapper around the module's `headline_level`.
    fn headline_level(s: &[u8], offset: usize) -> u16 {
        let r = Rope::from(std::str::from_utf8(s).unwrap());
        let r = r.slice(..);
        crate::parser::structure::headline_level(&r, offset)
    }

    // `line` splits before the first newline and leaves that newline at the
    // front of the remainder.
    #[test]
    fn test_line() {
        let empty = Rope::default();
        assert_eq!((empty.slice(..), empty.slice(..)), line(&empty.slice(..)));

        let newline = Rope::from("\n");
        assert_eq!(
            (empty.slice(..), newline.slice(..)),
            line(&newline.slice(..))
        );

        let term = Rope::from("* Hello\n");
        assert_eq!(
            (term.slice(..term.len_chars() - 1), newline.slice(..)),
            line(&term.slice(..))
        );

        let multi = Rope::from("* Hello\nWorld");
        assert_eq!(
            (
                Rope::from("* Hello").slice(..),
                Rope::from("\nWorld").slice(..)
            ),
            line(&multi.slice(..))
        );
    }

    // `consuming_line` behaves like `line` but removes exactly one newline
    // from the front of the remainder.
    #[test]
    fn test_consuming_line() {
        let empty = Rope::default();
        assert_eq!(
            (empty.slice(..), empty.slice(..)),
            consuming_line(&empty.slice(..))
        );

        let newline = Rope::from("\n");
        assert_eq!(
            (empty.slice(..), empty.slice(..)),
            consuming_line(&newline.slice(..))
        );

        let term = Rope::from("* Hello\n");
        assert_eq!(
            (term.slice(..term.len_chars() - 1), empty.slice(..)),
            consuming_line(&term.slice(..))
        );

        let multi = Rope::from("* Hello\nWorld");
        assert_eq!(
            (
                Rope::from("* Hello").slice(..),
                Rope::from("World").slice(..)
            ),
            consuming_line(&multi.slice(..))
        );

        // Only the first of two consecutive newlines is consumed.
        let many = Rope::from("* Hello\n\nWorld");
        assert_eq!(
            (
                Rope::from("* Hello").slice(..),
                Rope::from("\nWorld").slice(..)
            ),
            consuming_line(&many.slice(..))
        );
    }

    // `next_line` yields the position of the first `\n` at or after the
    // offset, or the input length when there is none.
    #[test]
    fn test_next_line() {
        assert_eq!(0, next_line(b"", 0));
        assert_eq!(1, next_line(b" ", 0));
        assert_eq!(1, next_line(b" ", 1));
        assert_eq!(0, next_line(b"\n", 0));
        assert_eq!(1, next_line(b"\n", 1));
        assert_eq!(1, next_line(b" \n", 0));
        assert_eq!(1, next_line(b" \n", 1));
        assert_eq!(0, next_line(b"\n ", 0));
        assert_eq!(2, next_line(b"\n ", 1));
        assert_eq!(0, next_line(b"\ntest\n", 0));
        assert_eq!(5, next_line(b"\ntest\n", 1));
        assert_eq!(0, next_line(b"\n\na\n", 0));
        assert_eq!(1, next_line(b"\n\na\n", 1));
        assert_eq!(3, next_line(b"\n\na\n", 2));
        assert_eq!(3, next_line(b"\n\na\n", 3));
    }

    // Expected pairs are `(start of next section, end of this section's
    // text)`; the two differ by one when a trailing newline is excluded.
    #[test]
    fn test_parse_section() {
        assert_eq!((0, 0), parse_section(b"", 0));
        assert_eq!((1, 1), parse_section(b"*", 0));
        assert_eq!((1, 1), parse_section(b"*", 1));
        assert_eq!((0, 0), parse_section(b"* ", 0));
        assert_eq!((2, 2), parse_section(b"* ", 1));
        assert_eq!((2, 2), parse_section(b"* ", 2));
        assert_eq!((1, 0), parse_section(b"\n", 0));
        assert_eq!((1, 1), parse_section(b"\n", 1));
        assert_eq!((0, 0), parse_section(b"* \n", 0));
        assert_eq!((3, 2), parse_section(b"* \n", 1));
        assert_eq!((3, 2), parse_section(b"* \n", 2));
        assert_eq!((1, 0), parse_section(b"\n*** \n", 0));
        assert_eq!((1, 1), parse_section(b"\n*** \n", 1));
        assert_eq!((2, 2), parse_section(b"\n*** \n", 2));
        assert_eq!((3, 3), parse_section(b"\n*** \n", 3));
        assert_eq!((6, 5), parse_section(b"\n*** \n", 4));
        assert_eq!((3, 2), parse_section(b"Hi\n*** \n", 0));
    }

    // A headline level is the number of leading `*` bytes, and only counts
    // when the stars are followed by a space.
    // NOTE(review): several assertions below are textually identical;
    // presumably the inputs once differed in their amount of whitespace -
    // confirm against version control.
    #[test]
    fn test_headline_level() {
        assert_eq!(0, headline_level(b"", 0));

        assert_eq!(0, headline_level(b" ", 0));
        assert_eq!(0, headline_level(b"*", 0));
        assert_eq!(0, headline_level(b"a", 0));

        assert_eq!(0, headline_level(b" ", 0));
        assert_eq!(1, headline_level(b"* ", 0));
        assert_eq!(0, headline_level(b"a ", 0));
        assert_eq!(0, headline_level(b" *", 0));
        assert_eq!(0, headline_level(b"**", 0));
        assert_eq!(0, headline_level(b"a*", 0));
        assert_eq!(0, headline_level(b" a", 0));
        assert_eq!(0, headline_level(b"*a", 0));
        assert_eq!(0, headline_level(b"aa", 0));

        assert_eq!(0, headline_level(b" ", 0));
        assert_eq!(1, headline_level(b"* ", 0));
        assert_eq!(0, headline_level(b"a ", 0));
        assert_eq!(0, headline_level(b" * ", 0));
        assert_eq!(2, headline_level(b"** ", 0));
        assert_eq!(0, headline_level(b"a* ", 0));
        assert_eq!(0, headline_level(b" a ", 0));
        assert_eq!(0, headline_level(b"*a ", 0));
        assert_eq!(0, headline_level(b"aa ", 0));

        assert_eq!(0, headline_level(b" *", 0));
        assert_eq!(1, headline_level(b"* *", 0));
        assert_eq!(0, headline_level(b"a *", 0));
        assert_eq!(0, headline_level(b" **", 0));
        assert_eq!(0, headline_level(b"***", 0));
        assert_eq!(0, headline_level(b"a**", 0));
        assert_eq!(0, headline_level(b" a*", 0));
        assert_eq!(0, headline_level(b"*a*", 0));
        assert_eq!(0, headline_level(b"aa*", 0));

        assert_eq!(0, headline_level(b" a", 0));
        assert_eq!(1, headline_level(b"* a", 0));
        assert_eq!(0, headline_level(b"a a", 0));
        assert_eq!(0, headline_level(b" *a", 0));
        assert_eq!(0, headline_level(b"**a", 0));
        assert_eq!(0, headline_level(b"a*a", 0));
        assert_eq!(0, headline_level(b" aa", 0));
        assert_eq!(0, headline_level(b"*aa", 0));
        assert_eq!(0, headline_level(b"aaa", 0));

        assert_eq!(0, headline_level(b"***", 0));
        assert_eq!(3, headline_level(b"*** ", 0));
        assert_eq!(3, headline_level(b"*** ", 0));
        assert_eq!(0, headline_level(b"***a", 0));
        assert_eq!(3, headline_level(b"*** a", 0));
        assert_eq!(3, headline_level(b"*** aaaaa", 0));
    }
}