use crate::{Arena, Document, RopeSliceExt, Section, SectionData};
use ropey::{Rope, RopeSlice};
/// Returns the headline level of `line`, measured from its first byte.
///
/// This is [`headline_level`] with a zero offset: the result is the number of
/// leading `*` bytes when they are followed by a space, and `0` otherwise.
pub fn lex_level(line: &RopeSlice) -> u16 {
    const LINE_START: usize = 0;
    headline_level(line, LINE_START)
}
/// Returns the headline level of `line`, measured from its first byte.
///
/// String counterpart of [`lex_level`]: delegates to [`headline_level_str`]
/// with a zero offset.
pub fn lex_level_str(line: &str) -> u16 {
    const LINE_START: usize = 0;
    headline_level_str(line, LINE_START)
}
/// Returns the headline level found at byte `offset` of `input`.
///
/// The level is the length of the run of `*` bytes starting at `offset`,
/// provided the run is non-empty and immediately followed by a space. In every
/// other case (no stars, a different byte after the stars, or the input ending
/// before a space is seen) the result is `0`.
pub fn headline_level(input: &RopeSlice, offset: usize) -> u16 {
    let mut stars: usize = 0;
    for byte in input.bytes_at(offset) {
        if matches!(byte, b'*') {
            stars += 1;
            continue;
        }
        // First non-star byte decides: only a space after at least one star
        // makes this a headline.
        let followed_by_space = matches!(byte, b' ');
        // NOTE: the count is narrowed with `as`, so a run longer than
        // `u16::MAX` stars wraps around.
        return if followed_by_space && stars > 0 {
            stars as u16
        } else {
            0
        };
    }
    0
}
/// Returns the headline level found at byte `offset` of `input`.
///
/// The level is the length of the run of `*` bytes starting at `offset`,
/// provided the run is non-empty and immediately followed by a space. In every
/// other case (no stars, a different byte after the stars, or the input ending
/// before a space is seen) the result is `0`.
///
/// `offset` is a plain byte offset and does not have to fall on a UTF-8
/// character boundary; an offset inside a multi-byte character simply yields
/// `0`.
///
/// # Panics
///
/// Panics if `offset` is greater than `input.len()`.
pub fn headline_level_str(input: &str, offset: usize) -> u16 {
    // Slice the byte view rather than the `str`: slicing the `str` would panic
    // whenever `offset` lands in the middle of a multi-byte character.
    let bytes = &input.as_bytes()[offset..];
    let stars = bytes.iter().take_while(|&&b| b == b'*').count();
    match bytes.get(stars) {
        // NOTE: the count is narrowed with `as`, matching `headline_level`;
        // a run longer than `u16::MAX` stars wraps around.
        Some(b' ') if stars > 0 => stars as u16,
        _ => 0,
    }
}
/// Splits `input` in front of its first newline.
///
/// Returns `(head, tail)` where `head` is everything before the position
/// reported by [`next_line`] and `tail` is everything from that position on,
/// so a newline, if present, stays at the start of `tail`.
pub fn line<'a>(input: &'a RopeSlice<'a>) -> (RopeSlice<'a>, RopeSlice<'a>) {
    let newline_at = next_line(input, 0);
    let head = input.slice_bytes(..newline_at);
    let tail = input.slice_bytes(newline_at..);
    (head, tail)
}
/// Splits `input` at its first newline and drops that newline.
///
/// Returns `(head, tail)` like [`line`], except that when `tail` would start
/// with `'\n'` that single character is removed from it. Any further newlines
/// are left in `tail`.
pub fn consuming_line<'a>(input: &'a RopeSlice<'a>) -> (RopeSlice<'a>, RopeSlice<'a>) {
    let newline_at = next_line(input, 0);
    let head = input.slice_bytes(..newline_at);
    let tail = input.slice_bytes(newline_at..);
    if matches!(tail.get_char(0), Some('\n')) {
        // Skip exactly one character: the line terminator itself.
        (head, tail.slice(1..))
    } else {
        (head, tail)
    }
}
/// Searches `input` for a `\n` byte starting at byte `offset` and returns the
/// position reported by `memchr`.
pub fn next_line(input: &RopeSlice, offset: usize) -> usize {
    const NEWLINE: u8 = b'\n';
    input.memchr(NEWLINE, offset)
}
/// Parses `input` into a tree of sections stored in `arena` and returns the
/// resulting [`Document`].
///
/// The text in front of the first headline becomes the root section (level 0).
/// Every following headline starts a new section, which is attached beneath the
/// nearest preceding section whose level is strictly smaller.
///
/// # Panics
///
/// Panics if the parsed sections do not consume `input` exactly, or if the
/// consistency check over the freshly built tree fails.
pub(crate) fn parse_document(arena: &mut Arena, input: &RopeSlice) -> Document {
    let mut offset = 0;

    // An empty input produces a document consisting of an empty root section.
    if input.is_empty() {
        let root_id = arena.arena.new_node(SectionData {
            level: 0,
            text: Rope::default(),
        });
        return Document {
            root: Section { id: root_id },
            terminal_newline: false,
            empty_root_section: true,
        };
    }

    // Root section: everything up to the first headline.
    let (new_offset, end) = parse_section(input, offset);
    let empty_root_section = new_offset == end && offset == end;
    let root_id = arena.arena.new_node(SectionData {
        level: 0,
        text: Rope::from(input.slice_bytes(offset..end)),
    });
    offset = new_offset;

    // `stack` holds the chain of ancestors of the section being parsed; the
    // root (level 0) sits at the bottom and is never popped because every
    // headline has a level greater than zero.
    let mut stack = vec![root_id];
    let mut level = headline_level(input, offset);
    while level > 0 {
        // The section body is scanned starting at the end of the headline line.
        let (new_offset, end) = parse_section(input, next_line(input, offset));
        let section = SectionData {
            text: Rope::from(input.slice_bytes(offset..end)),
            level,
        };
        offset = new_offset;

        // Unwind to the closest ancestor with a strictly smaller level.
        while level
            <= arena.arena[*stack.last().expect("stack never empty")]
                .get()
                .level
        {
            stack.pop().expect("stack never empty");
        }

        let node_id = arena.arena.new_node(section);
        stack
            .last()
            .expect("stack never empty")
            .append(node_id, &mut arena.arena);
        stack.push(node_id);
        level = headline_level(input, offset);
    }

    // Every byte of the input must have been assigned to some section.
    assert_eq!(input.len_bytes(), offset);

    // Matches a headline at the start of the haystack or after any newline.
    let re = regex::Regex::new("(^|.*\n)\\*\\** .*").expect("failed to assemble headline regex");

    /// Recursively verifies that each section's text lexes to its stored level
    /// and that no further headline is found in the text that follows the
    /// leading stars.
    fn checker(re: &regex::Regex, node: Section, arena: &Arena, input: &RopeSlice) {
        let level = node.level(&arena);
        let text = node.text(&arena);
        let lexed_level = lex_level(&text.slice(..));
        if lexed_level != level
            || text.len_bytes() >= level as usize
                && re.is_match(&text.to_contiguous()[(level as usize)..])
        {
            panic!("Check failed");
        }
        for node in node.children(&arena) {
            checker(re, node, arena, input);
        }
    }
    checker(&re, Section { id: root_id }, &arena, input);

    Document {
        root: Section { id: root_id },
        terminal_newline: input.bytes().last() == Some(b'\n'),
        empty_root_section,
    }
}
/// Finds where the section body starting at byte `offset` ends.
///
/// Lines are skipped one at a time until either a line that lexes as a headline
/// is reached or the final line (one without a trailing newline) is reached. If
/// scanning stops on a final line that is not a headline, the section runs to
/// the end of `input`.
///
/// Returns `(next, end)`: `next` is the byte position where scanning stopped and
/// `end` is the exclusive end of the section text. They are equal unless the
/// byte just before `next` is a newline belonging to this section, in which
/// case `end` is `next - 1` so that the newline is left out of the text.
fn parse_section(input: &RopeSlice, offset: usize) -> (usize, usize) {
    let total = input.len_bytes();
    let starts_headline =
        |pos: usize| input.byte(pos) == b'*' && headline_level(input, pos) != 0;

    let mut cursor = offset;
    while cursor < total {
        let newline = input.memchr(b'\n', cursor);
        if newline >= total || starts_headline(cursor) {
            break;
        }
        // Move to the first byte of the following line.
        cursor = newline + 1;
    }

    // Stopped on a trailing, newline-less line that is not a headline: it still
    // belongs to this section.
    let stop = if cursor < total && headline_level(input, cursor) == 0 {
        total
    } else {
        cursor
    };

    let ends_with_newline = stop > offset && stop <= total && input.byte(stop - 1) == b'\n';
    if ends_with_newline {
        (stop, stop - 1)
    } else {
        (stop, stop)
    }
}
// Unit tests for the line-splitting, section-scanning and headline-lexing
// helpers of this module.
#[cfg(test)]
mod tests {
use super::*;
// Helper: builds a rope from the UTF-8 bytes `s` and runs the module's
// `next_line` on a slice covering the whole rope.
fn next_line(s: &[u8], offset: usize) -> usize {
let r = Rope::from(std::str::from_utf8(s).unwrap());
let r = r.slice(..);
crate::parser::structure::next_line(&r, offset)
}
// Helper: builds a rope from the UTF-8 bytes `s` and runs the module's
// `parse_section` on a slice covering the whole rope.
fn parse_section(s: &[u8], offset: usize) -> (usize, usize) {
let r = Rope::from(std::str::from_utf8(s).unwrap());
let r = r.slice(..);
crate::parser::structure::parse_section(&r, offset)
}
// Helper: builds a rope from the UTF-8 bytes `s` and runs the module's
// `headline_level` on a slice covering the whole rope.
fn headline_level(s: &[u8], offset: usize) -> u16 {
let r = Rope::from(std::str::from_utf8(s).unwrap());
let r = r.slice(..);
crate::parser::structure::headline_level(&r, offset)
}
// `line` splits in front of the first newline; the newline stays at the start
// of the second half.
#[test]
fn test_line() {
let empty = Rope::default();
assert_eq!((empty.slice(..), empty.slice(..)), line(&empty.slice(..)));
let newline = Rope::from("\n");
assert_eq!(
(empty.slice(..), newline.slice(..)),
line(&newline.slice(..))
);
let term = Rope::from("* Hello\n");
assert_eq!(
(term.slice(..term.len_chars() - 1), newline.slice(..)),
line(&term.slice(..))
);
let multi = Rope::from("* Hello\nWorld");
assert_eq!(
(
Rope::from("* Hello").slice(..),
Rope::from("\nWorld").slice(..)
),
line(&multi.slice(..))
);
}
// `consuming_line` splits like `line` but drops the single newline that
// terminates the first line; a second newline is kept in the remainder.
#[test]
fn test_consuming_line() {
let empty = Rope::default();
assert_eq!(
(empty.slice(..), empty.slice(..)),
consuming_line(&empty.slice(..))
);
let newline = Rope::from("\n");
assert_eq!(
(empty.slice(..), empty.slice(..)),
consuming_line(&newline.slice(..))
);
let term = Rope::from("* Hello\n");
assert_eq!(
(term.slice(..term.len_chars() - 1), empty.slice(..)),
consuming_line(&term.slice(..))
);
let multi = Rope::from("* Hello\nWorld");
assert_eq!(
(
Rope::from("* Hello").slice(..),
Rope::from("World").slice(..)
),
consuming_line(&multi.slice(..))
);
let many = Rope::from("* Hello\n\nWorld");
assert_eq!(
(
Rope::from("* Hello").slice(..),
Rope::from("\nWorld").slice(..)
),
consuming_line(&many.slice(..))
);
}
// `next_line` is expected to return the index of the first `\n` at or after
// the offset, or the input length when there is none.
#[test]
fn test_next_line() {
assert_eq!(0, next_line(b"", 0));
assert_eq!(1, next_line(b" ", 0));
assert_eq!(1, next_line(b" ", 1));
assert_eq!(0, next_line(b"\n", 0));
assert_eq!(1, next_line(b"\n", 1));
assert_eq!(1, next_line(b" \n", 0));
assert_eq!(1, next_line(b" \n", 1));
assert_eq!(0, next_line(b"\n ", 0));
assert_eq!(2, next_line(b"\n ", 1));
assert_eq!(0, next_line(b"\ntest\n", 0));
assert_eq!(5, next_line(b"\ntest\n", 1));
assert_eq!(0, next_line(b"\n\na\n", 0));
assert_eq!(1, next_line(b"\n\na\n", 1));
assert_eq!(3, next_line(b"\n\na\n", 2));
assert_eq!(3, next_line(b"\n\na\n", 3));
}
// `parse_section` returns a pair of byte positions; the second is one less
// than the first when the section is terminated by a newline, and equal to it
// otherwise.
#[test]
fn test_parse_section() {
assert_eq!((0, 0), parse_section(b"", 0));
assert_eq!((1, 1), parse_section(b"*", 0));
assert_eq!((1, 1), parse_section(b"*", 1));
// Starting directly on a headline yields an empty section.
assert_eq!((0, 0), parse_section(b"* ", 0));
assert_eq!((2, 2), parse_section(b"* ", 1));
assert_eq!((2, 2), parse_section(b"* ", 2));
assert_eq!((1, 0), parse_section(b"\n", 0));
assert_eq!((1, 1), parse_section(b"\n", 1));
assert_eq!((0, 0), parse_section(b"* \n", 0));
assert_eq!((3, 2), parse_section(b"* \n", 1));
assert_eq!((3, 2), parse_section(b"* \n", 2));
// A headline on the following line ends the section.
assert_eq!((1, 0), parse_section(b"\n*** \n", 0));
assert_eq!((1, 1), parse_section(b"\n*** \n", 1));
assert_eq!((2, 2), parse_section(b"\n*** \n", 2));
assert_eq!((3, 3), parse_section(b"\n*** \n", 3));
assert_eq!((6, 5), parse_section(b"\n*** \n", 4));
assert_eq!((3, 2), parse_section(b"Hi\n*** \n", 0));
}
// `headline_level` is the number of leading `*` bytes when they are followed
// by a space; every other input yields 0.
#[test]
fn test_headline_level() {
assert_eq!(0, headline_level(b"", 0));
assert_eq!(0, headline_level(b" ", 0));
assert_eq!(0, headline_level(b"*", 0));
assert_eq!(0, headline_level(b"a", 0));
assert_eq!(0, headline_level(b" ", 0));
assert_eq!(1, headline_level(b"* ", 0));
assert_eq!(0, headline_level(b"a ", 0));
assert_eq!(0, headline_level(b" *", 0));
assert_eq!(0, headline_level(b"**", 0));
assert_eq!(0, headline_level(b"a*", 0));
assert_eq!(0, headline_level(b" a", 0));
assert_eq!(0, headline_level(b"*a", 0));
assert_eq!(0, headline_level(b"aa", 0));
assert_eq!(0, headline_level(b" ", 0));
assert_eq!(1, headline_level(b"* ", 0));
assert_eq!(0, headline_level(b"a ", 0));
assert_eq!(0, headline_level(b" * ", 0));
assert_eq!(2, headline_level(b"** ", 0));
assert_eq!(0, headline_level(b"a* ", 0));
assert_eq!(0, headline_level(b" a ", 0));
assert_eq!(0, headline_level(b"*a ", 0));
assert_eq!(0, headline_level(b"aa ", 0));
assert_eq!(0, headline_level(b" *", 0));
assert_eq!(1, headline_level(b"* *", 0));
assert_eq!(0, headline_level(b"a *", 0));
assert_eq!(0, headline_level(b" **", 0));
assert_eq!(0, headline_level(b"***", 0));
assert_eq!(0, headline_level(b"a**", 0));
assert_eq!(0, headline_level(b" a*", 0));
assert_eq!(0, headline_level(b"*a*", 0));
assert_eq!(0, headline_level(b"aa*", 0));
assert_eq!(0, headline_level(b" a", 0));
assert_eq!(1, headline_level(b"* a", 0));
assert_eq!(0, headline_level(b"a a", 0));
assert_eq!(0, headline_level(b" *a", 0));
assert_eq!(0, headline_level(b"**a", 0));
assert_eq!(0, headline_level(b"a*a", 0));
assert_eq!(0, headline_level(b" aa", 0));
assert_eq!(0, headline_level(b"*aa", 0));
assert_eq!(0, headline_level(b"aaa", 0));
assert_eq!(0, headline_level(b"***", 0));
assert_eq!(3, headline_level(b"*** ", 0));
assert_eq!(3, headline_level(b"*** ", 0));
assert_eq!(0, headline_level(b"***a", 0));
assert_eq!(3, headline_level(b"*** a", 0));
assert_eq!(3, headline_level(b"*** aaaaa", 0));
}
}