use crate::{kind::NoteSyntaxKind, language::NotedownLanguage};
use oak_core::{IncrementalCache, Lexer, LexerState, lexer::LexOutput, source::Source};
/// Shorthand for the lexer state specialized to the Notedown language.
type State<S> = LexerState<S, NotedownLanguage>;
#[derive(Clone, Debug)]
pub struct NotedownLexer<'config> {
config: &'config NotedownLanguage,
}
impl<'config> NotedownLexer<'config> {
/// Creates a new lexer that borrows the given language configuration.
pub fn new(config: &'config NotedownLanguage) -> Self {
Self { config }
}
/// Consumes a run of spaces and tabs and emits one `Whitespace` token.
/// Returns `true` if at least one character was consumed.
fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
while let Some(ch) = state.peek() {
if ch == ' ' || ch == '\t' {
state.advance(ch.len_utf8());
}
else {
break;
}
}
if state.get_position() > start_pos {
state.add_token(NoteSyntaxKind::Whitespace, start_pos, state.get_position());
true
}
else {
false
}
}
/// Lexes a line terminator (`\n`, `\r`, or `\r\n`) into one `Newline` token.
fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
if let Some('\n') = state.peek() {
state.advance(1);
state.add_token(NoteSyntaxKind::Newline, start_pos, state.get_position());
true
}
else if let Some('\r') = state.peek() {
state.advance(1);
// Fold a following '\n' into the same token so CRLF is a single Newline.
if let Some('\n') = state.peek() {
state.advance(1);
}
state.add_token(NoteSyntaxKind::Newline, start_pos, state.get_position());
true
}
else {
false
}
}
/// Lexes an ATX heading marker (`#` through `######`) at the start of a
/// line, emitting a `Heading1`..`Heading6` token over just the `#` run.
/// Nothing is consumed on failure.
fn lex_heading<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
// Headings are only recognized at the beginning of a line: the previous
// character (if any) must be a line terminator.
// NOTE(review): `start_pos - 1` assumes the preceding character is a
// single-unit newline — confirm `get_char_at` semantics for offsets that
// could fall inside a multi-byte character.
if start_pos > 0 {
if let Some(prev_char) = state.get_char_at(start_pos - 1) {
if prev_char != '\n' && prev_char != '\r' {
return false;
}
}
}
if let Some('#') = state.peek() {
// Count the '#' run by lookahead only; nothing is consumed yet.
let mut level = 0;
let mut pos = start_pos;
while let Some('#') = state.get_char_at(pos) {
level += 1;
pos += 1;
// More than six '#' characters is not a heading.
if level > 6 {
return false; }
}
// The run must be followed by whitespace, a line terminator, or EOF.
if let Some(ch) = state.get_char_at(pos) {
if ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' {
return false;
}
}
// '#' is one byte, so advancing by `level` consumes exactly the run.
state.advance(level);
let heading_kind = match level {
1 => NoteSyntaxKind::Heading1,
2 => NoteSyntaxKind::Heading2,
3 => NoteSyntaxKind::Heading3,
4 => NoteSyntaxKind::Heading4,
5 => NoteSyntaxKind::Heading5,
6 => NoteSyntaxKind::Heading6,
// Unreachable: `level` is 1..=6 here; kept as a safe fallback.
_ => return false,
};
state.add_token(heading_kind, start_pos, state.get_position());
true
}
else {
false
}
}
/// Lexes a single-line inline code span `` `...` `` as one `InlineCode`
/// token. Backtracks and returns `false` if no closing backtick appears
/// before the end of the line.
fn lex_inline_code<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
if let Some('`') = state.peek() {
state.advance(1);
let mut found_end = false;
while let Some(ch) = state.peek() {
if ch == '`' {
state.advance(1);
found_end = true;
break;
}
// Inline code may not span lines.
else if ch == '\n' || ch == '\r' {
break; }
else {
state.advance(ch.len_utf8());
}
}
if found_end {
state.add_token(NoteSyntaxKind::InlineCode, start_pos, state.get_position());
true
}
else {
// Unterminated: rewind so the backtick can be lexed another way.
state.set_position(start_pos);
false
}
}
else {
false
}
}
/// Lexes a fenced code-block opener: at least three backticks or tildes at
/// the start of a line (`CodeFence`), optionally followed by an info string
/// (`CodeLanguage`). The fenced body itself is not consumed here; it is
/// tokenized by the other rules on subsequent iterations.
fn lex_code_block<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
// Fences are only recognized at the beginning of a line.
if start_pos > 0 {
if let Some(prev_char) = state.get_char_at(start_pos - 1) {
if prev_char != '\n' && prev_char != '\r' {
return false;
}
}
}
let fence_char = if let Some('`') = state.peek() {
'`'
}
else if let Some('~') = state.peek() {
'~'
}
else {
return false;
};
// Count the fence run by lookahead before consuming anything.
let mut fence_count = 0;
let mut pos = start_pos;
while let Some(ch) = state.get_char_at(pos) {
if ch == fence_char {
fence_count += 1;
pos += 1;
}
else {
break;
}
}
// Fewer than three fence characters is not a code fence.
if fence_count < 3 {
return false; }
state.advance(fence_count);
state.add_token(NoteSyntaxKind::CodeFence, start_pos, state.get_position());
// Optional info string: everything up to whitespace or end of line.
let lang_start = state.get_position();
while let Some(ch) = state.peek() {
if ch == '\n' || ch == '\r' {
break;
}
else if ch != ' ' && ch != '\t' {
state.advance(ch.len_utf8());
}
else {
break;
}
}
if state.get_position() > lang_start {
state.add_token(NoteSyntaxKind::CodeLanguage, lang_start, state.get_position());
}
true
}
/// Lexes a run of `*` or `_` markers: two or more become one `Strong`
/// token, a single marker becomes `Emphasis`.
fn lex_emphasis<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
let marker_char = if let Some('*') = state.peek() {
'*'
}
else if let Some('_') = state.peek() {
'_'
}
else {
return false;
};
let mut marker_count = 0;
let mut pos = start_pos;
while let Some(ch) = state.get_char_at(pos) {
if ch == marker_char {
marker_count += 1;
pos += 1;
}
else {
break;
}
}
// Defensive only: cannot be zero because `peek` saw a marker above.
if marker_count == 0 {
return false;
}
state.advance(marker_count);
let token_kind = if marker_count >= 2 { NoteSyntaxKind::Strong } else { NoteSyntaxKind::Emphasis };
state.add_token(token_kind, start_pos, state.get_position());
true
}
/// Lexes a two-character `~~` strikethrough marker.
fn lex_strikethrough<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
if let Some('~') = state.peek() {
if let Some('~') = state.get_char_at(start_pos + 1) {
state.advance(2);
state.add_token(NoteSyntaxKind::Strikethrough, start_pos, state.get_position());
true
}
else {
false
}
}
else {
false
}
}
/// Lexes the opener of a link (`[`) or image (`![`) as a `Link`/`Image`
/// token; the bracketed content is tokenized by the other rules.
fn lex_link_or_image<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
let is_image = if let Some('!') = state.peek() {
state.advance(1);
true
}
else {
false
};
if let Some('[') = state.peek() {
state.advance(1);
let token_kind = if is_image { NoteSyntaxKind::Image } else { NoteSyntaxKind::Link };
state.add_token(token_kind, start_pos, state.get_position());
true
}
else {
// A lone '!' is not an image opener: undo its consumption.
if is_image {
state.set_position(start_pos);
}
false
}
}
/// Lexes a bullet (`-`/`*`/`+`) or ordered (`12.`) list marker. The marker
/// must be the first non-blank content on its line and must be followed by
/// a space or tab; otherwise the position is restored and `false` returned.
fn lex_list_marker<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
// Scan backwards: only spaces/tabs may precede the marker on this line.
let mut check_pos = start_pos;
while check_pos > 0 {
check_pos -= 1;
if let Some(ch) = state.get_char_at(check_pos) {
if ch == '\n' || ch == '\r' {
break;
}
else if ch != ' ' && ch != '\t' {
return false; }
}
}
if let Some(ch) = state.peek() {
match ch {
'-' | '*' | '+' => {
state.advance(1);
if let Some(next_ch) = state.peek() {
if next_ch == ' ' || next_ch == '\t' {
state.add_token(NoteSyntaxKind::ListMarker, start_pos, state.get_position());
return true;
}
}
// Bullet not followed by whitespace: backtrack.
state.set_position(start_pos);
false
}
'0'..='9' => {
// Ordered marker: a digit run, then '.', then whitespace.
while let Some(digit) = state.peek() {
if digit.is_ascii_digit() {
state.advance(1);
}
else {
break;
}
}
if let Some('.') = state.peek() {
state.advance(1);
if let Some(next_ch) = state.peek() {
if next_ch == ' ' || next_ch == '\t' {
state.add_token(NoteSyntaxKind::ListMarker, start_pos, state.get_position());
return true;
}
}
}
// Not a valid ordered marker: backtrack over consumed digits/dot.
state.set_position(start_pos);
false
}
_ => false,
}
}
else {
false
}
}
/// Lexes a task checkbox marker: exactly `[ ]`, `[x]`, or `[X]`.
/// Backtracks on any partial match.
fn lex_task_marker<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
if let Some('[') = state.peek() {
state.advance(1);
if let Some(ch) = state.peek() {
if ch == ' ' || ch == 'x' || ch == 'X' {
state.advance(1);
if let Some(']') = state.peek() {
state.advance(1);
state.add_token(NoteSyntaxKind::TaskMarker, start_pos, state.get_position());
return true;
}
}
}
// Not a checkbox: rewind so '[' can be lexed as a link/bracket.
state.set_position(start_pos);
}
false
}
/// Lexes a `>` blockquote marker; only valid as the first non-blank
/// content on its line.
fn lex_blockquote<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
// Only spaces/tabs may precede the marker on this line.
let mut check_pos = start_pos;
while check_pos > 0 {
check_pos -= 1;
if let Some(ch) = state.get_char_at(check_pos) {
if ch == '\n' || ch == '\r' {
break;
}
else if ch != ' ' && ch != '\t' {
return false;
}
}
}
if let Some('>') = state.peek() {
state.advance(1);
state.add_token(NoteSyntaxKind::BlockquoteMarker, start_pos, state.get_position());
true
}
else {
false
}
}
/// Lexes a horizontal rule: at least three `-`, `*`, or `_` characters
/// (spaces/tabs allowed between them) forming the entire remainder of the
/// line, emitted as a single `HorizontalRule` token.
fn lex_horizontal_rule<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
// Only spaces/tabs may precede the rule on this line.
let mut check_pos = start_pos;
while check_pos > 0 {
check_pos -= 1;
if let Some(ch) = state.get_char_at(check_pos) {
if ch == '\n' || ch == '\r' {
break;
}
else if ch != ' ' && ch != '\t' {
return false;
}
}
}
if let Some(ch) = state.peek() {
if ch == '-' || ch == '*' || ch == '_' {
let rule_char = ch;
let mut count = 0;
let mut pos = start_pos;
// Count rule characters by lookahead, skipping interior blanks.
while let Some(current_ch) = state.get_char_at(pos) {
if current_ch == rule_char {
count += 1;
pos += 1;
}
else if current_ch == ' ' || current_ch == '\t' {
pos += 1; }
else {
break;
}
}
if count >= 3 {
// The remainder of the line must be blank, or this is not a rule.
while let Some(current_ch) = state.get_char_at(pos) {
if current_ch == '\n' || current_ch == '\r' {
break;
}
else if current_ch == ' ' || current_ch == '\t' {
pos += 1;
}
else {
return false; }
}
state.set_position(pos);
state.add_token(NoteSyntaxKind::HorizontalRule, start_pos, state.get_position());
return true;
}
}
}
false
}
/// Lexes one punctuation character significant to Notedown syntax into its
/// dedicated token kind. Returns `false` for any other character.
fn lex_special_char<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
if let Some(ch) = state.peek() {
let token_kind = match ch {
'[' => NoteSyntaxKind::LeftBracket,
']' => NoteSyntaxKind::RightBracket,
'(' => NoteSyntaxKind::LeftParen,
')' => NoteSyntaxKind::RightParen,
'<' => NoteSyntaxKind::LeftAngle,
'>' => NoteSyntaxKind::RightAngle,
'*' => NoteSyntaxKind::Asterisk,
'_' => NoteSyntaxKind::Underscore,
'`' => NoteSyntaxKind::Backtick,
'~' => NoteSyntaxKind::Tilde,
'#' => NoteSyntaxKind::Hash,
'|' => NoteSyntaxKind::Pipe,
'-' => NoteSyntaxKind::Dash,
'+' => NoteSyntaxKind::Plus,
'.' => NoteSyntaxKind::Dot,
':' => NoteSyntaxKind::Colon,
'!' => NoteSyntaxKind::Exclamation,
'\\' => NoteSyntaxKind::Escape,
_ => return false,
};
state.advance(ch.len_utf8());
state.add_token(token_kind, start_pos, state.get_position());
true
}
else {
false
}
}
/// Lexes a maximal run of ordinary characters (anything that is not
/// whitespace, a newline, or a special character) into one `Text` token.
fn lex_text<S: Source>(&self, state: &mut State<S>) -> bool {
let start_pos = state.get_position();
while let Some(ch) = state.peek() {
match ch {
// This break set must stay in sync with `lex_special_char`.
' ' | '\t' | '\n' | '\r' | '#' | '*' | '_' | '`' | '~' | '[' | ']' | '(' | ')' | '<' | '>' | '|' | '-'
| '+' | '.' | ':' | '!' | '\\' => break,
_ => {
state.advance(ch.len_utf8());
}
}
}
if state.get_position() > start_pos {
state.add_token(NoteSyntaxKind::Text, start_pos, state.get_position());
true
}
else {
false
}
}
}
impl<'config> Lexer<NotedownLanguage> for NotedownLexer<'config> {
    /// Incrementally lexes `source`, reusing cached tokens up to the first
    /// changed offset (`changed`).
    ///
    /// Each loop iteration tries the sub-lexers in priority order and restarts
    /// on the first match; every rule that fails leaves the position where it
    /// found it, so ordering fully determines the result. Two orderings are
    /// load-bearing:
    /// * `lex_task_marker` must run before `lex_link_or_image` — the link rule
    ///   accepts any `[` and emits a token immediately, which would make the
    ///   `TaskMarker` rule (`[ ]`, `[x]`) unreachable.
    /// * `lex_horizontal_rule` must run before the strikethrough, emphasis,
    ///   and list-marker rules — otherwise lines like `***`, `___`, or
    ///   `- - -` are consumed as emphasis/list markers and never become a
    ///   `HorizontalRule`. The promoted rule is safe to try early: it rejects
    ///   (consuming nothing) unless the whole rest of the line is a rule.
    ///
    /// Characters matched by no rule become `Error` tokens so the loop always
    /// makes progress; a zero-width `Eof` token terminates the stream.
    fn lex_incremental(
        &self,
        source: impl Source,
        changed: usize,
        cache: IncrementalCache<NotedownLanguage>,
    ) -> LexOutput<NotedownLanguage> {
        let mut state = LexerState::new_with_cache(source, changed, cache);
        while state.not_at_end() {
            if self.skip_whitespace(&mut state) {
                continue;
            }
            if self.lex_newline(&mut state) {
                continue;
            }
            if self.lex_heading(&mut state) {
                continue;
            }
            if self.lex_code_block(&mut state) {
                continue;
            }
            // Before emphasis/strikethrough/list rules — see doc comment.
            if self.lex_horizontal_rule(&mut state) {
                continue;
            }
            if self.lex_inline_code(&mut state) {
                continue;
            }
            if self.lex_strikethrough(&mut state) {
                continue;
            }
            if self.lex_emphasis(&mut state) {
                continue;
            }
            // Before lex_link_or_image — see doc comment.
            if self.lex_task_marker(&mut state) {
                continue;
            }
            if self.lex_link_or_image(&mut state) {
                continue;
            }
            if self.lex_list_marker(&mut state) {
                continue;
            }
            if self.lex_blockquote(&mut state) {
                continue;
            }
            if self.lex_special_char(&mut state) {
                continue;
            }
            if self.lex_text(&mut state) {
                continue;
            }
            // Fallback: nothing matched; emit the character as an error token
            // so the lexer cannot loop forever on unrecognized input.
            let start_pos = state.get_position();
            if let Some(ch) = state.peek() {
                state.advance(ch.len_utf8());
                state.add_token(NoteSyntaxKind::Error, start_pos, state.get_position());
            }
        }
        let eof_pos = state.get_position();
        state.add_token(NoteSyntaxKind::Eof, eof_pos, eof_pos);
        state.finish(Ok(()))
    }
}
impl<'config> NotedownLexer<'config> {
    /// Lexes `source` from scratch (no incremental cache), applying the same
    /// rule priority as `Lexer::lex_incremental`.
    ///
    /// Ordering fixes mirrored from the incremental path so both entry points
    /// produce identical token streams:
    /// * `lex_task_marker` before `lex_link_or_image` — the link rule accepts
    ///   any `[` immediately, which would make `TaskMarker` unreachable.
    /// * `lex_horizontal_rule` before the strikethrough/emphasis/list rules —
    ///   otherwise `***`, `___`, or `- - -` lines are consumed as emphasis or
    ///   list markers instead of a `HorizontalRule`.
    fn lex_internal<S: Source>(&self, source: S) -> LexOutput<NotedownLanguage> {
        let mut state = State::new(source);
        while state.not_at_end() {
            if self.skip_whitespace(&mut state) {
                continue;
            }
            if self.lex_newline(&mut state) {
                continue;
            }
            if self.lex_heading(&mut state) {
                continue;
            }
            if self.lex_code_block(&mut state) {
                continue;
            }
            // Before emphasis/strikethrough/list rules — see doc comment.
            if self.lex_horizontal_rule(&mut state) {
                continue;
            }
            if self.lex_inline_code(&mut state) {
                continue;
            }
            if self.lex_strikethrough(&mut state) {
                continue;
            }
            if self.lex_emphasis(&mut state) {
                continue;
            }
            // Before lex_link_or_image — see doc comment.
            if self.lex_task_marker(&mut state) {
                continue;
            }
            if self.lex_link_or_image(&mut state) {
                continue;
            }
            if self.lex_list_marker(&mut state) {
                continue;
            }
            if self.lex_blockquote(&mut state) {
                continue;
            }
            if self.lex_special_char(&mut state) {
                continue;
            }
            if self.lex_text(&mut state) {
                continue;
            }
            // Fallback: nothing matched; emit an Error token so the lexer
            // always makes forward progress.
            let start_pos = state.get_position();
            if let Some(ch) = state.peek() {
                state.advance(ch.len_utf8());
                state.add_token(NoteSyntaxKind::Error, start_pos, state.get_position());
            }
        }
        let eof_pos = state.get_position();
        state.add_token(NoteSyntaxKind::Eof, eof_pos, eof_pos);
        state.finish(Ok(()))
    }
}