/// Reasons `lex_str_block` can reject a string block.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StringBlockError {
/// The input ended too early: before the newline that closes the header,
/// in the middle of a content line (no `\n` found), or at the point where
/// the closing `|||` was expected.
UnexpectedEnd,
/// After the optional `-` and any spaces, tabs or carriage returns, the
/// header was followed by a character other than `\n`.
MissingNewLine,
/// A line that does not start with the block indent was reached, and after
/// skipping spaces and tabs it does not begin with `|||`.
MissingTermination,
/// The first non-blank line of the block does not start with a space or tab.
MissingIndent,
}
use StringBlockError::*;
use logos::Lexer;
use crate::generated::syntax_kinds::SyntaxKind;
/// Runs `lex_str_block` on a logos lexer and throws the `Result` away.
///
/// Every exit path of `lex_str_block` calls either `eat_error` or `bump_pos`
/// on the lexer before returning, so the lexer has been advanced whether or
/// not the block was well-formed; only the error kind is lost here.
pub(crate) fn lex_str_block_test(lex: &mut Lexer<'_, SyntaxKind>) {
// The outcome is deliberately ignored.
let _ = lex_str_block(lex);
}
/// Byte-offset cursor over the text being scanned for a string block.
pub(crate) struct Context<'a> {
    /// The complete text being scanned.
    source: &'a str,
    /// Byte offset of the next unread character in `source`.
    ///
    /// Every method except [`Context::skip`] keeps this on a UTF-8 character
    /// boundary; `skip` trusts its caller.
    index: usize,
}

impl<'a> Context<'a> {
    /// Returns the unread tail of the source.
    ///
    /// Panics if `index` is not on a character boundary.
    fn rest(&self) -> &'a str {
        &self.source[self.index..]
    }

    /// Consumes and returns the next character, or `None` at end of input.
    fn next(&mut self) -> Option<char> {
        let c = self.peek()?;
        self.index += c.len_utf8();
        Some(c)
    }

    /// Returns the next character without consuming it, or `None` at end of
    /// input.
    fn peek(&self) -> Option<char> {
        self.rest().chars().next()
    }

    /// Consumes the next character if it satisfies `f`.
    ///
    /// Returns the number of bytes consumed: `0` when nothing matched (or the
    /// input is exhausted), otherwise the UTF-8 width of the eaten character.
    fn eat_if(&mut self, f: impl Fn(char) -> bool) -> usize {
        match self.peek() {
            Some(c) if f(c) => {
                // Advance by the character's encoded width, not by one byte,
                // so the cursor never lands inside a multi-byte character.
                let width = c.len_utf8();
                self.index += width;
                width
            }
            _ => 0,
        }
    }

    /// Consumes characters for as long as they satisfy `f`.
    ///
    /// Returns the number of bytes consumed (possibly `0`).
    fn eat_while(&mut self, f: impl Fn(char) -> bool) -> usize {
        let rest = self.rest();
        // Byte offset of the first character that fails `f`, or everything
        // that is left when no such character exists.
        let eaten = rest.find(|c: char| !f(c)).unwrap_or(rest.len());
        self.index += eaten;
        eaten
    }

    /// Advances the cursor by `len` bytes, stopping at the end of the source.
    ///
    /// The caller is responsible for `len` ending on a character boundary.
    fn skip(&mut self, len: usize) {
        // `saturating_add` keeps an oversized `len` from overflowing before
        // the clamp is applied.
        self.index = self.index.saturating_add(len).min(self.source.len());
    }
}
/// Measures how much of `a`'s leading space/tab run is reproduced at the start
/// of `b`.
///
/// Walks `a` byte by byte:
/// * on the first byte of `a` that is neither a space nor a tab, returns the
///   number of bytes matched so far;
/// * if `b` runs out, or its byte at the same position differs, returns `0`;
/// * if all of `a` is whitespace and `b` kept up, returns `a.len()`.
///
/// Calling it with the same string twice therefore yields the length of that
/// string's leading whitespace.
fn check_whitespace(a: &str, b: &str) -> usize {
    let mut candidate = b.bytes();
    let mut matched = 0;
    for expected in a.bytes() {
        if !matches!(expected, b' ' | b'\t') {
            // `a` ran out of whitespace and `b` agreed on everything before it.
            return matched;
        }
        match candidate.next() {
            Some(actual) if actual == expected => matched += 1,
            // `b` is too short or disagrees on this whitespace byte.
            _ => return 0,
        }
    }
    matched
}
/// Sink that `lex_str_block` drives while scanning a string block.
///
/// `'d` is the lifetime of the text being scanned; lines handed to
/// `mark_line` borrow from it.
pub(crate) trait StrBlockLexCtx<'d> {
/// Text to scan. `lex_str_block` reads this once, at the start, and uses it
/// as the source of its `Context`.
fn remainder(&self) -> &'d str;
/// Called on most failure paths, with `ctx` positioned where scanning gave
/// up, so the implementation can decide how much input to discard.
fn eat_error(&mut self, ctx: &Context<'d>);
/// Called with the number of bytes of `remainder()` that were consumed
/// (the final `ctx.index`).
fn bump_pos(&mut self, s: usize);
/// Called when the block header starts with `-`.
fn mark_truncating(&mut self);
/// Called once per line of the block: with the line's text after the block
/// indent and without the trailing `\n`, or with `""` for a blank line.
fn mark_line(&mut self, line: &'d str);
}
/// Drives a logos lexer directly: consumed bytes are folded into the current
/// token via `bump`, and the block's contents are not recorded.
impl<'d> StrBlockLexCtx<'d> for Lexer<'d, SyntaxKind> {
    fn remainder(&self) -> &'d str {
        // NOTE(review): this relies on method resolution preferring `Lexer`'s
        // inherent `remainder` over this trait method; otherwise it would
        // recurse. Confirm against the logos version in use.
        self.remainder()
    }

    /// Error recovery: swallow input up to and including the next `|||`, or
    /// everything that is left when no `|||` follows.
    fn eat_error(&mut self, ctx: &Context<'d>) {
        let unread = ctx.rest();
        let recover_len = match unread.find("|||") {
            // Include the three bytes of the marker itself.
            Some(marker_start) => marker_start + 3,
            None => unread.len(),
        };
        self.bump(ctx.index + recover_len);
    }

    fn bump_pos(&mut self, s: usize) {
        self.bump(s);
    }

    /// Ignored by this implementation.
    fn mark_truncating(&mut self) {}

    /// Ignored by this implementation.
    fn mark_line(&mut self, _line: &'d str) {}
}
/// Scans `input` as a string block and returns what was collected.
///
/// `input` is expected to begin with the block header that `lex_str_block`
/// parses: an optional `-`, optional spaces/tabs/carriage returns, then a
/// newline.
///
/// # Errors
///
/// Returns the [`StringBlockError`] reported by `lex_str_block`; nothing that
/// was collected up to that point is returned.
pub fn collect_lexed_str_block(input: &str) -> Result<CollectStrBlock<'_>, StringBlockError> {
    let mut collected = CollectStrBlock {
        input,
        offset: 0,
        lines: Vec::new(),
        truncate: false,
    };
    // Hand the collector back only when scanning succeeded.
    lex_str_block(&mut collected).map(|()| collected)
}
/// Result of scanning a string block with `collect_lexed_str_block`.
pub struct CollectStrBlock<'s> {
/// `true` when the block header started with `-` (set by `mark_truncating`).
pub truncate: bool,
/// Lines reported through `mark_line`, in the order they were reported.
pub lines: Vec<&'s str>,
// Text handed to the scanner through `remainder()`.
input: &'s str,
// Running sum of the byte counts passed to `bump_pos`.
offset: usize,
}
/// Collecting sink: records the header flag and every reported line instead of
/// driving a lexer.
impl<'d> StrBlockLexCtx<'d> for CollectStrBlock<'d> {
/// Always returns the whole input; `offset` is not applied here.
fn remainder(&self) -> &'d str {
self.input
}
/// No recovery is performed; the context is ignored.
fn eat_error(&mut self, _ctx: &Context<'d>) {
}
/// Adds the consumed byte count to `offset`.
fn bump_pos(&mut self, s: usize) {
self.offset += s;
}
/// Records that the header carried the `-` flag.
fn mark_truncating(&mut self) {
self.truncate = true;
}
/// Appends the reported line to `lines`.
fn mark_line(&mut self, line: &'d str) {
self.lines.push(line);
}
}
/// Scans one string block from `lex.remainder()` and reports it to `lex`.
///
/// The accepted shape is:
/// 1. a header: an optional `-` (reported via `mark_truncating`), any spaces,
///    tabs or carriage returns, then `\n`;
/// 2. any number of blank lines, each reported as `mark_line("")`;
/// 3. a first content line whose leading spaces/tabs define the block indent;
/// 4. further lines that start with exactly that indent (blank lines are
///    allowed in between); each is reported without the indent and without
///    its trailing `\n`;
/// 5. a line that does not start with the indent, which must consist of
///    optional spaces/tabs followed by `|||`.
///
/// On success the number of bytes consumed is passed to `bump_pos`.
///
/// # Errors
///
/// * [`StringBlockError::UnexpectedEnd`] if the input ends before the header's
///   newline, inside a content line, or where `|||` was expected.
/// * [`StringBlockError::MissingNewLine`] if the header is followed by
///   anything other than `\n`.
/// * [`StringBlockError::MissingIndent`] if the first content line is not
///   indented.
/// * [`StringBlockError::MissingTermination`] if the first line that does not
///   match the indent does not start with `|||`.
///
/// Before returning an error, `eat_error` is called on `lex`; the one
/// exception is end of input at the terminator, which calls `bump_pos`.
pub(crate) fn lex_str_block<'a>(lex: &mut impl StrBlockLexCtx<'a>) -> Result<(), StringBlockError> {
    let mut ctx = Context::<'a> {
        source: lex.remainder(),
        index: 0,
    };

    // Header: optional `-`, horizontal whitespace, mandatory newline.
    if ctx.eat_if(|c| c == '-') != 0 {
        lex.mark_truncating();
    }
    ctx.eat_while(|c| matches!(c, ' ' | '\t' | '\r'));
    let header_failure = match ctx.next() {
        Some('\n') => None,
        Some(_) => Some(MissingNewLine),
        None => Some(UnexpectedEnd),
    };
    if let Some(err) = header_failure {
        lex.eat_error(&ctx);
        return Err(err);
    }

    // Blank lines before the first content line.
    while ctx.eat_if(|c| c == '\n') != 0 {
        lex.mark_line("");
    }

    // The first content line fixes the indent every later line must repeat.
    let first_line = ctx.rest();
    let mut indent_width = check_whitespace(first_line, first_line);
    if indent_width == 0 {
        lex.eat_error(&ctx);
        return Err(MissingIndent);
    }
    let block_indent = &first_line[..indent_width];

    loop {
        debug_assert_ne!(indent_width, 0, "Unexpected value for num_whitespace");

        // Drop the indent, then report everything up to the newline.
        ctx.skip(indent_width);
        let line = ctx.rest();
        let Some(newline_at) = line.find('\n') else {
            ctx.index = ctx.source.len();
            lex.eat_error(&ctx);
            return Err(UnexpectedEnd);
        };
        lex.mark_line(&line[..newline_at]);
        ctx.index += newline_at + 1;

        // Blank lines inside the block need no indent.
        while ctx.eat_if(|c| c == '\n') != 0 {
            lex.mark_line("");
        }

        indent_width = check_whitespace(block_indent, ctx.rest());
        if indent_width == 0 {
            // This line is not part of the block body; it has to terminate it.
            break;
        }
    }

    // Terminator: optional spaces/tabs, then `|||`.
    ctx.eat_while(|c| matches!(c, ' ' | '\t'));
    if !ctx.rest().starts_with("|||") {
        return if ctx.rest().is_empty() {
            lex.bump_pos(ctx.index);
            Err(UnexpectedEnd)
        } else {
            lex.eat_error(&ctx);
            Err(MissingTermination)
        };
    }
    ctx.skip(3);

    lex.bump_pos(ctx.index);
    Ok(())
}