use crate::spec::Spec;
use std::{
vec::Vec,
option::Option,
};
use unicode_segmentation::UnicodeSegmentation;
/// Incremental comment extractor driven by a [`Spec`].
///
/// Input is fed one buffer at a time through `update` / `update_ascii`;
/// finished comments are queued in `result` and handed out by `take`.
pub struct Tokenizer<'a, S> where S: Spec {
/// Borrowed spec whose `is_begin`, `is_inline` and `is_end` are queried for comment markers.
spec: &'a S,
/// Pending comments; new entries are inserted at index 0 and `take` pops from the back.
result: Vec<Comment>,
/// `true` while a block comment has been opened but its end marker has not been seen yet.
comment: bool,
/// Scratch comment being accumulated; clones of it are what get pushed into `result`.
last: Comment,
}
/// A single comment collected by the tokenizer.
#[derive(Clone)]
pub struct Comment {
/// Line value that was passed to the tokenizer when this comment was opened.
pub line: usize,
/// Offset within that line recorded when the comment was opened.
pub start: usize,
/// Comment text accumulated so far (raw until `trim` is called).
pub text: String,
/// Set when the comment was opened by an inline marker rather than a block marker.
inline: bool,
}
/// Returns the smallest number of leading `' '` characters found on any line.
///
/// Only plain spaces count (tabs do not). An empty slice, or any line that
/// does not start with a space, yields `0`.
fn lines_ws_offset(lines: &Vec<&str>) -> usize {
    lines
        .iter()
        .map(|line| line.bytes().take_while(|&b| b == b' ').count())
        .min()
        .unwrap_or(0)
}
impl Comment {
    /// Creates an empty comment at line 0, offset 0, not marked inline.
    fn new() -> Self {
        Comment {
            line: 0,
            start: 0,
            text: String::new(),
            inline: false,
        }
    }

    /// Resets this comment so it can accumulate a new one starting at
    /// `line` / `start`. The text is cleared and the inline flag dropped.
    fn begin(&mut self, line: usize, start: usize) {
        self.text.clear();
        self.line = line;
        self.start = start;
        self.inline = false;
    }

    /// Appends `buf` verbatim to the comment text.
    fn write(&mut self, buf: &str) {
        self.text.push_str(buf);
    }

    /// Strips the common run of leading spaces from every line.
    ///
    /// Slicing is safe: `lines_ws_offset` only counts ASCII spaces and
    /// returns the minimum over all lines, so every line has at least
    /// `offset` one-byte characters at its front.
    fn dedent(lines: &mut Vec<&str>) {
        let offset = lines_ws_offset(lines);
        if offset > 0 {
            for l in lines.iter_mut() {
                *l = &l[offset..];
            }
        }
    }

    /// Normalizes the accumulated text in place.
    ///
    /// Steps, in order:
    /// 1. split on `'\n'` and strip trailing whitespace from each line;
    /// 2. drop blank lines at the start and at the end;
    /// 3. remove the indentation (spaces) common to all remaining lines;
    /// 4. pass every line through `spec.trim`;
    /// 5. remove the common indentation once more, since `spec.trim` may
    ///    have exposed new leading spaces;
    /// 6. re-join the lines with `'\n'`.
    pub fn trim<'a, S: Spec>(&mut self, spec: &'a S) {
        // Trailing whitespace is stripped *before* the blank-line test so that
        // a whitespace-only first line is dropped just like a whitespace-only
        // last line is.
        let mut lines: Vec<&str> = self
            .text
            .split('\n')
            .map(str::trim_end)
            .skip_while(|l| l.is_empty())
            .collect();
        while lines.last().map_or(false, |l| l.is_empty()) {
            lines.pop();
        }

        Self::dedent(&mut lines);
        for l in lines.iter_mut() {
            *l = spec.trim(l);
        }
        Self::dedent(&mut lines);

        self.text = lines.join("\n");
    }
}
impl<'a, S: Spec> Tokenizer<'a, S> {
    /// Creates a tokenizer with an empty queue that consults `spec` for
    /// comment markers.
    pub fn new(spec: &'a S) -> Self {
        Tokenizer {
            spec,
            result: Vec::new(),
            comment: false,
            last: Comment::new(),
        }
    }

    /// Consumes `n` items from `iter` (always at least one).
    ///
    /// Returns `false` when the iterator ran out before `n` items were read.
    fn skip_graphemes<I: Iterator>(iter: &mut I, n: usize) -> bool {
        iter.nth(n.max(1) - 1).is_some()
    }

    /// Scans `tail` grapheme by grapheme for the first position at which
    /// `spec.is_end` matches (the empty remainder is probed as well).
    ///
    /// Returns `(graphemes_before, bytes_before, marker_len)` on a match.
    fn find_block_end(&self, tail: &str) -> Option<(usize, usize, usize)> {
        let mut sub = tail.grapheme_indices(true);
        let mut graphemes = 0;
        loop {
            let rest = sub.as_str();
            if let Some(o) = self.spec.is_end(rest) {
                // `rest` is a suffix of `tail`, so its byte offset follows
                // directly from the two lengths.
                return Some((graphemes, tail.len() - rest.len(), o));
            }
            graphemes += 1;
            sub.next()?;
        }
    }

    /// Appends `body` to the open block comment, queues a copy of it and
    /// leaves block-comment mode.
    fn close_block(&mut self, body: &str) {
        self.last.write(body);
        self.result.insert(0, self.last.clone());
        self.comment = false;
    }

    /// Records inline-comment `text` found on `line`.
    ///
    /// If the scratch comment is already an inline one, `text` is appended to
    /// it and the copy queued earlier (the newest entry, index 0) is replaced
    /// by the extended version. Otherwise a fresh inline comment is started
    /// at `start`.
    fn push_inline(&mut self, line: usize, start: usize, text: &str) {
        if self.last.inline {
            // The previous copy of `last` is the newest entry, i.e. index 0.
            if !self.result.is_empty() {
                self.result.remove(0);
            }
        } else {
            self.last.begin(line, start);
            self.last.inline = true;
        }
        self.last.write(text);
        self.result.insert(0, self.last.clone());
    }

    /// Feeds one buffer to the tokenizer, walking it by grapheme cluster.
    ///
    /// `line` is stored on every comment opened in this buffer. Offsets
    /// stored in `Comment::start` are grapheme counts from the start of `buf`
    /// and point just past the opening marker.
    ///
    /// Marker lengths returned by the spec are used both as grapheme counts
    /// (for skipping) and as byte offsets (for slicing), so markers are
    /// presumably ASCII. TODO confirm against the `Spec` implementations.
    ///
    /// A block comment left open at the end of `buf` continues with the next
    /// call.
    pub fn update(&mut self, line: usize, buf: &str) {
        let mut iter = buf.grapheme_indices(true);
        // Number of graphemes of `buf` already consumed by `iter`.
        let mut cnt = 0;
        loop {
            let tail = iter.as_str();
            if !self.comment {
                if let Some(o) = self.spec.is_begin(tail) {
                    self.comment = true;
                    // Same convention as inline comments and `update_ascii`:
                    // the comment starts after its opening marker.
                    self.last.begin(line, cnt + o);
                    cnt += o;
                    if !Self::skip_graphemes(&mut iter, o) {
                        break;
                    }
                    continue;
                }
                if let Some(o) = self.spec.is_inline(tail) {
                    // An inline comment swallows the rest of the buffer.
                    self.push_inline(line, cnt + o, &tail[o..]);
                    break;
                }
            }
            if self.comment {
                match self.find_block_end(tail) {
                    Some((graphemes, bytes, o)) => {
                        self.close_block(&tail[..bytes]);
                        // Step over the comment body *and* the end marker so
                        // scanning resumes behind the comment, in sync with `cnt`.
                        cnt += graphemes + o;
                        if !Self::skip_graphemes(&mut iter, graphemes + o) {
                            break;
                        }
                        continue;
                    }
                    None => {
                        // Still open: the whole remainder belongs to the comment.
                        self.last.write(tail);
                        break;
                    }
                }
            }
            if iter.next().is_none() {
                break;
            }
            cnt += 1;
        }
    }

    /// Byte-indexed variant of [`update`](Self::update).
    ///
    /// Offsets stored in `Comment::start` are byte offsets into `buf`.
    ///
    /// # Panics
    ///
    /// The buffer is sliced one byte at a time, so input containing
    /// multi-byte characters panics when a slice lands inside a character.
    pub fn update_ascii(&mut self, line: usize, buf: &str) {
        let mut i = 0;
        while i < buf.len() {
            let tail = &buf[i..];
            if !self.comment {
                if let Some(o) = self.spec.is_begin(tail) {
                    self.comment = true;
                    self.last.begin(line, i + o);
                    i += o;
                    continue;
                }
                if let Some(o) = self.spec.is_inline(tail) {
                    // An inline comment swallows the rest of the buffer.
                    self.push_inline(line, i + o, &tail[o..]);
                    break;
                }
            }
            if self.comment {
                let end = (0..tail.len())
                    .find_map(|sub| self.spec.is_end(&tail[sub..]).map(|o| (sub, o)));
                match end {
                    Some((sub, o)) => {
                        self.close_block(&tail[..sub]);
                        i += sub + o;
                        continue;
                    }
                    None => {
                        // Still open: the whole remainder belongs to the comment.
                        self.last.write(tail);
                        break;
                    }
                }
            }
            i += 1;
        }
    }

    /// Signals the end of input by queueing an empty placeholder comment.
    ///
    /// `take` always holds back the newest queue entry; the placeholder takes
    /// that slot so the last real comment can be taken.
    pub fn finish(&mut self) {
        self.result.insert(0, Comment::new());
    }

    /// Removes and returns the oldest queued comment.
    ///
    /// Returns `None` while at most one entry is queued: the newest entry is
    /// held back because a following inline comment may still replace it.
    pub fn take(&mut self) -> Option<Comment> {
        if self.result.len() > 1 {
            self.result.pop()
        } else {
            None
        }
    }
}