#![feature(test)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unsafe_code)]
#![warn(unused_import_braces)]
pub mod decoder;
use enso_prelude::*;
use crate::decoder::Char;
use crate::decoder::InvalidChar;
use decoder::Decoder;
/// Trait for reading input data into a buffer.
///
/// Unlike `std::io::Read`, the element type of the buffer is configurable through `Item`, and
/// `read` reports only how many elements were written instead of returning a `Result`.
pub trait Read {
    /// The type of the elements stored in the buffer.
    type Item;

    /// Fills `buffer` with as much data as possible and returns the number of elements written.
    ///
    /// A return value smaller than `buffer.len()` means that the source could not provide more
    /// data; everything read up to that point is stored at the beginning of `buffer`.
    fn read(&mut self, buffer: &mut [Self::Item]) -> usize;
}

/// Every `std::io::Read` byte source is usable as a `Read` with `Item = u8`.
impl<R: std::io::Read> Read for R {
    type Item = u8;

    /// Reads repeatedly from the underlying source until `buffer` is full, the source reports
    /// end of input (`Ok(0)`), or a non-recoverable error occurs.
    ///
    /// Returns the number of bytes written to the beginning of `buffer`. Errors are not
    /// reported; they only end the read early.
    fn read(&mut self, mut buffer: &mut [u8]) -> usize {
        let length = buffer.len();
        while !buffer.is_empty() {
            // The call is fully qualified so that it always targets `std::io::Read::read` and
            // never recurses into this method.
            match <R as std::io::Read>::read(self, buffer) {
                Ok(0) => break,
                Ok(n) => buffer = &mut buffer[n..],
                // `Interrupted` is documented as non-fatal: the read has to be retried, otherwise
                // the input would be silently truncated.
                Err(ref error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(_) => break,
            }
        }
        length - buffer.len()
    }
}
/// Errors reported by the reader in place of a decoded character.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// The end of the input was reached.
    EOF,
    /// The input could not be decoded into a character.
    InvalidChar,
    /// End-of-group marker. It is never produced in this file; its meaning is defined by the
    /// code consuming the reader.
    EndOfGroup,
}

/// Numeric sentinels used when a character-or-error value is flattened into an integer.
impl Error {
    /// `u32` sentinel for `Error::EOF` (the largest `u32`).
    pub const END_OF_FILE: u32 = u32::max_value();
    /// `u32` sentinel for `Error::InvalidChar`.
    ///
    /// NOTE(review): `0xFFFF` is also the value of the valid `char` U+FFFF, so the two cannot be
    /// told apart after conversion — confirm this is acceptable for callers.
    pub const INVALID_CHAR: u32 = 0xFFFF;
    /// `u32` sentinel for `Error::EndOfGroup` (one below `END_OF_FILE`).
    pub const END_OF_GROUP: u32 = Self::END_OF_FILE - 1;
    /// `u64` sentinel for `Error::EOF` (the largest `u64`).
    pub const END_OF_FILE_64: u64 = u64::max_value();
    /// `u64` sentinel for `Error::InvalidChar`.
    pub const INVALID_CHAR_64: u64 = 0xFFFF;
    /// `u64` sentinel for `Error::EndOfGroup` (one below `END_OF_FILE_64`).
    pub const END_OF_GROUP_64: u64 = Self::END_OF_FILE_64 - 1;
}
impl From<decoder::Char<decoder::InvalidChar>> for decoder::Char<Error> {
fn from(char:Char<InvalidChar>) -> Self {
let size = char.size;
let char = match char.char {
Ok(char) => Ok(char),
Err(_) => Err(Error::InvalidChar),
};
decoder::Char{char,size}
}
}
impl From<decoder::Char<Error>> for u32 {
fn from(char:decoder::Char<Error>) -> Self {
match char.char {
Ok (char) => char as u32,
Err(Error::EOF) => Error::END_OF_FILE,
Err(Error::InvalidChar) => Error::INVALID_CHAR,
Err(Error::EndOfGroup) => Error::END_OF_GROUP,
}
}
}
impl From<decoder::Char<Error>> for u64 {
fn from(char:decoder::Char<Error>) -> Self {
match char.char {
Ok (char) => char as u64,
Err(Error::EOF) => Error::END_OF_FILE_64,
Err(Error::InvalidChar) => Error::INVALID_CHAR_64,
Err(Error::EndOfGroup) => Error::END_OF_GROUP_64,
}
}
}
/// Strongly typed handle identifying a single bookmark.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct BookmarkId {
    #[allow(missing_docs)]
    id: usize,
}

impl BookmarkId {
    /// Wraps the raw identifier `id` in a bookmark handle.
    pub fn new(id: usize) -> Self {
        Self { id }
    }
}
/// The operations a reader must provide so that it can be driven together with a
/// `BookmarkManager`.
pub trait ReaderOps {
/// Reads the next character from the input, returning it or the error that prevented decoding.
fn next_char(&mut self, bookmarks:&mut BookmarkManager) -> Result<char,Error>;
/// Advances the reader by one character without returning it.
fn advance_char(&mut self, bookmarks:&mut BookmarkManager);
/// Returns the most recently read character together with its encoded size.
fn character(&self) -> decoder::Char<Error>;
/// Reports whether the reader considers itself finished, taking the bookmarks into account.
fn finished(&self, bookmarks:&BookmarkManager) -> bool;
/// Reports whether the reader has no more input to consume.
fn empty(&self) -> bool;
/// Loads new data into the buffer while retaining what is needed for rewinding to a bookmark.
fn fill(&mut self, bookmarks:&mut BookmarkManager);
/// Returns how many words must be kept in the buffer so that rewinding remains possible.
fn max_possible_rewind_len(&self, bookmarks:&BookmarkManager) -> usize;
/// Appends `char` to the accumulated result.
fn append_result(&mut self, char:char);
/// Returns the accumulated result and leaves the stored result empty.
fn pop_result(&mut self) -> String;
/// Returns the current offset of the reader within its buffer.
fn offset(&self) -> usize;
/// Returns a shared reference to the accumulated result.
fn result(&self) -> &String;
/// Returns a mutable reference to the accumulated result.
fn result_mut(&mut self) -> &mut String;
/// Returns the length of the reader's buffer.
fn buffer_len(&self) -> usize;
/// Sets the current offset of the reader within its buffer to `off`.
fn set_offset(&mut self, off:usize);
/// Shortens the accumulated result to `len`.
fn truncate_match(&mut self, len:usize);
}
/// The number of words in the buffer that `Reader::new` allocates.
pub const BUFFER_SIZE: usize = 32768;
/// A buffered reader that decodes characters from a source of `D::Word` values and can be
/// rewound to bookmarked positions.
///
/// NOTE(review): the second type parameter is literally named `Read`, which shadows the `Read`
/// trait inside this declaration; it is an unconstrained type here, and the impl blocks add the
/// actual trait bound.
#[derive(Debug,Clone,PartialEq)]
pub struct Reader<D:Decoder,Read> {
/// The source from which the buffer is filled.
pub reader: Read,
/// The words read from the source that are decoded into characters.
pub buffer: Vec<D::Word>,
/// The string accumulated through `append_result`.
pub result: String,
/// The index in `buffer` at which the next character is decoded.
pub offset: usize,
/// The number of words in `buffer` holding data, as computed from the most recent read.
pub length: usize,
/// The most recently decoded character (or error) and its size.
pub character: decoder::Char<Error>,
}
impl<D:Decoder,R:Read<Item=D::Word>> Reader<D,R> {
pub fn new(reader:R, _decoder:D) -> Self {
let mut reader = Reader::<D,R> {
reader,
buffer : vec![D::Word::default(); BUFFER_SIZE],
result : String::from(""),
offset : 0,
length : 0,
character : decoder::Char{char:Err(Error::EOF), size:0},
};
reader.length = reader.reader.read(&mut reader.buffer[..]);
reader
}
}
impl<D:Decoder, R:Read<Item=D::Word>> ReaderOps for Reader<D,R> {
fn next_char(&mut self, bookmarks:&mut BookmarkManager) -> Result<char,Error> {
if self.empty() { self.character.char = Err(Error::EOF); return Err(Error::EOF) }
if self.offset >= self.buffer.len() - D::MAX_CODEPOINT_LEN {
self.fill(bookmarks);
}
self.character = D::decode(&self.buffer[self.offset..]).into();
self.offset += self.character.size;
self.character.char
}
fn advance_char(&mut self, bookmarks:&mut BookmarkManager) {
let _ = self.next_char(bookmarks);
}
fn character(&self) -> Char<Error> {
self.character
}
fn finished(&self, _bookmarks:&BookmarkManager) -> bool {
let rewinded = self.max_possible_rewind_len(_bookmarks) != 0;
self.empty() && rewinded
}
fn empty(&self) -> bool {
self.length < self.buffer.len() && self.length <= self.offset
}
fn fill(&mut self, bookmarks:&mut BookmarkManager) {
let len = self.buffer.len();
let words = len - self.offset;
self.offset = self.max_possible_rewind_len(bookmarks);
if self.offset == len {
panic!("Rewind won't be possible. Buffer is too small.")
}
bookmarks.decrease_bookmark_offsets(len - self.offset);
for i in 1..=self.offset {
self.buffer[self.offset - i] = self.buffer[len - i];
}
self.length = self.offset + self.reader.read(&mut self.buffer[self.offset..]);
self.offset -= words;
}
fn max_possible_rewind_len(&self, bookmarks:&BookmarkManager) -> usize {
if let Some(offset) = bookmarks.min_offset() {
return self.buffer_len() - offset
}
D::MAX_CODEPOINT_LEN
}
fn append_result(&mut self,char:char) {
self.result.push(char);
}
fn pop_result(&mut self) -> String {
let str = self.result.clone();
self.result.truncate(0);
str
}
fn offset(&self) -> usize {
self.offset
}
fn result(&self) -> &String {
&self.result
}
fn result_mut(&mut self) -> &mut String {
&mut self.result
}
fn buffer_len(&self) -> usize {
self.buffer.len()
}
fn set_offset(&mut self, off: usize) {
self.offset = off;
}
fn truncate_match(&mut self, len: usize) {
self.result.truncate(len);
}
}
/// A saved reader position that the reader can later be rewound to.
#[derive(Debug,Clone,Copy,Default,PartialEq)]
pub struct Bookmark {
/// The buffer offset that the reader is reset to when rewinding.
offset: usize,
/// The length of the reader's result when the bookmark was set; the result is truncated back
/// to this length when rewinding.
length: usize,
/// Whether the bookmark is currently set. Only set bookmarks are shifted when the buffer is
/// refilled and considered when computing the minimal offset.
set:bool
}
/// Owns all bookmarks of a reader and hands out `BookmarkId` handles to them.
#[allow(missing_docs)]
#[derive(Clone,Debug,PartialEq)]
pub struct BookmarkManager {
/// All bookmarks; a `BookmarkId` is an index into this vector.
bookmarks: Vec<Bookmark>,
/// Handle of the first bookmark created by `new`.
/// NOTE(review): presumably marks the end of the last match — confirm against the code using it.
pub matched_bookmark: BookmarkId,
/// Handle of the second bookmark created by `new`.
/// NOTE(review): presumably marks a position related to the current rule — confirm against
/// the code using it.
pub rule_bookmark: BookmarkId,
}
#[allow(missing_docs)]
impl BookmarkManager {
pub fn new() -> BookmarkManager {
let mut bookmarks = Vec::new();
let matched_bookmark = BookmarkManager::make_bookmark(&mut bookmarks);
let rule_bookmark = BookmarkManager::make_bookmark(&mut bookmarks);
BookmarkManager {bookmarks,matched_bookmark,rule_bookmark}
}
fn make_bookmark(bookmarks:&mut Vec<Bookmark>) -> BookmarkId {
bookmarks.push(Bookmark::default());
BookmarkId::new(bookmarks.len() - 1)
}
pub fn add_bookmark(&mut self) -> BookmarkId {
BookmarkManager::make_bookmark(&mut self.bookmarks)
}
pub fn bookmark<R:ReaderOps>(&mut self, bookmark:BookmarkId, reader:&mut R) {
self.bookmarks[bookmark.id].offset = reader.offset() - reader.character().size;
self.bookmarks[bookmark.id].length = reader.result().len();
self.bookmarks[bookmark.id].set = true
}
pub fn unset<R:ReaderOps>(&mut self, bookmark:BookmarkId) {
self.bookmarks[bookmark.id].offset = 0;
self.bookmarks[bookmark.id].length = 0;
self.bookmarks[bookmark.id].set = false
}
pub fn decrease_bookmark_offsets(&mut self, amount:usize) {
for bookmark in self.bookmarks.iter_mut() {
if bookmark.set {
bookmark.offset -= amount
}
}
}
pub fn rewind<R:ReaderOps>(&mut self, bookmark:BookmarkId, reader:&mut R) {
let bookmark = self.bookmarks.get(bookmark.id).expect("Bookmark must exist.");
reader.set_offset(bookmark.offset);
reader.truncate_match(bookmark.length);
reader.advance_char(self);
}
pub fn min_offset(&self) -> Option<usize> {
self.bookmarks.iter().filter_map(|b| b.set.and_option(Some(b.offset))).min()
}
}
impl Default for BookmarkManager {
fn default() -> Self {
BookmarkManager::new()
}
}
// Unit tests and a benchmark for the reader and for the `Repeat` helper source defined here.
#[cfg(test)]
mod tests {
extern crate test;
use super::*;
use decoder::*;
use test::Bencher;
// A test data source that yields the contents of `buffer` `repeat` times in a row.
#[derive(Debug, Clone)]
struct Repeat<T> {
// The data that is repeated.
buffer: Vec<T>,
// The position in `buffer` at which the next read continues.
offset: usize,
// The number of repetitions left, including the one currently in progress.
repeat: usize,
}
// Creates a source that yields `input` `repeat` times.
fn repeat<T:Copy>(input:Vec<T>, repeat:usize) -> impl Read<Item=T> {
Repeat { buffer:input, repeat, offset: 0 }
}
impl<T:Copy> Read for Repeat<T> {
type Item = T;
// Fills `buffer` with repeated data and returns the number of elements written.
fn read(&mut self, mut buffer:&mut [Self::Item]) -> usize {
// All repetitions have been emitted already.
if self.repeat == 0 { return 0 }
let len = self.buffer.len();
let read = buffer.len();
// The request fits into the rest of the current repetition.
if read < len - self.offset {
buffer.copy_from_slice(&self.buffer[self.offset..self.offset + read]);
self.offset += read;
return read
}
// Finish the current repetition first.
buffer[..len - self.offset].copy_from_slice(&self.buffer[self.offset..]);
buffer = &mut buffer[len - self.offset..];
// Number of whole repetitions that both fit into the rest of the buffer and are still left.
let repeat = std::cmp::min(buffer.len() / len, self.repeat - 1);
self.repeat = self.repeat - repeat - 1;
for _ in 0..repeat {
buffer[..len].copy_from_slice(&self.buffer[..]);
buffer = &mut buffer[len..];
}
// The data ran out: report only what was actually written.
if self.repeat == 0 {
return len - self.offset + repeat * len
}
// Fill what is left of the buffer with the beginning of the next repetition.
buffer.copy_from_slice(&self.buffer[..buffer.len()]);
self.offset = buffer.len();
read
}
}
// Creates a fresh bookmark manager for a test.
pub fn bookmark_manager() -> BookmarkManager {
BookmarkManager::new()
}
// Reading with a buffer smaller than the data takes two reads; the second one is partial and
// leaves the rest of the buffer untouched.
#[test]
fn test_repeater_with_small_buffer() {
let mut repeater = repeat(vec![1, 2, 3], 1);
let mut buffer = [0; 2];
assert_eq!(repeater.read(&mut buffer), 2);
assert_eq!(&buffer, &[1, 2]);
assert_eq!(repeater.read(&mut buffer), 1);
assert_eq!(&buffer, &[3, 2])
}
// Reading with a buffer spanning several repetitions, then reading the single remaining element.
#[test]
fn test_repeater_with_big_buffer() {
let mut repeater = repeat(vec![1, 2], 3);
let mut buffer = [0; 5];
assert_eq!(repeater.read(&mut buffer), 5);
assert_eq!(&buffer, &[1, 2, 1, 2, 1]);
assert_eq!(repeater.read(&mut buffer), 1);
assert_eq!(&buffer, &[2, 2, 1, 2, 1])
}
// A short UTF-8 input is decoded back to the identical string.
#[test]
fn test_reader_small_input() {
let mut mgr = bookmark_manager();
let str = "a.b^c! #𤭢界んにち𤭢#𤭢";
let mut reader = Reader::new(str.as_bytes(), DecoderUTF8());
let mut result = String::from("");
while let Ok(char) = reader.next_char(&mut mgr) {
result.push(char);
}
assert_eq!(&result, str);
}
// A long input is decoded correctly while a bookmark is set on every character, and the buffer
// keeps its original size.
#[test]
fn test_reader_big_input() {
let mut mgr = bookmark_manager();
let str = "a.b^c! #𤭢界んにち𤭢#𤭢".repeat(10_000);
let mut reader = Reader::new(str.as_bytes(), DecoderUTF8());
let mut result = String::from("");
while let Ok(char) = reader.next_char(&mut mgr) {
mgr.bookmark(mgr.matched_bookmark,&mut reader);
result.push(char);
}
assert_eq!(&result, &str);
assert_eq!(reader.buffer.len(), BUFFER_SIZE);
}
// Measures reading every character of a repeated ASCII string until the end of input.
#[bench]
fn bench_reader(bencher:&mut Bencher) {
let run = || {
let mut mgr = bookmark_manager();
let str = repeat("Hello, World!".as_bytes().to_vec(), 10_000_000);
let mut reader = Reader::new(str, DecoderUTF8());
let mut count = 0;
while reader.next_char(&mut mgr) != Err(Error::EOF) {
count += 1;
}
count
};
bencher.iter(run);
}
}