use {
memmap::{Mmap, MmapOptions},
std::{fs::File, str::Utf8Error},
};
/// A location in a text expressed in two coordinate systems at once.
///
/// Keeping both offsets together lets a lookup jump straight to a byte offset
/// while still knowing how many characters precede it.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct CharPosition {
    /// Offset in bytes from the start of the data.
    pub byte_position: usize,
    /// Offset in characters (Unicode scalar values) from the start of the data.
    pub char_position: usize,
}
/// A file together with a memory map of its contents and a lazily built
/// index of line-ending positions used to look characters up by index.
pub struct MappedFile {
/// The file handle that was passed to `MappedFile::new` and mapped.
pub file: File,
/// Memory map created from `file`; character lookups decode these bytes as UTF-8.
pub map: Mmap,
/// Positions of `'\n'` characters discovered so far, in scan order.
/// Seeded by `MappedFile::new` with the start of the file (byte 0, char 0).
pub line_ending_positions: Vec<CharPosition>,
}
/// Reasons a character lookup can fail.
#[derive(Debug)]
pub enum IndexError {
    /// The requested character index does not exist in the text that was searched.
    OutOfBounds,
    /// The bytes that had to be decoded were not valid UTF-8.
    InvalidChar(Utf8Error),
}

impl std::fmt::Display for IndexError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            IndexError::OutOfBounds => f.write_str("character index is out of bounds"),
            // The decoding details are exposed through `source()` rather than repeated here.
            IndexError::InvalidChar(_) => f.write_str("data is not valid UTF-8"),
        }
    }
}

impl std::error::Error for IndexError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            IndexError::OutOfBounds => None,
            IndexError::InvalidChar(e) => Some(e),
        }
    }
}
impl MappedFile {
    /// Memory-maps `file` and prepares an index seeded with the start of the file.
    ///
    /// # Errors
    ///
    /// Returns the mapping error rendered as a `String` if the file cannot be mapped.
    pub fn new(file: File) -> Result<MappedFile, String> {
        // SAFETY: mapping a file is only sound while nothing else truncates or
        // rewrites it underneath the map. This type cannot enforce that; it is
        // the caller's responsibility for as long as the `MappedFile` lives.
        let map = unsafe { MmapOptions::new().map(&file) }.map_err(|e| e.to_string())?;
        Ok(MappedFile {
            file,
            map,
            line_ending_positions: vec![CharPosition {
                char_position: 0,
                byte_position: 0,
            }],
        })
    }

    /// Looks `index` up in the part of the file already covered by the cache.
    ///
    /// Two consecutive cached positions `(start, end)` delimit the characters
    /// `start.char_position .. end.char_position` (end exclusive), which are
    /// exactly the characters stored in bytes `start.byte_position .. end.byte_position`.
    ///
    /// Returns `Err(IndexError::OutOfBounds)` when no cached span contains `index`.
    fn find_with_cache(&mut self, index: usize) -> Result<char, IndexError> {
        let hit = self
            .line_ending_positions
            .windows(2)
            .map(|pair| (pair[0], pair[1]))
            // Half-open test: the character *at* `end` belongs to the next span.
            .find(|&(start, end)| start.char_position <= index && index < end.char_position);

        match hit {
            Some((start, end)) => {
                self.find_nth_in_str(index - start.char_position, start, Some(end))
            }
            None => Err(IndexError::OutOfBounds),
        }
    }

    /// Returns the `n`-th character (zero-based) counted from `start`.
    ///
    /// With `end` set, only the bytes between `start` and `end` are decoded and
    /// the cache is left untouched. With `end == None`, everything from `start`
    /// to the end of the map is decoded and every `'\n'` passed on the way to
    /// the requested character is appended to `line_ending_positions`.
    ///
    /// # Errors
    ///
    /// * `IndexError::OutOfBounds` if the byte range does not exist in the map
    ///   or it holds fewer than `n + 1` characters.
    /// * `IndexError::InvalidChar` if the decoded byte range is not valid UTF-8.
    ///   Without `end` this covers the whole rest of the file, so invalid bytes
    ///   located after the requested character are reported as well.
    fn find_nth_in_str(
        &mut self,
        n: usize,
        start: CharPosition,
        end: Option<CharPosition>,
    ) -> Result<char, IndexError> {
        // Checked slicing: positions are public data, so never trust them enough to panic.
        let bytes = match end {
            Some(end) => self.map.get(start.byte_position..end.byte_position),
            None => self.map.get(start.byte_position..),
        }
        .ok_or(IndexError::OutOfBounds)?;
        let text = std::str::from_utf8(bytes).map_err(IndexError::InvalidChar)?;

        if end.is_some() {
            return text.chars().nth(n).ok_or(IndexError::OutOfBounds);
        }

        // The scan usually starts *on* the last cached newline; remembering the
        // highest cached character position keeps it from being pushed again.
        let mut last_cached = self
            .line_ending_positions
            .last()
            .map(|position| position.char_position);

        for (offset, (byte_offset, c)) in text.char_indices().enumerate() {
            if c == '\n' {
                let char_position = start.char_position + offset;
                if last_cached.map_or(true, |last| last < char_position) {
                    self.line_ending_positions.push(CharPosition {
                        byte_position: start.byte_position + byte_offset,
                        char_position,
                    });
                    last_cached = Some(char_position);
                }
            }
            if offset == n {
                return Ok(c);
            }
        }
        Err(IndexError::OutOfBounds)
    }

    /// Returns the character at zero-based character index `index` of the file.
    ///
    /// Indices inside the already scanned part of the file are answered from the
    /// cached line-ending positions; anything beyond is found by scanning on
    /// from the last cached position, which extends the cache as a side effect.
    ///
    /// # Errors
    ///
    /// * `IndexError::OutOfBounds` if the file has no character at `index`.
    /// * `IndexError::InvalidChar` if the bytes that had to be decoded are not
    ///   valid UTF-8.
    pub fn unicode_at(&mut self, index: usize) -> Result<char, IndexError> {
        match self.find_with_cache(index) {
            // Not covered by the cache: fall through and scan forward.
            Err(IndexError::OutOfBounds) => {}
            // A hit, or a genuine decoding error that must not be retried.
            resolved => return resolved,
        }

        let resume = self
            .line_ending_positions
            .last()
            .copied()
            .unwrap_or(CharPosition {
                byte_position: 0,
                char_position: 0,
            });
        // `index` can only be smaller than the resume point if the cache was
        // tampered with; report that instead of underflowing.
        let n = index
            .checked_sub(resume.char_position)
            .ok_or(IndexError::OutOfBounds)?;
        self.find_nth_in_str(n, resume, None)
    }
}
// Unit-test module, compiled only for test builds; it currently defines no tests.
#[cfg(test)]
mod tests {}