use super::latin_1::{char_to_latin1, Latin1String, Utf8ToLatin1Error};
use super::source::{Position, Range};
use std::fs::File;
use std::io;
use std::io::prelude::Read;
use std::path::Path;
/// Source text held as a sequence of lines.
pub struct Contents {
/// One entry per line as produced by `split_lines`; a line keeps its
/// trailing `\n` when it has one.
lines: Vec<String>,
}
impl Contents {
    /// Reads the file at `file_name` and builds its contents from the raw
    /// bytes, which are converted to text through `Latin1String`.
    ///
    /// # Errors
    ///
    /// Returns the I/O error if the file cannot be opened or read.
    pub fn from_latin1_file(file_name: &Path) -> io::Result<Contents> {
        let mut file = File::open(file_name)?;
        let mut bytes = Vec::new();
        file.read_to_end(&mut bytes)?;
        let text = Latin1String::from_vec(bytes).to_string();
        Ok(Contents::from_str(&text))
    }

    /// Builds contents from `code` by splitting it into lines.
    pub fn from_str(code: &str) -> Contents {
        Contents {
            lines: split_lines(code),
        }
    }

    /// Position of the very first character: line 0, character 0.
    pub fn start(&self) -> Position {
        Position {
            line: 0,
            character: 0,
        }
    }

    /// Position at the end of the last stored line.
    ///
    /// The character column is the length of the last line counted in UTF-16
    /// code units. Empty contents yield line 0, character 0.
    #[cfg(test)]
    fn end(&self) -> Position {
        let line = self.num_lines().saturating_sub(1) as u32;
        let character = self
            .lines
            .last()
            .map_or(0, |line| line.chars().map(char::len_utf16).sum::<usize>())
            as u32;
        Position { line, character }
    }

    /// Range spanning from `start()` to `end()`.
    #[cfg(test)]
    pub fn range(&self) -> Range {
        Range::new(self.start(), self.end())
    }

    /// Returns new contents holding only the characters inside `range`.
    ///
    /// If `range.end` lies beyond the end of the contents, everything from
    /// `range.start` to the end of the contents is returned.
    #[cfg(test)]
    pub fn crop(&self, range: Range) -> Contents {
        let mut reader = ContentReader::new(self);
        reader.seek_pos(range.start);

        let mut result = String::new();
        while reader.pos() < range.end {
            match reader.pop_char() {
                Some(chr) => result.push(chr),
                // The reader is exhausted and its position can no longer
                // advance; without this the loop would never terminate.
                None => break,
            }
        }

        Contents {
            lines: split_lines(&result),
        }
    }

    /// Number of stored lines.
    pub fn num_lines(&self) -> usize {
        self.lines.len()
    }

    /// Returns line `lineno` (zero-based), or `None` if there is no such line.
    pub fn get_line(&self, lineno: usize) -> Option<&str> {
        self.lines.get(lineno).map(String::as_str)
    }

    /// Replaces the text covered by `range` with `content`.
    ///
    /// Character columns in `range` are counted in UTF-16 code units. The
    /// affected lines are rebuilt from the part of the start line before
    /// `range.start`, followed by `content`, followed by the part of the end
    /// line from `range.end` onwards, and then split into lines again.
    ///
    /// Lines referenced by `range` that do not exist contribute no text, and
    /// the replaced span is clamped to the existing lines, so a range that
    /// points past the end appends instead of panicking.
    pub fn change(&mut self, range: &Range, content: &str) {
        if self.lines.is_empty() {
            self.lines = split_lines(content);
            return;
        }

        let Range { start, end } = range;
        let start_char = start.character as usize;
        let end_char = end.character as usize;
        let start_line = start.line as usize;
        let end_line = end.line as usize;

        let mut merged_content = String::new();

        // Keep the part of the first line that precedes the range.
        if let Some(line) = self.lines.get(start_line) {
            let mut utf16_offset = 0;
            for chr in line.chars() {
                if utf16_offset >= start_char {
                    break;
                }
                merged_content.push(chr);
                utf16_offset += chr.len_utf16();
            }
        }

        merged_content.push_str(content);

        // Keep the part of the last line that follows the range.
        if let Some(line) = self.lines.get(end_line) {
            let mut utf16_offset = 0;
            for chr in line.chars() {
                if utf16_offset >= end_char {
                    merged_content.push(chr);
                }
                utf16_offset += chr.len_utf16();
            }
        }

        // `self.lines` is non-empty here, so `len() - 1` cannot underflow.
        let last_line = self.lines.len() - 1;
        let replace_end = end_line.min(last_line) + 1;
        // A start line beyond the replaced span would make the slice range
        // invalid and panic; clamp it so the new text is inserted at the end
        // of the span instead.
        let replace_start = start_line.min(replace_end);

        // Dropping the `Splice` performs the replacement.
        drop(
            self.lines
                .splice(replace_start..replace_end, split_lines(&merged_content)),
        );
    }
}
/// Splits `code` into lines, keeping a terminator on every terminated line.
///
/// `\n`, `\r\n` and a lone `\r` each end a line, and in every case the stored
/// line ends with a single `\n`. Text after the final terminator becomes a
/// last line without a terminator; empty input yields no lines.
fn split_lines(code: &str) -> Vec<String> {
    let mut lines = Vec::new();
    let mut line_start = 0;
    let mut cursor = code.char_indices().peekable();

    while let Some((offset, chr)) = cursor.next() {
        match chr {
            '\n' => {
                // `\n` is one byte wide, so the inclusive end is a char boundary.
                lines.push(code[line_start..=offset].to_owned());
                line_start = offset + 1;
            }
            '\r' => {
                let mut line = String::with_capacity(offset + 1 - line_start);
                line.push_str(&code[line_start..offset]);
                line.push('\n');
                lines.push(line);
                line_start = offset + 1;
                // Treat `\r\n` as a single terminator.
                if let Some(&(_, '\n')) = cursor.peek() {
                    cursor.next();
                    line_start += 1;
                }
            }
            _ => {}
        }
    }

    if line_start < code.len() {
        lines.push(code[line_start..].to_owned());
    }
    lines
}
/// Snapshot of a `ContentReader`'s location, used to save and restore the
/// reader.
#[derive(PartialEq, Clone, Copy, Debug)]
pub struct ReaderState {
/// Line/character position of the next character to read.
pos: Position,
/// Byte offset of the next character within the current line's UTF-8 text.
idx: usize, }
impl ReaderState {
pub fn pos(&self) -> Position {
self.pos
}
}
/// Cursor that reads a `Contents` one character at a time.
#[derive(Clone)]
pub struct ContentReader<'a> {
/// The contents being read.
contents: &'a Contents,
/// Current location within `contents`.
state: ReaderState,
}
impl<'a> ContentReader<'a> {
    /// Creates a reader at the default position with byte offset 0.
    pub fn new(contents: &'a Contents) -> ContentReader<'a> {
        ContentReader {
            contents,
            state: ReaderState {
                pos: Position::default(),
                idx: 0,
            },
        }
    }

    /// Returns the character at the current location without consuming it.
    ///
    /// Returns `None` when the current line does not exist or the byte offset
    /// is at (or past) the end of the current line.
    #[must_use]
    pub fn get_char(&self) -> Option<char> {
        let line = self.contents.get_line(self.state.pos.line as usize)?;
        // Checked slicing: an offset that is out of range or not on a
        // character boundary yields `None` rather than undefined behaviour.
        line.get(self.state.idx..)?.chars().next()
    }

    /// Consumes the next character and returns it as a Latin-1 byte.
    ///
    /// Returns `Ok(None)` when there is nothing left to read.
    ///
    /// # Errors
    ///
    /// If `char_to_latin1` rejects the character, it is still consumed and an
    /// error carrying the character and the position it was read from is
    /// returned.
    pub fn pop(&mut self) -> Result<Option<u8>, Utf8ToLatin1Error> {
        let chr = match self.get_char() {
            Some(chr) => chr,
            None => return Ok(None),
        };
        // Remember where the character started before moving past it.
        let pos = self.pos();
        self.skip_char(chr);
        match char_to_latin1(chr) {
            Some(latin1) => Ok(Some(latin1)),
            None => Err(Utf8ToLatin1Error { pos, value: chr }),
        }
    }

    /// Returns the next character as a Latin-1 byte without consuming it.
    ///
    /// Returns `Ok(None)` when there is nothing left to read.
    ///
    /// # Errors
    ///
    /// Returns an error carrying the character and the current position if
    /// `char_to_latin1` rejects the character.
    pub fn get(&self) -> Result<Option<u8>, Utf8ToLatin1Error> {
        match self.get_char() {
            None => Ok(None),
            Some(chr) => match char_to_latin1(chr) {
                Some(latin1) => Ok(Some(latin1)),
                None => Err(Utf8ToLatin1Error {
                    pos: self.pos(),
                    value: chr,
                }),
            },
        }
    }

    /// Consumes and returns the next character, or `None` at the end.
    #[must_use]
    pub fn pop_char(&mut self) -> Option<char> {
        let chr = self.get_char()?;
        self.skip_char(chr);
        Some(chr)
    }

    /// Advances the state past `chr`, which must be the character at the
    /// current location.
    fn skip_char(&mut self, chr: char) {
        self.state.pos.move_after_char(chr);
        // A character column of 0 after the move is taken to mean that a new
        // line was started, so the byte offset restarts as well.
        if self.state.pos.character == 0 {
            self.state.idx = 0;
        } else {
            self.state.idx += chr.len_utf8();
        }
    }

    /// Consumes the next character, if any, and discards it.
    pub fn skip(&mut self) {
        let _ = self.pop_char();
    }

    /// Like `pop`, but maps the byte through `Latin1String::lowercase`.
    ///
    /// # Errors
    ///
    /// Propagates the error from `pop`.
    pub fn pop_lowercase(&mut self) -> Result<Option<u8>, Utf8ToLatin1Error> {
        Ok(self.pop()?.map(Latin1String::lowercase))
    }

    /// Like `peek`, but maps the byte through `Latin1String::lowercase`.
    ///
    /// # Errors
    ///
    /// Propagates the error from `peek`.
    pub fn peek_lowercase(&mut self) -> Result<Option<u8>, Utf8ToLatin1Error> {
        Ok(self.peek()?.map(Latin1String::lowercase))
    }

    /// Returns `true` if the upcoming characters are exactly `substr`.
    ///
    /// The check runs on a clone, so the reader itself is not advanced.
    #[cfg(test)]
    pub fn matches(&mut self, substr: &str) -> bool {
        let mut lookahead = self.clone();
        substr
            .chars()
            .all(|expected| lookahead.pop_char() == Some(expected))
    }

    /// Consumes the next character if it is the Latin-1 byte `value`.
    ///
    /// Returns whether a character was consumed.
    ///
    /// # Errors
    ///
    /// Propagates the error from `peek`; nothing is consumed in that case.
    pub fn skip_if(&mut self, value: u8) -> Result<bool, Utf8ToLatin1Error> {
        if self.peek()? == Some(value) {
            self.skip();
            Ok(true)
        } else {
            Ok(false)
        }
    }

    /// Restores a state previously obtained from `state`.
    pub fn set_state(&mut self, state: ReaderState) {
        self.state = state;
    }

    /// Moves this reader to the location of `reader`.
    pub fn set_to(&mut self, reader: &ContentReader) {
        self.state = reader.state;
    }

    /// Current line/character position.
    pub fn pos(&self) -> Position {
        self.state.pos()
    }

    /// Moves the reader to `pos` by reading forward from the start of
    /// `pos.line`.
    ///
    /// # Panics
    ///
    /// Panics if the reader cannot land exactly on `pos`, including when
    /// `pos` lies beyond the end of the contents.
    #[cfg(test)]
    pub fn seek_pos(&mut self, pos: Position) {
        self.state = ReaderState {
            pos: Position {
                line: pos.line,
                character: 0,
            },
            idx: 0,
        };
        while self.pos() < pos {
            // Stop once nothing is left to read; otherwise an unreachable
            // target would spin forever instead of failing the assertion.
            if self.pop_char().is_none() {
                break;
            }
        }
        assert_eq!(self.pos(), pos);
    }

    /// Snapshot of the current location, suitable for `set_state`.
    pub fn state(&self) -> ReaderState {
        self.state
    }

    /// Alias for `get`: the next Latin-1 byte without consuming it.
    ///
    /// # Errors
    ///
    /// Propagates the error from `get`.
    pub fn peek(&self) -> Result<Option<u8>, Utf8ToLatin1Error> {
        self.get()
    }

    /// Alias for `get_char`: the next character without consuming it.
    pub fn peek_char(&self) -> Option<char> {
        self.get_char()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building contents from a string.
    fn new(code: &str) -> Contents {
        Contents::from_str(code)
    }

    /// Shorthand for creating a reader over `contents`.
    fn reader(contents: &Contents) -> ContentReader<'_> {
        ContentReader::new(contents)
    }

    #[test]
    fn pop_latin1_ok() {
        let contents = new("hi");
        let mut reader = reader(&contents);
        assert_eq!(reader.pop(), Ok(Some(b'h')));
        assert_eq!(reader.pop(), Ok(Some(b'i')));
        assert_eq!(reader.pop(), Ok(None));
    }

    #[test]
    fn pop_latin1_err() {
        let contents = new("h€i");
        let mut reader = reader(&contents);
        assert_eq!(reader.pop(), Ok(Some(b'h')));
        assert_eq!(
            reader.pop(),
            Err(Utf8ToLatin1Error {
                pos: Position::new(0, 1),
                value: '€'
            })
        );
        // The offending character was consumed, so reading continues after it.
        assert_eq!(reader.pop(), Ok(Some(b'i')));
        assert_eq!(reader.pop(), Ok(None));
    }

    #[test]
    fn pop_single_line() {
        let contents = new("hi");
        let mut reader = reader(&contents);
        assert_eq!(reader.pop_char(), Some('h'));
        assert_eq!(reader.pop_char(), Some('i'));
        assert_eq!(reader.pop_char(), None);
    }

    #[test]
    fn pop_char() {
        let contents = new("hå");
        let mut reader = reader(&contents);
        assert_eq!(reader.pop_char(), Some('h'));
        assert_eq!(reader.pop_char(), Some('å'));
        assert_eq!(reader.pop_char(), None);
    }

    #[test]
    fn pop_multi_line_no_newline_at_end() {
        let contents = new("h\ni");
        let mut reader = reader(&contents);
        assert_eq!(reader.pop_char(), Some('h'));
        assert_eq!(reader.pop_char(), Some('\n'));
        assert_eq!(reader.pop_char(), Some('i'));
        assert_eq!(reader.pop_char(), None);
    }

    #[test]
    fn pop_multi_line() {
        let contents = new("h\ni\n");
        let mut reader = reader(&contents);
        assert_eq!(reader.pop_char(), Some('h'));
        assert_eq!(reader.pop_char(), Some('\n'));
        assert_eq!(reader.pop_char(), Some('i'));
        assert_eq!(reader.pop_char(), Some('\n'));
        assert_eq!(reader.pop_char(), None);
    }

    #[test]
    fn empty_lines() {
        let contents = new("\n\n\n");
        let mut reader = reader(&contents);
        assert_eq!(reader.pop_char(), Some('\n'));
        assert_eq!(reader.pop_char(), Some('\n'));
        assert_eq!(reader.pop_char(), Some('\n'));
    }

    #[test]
    fn peek() {
        let contents = new("hi");
        let mut reader = reader(&contents);
        assert_eq!(reader.peek_char(), Some('h'));
        assert_eq!(reader.pop_char(), Some('h'));
        assert_eq!(reader.peek_char(), Some('i'));
        assert_eq!(reader.pop_char(), Some('i'));
        assert_eq!(reader.peek_char(), None);
        assert_eq!(reader.pop_char(), None);
    }

    #[test]
    fn cr_is_removed() {
        let contents = new("1\r2\r\n");
        let mut reader = reader(&contents);
        assert_eq!(reader.pop_char(), Some('1'));
        assert_eq!(reader.pop_char(), Some('\n'));
        assert_eq!(reader.pop_char(), Some('2'));
        assert_eq!(reader.pop_char(), Some('\n'));
        assert_eq!(reader.pop_char(), None);
    }

    #[test]
    fn matches() {
        let contents = new("abc");
        let mut reader = reader(&contents);
        assert!(reader.matches("abc"));
        assert!(!reader.matches("bc"));
        reader.skip();
        assert!(reader.matches("bc"));
    }

    #[test]
    fn character_is_utf16_len() {
        // The bomb needs two UTF-16 code units, so it advances the column by 2.
        let bomb = '\u{1F4A3}';
        let contents = new(&format!("aä{}", bomb));
        assert_eq!(contents.end(), Position::new(0, 4));
        let mut reader = reader(&contents);
        assert_eq!(reader.pop_char(), Some('a'));
        assert_eq!(reader.pos(), Position::new(0, 1));
        assert_eq!(reader.pop_char(), Some('ä'));
        assert_eq!(reader.pos(), Position::new(0, 2));
        assert_eq!(reader.pop_char(), Some(bomb));
        assert_eq!(reader.pos(), Position::new(0, 4));
    }

    /// Concatenates all lines of `contents` back into a single string.
    fn flatten(contents: &Contents) -> String {
        let mut result = String::new();
        for line in contents.lines.iter() {
            result.push_str(line);
        }
        result
    }

    #[test]
    fn change_first() {
        let mut contents = new("hello");
        assert_eq!(flatten(&contents), "hello");
        contents.change(&Range::new(Position::new(0, 0), Position::new(0, 1)), "_");
        assert_eq!(flatten(&contents), "_ello");
    }

    #[test]
    fn change_last() {
        let mut contents = new("hello");
        assert_eq!(flatten(&contents), "hello");
        contents.change(&Range::new(Position::new(0, 4), Position::new(0, 5)), "_");
        assert_eq!(flatten(&contents), "hell_");
    }

    #[test]
    fn change_middle() {
        let mut contents = new("hello");
        assert_eq!(flatten(&contents), "hello");
        contents.change(&Range::new(Position::new(0, 2), Position::new(0, 4)), "__");
        assert_eq!(flatten(&contents), "he__o");
    }

    #[test]
    fn change_shrink() {
        let mut contents = new("hello");
        assert_eq!(flatten(&contents), "hello");
        contents.change(&Range::new(Position::new(0, 2), Position::new(0, 4)), "_");
        assert_eq!(flatten(&contents), "he_o");
    }

    #[test]
    fn change_grow() {
        let mut contents = new("hello");
        assert_eq!(flatten(&contents), "hello");
        contents.change(&Range::new(Position::new(0, 2), Position::new(0, 4)), "___");
        assert_eq!(flatten(&contents), "he___o");
    }

    #[test]
    fn change_multi_line() {
        let mut contents = new("hello\nworld");
        assert_eq!(flatten(&contents), "hello\nworld");
        contents.change(
            &Range::new(Position::new(0, 3), Position::new(1, 2)),
            "__\n__",
        );
        assert_eq!(flatten(&contents), "hel__\n__rld");
        assert_eq!(contents.num_lines(), 2);
        assert_eq!(contents.get_line(0).unwrap().to_string(), "hel__\n");
        assert_eq!(contents.get_line(1).unwrap().to_string(), "__rld");
    }

    #[test]
    fn change_to_less_lines() {
        let mut contents = new("hello\nworld");
        assert_eq!(flatten(&contents), "hello\nworld");
        contents.change(&Range::new(Position::new(0, 3), Position::new(1, 2)), "");
        assert_eq!(flatten(&contents), "helrld");
        assert_eq!(contents.num_lines(), 1);
        assert_eq!(contents.get_line(0).unwrap().to_string(), "helrld");
    }

    #[test]
    fn change_to_more_lines() {
        let mut contents = new("hello\nworld");
        assert_eq!(flatten(&contents), "hello\nworld");
        contents.change(
            &Range::new(Position::new(0, 3), Position::new(1, 2)),
            "\nmiddle\n",
        );
        assert_eq!(flatten(&contents), "hel\nmiddle\nrld");
        assert_eq!(contents.num_lines(), 3);
        assert_eq!(contents.get_line(0).unwrap().to_string(), "hel\n");
        assert_eq!(contents.get_line(1).unwrap().to_string(), "middle\n");
        assert_eq!(contents.get_line(2).unwrap().to_string(), "rld");
    }

    #[test]
    fn change_keeps_surrounding_lines() {
        let mut contents = new("___\nhello\nworld\n...");
        assert_eq!(flatten(&contents), "___\nhello\nworld\n...");
        contents.change(&Range::new(Position::new(1, 3), Position::new(2, 2)), "");
        assert_eq!(flatten(&contents), "___\nhelrld\n...");
        assert_eq!(contents.num_lines(), 3);
        assert_eq!(contents.get_line(0).unwrap().to_string(), "___\n");
        assert_eq!(contents.get_line(1).unwrap().to_string(), "helrld\n");
        assert_eq!(contents.get_line(2).unwrap().to_string(), "...");
    }

    #[test]
    fn change_empty() {
        let mut contents = new("");
        assert_eq!(flatten(&contents), "");
        contents.change(&Range::new(Position::new(0, 0), Position::new(0, 0)), "H");
        assert_eq!(flatten(&contents), "H");
    }

    #[test]
    fn change_to_empty() {
        let mut contents = new("H");
        assert_eq!(flatten(&contents), "H");
        contents.change(&Range::new(Position::new(0, 0), Position::new(0, 1)), "");
        assert_eq!(flatten(&contents), "");
    }

    #[test]
    fn change_add_missing_newline() {
        // The end of the range refers to a line that does not exist yet.
        let mut contents = new("a");
        assert_eq!(flatten(&contents), "a");
        contents.change(&Range::new(Position::new(0, 1), Position::new(1, 0)), "\n");
        assert_eq!(flatten(&contents), "a\n");
        assert_eq!(contents.num_lines(), 1);
        assert_eq!(contents.get_line(0).unwrap().to_string(), "a\n");
    }
}