use std::fmt::Display;
use std::fmt::Error;
use std::fmt::Formatter;
use std::io::BufReader;
use std::io::Read;
use std::result::Result;
use super::utils::biglist::BigList;
use super::utils::iterable::Iterable;
/// A single UTF-16 code unit (one half of a surrogate pair for characters above U+FFFF)
pub type Utf16C = u16;
/// A span within a text, given by the index of its first code unit and its length
#[derive(Copy, Clone)]
pub struct TextSpan {
    /// Index of the first code unit of the span
    pub index: usize,
    /// Number of code units in the span
    pub length: usize,
}

impl Display for TextSpan {
    /// Formats the span as `@index+length`
    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
        let TextSpan { index, length } = *self;
        write!(f, "@{}+{}", index, length)
    }
}
/// A position within a text, expressed as a line number and a column number
#[derive(Copy, Clone)]
pub struct TextPosition {
    /// The line number
    pub line: usize,
    /// The column number within the line
    pub column: usize,
}

impl Display for TextPosition {
    /// Formats the position as `(line, column)`
    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
        let TextPosition { line, column } = *self;
        write!(f, "({}, {})", line, column)
    }
}
/// The displayable context of a position in a text:
/// an excerpt of the line and a marker string to print underneath it
pub struct TextContext {
    /// The excerpt of the text line
    pub content: String,
    /// The marker string: padding followed by one or more `^` characters
    pub pointer: String,
}
pub struct Text {
content: BigList<Utf16C>,
lines: Vec<usize>
}
impl Text {
pub fn new(input: &str) -> Text {
let mut content = BigList::<Utf16C>::new(0);
for c in input.chars() {
let value = c as u32;
if value <= 0xFFFF {
content.push(value as u16);
} else {
let temp = value - 0x10000;
let lead = (temp >> 10) + 0xD800;
let trail = (temp & 0x03FF) + 0xDC00;
content.push(lead as Utf16C);
content.push(trail as Utf16C);
}
}
let lines = find_lines_in(&content);
Text { content, lines }
}
pub fn from_utf16_stream(input: &mut Read, big_endian: bool) -> Text {
let reader = &mut BufReader::new(input);
let mut content = BigList::<Utf16C>::new(0);
let iterator = Utf16IteratorRaw::new(reader, big_endian);
for c in iterator {
content.push(c);
}
let lines = find_lines_in(&content);
Text { content, lines }
}
pub fn from_utf8_stream(input: &mut Read) -> Text {
let reader = &mut BufReader::new(input);
let mut content = BigList::<Utf16C>::new(0);
let iterator = Utf16IteratorOverUtf8::new(reader);
for c in iterator {
content.push(c);
}
let lines = find_lines_in(&content);
Text { content, lines }
}
pub fn get_line_count(&self) -> usize {
self.lines.len()
}
pub fn len(&self) -> usize {
self.content.len()
}
pub fn is_end(&self, index: usize) -> bool {
index >= self.content.len()
}
pub fn at(&self, index: usize) -> Utf16C {
self.content[index]
}
pub fn get_value(&self, index: usize, length: usize) -> String {
utf16_to_string(&self.content, index, length)
}
pub fn get_value_for(&self, span: TextSpan) -> String {
self.get_value(span.index, span.length)
}
pub fn get_line_index(&self, line: usize) -> usize {
self.lines[line - 1]
}
pub fn get_line_length(&self, line: usize) -> usize {
if line == self.lines.len() {
self.content.len() - self.lines[line - 1]
} else {
self.lines[line] - self.lines[line - 1]
}
}
pub fn get_line_content(&self, line: usize) -> String {
self.get_value(self.get_line_index(line), self.get_line_length(line))
}
pub fn get_position_at(&self, index: usize) -> TextPosition {
let line = find_line_at(&self.lines, index);
TextPosition {
line: line + 1,
column: index - self.lines[line] + 1
}
}
pub fn get_context_at(&self, position: TextPosition) -> TextContext {
self.get_context_for(position, 1)
}
pub fn get_context_for(&self, position: TextPosition, length: usize) -> TextContext {
let line_index = self.get_line_index(position.line);
let line_length = self.get_line_length(position.line);
if line_length == 0 {
return TextContext {
content: String::from(""),
pointer: String::from("^")
};
}
let mut end = line_index + line_length - 1;
while end != line_index + 1
&& (self.content[end] == 0x000A || self.content[end] == 0x000B
|| self.content[end] == 0x000C || self.content[end] == 0x000D
|| self.content[end] == 0x0085 || self.content[end] == 0x2028
|| self.content[end] == 0x2029)
{
end = end - 1;
}
let mut start = line_index;
while start < end && is_white_space(self.content[start]) {
start = start + 1;
}
if line_index + position.column - 1 < start {
start = line_index;
}
if line_index + position.column - 1 > end {
end = line_index + line_length - 1;
}
let mut pointer = String::new();
for i in start..(line_index + position.column - 1) {
pointer.push(if self.content[i] == 0x0009 { '\t' } else { ' ' });
}
pointer.push('^');
for _i in 1..length {
pointer.push('^');
}
TextContext {
content: utf16_to_string(&self.content, start, end - start + 1),
pointer
}
}
pub fn get_context_of(&self, span: TextSpan) -> TextContext {
let position = self.get_position_at(span.index);
return self.get_context_for(position, span.length);
}
}
struct Utf16IteratorRaw<'a> {
big_endian: bool,
input: &'a mut Read
}
impl<'a> Iterator for Utf16IteratorRaw<'a> {
type Item = Utf16C;
fn next(&mut self) -> Option<Self::Item> {
let mut bytes: [u8; 2] = [0; 2];
let read = self.input.read(&mut bytes);
if read.is_err() || read.unwrap() < 2 {
return None;
}
if self.big_endian {
Some((bytes[1] as u16) << 8 | (bytes[0] as u16))
} else {
Some((bytes[0] as u16) << 8 | (bytes[1] as u16))
}
}
}
impl<'a> Utf16IteratorRaw<'a> {
pub fn new(input: &'a mut Read, big_endian: bool) -> Utf16IteratorRaw {
Utf16IteratorRaw { big_endian, input }
}
}
struct Utf16IteratorOverUtf8<'a> {
input: &'a mut Read,
next: Option<Utf16C>
}
impl<'a> Utf16IteratorOverUtf8<'a> {
fn read(input: &mut Read, buffer: &mut [u8]) -> usize {
let read = input.read(buffer);
match read {
Err(e) => panic!("{}", e),
Ok(size) => size
}
}
}
impl<'a> Iterator for Utf16IteratorOverUtf8<'a> {
type Item = Utf16C;
fn next(&mut self) -> Option<Self::Item> {
if self.next.is_some() {
let result = self.next;
self.next = None;
return result;
}
let mut bytes: [u8; 1] = [0; 1];
{
if Utf16IteratorOverUtf8::read(&mut self.input, &mut bytes) == 0 {
return None;
}
}
let b0 = bytes[0] as u8;
let c = match b0 {
_ if b0 >> 3 == 0b11110 => {
let mut others: [u8; 3] = [0; 3];
if Utf16IteratorOverUtf8::read(&mut self.input, &mut others) < 3 {
return None;
}
((b0 as u32) & 0b00000111) << 18 | ((others[0] as u32) & 0b00111111) << 12
| ((others[1] as u32) & 0b00111111) << 6
| ((others[2] as u32) & 0b00111111)
}
_ if b0 >> 4 == 0b1110 => {
let mut others: [u8; 2] = [0; 2];
if Utf16IteratorOverUtf8::read(&mut self.input, &mut others) < 2 {
return None;
}
((b0 as u32) & 0b00001111) << 12 | ((others[0] as u32) & 0b00111111) << 6
| ((others[1] as u32) & 0b00111111)
}
_ if b0 >> 5 == 0b110 => {
if Utf16IteratorOverUtf8::read(&mut self.input, &mut bytes) < 1 {
return None;
}
((b0 as u32) & 0b00011111) << 6 | ((bytes[0] as u32) & 0b00111111)
}
_ if b0 >> 7 == 0 => {
b0 as u32
}
_ => {
return None;
}
};
if (c >= 0xD800 && c < 0xE000) || c >= 0x110000 {
return None;
}
if c <= 0xFFFF {
return Some(c as Utf16C);
}
let temp = c - 0x10000;
let lead = (temp >> 10) + 0xD800;
let trail = (temp & 0x03FF) + 0xDC00;
self.next = Some(trail as Utf16C);
Some(lead as Utf16C)
}
}
impl<'a> Utf16IteratorOverUtf8<'a> {
pub fn new(input: &'a mut Read) -> Utf16IteratorOverUtf8 {
Utf16IteratorOverUtf8 { input, next: None }
}
}
/// Tells whether a line ends at the pair of consecutive code units `c1`, `c2`.
///
/// This is the case when `c2` is LF, VT, FF, NEL, LINE SEPARATOR or
/// PARAGRAPH SEPARATOR, or when `c1` is CR (whatever follows it).
fn is_line_ending(c1: Utf16C, c2: Utf16C) -> bool {
    match c2 {
        0x000A | 0x000B | 0x000C | 0x0085 | 0x2028 | 0x2029 => true,
        _ => c1 == 0x000D,
    }
}
/// Tells whether the code unit is a space, a horizontal tab,
/// a vertical tab or a form feed
fn is_white_space(c: Utf16C) -> bool {
    match c {
        0x0009 | 0x000B | 0x000C | 0x0020 => true,
        _ => false,
    }
}
/// Computes the index of the first code unit of each line in the given content.
///
/// The result always starts with 0 for the first line. The content is scanned
/// as pairs of consecutive code units (the first pair uses 0 as its predecessor);
/// each pair recognized by `is_line_ending` adds the start of a new line.
fn find_lines_in<'a, T: Iterable<'a, Item = Utf16C>>(iterable: &'a T) -> Vec<usize> {
    let mut starts = vec![0];
    let mut previous: Utf16C = 0;
    let mut offset = 0;
    for current in iterable.iter() {
        if is_line_ending(previous, current) {
            // After a CR that is not part of a CR LF pair, the current unit
            // already belongs to the new line; otherwise the new line
            // begins right after the current unit
            let lone_cr = previous == 0x000D && current != 0x000A;
            starts.push(if lone_cr { offset } else { offset + 1 });
        }
        previous = current;
        offset += 1;
    }
    starts
}
/// Finds the 0-based number of the line that contains `index`,
/// given the start index of every line.
///
/// Indices at or beyond the start of the last line map to the last line.
fn find_line_at(lines: &Vec<usize>, index: usize) -> usize {
    let last = lines.len() - 1;
    // Position of the first following line that starts after `index`;
    // skipping the first entry makes that position the number of the line sought
    lines
        .iter()
        .skip(1)
        .position(|&start| index < start)
        .unwrap_or(last)
}
/// Converts `length` UTF-16 code units of `content`, starting at `start`, into a string.
///
/// Unpaired surrogates (for example when the range cuts a surrogate pair in two)
/// are replaced by U+FFFD instead of discarding the whole substring.
fn utf16_to_string(content: &BigList<Utf16C>, start: usize, length: usize) -> String {
    let buffer: Vec<Utf16C> = (start..(start + length)).map(|i| content[i]).collect();
    String::from_utf16_lossy(&buffer)
}
/// Checks that a line feed splits the text into two lines
/// and that the second line starts right after it
#[test]
fn test_text_lines() {
    let text = Text::new("this is\na new line");
    let expected: Vec<usize> = vec![0, 8];
    assert_eq!(text.lines, expected);
}
/// Checks the extraction of a substring from the UTF-16 content
#[test]
fn test_text_substring() {
    let text = Text::new("this is\na new line");
    let extracted = utf16_to_string(&text.content, 8, 5);
    assert_eq!(extracted, "a new");
}
/// Checks the decoding of a UTF-8 buffer that mixes ASCII characters
/// with the three-byte encoding of U+2028
#[test]
fn test_read_utf8() {
    let bytes: [u8; 13] = [
        0x78, 0xE2, 0x80, 0xA8, 0xE2, 0x80, 0xA8, 0x78, 0xE2, 0x80, 0xA8, 0x79, 0x78,
    ];
    let mut reader: &[u8] = &bytes;
    let decoded: Vec<Utf16C> = Utf16IteratorOverUtf8::new(&mut reader).collect();
    let expected: Vec<Utf16C> = vec![0x78, 0x2028, 0x2028, 0x78, 0x2028, 0x79, 0x78];
    assert_eq!(expected, decoded);
}