use std::borrow::Cow;
use std::cmp::{Ord, Ordering};
use std::fmt::{Display, Error, Formatter};
use std::io::Read;
use std::result::Result;
use std::str::Chars;
use serde_derive::{Deserialize, Serialize};
/// A single UTF-16 code unit, as yielded by `Utf16Iter`.
pub type Utf16C = u16;
/// A span of text, described by where it starts and how long it is.
///
/// `Text::get_value_for` uses both fields as byte offsets into the text content.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TextSpan {
/// The starting index of the span
pub index: usize,
/// The length of the span
pub length: usize
}
impl PartialOrd for TextSpan {
    /// Spans are totally ordered, so this always returns `Some` and simply
    /// defers to `Ord::cmp` to keep both orderings in agreement.
    fn partial_cmp(&self, other: &TextSpan) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for TextSpan {
    /// Orders spans by starting index first, then by length.
    fn cmp(&self, other: &TextSpan) -> Ordering {
        self.index
            .cmp(&other.index)
            .then_with(|| self.length.cmp(&other.length))
    }
}
impl Display for TextSpan {
fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
write!(f, "@{}+{}", self.index, self.length)
}
}
/// A position in a text, expressed as a line and a column.
///
/// `Text::get_position_at` produces 1-based values for both fields,
/// with the column counted in characters from the start of the line.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TextPosition {
/// The line number
pub line: usize,
/// The column number
pub column: usize
}
impl PartialOrd for TextPosition {
    /// Positions are totally ordered, so this always returns `Some` and simply
    /// defers to `Ord::cmp` to keep both orderings in agreement.
    fn partial_cmp(&self, other: &TextPosition) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for TextPosition {
    /// Orders positions by line first, then by column.
    fn cmp(&self, other: &TextPosition) -> Ordering {
        self.line
            .cmp(&other.line)
            .then_with(|| self.column.cmp(&other.column))
    }
}
impl Display for TextPosition {
fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
write!(f, "({}, {})", self.line, self.column)
}
}
/// The context of a position in a text: a line of text and a marker line for it.
///
/// As built by `Text::get_context_for`, `content` is the line stripped of its
/// leading whitespace and trailing line-ending characters, and `pointer` is a
/// run of blanks (tabs are kept as tabs) followed by one or more `^`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct TextContext<'a> {
/// The text content being pointed at
pub content: &'a str,
/// The pointer string, meant to be displayed right below `content`
pub pointer: String
}
/// A piece of text with an index of where each of its lines starts.
#[derive(Debug, Clone)]
pub struct Text<'a> {
/// The full content, either borrowed or owned
content: Cow<'a, str>,
/// The byte offset in `content` of the start of each line;
/// always begins with 0 (see `find_lines_in`)
lines: Vec<usize>
}
impl<'a> Text<'a> {
#[allow(clippy::should_implement_trait)]
pub fn from_str(content: &'a str) -> Text<'a> {
let lines = find_lines_in(content.char_indices());
Text {
content: Cow::Borrowed(content),
lines
}
}
pub fn from_string(content: String) -> Text<'static> {
let lines = find_lines_in(content.char_indices());
Text {
content: Cow::Owned(content),
lines
}
}
pub fn from_utf8_stream(input: &mut dyn Read) -> Result<Text<'static>, std::io::Error> {
let mut content = String::new();
input.read_to_string(&mut content)?;
let lines = find_lines_in(content.char_indices());
Ok(Text {
content: Cow::Owned(content),
lines
})
}
pub fn get_line_count(&self) -> usize {
self.lines.len()
}
pub fn is_empty(&self) -> bool {
self.content.is_empty()
}
pub fn len(&self) -> usize {
self.content.len()
}
pub fn is_end(&self, index: usize) -> bool {
index >= self.content.len()
}
pub fn at(&self, index: usize) -> char {
self.content[index..].chars().next().unwrap()
}
pub fn get_value(&self, index: usize, length: usize) -> &str {
&self.content[index..(index + length)]
}
pub fn get_value_for(&self, span: TextSpan) -> &str {
self.get_value(span.index, span.length)
}
pub fn get_value_at(&self, position: TextPosition, length: usize) -> &str {
let from_line = &self.content[self.lines[position.line - 1]..];
let in_line_offset = from_line
.char_indices()
.take(position.column - 1)
.last()
.map(|(offset, c)| offset + c.len_utf8())
.unwrap_or_default();
let start = self.lines[position.line - 1] + in_line_offset;
&self.content[start..(start + length)]
}
pub fn get_line_index(&self, line: usize) -> usize {
self.lines[line - 1]
}
pub fn get_line_length(&self, line: usize) -> usize {
if line == self.lines.len() {
self.content.len() - self.lines[line - 1]
} else {
self.lines[line] - self.lines[line - 1]
}
}
pub fn get_line_content(&self, line: usize) -> &str {
self.get_value(self.get_line_index(line), self.get_line_length(line))
}
pub fn get_position_at(&self, index: usize) -> TextPosition {
let line = find_line_at(&self.lines, index);
let nb_chars = self.content[self.lines[line]..index].chars().count();
TextPosition {
line: line + 1,
column: nb_chars + 1
}
}
pub fn get_position_for(&self, position: TextPosition, length: usize) -> TextPosition {
let index = self.lines[position.line - 1] + position.column - 1 + length;
self.get_position_at(index)
}
pub fn get_context_at(&self, position: TextPosition) -> TextContext {
self.get_context_for(position, 1)
}
pub fn get_context_for(&self, position: TextPosition, length: usize) -> TextContext {
let mut line_content = self.get_line_content(position.line);
let mut end = line_content.len();
while end != 0 {
let last = line_content.chars().last().unwrap();
if !is_line_ending_char(last) {
break;
}
end -= last.len_utf8();
line_content = &line_content[..end];
}
let mut removed_heading = 0;
loop {
match line_content.chars().next() {
None => break,
Some(c) => {
if c.is_whitespace() {
line_content = &line_content[c.len_utf8()..];
removed_heading += 1;
} else {
break;
}
}
}
}
let in_line_offset = line_content
.char_indices()
.take(position.column - 1 - removed_heading)
.last()
.map(|(offset, c)| offset + c.len_utf8())
.unwrap_or_default();
let pointer_count = line_content[in_line_offset..]
.char_indices()
.take_while(|&(offset, _)| offset < length)
.count()
.max(1);
let pointer_blank_count = position.column - 1 - removed_heading;
let mut pointer = String::with_capacity(pointer_count + pointer_blank_count);
for c in line_content.chars().take(pointer_blank_count) {
pointer.push(if c == '\t' { '\t' } else { ' ' });
}
for _ in 0..pointer_count {
pointer.push('^');
}
TextContext {
content: line_content,
pointer
}
}
pub fn get_context_of(&self, span: TextSpan) -> TextContext {
let position = self.get_position_at(span.index);
self.get_context_for(position, span.length)
}
pub fn iter_utf16_from(&self, from: usize) -> Utf16Iter {
Utf16Iter {
inner: self.content[from..].chars(),
next_cp: None
}
}
}
/// An iterator over the UTF-16 code units of a piece of text.
///
/// Each item pairs a code unit with a byte count; see the `Iterator`
/// implementation for how that count is assigned.
pub struct Utf16Iter<'a> {
/// The remaining characters to encode
inner: Chars<'a>,
/// The second code unit of a surrogate pair, held back until the next call
next_cp: Option<(Utf16C, usize)>
}
impl<'a> Iterator for Utf16Iter<'a> {
    type Item = (Utf16C, usize);

    /// Yields the next UTF-16 code unit and the number of UTF-8 bytes it consumes.
    ///
    /// A character that needs a surrogate pair yields its first unit with a
    /// byte count of 0 and its second unit with the UTF-8 length of the character.
    fn next(&mut self) -> Option<Self::Item> {
        // second half of a surrogate pair left over from the previous call
        if let Some(pending) = self.next_cp.take() {
            return Some(pending);
        }
        let c = self.inner.next()?;
        let length = c.len_utf8();
        let mut units = [0_u16; 2];
        if c.encode_utf16(&mut units).len() == 2 {
            self.next_cp = Some((units[1], length));
            Some((units[0], 0))
        } else {
            Some((units[0], length))
        }
    }
}
/// Determines whether a line ends at the pair of consecutive characters `c1`, `c2`.
///
/// This is the case when `c2` is LF, VT, FF, NEL, LS or PS,
/// or when `c1` is CR, whatever follows it.
fn is_line_ending(c1: char, c2: char) -> bool {
    match c2 {
        '\u{000A}' | '\u{000B}' | '\u{000C}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
        _ => c1 == '\u{000D}'
    }
}
/// Determines whether `c` is one of the line-ending characters:
/// LF, VT, FF, CR, NEL, LS or PS.
fn is_line_ending_char(c: char) -> bool {
    matches!(
        c,
        '\u{000A}'..='\u{000D}' | '\u{0085}' | '\u{2028}' | '\u{2029}'
    )
}
fn find_lines_in<T: Iterator<Item = (usize, char)>>(iterator: T) -> Vec<usize> {
let mut result = Vec::new();
let mut c1: char;
let mut c2: char = '\0';
result.push(0);
for (offset, x) in iterator {
c1 = c2;
c2 = x;
if is_line_ending(c1, c2) {
result.push(if c1 == '\u{000D}' && c2 != '\u{000A}' {
offset
} else {
offset + x.len_utf8()
});
}
}
result
}
/// Finds the 0-based number of the line that contains the byte `index`.
///
/// `lines` holds the starting offset of each line. The answer is the line
/// preceding the first line (after the initial one) that starts beyond
/// `index`, or the last line when there is none.
fn find_line_at(lines: &[usize], index: usize) -> usize {
    let last_line = lines.len() - 1;
    lines
        .iter()
        .skip(1)
        .position(|&start| index < start)
        .unwrap_or(last_line)
}
/// Checks that the starting offset of each line gets indexed.
#[test]
fn test_text_lines() {
    let text = Text::from_str("this is\na new line");
    assert_eq!(text.lines, vec![0, 8]);
}
/// Checks character access by byte index.
#[test]
fn test_text_at() {
    let text = Text::from_str("this is\na new line");
    for &(index, expected) in &[(0, 't'), (8, 'a')] {
        assert_eq!(text.at(index), expected);
    }
}
/// Checks substring extraction by byte index and length.
#[test]
fn test_text_get_value() {
    let text = Text::from_str("this is\na new line");
    for &(index, length, expected) in &[(0, 3, "thi"), (8, 2, "a ")] {
        assert_eq!(text.get_value(index, length), expected);
    }
}
/// Checks substring extraction by line/column position on ASCII content.
#[test]
fn test_text_get_value_at() {
    let text = Text::from_str("this is\na new line");
    let cases = [(1, 1, "thi"), (2, 3, "new")];
    for &(line, column, expected) in &cases {
        let position = TextPosition { line, column };
        assert_eq!(text.get_value_at(position, 3), expected);
    }
}
/// Checks substring extraction by line/column position on multi-byte content.
#[test]
fn test_text_get_value_at_2() {
    let text = Text::from_str("नमस्ते\nЗдравствуйте");
    let first = text.get_value_at(TextPosition { line: 1, column: 1 }, 6);
    let second = text.get_value_at(TextPosition { line: 2, column: 3 }, 4);
    assert_eq!(first, "नम");
    assert_eq!(second, "ра");
}
/// Checks the starting index reported for each line.
#[test]
fn test_text_get_line_index() {
    let text = Text::from_str("this is\na new line");
    for &(line, expected) in &[(1, 0), (2, 8)] {
        assert_eq!(text.get_line_index(line), expected);
    }
}
/// Checks the length reported for each line, line ending included.
#[test]
fn test_text_get_line_length() {
    let text = Text::from_str("this is\na new line");
    for &(line, expected) in &[(1, 8), (2, 10)] {
        assert_eq!(text.get_line_length(line), expected);
    }
}
/// Checks the content reported for each line, line ending included.
#[test]
fn test_text_get_line_content() {
    let text = Text::from_str("this is\na new line");
    for &(line, expected) in &[(1, "this is\n"), (2, "a new line")] {
        assert_eq!(text.get_line_content(line), expected);
    }
}
/// Checks the position computed for every byte index of a two-line text.
#[test]
fn test_text_get_position_at() {
    let text = Text::from_str("this is\na new line");
    // the second line starts at index 8
    for index in 0..text.content.len() {
        let expected = if index < 8 {
            TextPosition {
                line: 1,
                column: index + 1
            }
        } else {
            TextPosition {
                line: 2,
                column: index + 1 - 8
            }
        };
        assert_eq!(text.get_position_at(index), expected);
    }
}
/// Checks the context built on multi-byte content: one blank per character
/// before the position, then one marker per character starting within `length` bytes.
#[test]
fn test_text_get_context_for() {
    let text = Text::from_str("नमस्ते\nЗдравствуйте");
    assert_eq!(
        text.get_context_for(TextPosition { line: 1, column: 2 }, 6),
        TextContext {
            content: "नमस्ते",
            // column 2: one blank, then two 3-byte characters within 6 bytes
            pointer: " ".repeat(1) + "^^"
        }
    );
    assert_eq!(
        text.get_context_for(TextPosition { line: 2, column: 3 }, 6),
        TextContext {
            content: "Здравствуйте",
            // column 3: two blanks, then three 2-byte characters within 6 bytes
            pointer: " ".repeat(2) + "^^^"
        }
    );
}
/// Checks that an empty last line still gets a single marker.
#[test]
fn test_text_get_context_for_on_empty_last() {
    let text = Text::from_str("x\n");
    let context = text.get_context_for(TextPosition { line: 2, column: 1 }, 0);
    let expected = TextContext {
        content: "",
        pointer: String::from("^")
    };
    assert_eq!(context, expected);
}
/// Checks that leading blanks are stripped from the context and that the
/// pointer is shifted accordingly.
#[test]
fn test_text_get_context_for_no_leading() {
    // The third line is indented with two blanks, so that column 5 is the `y`.
    let source = format!("x\n\n{}xxyx\nx", " ".repeat(2));
    let text = Text::from_str(&source);
    assert_eq!(
        text.get_context_for(TextPosition { line: 3, column: 5 }, 1),
        TextContext {
            content: "xxyx",
            // two characters of the stripped content precede the `y`
            pointer: " ".repeat(2) + "^"
        }
    );
}
/// Checks that a tab inside the line is reproduced in the pointer.
#[test]
fn test_text_get_context_for_inner_tab() {
    let text = Text::from_str("x\n\nx\txyx\nx");
    let context = text.get_context_for(TextPosition { line: 3, column: 4 }, 1);
    let expected = TextContext {
        content: "x\txyx",
        pointer: String::from(" \t ^")
    };
    assert_eq!(context, expected);
}
/// Checks that a leading tab is stripped from the context and that the
/// pointer is shifted accordingly.
#[test]
fn test_text_get_context_for_no_leading_tab() {
    let text = Text::from_str("x\n\n\txxyx\nx");
    assert_eq!(
        text.get_context_for(TextPosition { line: 3, column: 4 }, 1),
        TextContext {
            content: "xxyx",
            // column 4 is the `y`: two characters of the stripped content precede it
            pointer: " ".repeat(2) + "^"
        }
    );
}