use std::str::Chars;
/// One logical character produced while walking newline-normalized text.
///
/// A line break written as `"\r\n"` or as a lone `'\r'` is reported as a
/// single `'\n'`; the width fields describe how much of the *source* text
/// that step actually covered.
///
/// The type is plain data (two `usize`s and a `char`), so it is `Copy`:
/// callers can keep a step around after handing it to another function.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct CharWidth {
    /// Number of UTF-8 bytes of source text covered by this step
    /// (2 for a `"\r\n"` pair, otherwise the encoded length of the one
    /// source character).
    pub byte_width: usize,
    /// Number of source `char`s covered by this step: 2 for a `"\r\n"`
    /// pair, 1 for everything else.
    pub char_width: usize,
    /// The normalized character: `'\n'` for any line-break form, otherwise
    /// the source character unchanged.
    pub character: char,
}
/// A bidirectional cursor over a string slice that reports every line-break
/// form (`"\n"`, `"\r\n"`, lone `'\r'`) as a single `'\n'`.
///
/// Forward stepping goes through the `Iterator` implementation; the inherent
/// methods on this type allow stepping backwards and peeking.
///
/// `Debug` is derived so the cursor can be inspected in logs and assertion
/// messages; all fields already implement it.
#[derive(Clone, Debug)]
pub struct NewlineNormalizedCharWidths<'t> {
    /// Character iterator over the not-yet-consumed tail, `text[idx..]`.
    iter: Chars<'t>,
    /// The complete text being walked; needed to look behind the cursor.
    text: &'t str,
    /// Byte offset of the cursor within `text`.
    idx: usize,
}
impl<'t> NewlineNormalizedCharWidths<'t> {
    /// Builds a cursor positioned at the very start of `text`.
    pub fn new(text: &'t str) -> Self {
        let iter = text.chars();
        Self { iter, text, idx: 0 }
    }

    /// Moves the cursor one normalized character backwards and returns the
    /// step that was crossed, or `None` (leaving the cursor untouched) when
    /// the cursor is already at the start of the text.
    ///
    /// A `"\r\n"` pair is crossed in a single step of width 2; a lone `'\r'`
    /// or `'\n'` is crossed as a width-1 `'\n'`.
    pub fn previous(&mut self) -> Option<<Self as Iterator>::Item> {
        // Walk the already-consumed prefix from its end towards the start.
        let mut behind = self.text[..self.idx].chars().rev();
        let last = behind.next()?;

        let step = match last {
            // "\r\n" immediately behind the cursor: one logical newline
            // spanning two source characters.
            '\n' if behind.next() == Some('\r') => CharWidth {
                byte_width: '\r'.len_utf8() + '\n'.len_utf8(),
                char_width: 2,
                character: '\n',
            },
            // A carriage return on its own is normalized to '\n'.
            '\r' => CharWidth {
                byte_width: '\r'.len_utf8(),
                char_width: 1,
                character: '\n',
            },
            // Everything else, including a '\n' not preceded by '\r'.
            other => CharWidth {
                byte_width: other.len_utf8(),
                char_width: 1,
                character: other,
            },
        };

        // Rewind the byte offset and restart the forward iterator there so
        // that the next call to `next` re-yields what was just crossed.
        self.idx -= step.byte_width;
        self.iter = self.text[self.idx..].chars();
        Some(step)
    }

    /// Returns the character the next forward step would report, without
    /// moving the cursor. A pending `'\r'` is shown as `'\n'`.
    pub fn peek_character(&self) -> Option<char> {
        self.iter
            .clone()
            .next()
            .map(|ch| if ch == '\r' { '\n' } else { ch })
    }
}
impl<'t> Iterator for NewlineNormalizedCharWidths<'t> {
    type Item = CharWidth;

    /// Advances the cursor by one normalized character.
    ///
    /// `"\r\n"` is consumed as a single `'\n'` of width 2, a lone `'\r'` as a
    /// `'\n'` of width 1, and any other character is passed through as is.
    /// Returns `None` once the text is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let head = self.iter.next()?;

        let step = if head != '\r' {
            CharWidth {
                byte_width: head.len_utf8(),
                char_width: 1,
                character: head,
            }
        } else if self.iter.as_str().starts_with('\n') {
            // The '\r' is the first half of a "\r\n" pair: swallow the '\n'
            // too so the pair counts as one step.
            self.iter.next();
            CharWidth {
                byte_width: '\r'.len_utf8() + '\n'.len_utf8(),
                char_width: 2,
                character: '\n',
            }
        } else {
            // Lone carriage return, normalized to '\n'.
            CharWidth {
                byte_width: '\r'.len_utf8(),
                char_width: 1,
                character: '\n',
            }
        };

        // Keep the byte offset in step with what the iterator consumed.
        self.idx += step.byte_width;
        Some(step)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the `Some(CharWidth { .. })` value a step is expected
    /// to return.
    fn width(byte_width: usize, char_width: usize, character: char) -> Option<CharWidth> {
        Some(CharWidth {
            byte_width,
            char_width,
            character,
        })
    }

    /// Plain ASCII: forward to the end, back to the start, then forward again.
    #[test]
    fn test_ascii_no_newlines() {
        let mut cw = NewlineNormalizedCharWidths::new("in");

        assert_eq!(cw.peek_character(), Some('i'));
        assert_eq!(cw.next(), width(1, 1, 'i'));
        assert_eq!(cw.peek_character(), Some('n'));
        assert_eq!(cw.next(), width(1, 1, 'n'));
        assert_eq!(cw.peek_character(), None);
        assert_eq!(cw.next(), None);

        assert_eq!(cw.previous(), width(1, 1, 'n'));
        assert_eq!(cw.previous(), width(1, 1, 'i'));
        assert_eq!(cw.previous(), None);

        assert_eq!(cw.peek_character(), Some('i'));
        assert_eq!(cw.next(), width(1, 1, 'i'));
    }

    /// A two-byte character must report `byte_width == 2` in both directions.
    #[test]
    fn test_unicode_no_newlines() {
        let mut cw = NewlineNormalizedCharWidths::new("t\u{00e9}st");

        assert_eq!(cw.next(), width(1, 1, 't'));
        assert_eq!(cw.peek_character(), Some('\u{00e9}'));
        assert_eq!(cw.next(), width(2, 1, '\u{00e9}'));
        assert_eq!(cw.peek_character(), Some('s'));
        assert_eq!(cw.next(), width(1, 1, 's'));

        assert_eq!(cw.previous(), width(1, 1, 's'));
        assert_eq!(cw.previous(), width(2, 1, '\u{00e9}'));
        assert_eq!(cw.previous(), width(1, 1, 't'));
    }

    /// `"\n"`, a lone `"\r"`, and `"\r\n"` each come out as one `'\n'` step.
    #[test]
    fn test_newlines() {
        let mut cw = NewlineNormalizedCharWidths::new("\n\r\r\n");

        assert_eq!(cw.peek_character(), Some('\n'));
        assert_eq!(cw.next(), width(1, 1, '\n'));
        assert_eq!(cw.peek_character(), Some('\n'));
        assert_eq!(cw.next(), width(1, 1, '\n'));
        assert_eq!(cw.peek_character(), Some('\n'));
        assert_eq!(cw.next(), width(2, 2, '\n'));
        assert_eq!(cw.peek_character(), None);
        assert_eq!(cw.next(), None);

        assert_eq!(cw.previous(), width(2, 2, '\n'));
        assert_eq!(cw.previous(), width(1, 1, '\n'));
        assert_eq!(cw.previous(), width(1, 1, '\n'));
        assert_eq!(cw.previous(), None);
    }

    /// Empty input yields nothing in either direction.
    #[test]
    fn test_empty() {
        let mut cw = NewlineNormalizedCharWidths::new("");

        assert_eq!(cw.peek_character(), None);
        assert_eq!(cw.next(), None);
        assert_eq!(cw.previous(), None);
    }
}