use super::{Indentation, Range};
use std::borrow::Cow;
use std::collections::VecDeque;
/// Iterator state for cutting a string into whitespace-delimited tokens
/// while honouring a list of character ranges that must not be broken.
#[derive(Debug, Clone)]
pub struct Tokeneer<'s> {
/// The string that is being tokenized; emitted tokens borrow from it.
s: &'s str,
/// Ranges, in character indices, inside which no token boundary is created.
unbreakable_ranges: Vec<Range>,
/// Index into `unbreakable_ranges` of the range currently being checked.
unbreakable_idx: usize,
/// Characters of `s`, each yielded as `(char index, (byte offset, char))`.
inner: std::iter::Peekable<std::iter::Enumerate<std::str::CharIndices<'s>>>,
/// Byte offset in `s` at which the next emitted token starts.
previous_byte_offset: usize,
/// Character index in `s` at which the next emitted token starts.
previous_char_offset: usize,
}
impl<'s> Tokeneer<'s> {
    /// Create a tokenizer over `s`.
    ///
    /// `unbreakable_ranges` are expressed in character indices (not bytes);
    /// they are compared against the enumeration index of each character.
    pub fn new(s: &'s str, unbreakable_ranges: Vec<Range>) -> Self {
        Self {
            s,
            unbreakable_ranges,
            unbreakable_idx: 0,
            inner: s.char_indices().enumerate().peekable(),
            previous_byte_offset: 0,
            previous_char_offset: 0,
        }
    }

    /// Append further unbreakable character ranges after the ones already
    /// registered. No sorting or merging is done here.
    #[inline(always)]
    pub fn add_unbreakables(&mut self, unbreakable_ranges: impl IntoIterator<Item = Range>) {
        self.unbreakable_ranges.extend(unbreakable_ranges);
    }

    /// Build the token that starts at the remembered start offsets and ends
    /// with the character at `char_idx`.
    ///
    /// The byte range ends at the byte offset of the next pending character,
    /// or at the end of the string when no character is left. Returns
    /// `(char range, byte range, text)`.
    ///
    /// If the input is exhausted and the remembered start lies beyond
    /// `byte_offset`, debug builds panic and release builds log an error and
    /// return `None`.
    pub fn craft_token(
        &mut self,
        char_idx: usize,
        byte_offset: usize,
    ) -> Option<(Range, Range, Cow<'s, str>)> {
        let start = self.previous_byte_offset;
        let end = match self.inner.peek() {
            // More input follows: the token stops where the next char begins.
            Some(&(_, (next_byte_offset, _))) => next_byte_offset,
            // Last token of the input: it runs up to the end of the string.
            None if start <= byte_offset => self.s.len(),
            None => {
                if cfg!(debug_assertions) {
                    unreachable!(
                        "Unreachable condition was reached (byte_offset={}, previous={})",
                        byte_offset, start
                    )
                } else {
                    log::error!(
                        "Should be never be reachable, please file a bug with the corresponding input data"
                    );
                    return None;
                }
            }
        };
        let byte_range = start..end;
        let char_range = self.previous_char_offset..char_idx + 1;
        let text = Cow::Borrowed(&self.s[byte_range.clone()]);
        Some((char_range, byte_range, text))
    }
}
impl<'s> Iterator for Tokeneer<'s> {
    /// `(char range, byte range, text)` of one token.
    type Item = (Range, Range, Cow<'s, str>);

    /// Yield the next token, or `None` once the input is exhausted.
    ///
    /// A token ends at a non-whitespace character that is followed by
    /// whitespace or by the end of the input. Whitespace moves the start of
    /// the next token to the position right after it. Characters inside the
    /// current unbreakable range whose successor is also inside that range
    /// are skipped without any boundary handling, so whitespace there does
    /// not end a token.
    fn next(&mut self) -> Option<Self::Item> {
        'outer: while let Some((char_idx, (byte_offset, c))) = self.inner.next() {
            'unbreakable: while let Some(unbreakable) =
                self.unbreakable_ranges.get(self.unbreakable_idx)
            {
                // The current unbreakable range has not been reached yet.
                if char_idx < unbreakable.start {
                    break 'unbreakable;
                }
                if unbreakable.contains(&char_idx) {
                    if unbreakable.contains(&(char_idx + 1)) {
                        // Still strictly inside: no boundary may be created.
                        continue 'outer;
                    } else {
                        // Last character of the range: handle it normally.
                        break 'unbreakable;
                    }
                }
                // Past the current range: advance only once the following
                // range actually covers this character.
                let idx = self.unbreakable_idx + 1;
                if let Some(unbreakable_next) = self.unbreakable_ranges.get(idx) {
                    if unbreakable_next.contains(&char_idx) {
                        self.unbreakable_idx = idx;
                        continue 'unbreakable;
                    }
                }
                break 'unbreakable;
            }

            if c.is_whitespace() {
                // The next token starts right after this whitespace. Unicode
                // whitespace can be several bytes wide (e.g. U+00A0, U+3000),
                // so advance by the encoded width; a fixed `+ 1` would leave
                // the start inside a character and make the later string
                // slice panic.
                self.previous_byte_offset = byte_offset + c.len_utf8();
                self.previous_char_offset = char_idx + 1;
                continue 'outer;
            }

            // Keep consuming while the token continues with a non-whitespace
            // character.
            if let Some((_, (_, c_next))) = self.inner.peek() {
                if !c_next.is_whitespace() {
                    continue 'outer;
                }
            }

            if let Some(item) = self.craft_token(char_idx, byte_offset) {
                return Some(item);
            }
        }
        None
    }
}
/// Iterator state for gluing the tokens of a `Tokeneer` back together into
/// lines that are checked against a maximum width.
#[derive(Debug, Clone)]
pub struct Gluon<'s> {
/// Tokens (character range and text) collected for the line being assembled.
queue: VecDeque<(Range, std::borrow::Cow<'s, str>)>,
/// Width limit a line is checked against, counted in characters.
max_line_width: usize,
/// Number of lines crafted so far; also reported as the 1-based line number.
line_counter: usize,
/// Indentations to look up per line; when the lookup index is out of
/// bounds the last entry is used.
indentations: &'s [Indentation<'s>],
/// Source of the tokens that get glued together.
inner: Tokeneer<'s>,
}
impl<'s> Gluon<'s> {
    /// Create a gluer over `s` that checks lines against `max_line_width`
    /// characters, taking the offsets in `indentations` into account.
    ///
    /// No unbreakable ranges are registered initially.
    pub(crate) fn new(
        s: &'s str,
        max_line_width: usize,
        indentations: &'s [Indentation<'s>],
    ) -> Self {
        let inner = Tokeneer::new(s, Vec::new());
        Self {
            queue: VecDeque::new(),
            max_line_width,
            line_counter: 0,
            indentations,
            inner,
        }
    }

    /// Register character ranges that the underlying tokenizer must not split.
    #[inline]
    #[allow(unused)]
    pub(crate) fn add_unbreakables(&mut self, unbreakable_ranges: impl IntoIterator<Item = Range>) {
        self.inner.add_unbreakables(unbreakable_ranges);
    }

    /// Drain the queue into one line, joining the queued tokens with a
    /// single space.
    ///
    /// Returns the new (1-based) line number, the line text, and the
    /// character range spanned by the drained tokens. With an empty queue
    /// the returned range is `usize::MAX..0`.
    fn craft_line(&mut self) -> (usize, String, Range) {
        use itertools::Itertools;

        self.line_counter += 1;

        // Smallest start and largest end seen among the drained tokens.
        let mut start = usize::MAX;
        let mut end = 0usize;
        let line_content = self
            .queue
            .drain(..)
            .map(|(range, fragment)| {
                start = start.min(range.start);
                end = end.max(range.end);
                fragment
            })
            .inspect(|fragment| log::trace!("Gluing together: {:?} with > <", fragment))
            .join(" ");

        (self.line_counter, line_content, start..end)
    }
}
impl<'s> Iterator for Gluon<'s> {
    /// `(1-based line number, line text, character range covered)`.
    type Item = (usize, String, Range);

    /// Assemble and return the next line, or `None` once every token has
    /// been emitted.
    ///
    /// Tokens are queued while indentation offset, queued tokens, the
    /// separating spaces and the new token together stay within
    /// `max_line_width`. The first token that does not fit completes the
    /// current line and becomes the start of the next one. A token arriving
    /// at an empty line is always queued, even if it is too wide on its
    /// own, so no empty line is ever emitted.
    fn next(&mut self) -> Option<Self::Item> {
        // NOTE(review): `line_counter` is the number of lines already
        // emitted, so `line_counter + 1` never looks at the first entry of
        // `indentations` - confirm that this offset is intended.
        let indentation = self
            .indentations
            .get(self.line_counter + 1)
            .copied()
            .unwrap_or_else(|| self.indentations.last().copied().unwrap_or_default());
        let offset = indentation.offset();

        while let Some((char_range, _byte_range, cow_str)) = self.inner.next() {
            let item_len = char_range.len();
            if item_len > self.max_line_width {
                log::warn!(
                    "A unbreakable chunk is larger than the max line width {} vs {}",
                    item_len,
                    self.max_line_width
                );
            }
            let item = (char_range, cow_str);

            // Nothing queued yet: there is no line to finish, so the token
            // starts the line no matter how wide it is. Crafting here would
            // produce an empty line with the range `usize::MAX..0`.
            if self.queue.is_empty() {
                self.queue.push_back(item);
                continue;
            }

            // Width of the queued tokens including one space between
            // neighbours.
            let acc_len = self.queue.iter().map(|(r, _)| r.len()).sum::<usize>()
                + self.queue.len()
                - 1;

            // `+ 1` is the space that would separate the new token.
            if offset + acc_len + 1 + item_len <= self.max_line_width {
                self.queue.push_back(item);
                continue;
            }

            // The token does not fit: finish the current line and let the
            // token open the next one.
            let line = self.craft_line();
            self.queue.push_back(item);
            return Some(line);
        }

        // Input exhausted: flush whatever is still queued as the last line.
        if self.queue.is_empty() {
            None
        } else {
            Some(self.craft_line())
        }
    }
}
// Unit tests for the tokenizer (`Tokeneer`) and the line gluer (`Gluon`).
#[cfg(test)]
mod tests {
use super::*;
/// Reflow `content` with a `Gluon` limited to `max_line_width`, using the
/// given unbreakable character ranges and indentation values, and compare
/// the produced lines (number and text) with the lines of `expected`.
fn verify_reflow(
content: &'static str,
expected: &'static str,
max_line_width: usize,
unbreakables: Vec<Range>,
indentations: Vec<usize>,
) {
let indentations = indentations
.into_iter()
.map(|n| Indentation::<'static>::new(n))
.collect::<Vec<_>>();
// Pair every expected line with its 1-based line number.
let expected = expected
.lines()
.enumerate()
.map(|(idx, line)| (idx + 1, line));
let mut gluon = Gluon::new(content, max_line_width, &indentations);
gluon.add_unbreakables(unbreakables);
// `zip` stops at the shorter side, so this loop only checks the common prefix.
for ((line_no, line_content, _), (expected_no, expected_content)) in
gluon.clone().zip(expected.clone())
{
assert_eq!(line_no, expected_no);
assert_eq!(dbg!(line_content), dbg!(expected_content));
}
// The number of produced lines must match the number of expected lines.
assert_eq!(dbg!(gluon).count(), expected.count());
}
/// Tests of the tokenizer on its own.
mod tokeneer {
use super::*;
/// Tokenize `content` and compare each produced token text with the entry
/// of `expected` at the same position.
///
/// NOTE: entries of `expected` beyond the number of produced tokens are
/// not checked.
fn verify(content: &'static str, expected: &[&'static str], unbreakables: Vec<Range>) {
let mut expected_iter = expected.into_iter();
let tokeneer = Tokeneer::new(content, unbreakables);
for (idx, (_char_range, _byte_range, s)) in tokeneer.enumerate() {
let expected = expected_iter
.next()
.expect("Must be of equal length at index");
println!("idx {} : {} <=> {}", idx, s, expected);
assert_eq!(s.to_owned(), expected.to_owned());
}
}
// Multi-byte characters, with the first two covered by an unbreakable range.
#[test]
fn smilies() {
const CONTENT: &str = "🍇🌡 🌤";
const EXPECTED: &[&'static str] = &["🍇🌡", "🌤"];
verify(CONTENT, EXPECTED, vec![0..2]);
}
// Plain words separated by single spaces, no unbreakable ranges.
#[test]
fn multi_char() {
const CONTENT: &str = "abc xyz qwert";
const EXPECTED: &[&'static str] = &["abc", "xyz", "qwert"];
verify(CONTENT, EXPECTED, vec![]);
}
// The unbreakable range covers the space between two words, so they stay one token.
#[test]
fn partial_covered_word_unbreakable() {
const CONTENT: &str = "abc xyz qwert";
const EXPECTED: &[&'static str] = &["abc xyz", "qwert"];
verify(CONTENT, EXPECTED, vec![2..5]);
}
}
/// Tests of the gluer, driven through `verify_reflow`.
mod gluon {
use super::*;
// A single line longer than the limit is broken into two lines.
#[test]
fn wrap_too_long_fluid() {
const CONTENT: &str = "something kinda too long for a single line";
const EXPECTED: &str = r#"something kinda too long for a
single line"#;
verify_reflow(CONTENT, EXPECTED, 30usize, vec![], vec![0]);
}
// Many short lines are merged into as few lines as the limit allows.
#[test]
fn wrap_too_short_fluid() {
const CONTENT: &str = r#"something
kinda
too
short
for
a
single
line"#;
const EXPECTED: &str = r#"something kinda too short for
a single line"#;
verify_reflow(CONTENT, EXPECTED, 30usize, vec![], vec![0; 8]);
}
// Content that already fits within the limit is left as is.
#[test]
fn wrap_just_fine() {
const CONTENT: &str = r#"just fine, no action required 🐱"#;
const EXPECTED: &str = CONTENT;
verify_reflow(CONTENT, EXPECTED, 40usize, vec![], vec![0]);
}
// The range 20..37 must stay together, so the break happens before it.
#[test]
fn wrap_too_long_unbreakable() {
const CONTENT: &str = "something kinda too Xong for a singlX line";
const EXPECTED: &str = r#"something kinda too
Xong for a singlX line"#;
verify_reflow(CONTENT, EXPECTED, 30usize, vec![20..37], vec![0]);
}
// Leading and trailing whitespace does not show up in the output.
#[test]
fn spaces_and_tabs() {
const CONTENT: &str = " something kinda ";
const EXPECTED: &str = r#"something kinda"#;
verify_reflow(CONTENT, EXPECTED, 20usize, vec![], vec![0]);
}
// An indentation of 15 leaves too little room for both words on one line.
#[test]
fn deep_indentation_too_long() {
const CONTENT: &str = r#"deep indentation"#;
const EXPECTED: &str = r#"deep
indentation"#;
verify_reflow(CONTENT, EXPECTED, 20usize, vec![], vec![15]);
}
// With an indentation of 5 and a limit of 22 both words fit on one line.
#[test]
fn deep_indentation_too_short() {
// Enable trace logging for this test; the result is ignored since the
// logger may already have been initialized.
let _ = env_logger::builder()
.is_test(true)
.filter(None, log::LevelFilter::Trace)
.try_init();
const CONTENT: &str = r#"deep
indentation"#;
const EXPECTED: &str = r#"deep indentation"#;
verify_reflow(CONTENT, EXPECTED, 22usize, vec![], vec![5, 5]);
}
}
}