extern crate unicode_width;
extern crate term_size;
#[cfg(feature = "hyphenation")]
extern crate hyphenation;
use unicode_width::UnicodeWidthStr;
use unicode_width::UnicodeWidthChar;
#[cfg(feature = "hyphenation")]
use hyphenation::{Hyphenation, Corpus};
/// A strategy for finding the places where a single word may be split
/// across two lines.
pub trait WordSplitter {
/// Returns the possible splits of `word` as `(head, hyphen, tail)` triples.
///
/// `head` and `tail` are slices of `word`; `hyphen` is the text to append
/// after `head` when the split is used (empty when nothing should be
/// inserted). The implementations in this file list the triples from the
/// shortest head to the longest and finish with the unsplit
/// `(word, "", "")`.
fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
}
/// A `WordSplitter` that never splits a word.
pub struct NoHyphenation;

impl WordSplitter for NoHyphenation {
    /// Returns exactly one triple: the whole `word` as head, with an empty
    /// hyphen and an empty tail.
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        let unsplit = (word, "", "");
        let mut triples = Vec::with_capacity(1);
        triples.push(unsplit);
        triples
    }
}
/// A `WordSplitter` that allows a word to be split after an existing
/// hyphen (`-`) that sits between two alphanumeric characters.
pub struct HyphenSplitter;

impl WordSplitter for HyphenSplitter {
    /// Returns one `(head, "", tail)` triple per usable hyphen, in order of
    /// appearance, where `head` ends with the hyphen. The unsplit
    /// `(word, "", "")` is always the last entry.
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        let mut triples = Vec::new();
        for (hyphen_pos, _) in word.match_indices('-') {
            // '-' is one byte long, so the cut lands right behind it.
            let cut = hyphen_pos + 1;
            let before = word[..hyphen_pos].chars().next_back();
            let after = word[cut..].chars().next();
            let splittable = match (before, after) {
                (Some(prev), Some(next)) => prev.is_alphanumeric() && next.is_alphanumeric(),
                // A hyphen at the very start or end has no neighbour.
                _ => false,
            };
            if splittable {
                triples.push((&word[..cut], "", &word[cut..]));
            }
        }
        triples.push((word, "", ""));
        triples
    }
}
/// Splits words at the hyphenation opportunities reported for this corpus.
///
/// Only compiled when the `hyphenation` feature is enabled.
#[cfg(feature = "hyphenation")]
impl WordSplitter for Corpus {
    /// Returns one triple per opportunity, in the order the corpus reports
    /// them, followed by the unsplit `(word, "", "")`. A `"-"` is inserted
    /// after the head unless the head already ends with one.
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        let mut triples: Vec<_> = word.opportunities(&self)
            .into_iter()
            .map(|n| {
                let (head, tail) = word.split_at(n);
                let hyphen = if head.ends_with('-') { "" } else { "-" };
                (head, hyphen, tail)
            })
            .collect();
        triples.push((word, "", ""));
        triples
    }
}
/// A line under construction: a `String` that starts out holding an indent
/// and remembers the byte length of that indent.
struct IndentedString {
    /// The indent followed by whatever has been pushed so far.
    value: String,
    /// Byte length of `value` when it holds nothing but the indent.
    empty_len: usize,
}

impl IndentedString {
    /// Creates a line that holds only `indent`, backed by a buffer with room
    /// for `capacity` bytes.
    #[inline]
    fn new(indent: &str, capacity: usize) -> IndentedString {
        let mut buffer = String::with_capacity(capacity);
        buffer.push_str(indent);
        let indent_len = buffer.len();
        IndentedString {
            value: buffer,
            empty_len: indent_len,
        }
    }

    /// Returns `true` while nothing has been added after the indent.
    #[inline]
    fn is_empty(&self) -> bool {
        self.empty_len == self.value.len()
    }

    /// Appends a single character.
    #[inline]
    fn push(&mut self, ch: char) {
        self.value.push(ch)
    }

    /// Appends a string slice.
    #[inline]
    fn push_str(&mut self, s: &str) {
        self.value += s;
    }

    /// Consumes the line and returns its text, indent included.
    fn into_string(self) -> String {
        let IndentedString { value, .. } = self;
        value
    }
}
pub struct Wrapper<'a> {
pub width: usize,
pub initial_indent: &'a str,
pub subsequent_indent: &'a str,
pub break_words: bool,
pub squeeze_whitespace: bool,
pub splitter: Box<WordSplitter>,
}
impl<'a> Wrapper<'a> {
pub fn new(width: usize) -> Wrapper<'a> {
Wrapper {
width: width,
initial_indent: "",
subsequent_indent: "",
break_words: true,
squeeze_whitespace: false,
splitter: Box::new(HyphenSplitter),
}
}
pub fn with_termwidth() -> Wrapper<'a> {
Wrapper::new(term_size::dimensions_stdout().map_or(80, |(w, _)| w))
}
pub fn initial_indent(self, indent: &'a str) -> Wrapper<'a> {
Wrapper { initial_indent: indent, ..self }
}
pub fn subsequent_indent(self, indent: &'a str) -> Wrapper<'a> {
Wrapper { subsequent_indent: indent, ..self }
}
pub fn break_words(self, setting: bool) -> Wrapper<'a> {
Wrapper { break_words: setting, ..self }
}
pub fn squeeze_whitespace(self, setting: bool) -> Wrapper<'a> {
Wrapper { squeeze_whitespace: setting, ..self }
}
pub fn word_splitter(self, splitter: Box<WordSplitter>) -> Wrapper<'a> {
Wrapper { splitter: splitter, ..self }
}
pub fn fill(&self, s: &str) -> String {
self.wrap(s).join("\n")
}
pub fn wrap(&self, s: &str) -> Vec<String> {
let mut lines = Vec::with_capacity(s.len() / (self.width + 1));
let mut line = IndentedString::new(self.initial_indent, self.width);
let mut remaining = self.width - self.initial_indent.width();
const NBSP: char = '\u{a0}';
for mut word in s.split(|c: char| c.is_whitespace() && c != NBSP) {
if self.squeeze_whitespace && word.is_empty() {
continue;
}
if self.fit_part(word, "", &mut remaining, &mut line) {
continue;
}
while !word.is_empty() {
let splits = self.splitter.split(word);
let (smallest, hyphen, longest) = splits[0];
let min_width = smallest.width() + hyphen.len();
if !line.is_empty() && 1 + min_width > remaining {
lines.push(line.into_string());
line = IndentedString::new(self.subsequent_indent, self.width);
remaining = self.width - self.subsequent_indent.width();
}
for &(head, hyphen, tail) in splits.iter().rev() {
if self.fit_part(head, hyphen, &mut remaining, &mut line) {
word = tail;
break;
}
}
if line.is_empty() {
if self.break_words && self.width > 1 {
let mut head_width = 0;
for (idx, c) in word.char_indices() {
head_width += c.width().unwrap_or(0);
if head_width > remaining {
let (head, tail) = word.split_at(idx);
line.push_str(head);
lines.push(line.into_string());
line = IndentedString::new(self.subsequent_indent, self.width);
word = tail;
break;
}
}
} else {
lines.push(String::from(smallest) + hyphen);
remaining = self.width;
word = longest;
}
}
}
}
if !line.is_empty() {
lines.push(line.into_string());
}
lines
}
fn fit_part<'b>(&self,
part: &'b str,
hyphen: &'b str,
remaining: &mut usize,
line: &mut IndentedString)
-> bool {
let space = if line.is_empty() { 0 } else { 1 };
let space_needed = space + part.width() + hyphen.len();
let fits_in_line = space_needed <= *remaining;
if fits_in_line {
if !line.is_empty() {
line.push(' ');
}
line.push_str(part);
line.push_str(hyphen);
*remaining -= space_needed;
}
fits_in_line
}
}
/// Wraps `s` to lines of at most `width` columns and returns them as one
/// string, joined with `"\n"` and without a trailing newline.
///
/// The wrapping itself is done by the free function `wrap`.
pub fn fill(s: &str, width: usize) -> String {
    let lines = wrap(s, width);
    lines.join("\n")
}
/// Wraps `s` to lines of at most `width` columns.
///
/// Shorthand for `Wrapper::new(width).wrap(s)`, i.e. wrapping with the
/// default settings of a `Wrapper`.
pub fn wrap(s: &str, width: usize) -> Vec<String> {
    let wrapper = Wrapper::new(width);
    wrapper.wrap(s)
}
/// Puts `prefix` in front of every line of `s` that contains something
/// other than whitespace.
///
/// Lines that are empty or whitespace-only come out as empty lines. Every
/// output line, including the last, ends with `'\n'`; an empty input gives
/// an empty string.
pub fn indent(s: &str, prefix: &str) -> String {
    s.lines().fold(String::new(), |mut indented, text| {
        let blank = text.chars().all(|ch| ch.is_whitespace());
        if !blank {
            indented.push_str(prefix);
            indented.push_str(text);
        }
        indented.push('\n');
        indented
    })
}
/// Removes the leading whitespace that all non-blank lines of `s` have in
/// common.
///
/// Lines that are empty or consist only of whitespace do not take part in
/// finding the common prefix and come out as empty lines. Whitespace is
/// compared character by character, so a tab and a space never match.
/// Every output line, including the last, ends with `'\n'`; an empty input
/// gives an empty string.
pub fn dedent(s: &str) -> String {
    // Longest whitespace prefix shared by all non-blank lines seen so far;
    // `None` until the first non-blank line.
    let mut prefix: Option<&str> = None;
    for line in s.lines() {
        let content_start = match line.find(|c: char| !c.is_whitespace()) {
            Some(idx) => idx,
            // Blank line: it must not shorten the prefix.
            None => continue,
        };
        let leading = &line[..content_start];
        prefix = Some(match prefix {
            None => leading,
            Some(current) => {
                let common_len = current.chars()
                    .zip(leading.chars())
                    .take_while(|&(a, b)| a == b)
                    .fold(0, |len, (a, _)| len + a.len_utf8());
                &current[..common_len]
            }
        });
    }
    let prefix = prefix.unwrap_or("");

    let mut result = String::new();
    for line in s.lines() {
        if line.chars().any(|c| !c.is_whitespace()) {
            // Every non-blank line starts with `prefix` by construction.
            result.push_str(&line[prefix.len()..]);
        }
        result.push('\n');
    }
    result
}
/// Unit tests for the wrapping, indenting and dedenting functions.
///
/// String literals whose exact whitespace or code points matter spell the
/// non-ASCII characters as escapes so that they survive editors and tools
/// that normalise text.
#[cfg(test)]
mod tests {
    #[cfg(feature = "hyphenation")]
    extern crate hyphenation;

    #[cfg(feature = "hyphenation")]
    use hyphenation::Language;
    use super::*;

    /// Joins `lines` with newlines and appends a final newline.
    fn add_nl(lines: &[&str]) -> String {
        lines.join("\n") + "\n"
    }

    #[test]
    fn no_wrap() {
        assert_eq!(wrap("foo", 10), vec!["foo"]);
    }

    #[test]
    fn simple() {
        assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]);
    }

    #[test]
    fn multi_word_on_line() {
        assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]);
    }

    #[test]
    fn long_word() {
        assert_eq!(wrap("foo", 0), vec!["foo"]);
    }

    #[test]
    fn long_words() {
        assert_eq!(wrap("foo bar", 0), vec!["foo", "bar"]);
    }

    #[test]
    fn whitespace_is_significant() {
        // Three spaces go in and three spaces come out.
        assert_eq!(wrap("foo:   bar baz", 10), vec!["foo:   bar", "baz"]);
    }

    #[test]
    fn extra_whitespace_start_of_line() {
        // Whitespace is only kept inside a line: the spaces that still fit
        // end up trailing on the first line, the rest is dropped and the
        // next line starts in column zero.
        assert_eq!(wrap("foo     bar", 5), vec!["foo  ", "bar"]);
    }

    #[test]
    fn whitespace_is_squeezed() {
        let wrapper = Wrapper::new(10).squeeze_whitespace(true);
        assert_eq!(wrapper.wrap(" foo \t bar "), vec!["foo bar"]);
    }

    #[test]
    fn wide_character_handling() {
        assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]);
        // The same text in full-width letters (two columns each) no longer
        // fits on one line of 15 columns.
        assert_eq!(wrap("\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}, \
                         \u{ff37}\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!",
                        15),
                   vec!["\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f},",
                        "\u{ff37}\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!"]);
    }

    #[test]
    fn indent_empty() {
        let wrapper = Wrapper::new(10).initial_indent("!!!");
        assert_eq!(wrapper.fill(""), "");
    }

    #[test]
    fn indent_single_line() {
        let wrapper = Wrapper::new(10).initial_indent(">>>");
        assert_eq!(wrapper.fill("foo"), ">>>foo");
    }

    #[test]
    fn indent_multiple_lines() {
        let wrapper = Wrapper::new(6).initial_indent("* ").subsequent_indent("  ");
        assert_eq!(wrapper.wrap("foo bar baz"), vec!["* foo", "  bar", "  baz"]);
    }

    #[test]
    fn indent_break_words() {
        let wrapper = Wrapper::new(5).initial_indent("* ").subsequent_indent("  ");
        assert_eq!(wrapper.wrap("foobarbaz"), vec!["* foo", "  bar", "  baz"]);
    }

    #[test]
    fn hyphens() {
        assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]);
    }

    #[test]
    fn trailing_hyphen() {
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.wrap("foobar-"), vec!["foobar-"]);
    }

    #[test]
    fn multiple_hyphens() {
        assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]);
    }

    #[test]
    fn hyphens_flag() {
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.wrap("The --foo-bar flag."),
                   vec!["The", "--foo-", "bar", "flag."]);
    }

    #[test]
    fn repeated_hyphens() {
        let wrapper = Wrapper::new(4).break_words(false);
        assert_eq!(wrapper.wrap("foo--bar"), vec!["foo--bar"]);
    }

    #[test]
    fn hyphens_alphanumeric() {
        assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]);
    }

    #[test]
    fn hyphens_non_alphanumeric() {
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.wrap("foo(-)bar"), vec!["foo(-)bar"]);
    }

    #[test]
    fn multiple_splits() {
        assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]);
    }

    #[test]
    fn forced_split() {
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.wrap("foobar-baz"), vec!["foobar-", "baz"]);
    }

    #[test]
    fn no_hyphenation() {
        let wrapper = Wrapper::new(8).word_splitter(Box::new(NoHyphenation));
        assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn auto_hyphenation() {
        let corpus = hyphenation::load(Language::English_US).unwrap();
        let wrapper = Wrapper::new(10);
        assert_eq!(wrapper.wrap("Internationalization"),
                   vec!["Internatio", "nalization"]);

        let wrapper = wrapper.word_splitter(Box::new(corpus));
        assert_eq!(wrapper.wrap("Internationalization"),
                   vec!["Interna-", "tionaliza-", "tion"]);
    }

    #[test]
    #[cfg(feature = "hyphenation")]
    fn auto_hyphenation_with_hyphen() {
        let corpus = hyphenation::load(Language::English_US).unwrap();
        let wrapper = Wrapper::new(8).break_words(false);
        assert_eq!(wrapper.wrap("over-caffinated"), vec!["over-", "caffinated"]);

        let wrapper = wrapper.word_splitter(Box::new(corpus));
        assert_eq!(wrapper.wrap("over-caffinated"),
                   vec!["over-", "caffi-", "nated"]);
    }

    #[test]
    fn break_words() {
        assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]);
    }

    #[test]
    fn break_words_wide_characters() {
        // Full-width "Hello": ten columns, two per letter.
        assert_eq!(wrap("\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}", 5),
                   vec!["\u{ff28}\u{ff45}", "\u{ff4c}\u{ff4c}", "\u{ff4f}"]);
    }

    #[test]
    fn break_words_zero_width() {
        assert_eq!(wrap("foobar", 0), vec!["foobar"]);
    }

    #[test]
    fn test_non_breaking_space() {
        // U+00A0 is whitespace but must not separate words.
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.fill("foo\u{a0}bar\u{a0}baz"),
                   "foo\u{a0}bar\u{a0}baz");
    }

    #[test]
    fn test_non_breaking_hyphen() {
        // U+2011 looks like a hyphen but is not a split point.
        let wrapper = Wrapper::new(5).break_words(false);
        assert_eq!(wrapper.fill("foo\u{2011}bar\u{2011}baz"),
                   "foo\u{2011}bar\u{2011}baz");
    }

    #[test]
    fn test_fill() {
        assert_eq!(fill("foo bar baz", 10), "foo bar\nbaz");
    }

    #[test]
    fn test_indent_empty() {
        assert_eq!(indent("\n", "  "), "\n");
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn test_indent_nonempty() {
        let x = vec!["  foo",
                     "bar",
                     "  baz"];
        let y = vec!["//  foo",
                     "//bar",
                     "//  baz"];
        assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn test_indent_empty_line() {
        let x = vec!["  foo",
                     "bar",
                     "",
                     "  baz"];
        let y = vec!["//  foo",
                     "//bar",
                     "",
                     "//  baz"];
        assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
    }

    #[test]
    fn test_dedent_empty() {
        assert_eq!(dedent(""), "");
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn test_dedent_multi_line() {
        let x = vec!["    foo",
                     "  bar",
                     "    baz"];
        let y = vec!["  foo",
                     "bar",
                     "  baz"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn test_dedent_empty_line() {
        let x = vec!["    foo",
                     "  bar",
                     "   ",
                     "    baz"];
        let y = vec!["  foo",
                     "bar",
                     "",
                     "  baz"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn test_dedent_mixed_whitespace() {
        // A tab and a space share no prefix, so nothing is removed.
        let x = vec!["\tfoo",
                     "  bar"];
        let y = vec!["\tfoo",
                     "  bar"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }
}