use lazy_static::lazy_static;
use log::{debug, trace};
use paragraph_breaker::{Item, INFINITE_PENALTY};
use regex::Regex;
use unicode_width::UnicodeWidthChar;
/// Target width of one subtitle line, in the same units as `WordInfo::width`.
/// Text that fits within this width is returned unsplit, and the value is
/// passed to the line breaker as the desired line length.
const MAX_LINE_LENGTH: usize = 30;
/// Maximum number of lines grouped into a single subtitle.
const MAX_LINES: usize = 2;
/// The kind of textual unit that a word closes.
#[derive(Debug)]
enum Ends {
    /// The word closes a phrase.
    Phrase,
    /// The word closes a sentence.
    Sentence,
}

impl Ends {
    /// Penalty attached to a break after a phrase-ending word.
    const PHRASE_PENALTY: i32 = -1_000;
    /// Penalty attached to a break after a sentence-ending word.
    const SENTENCE_PENALTY: i32 = -5_000;

    /// Returns the line-breaking penalty for breaking right after this kind
    /// of ending. Both values are negative, and the sentence value is the
    /// lower of the two.
    fn penalty(&self) -> i32 {
        match *self {
            Self::Sentence => Self::SENTENCE_PENALTY,
            Self::Phrase => Self::PHRASE_PENALTY,
        }
    }
}
/// A single word plus the metadata the line breaker needs about it.
#[derive(Debug)]
struct WordInfo<'a> {
/// The word's text, borrowed for the lifetime `'a`.
word: &'a str,
/// Width of the word: the sum of `UnicodeWidthChar::width` over its
/// characters, counting any character that reports no width as 1.
width: usize,
/// Whether the word closes a sentence or a phrase, or `None` if it does
/// neither.
ends: Option<Ends>,
}
impl WordInfo<'_> {
    /// Builds the breaking metadata for `word`.
    ///
    /// A word is classified as ending a sentence when it finishes with a
    /// `Sentence_Terminal` character, optionally followed by one
    /// `Terminal_Punctuation` character. Failing that, it is classified as
    /// ending a phrase when its last character is `Terminal_Punctuation`.
    /// The width is the sum of the per-character display widths, with
    /// characters that report no width counted as 1.
    fn new(word: &str) -> WordInfo {
        lazy_static! {
            static ref SENTENCE_END: Regex =
                Regex::new(r"[\p{Sentence_Terminal}][\p{Terminal_Punctuation}]?$").unwrap();
            static ref PHRASE_END: Regex = Regex::new(r"[\p{Terminal_Punctuation}]$").unwrap();
        }

        // The sentence test takes precedence over the phrase test.
        let ends = match (SENTENCE_END.is_match(word), PHRASE_END.is_match(word)) {
            (true, _) => Some(Ends::Sentence),
            (false, true) => Some(Ends::Phrase),
            (false, false) => None,
        };

        let mut width = 0;
        for ch in word.chars() {
            width += ch.width().unwrap_or(1);
        }

        WordInfo { word, width, ends }
    }
}
/// Splits `text` into subtitles of at most `MAX_LINES` lines each.
///
/// The text is split into words on runs of whitespace. If the words, joined
/// by single spaces, fit within `MAX_LINE_LENGTH`, the original `text` is
/// returned verbatim as the only subtitle. Otherwise the words are handed to
/// `paragraph_breaker::total_fit` with `MAX_LINE_LENGTH` as the desired line
/// length, and words that end a phrase or sentence are followed by a negative
/// penalty. The resulting lines are grouped `MAX_LINES` at a time and each
/// group is joined with `\n`.
///
/// Leading and trailing whitespace in `text` is ignored when measuring and
/// breaking, so it never produces empty words or stray spaces in the
/// returned lines.
pub fn segment_subtitle_text(text: &str) -> Vec<String> {
    lazy_static! {
        static ref WHITESPACE: Regex = Regex::new(r"\s+").unwrap();
    }

    // `Regex::split` yields an empty string before a leading match and after
    // a trailing one; those are not words, so drop them.
    let words = WHITESPACE
        .split(text)
        .filter(|word| !word.is_empty())
        .map(WordInfo::new)
        .collect::<Vec<_>>();

    // Width of the words joined by single spaces. `saturating_sub` keeps an
    // empty or all-whitespace input (no words at all) from underflowing.
    let total_width =
        words.iter().map(|w| w.width).sum::<usize>() + words.len().saturating_sub(1);
    if total_width <= MAX_LINE_LENGTH {
        return vec![text.to_string()];
    }

    // Each word contributes a box followed by glue, plus a penalty in
    // between when the word ends a phrase or sentence.
    let mut items = Vec::with_capacity(words.len() * 3 + 1);
    for word in &words {
        items.push(Item::Box {
            width: word.width as i32,
            data: word.word,
        });
        if let Some(ends) = &word.ends {
            items.push(Item::Penalty {
                width: 0,
                penalty: ends.penalty(),
                flagged: false,
            });
            // NOTE(review): this glue has width 0, yet the words on a line
            // are later joined with a one-column space, so a line with
            // punctuation in its middle may render wider than the breaker
            // accounted for. Confirm this is intended.
            items.push(Item::Glue {
                width: 0,
                stretch: 3,
                shrink: 0,
            });
        } else {
            items.push(Item::Glue {
                width: 1,
                stretch: 2,
                shrink: 0,
            });
        }
    }
    // Closing penalty of `-INFINITE_PENALTY` after the last word; presumably
    // this forces the final break, as in Knuth-Plass style breakers.
    items.push(Item::Penalty {
        width: 0,
        penalty: -INFINITE_PENALTY,
        flagged: false,
    });

    let ideal_line_length = MAX_LINE_LENGTH as i32;
    let breakpoints = paragraph_breaker::total_fit(&items, &[ideal_line_length], 20.0, 0);
    trace!("Words: {:?}", words);
    trace!("Breakpoints: {:?}", breakpoints);

    // Line boundaries: every reported breakpoint, then the end of `items` so
    // that anything after the last breakpoint (or everything, if the breaker
    // returned no breakpoints) still becomes a line.
    let mut cut_points = breakpoints
        .iter()
        .map(|bp| bp.index as usize)
        .collect::<Vec<_>>();
    cut_points.push(items.len());

    let mut lines = Vec::with_capacity(cut_points.len());
    let mut start_idx = 0;
    for &end_idx in &cut_points {
        let line = items[start_idx..end_idx]
            .iter()
            .filter_map(|item| match item {
                Item::Box { data, .. } => Some(*data),
                _ => None,
            })
            .collect::<Vec<_>>()
            .join(" ");
        // A span holding only glue/penalties (typically the tail after the
        // final breakpoint) contains no text; never emit it as a blank line.
        if !line.is_empty() {
            lines.push(line);
        }
        start_idx = end_idx;
    }

    let subtitles = lines
        .chunks(MAX_LINES)
        .map(|chunk| chunk.join("\n"))
        .collect::<Vec<_>>();
    if subtitles.len() > 1 {
        debug!("Broke subtitles into: {:?}", subtitles);
    }
    subtitles
}