use std::borrow::Cow;
use hyphenation_commons::dictionary::extended::*;
use hyphenation_commons::dictionary::*;
use crate::case_folding::{realign, refold, Shift};
use crate::score::Score;
/// Byte offsets of every soft hyphen (U+00AD) contained in `word`.
///
/// Returns `None` rather than an empty vector when `word` holds no soft
/// hyphen at all.
pub fn soft_hyphen_indices(word : &str) -> Option<Vec<usize>> {
    const SOFT_HYPHEN : char = '\u{00ad}';

    let positions : Vec<usize> = word.char_indices()
                                     .filter(|&(_, c)| c == SOFT_HYPHEN)
                                     .map(|(at, _)| at)
                                     .collect();

    if positions.is_empty() {
        None
    } else {
        Some(positions)
    }
}
/// A word paired with the breaks computed for it.
///
/// This is the return type of `Hyphenator::hyphenate`; `Break` is whatever
/// the hyphenator uses as its `Opportunity` type.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Word<'t, Break> {
/// The text the breaks refer to, borrowed for `'t`. The `hyphenate`
/// implementations in this file store the caller's original `&str` here.
pub text : &'t str,
/// The breaks found for `text`. The implementations in this file fill this
/// with byte indices (the `Extended` one pairs each index with an optional
/// subregion).
pub breaks : Vec<Break>,
}
/// Interface for finding break opportunities in words.
///
/// `'h` is the lifetime for which the hyphenator is borrowed by the lookup
/// methods; an `Opportunity` is allowed to borrow from the hyphenator for
/// that long.
pub trait Hyphenator<'h> {
    /// A break opportunity as returned by the lookup methods.
    type Opportunity;

    /// The owned form of an opportunity, as accepted by `add_exception` and
    /// handed back by `add_exception` / `remove_exception`.
    type Exact;

    /// Hyphenate `word`, returning its text together with the breaks found.
    fn hyphenate<'t>(&'h self, word : &'t str) -> Word<'t, Self::Opportunity>;

    /// The break opportunities of `lowercase_word`.
    ///
    /// Yields an empty vector when `boundaries` rejects the word. Otherwise a
    /// known exception (see `exception`) takes precedence, and only when none
    /// exists is `opportunities_within` consulted.
    ///
    /// As the parameter name indicates, the word is expected to be lowercase
    /// already; this method performs no case conversion itself.
    fn opportunities(&'h self, lowercase_word : &str) -> Vec<Self::Opportunity> {
        let bounds = match self.boundaries(lowercase_word) {
            Some(bounds) => bounds,
            None => return Vec::new(),
        };

        self.exception(lowercase_word)
            .unwrap_or_else(|| self.opportunities_within(lowercase_word, bounds))
    }

    /// The break opportunities of `lowercase_word` restricted to `bounds`,
    /// a pair of byte indices as produced by `boundaries`. The implementations
    /// in this file treat both ends as inclusive.
    fn opportunities_within(&'h self,
                            lowercase_word : &str,
                            bounds : (usize, usize))
                            -> Vec<Self::Opportunity>;

    /// The exceptional hyphenation registered for `lowercase_word`, if any,
    /// restricted to the word's `boundaries`.
    ///
    /// Returns `None` when the word has no boundaries or `exception_within`
    /// finds nothing.
    fn exception(&'h self, lowercase_word : &str) -> Option<Vec<Self::Opportunity>> {
        let bounds = self.boundaries(lowercase_word)?;
        self.exception_within(lowercase_word, bounds)
    }

    /// The exceptional hyphenation registered for `lowercase_word`, if any,
    /// restricted to `bounds` (byte indices as produced by `boundaries`).
    fn exception_within(&'h self,
                        lowercase_word : &str,
                        bounds : (usize, usize))
                        -> Option<Vec<Self::Opportunity>>;

    /// Register `ops` as the exceptional hyphenation of `lowercase_word`.
    ///
    /// The implementations in this file return the entry previously stored
    /// for that word, if there was one.
    fn add_exception(&mut self,
                     lowercase_word : String,
                     ops : Vec<Self::Exact>)
                     -> Option<Vec<Self::Exact>>;

    /// Remove the exceptional hyphenation registered for `word`, returning it
    /// if it existed.
    fn remove_exception(&mut self, word : &str) -> Option<Vec<Self::Exact>>;

    /// The number of `char`s at the start and at the end of a word, in that
    /// order, where no break may occur.
    fn unbreakable_chars(&self) -> (usize, usize);

    /// The byte indices delimiting the breakable part of `word`.
    ///
    /// With `(l_min, r_min)` taken from `unbreakable_chars`, the result is
    /// `(l, r)` where `l` is the byte offset of the `char` at position
    /// `l_min` and `r` is the byte offset of the `r_min`-th `char` counted
    /// from the end (the last `char` when `r_min` is zero).
    ///
    /// Returns `None` when `word` has fewer than `l_min + r_min` `char`s.
    /// It also returns `None`, instead of panicking, when either offset does
    /// not exist even though the length check passed: this happens for a
    /// word of exactly `l_min` `char`s when `r_min` is zero, which includes
    /// the empty word with minima `(0, 0)`.
    fn boundaries(&self, word : &str) -> Option<(usize, usize)> {
        let (l_min, r_min) = self.unbreakable_chars();
        if word.chars().count() < l_min + r_min {
            return None;
        }

        // `?` covers the corner cases where the length check passes and yet
        // the requested `char` is absent.
        let (left, _) = word.char_indices().nth(l_min)?;
        let (right, _) = word.char_indices().rev().nth(r_min.saturating_sub(1))?;
        Some((left, right))
    }
}
/// The two components returned by `refold` for one input text, bundled
/// together by `prepare`.
#[derive(Debug, Clone)]
struct Prepared<'t> {
/// First component of `refold`'s result; this is the string the `hyphenate`
/// implementations pass to `opportunities`. Presumably the case-folded
/// text, borrowed when folding changed nothing (TODO confirm against
/// `case_folding::refold`).
word : Cow<'t, str>,
/// Second component of `refold`'s result. When non-empty, the `hyphenate`
/// implementations pass it to `realign` for every index they found.
shifts : Vec<Shift>,
}
/// Run `refold` on `text` and package both parts of its result as a
/// `Prepared`.
fn prepare(text : &str) -> Prepared<'_> {
    let folded = refold(text);
    Prepared { word :   folded.0,
               shifts : folded.1, }
}
/// Hyphenation with the `Standard` dictionary: every opportunity is a plain
/// byte index into the word.
impl<'h> Hyphenator<'h> for Standard {
    type Opportunity = usize;
    type Exact = usize;

    /// Hyphenate `word`.
    ///
    /// Soft hyphens already present in `word` take precedence: when there is
    /// at least one, their byte offsets are returned as the breaks and the
    /// dictionary is not consulted. Otherwise the word is passed through
    /// `prepare`, opportunities are looked up for the prepared string, and,
    /// if `prepare` reported any shifts, every index is passed through
    /// `realign` with them.
    fn hyphenate<'t>(&'h self, word : &'t str) -> Word<'t, Self::Opportunity> {
        let breaks = match soft_hyphen_indices(word) {
            Some(ops) => ops,
            None => {
                // Shadows `word` with the prepared string for this arm only;
                // the returned `Word` still carries the caller's text.
                let Prepared { ref word,
                               ref shifts, } = prepare(word);
                if shifts.is_empty() {
                    self.opportunities(word)
                } else {
                    self.opportunities(word)
                        .into_iter()
                        .map(move |o| realign(o, shifts))
                        .collect()
                }
            }
        };

        Word { breaks,
               text : word }
    }

    /// Byte indices `i` in `l ..= r` that lie on a `char` boundary of `word`
    /// and whose score, as produced by `self.score(word)`, denotes an
    /// opportunity.
    ///
    /// Scores are paired with the indices `1 .. word.len()` in order.
    fn opportunities_within(&'h self, word : &str, (l, r) : (usize, usize)) -> Vec<usize> {
        (1 .. word.len()).zip(self.score(word))
                         .filter(|&(i, v)| {
                             let valid = Self::denotes_opportunity(v);
                             let within_bounds = i >= l && i <= r;
                             let legal_index = word.is_char_boundary(i);
                             valid && within_bounds && legal_index
                         })
                         .map(|(i, _)| i)
                         .collect()
    }

    /// The exception stored for `w`, reduced to the indices within `l ..= r`.
    ///
    /// Returns `None` when no exception is stored for `w`; a stored exception
    /// with no index inside the bounds yields `Some` of an empty vector.
    #[inline]
    fn exception_within(&'h self,
                        w : &str,
                        (l, r) : (usize, usize))
                        -> Option<Vec<Self::Opportunity>> {
        // Filter while borrowing, so that only the surviving indices are
        // copied instead of cloning the whole stored vector first.
        self.exceptions
            .0
            .get(w)
            .map(|known| known.iter().copied().filter(|&i| i >= l && i <= r).collect())
    }

    /// Store `ops` as the exception for `w`, returning the entry it replaces,
    /// if any.
    #[inline]
    fn add_exception(&mut self, w : String, ops : Vec<usize>) -> Option<Vec<usize>> {
        self.exceptions.0.insert(w, ops)
    }

    /// Remove and return the exception stored for `w`, if any.
    fn remove_exception(&mut self, w : &str) -> Option<Vec<usize>> { self.exceptions.0.remove(w) }

    /// The dictionary's `minima`, returned unchanged.
    #[inline]
    fn unbreakable_chars(&self) -> (usize, usize) { self.minima }
}
/// Hyphenation with the `Extended` dictionary: every opportunity is a byte
/// index optionally accompanied by a `Subregion` borrowed for `'h`.
impl<'h> Hyphenator<'h> for Extended {
    type Opportunity = (usize, Option<&'h Subregion>);
    type Exact = (usize, Option<Subregion>);

    /// Hyphenate `word`.
    ///
    /// If `word` contains soft hyphens, their byte offsets (each without a
    /// subregion) are the breaks. Otherwise the word goes through `prepare`,
    /// opportunities are looked up for the prepared string, and their indices
    /// are passed through `realign` whenever `prepare` reported shifts.
    fn hyphenate<'t>(&'h self, word : &'t str) -> Word<'t, Self::Opportunity> {
        if let Some(marked) = soft_hyphen_indices(word) {
            let breaks : Vec<Self::Opportunity> =
                marked.into_iter().map(|at| (at, None)).collect();
            return Word { text : word,
                          breaks };
        }

        let folded = prepare(word);
        let found = self.opportunities(&folded.word);
        let breaks = if folded.shifts.is_empty() {
            found
        } else {
            found.into_iter()
                 .map(|(at, subregion)| (realign(at, &folded.shifts), subregion))
                 .collect()
        };

        Word { text : word,
               breaks }
    }

    /// Opportunities at byte indices in `l ..= r` that lie on a `char`
    /// boundary of `word` and whose score, as produced by `self.score(word)`,
    /// denotes an opportunity. Each index keeps the subregion carried by its
    /// score.
    ///
    /// Scores are paired with the indices `1 .. word.len()` in order.
    fn opportunities_within(&'h self,
                            word : &str,
                            (l, r) : (usize, usize))
                            -> Vec<Self::Opportunity> {
        (1 .. word.len()).zip(self.score(word))
                         .filter_map(|(at, value)| {
                             let is_opportunity = Self::denotes_opportunity(value);
                             let inside = l <= at && at <= r;
                             let on_boundary = word.is_char_boundary(at);
                             if is_opportunity && inside && on_boundary {
                                 Some((at, value.1))
                             } else {
                                 None
                             }
                         })
                         .collect()
    }

    /// The exception stored for `w`, reduced to the entries whose index is
    /// within `l ..= r`; subregions are handed out by reference.
    ///
    /// Returns `None` when no exception is stored for `w`.
    #[inline]
    fn exception_within(&'h self,
                        w : &str,
                        (l, r) : (usize, usize))
                        -> Option<Vec<Self::Opportunity>> {
        let known = self.exceptions.0.get(w)?;
        let within = known.iter()
                          .filter(|entry| l <= entry.0 && entry.0 <= r)
                          .map(|entry| (entry.0, entry.1.as_ref()))
                          .collect();
        Some(within)
    }

    /// Store `ops` as the exception for `w`, returning the entry it replaces,
    /// if any.
    fn add_exception(&mut self, w : String, ops : Vec<Self::Exact>) -> Option<Vec<Self::Exact>> {
        let exceptions = &mut self.exceptions.0;
        exceptions.insert(w, ops)
    }

    /// Remove and return the exception stored for `w`, if any.
    fn remove_exception(&mut self, w : &str) -> Option<Vec<Self::Exact>> {
        let exceptions = &mut self.exceptions.0;
        exceptions.remove(w)
    }

    /// The dictionary's `minima`, returned unchanged.
    #[inline]
    fn unbreakable_chars(&self) -> (usize, usize) { self.minima }
}