use std::borrow::Cow;
use std::default::Default;
use common::{NB_CHAR, NB_CHAR_NARROW, NB_CHAR_EM};
use common::is_whitespace;
use clean;
use escape;
/// Formatter applying French typographic spacing rules to a text.
///
/// The fields are tuning knobs read by `format` and its private helpers;
/// they are set through the builder-style methods of the same names.
#[derive(Debug)]
pub struct FrenchFormatter {
// Maximum length of an all-uppercase word following a number for
// `char_is_symbol` to treat it as a symbol.
threshold_currency: usize,
// Maximum length of a word starting with a non-uppercase letter following a
// number for `char_is_symbol` to treat it as a symbol.
threshold_unit: usize,
// Distance between `«` and its `»` above which `format` uses `NB_CHAR`
// instead of `NB_CHAR_NARROW` inside the guillemets.
threshold_quote: usize,
// Length above which a capitalised word followed by `.` makes
// `find_closing_dash` stop searching.
threshold_real_word: usize,
// Whether `format` runs `clean::quotes`.
typographic_quotes: bool,
// Whether `format` runs `clean::ellipsis`.
typographic_ellipsis: bool,
// Whether `format` runs `clean::dashes`.
ligature_dashes: bool,
// Whether `format` runs `clean::guillemets`.
ligature_guillemets: bool,
}
impl Default for FrenchFormatter {
fn default() -> Self {
FrenchFormatter {
threshold_currency: 3,
threshold_unit: 2,
threshold_quote: 20,
threshold_real_word: 3,
typographic_quotes: true,
typographic_ellipsis: true,
ligature_dashes: false,
ligature_guillemets: false,
}
}
}
impl FrenchFormatter {
pub fn new() -> Self {
Self::default()
}
/// Sets the currency threshold and returns the formatter so calls can be chained.
///
/// `char_is_symbol` treats a word starting with an uppercase letter as a symbol
/// only if it is entirely uppercase and no longer than `t` characters.
pub fn threshold_currency(&mut self, t: usize) -> &mut Self {
self.threshold_currency = t;
self
}
/// Sets the unit threshold and returns the formatter so calls can be chained.
///
/// `char_is_symbol` treats a word starting with a non-uppercase letter as a
/// symbol only if it is no longer than `t` characters.
pub fn threshold_unit(&mut self, t: usize) -> &mut Self {
self.threshold_unit = t;
self
}
/// Sets the quote threshold and returns the formatter so calls can be chained.
///
/// In `format`, when a `«` followed by a space has a matching `»` preceded by
/// whitespace, the spaces inside the guillemets become `NB_CHAR` if the two
/// guillemets are more than `t` characters apart (or if the `«` sits within
/// the first two characters), and `NB_CHAR_NARROW` otherwise.
pub fn threshold_quote(&mut self, t: usize) -> &mut Self {
self.threshold_quote = t;
self
}
/// Sets the "real word" threshold and returns the formatter so calls can be chained.
///
/// `find_closing_dash` compares the length of a capitalised word that precedes
/// a `.` against `t`; a longer word makes it stop searching for a closing dash.
pub fn threshold_real_word(&mut self, t: usize) -> &mut Self {
self.threshold_real_word = t;
self
}
/// Enables or disables the `clean::quotes` pass run by `format`, and returns
/// the formatter so calls can be chained.
pub fn typographic_quotes(&mut self, b: bool) -> &mut Self {
self.typographic_quotes = b;
self
}
/// Enables or disables the `clean::ellipsis` pass run by `format`, and returns
/// the formatter so calls can be chained.
pub fn typographic_ellipsis(&mut self, b: bool) -> &mut Self {
self.typographic_ellipsis = b;
self
}
/// Enables or disables the `clean::dashes` pass run by `format`, and returns
/// the formatter so calls can be chained.
pub fn ligature_dashes(&mut self, b: bool) -> &mut Self {
self.ligature_dashes = b;
self
}
/// Enables or disables the `clean::guillemets` pass run by `format`, and
/// returns the formatter so calls can be chained.
pub fn ligature_guillemets(&mut self, b: bool) -> &mut Self {
self.ligature_guillemets = b;
self
}
/// Formats `input` and returns the result.
///
/// The text first goes through `clean::whitespaces`, then through the
/// optional `clean::dashes`, `clean::guillemets`, `clean::quotes` and
/// `clean::ellipsis` passes, depending on the formatter's settings.
/// Selected spaces are then replaced with `NB_CHAR`, `NB_CHAR_NARROW` or
/// `NB_CHAR_EM`:
///
/// * between the parts of a number, and between a number and a symbol;
/// * before `?`, `!`, `;`, `:` and `»`;
/// * after `«` and after a dash that is followed by a space.
///
/// When the cleaned text contains neither a digit nor a character accepted
/// by `is_trouble`, it is returned exactly as the cleaning passes produced it.
pub fn format<'a, S: Into<Cow<'a, str>>>(&self, input: S) -> Cow<'a, str> {
    let mut input = clean::whitespaces(input);
    if self.ligature_dashes {
        input = clean::dashes(input);
    }
    if self.ligature_guillemets {
        input = clean::guillemets(input);
    }
    if self.typographic_quotes {
        input = clean::quotes(input);
    }
    if self.typographic_ellipsis {
        input = clean::ellipsis(input);
    }

    // Character (not byte) positions, so they index directly into `chars` below.
    let first = input.chars().position(is_trouble);
    let first_number = input.chars().position(|c| c.is_digit(10));
    if first.is_none() && first_number.is_none() {
        // Nothing to space out.
        return input;
    }

    let mut chars: Vec<char> = input.chars().collect();

    // Each pass starts one character before the first character of interest,
    // since the space to replace may precede it.
    if let Some(first) = first_number {
        self.space_numbers(&mut chars, first.saturating_sub(1));
    }
    if let Some(first) = first {
        self.space_punctuation(&mut chars, first.saturating_sub(1));
    }

    Cow::Owned(chars.into_iter().collect())
}

/// Number pass of `format`: from `start` onwards, replaces with
/// `NB_CHAR_NARROW` every whitespace that follows a number and precedes
/// either a digit or a symbol (as decided by `char_is_symbol`).
fn space_numbers(&self, chars: &mut [char], start: usize) {
    // True while the characters read so far form a number, possibly with
    // whitespace inside or after it.
    let mut is_number_series = false;

    // The last character has no successor, so it is never the "current" one.
    for i in start..chars.len().saturating_sub(1) {
        let current = chars[i];
        let next = chars[i + 1];
        match current {
            '0'..='9' => {
                // A digit glued to a letter is not the start of a number.
                if i == 0 || !chars[i - 1].is_alphabetic() {
                    is_number_series = true;
                }
            }
            // NOTE(review): this pass uses `char::is_whitespace` whereas the
            // punctuation pass uses `common::is_whitespace` — confirm that
            // the difference is intentional.
            c if c.is_whitespace() => {
                if is_number_series
                    && (next.is_digit(10) || self.char_is_symbol(&*chars, i + 1))
                {
                    chars[i] = NB_CHAR_NARROW;
                }
            }
            _ => is_number_series = false,
        }
    }
}

/// Punctuation pass of `format`: from `start` onwards, replaces the space
/// before `?`, `!`, `;`, `:`, `»` and the space after `«` or a dash.
fn space_punctuation(&self, chars: &mut [char], start: usize) {
    for i in start..chars.len().saturating_sub(1) {
        // Both values are read *after* earlier iterations may have modified
        // the buffer, which the `»` rule below relies on.
        let current = chars[i];
        let next = chars[i + 1];

        if is_whitespace(current) {
            match next {
                '?' | '!' | ';' => chars[i] = NB_CHAR_NARROW,
                ':' => chars[i] = NB_CHAR,
                // Only a plain space is replaced: anything else was either
                // written by the `«` rule or put there by the author.
                '»' if current == ' ' => chars[i] = NB_CHAR,
                _ => (),
            }
        } else {
            let replacement = match current {
                '—' | '-' | '–' if is_whitespace(next) => self.space_after_dash(chars, i),
                '«' if is_whitespace(next) => self.space_after_opening_guillemet(chars, i),
                _ => continue,
            };
            chars[i + 1] = replacement;
        }
    }
}

/// Returns the character that must follow the dash at index `i`; may also
/// replace the space preceding the matching closing dash with `NB_CHAR`.
fn space_after_dash(&self, chars: &mut [char], i: usize) -> char {
    if i <= 1 {
        // Dash at the very beginning of the text.
        NB_CHAR_EM
    } else if chars[i - 1] == NB_CHAR {
        // Already a `NB_CHAR` before the dash: use a plain space after it.
        ' '
    } else {
        if let Some(closing) = self.find_closing_dash(&*chars, i + 1) {
            chars[closing] = NB_CHAR;
        }
        NB_CHAR
    }
}

/// Returns the character that must follow the `«` at index `i`; when the
/// matching `»` is preceded by whitespace, that whitespace is replaced with
/// the same character.
fn space_after_opening_guillemet(&self, chars: &mut [char], i: usize) -> char {
    match find_next(&*chars, '»', i) {
        // `chars[i]` is `«`, so `j > i` and `j - 1` cannot underflow.
        Some(j) if chars[j - 1].is_whitespace() => {
            let spacing = if i <= 1 || j - i > self.threshold_quote {
                NB_CHAR
            } else {
                NB_CHAR_NARROW
            };
            chars[j - 1] = spacing;
            spacing
        }
        // No closing guillemet, or one that is glued to the previous word.
        _ => NB_CHAR,
    }
}
/// Runs `format` on `input`, then passes the result through `escape::tex`
/// and `escape::nb_spaces_tex`, in that order.
pub fn format_tex<'a, S: Into<Cow<'a, str>>>(&self, input: S) -> Cow<'a, str> {
    let formatted = self.format(input);
    let escaped = escape::tex(formatted);
    escape::nb_spaces_tex(escaped)
}
/// Runs `format` on `input`, then passes the result through `escape::html`
/// and `escape::nb_spaces_html`, in that order.
pub fn format_html<'a, S: Into<Cow<'a, str>>>(&self, input: S) -> Cow<'a, str> {
    let formatted = self.format(input);
    let escaped = escape::html(formatted);
    escape::nb_spaces_html(escaped)
}
/// Tells whether the character at index `i` of `v` starts something that
/// should be treated as a symbol.
///
/// * When the following character is a letter: `°` always qualifies; a word
///   starting with an uppercase letter qualifies if it is entirely uppercase
///   and at most `threshold_currency` long; a word starting with any other
///   letter qualifies if it is at most `threshold_unit` long.
/// * Otherwise the character stands alone (or is last): it qualifies if it
///   is uppercase, or neither a letter nor whitespace.
///
/// Panics if `i` is out of bounds.
fn char_is_symbol(&self, v: &[char], i: usize) -> bool {
    let followed_by_letter = v.get(i + 1).map_or(false, |next| next.is_alphabetic());
    let symbol = v[i];

    if !followed_by_letter {
        return symbol.is_uppercase() || !(symbol.is_alphabetic() || symbol.is_whitespace());
    }

    if symbol == '°' {
        return true;
    }

    if symbol.is_uppercase() {
        let word = get_next_word(v, i);
        word.len() <= self.threshold_currency && word.iter().all(|c| c.is_uppercase())
    } else if symbol.is_alphabetic() {
        get_next_word(v, i).len() <= self.threshold_unit
    } else {
        false
    }
}
/// Searches `v`, starting at index `n`, for a dash (`-`, `–` or `—`) that is
/// preceded by whitespace, and returns the index of that whitespace.
///
/// Returns `None` when the end of `v` is reached, or when the search runs
/// into what looks like the end of a sentence:
///
/// * a `!` or `?` whose next cased letter is uppercase;
/// * a `.` whose next cased letter is uppercase, unless the word before the
///   `.` starts with an uppercase letter and has at most
///   `threshold_real_word` characters.
fn find_closing_dash(&self, v: &[char], n: usize) -> Option<usize> {
    // Characters of the word currently being read. Kept as `char`s so that
    // its length is a character count: a `String` length is in UTF-8 bytes,
    // which over-counts accented letters.
    let mut word: Vec<char> = Vec::new();

    for j in n..v.len() {
        match v[j] {
            '!' | '?' => {
                if is_next_char_uppercase(v, j + 1) {
                    return None;
                }
            }
            '-' | '–' | '—' => {
                // `j > 0` keeps `j - 1` from underflowing when `n == 0`.
                if j > 0 && v[j - 1].is_whitespace() {
                    return Some(j - 1);
                }
            }
            '.' => {
                if !is_next_char_uppercase(v, j + 1) {
                    continue;
                }
                if let Some(&initial) = word.first() {
                    if !initial.is_uppercase() || word.len() > self.threshold_real_word {
                        return None;
                    }
                }
            }
            c if c.is_whitespace() => word.clear(),
            c => word.push(c),
        }
    }
    None
}
}
/// Returns `true` if `c` is one of the characters whose surrounding spaces
/// `format` may have to replace: `?`, `!`, `;`, `:`, `»`, `«`, `—` or `–`.
fn is_trouble(c: char) -> bool {
    const TROUBLE_CHARS: [char; 8] = ['?', '!', ';', ':', '»', '«', '—', '–'];
    TROUBLE_CHARS.contains(&c)
}
/// Returns the index of the first occurrence of `c` in `v` at or after index
/// `n`, or `None` if there is none (including when `n` is past the end).
fn find_next(v: &[char], c: char, n: usize) -> Option<usize> {
    v.iter()
        .skip(n)
        .position(|&candidate| candidate == c)
        // `position` counts from the first non-skipped element.
        .map(|offset| n + offset)
}
/// Looks at `v` from index `n` onwards, skipping every character that is
/// neither uppercase nor lowercase (whitespace, digits, punctuation…), and
/// returns whether the first cased character found is uppercase.
///
/// Returns `false` when no cased character is found.
fn is_next_char_uppercase(v: &[char], n: usize) -> bool {
    v.iter()
        .skip(n)
        .find(|c| c.is_uppercase() || c.is_lowercase())
        .map_or(false, |c| c.is_uppercase())
}
/// Returns the next word of `v`, looking from index `n` onwards.
///
/// The word starts at the first alphabetic character at or after `n` (or at
/// `n` itself if there is none) and runs up to, but not including, the next
/// whitespace character or the end of `v`.
///
/// Returns an empty slice when `n` is past the end of `v`.
fn get_next_word(v: &[char], n: usize) -> &[char] {
    let tail = v.get(n..).unwrap_or(&[]);

    // Skip whatever precedes the first letter.
    let start = tail
        .iter()
        .position(|c| c.is_alphabetic())
        .unwrap_or(0);
    let tail = &tail[start..];

    // The word ends right before the whitespace: the end bound of a range is
    // already exclusive, so no `- 1` is needed.
    let len = tail
        .iter()
        .position(|c| c.is_whitespace())
        .unwrap_or(tail.len());
    &tail[..len]
}
// Formats a sentence mixing guillemets, `?`, `:` and `!` with `format`
// (no TeX/HTML escaping) and compares it with the expected text.
// NOTE(review): the input and expected literals look almost identical on
// screen; the expected value presumably contains non-breaking space
// characters — check the raw bytes before editing these strings.
#[cfg(test)]
#[test]
fn french() {
let s = " « Comment allez-vous ? » demanda-t-elle à son \
interlocutrice qui lui répondit \
: « Mais très bien ma chère ! »";
let res = FrenchFormatter::new().format(s);
assert_eq!(&res,
" « Comment allez-vous ? » demanda-t-elle à son \
interlocutrice qui lui répondit : \
« Mais très bien ma chère ! »");
}
// A complete pair of guillemets at the start of the text: both inner spaces
// come out as `~` in TeX.
#[test]
fn french_quotes_1() {
    let formatter = FrenchFormatter::new();
    let output = formatter.format_tex("« Un test »");
    assert_eq!(&*output, "«~Un test~»");
}
// An opening guillemet without its closing counterpart still gets `~` after it.
#[test]
fn french_quotes_2() {
    let formatter = FrenchFormatter::new();
    let output = formatter.format_tex("« Un test");
    assert_eq!(&*output, "«~Un test");
}
// A closing guillemet without an opening one still gets `~` before it.
#[test]
fn french_quotes_3() {
    let formatter = FrenchFormatter::new();
    let output = formatter.format_tex("Un test »");
    assert_eq!(&*output, "Un test~»");
}
// Short quotation in the middle of a text, formatted with `format`.
// NOTE(review): input and expected literals look identical on screen; the
// expected value presumably contains non-breaking space characters inside
// the guillemets — check the raw bytes before editing.
#[test]
fn french_quotes_4() {
let s = "test « court »";
let res = FrenchFormatter::new().format(s);
assert_eq!(&res, "test « court »");
}
// Quotation longer than the default `threshold_quote`, formatted with `format`.
// NOTE(review): input and expected literals look identical on screen; the
// expected value presumably contains non-breaking space characters inside
// the guillemets — check the raw bytes before editing.
#[test]
fn french_quotes_5() {
let s = "test « beaucoup, beaucoup plus long »";
let res = FrenchFormatter::new().format(s);
assert_eq!(&res, "test « beaucoup, beaucoup plus long »");
}
// A pair of dashes enclosing an aside: `~` goes after the opening dash and
// before the closing one.
#[test]
fn french_dashes_1() {
    let formatter = FrenchFormatter::new();
    let output = formatter.format_tex(
        "Il faudrait gérer ces tirets – sans ça certains textes rendent mal – un jour ou \
l'autre",
    );
    assert_eq!(
        &*output,
        "Il faudrait gérer ces tirets –~sans ça certains textes \
rendent mal~– un jour ou l’autre"
    );
}
// Two dashes separated by a sentence end: they are not paired, so each one
// only gets `~` after it.
#[test]
fn french_dashes_2() {
    let formatter = FrenchFormatter::new();
    let output = formatter.format_tex(
        "Il faudrait gérer ces tirets – sans ça certains textes rendent mal. Mais ce n'est \
pas si simple – si ?",
    );
    assert_eq!(
        &*output,
        "Il faudrait gérer ces tirets –~sans ça certains textes rendent mal. Mais ce \
n’est pas si simple –~si\\,?"
    );
}
// Spacing inside numbers and between a number and the symbol, unit or word
// that follows it, checked on the TeX output.
#[test]
fn french_numbers() {
    let french = FrenchFormatter::new();

    // (input, expected TeX output)
    let cases = [
        ("10 000", "10\\,000"),
        ("10 000 €", "10\\,000\\,€"),
        ("10 000 euros", "10\\,000 euros"),
        ("10 000 EUR", "10\\,000\\,EUR"),
        ("50 km", "50\\,km"),
        ("50 %", "50\\,\\%"),
        ("20 °C", "20\\,°C"),
        ("20 F", "20\\,F"),
        ("20 BALLES", "20 BALLES"),
    ];

    for &(input, expected) in cases.iter() {
        let res = french.format_tex(Cow::Borrowed(input));
        assert_eq!(&res, expected);
    }
}