pyphen_rs/pyphen/
mod.rs

// This file is part of pyphen-rs
//
// Copyright 2008 - Wilbert Berendsen <info@wilbertberendsen.nl>
// Copyright 2012-2013 - Guillaume Ayoub <guillaume.ayoub@kozea.fr>
// Copyright 2019 - Naresh Ganduri <gandurinaresh@gmail.com>
//
// This library is free software.  It is released under the
// GPL 2.0+/LGPL 2.1+/MPL 1.1 tri-license.  See COPYING.GPL, COPYING.LGPL and
// COPYING.MPL for more details.
//
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
// details.
15
16use std::borrow::Cow;
17use std::rc::Rc;
18
19use super::{DataInt, HyphDict};
20use iter::Iter;
21
22pub mod builder;
23pub mod iter;
24
25/// Hyphenation class, with methods to hyphenate strings in various ways.
26pub struct Pyphen {
27    left: usize,
28    right: usize,
29    hd: Rc<HyphDict>,
30}
31
32impl Pyphen {
33    /// Get a list of positions where the word can be hyphenated.
34    /// The points that are too far to the left or right are removed.
35    ///
36    /// - *word* - unicode string of the word to hyphenate
37    pub fn positions(&self, word: &str) -> Vec<DataInt> {
38        let right = word.len() - self.right;
39        self.hd
40            .positions(word)
41            .iter()
42            .cloned()
43            .filter(|i| i.value >= self.left && i.value <= right)
44            .collect()
45    }
46
47    /// Iterate over all hyphenation possibilities, the longest first.
48    ///
49    /// - *word* - unicode string of the word to hyphenate
50    pub fn iterate<'b>(&self, word: &'b str) -> Iter<'b> {
51        Iter {
52            iter: self.positions(word).into_iter().rev(),
53            word,
54            is_upper: word == word.to_uppercase(),
55        }
56    }
57
58    /// Get the longest possible first part and the last part of a word.
59    ///
60    /// The first part has the hyphen already attached.
61    ///
62    /// Returns ``None`` if there is no hyphenation point before ``width``, or
63    /// if the word could not be hyphenated.
64    ///
65    /// - *word* - unicode string of the word to hyphenate
66    /// - *width* - maximum length of the first part
67    /// - *hyphen* - unicode string used as hyphen character
68    pub fn wrap_with<'b>(
69        &self,
70        word: &'b str,
71        mut width: usize,
72        hyphen: &str,
73    ) -> Option<(String, Cow<'b, str>)> {
74        width -= hyphen.len();
75        for (w1, w2) in self.iterate(word) {
76            if w1.len() <= width {
77                let w1 = w1.into_owned();
78                return Some((w1 + hyphen, w2));
79            }
80        }
81
82        None
83    }
84
85    /// Get the longest possible first part and the last part of a word.
86    ///
87    /// The first part has the hyphen already attached.
88    ///
89    /// Returns ``None`` if there is no hyphenation point before ``width``, or
90    /// if the word could not be hyphenated.
91    ///
92    /// - *word* - unicode string of the word to hyphenate
93    /// - *width* - maximum length of the first part
94    pub fn wrap<'b>(&self, word: &'b str, width: usize) -> Option<(String, Cow<'b, str>)> {
95        self.wrap_with(word, width, "-")
96    }
97
98    /// Get the word as a string with all the possible hyphens inserted.
99    ///
100    /// - *word* - unicode string of the word to hyphenate
101    /// - *hyphen* - unicode string used as hyphen character
102    ///
103    /// # Example
104    /// ```
105    /// use pyphen_rs::Builder;
106    ///
107    /// let dic = Builder::lang("nl_NL").build().unwrap();
108    ///
109    /// assert_eq!(dic.inserted_with("lettergrepen", "."), "let.ter.gre.pen");
110    /// ```
111    pub fn inserted_with(&self, word: &str, hyphen: &str) -> String {
112        let mut word_list: Vec<_> = word.chars().collect();
113        let is_upper = word == word.to_uppercase();
114
115        for position in self.positions(word).into_iter().rev() {
116            if let Some(data) = position.data {
117                // get the nonstandard hyphenation data
118                let (change, mut index, cut) = data;
119                let change = if is_upper {
120                    change.to_uppercase()
121                } else {
122                    change.to_string()
123                };
124                index += position.value as isize;
125
126                let index = if index < 0 {
127                    word_list.len() - index as usize
128                } else {
129                    index as usize
130                };
131
132                word_list.splice(index..(index + cut), change.replace('=', hyphen).chars());
133            } else {
134                word_list.splice(position.value..position.value, hyphen.chars());
135            }
136        }
137
138        word_list.into_iter().collect()
139    }
140
141    /// Get the word as a string with all the possible hyphens inserted.
142    ///
143    /// - *word* - unicode string of the word to hyphenate
144    ///
145    /// # Example
146    /// ```
147    /// use pyphen_rs::Builder;
148    ///
149    /// let dic = Builder::lang("nl_NL").build().unwrap();
150    ///
151    /// assert_eq!(dic.inserted("lettergrepen"), "let-ter-gre-pen");
152    /// ```
153    pub fn inserted(&self, word: &str) -> String {
154        self.inserted_with(word, "-")
155    }
156}