pyphen_rs/pyphen/mod.rs
1// This file is part of pyphen-rs
2//
3// Copyright 2008 - Wilbert Berendsen <info@wilbertberendsen.nl>
4// Copyright 2012-2013 - Guillaume Ayoub <guillaume.ayoub@kozea.fr>
5// Copyright 2019 - Naresh Ganduri <gandurinaresh@gmail.com>
6//
7// This library is free software. It is released under the
8// GPL 2.0+/LGPL 2.1+/MPL 1.1 tri-license. See COPYING.GPL, COPYING.LGPL and
9// COPYING.MPL for more details.
10//
11// This library is distributed in the hope that it will be useful, but WITHOUT
12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14// details.
15
16use std::borrow::Cow;
17use std::rc::Rc;
18
19use super::{DataInt, HyphDict};
20use iter::Iter;
21
22pub mod builder;
23pub mod iter;
24
/// Hyphenation class, with methods to hyphenate strings in various ways.
pub struct Pyphen {
    /// Smallest hyphenation position accepted by `positions`; points whose
    /// value is below this are discarded.
    left: usize,
    /// Margin kept at the end of the word: `positions` discards points whose
    /// value exceeds the word length minus this.
    right: usize,
    /// Reference-counted handle to the hyphenation dictionary that is queried
    /// for the raw hyphenation points of a word.
    hd: Rc<HyphDict>,
}
31
32impl Pyphen {
33 /// Get a list of positions where the word can be hyphenated.
34 /// The points that are too far to the left or right are removed.
35 ///
36 /// - *word* - unicode string of the word to hyphenate
37 pub fn positions(&self, word: &str) -> Vec<DataInt> {
38 let right = word.len() - self.right;
39 self.hd
40 .positions(word)
41 .iter()
42 .cloned()
43 .filter(|i| i.value >= self.left && i.value <= right)
44 .collect()
45 }
46
47 /// Iterate over all hyphenation possibilities, the longest first.
48 ///
49 /// - *word* - unicode string of the word to hyphenate
50 pub fn iterate<'b>(&self, word: &'b str) -> Iter<'b> {
51 Iter {
52 iter: self.positions(word).into_iter().rev(),
53 word,
54 is_upper: word == word.to_uppercase(),
55 }
56 }
57
58 /// Get the longest possible first part and the last part of a word.
59 ///
60 /// The first part has the hyphen already attached.
61 ///
62 /// Returns ``None`` if there is no hyphenation point before ``width``, or
63 /// if the word could not be hyphenated.
64 ///
65 /// - *word* - unicode string of the word to hyphenate
66 /// - *width* - maximum length of the first part
67 /// - *hyphen* - unicode string used as hyphen character
68 pub fn wrap_with<'b>(
69 &self,
70 word: &'b str,
71 mut width: usize,
72 hyphen: &str,
73 ) -> Option<(String, Cow<'b, str>)> {
74 width -= hyphen.len();
75 for (w1, w2) in self.iterate(word) {
76 if w1.len() <= width {
77 let w1 = w1.into_owned();
78 return Some((w1 + hyphen, w2));
79 }
80 }
81
82 None
83 }
84
85 /// Get the longest possible first part and the last part of a word.
86 ///
87 /// The first part has the hyphen already attached.
88 ///
89 /// Returns ``None`` if there is no hyphenation point before ``width``, or
90 /// if the word could not be hyphenated.
91 ///
92 /// - *word* - unicode string of the word to hyphenate
93 /// - *width* - maximum length of the first part
94 pub fn wrap<'b>(&self, word: &'b str, width: usize) -> Option<(String, Cow<'b, str>)> {
95 self.wrap_with(word, width, "-")
96 }
97
98 /// Get the word as a string with all the possible hyphens inserted.
99 ///
100 /// - *word* - unicode string of the word to hyphenate
101 /// - *hyphen* - unicode string used as hyphen character
102 ///
103 /// # Example
104 /// ```
105 /// use pyphen_rs::Builder;
106 ///
107 /// let dic = Builder::lang("nl_NL").build().unwrap();
108 ///
109 /// assert_eq!(dic.inserted_with("lettergrepen", "."), "let.ter.gre.pen");
110 /// ```
111 pub fn inserted_with(&self, word: &str, hyphen: &str) -> String {
112 let mut word_list: Vec<_> = word.chars().collect();
113 let is_upper = word == word.to_uppercase();
114
115 for position in self.positions(word).into_iter().rev() {
116 if let Some(data) = position.data {
117 // get the nonstandard hyphenation data
118 let (change, mut index, cut) = data;
119 let change = if is_upper {
120 change.to_uppercase()
121 } else {
122 change.to_string()
123 };
124 index += position.value as isize;
125
126 let index = if index < 0 {
127 word_list.len() - index as usize
128 } else {
129 index as usize
130 };
131
132 word_list.splice(index..(index + cut), change.replace('=', hyphen).chars());
133 } else {
134 word_list.splice(position.value..position.value, hyphen.chars());
135 }
136 }
137
138 word_list.into_iter().collect()
139 }
140
141 /// Get the word as a string with all the possible hyphens inserted.
142 ///
143 /// - *word* - unicode string of the word to hyphenate
144 ///
145 /// # Example
146 /// ```
147 /// use pyphen_rs::Builder;
148 ///
149 /// let dic = Builder::lang("nl_NL").build().unwrap();
150 ///
151 /// assert_eq!(dic.inserted("lettergrepen"), "let-ter-gre-pen");
152 /// ```
153 pub fn inserted(&self, word: &str) -> String {
154 self.inserted_with(word, "-")
155 }
156}