contractions/lib.rs
//! [`contractions`](https://docs.rs/contractions) is a library to handle contractions.
//! So far, only data sets to expand contractions are implemented.
3//!
4//! Expands "I’m" to "I am" etc.
5//! The default data set has a replacement for all-lowercase, all-uppercase and first letter
6//! uppercase.
7//!
8//! ## Example
9//!
10//! ```rust
11//! let contractions = contractions::Contractions::default();
12//! assert_eq!("I am sure you would have been fine.", contractions.apply("I’m sure you’d’ve been fine."));
13//! assert_eq!("Are you sure?", contractions.apply("R u sure?"));
14//! ```
15//!
16//! ```rust
//! let mut contractions = contractions::Contractions::new();
//! contractions.add_from_json(contractions::EXPAND_SINGLE_CONTRACTIONS_JSON).unwrap();
19//! assert_eq!("I am sad you couldn’t’ve come.", contractions.apply("I’m sad you couldn’t’ve come."));
20//! ```
21
22#![deny(clippy::all)]
23#![deny(clippy::pedantic)]
24#![deny(clippy::nursery)]
25#![deny(clippy::cargo)]
26#![warn(missing_docs)]
27
28#[macro_use]
29extern crate log;
30
31use linked_hash_map::LinkedHashMap;
32use regex::Regex;
33
34mod regex_wrapper;
35use regex_wrapper::RegexWrapper;
36use serde::{
37 Deserialize,
38 Serialize,
39};
40
/// Slang terms, in JSON form, which will be expanded/changed to their full form.
pub const EXPAND_SLANG_JSON :&str = include_str!("../data/expand/slang.json");
/// Contractions with one apostrophe, in JSON form (e.g. I'm, I've, 'twas).
pub const EXPAND_SINGLE_CONTRACTIONS_JSON :&str =
    include_str!("../data/expand/single_contractions.json");
/// Contractions with two apostrophes, in JSON form (e.g. Who'll've, Wouldn't've, Mustn't've).
pub const EXPAND_DOUBLE_CONTRACTIONS_JSON :&str =
    include_str!("../data/expand/double_contractions.json");
/// Contractions with three apostrophes, in JSON form (e.g. Y'all'd've, 'twou'dn't).
///
/// NOTE(review): "TRIPPLE" is a misspelling of "TRIPLE"; the name is public and
/// mirrors the data file name, so it is left unchanged here.
pub const EXPAND_TRIPPLE_CONTRACTIONS_JSON :&str =
    include_str!("../data/expand/tripple_contractions.json");
/// Most of the [`EXPAND_SINGLE_CONTRACTIONS_JSON`] contractions, but written without apostrophes.
pub const EXPAND_SINGLE_NO_APOSTROPHE_CONTRACTIONS_JSON :&str =
    include_str!("../data/expand/single_no_apostroph_contractions.json");
/// Most of the [`EXPAND_DOUBLE_CONTRACTIONS_JSON`] contractions, but written without apostrophes.
pub const EXPAND_DOUBLE_NO_APOSTROPHE_CONTRACTIONS_JSON :&str =
    include_str!("../data/expand/double_no_apostroph_contractions.json");
/// Partial contractions, in JSON form (e.g. 'm, 've, n't, 're).
pub const EXPAND_PARTIAL_CONTRACTIONS_JSON :&str =
    include_str!("../data/expand/partial_contractions.json");
61
/// The JSON data sets loaded by [`Contractions::default()`].
///
/// The order is preserved: the data sets are processed from top to bottom.
///
/// Note that [`EXPAND_PARTIAL_CONTRACTIONS_JSON`] is not part of this list.
pub const CONTRACTIONS_JSON_ORDER :&[&str] = &[
    EXPAND_SLANG_JSON,
    EXPAND_DOUBLE_NO_APOSTROPHE_CONTRACTIONS_JSON,
    EXPAND_SINGLE_NO_APOSTROPHE_CONTRACTIONS_JSON,
    EXPAND_TRIPPLE_CONTRACTIONS_JSON,
    EXPAND_DOUBLE_CONTRACTIONS_JSON,
    EXPAND_SINGLE_CONTRACTIONS_JSON,
];
73
/// A single contraction rule: one search pattern plus its replacement pairs.
///
/// Rules are (de)serialized with serde; see [`Contractions::add_from_json`].
#[derive(Debug, Serialize, Deserialize)]
struct Contraction {
    /// Pattern used to decide whether this rule applies to a text at all.
    /// (De)serialized through the `serde_regex` helper module.
    #[serde(with = "serde_regex")]
    find :Regex,
    /// Search pattern → replacement text pairs, run one after another in map
    /// iteration order once `find` has matched.
    replace :LinkedHashMap<RegexWrapper, String>,
}
81
82impl Contraction {
83 fn is_match(&self, text :&str) -> bool {
84 if self.find.is_match(text) {
85 debug!(
86 "Found match - Pattern: \"{}\" - Text: \"{}\"",
87 &self.find, &text
88 );
89 true
90 } else {
91 false
92 }
93 }
94
95 fn replace_all(&self, text :&mut String) {
96 debug!("Replace all - Pattern: \"{}\"", &self.find);
97 for (search, replace) in self.replace.iter() {
98 *text = search.0.replace_all(text, replace).into_owned();
99 }
100 }
101}
102
/// Main actor in the [`contractions`](https://docs.rs/contractions) crate
///
/// Stores the individual contraction rules in a
/// [`Vec`](https://doc.rust-lang.org/std/vec/struct.Vec.html); the rules are
/// applied in the order in which they were added.
///
/// # Example
/// ```
/// let contractions = contractions::Contractions::default();
/// assert_eq!("I am sure you would have been fine.", contractions.apply("I’m sure you’d’ve been fine."));
/// assert_eq!("Are you sure?", contractions.apply("R u sure?"));
/// ```
#[derive(Debug)]
pub struct Contractions {
    /// Rules in application order; see [`Contractions::apply`].
    contractions :Vec<Contraction>,
}
117
118impl Default for Contractions {
119 /// Returns the built in configuration for [`Contractions`](struct.Contractions.html)
120 ///
121 /// # Example
122 /// ```
123 /// use contractions::Contractions;
124 /// let contractions = Contractions::new();
125 /// ```
126 /// # Panics
127 /// Only panics when the library internal configuration is faulty
128 /// this ought to only happen during development
129 fn default() -> Self {
130 Self::from_json(CONTRACTIONS_JSON_ORDER).unwrap()
131 }
132}
133
134impl Contractions {
135 /// Creates empty [`Contractions`](struct.Contractions.html)
136 ///
137 /// # Example
138 /// ```
139 /// use contractions::{self, Contractions};
140 /// let contractions = Contractions::new();
141 /// ```
142 #[must_use]
143 pub const fn new() -> Self {
144 Self {
145 contractions :vec![],
146 }
147 }
148
149 /// Deserialize `Contraction` from json
150 ///
151 /// Convenience method that chains [`Contractions::new()`](struct.Contractions.html#method.new)
152 /// and [`Contractions::add_from_json()`](struct.Contractions.html#method.add_from_json)
153 ///
154 /// # Example
155 /// ```
156 /// use contractions::{self, Contractions};
157 /// let contractions = Contractions::from_json(&[contractions::SINGLE_CONTRACTIONS_JSON, contractions::SINGLE_NO_APOSTROPHE_CONTRACTIONS_JSON]);
158 /// ```
159 /// # Errors
160 /// Returns an Error if deserialization fails
161 pub fn from_json(contractions_as_str :&[&str]) -> Result<Self, Box<dyn std::error::Error>> {
162 let mut contractions = Self::new();
163 for s in contractions_as_str {
164 contractions.add_from_json(s)?;
165 }
166 Ok(contractions)
167 }
168
169 /// Add `Contraction`s from a json string to an existing
170 /// [`Contractions`](struct.Contractions.html) struct
171 ///
172 /// # Example
173 /// ```
174 /// use contractions::{self, Contractions};
175 /// let mut contractions = Contractions::new();
176 /// contractions.add_from_json(contractions::SINGLE_CONTRACTIONS_JSON);
177 /// ```
178 ///
179 /// # Errors
180 /// Returns an Error if deserialization fails
181 pub fn add_from_json(
182 &mut self,
183 contractions_as_str :&str,
184 ) -> Result<(), Box<dyn std::error::Error>> {
185 let mut contr_part :Vec<Contraction> = serde_json::from_str(contractions_as_str)?;
186 debug!("Added contractions from json.\n{:#?}\n", contr_part);
187 self.contractions.append(&mut contr_part);
188 Ok(())
189 }
190
191 /// Remove a `Contraction` from [`Contractions`](struct.Contractions.html)
192 ///
193 /// Provide the exact `find` key to delete the corresponding `Contraction`
194 ///
195 /// # Example
196 /// ```
197 /// use contractions::{self, Contractions};
198 /// let mut contractions = Contractions::new();
199 /// assert_eq!("I’m happy", contractions.apply("I’m happy"));
200 /// contractions.add_from_json(contractions::SINGLE_CONTRACTIONS_JSON);
201 /// assert_eq!("I am happy", contractions.apply("I’m happy"));
202 /// contractions.remove("\\b(?i)i['’`]m(?-i)\\b");
203 /// assert_eq!("I’m happy", contractions.apply("I’m happy"));
204 /// ```
205 pub fn remove(&mut self, key :&str) {
206 self.contractions.retain(|c| c.find.as_str() != key);
207 }
208
209 /// Add a contraction to [`Contractions`](struct.Contractions.html)
210 ///
211 /// # Example
212 /// ```
213 /// use contractions::{self, Contractions};
214 /// let mut contractions = Contractions::new();
215 /// assert_eq!("I’m happy", contractions.apply("I’m happy"));
216 /// let find = r#"\b(?i)i['’`]m(?-i)\b"#;
217 /// let mut replace = linked_hash_map::LinkedHashMap::new();
218 /// replace.insert(r#"\bi['’`]m\b"#, "i am");
219 /// replace.insert(r#"\bI['’`]m\b"#, "I am");
220 /// replace.insert(r#"\bI['’`]M\b"#, "I AM");
221 /// contractions.add(find, replace);
222 /// assert_eq!("I am happy", contractions.apply("I’m happy"));
223 /// ```
224 ///
225 /// # Errors
226 /// Returns an Error if `find` or the key in the `replace`
227 /// cannot be successfully turned into a Regex
228 pub fn add(
229 &mut self,
230 find :&str,
231 replace :LinkedHashMap<&str, &str>,
232 ) -> Result<(), Box<dyn std::error::Error>> {
233 let find = Regex::new(find)?;
234 let in_replace = replace;
235 let mut replace :LinkedHashMap<RegexWrapper, String> = LinkedHashMap::new();
236 for (f, r) in in_replace {
237 replace.insert(RegexWrapper(Regex::new(f)?), r.to_string());
238 }
239
240 let contraction = Contraction { find, replace };
241 self.contractions.push(contraction);
242 Ok(())
243 }
244
245 /// Replace contractions with their long form
246 ///
247 /// # Example
248 /// ```
249 /// use contractions::Contractions;
250 /// let contractions = Contractions::default();
251 /// assert_eq!("I am your brother’s son", contractions.apply("I’m your brother’s son"));
252 /// ```
253 #[must_use]
254 pub fn apply(&self, input :&str) -> String {
255 let mut output = input.to_string();
256 for contraction in &self.contractions {
257 if contraction.is_match(&output) {
258 contraction.replace_all(&mut output);
259 }
260 }
261 output
262 }
263}