// contractions/lib.rs

//! [`contractions`](https://docs.rs/contractions) is a library to handle contractions.
//! So far only data sets to expand contractions are implemented.
//!
//! Expands "I’m" to "I am" etc.
//! The default data set has a replacement for all-lowercase, all-uppercase and first letter
//! uppercase.
//!
//! ## Example
//!
//! ```rust
//! let contractions = contractions::Contractions::default();
//! assert_eq!("I am sure you would have been fine.", contractions.apply("I’m sure you’d’ve been fine."));
//! assert_eq!("Are you sure?", contractions.apply("R u sure?"));
//! ```
//!
//! ```rust
//! let mut contractions = contractions::Contractions::new();
//! contractions.add_from_json(contractions::EXPAND_SINGLE_CONTRACTIONS_JSON).unwrap();
//! assert_eq!("I am sad you couldn’t’ve come.", contractions.apply("I’m sad you couldn’t’ve come."));
//! ```
21
22#![deny(clippy::all)]
23#![deny(clippy::pedantic)]
24#![deny(clippy::nursery)]
25#![deny(clippy::cargo)]
26#![warn(missing_docs)]
27
28#[macro_use]
29extern crate log;
30
31use linked_hash_map::LinkedHashMap;
32use regex::Regex;
33
34mod regex_wrapper;
35use regex_wrapper::RegexWrapper;
36use serde::{
37    Deserialize,
38    Serialize,
39};
40
41/// Contains slang terms which will be expanded/changed to their full form
42pub const EXPAND_SLANG_JSON :&str = include_str!("../data/expand/slang.json");
43/// Contains contractions with one apostroph in json form (eg: I'm, I've, 'twas)
44pub const EXPAND_SINGLE_CONTRACTIONS_JSON :&str =
45    include_str!("../data/expand/single_contractions.json");
46/// Contains contractions with two apostroph in json form (eg: Who'll've, Wouldn't've, Mustn't've)
47pub const EXPAND_DOUBLE_CONTRACTIONS_JSON :&str =
48    include_str!("../data/expand/double_contractions.json");
49/// Contains contractions with three apostroph in json form (Y'all'd've, 'twou'dn't)
50pub const EXPAND_TRIPPLE_CONTRACTIONS_JSON :&str =
51    include_str!("../data/expand/tripple_contractions.json");
52/// Contains most of `CONTRACTIONS_SINGLE_JSON` contractions but without apostroph
53pub const EXPAND_SINGLE_NO_APOSTROPHE_CONTRACTIONS_JSON :&str =
54    include_str!("../data/expand/single_no_apostroph_contractions.json");
55/// Contains most of `CONTRACTIONS_DOUBLE_JSON` contractions but without apostroph
56pub const EXPAND_DOUBLE_NO_APOSTROPHE_CONTRACTIONS_JSON :&str =
57    include_str!("../data/expand/double_no_apostroph_contractions.json");
58/// Contains partial contractions in json form. (eg: 'm, 've, n't, 're)
59pub const EXPAND_PARTIAL_CONTRACTIONS_JSON :&str =
60    include_str!("../data/expand/partial_contractions.json");
61
62/// The list of all json strings.
63///
64/// The order is preserved and will be processed from top to bottom.
65pub const CONTRACTIONS_JSON_ORDER :&[&str] = &[
66    EXPAND_SLANG_JSON,
67    EXPAND_DOUBLE_NO_APOSTROPHE_CONTRACTIONS_JSON,
68    EXPAND_SINGLE_NO_APOSTROPHE_CONTRACTIONS_JSON,
69    EXPAND_TRIPPLE_CONTRACTIONS_JSON,
70    EXPAND_DOUBLE_CONTRACTIONS_JSON,
71    EXPAND_SINGLE_CONTRACTIONS_JSON,
72];
73
74/// [`Contraction`](struct.Contraction.html) holds search term and the replacement-pairs
75#[derive(Debug, Serialize, Deserialize)]
76struct Contraction {
77    #[serde(with = "serde_regex")]
78    find :Regex,
79    replace :LinkedHashMap<RegexWrapper, String>,
80}
81
82impl Contraction {
83    fn is_match(&self, text :&str) -> bool {
84        if self.find.is_match(text) {
85            debug!(
86                "Found match - Pattern: \"{}\" - Text: \"{}\"",
87                &self.find, &text
88            );
89            true
90        } else {
91            false
92        }
93    }
94
95    fn replace_all(&self, text :&mut String) {
96        debug!("Replace all - Pattern: \"{}\"", &self.find);
97        for (search, replace) in self.replace.iter() {
98            *text = search.0.replace_all(text, replace).into_owned();
99        }
100    }
101}
102
103/// Main actor in the [`contractions`](https://docs.rs/contractions) crate
104///
105/// Stores [`Contractions`](struct.Contractions.html) in a [`Vec`](https://doc.rust-lang.org/std/vec/struct.Vec.html)
106///
107/// # Example
108/// ```
109/// let contractions = contractions::Contractions::default();
110/// assert_eq!("I am sure you would have been fine.", contractions.apply("I’m sure you’d’ve been fine."));
111/// assert_eq!("Are you sure?", contractions.apply("R u sure?"));
112/// ```
113#[derive(Debug)]
114pub struct Contractions {
115    contractions :Vec<Contraction>,
116}
117
118impl Default for Contractions {
119    /// Returns the built in configuration for [`Contractions`](struct.Contractions.html)
120    ///
121    /// # Example
122    /// ```
123    /// use contractions::Contractions;
124    /// let contractions = Contractions::new();
125    /// ```
126    /// # Panics
127    /// Only panics when the library internal configuration is faulty
128    /// this ought to only happen during development
129    fn default() -> Self {
130        Self::from_json(CONTRACTIONS_JSON_ORDER).unwrap()
131    }
132}
133
134impl Contractions {
135    /// Creates empty [`Contractions`](struct.Contractions.html)
136    ///
137    /// # Example
138    /// ```
139    /// use contractions::{self, Contractions};
140    /// let contractions = Contractions::new();
141    /// ```
142    #[must_use]
143    pub const fn new() -> Self {
144        Self {
145            contractions :vec![],
146        }
147    }
148
149    /// Deserialize `Contraction` from json
150    ///
151    /// Convenience method that chains [`Contractions::new()`](struct.Contractions.html#method.new)
152    /// and [`Contractions::add_from_json()`](struct.Contractions.html#method.add_from_json)
153    ///
154    /// # Example
155    /// ```
156    /// use contractions::{self, Contractions};
157    /// let contractions = Contractions::from_json(&[contractions::SINGLE_CONTRACTIONS_JSON, contractions::SINGLE_NO_APOSTROPHE_CONTRACTIONS_JSON]);
158    /// ```
159    /// # Errors
160    /// Returns an Error if deserialization fails
161    pub fn from_json(contractions_as_str :&[&str]) -> Result<Self, Box<dyn std::error::Error>> {
162        let mut contractions = Self::new();
163        for s in contractions_as_str {
164            contractions.add_from_json(s)?;
165        }
166        Ok(contractions)
167    }
168
169    /// Add `Contraction`s from a json string to an existing
170    /// [`Contractions`](struct.Contractions.html) struct
171    ///
172    /// # Example
173    /// ```
174    /// use contractions::{self, Contractions};
175    /// let mut contractions = Contractions::new();
176    /// contractions.add_from_json(contractions::SINGLE_CONTRACTIONS_JSON);
177    /// ```
178    ///
179    /// # Errors
180    /// Returns an Error if deserialization fails
181    pub fn add_from_json(
182        &mut self,
183        contractions_as_str :&str,
184    ) -> Result<(), Box<dyn std::error::Error>> {
185        let mut contr_part :Vec<Contraction> = serde_json::from_str(contractions_as_str)?;
186        debug!("Added contractions from json.\n{:#?}\n", contr_part);
187        self.contractions.append(&mut contr_part);
188        Ok(())
189    }
190
191    /// Remove a `Contraction` from [`Contractions`](struct.Contractions.html)
192    ///
193    /// Provide the exact `find` key to delete the corresponding `Contraction`
194    ///
195    /// # Example
196    /// ```
197    /// use contractions::{self, Contractions};
198    /// let mut contractions = Contractions::new();
199    /// assert_eq!("I’m happy", contractions.apply("I’m happy"));
200    /// contractions.add_from_json(contractions::SINGLE_CONTRACTIONS_JSON);
201    /// assert_eq!("I am happy", contractions.apply("I’m happy"));
202    /// contractions.remove("\\b(?i)i['’`]m(?-i)\\b");
203    /// assert_eq!("I’m happy", contractions.apply("I’m happy"));
204    /// ```
205    pub fn remove(&mut self, key :&str) {
206        self.contractions.retain(|c| c.find.as_str() != key);
207    }
208
209    /// Add a contraction to [`Contractions`](struct.Contractions.html)
210    ///
211    /// # Example
212    /// ```
213    /// use contractions::{self, Contractions};
214    /// let mut contractions = Contractions::new();
215    /// assert_eq!("I’m happy", contractions.apply("I’m happy"));
216    /// let find = r#"\b(?i)i['’`]m(?-i)\b"#;
217    /// let mut replace = linked_hash_map::LinkedHashMap::new();
218    /// replace.insert(r#"\bi['’`]m\b"#, "i am");
219    /// replace.insert(r#"\bI['’`]m\b"#, "I am");
220    /// replace.insert(r#"\bI['’`]M\b"#, "I AM");
221    /// contractions.add(find, replace);
222    /// assert_eq!("I am happy", contractions.apply("I’m happy"));
223    /// ```
224    ///
225    /// # Errors
226    /// Returns an Error if `find` or the key in the `replace`
227    /// cannot be successfully turned into a Regex
228    pub fn add(
229        &mut self,
230        find :&str,
231        replace :LinkedHashMap<&str, &str>,
232    ) -> Result<(), Box<dyn std::error::Error>> {
233        let find = Regex::new(find)?;
234        let in_replace = replace;
235        let mut replace :LinkedHashMap<RegexWrapper, String> = LinkedHashMap::new();
236        for (f, r) in in_replace {
237            replace.insert(RegexWrapper(Regex::new(f)?), r.to_string());
238        }
239
240        let contraction = Contraction { find, replace };
241        self.contractions.push(contraction);
242        Ok(())
243    }
244
245    /// Replace contractions with their long form
246    ///
247    /// # Example
248    /// ```
249    /// use contractions::Contractions;
250    /// let contractions = Contractions::default();
251    /// assert_eq!("I am your brother’s son", contractions.apply("I’m your brother’s son"));
252    /// ```
253    #[must_use]
254    pub fn apply(&self, input :&str) -> String {
255        let mut output = input.to_string();
256        for contraction in &self.contractions {
257            if contraction.is_match(&output) {
258                contraction.replace_all(&mut output);
259            }
260        }
261        output
262    }
263}