Skip to main content

harper_core/
irregular_verbs.rs

1use serde::Deserialize;
2use std::sync::{Arc, LazyLock};
3
4type Verb = (String, String, String);
5
6#[derive(Debug, Deserialize)]
7pub struct IrregularVerbs {
8    verbs: Vec<Verb>,
9}
10
11/// The uncached function that is used to produce the original copy of the
12/// irregular verb table.
13fn uncached_inner_new() -> Arc<IrregularVerbs> {
14    IrregularVerbs::from_json_file(include_str!("../irregular_verbs.json"))
15        .map(Arc::new)
16        .unwrap_or_else(|e| panic!("Failed to load irregular verb table: {}", e))
17}
18
19static VERBS: LazyLock<Arc<IrregularVerbs>> = LazyLock::new(uncached_inner_new);
20
21impl IrregularVerbs {
22    pub fn new() -> Self {
23        Self { verbs: vec![] }
24    }
25
26    pub fn from_json_file(json: &str) -> Result<Self, serde_json::Error> {
27        // Deserialize into Vec<serde_json::Value> to handle mixed types
28        let values: Vec<serde_json::Value> =
29            serde_json::from_str(json).expect("Failed to parse irregular verbs JSON");
30
31        let mut verbs = Vec::new();
32
33        for value in values {
34            match value {
35                serde_json::Value::Array(arr) if arr.len() == 3 => {
36                    // Handle array of 3 strings
37                    if let (Some(lemma), Some(preterite), Some(past_participle)) =
38                        (arr[0].as_str(), arr[1].as_str(), arr[2].as_str())
39                    {
40                        verbs.push((
41                            lemma.to_string(),
42                            preterite.to_string(),
43                            past_participle.to_string(),
44                        ));
45                    }
46                }
47                // Strings are used for comments to guide contributors editing the file
48                serde_json::Value::String(_) => {}
49                _ => {}
50            }
51        }
52
53        Ok(Self { verbs })
54    }
55
56    pub fn curated() -> Arc<Self> {
57        (*VERBS).clone()
58    }
59
60    pub fn get_past_participle_for_preterite(&self, preterite: &str) -> Option<&str> {
61        self.verbs
62            .iter()
63            .find(|(_, pt, _)| pt.eq_ignore_ascii_case(preterite))
64            .map(|(_, _, pp)| pp.as_str())
65    }
66
67    pub fn get_lemma_for_preterite(&self, preterite: &str) -> Option<&str> {
68        self.verbs
69            .iter()
70            .find(|(_, pt, _)| pt.eq_ignore_ascii_case(preterite))
71            .map(|(lemma, _, _)| lemma.as_str())
72    }
73
74    pub fn get_pasts_for_lemma(&self, lemma: &str) -> Option<(&str, &str)> {
75        self.verbs
76            .iter()
77            .find(|(l, _, _)| l.eq_ignore_ascii_case(lemma))
78            .map(|(_, pt, pp)| (pt.as_str(), pp.as_str()))
79    }
80}
81
82impl Default for IrregularVerbs {
83    fn default() -> Self {
84        Self::new()
85    }
86}
87
#[cfg(test)]
mod tests {
    use super::IrregularVerbs;

    /// Looks up the past participle for `preterite` in the curated table,
    /// returning an owned copy so the shared table handle can be dropped.
    fn past_participle_of(preterite: &str) -> Option<String> {
        IrregularVerbs::curated()
            .get_past_participle_for_preterite(preterite)
            .map(str::to_owned)
    }

    #[test]
    fn can_find_irregular_past_participle_for_preterite_lowercase() {
        let found = past_participle_of("arose");
        assert_eq!(found.as_deref(), Some("arisen"));
    }

    #[test]
    fn can_find_irregular_past_participle_for_preterite_uppercase() {
        let found = past_participle_of("WENT");
        assert_eq!(found.as_deref(), Some("gone"));
    }

    #[test]
    fn can_find_irregular_past_participle_same_as_past_tense() {
        let found = past_participle_of("taught");
        assert_eq!(found.as_deref(), Some("taught"));
    }

    #[test]
    fn cant_find_regular_past_participle() {
        let found = past_participle_of("walked");
        assert_eq!(found.as_deref(), None);
    }

    #[test]
    fn cant_find_non_verb() {
        let found = past_participle_of("the");
        assert_eq!(found.as_deref(), None);
    }
}
131}