1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
use crate::Result;
use serde::Deserialize;

/// A single word from a response together with its associated data.
///
/// Values of this type are produced when a [Response](Response) is parsed with
/// its `list()` method. Note that every optional field can still be `None` even
/// if the corresponding meta data flag is set
#[derive(Debug, PartialEq)]
pub struct WordElement {
    /// The word returned based on the search parameters
    pub word: String,
    /// A score which ranks the word based on how well it fit the provided parameters.
    /// Note that by default the words are ranked by score from highest to lowest
    pub score: usize,
    /// The number of syllables the word has. This will only have a value if
    /// the meta data flag [SyllableCount](crate::MetaDataFlag::SyllableCount) is set
    pub num_syllables: Option<usize>,
    /// The part(s) of speech a word can be. This will only have a value if
    /// the meta data flag [PartsOfSpeech](crate::MetaDataFlag::PartsOfSpeech) is set.
    /// It is `None` (never an empty list) when no recognized part of speech was found
    pub parts_of_speech: Option<Vec<PartOfSpeech>>,
    /// The pronunciation of the word. This will only have a value if
    /// the meta data flag [Pronunciation](crate::MetaDataFlag::Pronunciation) is set.
    /// When the response carries both a plain and an IPA pronunciation for the word,
    /// the IPA pronunciation is the one stored here
    pub pronunciation: Option<String>,
    /// The frequency of a word based on how many times the word is used per 1,000,000
    /// words of text. This will only have a value if the meta data flag
    /// [WordFrequency](crate::MetaDataFlag::WordFrequency) is set
    pub frequency: Option<f32>,
    /// Definitions of a word and the associated part of speech with its use. This will only
    /// have a value if the meta data flag [Definitions](crate::MetaDataFlag::Definitions) is set
    pub definitions: Option<Vec<Definition>>,
}

/// A struct representing a single definition of a word
#[derive(Debug, PartialEq)]
pub struct Definition {
    /// The part of speech associated with the definition. This is `None` when the
    /// part-of-speech code attached to the definition is not one of the codes
    /// that map to a [PartOfSpeech](PartOfSpeech) variant
    pub part_of_speech: Option<PartOfSpeech>,
    /// The text of the definition itself
    pub definition: String,
}

/// A struct representing a response from a request.
/// The response is stored unparsed; it can be parsed into a word list
/// using the list() method
#[derive(Debug)]
pub struct Response {
    /// The JSON text handed to the constructor. It is only deserialized
    /// when list() is called
    json: String,
}

/// An enum representing all possible parts of speech returned from the api.
/// Each variant corresponds to one short code used in the response data; codes
/// without a variant here (such as the undefined code `u`) are not represented
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum PartOfSpeech {
    /// Noun, parsed from the code `n`
    Noun,
    /// Adjective, parsed from the code `adj`
    Adjective,
    /// Adverb, parsed from the code `adv`
    Adverb,
    /// Verb, parsed from the code `v`
    Verb,
}

/// Raw shape of one entry of the JSON list contained in a response.
///
/// Field names are matched against camelCase JSON keys, so `num_syllables` is
/// read from the key `numSyllables`. The `tags` and `defs` lists are kept as raw
/// strings here and are only interpreted by `word_obj_to_word_elem`.
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct DatamuseWordObject {
    /// The word itself
    word: String,
    /// Ranking score of the word
    score: usize,
    /// Syllable count, if present in the entry
    num_syllables: Option<usize>,
    /// Raw metadata tags. The converter understands bare part-of-speech codes
    /// (e.g. `n`) and `key:value` entries with the keys `f`, `pron` and `ipa_pron`
    tags: Option<Vec<String>>,
    /// Raw definitions, each expected as a part-of-speech code and the
    /// definition text separated by a tab character
    defs: Option<Vec<String>>,
}

impl Response {
    /// Parses the response into a list of word elements
    ///
    /// # Errors
    /// Returns the error reported by `parse_response` when the stored JSON
    /// text cannot be deserialized
    pub fn list(&self) -> Result<Vec<WordElement>> {
        let raw_json = self.json.as_str();
        parse_response(raw_json)
    }

    /// Wraps the given JSON text in a `Response` without parsing it
    pub(crate) fn new(json: String) -> Self {
        Self { json }
    }
}

impl PartOfSpeech {
    /// Converts a part-of-speech code from the response data into its variant.
    ///
    /// The recognized codes are `n`, `adj`, `adv` and `v` (exact, case-sensitive
    /// match). Every other input, including the undefined code `u`, gives `None`.
    fn from_str(pos: &str) -> Option<Self> {
        let known_codes = [
            ("n", Self::Noun),
            ("adj", Self::Adjective),
            ("adv", Self::Adverb),
            ("v", Self::Verb),
        ];

        known_codes
            .iter()
            .find(|(code, _)| *code == pos)
            .map(|&(_, variant)| variant)
    }
}

fn parse_response(response: &str) -> Result<Vec<WordElement>> {
    let word_list: Vec<DatamuseWordObject> = serde_json::from_str(response)?;
    let mut converted_word_list: Vec<WordElement> = Vec::new();

    for word in word_list {
        converted_word_list.push(word_obj_to_word_elem(word));
    }

    Ok(converted_word_list)
}

/// Converts one deserialized word object into the public [`WordElement`].
///
/// `word`, `score` and `num_syllables` are carried over unchanged. The other
/// fields are derived from the raw `tags` and `defs` string lists:
///
/// * a tag `f:<number>` sets `frequency`; a value that does not parse as a
///   number results in `None`
/// * a tag `pron:<text>` sets `pronunciation` unless one is already stored
/// * a tag `ipa_pron:<text>` always sets `pronunciation`, so it takes
///   precedence over `pron` regardless of tag order
/// * every other tag is looked up as a part-of-speech code; unknown tags are
///   skipped
/// * a definition `<code>\t<text>` becomes a [`Definition`]; entries without a
///   tab are skipped
///
/// Tags and definitions are split at their FIRST delimiter only, so a value
/// that itself contains `:` (or a definition text that contains a tab) is kept
/// intact instead of being dropped. `parts_of_speech` and `definitions` are
/// `None`, never an empty list, when nothing usable was found.
fn word_obj_to_word_elem(word_obj: DatamuseWordObject) -> WordElement {
    let mut parts_of_speech: Vec<PartOfSpeech> = Vec::new();
    let mut pronunciation: Option<String> = None;
    let mut frequency: Option<f32> = None;

    for tag in word_obj.tags.unwrap_or_default() {
        // Split once: everything after the first ':' belongs to the value.
        let mut pieces = tag.splitn(2, ':');
        let key = pieces.next().unwrap_or("");
        let value = pieces.next();

        match (key, value) {
            ("f", Some(raw)) => frequency = raw.parse().ok(),
            ("pron", Some(raw)) => {
                // Never overwrite a stored pronunciation: it may be the IPA one.
                if pronunciation.is_none() {
                    pronunciation = Some(raw.to_string());
                }
            }
            ("ipa_pron", Some(raw)) => pronunciation = Some(raw.to_string()),
            // Bare tags (and the keys above without a value) are tried as a
            // part-of-speech code; anything unrecognized is ignored.
            (code, _) => {
                if let Some(pos) = PartOfSpeech::from_str(code) {
                    parts_of_speech.push(pos);
                }
            }
        }
    }

    let def_list: Vec<Definition> = word_obj
        .defs
        .unwrap_or_default()
        .iter()
        .filter_map(|entry| {
            // Split once: a tab inside the definition text must not drop it.
            let mut pieces = entry.splitn(2, '\t');
            let code = pieces.next()?;
            let text = pieces.next()?;

            Some(Definition {
                part_of_speech: PartOfSpeech::from_str(code),
                definition: text.to_string(),
            })
        })
        .collect();

    WordElement {
        word: word_obj.word,
        score: word_obj.score,
        num_syllables: word_obj.num_syllables,
        // Report "nothing found" uniformly as `None` rather than `Some(vec![])`.
        parts_of_speech: Some(parts_of_speech).filter(|list| !list.is_empty()),
        pronunciation,
        frequency,
        definitions: Some(def_list).filter(|list| !list.is_empty()),
    }
}

#[cfg(test)]
mod tests {
    use super::DatamuseWordObject;
    use crate::{Definition, PartOfSpeech, WordElement};

    /// Builds the element that both tests expect for the "cow" fixture.
    fn cow_element() -> WordElement {
        let noun_definition = |text: &str| Definition {
            part_of_speech: Some(PartOfSpeech::Noun),
            definition: String::from(text),
        };

        WordElement {
            word: String::from("cow"),
            score: 2168,
            num_syllables: Some(1),
            parts_of_speech: Some(vec![PartOfSpeech::Noun]),
            pronunciation: Some(String::from("K AW1 ")),
            frequency: Some(16.567268),
            definitions: Some(vec![
                noun_definition("mature female of mammals of which the male is called `bull'"),
                noun_definition("female of domestic cattle"),
            ]),
        }
    }

    /// Converting a hand-built word object yields the fully populated element.
    #[test]
    fn word_obj_to_word_elem() {
        let raw_tags = ["n", "pron:K AW1 ", "f:16.567268"];
        let raw_defs = [
            "n\tmature female of mammals of which the male is called `bull'",
            "n\tfemale of domestic cattle",
        ];

        let input = DatamuseWordObject {
            word: String::from("cow"),
            score: 2168,
            num_syllables: Some(1),
            tags: Some(raw_tags.iter().map(|tag| tag.to_string()).collect()),
            defs: Some(raw_defs.iter().map(|def| def.to_string()).collect()),
        };

        let converted = super::word_obj_to_word_elem(input);

        assert_eq!(cow_element(), converted);
    }

    /// Parsing a JSON list yields one element per entry, in order; empty tag and
    /// definition lists leave the optional fields unset.
    #[test]
    fn json_to_word_elem() {
        let json = r#"
        [
            {
                "word":"milk",
                "score":2168,
                "numSyllables":1,
                "tags": [],
                "defs": []
            },
            {
                "word":"cow",
                "score":2168,
                "numSyllables":1,
                "tags": [
                    "n",
                    "pron:K AW1 ",
                    "f:16.567268"
                ],
                "defs": [
                    "n\tmature female of mammals of which the male is called `bull'",
                    "n\tfemale of domestic cattle"
                ]
            }
        ]
        "#;

        let parsed = super::parse_response(json).unwrap();

        let milk_element = WordElement {
            word: String::from("milk"),
            score: 2168,
            num_syllables: Some(1),
            parts_of_speech: None,
            pronunciation: None,
            frequency: None,
            definitions: None,
        };

        assert_eq!(milk_element, parsed[0]);
        assert_eq!(cow_element(), parsed[1]);
    }
}