1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
use std::collections::HashMap;
use reqwest::Client;
use serde::{Deserialize, Deserializer};
pub mod error;
use error::TezaursApiError;


use std::str::FromStr;
use serde_json::Value;

/// Base URL of the Tezaurs web service; every request path in this file is
/// built by appending `/<endpoint>/<argument>` to it.
const API: &str = "http://api.tezaurs.lv:8182";

fn deserialize_from_string<'de, D>(deserializer: D) -> Result<usize, D::Error>
    where
        D: Deserializer<'de>,
{
    let s = String::deserialize(deserializer)?;
    usize::from_str(&s).map_err(serde::de::Error::custom)
}


/// Serde helper for optional fields that carry an unsigned integer encoded as
/// a JSON string.
///
/// Returns `Some(n)` when the value is a string that parses as `usize`, and
/// `None` when the value is an explicit `null` or a string that does not
/// parse. Unparseable strings are deliberately tolerated rather than turned
/// into an error. A value that is neither a string nor `null` is still a
/// deserialization error.
fn deserialize_from_string_option<'de, D>(deserializer: D) -> Result<Option<usize>, D::Error>
where
    D: Deserializer<'de>,
{
    // Going through `Option<String>` lets an explicit `null` map to `None`
    // instead of failing with "expected a string".
    let raw = Option::<String>::deserialize(deserializer)?;
    Ok(raw.and_then(|text| usize::from_str(&text).ok()))
}


/// Handle for talking to the Tezaurs web service located at [`API`].
///
/// Construct it with `TezaursApi::new` and call the async endpoint methods.
pub struct TezaursApi {
    /// HTTP client through which all requests of this handle are sent.
    client: Client,
}

impl TezaursApi {
    pub fn new() -> Self {
        Self {
            client: Client::new(),
        }
    }

    pub async fn analyze(&self, word: String) -> Result<Vec<AnalyzedWord>, TezaursApiError> {
        let url = format!("{}/analyze/{}", API, word);
        let response = self.client.get(url).send().await?;
        let response_text = response.text().await?;
        let json: serde_json::Value = serde_json::from_str(&response_text)?;
        let data: Vec<AnalyzedWord> = serde_json::from_value(json.clone())?;

        Ok(data)
    }


    pub async fn tokenize(&self, sentence: String) -> Result<Vec<Token>, TezaursApiError> {
        let url = format!("{}/tokenize/{}", API, sentence);
        let response = self.client.get(url).send().await?;
        let response_text = response.text().await?;
        let json: Value = serde_json::from_str(&response_text)?;
        let data: Vec<Token> = serde_json::from_value(json.clone())?;
        Ok(data)
    }

    pub async fn normalize_phrase(&self, sentence: String) -> Result<String, TezaursApiError> {
        let url = format!("{}/normalize_phrase/{}", API, sentence);
        let response = self.client.get(url).send().await?;
        let response_text = response.text().await?;
        Ok(response_text)
    }

    pub async fn suitable_paradigm(&self, word: String) -> Result<Vec<Paradigm>, TezaursApiError> {
        let url = format!("{}/suitable_paradigm/{}", API, word);
        let response = self.client.get(url).send().await?;
        let response_text = response.text().await?;
        let json: Value = serde_json::from_str(&response_text)?;
        let data: Vec<Paradigm> = serde_json::from_value(json.clone())?;
        Ok(data)
    }

    pub async fn morphotagger(&self, sentence: String) -> Result<String, TezaursApiError> {
        let url = format!("{}/morphotagger/{}", API, sentence);
        let response = self.client.get(url).send().await?;
        let response_text = response.text().await?;
        Ok(response_text)
    }

    pub async fn verbs(&self, word: String) -> Result<Vec<String>, TezaursApiError> {
        let url = format!("{}/verbs/{}", API, word);
        let response = self.client.get(url).send().await?;
        let response_text = response.text().await?;
        let elements: Vec<&str> = response_text[1..response_text.len()-1].split(',').collect();

        let vec: Vec<String> = elements.iter()
            .map(|s| s.trim().replace("\"", ""))
            .collect();
        Ok(vec)
    }

    pub async fn inflect_phrase(&self, sentence: String) -> Result<Vec<Inflection>, TezaursApiError> {
        let url = format!("{}/inflect_phrase/{}", API, sentence);
        let response = self.client.get(url).send().await?;
        let response_text = response.text().await?;
        let json: Value = serde_json::from_str(&response_text)?;
        let map: HashMap<String, String> = serde_json::from_value(json).unwrap();
        let inflections: Vec<Inflection> = map.into_iter().map(|(case, sentence)| {
            let case = match case.as_str() {
                "Akuzatīvs" => Case::Accusative,
                "Ģenitīvs" => Case::Genitive,
                "Datīvs" => Case::Datīvs,
                "Lokatīvs" => Case::Locative,
                "Nominatīvs" => Case::Nominative,
                "Instrumentālis" => Case::Instrumental,
                "Vokatīvs" => Case::Vocative,
                _ => panic!("Unexpected case"), // Handle unexpected cases appropriately
            };
            Inflection { case, sentence }
        }).collect();

        Ok(inflections)
    }
}

/// One element of the list returned by `TezaursApi::suitable_paradigm`.
#[derive(Deserialize, Debug)]
pub struct Paradigm {
    /// Value of the JSON key `ID`.
    #[serde(rename = "ID")]
    pub id: u32,
    /// Value of the JSON key `Description`.
    #[serde(rename = "Description")]
    pub description: String,
}

/// Wrapper object holding a list of tokens under the JSON key `tokens`.
///
/// NOTE(review): nothing visible in this file uses this type;
/// `TezaursApi::tokenize` deserializes a bare `Vec<Token>` instead. Confirm
/// whether it is still needed.
#[derive(Deserialize, Debug)]
pub struct TokenizeResponse {
    /// The wrapped tokens.
    pub tokens: Vec<Token>
}

/// One element of the list returned by `TezaursApi::tokenize`.
#[derive(Deserialize, Debug)]
pub struct Token {
    /// Value of the JSON key `Vārds`.
    #[serde(rename = "Vārds")]
    pub word: String,
    /// Value of the JSON key `Marķējums`, kept as the raw tag string.
    #[serde(rename = "Marķējums")]
    pub tag: String,
    /// Value of the JSON key `Pamatforma`.
    #[serde(rename = "Pamatforma")]
    pub base_form: String,
}

/// One element of the list returned by `TezaursApi::analyze`.
///
/// Each field is read from the Latvian JSON key named in its `rename`
/// attribute. Fields using `deserialize_from_string` expect the number to
/// arrive as a JSON string. All non-`Option` fields are required, so an
/// element that lacks any of them fails to deserialize.
#[derive(Deserialize, Debug)]
pub struct AnalyzedWord {
    /// Value of the JSON key `Skaitlis`.
    #[serde(rename = "Skaitlis")]
    pub number: Number,
    /// Value of the JSON key `Šķirkļa ID`; `None` when the key is missing or
    /// its string value does not parse as `usize`.
    #[serde(rename = "Šķirkļa ID", default, deserialize_with = "deserialize_from_string_option")]
    pub id: Option<usize>,
    /// Value of the JSON key `Vārds`.
    #[serde(rename = "Vārds")]
    pub word: String,
    /// Value of the JSON key `Šķirkļa cilvēklasāmais ID`, if present.
    #[serde(rename = "Šķirkļa cilvēklasāmais ID")]
    pub word_id: Option<String>,
    /// Value of the JSON key `Leksēmas nr`, parsed from a string.
    #[serde(rename = "Leksēmas nr", deserialize_with = "deserialize_from_string")]
    pub lexem: usize,
    /// Value of the JSON key `FreeText`, if present.
    #[serde(rename = "FreeText")]
    pub free_text: Option<String>,
    /// Value of the JSON key `Galotnes nr`, parsed from a string.
    #[serde(rename = "Galotnes nr", deserialize_with = "deserialize_from_string")]
    pub end: usize,
    /// Value of the JSON key `Avots`, if present.
    #[serde(rename = "Avots")]
    pub source: Option<String>,
    /// Value of the JSON key `Vārdšķira`.
    #[serde(rename = "Vārdšķira")]
    pub part_of_speech: PartOfSpeech,
    /// Value of the JSON key `Mija`, parsed from a string.
    #[serde(rename = "Mija", deserialize_with = "deserialize_from_string")]
    pub swap: usize,
    /// Value of the JSON key `Minēšana`, kept as the raw string.
    #[serde(rename = "Minēšana")]
    pub mention: String,
    /// Value of the JSON key `Pamatforma`.
    #[serde(rename = "Pamatforma")]
    pub basic_form: String,
    /// Value of the JSON key `Locījums`.
    #[serde(rename = "Locījums")]
    pub case: Case,
    /// Value of the JSON key `Dzimte`.
    #[serde(rename = "Dzimte")]
    pub gender: Gender,
    /// Value of the JSON key `Vārdgrupas nr`, parsed from a string.
    #[serde(rename = "Vārdgrupas nr", deserialize_with = "deserialize_from_string")]
    pub group: usize,
    /// Value of the JSON key `Deklinācija`, parsed from a string.
    #[serde(rename = "Deklinācija", deserialize_with = "deserialize_from_string")]
    pub declination: usize,

}

/// Grammatical number, deserialized from its Latvian name.
///
/// Any string other than the two listed below is a deserialization error.
#[derive(Deserialize, Debug)]

pub enum Number {
    /// Serialized as `Vienskaitlis`.
    #[serde(rename = "Vienskaitlis")]
    Singular,
    /// Serialized as `Daudzskaitlis`.
    #[serde(rename = "Daudzskaitlis")]
    Plural,
}

/// Grammatical gender, deserialized from its Latvian name.
///
/// Any string other than the two listed below is a deserialization error.
#[derive(Deserialize, Debug)]

pub enum Gender {
    /// Serialized as `Sieviešu`.
    #[serde(rename = "Sieviešu")]
    Female,
    /// Serialized as `Vīriešu`.
    #[serde(rename = "Vīriešu")]
    Male,
}

/// Part of speech, deserialized from its Latvian name.
///
/// Only the four values listed below are recognized; any other string is a
/// deserialization error.
#[derive(Deserialize, Debug)]

pub enum PartOfSpeech {
    /// Serialized as `Lietvārds`.
    #[serde(rename = "Lietvārds")]
    Noun,
    /// Serialized as `Īpašības vārds`.
    #[serde(rename = "Īpašības vārds")]
    Adjective,
    /// Serialized as `Skaitļa vārds`.
    #[serde(rename = "Skaitļa vārds")]
    Numeral,
    /// Serialized as `Darbības vārds`.
    #[serde(rename = "Darbības vārds")]
    Verb,
}


/// A phrase inflected into one grammatical case, as produced by
/// `TezaursApi::inflect_phrase`.
#[derive(Deserialize, Debug)]
pub struct Inflection {
    /// The grammatical case of `sentence`.
    pub case: Case,
    /// The inflected phrase text.
    pub sentence: String,
}

/// Grammatical case, deserialized from its Latvian name.
///
/// Any string other than the seven listed below is a deserialization error.
#[derive(Deserialize, Debug)]

pub enum Case {
    /// Serialized as `Nominatīvs`.
    #[serde(rename = "Nominatīvs")]
    Nominative,
    /// Serialized as `Ģenitīvs`.
    #[serde(rename = "Ģenitīvs")]
    Genitive,
    /// Serialized as `Datīvs`. Unlike its siblings, this variant keeps the
    /// Latvian spelling as its Rust name.
    #[serde(rename = "Datīvs")]
    Datīvs,
    /// Serialized as `Akuzatīvs`.
    #[serde(rename = "Akuzatīvs")]
    Accusative,
    /// Serialized as `Instrumentālis`.
    #[serde(rename = "Instrumentālis")]
    Instrumental ,
    /// Serialized as `Lokatīvs`.
    #[serde(rename = "Lokatīvs")]
    Locative,
    /// Serialized as `Vokatīvs`.
    #[serde(rename = "Vokatīvs")]
    Vocative,
}



#[cfg(test)]
mod tests {
    use crate::TezaursApi;

    /// Smoke test: calls every endpoint of the live service once and prints
    /// the result. It needs network access and passes as long as no call
    /// returns an error. The comments show sample output from an earlier run.
    #[tokio::test]
    async fn my_test() -> Result<(), Box<dyn std::error::Error>> {
        let api = TezaursApi::new();

        let words = api.analyze("jūra".to_string()).await?;
        println!("{:?}", words);
        // [AnalyzedWord { number: Singular, id: None, word: "jūra", word_id: None, lexem: 1033983, free_text: None, end: 28, source: Some("VVC paplašinātais vārdadienu saraksts 2014-10-31"), part_of_speech: Noun, swap: 1, mention: "Nav", basic_form: "Jūris", case: Genitive, gender: Male, group: 3, declination: 2 }, AnalyzedWord { number: Singular, id: Some(134187), word: "jūra", word_id: Some("jūra:1"), lexem: 138064, free_text: None, end: 75, source: None, part_of_speech: Noun, swap: 0, mention: "Nav", basic_form: "jūra", case: Nominative, gender: Female, group: 7, declination: 4 }]

        let tokens = api.tokenize(String::from("es domāju")).await?;
        println!("{:?}", tokens);
        // [Token { word: "es", tag: "pp10snn", base_form: "es" }, Token { word: "domāju", tag: "vmnip_21san", base_form: "domāt" }]

        let text = api
            .normalize_phrase(String::from("Latvijas Universitātes Matemātikas un Informātikas Institūtam"))
            .await?;
        println!("{:?}", text);
        // Latvijas Universitātes Matemātikas un Informātikas Institūts

        let inflections = api
            .inflect_phrase(String::from("Latvijas Universitātes Matemātikas un Informātikas Institūtam"))
            .await?;
        println!("{:?}", inflections);
        // [Inflection { case: Accusative, sentence: "Latvijas Universitātes Matemātikas un Informātikas Institūtu" }, Inflection { case: Datīvs, sentence: "Latvijas Universitātes Matemātikas un Informātikas Institūtam" }, Inflection { case: Locative, sentence: "Latvijas Universitātes Matemātikas un Informātikas Institūtā" }, Inflection { case: Nominative, sentence: "Latvijas Universitātes Matemātikas un Informātikas Institūts" }, Inflection { case: Genitive, sentence: "Latvijas Universitātes Matemātikas un Informātikas Institūta" }]

        let paradigms = api.suitable_paradigm(String::from("pokemonizators")).await?;
        println!("{:?}", paradigms);
        // [Paradigm { id: 1, description: "noun-1a" }, Paradigm { id: 13, description: "adj-1" }, Paradigm { id: 39, description: "foreign" }]

        let morphs = api.morphotagger(String::from("vīrs ar cirvi.")).await?;
        println!("{:?}", morphs);
        // vīrs    ncmsn1  vīrs
        // ar      spsa    ar
        // cirvi   ncmsa2  cirvis
        // .       zs      .

        let inflcs = api.verbs(String::from("domai")).await?;
        println!("{:?}", inflcs);
        // ["Dat", "Nom", "Gen", "Acc", "Loc"]

        Ok(())
    }
}