cmark_translate/
deepl.rs

1// SPDX-License-Identifier: MIT
2//!
3//! DeepL REST API wrapper
4//!
5
6pub struct Deepl {
7    config: DeeplConfig,
8}
9
10impl Deepl {
11    // New DeepL instance from default config file (deepl.toml or ~/.deepl.toml)
12    pub fn new() -> std::io::Result<Self> {
13        let deepl_config = DeeplConfig::new()?;
14
15        Ok(Self {
16            config: deepl_config,
17        })
18    }
19
20    /// New DeepL instance from specific config file
21    pub fn with_config<P: AsRef<std::path::Path>>(config_path: P) -> std::io::Result<Self> {
22        let deepl_config = DeeplConfig::with_config(config_path)?;
23
24        Ok(Self {
25            config: deepl_config,
26        })
27    }
28
29    /// Translate single text string
30    #[allow(dead_code)]
31    pub async fn translate(
32        &self,
33        from_lang: Language,
34        to_lang: Language,
35        formality: Formality,
36        body: &str,
37    ) -> reqwest::Result<String> {
38        let mut result = self
39            .translate_strings(from_lang, to_lang, formality, &vec![body])
40            .await?;
41        if 0 < result.len() {
42            Ok(result.swap_remove(0))
43        } else {
44            // Empty response
45            Ok(String::new())
46        }
47    }
48
49    pub async fn translate_strings(
50        &self,
51        from_lang: Language,
52        to_lang: Language,
53        formality: Formality,
54        body: &Vec<&str>,
55    ) -> reqwest::Result<Vec<String>> {
56        let mut params = vec![
57            ("source_lang", from_lang.as_langcode()),
58            ("target_lang", to_lang.as_langcode()),
59            ("preserve_formatting", "1"),
60            ("formality", formality.to_str()),
61        ];
62        if let Some(glossary_id) = self.config.glossary(from_lang, to_lang) {
63            log::debug!("Use glossary {}", glossary_id);
64            params.push(("glossary_id", glossary_id));
65        }
66
67        // add texts to be translated
68        for t in body {
69            params.push(("text", *t));
70        }
71
72        // Make DeepL API request
73        let client = reqwest::Client::new();
74        let resp = client
75            .post(self.config.endpoint("translate"))
76            .header(
77                "authorization",
78                format!("DeepL-Auth-Key {}", self.config.api_key),
79            )
80            .form(&params)
81            .send()
82            .await?;
83
84        // Returns error
85        resp.error_for_status_ref()?;
86
87        // Parse response
88        let deepl_resp = resp.json::<DeeplTranslationResponse>().await?;
89        Ok(deepl_resp
90            .translations
91            .into_iter()
92            .map(|t| t.text)
93            .collect())
94    }
95
96    /// Translate XML string
97    pub async fn translate_xml(
98        &self,
99        from_lang: Language,
100        to_lang: Language,
101        formality: Formality,
102        xml_body: &str,
103    ) -> reqwest::Result<String> {
104        // Prepare request parameters
105        let mut params = vec![
106            ("source_lang", from_lang.as_langcode()),
107            ("target_lang", to_lang.as_langcode()),
108            ("preserve_formatting", "1"),
109            ("formality", formality.to_str()),
110            ("tag_handling", "xml"),
111            ("ignore_tags", "header,embed,object"),
112            (
113                "splitting_tags",
114                "blockquote,li,dt,dd,p,h1,h2,h3,h4,h5,h6,th,td",
115            ),
116            ("non_splitting_tags", "embed,em,strong,del,a,img"),
117        ];
118        if let Some(glossary_id) = self.config.glossary(from_lang, to_lang) {
119            log::debug!("Use glossary {}", glossary_id);
120            params.push(("glossary_id", glossary_id));
121        }
122        params.push(("text", xml_body));
123
124        // Make DeepL API request
125        let client = reqwest::Client::new();
126        let resp = client
127            .post(self.config.endpoint("translate"))
128            .header(
129                "authorization",
130                format!("DeepL-Auth-Key {}", self.config.api_key),
131            )
132            .form(&params)
133            .send()
134            .await?;
135
136        // Returns error
137        resp.error_for_status_ref()?;
138
139        // Parse response
140        let mut deepl_resp = resp.json::<DeeplTranslationResponse>().await?;
141        if 0 < deepl_resp.translations.len() {
142            Ok(deepl_resp.translations.swap_remove(0).text)
143        } else {
144            // Empty response
145            Ok(String::new())
146        }
147    }
148
149    /// Register new glossary
150    pub async fn register_glossaries<S: AsRef<str>>(
151        &self,
152        name: &str,
153        from_lang: Language,
154        to_lang: Language,
155        glossaries: &[(S, S)],
156    ) -> reqwest::Result<DeeplGlossary> {
157        // Remove spaces, empty items
158        let mut filtered_glossaries = glossaries
159            .iter()
160            .filter_map(|(from, to)| {
161                let from_trimed = from.as_ref().trim();
162                let to_trimed = to.as_ref().trim();
163                if from_trimed.is_empty() || to_trimed.is_empty() {
164                    None
165                } else {
166                    Some((from, to))
167                }
168            })
169            .collect::<Vec<_>>();
170
171        // Check duplicates
172        filtered_glossaries.sort_by(|(from1, _), (from2, _)| from1.as_ref().cmp(from2.as_ref()));
173        filtered_glossaries.iter().fold("", |prev_key, (from, _)| {
174            let key = from.as_ref();
175            if prev_key == key {
176                // Duplicated
177                log::warn!("Duplicated key : \"{}\"", key);
178            }
179            key
180        });
181
182        // Make TSV text
183        let tsv: String = filtered_glossaries
184            .iter()
185            .map(|(from, to)| {
186                let row = format!("{}\t{}", from.as_ref(), to.as_ref());
187                log::trace!("TSV: {}", row);
188                row
189            })
190            .collect::<Vec<String>>()
191            .join("\n");
192
193        // Make DeepL API request
194        let client = reqwest::Client::new();
195        let resp = client
196            .post(self.config.endpoint("glossaries"))
197            .header(
198                "authorization",
199                format!("DeepL-Auth-Key {}", self.config.api_key),
200            )
201            .form(&[
202                ("name", name),
203                ("source_lang", from_lang.as_langcode()),
204                ("target_lang", to_lang.as_langcode()),
205                ("entries_format", "tsv"),
206                ("entries", &tsv),
207            ])
208            .send()
209            .await?;
210
211        if let Err(err) = resp.error_for_status_ref() {
212            // Returns error with printing details
213            if let Ok(err_body_text) = resp.text().await {
214                log::error!("{}", err_body_text);
215            }
216            Err(err)
217        } else {
218            // Success, parse response
219            let deepl_resp = resp.json::<DeeplGlossary>().await?;
220            Ok(deepl_resp)
221        }
222    }
223
224    /// List registered glossaries
225    pub async fn list_glossaries(&self) -> reqwest::Result<Vec<DeeplGlossary>> {
226        // Make DeepL API request
227        let client = reqwest::Client::new();
228        let resp = client
229            .get(self.config.endpoint("glossaries"))
230            .header(
231                "authorization",
232                format!("DeepL-Auth-Key {}", self.config.api_key),
233            )
234            .send()
235            .await?;
236
237        // Returns error
238        resp.error_for_status_ref()?;
239
240        // Parse response
241        let deepl_resp = resp.json::<DeeplListGlossariesResponse>().await?;
242        Ok(deepl_resp.glossaries)
243    }
244
245    /// Remove registered glossaries
246    pub async fn remove_glossary(&self, id: &str) -> reqwest::Result<()> {
247        // Make DeepL API request
248        let client = reqwest::Client::new();
249        let resp = client
250            .delete(self.config.endpoint(&format!("glossaries/{}", id)))
251            .header(
252                "authorization",
253                format!("DeepL-Auth-Key {}", self.config.api_key),
254            )
255            .send()
256            .await?;
257
258        // Check response
259        resp.error_for_status()?;
260
261        Ok(())
262    }
263
264    /// Get usage, returns translated characters
265    pub async fn get_usage(&self) -> reqwest::Result<i32> {
266        // Make DeepL API request
267        let client = reqwest::Client::new();
268        let resp = client
269            .get(self.config.endpoint("usage"))
270            .header(
271                "authorization",
272                format!("DeepL-Auth-Key {}", self.config.api_key),
273            )
274            .send()
275            .await?;
276
277        // Returns error
278        resp.error_for_status_ref()?;
279
280        // Parse response
281        let deepl_resp = resp.json::<DeeplUsageResponse>().await?;
282        Ok(deepl_resp.character_count)
283    }
284}
285
286#[derive(Clone, Copy, serde::Deserialize)]
287pub enum Language {
288    De,
289    Es,
290    En,
291    Fr,
292    It,
293    Ja,
294    Nl,
295    Pt,
296    PtBr,
297    Ru,
298}
299
300impl Language {
301    pub fn as_langcode(&self) -> &'static str {
302        match self {
303            Self::De => "de",
304            Self::Es => "es",
305            Self::En => "en",
306            Self::Fr => "fr",
307            Self::It => "it",
308            Self::Ja => "ja",
309            Self::Nl => "nl",
310            Self::Pt => "pt-br",
311            Self::PtBr => "pt-br",
312            Self::Ru => "ru",
313        }
314    }
315}
316
317impl std::str::FromStr for Language {
318    type Err = std::io::Error;
319
320    fn from_str(s: &str) -> Result<Self, Self::Err> {
321        let lowcase = s.to_ascii_lowercase();
322        match lowcase.as_str() {
323            "de" => Ok(Self::De),
324            "es" => Ok(Self::Es),
325            "en" => Ok(Self::En),
326            "fr" => Ok(Self::Fr),
327            "it" => Ok(Self::It),
328            "ja" => Ok(Self::Ja),
329            "nl" => Ok(Self::Nl),
330            "pt" => Ok(Self::Pt),
331            "pt-br" => Ok(Self::PtBr),
332            "ru" => Ok(Self::Ru),
333            _ => Err(std::io::Error::from(std::io::ErrorKind::InvalidInput)),
334        }
335    }
336}
337
338/// Translation output formality
339#[derive(Clone, Copy, serde::Deserialize)]
340pub enum Formality {
341    Default,
342    Formal,
343    Informal,
344}
345
346impl Formality {
347    pub fn to_str(&self) -> &'static str {
348        match self {
349            Self::Default => "default",
350            Self::Formal => "prefer_more",
351            Self::Informal => "prefer_less",
352        }
353    }
354}
355
356impl Default for Formality {
357    fn default() -> Self {
358        Self::Default
359    }
360}
361
362impl std::str::FromStr for Formality {
363    type Err = std::io::Error;
364
365    fn from_str(s: &str) -> Result<Self, Self::Err> {
366        let lowcase = s.to_ascii_lowercase();
367        match lowcase.as_str() {
368            "default" => Ok(Self::Default),
369            "formal" => Ok(Self::Formal),
370            "informal" => Ok(Self::Informal),
371            _ => Err(std::io::Error::from(std::io::ErrorKind::InvalidInput)),
372        }
373    }
374}
375
376#[derive(serde::Deserialize, serde::Serialize)]
377#[serde(rename_all = "snake_case")]
378struct DeeplConfig {
379    api_key: String,
380    glossaries: std::collections::HashMap<String, String>,
381}
382
383impl DeeplConfig {
384    // Search default config file
385    fn new() -> std::io::Result<Self> {
386        use std::path::PathBuf;
387        let config_files = [
388            PathBuf::new().join("deepl.toml"),
389            dirs::home_dir()
390                .unwrap_or(PathBuf::new())
391                .join(".deepl.toml"),
392        ];
393
394        for config_file in config_files {
395            match Self::with_config(&config_file) {
396                Ok(conf) => {
397                    log::debug!("Read config file {:?}", config_file);
398                    return Ok(conf);
399                }
400                Err(err) => {
401                    if err.kind() == std::io::ErrorKind::NotFound {
402                        log::debug!("Config file {:?} NOT found.", &config_file);
403                    } else {
404                        // Other err, stop searching
405                        log::error!("Can not parse config file {:?} : {:?}", &config_file, err);
406                        return Err(err);
407                    }
408                }
409            }
410        }
411
412        // Config file not found
413        Err(std::io::Error::new(
414            std::io::ErrorKind::NotFound,
415            "deepl.toml NOT found",
416        ))
417    }
418
419    // Config from specific file
420    fn with_config<P: AsRef<std::path::Path>>(config_path: P) -> std::io::Result<Self> {
421        use std::io::Read;
422        let mut file = std::fs::File::open(&config_path)?;
423
424        // Read .deepl as TOML
425        let mut config = String::new();
426        file.read_to_string(&mut config)?;
427        let deepl_config: DeeplConfig = toml::from_str(&config)?;
428
429        Ok(deepl_config)
430    }
431
432    // DeepL endpoint URL
433    fn endpoint(&self, api: &str) -> String {
434        if self.api_key.ends_with(":fx") {
435            // API free plan key
436            format!("https://api-free.deepl.com/v2/{}", api)
437        } else {
438            // API Pro key
439            format!("https://api.deepl.com/v2/{}", api)
440        }
441    }
442
443    // Find glossary
444    fn glossary<'a>(&'a self, from_lang: Language, to_lang: Language) -> Option<&'a str> {
445        let glossary_key = format!("{}_{}", from_lang.as_langcode(), to_lang.as_langcode());
446        self.glossaries.get(&glossary_key).map(|v| v.as_str())
447    }
448}
449
450/// DeepL translation response JSON
451#[derive(serde::Deserialize)]
452#[serde(rename_all = "snake_case")]
453struct DeeplTranslationResponse {
454    translations: Vec<DeeplTranslationResponseInner>,
455}
456
457/// DeepL response JSON for each translations
458#[derive(serde::Deserialize)]
459#[serde(rename_all = "snake_case")]
460struct DeeplTranslationResponseInner {
461    #[allow(dead_code)]
462    detected_source_language: String,
463    text: String,
464}
465
466/// DeepL list glossaries response JSON
467#[derive(serde::Deserialize)]
468#[serde(rename_all = "snake_case")]
469struct DeeplListGlossariesResponse {
470    glossaries: Vec<DeeplGlossary>,
471}
472
473/// DeepL response JSON for each glossaries
474#[derive(serde::Deserialize, Debug)]
475#[serde(rename_all = "snake_case")]
476pub struct DeeplGlossary {
477    pub glossary_id: String,
478    pub name: String,
479    pub ready: bool,
480    pub source_lang: String,
481    pub target_lang: String,
482    pub creation_time: String,
483    pub entry_count: i32,
484}
485
486/// DeepL usage response JSON
487#[derive(serde::Deserialize)]
488#[serde(rename_all = "snake_case")]
489struct DeeplUsageResponse {
490    character_count: i32,
491    #[allow(dead_code)]
492    character_limit: i32,
493}
494
#[cfg(test)]
mod test {
    use super::*;

    /// Translates a short English sentence to German through the live API.
    ///
    /// Ignored by default because it needs a valid config file with an API
    /// key and network access; run it with `cargo test -- --ignored`.
    #[tokio::test]
    #[ignore = "requires a DeepL API key (deepl.toml or ~/.deepl.toml) and network access"]
    async fn plain_text_translation() {
        let deepl = Deepl::new().expect("DeepL config file should be available");

        let resp = deepl
            .translate(
                Language::En,
                Language::De,
                Formality::Default,
                "Hello, World!",
            )
            .await
            .expect("translation request should succeed");
        assert_eq!(&resp, "Hallo, Welt!");
    }
}
514}