languagetool_rust/
check.rs

1//! Structures for `check` requests and responses.
2
3use super::error::{Error, Result};
4#[cfg(feature = "annotate")]
5use annotate_snippets::{
6    display_list::{DisplayList, FormatOptions},
7    snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation},
8};
9#[cfg(feature = "cli")]
10use clap::{Args, Parser, ValueEnum};
11use serde::{Deserialize, Serialize, Serializer};
12#[cfg(feature = "cli")]
13use std::path::PathBuf;
14
15/// Requests
16
/// Parse `v` if it is a valid language code.
///
/// A valid language code is usually
/// - a two character string matching pattern `[a-z]{2}`
/// - a five character string matching pattern `[a-z]{2}-[A-Z]{2}`
/// - or some more complex ascii string (see below)
///
/// Language code is case insensitive.
///
/// Therefore, a valid language code must match the following:
///
/// - `[a-zA-Z]{2,3}(-[a-zA-Z]{2}(-[a-zA-Z]+)*)?`
///
/// or
///
/// - "auto"
///
/// > Note: a valid language code does not mean that it exists.
///
/// # Errors
///
/// Returns [`Error::InvalidValue`] if `v` is neither `"auto"` nor a string
/// matching the pattern above.
///
/// # Examples
///
/// ```
/// # use languagetool_rust::check::parse_language_code;
/// assert!(parse_language_code("en").is_ok());
///
/// assert!(parse_language_code("en-US").is_ok());
///
/// assert!(parse_language_code("en-us").is_ok());
///
/// assert!(parse_language_code("ca-ES-valencia").is_ok());
///
/// assert!(parse_language_code("abcd").is_err());
///
/// assert!(parse_language_code("en_US").is_err());
///
/// assert!(parse_language_code("fr-french").is_err());
///
/// assert!(parse_language_code("en-US-").is_err());
///
/// assert!(parse_language_code("some random text").is_err());
/// ```
#[cfg(feature = "cli")]
pub fn parse_language_code(v: &str) -> Result<String> {
    /// Return `true` if `s` is a non-empty string of ASCII letters.
    #[inline]
    fn is_alpha(s: &str) -> bool {
        !s.is_empty() && s.chars().all(|c| c.is_ascii_alphabetic())
    }

    /// Return `true` if `v` matches
    /// `^[a-zA-Z]{2,3}(-[a-zA-Z]{2}(-[a-zA-Z]+)*)?$`.
    #[inline]
    fn is_match(v: &str) -> bool {
        let mut splits = v.split('-');

        // First subtag: two or three ASCII letters.
        match splits.next() {
            Some(s) if (s.len() == 2 || s.len() == 3) && is_alpha(s) => {},
            _ => return false,
        }

        // Optional second subtag: exactly two ASCII letters.
        match splits.next() {
            Some(s) if s.len() == 2 && is_alpha(s) => {},
            Some(_) => return false,
            None => return true,
        }

        // Any further subtag must contain at least one ASCII letter: an empty
        // subtag (e.g., the trailing hyphen in "en-US-") is rejected.
        splits.all(is_alpha)
    }

    if v == "auto" || is_match(v) {
        Ok(v.to_string())
    } else {
        Err(Error::InvalidValue(
            "The value should be `\"auto\"` or match regex pattern: \
             ^[a-zA-Z]{2,3}(-[a-zA-Z]{2}(-[a-zA-Z]+)*)?$"
                .to_string(),
        ))
    }
}
92
93/// Utility function to serialize a optional vector a strings
94/// into a comma separated list of strings.
95///
96/// This is required by reqwest's RequestBuilder, otherwise it
97/// will not work.
98pub(crate) fn serialize_option_vec_string<S>(
99    v: &Option<Vec<String>>,
100    serializer: S,
101) -> std::result::Result<S::Ok, S::Error>
102where
103    S: Serializer,
104{
105    match v {
106        Some(v) if v.len() == 1 => serializer.serialize_str(&v[0]),
107        Some(v) if v.len() > 1 => {
108            let size = v.iter().map(|s| s.len()).sum::<usize>() + v.len() - 1;
109            let mut string = String::with_capacity(size);
110
111            string.push_str(&v[0]);
112
113            for s in &v[1..] {
114                string.push(',');
115                string.push_str(s);
116            }
117
118            serializer.serialize_str(string.as_ref())
119        },
120        _ => serializer.serialize_none(),
121    }
122}
123
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
/// A portion of text to be checked.
///
/// Field names are (de)serialized in camelCase (e.g., `interpretAs`), and
/// every field that is [`None`] is omitted from the serialized output.
pub struct DataAnnotation {
    /// If set, the markup will be interpreted as this.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub interpret_as: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Text that should be treated as markup.
    pub markup: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Text that should be treated as normal text.
    pub text: Option<String>,
}
139
140impl Default for DataAnnotation {
141    fn default() -> Self {
142        Self {
143            interpret_as: None,
144            markup: None,
145            text: Some(String::new()),
146        }
147    }
148}
149
150impl DataAnnotation {
151    /// Instantiate a new `DataAnnotation` with text only.
152    #[inline]
153    #[must_use]
154    pub fn new_text(text: String) -> Self {
155        Self {
156            interpret_as: None,
157            markup: None,
158            text: Some(text),
159        }
160    }
161
162    /// Instantiate a new `DataAnnotation` with markup only.
163    #[inline]
164    #[must_use]
165    pub fn new_markup(markup: String) -> Self {
166        Self {
167            interpret_as: None,
168            markup: Some(markup),
169            text: None,
170        }
171    }
172
173    /// Instantiate a new `DataAnnotation` with markup and its interpretation.
174    #[inline]
175    #[must_use]
176    pub fn new_interpreted_markup(markup: String, interpret_as: String) -> Self {
177        Self {
178            interpret_as: Some(interpret_as),
179            markup: Some(markup),
180            text: None,
181        }
182    }
183}
184
#[cfg(test)]
mod data_annotation_tests {

    use crate::check::DataAnnotation;

    /// A text-only annotation must not carry markup nor interpretation.
    #[test]
    fn test_text() {
        let annotation = DataAnnotation::new_text(String::from("Hello"));

        assert_eq!(annotation.text.as_deref(), Some("Hello"));
        assert_eq!(annotation.markup, None);
        assert_eq!(annotation.interpret_as, None);
    }

    /// A markup-only annotation must not carry text nor interpretation.
    #[test]
    fn test_markup() {
        let annotation = DataAnnotation::new_markup(String::from("<a>Hello</a>"));

        assert_eq!(annotation.text, None);
        assert_eq!(annotation.markup.as_deref(), Some("<a>Hello</a>"));
        assert_eq!(annotation.interpret_as, None);
    }

    /// An interpreted markup carries both markup and interpretation, but no
    /// text.
    #[test]
    fn test_interpreted_markup() {
        let markup = String::from("<a>Hello</a>");
        let interpret_as = String::from("Hello");
        let annotation = DataAnnotation::new_interpreted_markup(markup, interpret_as);

        assert_eq!(annotation.text, None);
        assert_eq!(annotation.markup.as_deref(), Some("<a>Hello</a>"));
        assert_eq!(annotation.interpret_as.as_deref(), Some("Hello"));
    }
}
218
/// Alternative text to be checked.
///
/// The text is described as a sequence of annotations, each being either
/// normal text or markup.
///
/// Note that `Serialize` is not derived for this type: it is implemented
/// manually.
#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq)]
#[non_exhaustive]
pub struct Data {
    /// Vector of markup text, see [`DataAnnotation`].
    pub annotation: Vec<DataAnnotation>,
}
226
227impl<T: Into<DataAnnotation>> FromIterator<T> for Data {
228    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
229        let annotation = iter.into_iter().map(std::convert::Into::into).collect();
230        Data { annotation }
231    }
232}
233
234impl Serialize for Data {
235    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
236    where
237        S: serde::Serializer,
238    {
239        let mut map = std::collections::HashMap::new();
240        map.insert("annotation", &self.annotation);
241
242        serializer.serialize_str(&serde_json::to_string(&map).unwrap())
243    }
244}
245
#[cfg(feature = "cli")]
impl std::str::FromStr for Data {
    type Err = Error;

    /// Parse a JSON document into a `Data`.
    ///
    /// Any JSON decoding failure is converted into the crate's [`Error`].
    fn from_str(s: &str) -> Result<Self> {
        serde_json::from_str::<Self>(s).map_err(Error::from)
    }
}
255
/// Possible levels for additional rules.
///
/// Currently, `Level::Picky` adds additional rules
/// with respect to `Level::Default`.
///
/// Variants are (de)serialized in lowercase, i.e., `default` and `picky`.
#[derive(Clone, Default, Deserialize, Debug, PartialEq, Eq, Serialize)]
#[cfg_attr(feature = "cli", derive(ValueEnum))]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum Level {
    /// Default level.
    #[default]
    Default,
    /// Picky level.
    Picky,
}
271
272impl Level {
273    /// Return `true` if current level is the default one.
274    ///
275    /// # Examples
276    ///
277    /// ```
278    /// # use languagetool_rust::check::Level;
279    ///
280    /// let level: Level = Default::default();
281    ///
282    /// assert!(level.is_default());
283    /// ```
284    #[must_use]
285    pub fn is_default(&self) -> bool {
286        *self == Level::default()
287    }
288}
289
/// Split a string into as few fragments as possible, where each fragment
/// contains (if possible) a maximum of `n` characters. Pattern str `pat` is
/// used for splitting.
///
/// The string is first cut after each occurrence of `pat` (the pattern is
/// kept at the end of each piece), then consecutive pieces are merged as
/// long as the merged length stays strictly below `n`. Lengths are measured
/// in bytes, as returned by [`str::len`]. A single piece that is longer than
/// `n` is never cut, and is returned as is.
///
/// Joining the returned fragments always gives back `s`. If `s` is empty, an
/// empty vector is returned. If `n` is zero, no merging occurs.
///
/// # Examples
///
/// ```
/// # use languagetool_rust::check::split_len;
/// let s = "I have so many friends.
/// They are very funny.
/// I think I am very lucky to have them.
/// One day, I will write them a poem.
/// But, in the meantime, I write code.
/// ";
///
/// let split = split_len(&s, 40, "\n");
///
/// assert_eq!(split.join(""), s);
/// assert_eq!(
///     split,
///     vec![
///         "I have so many friends.\n",
///         "They are very funny.\n",
///         "I think I am very lucky to have them.\n",
///         "One day, I will write them a poem.\n",
///         "But, in the meantime, I write code.\n"
///     ]
/// );
///
/// let split = split_len(&s, 80, "\n");
///
/// assert_eq!(
///     split,
///     vec![
///         "I have so many friends.\nThey are very funny.\n",
///         "I think I am very lucky to have them.\nOne day, I will write them a poem.\n",
///         "But, in the meantime, I write code.\n"
///     ]
/// );
///
/// let s = "I have so many friends.
/// They are very funny.
/// I think I am very lucky to have them.
///
/// One day, I will write them a poem.
/// But, in the meantime, I write code.
/// ";
///
/// let split = split_len(&s, 80, "\n\n");
///
/// println!("{:?}", split);
///
/// assert_eq!(
///     split,
///     vec![
///         "I have so many friends.\nThey are very funny.\nI think I am very lucky to have \
///          them.\n\n",
///         "One day, I will write them a poem.\nBut, in the meantime, I write code.\n"
///     ]
/// );
/// ```
#[must_use]
pub fn split_len<'source>(s: &'source str, n: usize, pat: &str) -> Vec<&'source str> {
    let mut splits = s.split_inclusive(pat);

    // Byte offset and byte length of the fragment currently being built.
    let mut start = 0;
    let mut len = match splits.next() {
        Some(first) => first.len(),
        None => return Vec::new(),
    };

    // `checked_div` avoids a division by zero when `n == 0`; the capacity is
    // only an estimate of the number of fragments.
    let mut vec: Vec<&'source str> = Vec::with_capacity(s.len().checked_div(n).unwrap_or(0));

    for split in splits {
        let new_len = len + split.len();
        if new_len < n {
            // Pieces are contiguous in `s`, so merging is just growing the
            // current fragment.
            len = new_len;
        } else {
            vec.push(&s[start..start + len]);
            start += len;
            len = split.len();
        }
    }

    // The last fragment is still pending.
    vec.push(&s[start..start + len]);

    vec
}
378
379/// LanguageTool POST check request.
380///
381/// The main feature - check a text with LanguageTool for possible style and
382/// grammar issues.
383///
384/// The structure below tries to follow as closely as possible the JSON API
385/// described [here](https://languagetool.org/http-api/swagger-ui/#!/default/post_check).
386#[cfg_attr(feature = "cli", derive(Args))]
387#[derive(Clone, Deserialize, Debug, PartialEq, Eq, Serialize)]
388#[serde(rename_all = "camelCase")]
389#[non_exhaustive]
390pub struct CheckRequest {
391    /// The text to be checked. This or 'data' is required.
392    #[cfg_attr(
393        feature = "cli",
394        clap(short = 't', long, conflicts_with = "data", allow_hyphen_values(true))
395    )]
396    #[serde(skip_serializing_if = "Option::is_none")]
397    pub text: Option<String>,
398    /// The text to be checked, given as a JSON document that specifies what's
399    /// text and what's markup. This or 'text' is required.
400    ///
401    /// Markup will be ignored when looking for errors. Example text:
402    /// ```html
403    /// A <b>test</b>
404    /// ```
405    /// JSON for the example text:
406    /// ```json
407    /// {"annotation":[
408    ///  {"text": "A "},
409    ///  {"markup": "<b>"},
410    ///  {"text": "test"},
411    ///  {"markup": "</b>"}
412    /// ]}
413    /// ```
414    /// If you have markup that should be interpreted as whitespace, like `<p>`
415    /// in HTML, you can have it interpreted like this:
416    ///
417    /// ```json
418    /// {"markup": "<p>", "interpretAs": "\n\n"}
419    /// ```
420    /// The 'data' feature is not limited to HTML or XML, it can be used for any
421    /// kind of markup. Entities will need to be expanded in this input.
422    #[cfg_attr(feature = "cli", clap(short = 'd', long, conflicts_with = "text"))]
423    #[serde(skip_serializing_if = "Option::is_none")]
424    pub data: Option<Data>,
425    /// A language code like `en-US`, `de-DE`, `fr`, or `auto` to guess the
426    /// language automatically (see `preferredVariants` below).
427    ///
428    /// For languages with variants (English, German, Portuguese) spell checking
429    /// will only be activated when you specify the variant, e.g. `en-GB`
430    /// instead of just `en`.
431    #[cfg_attr(
432        all(feature = "cli", feature = "cli", feature = "cli"),
433        clap(
434            short = 'l',
435            long,
436            default_value = "auto",
437            value_parser = parse_language_code
438        )
439    )]
440    pub language: String,
441    /// Set to get Premium API access: Your username/email as used to log in at
442    /// languagetool.org.
443    #[cfg_attr(
444        feature = "cli",
445        clap(short = 'u', long, requires = "api_key", env = "LANGUAGETOOL_USERNAME")
446    )]
447    #[serde(skip_serializing_if = "Option::is_none")]
448    pub username: Option<String>,
449    /// Set to get Premium API access: [your API
450    /// key](https://languagetool.org/editor/settings/api).
451    #[cfg_attr(
452        feature = "cli",
453        clap(short = 'k', long, requires = "username", env = "LANGUAGETOOL_API_KEY")
454    )]
455    #[serde(skip_serializing_if = "Option::is_none")]
456    pub api_key: Option<String>,
457    /// Comma-separated list of dictionaries to include words from; uses special
458    /// default dictionary if this is unset.
459    #[cfg_attr(feature = "cli", clap(long))]
460    #[serde(serialize_with = "serialize_option_vec_string")]
461    pub dicts: Option<Vec<String>>,
462    /// A language code of the user's native language, enabling false friends
463    /// checks for some language pairs.
464    #[cfg_attr(feature = "cli", clap(long))]
465    #[serde(skip_serializing_if = "Option::is_none")]
466    pub mother_tongue: Option<String>,
467    /// Comma-separated list of preferred language variants.
468    ///
469    /// The language detector used with `language=auto` can detect e.g. English,
470    /// but it cannot decide whether British English or American English is
471    /// used. Thus this parameter can be used to specify the preferred variants
472    /// like `en-GB` and `de-AT`. Only available with `language=auto`. You
473    /// should set variants for at least German and English, as otherwise the
474    /// spell checking will not work for those, as no spelling dictionary can be
475    /// selected for just `en` or `de`.
476    #[cfg_attr(feature = "cli", clap(long, conflicts_with = "language"))]
477    #[serde(serialize_with = "serialize_option_vec_string")]
478    pub preferred_variants: Option<Vec<String>>,
479    /// IDs of rules to be enabled, comma-separated.
480    #[cfg_attr(feature = "cli", clap(long))]
481    #[serde(serialize_with = "serialize_option_vec_string")]
482    pub enabled_rules: Option<Vec<String>>,
483    /// IDs of rules to be disabled, comma-separated.
484    #[cfg_attr(feature = "cli", clap(long))]
485    #[serde(serialize_with = "serialize_option_vec_string")]
486    pub disabled_rules: Option<Vec<String>>,
487    /// IDs of categories to be enabled, comma-separated.
488    #[cfg_attr(feature = "cli", clap(long))]
489    #[serde(serialize_with = "serialize_option_vec_string")]
490    pub enabled_categories: Option<Vec<String>>,
491    /// IDs of categories to be disabled, comma-separated.
492    #[cfg_attr(feature = "cli", clap(long))]
493    #[serde(serialize_with = "serialize_option_vec_string")]
494    pub disabled_categories: Option<Vec<String>>,
495    /// If true, only the rules and categories whose IDs are specified with
496    /// `enabledRules` or `enabledCategories` are enabled.
497    #[cfg_attr(feature = "cli", clap(long))]
498    #[serde(skip_serializing_if = "is_false")]
499    pub enabled_only: bool,
500    /// If set to `picky`, additional rules will be activated, i.e. rules that
501    /// you might only find useful when checking formal text.
502    #[cfg_attr(
503        feature = "cli",
504        clap(long, default_value = "default", ignore_case = true, value_enum)
505    )]
506    #[serde(skip_serializing_if = "Level::is_default")]
507    pub level: Level,
508}
509
510impl Default for CheckRequest {
511    #[inline]
512    fn default() -> CheckRequest {
513        CheckRequest {
514            text: Default::default(),
515            data: Default::default(),
516            language: "auto".to_string(),
517            username: Default::default(),
518            api_key: Default::default(),
519            dicts: Default::default(),
520            mother_tongue: Default::default(),
521            preferred_variants: Default::default(),
522            enabled_rules: Default::default(),
523            disabled_rules: Default::default(),
524            enabled_categories: Default::default(),
525            disabled_categories: Default::default(),
526            enabled_only: Default::default(),
527            level: Default::default(),
528        }
529    }
530}
531
/// Return `true` if `b` is `false`.
///
/// The reference parameter is needed because this function is used as a
/// serde `skip_serializing_if` predicate.
#[inline]
fn is_false(b: &bool) -> bool {
    !*b
}
536
537impl CheckRequest {
538    /// Set the text to be checked and remove potential data field.
539    #[must_use]
540    pub fn with_text(mut self, text: String) -> Self {
541        self.text = Some(text);
542        self.data = None;
543        self
544    }
545
546    /// Set the data to be checked and remove potential text field.
547    #[must_use]
548    pub fn with_data(mut self, data: Data) -> Self {
549        self.data = Some(data);
550        self.text = None;
551        self
552    }
553
554    /// Set the data (obtained from string) to be checked and remove potential
555    /// text field
556    pub fn with_data_str(self, data: &str) -> serde_json::Result<Self> {
557        Ok(self.with_data(serde_json::from_str(data)?))
558    }
559
560    /// Set the language of the text / data.
561    #[must_use]
562    pub fn with_language(mut self, language: String) -> Self {
563        self.language = language;
564        self
565    }
566
567    /// Return a copy of the text within the request.
568    ///
569    /// # Errors
570    ///
571    /// If both `self.text` and `self.data` are [`None`].
572    /// If any data annotation does not contain text or markup.
573    pub fn try_get_text(&self) -> Result<String> {
574        if let Some(ref text) = self.text {
575            Ok(text.clone())
576        } else if let Some(ref data) = self.data {
577            let mut text = String::new();
578            for da in data.annotation.iter() {
579                if let Some(ref t) = da.text {
580                    text.push_str(t.as_str());
581                } else if let Some(ref t) = da.markup {
582                    text.push_str(t.as_str());
583                } else {
584                    return Err(Error::InvalidDataAnnotation(
585                        "missing either text or markup field in {da:?}".to_string(),
586                    ));
587                }
588            }
589            Ok(text)
590        } else {
591            Err(Error::InvalidRequest(
592                "missing either text or data field".to_string(),
593            ))
594        }
595    }
596
597    /// Return a copy of the text within the request.
598    /// Call [`CheckRequest::try_get_text`] but panic on error.
599    ///
600    /// # Panics
601    ///
602    /// If both `self.text` and `self.data` are [`None`].
603    /// If any data annotation does not contain text or markup.
604    #[must_use]
605    pub fn get_text(&self) -> String {
606        self.try_get_text().unwrap()
607    }
608
609    /// Split this request into multiple, using [`split_len`] function to split
610    /// text.
611    ///
612    /// # Errors
613    ///
614    /// If `self.text` is none.
615    pub fn try_split(&self, n: usize, pat: &str) -> Result<Vec<Self>> {
616        let text = self
617            .text
618            .as_ref()
619            .ok_or(Error::InvalidRequest("missing text field".to_string()))?;
620
621        Ok(split_len(text.as_str(), n, pat)
622            .iter()
623            .map(|text_fragment| self.clone().with_text(text_fragment.to_string()))
624            .collect())
625    }
626
627    /// Split this request into multiple, using [`split_len`] function to split
628    /// text.
629    /// Call [`CheckRequest::try_split`] but panic on error.
630    ///
631    /// # Panics
632    ///
633    /// If `self.text` is none.
634    #[must_use]
635    pub fn split(&self, n: usize, pat: &str) -> Vec<Self> {
636        self.try_split(n, pat).unwrap()
637    }
638}
639
/// Convert a string slice into a [`PathBuf`], returning
/// [`Error::InvalidFilename`] unless the path points to an existing regular
/// file.
#[cfg(feature = "cli")]
fn parse_filename(s: &str) -> Result<PathBuf> {
    let candidate = PathBuf::from(s);

    if !candidate.is_file() {
        return Err(Error::InvalidFilename(s.to_string()));
    }

    Ok(candidate)
}
652
/// Check text using LanguageTool server.
// NOTE: doc comments below are also used by `clap` as command-line help
// text. The whole struct is already gated behind the `cli` feature, so
// fields do not need their own `cfg` attribute.
#[cfg(feature = "cli")]
#[derive(Debug, Parser)]
pub struct CheckCommand {
    /// If present, raw JSON output will be printed instead of annotated text.
    /// This has no effect if `--data` is used, because it is never
    /// annotated.
    #[clap(short = 'r', long)]
    pub raw: bool,
    /// If present, more context (i.e., line number and line offset) will be
    /// added to response.
    #[clap(short = 'm', long, hide = true)]
    #[deprecated(
        since = "2.0.0",
        note = "Do not use this, it is only kept for backwards compatibility with v1"
    )]
    pub more_context: bool,
    /// Sets the maximum number of characters before splitting.
    #[clap(long, default_value_t = 1500)]
    pub max_length: usize,
    /// If text is too long, will split on this pattern.
    #[clap(long, default_value = "\n\n")]
    pub split_pattern: String,
    /// Max. number of suggestions kept. If negative, all suggestions are kept.
    #[clap(long, default_value_t = 5, allow_negative_numbers = true)]
    pub max_suggestions: isize,
    /// Inner [`CheckRequest`].
    #[command(flatten)]
    pub request: CheckRequest,
    /// Optional filenames from which input is read.
    #[arg(conflicts_with_all(["text", "data"]), value_parser = parse_filename)]
    pub filenames: Vec<PathBuf>,
}
687
#[cfg(test)]
mod request_tests {

    use crate::{
        check::{Data, DataAnnotation},
        CheckRequest,
    };

    /// Setting the text must store it and leave no data.
    #[test]
    fn test_with_text() {
        let req = CheckRequest::default().with_text("hello".to_string());

        assert_eq!(req.text.unwrap(), "hello".to_string());
        assert!(req.data.is_none());
    }

    /// Setting the data must store it and remove any previously set text.
    #[test]
    fn test_with_data() {
        let data: Data = std::iter::once(DataAnnotation::new_text("hello".to_string())).collect();

        let req = CheckRequest::default()
            .with_text("to be removed".to_string())
            .with_data(data.clone());

        assert_eq!(req.data.unwrap(), data);
        assert!(req.text.is_none());
    }
}
709
/// Responses
711
/// Detected language from check request.
///
/// The `confidence` and `source` fields are only available with the
/// `unstable` feature.
// `Eq` cannot be derived when the `f64` field `confidence` is present, hence
// the lint is silenced.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
#[non_exhaustive]
pub struct DetectedLanguage {
    /// Language code, e.g., `"sk-SK"` for Slovak.
    pub code: String,
    /// Confidence level, from 0 to 1.
    #[cfg(feature = "unstable")]
    pub confidence: Option<f64>,
    /// Language name, e.g., `"Slovak"`.
    pub name: String,
    /// Source (file) for the language detection.
    #[cfg(feature = "unstable")]
    pub source: Option<String>,
}
728
/// Language information in check response.
///
/// Field names are (de)serialized in camelCase (e.g., `detectedLanguage`).
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
#[non_exhaustive]
pub struct LanguageResponse {
    /// Language code, e.g., `"sk-SK"` for Slovak.
    pub code: String,
    /// Detected language from provided request, see [`DetectedLanguage`].
    pub detected_language: DetectedLanguage,
    /// Language name, e.g., `"Slovak"`.
    pub name: String,
}
741
/// Match context in check response.
///
/// `offset` and `length` locate the match within `text`.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
#[non_exhaustive]
pub struct Context {
    /// Length of the match.
    pub length: usize,
    /// Char index at which the match starts.
    pub offset: usize,
    /// Contextual text around the match.
    pub text: String,
}
753
/// More context, post-processed in check response.
///
/// No rename attribute is applied, so fields are (de)serialized with their
/// snake_case names.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
#[non_exhaustive]
pub struct MoreContext {
    /// Line number where match occurred.
    pub line_number: usize,
    /// Char index at which the match starts on the current line.
    pub line_offset: usize,
}
763
/// Possible replacement for a given match in check response.
///
/// Can be built from a [`String`] or a string slice through [`From`].
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
#[non_exhaustive]
pub struct Replacement {
    /// Possible replacement value.
    pub value: String,
}
771
772impl From<String> for Replacement {
773    fn from(value: String) -> Self {
774        Self { value }
775    }
776}
777
778impl From<&str> for Replacement {
779    fn from(value: &str) -> Self {
780        value.to_string().into()
781    }
782}
783
/// A rule category.
///
/// Used as the `category` field of a [`Rule`].
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
#[non_exhaustive]
pub struct Category {
    /// Category id.
    pub id: String,
    /// Category name.
    pub name: String,
}
793
/// A possible url of a rule in a check response.
///
/// Used within the optional `urls` list of a [`Rule`].
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
#[non_exhaustive]
pub struct Url {
    /// Url value.
    pub value: String,
}
801
/// The rule that was not satisfied in a given match.
///
/// Field names are (de)serialized in camelCase (e.g., `issueType`). The
/// `is_premium` and `source_file` fields are only available with the
/// `unstable` feature.
#[derive(Clone, PartialEq, Eq, Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
#[non_exhaustive]
pub struct Rule {
    /// Rule category.
    pub category: Category,
    /// Rule description.
    pub description: String,
    /// Rule id.
    pub id: String,
    /// Indicate if the rule is from the premium API.
    #[cfg(feature = "unstable")]
    pub is_premium: Option<bool>,
    /// Issue type.
    pub issue_type: String,
    /// Rule source file.
    #[cfg(feature = "unstable")]
    pub source_file: Option<String>,
    /// Rule sub id.
    pub sub_id: Option<String>,
    /// Rule list of urls.
    pub urls: Option<Vec<Url>>,
}
826
/// Type of a given match.
///
/// Field names are (de)serialized in camelCase (i.e., `typeName`).
#[derive(PartialEq, Eq, Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
#[non_exhaustive]
pub struct Type {
    /// Type name.
    pub type_name: String,
}
835
/// Grammatical error match.
///
/// Field names are (de)serialized in camelCase (e.g., `shortMessage`). The
/// `context_for_sure_match`, `ignore_for_incomplete_sentence`, and `type_`
/// fields are only available with the `unstable` feature.
#[derive(PartialEq, Eq, Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
#[non_exhaustive]
pub struct Match {
    /// Match context.
    pub context: Context,
    /// Unknown: please file a [PR](https://github.com/jeertmans/languagetool-rust/pulls) if you
    /// know what this attribute is used for.
    #[cfg(feature = "unstable")]
    pub context_for_sure_match: isize,
    /// Unknown: please file a [PR](https://github.com/jeertmans/languagetool-rust/pulls) if you
    /// know what this attribute is used for.
    #[cfg(feature = "unstable")]
    pub ignore_for_incomplete_sentence: bool,
    /// Match length.
    pub length: usize,
    /// Error message.
    pub message: String,
    /// More context to match, post-processed using original text.
    ///
    /// Omitted from the serialized output when [`None`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub more_context: Option<MoreContext>,
    /// Char index at which the match starts.
    pub offset: usize,
    /// List of possible replacements (if applies).
    pub replacements: Vec<Replacement>,
    /// Match rule that was not satisfied.
    pub rule: Rule,
    /// Sentence in which the error was found.
    pub sentence: String,
    /// Short message about the error.
    pub short_message: String,
    /// Match type, (de)serialized as `type`.
    #[cfg(feature = "unstable")]
    #[serde(rename = "type")]
    pub type_: Type,
}
873
/// LanguageTool software details.
///
/// Field names are (de)serialized in camelCase (e.g., `apiVersion`). The
/// `premium_hint` field is only available with the `unstable` feature.
#[derive(Clone, PartialEq, Eq, Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
#[non_exhaustive]
pub struct Software {
    /// LanguageTool API version.
    pub api_version: usize,
    /// Some information about build date.
    pub build_date: String,
    /// Name (should be `"LanguageTool"`).
    pub name: String,
    /// Tell whether the server uses premium API or not.
    pub premium: bool,
    /// Sentence that indicates if using premium API would find more errors.
    #[cfg(feature = "unstable")]
    pub premium_hint: Option<String>,
    /// Unknown: please file a [PR](https://github.com/jeertmans/languagetool-rust/pulls) if you
    /// know what this attribute is used for.
    pub status: String,
    /// LanguageTool version.
    pub version: String,
}
896
/// Warnings about check response.
///
/// Field names are (de)serialized in camelCase (i.e., `incompleteResults`).
#[derive(Clone, PartialEq, Eq, Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
#[non_exhaustive]
pub struct Warnings {
    /// Indicate if results are incomplete.
    pub incomplete_results: bool,
}
905
/// LanguageTool POST check response.
///
/// Field names are (de)serialized in camelCase (e.g., `sentenceRanges`). The
/// `sentence_ranges` and `warnings` fields are only available with the
/// `unstable` feature.
#[derive(Clone, PartialEq, Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
#[non_exhaustive]
pub struct CheckResponse {
    /// Language information.
    pub language: LanguageResponse,
    /// List of error matches.
    pub matches: Vec<Match>,
    /// Ranges ([start, end]) of sentences.
    #[cfg(feature = "unstable")]
    pub sentence_ranges: Option<Vec<[usize; 2]>>,
    /// LanguageTool software information.
    pub software: Software,
    /// Possible warnings.
    #[cfg(feature = "unstable")]
    pub warnings: Option<Warnings>,
}
924
925impl CheckResponse {
926    /// Return an iterator over matches.
927    pub fn iter_matches(&self) -> std::slice::Iter<'_, Match> {
928        self.matches.iter()
929    }
930
931    /// Return an iterator over mutable matches.
932    pub fn iter_matches_mut(&mut self) -> std::slice::IterMut<'_, Match> {
933        self.matches.iter_mut()
934    }
935
936    /// Creates an annotated string from current response.
937    #[cfg(feature = "annotate")]
938    #[must_use]
939    pub fn annotate(&self, text: &str, origin: Option<&str>, color: bool) -> String {
940        if self.matches.is_empty() {
941            return "No error were found in provided text".to_string();
942        }
943        let replacements: Vec<_> = self
944            .matches
945            .iter()
946            .map(|m| {
947                m.replacements.iter().fold(String::new(), |mut acc, r| {
948                    if !acc.is_empty() {
949                        acc.push_str(", ");
950                    }
951                    acc.push_str(&r.value);
952                    acc
953                })
954            })
955            .collect();
956
957        let snippets = self.matches.iter().zip(replacements.iter()).map(|(m, r)| {
958            Snippet {
959                title: Some(Annotation {
960                    label: Some(&m.message),
961                    id: Some(&m.rule.id),
962                    annotation_type: AnnotationType::Error,
963                }),
964                footer: vec![],
965                slices: vec![Slice {
966                    source: &m.context.text,
967                    line_start: 1 + text.chars().take(m.offset).filter(|c| *c == '\n').count(),
968                    origin,
969                    fold: true,
970                    annotations: vec![
971                        SourceAnnotation {
972                            label: &m.rule.description,
973                            annotation_type: AnnotationType::Error,
974                            range: (m.context.offset, m.context.offset + m.context.length),
975                        },
976                        SourceAnnotation {
977                            label: r,
978                            annotation_type: AnnotationType::Help,
979                            range: (m.context.offset, m.context.offset + m.context.length),
980                        },
981                    ],
982                }],
983                opt: FormatOptions {
984                    color,
985                    ..Default::default()
986                },
987            }
988        });
989
990        let mut annotation = String::new();
991
992        for snippet in snippets {
993            if !annotation.is_empty() {
994                annotation.push('\n');
995            }
996            annotation.push_str(&DisplayList::from(snippet).to_string());
997        }
998        annotation
999    }
1000}
1001
1002/// Check response with additional context.
1003///
1004/// This structure exists to keep a link between a check response
1005/// and the original text that was checked.
1006#[derive(Debug, Clone, PartialEq)]
1007pub struct CheckResponseWithContext {
1008    /// Original text that was checked by LT.
1009    pub text: String,
1010    /// Check response.
1011    pub response: CheckResponse,
1012    /// Text's length.
1013    pub text_length: usize,
1014}
1015
1016impl CheckResponseWithContext {
1017    /// Bind a check response with its original text.
1018    #[must_use]
1019    pub fn new(text: String, response: CheckResponse) -> Self {
1020        let text_length = text.chars().count();
1021        Self {
1022            text,
1023            response,
1024            text_length,
1025        }
1026    }
1027
1028    /// Return an iterator over matches.
1029    pub fn iter_matches(&self) -> std::slice::Iter<'_, Match> {
1030        self.response.iter_matches()
1031    }
1032
1033    /// Return an iterator over mutable matches.
1034    pub fn iter_matches_mut(&mut self) -> std::slice::IterMut<'_, Match> {
1035        self.response.iter_matches_mut()
1036    }
1037
1038    /// Return an iterator over matches and correspondig line number and line
1039    /// offset.
1040    #[must_use]
1041    pub fn iter_match_positions(&self) -> MatchPositions<'_, std::slice::Iter<'_, Match>> {
1042        self.into()
1043    }
1044
1045    /// Append a check response to the current while
1046    /// adjusting the matches' offsets.
1047    ///
1048    /// This is especially useful when a text was split in multiple requests.
1049    #[must_use]
1050    pub fn append(mut self, mut other: Self) -> Self {
1051        let offset = self.text_length;
1052        for m in other.iter_matches_mut() {
1053            m.offset += offset;
1054        }
1055
1056        #[cfg(feature = "unstable")]
1057        if let Some(ref mut sr_other) = other.response.sentence_ranges {
1058            match self.response.sentence_ranges {
1059                Some(ref mut sr_self) => {
1060                    sr_self.append(sr_other);
1061                },
1062                None => {
1063                    std::mem::swap(
1064                        &mut self.response.sentence_ranges,
1065                        &mut other.response.sentence_ranges,
1066                    );
1067                },
1068            }
1069        }
1070
1071        self.response.matches.append(&mut other.response.matches);
1072        self.text.push_str(other.text.as_str());
1073        self.text_length += other.text_length;
1074        self
1075    }
1076}
1077
1078impl From<CheckResponseWithContext> for CheckResponse {
1079    #[allow(clippy::needless_borrow)]
1080    fn from(mut resp: CheckResponseWithContext) -> Self {
1081        let iter: MatchPositions<'_, std::slice::IterMut<'_, Match>> = (&mut resp).into();
1082
1083        for (line_number, line_offset, m) in iter {
1084            m.more_context = Some(MoreContext {
1085                line_number,
1086                line_offset,
1087            });
1088        }
1089        resp.response
1090    }
1091}
1092
/// Iterator yielding each match together with its line number and line
/// offset in the checked text.
#[derive(Debug, Clone)]
pub struct MatchPositions<'source, T> {
    /// Characters of the checked text that have not been consumed yet.
    text_chars: std::str::Chars<'source>,
    /// Underlying iterator over the matches.
    matches: T,
    /// Line number at the current position in the text.
    line_number: usize,
    /// Number of characters consumed since the last newline.
    line_offset: usize,
    /// Offset of the most recently processed match.
    offset: usize,
}
1102
1103impl<'source> From<&'source CheckResponseWithContext>
1104    for MatchPositions<'source, std::slice::Iter<'source, Match>>
1105{
1106    fn from(response: &'source CheckResponseWithContext) -> Self {
1107        MatchPositions {
1108            text_chars: response.text.chars(),
1109            matches: response.iter_matches(),
1110            line_number: 1,
1111            line_offset: 0,
1112            offset: 0,
1113        }
1114    }
1115}
1116
1117impl<'source> From<&'source mut CheckResponseWithContext>
1118    for MatchPositions<'source, std::slice::IterMut<'source, Match>>
1119{
1120    fn from(response: &'source mut CheckResponseWithContext) -> Self {
1121        MatchPositions {
1122            text_chars: response.text.chars(),
1123            matches: response.response.iter_matches_mut(),
1124            line_number: 1,
1125            line_offset: 0,
1126            offset: 0,
1127        }
1128    }
1129}
1130
1131impl<'source, T> MatchPositions<'source, T> {
1132    /// Set the line number to a give value.
1133    ///
1134    /// By default, the first line number is 1.
1135    pub fn set_line_number(mut self, line_number: usize) -> Self {
1136        self.line_number = line_number;
1137        self
1138    }
1139
1140    fn update_line_number_and_offset(&mut self, m: &Match) {
1141        let n = m.offset - self.offset;
1142        for _ in 0..n {
1143            match self.text_chars.next() {
1144                Some('\n') => {
1145                    self.line_number += 1;
1146                    self.line_offset = 0;
1147                },
1148                None => {
1149                    panic!(
1150                        "text is shorter than expected, are you sure this text was the one used \
1151                         for the check request?"
1152                    )
1153                },
1154                _ => self.line_offset += 1,
1155            }
1156        }
1157        self.offset = m.offset;
1158    }
1159}
1160
1161impl<'source> Iterator for MatchPositions<'source, std::slice::Iter<'source, Match>> {
1162    type Item = (usize, usize, &'source Match);
1163
1164    fn next(&mut self) -> Option<Self::Item> {
1165        if let Some(m) = self.matches.next() {
1166            self.update_line_number_and_offset(m);
1167            Some((self.line_number, self.line_offset, m))
1168        } else {
1169            None
1170        }
1171    }
1172}
1173
1174impl<'source> Iterator for MatchPositions<'source, std::slice::IterMut<'source, Match>> {
1175    type Item = (usize, usize, &'source mut Match);
1176
1177    fn next(&mut self) -> Option<Self::Item> {
1178        if let Some(m) = self.matches.next() {
1179            self.update_line_number_and_offset(m);
1180            Some((self.line_number, self.line_offset, m))
1181        } else {
1182            None
1183        }
1184    }
1185}
1186
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal token type used to build annotated data in tests: purely
    /// alphabetic words are text, anything else is skipped as markup.
    #[derive(Debug)]
    enum Token<'source> {
        Text(&'source str),
        Skip(&'source str),
    }

    impl<'source> From<&'source str> for Token<'source> {
        fn from(s: &'source str) -> Self {
            if s.chars().all(|c| c.is_ascii_alphabetic()) {
                Token::Text(s)
            } else {
                Token::Skip(s)
            }
        }
    }

    impl<'source> From<Token<'source>> for DataAnnotation {
        fn from(token: Token<'source>) -> Self {
            match token {
                Token::Text(s) => DataAnnotation::new_text(s.to_string()),
                Token::Skip(s) => DataAnnotation::new_markup(s.to_string()),
            }
        }
    }

    /// Collecting tokens into `Data` must yield one annotation per token,
    /// text or markup depending on the token kind.
    #[test]
    fn test_data_annotation() {
        let data: Data = "My name is Q34XY".split(' ').map(Token::from).collect();

        let expected_data = Data {
            annotation: vec![
                DataAnnotation::new_text("My".to_string()),
                DataAnnotation::new_text("name".to_string()),
                DataAnnotation::new_text("is".to_string()),
                DataAnnotation::new_markup("Q34XY".to_string()),
            ],
        };

        assert_eq!(data, expected_data);
    }

    /// `serialize_option_vec_string` must join values with commas and emit
    /// `null` for both `None` and an empty list.
    #[test]
    fn test_serialize_option_vec_string() {
        use serde::Serialize;

        #[derive(Serialize)]
        struct Foo {
            #[serde(serialize_with = "serialize_option_vec_string")]
            values: Option<Vec<String>>,
        }

        impl Foo {
            fn new<I, T>(values: I) -> Self
            where
                I: IntoIterator<Item = T>,
                T: ToString,
            {
                Self {
                    values: Some(values.into_iter().map(|v| v.to_string()).collect()),
                }
            }
            fn none() -> Self {
                Self { values: None }
            }
        }

        let cases: Vec<(Foo, &str)> = vec![
            (
                Foo::new(vec!["en-US", "de-DE"]),
                r#"{"values":"en-US,de-DE"}"#,
            ),
            (Foo::new(vec!["en-US"]), r#"{"values":"en-US"}"#),
            (Foo::new(Vec::<String>::new()), r#"{"values":null}"#),
            (Foo::none(), r#"{"values":null}"#),
        ];

        for (foo, expected) in cases {
            let got = serde_json::to_string(&foo).unwrap();
            assert_eq!(got, expected);
        }
    }
}
languagetool_rust/check.rs

languagetool_rust/
check.rs