languagetool_rust/api/check/
data_annotations.rs1use crate::error::{Error, Result};
4
5use std::{borrow::Cow, mem};
6
7use lifetime::IntoStatic;
8use serde::{Deserialize, Serialize};
9
10#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize, Hash, IntoStatic)]
12#[non_exhaustive]
13#[serde(rename_all = "camelCase")]
14pub struct DataAnnotation<'source> {
15 #[serde(skip_serializing_if = "Option::is_none")]
19 pub text: Option<Cow<'source, str>>,
20 #[serde(skip_serializing_if = "Option::is_none")]
24 pub markup: Option<Cow<'source, str>>,
25 #[serde(skip_serializing_if = "Option::is_none")]
27 pub interpret_as: Option<Cow<'source, str>>,
28}
29
30impl<'source> DataAnnotation<'source> {
31 #[inline]
33 #[must_use]
34 pub fn new_text<T: Into<Cow<'source, str>>>(text: T) -> Self {
35 Self {
36 text: Some(text.into()),
37 markup: None,
38 interpret_as: None,
39 }
40 }
41
42 #[inline]
44 #[must_use]
45 pub fn new_markup<M: Into<Cow<'source, str>>>(markup: M) -> Self {
46 Self {
47 text: None,
48 markup: Some(markup.into()),
49 interpret_as: None,
50 }
51 }
52
53 #[inline]
55 #[must_use]
56 pub fn new_interpreted_markup<M: Into<Cow<'source, str>>, I: Into<Cow<'source, str>>>(
57 markup: M,
58 interpret_as: I,
59 ) -> Self {
60 Self {
61 interpret_as: Some(interpret_as.into()),
62 markup: Some(markup.into()),
63 text: None,
64 }
65 }
66
67 pub fn try_get_text(&self) -> Result<Cow<'source, str>> {
73 if let Some(ref text) = self.text {
74 Ok(text.clone())
75 } else if let Some(ref markup) = self.markup {
76 Ok(markup.clone())
77 } else {
78 Err(Error::InvalidDataAnnotation(format!(
79 "missing either text or markup field in {self:?}"
80 )))
81 }
82 }
83}
84
#[cfg(test)]
mod data_annotation_tests {

    use super::DataAnnotation;

    /// `new_text` fills `text` and nothing else.
    #[test]
    fn test_text() {
        let DataAnnotation {
            text,
            markup,
            interpret_as,
        } = DataAnnotation::new_text("Hello");

        assert_eq!(text.as_deref(), Some("Hello"));
        assert_eq!(markup, None);
        assert_eq!(interpret_as, None);
    }

    /// `new_markup` fills `markup` and nothing else.
    #[test]
    fn test_markup() {
        let DataAnnotation {
            text,
            markup,
            interpret_as,
        } = DataAnnotation::new_markup("<a>Hello</a>");

        assert_eq!(text, None);
        assert_eq!(markup.as_deref(), Some("<a>Hello</a>"));
        assert_eq!(interpret_as, None);
    }

    /// `new_interpreted_markup` fills `markup` and `interpret_as`, not `text`.
    #[test]
    fn test_interpreted_markup() {
        let DataAnnotation {
            text,
            markup,
            interpret_as,
        } = DataAnnotation::new_interpreted_markup("<a>Hello</a>", "Hello");

        assert_eq!(text, None);
        assert_eq!(markup.as_deref(), Some("<a>Hello</a>"));
        assert_eq!(interpret_as.as_deref(), Some("Hello"));
    }
}
117
118#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq, Hash)]
120#[non_exhaustive]
121pub struct Data<'source> {
122 pub annotation: Vec<DataAnnotation<'source>>,
124}
125
126impl Data<'_> {
127 #[must_use]
133 pub fn split(self, n: usize, pat: &str) -> Vec<Self> {
134 let mut break_point_lengths = vec![];
137 let mut len = 0;
138 for (i, ann) in self.annotation.iter().enumerate() {
139 len +=
140 ann.text.as_deref().unwrap_or("").len() + ann.markup.as_deref().unwrap_or("").len();
141 if ann.text.as_ref().is_some_and(|t| t.contains(pat)) {
142 break_point_lengths.push((i, len));
143 }
144 }
145
146 let mut break_points: Vec<usize> = vec![];
148 if break_point_lengths.len() > 1 {
149 let (mut i, mut ii) = (0, 1);
150 let (mut base, mut curr) = (0, 0);
151 while ii < break_point_lengths.len() {
152 curr += break_point_lengths[i].1 - base;
153
154 if break_point_lengths[ii].1 - base + curr > n {
155 break_points.push(break_point_lengths[i].0);
156 base = break_point_lengths[i].1;
157 curr = 0;
158 }
159
160 i += 1;
161 ii += 1;
162 }
163 }
164
165 let mut split = Vec::with_capacity(break_points.len());
167 let mut iter = self.into_iter();
168 let mut taken = 0;
169 let mut annotations = vec![];
170 for break_point in break_points {
171 while taken != break_point + 1 {
172 annotations.push(iter.next().unwrap());
173 taken += 1;
174 }
175 split.push(Data::from_iter(mem::take(&mut annotations)));
176 }
177
178 split
179 }
180}
181
182impl IntoStatic for Data<'_> {
183 type Static = Data<'static>;
184 fn into_static(self) -> Self::Static {
185 Data {
186 annotation: self
187 .annotation
188 .into_iter()
189 .map(IntoStatic::into_static)
190 .collect(),
191 }
192 }
193}
194
195impl<'source, T: Into<DataAnnotation<'source>>> FromIterator<T> for Data<'source> {
196 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
197 let annotation = iter.into_iter().map(std::convert::Into::into).collect();
198 Data { annotation }
199 }
200}
201
202impl<'source> IntoIterator for Data<'source> {
203 type Item = DataAnnotation<'source>;
204 type IntoIter = std::vec::IntoIter<Self::Item>;
205
206 fn into_iter(self) -> Self::IntoIter {
207 self.annotation.into_iter()
208 }
209}
210
211impl Serialize for Data<'_> {
212 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
213 where
214 S: serde::Serializer,
215 {
216 let mut map = std::collections::HashMap::new();
217 map.insert("annotation", &self.annotation);
218
219 serializer.serialize_str(&serde_json::to_string(&map).unwrap())
220 }
221}
222
#[cfg(feature = "cli")]
impl std::str::FromStr for Data<'_> {
    type Err = Error;

    /// Parses `s` as JSON into a `Data`.
    ///
    /// # Errors
    ///
    /// Any `serde_json` parsing failure is converted into [`Error`].
    fn from_str(s: &str) -> Result<Self> {
        serde_json::from_str::<Self>(s).map_err(Error::from)
    }
}
232
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use super::super::{Data, DataAnnotation};

    /// Minimal token type used to exercise `FromIterator` on `Data`.
    #[derive(Debug)]
    enum Token<'source> {
        Text(&'source str),
        Skip(&'source str),
    }

    impl<'source> From<&'source str> for Token<'source> {
        /// Purely ASCII-alphabetic input is `Text`; anything else is `Skip`.
        fn from(s: &'source str) -> Self {
            let only_letters = s.chars().all(|c| c.is_ascii_alphabetic());
            match only_letters {
                true => Token::Text(s),
                false => Token::Skip(s),
            }
        }
    }

    impl<'source> From<Token<'source>> for DataAnnotation<'source> {
        /// `Text` tokens become text annotations, `Skip` tokens become markup.
        fn from(token: Token<'source>) -> Self {
            match token {
                Token::Skip(s) => DataAnnotation::new_markup(s),
                Token::Text(s) => DataAnnotation::new_text(s),
            }
        }
    }

    /// Collecting tokens yields the matching annotations in order.
    #[test]
    fn test_data_annotation() {
        let data: Data = "My name is Q34XY".split(' ').map(Token::from).collect();

        let annotation = vec![
            DataAnnotation::new_text("My"),
            DataAnnotation::new_text("name"),
            DataAnnotation::new_text("is"),
            DataAnnotation::new_markup("Q34XY"),
        ];
        let expected_data = Data { annotation };

        assert_eq!(data, expected_data);
    }

    /// `try_get_text` returns text or markup, and fails when both are unset.
    #[test]
    fn test_try_get_text() {
        const TEXT: &str = "Lorem Ipsum";
        let expected = Cow::from(TEXT);

        let from_text = DataAnnotation::new_text(TEXT);
        assert_eq!(from_text.try_get_text().unwrap(), expected);

        let from_markup = DataAnnotation::new_markup(TEXT);
        assert_eq!(from_markup.try_get_text().unwrap(), expected);

        let empty = DataAnnotation {
            text: None,
            markup: None,
            interpret_as: None,
        };
        assert!(empty.try_get_text().is_err());
    }
}