1use std::borrow::Cow;
4use std::collections::BTreeMap;
5use std::convert::TryFrom;
6use std::fmt;
7
8use itertools::Itertools;
9use std::ops::Deref;
10use udgraph::graph::{DepGraph, Node, Sentence};
11use udgraph::token::{Features, Misc};
12
13use crate::error::ParseError;
14
15#[derive(Debug, Eq, PartialEq)]
17pub struct ConlluFeatures<'a>(Cow<'a, Features>);
18
19impl<'a> ConlluFeatures<'a> {
20 pub fn borrowed(features: &'a Features) -> Self {
21 ConlluFeatures(Cow::Borrowed(features))
22 }
23
24 pub fn into_owned(self) -> Features {
25 self.0.into_owned()
26 }
27}
28
29impl ConlluFeatures<'static> {
30 pub fn owned(features: Features) -> Self {
31 ConlluFeatures(Cow::Owned(features))
32 }
33
34 fn parse_features(feature_string: impl AsRef<str>) -> Result<Self, ParseError> {
35 let mut features = BTreeMap::new();
36
37 if feature_string.as_ref() == "_" {
38 return Ok(ConlluFeatures(Cow::Owned(Features::new())));
39 }
40
41 for fv in feature_string.as_ref().split('|') {
42 let idx = fv.find('=').ok_or(ParseError::IncorrectFeatureField {
43 value: fv.to_owned(),
44 })?;
45
46 features.insert(fv[..idx].to_owned(), fv[idx + 1..].to_owned());
47 }
48
49 Ok(ConlluFeatures::owned(features.into()))
50 }
51}
52
53impl<'a> Deref for ConlluFeatures<'a> {
54 type Target = BTreeMap<String, String>;
55
56 fn deref(&self) -> &Self::Target {
57 &**self.0
58 }
59}
60
61impl<'a> fmt::Display for ConlluFeatures<'a> {
62 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
63 if self.0.is_empty() {
64 write!(f, "_")
65 } else {
66 let features_str = self.0.iter().map(|(k, v)| format!("{}={}", k, v)).join("|");
67 write!(f, "{}", features_str)
68 }
69 }
70}
71
72impl TryFrom<&str> for ConlluFeatures<'static> {
73 type Error = ParseError;
74
75 fn try_from(feature_string: &str) -> Result<Self, Self::Error> {
76 Self::parse_features(feature_string)
77 }
78}
79
80pub struct ConlluMisc<'a>(Cow<'a, Misc>);
82
83impl<'a> ConlluMisc<'a> {
84 pub fn borrowed(misc: &'a Misc) -> Self {
85 ConlluMisc(Cow::Borrowed(misc))
86 }
87
88 pub fn into_owned(self) -> Misc {
89 self.0.into_owned()
90 }
91}
92
93impl ConlluMisc<'static> {
94 pub fn owned(misc: Misc) -> Self {
95 ConlluMisc(Cow::Owned(misc))
96 }
97
98 pub fn parse_misc(misc_string: impl AsRef<str>) -> BTreeMap<String, Option<String>> {
99 let mut features = BTreeMap::new();
100
101 for fv in misc_string.as_ref().split('|') {
102 let fv: &str = fv;
103 let (k, v) = fv
104 .find('=')
105 .map(|idx| (fv[..idx].to_owned(), Some(fv[idx + 1..].to_owned())))
106 .unwrap_or_else(|| (fv.to_owned(), None));
107 features.insert(k, v);
108 }
109
110 features
111 }
112}
113
114impl<'a> fmt::Display for ConlluMisc<'a> {
115 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
116 if self.0.is_empty() {
117 write!(f, "_")
118 } else {
119 let misc_str = self
120 .0
121 .iter()
122 .map(|(k, v)| match *v {
123 Some(ref v) => format!("{}={}", k, v),
124 None => k.to_owned(),
125 })
126 .join("|");
127 write!(f, "{}", misc_str)
128 }
129 }
130}
131
132impl From<&str> for ConlluMisc<'static> {
133 fn from(misc_string: &str) -> Self {
134 ConlluMisc::owned(Self::parse_misc(misc_string).into())
135 }
136}
137
138pub struct ConlluSentence<'a>(&'a Sentence);
140
141impl<'a> ConlluSentence<'a> {
142 pub fn borrowed(sentence: &'a Sentence) -> Self {
143 ConlluSentence(sentence)
144 }
145}
146
147impl<'a> fmt::Display for ConlluSentence<'a> {
148 fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
149 for comment in self.0.comments() {
150 writeln!(fmt, "{}", comment)?
151 }
152
153 for i in 1..self.0.len() {
154 let token = match self.0[i] {
155 Node::Token(ref token) => token,
156 Node::Root => unreachable!(),
157 };
158
159 let (head, head_rel) = triple_to_string(&self.0.dep_graph(), i);
160
161 writeln!(
162 fmt,
163 "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
164 i,
165 token.form(),
166 token.lemma().unwrap_or("_"),
167 token.upos().unwrap_or("_"),
168 token.xpos().unwrap_or("_"),
169 ConlluFeatures::borrowed(token.features()),
170 head.unwrap_or_else(|| "_".to_string()),
171 head_rel.unwrap_or_else(|| "_".to_string()),
172 token.deps().unwrap_or("_"),
173 ConlluMisc::borrowed(token.misc())
174 )?;
175 }
176
177 Ok(())
178 }
179}
180
181fn triple_to_string(g: &DepGraph, dependent: usize) -> (Option<String>, Option<String>) {
182 let head_triple = g.head(dependent);
184 let head = head_triple.as_ref().map(|t| t.head().to_string());
185 let head_rel = head_triple
186 .as_ref()
187 .map(|t| t.relation().unwrap_or("_").to_string());
188
189 (head, head_rel)
190}
191
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;
    use std::convert::TryFrom;
    use std::iter::FromIterator;

    use maplit::btreemap;
    use udgraph::token::{Features, Token, TokenBuilder};

    use crate::display::ConlluFeatures;
    use crate::error::ParseError;

    /// Feature string shared by the token fixtures.
    const NOMINAL_FEATURES: &str = "case=nominative|number=singular|gender=masculine";

    /// Parse a feature string into owned features, panicking on a parse error.
    fn parse_owned(feature_string: &str) -> Features {
        ConlluFeatures::try_from(feature_string)
            .unwrap()
            .into_owned()
    }

    /// Build a proper-noun token whose lemma equals its form.
    fn proper_noun(form: &'static str) -> Token {
        TokenBuilder::new(form)
            .lemma(form)
            .upos("N")
            .xpos("NE")
            .features(parse_owned(NOMINAL_FEATURES))
            .into()
    }

    #[test]
    fn features_from_iter_as_string() {
        // Keys are given out of order; the output must be sorted by key.
        let features = Features::from_iter(btreemap! {
            "feature2" => "y",
            "feature1" => "x"
        });
        let rendered: String = ConlluFeatures::borrowed(&features).to_string();
        assert_eq!(rendered, "feature1=x|feature2=y");

        let empty = ConlluFeatures::owned(Features::new());
        assert_eq!(empty.to_string(), "_");
    }

    #[test]
    fn features_with_colons() {
        // Only the first `=` of an item separates the key from the value.
        let features = ConlluFeatures::try_from("Some=feature=with|additional=colons").unwrap();

        assert_eq!(features.get("Some").unwrap(), "feature=with");
        assert_eq!(features.get("additional").unwrap(), "colons");
    }

    #[test]
    fn feature_without_value_results_in_error() {
        let parsed = ConlluFeatures::try_from("c=d|a");
        let expected = Err(ParseError::IncorrectFeatureField {
            value: "a".to_string(),
        });

        assert_eq!(parsed, expected);
    }

    #[test]
    fn eq_features_is_order_insensitive() {
        let build = |feature_string: &str| -> Token {
            TokenBuilder::new("a")
                .features(parse_owned(feature_string))
                .into()
        };

        assert_eq!(build("a=b|c=d"), build("c=d|a=b"));
    }

    #[test]
    fn parse_empty_features() {
        assert_eq!(parse_owned("_"), Features::new());
    }

    #[test]
    fn features() {
        let tokens = token_with_features();
        let expected = features_correct();

        for (token, correct) in tokens.iter().zip(expected) {
            let actual: &BTreeMap<String, String> = &**token.features();
            assert_eq!(&correct, actual);
        }
    }

    fn token_with_features() -> Vec<Token> {
        vec![proper_noun("Gilles"), proper_noun("Deleuze")]
    }

    fn features_correct() -> Vec<BTreeMap<String, String>> {
        let correct: BTreeMap<String, String> = [
            ("case", "nominative"),
            ("number", "singular"),
            ("gender", "masculine"),
        ]
        .iter()
        .map(|&(key, value)| (key.to_owned(), value.to_owned()))
        .collect();

        vec![correct.clone(), correct]
    }
}