conllu/
display.rs

1//! Wrappers to display `udgraph` datastructures in CoNLL-U format.
2
3use std::borrow::Cow;
4use std::collections::BTreeMap;
5use std::convert::TryFrom;
6use std::fmt;
7
8use itertools::Itertools;
9use std::ops::Deref;
10use udgraph::graph::{DepGraph, Node, Sentence};
11use udgraph::token::{Features, Misc};
12
13use crate::error::ParseError;
14
15/// Wrapper to display/parse features in CoNLL-U format.
16#[derive(Debug, Eq, PartialEq)]
17pub struct ConlluFeatures<'a>(Cow<'a, Features>);
18
19impl<'a> ConlluFeatures<'a> {
20    pub fn borrowed(features: &'a Features) -> Self {
21        ConlluFeatures(Cow::Borrowed(features))
22    }
23
24    pub fn into_owned(self) -> Features {
25        self.0.into_owned()
26    }
27}
28
29impl ConlluFeatures<'static> {
30    pub fn owned(features: Features) -> Self {
31        ConlluFeatures(Cow::Owned(features))
32    }
33
34    fn parse_features(feature_string: impl AsRef<str>) -> Result<Self, ParseError> {
35        let mut features = BTreeMap::new();
36
37        if feature_string.as_ref() == "_" {
38            return Ok(ConlluFeatures(Cow::Owned(Features::new())));
39        }
40
41        for fv in feature_string.as_ref().split('|') {
42            let idx = fv.find('=').ok_or(ParseError::IncorrectFeatureField {
43                value: fv.to_owned(),
44            })?;
45
46            features.insert(fv[..idx].to_owned(), fv[idx + 1..].to_owned());
47        }
48
49        Ok(ConlluFeatures::owned(features.into()))
50    }
51}
52
53impl<'a> Deref for ConlluFeatures<'a> {
54    type Target = BTreeMap<String, String>;
55
56    fn deref(&self) -> &Self::Target {
57        &**self.0
58    }
59}
60
61impl<'a> fmt::Display for ConlluFeatures<'a> {
62    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
63        if self.0.is_empty() {
64            write!(f, "_")
65        } else {
66            let features_str = self.0.iter().map(|(k, v)| format!("{}={}", k, v)).join("|");
67            write!(f, "{}", features_str)
68        }
69    }
70}
71
72impl TryFrom<&str> for ConlluFeatures<'static> {
73    type Error = ParseError;
74
75    fn try_from(feature_string: &str) -> Result<Self, Self::Error> {
76        Self::parse_features(feature_string)
77    }
78}
79
80/// Wrapper to display/parse miscellaneous features in CoNLL-U format.
81pub struct ConlluMisc<'a>(Cow<'a, Misc>);
82
83impl<'a> ConlluMisc<'a> {
84    pub fn borrowed(misc: &'a Misc) -> Self {
85        ConlluMisc(Cow::Borrowed(misc))
86    }
87
88    pub fn into_owned(self) -> Misc {
89        self.0.into_owned()
90    }
91}
92
93impl ConlluMisc<'static> {
94    pub fn owned(misc: Misc) -> Self {
95        ConlluMisc(Cow::Owned(misc))
96    }
97
98    pub fn parse_misc(misc_string: impl AsRef<str>) -> BTreeMap<String, Option<String>> {
99        let mut features = BTreeMap::new();
100
101        for fv in misc_string.as_ref().split('|') {
102            let fv: &str = fv;
103            let (k, v) = fv
104                .find('=')
105                .map(|idx| (fv[..idx].to_owned(), Some(fv[idx + 1..].to_owned())))
106                .unwrap_or_else(|| (fv.to_owned(), None));
107            features.insert(k, v);
108        }
109
110        features
111    }
112}
113
114impl<'a> fmt::Display for ConlluMisc<'a> {
115    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
116        if self.0.is_empty() {
117            write!(f, "_")
118        } else {
119            let misc_str = self
120                .0
121                .iter()
122                .map(|(k, v)| match *v {
123                    Some(ref v) => format!("{}={}", k, v),
124                    None => k.to_owned(),
125                })
126                .join("|");
127            write!(f, "{}", misc_str)
128        }
129    }
130}
131
132impl From<&str> for ConlluMisc<'static> {
133    fn from(misc_string: &str) -> Self {
134        ConlluMisc::owned(Self::parse_misc(misc_string).into())
135    }
136}
137
138/// Wrapper to display/parse a sentence graph in CoNLL-U format.
139pub struct ConlluSentence<'a>(&'a Sentence);
140
141impl<'a> ConlluSentence<'a> {
142    pub fn borrowed(sentence: &'a Sentence) -> Self {
143        ConlluSentence(sentence)
144    }
145}
146
147impl<'a> fmt::Display for ConlluSentence<'a> {
148    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
149        for comment in self.0.comments() {
150            writeln!(fmt, "{}", comment)?
151        }
152
153        for i in 1..self.0.len() {
154            let token = match self.0[i] {
155                Node::Token(ref token) => token,
156                Node::Root => unreachable!(),
157            };
158
159            let (head, head_rel) = triple_to_string(&self.0.dep_graph(), i);
160
161            writeln!(
162                fmt,
163                "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
164                i,
165                token.form(),
166                token.lemma().unwrap_or("_"),
167                token.upos().unwrap_or("_"),
168                token.xpos().unwrap_or("_"),
169                ConlluFeatures::borrowed(token.features()),
170                head.unwrap_or_else(|| "_".to_string()),
171                head_rel.unwrap_or_else(|| "_".to_string()),
172                token.deps().unwrap_or("_"),
173                ConlluMisc::borrowed(token.misc())
174            )?;
175        }
176
177        Ok(())
178    }
179}
180
181fn triple_to_string(g: &DepGraph, dependent: usize) -> (Option<String>, Option<String>) {
182    //  XXX:return string reference for relation.
183    let head_triple = g.head(dependent);
184    let head = head_triple.as_ref().map(|t| t.head().to_string());
185    let head_rel = head_triple
186        .as_ref()
187        .map(|t| t.relation().unwrap_or("_").to_string());
188
189    (head, head_rel)
190}
191
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;
    use std::convert::TryFrom;
    use std::iter::FromIterator;

    use maplit::btreemap;
    use udgraph::token::{Features, Token, TokenBuilder};

    use crate::display::ConlluFeatures;
    use crate::error::ParseError;

    /// Feature field shared by the tokens built in `token_with_features`.
    const NOMINATIVE_SINGULAR_MASCULINE: &str = "case=nominative|number=singular|gender=masculine";

    /// Parse `field` into features, panicking when it is malformed.
    fn parsed(field: &str) -> Features {
        ConlluFeatures::try_from(field).unwrap().into_owned()
    }

    #[test]
    fn features_from_iter_as_string() {
        // The keys are listed out of order on purpose: the rendered
        // string must come out sorted by key.
        let features = Features::from_iter(btreemap! {
            "feature2" => "y",
            "feature1" => "x"
        });

        assert_eq!(
            ConlluFeatures::borrowed(&features).to_string(),
            "feature1=x|feature2=y"
        );

        assert_eq!(ConlluFeatures::owned(Features::new()).to_string(), "_");
    }

    // Despite its name, this test covers values that contain `=`: only
    // the first `=` of each part separates the key from the value.
    #[test]
    fn features_with_colons() {
        let features = ConlluFeatures::try_from("Some=feature=with|additional=colons").unwrap();

        assert_eq!(features.get("Some").unwrap(), "feature=with");
        assert_eq!(features.get("additional").unwrap(), "colons");
    }

    #[test]
    fn feature_without_value_results_in_error() {
        assert_eq!(
            ConlluFeatures::try_from("c=d|a"),
            Err(ParseError::IncorrectFeatureField {
                value: "a".to_string()
            })
        );
    }

    #[test]
    fn eq_features_is_order_insensitive() {
        let token1: Token = TokenBuilder::new("a").features(parsed("a=b|c=d")).into();
        let token2: Token = TokenBuilder::new("a").features(parsed("c=d|a=b")).into();

        assert_eq!(token1, token2);
    }

    #[test]
    fn parse_empty_features() {
        assert_eq!(parsed("_"), Features::new());
    }

    #[test]
    fn features() {
        let tokens = token_with_features();
        let expected = features_correct();

        tokens.iter().zip(expected).for_each(|(token, correct)| {
            let kv: &BTreeMap<String, String> = token.features();
            assert_eq!(&correct, kv);
        });
    }

    /// Two tokens that differ in form and lemma but share tags and features.
    fn token_with_features() -> Vec<Token> {
        ["Gilles", "Deleuze"]
            .iter()
            .map(|&name| -> Token {
                TokenBuilder::new(name)
                    .lemma(name)
                    .upos("N")
                    .xpos("NE")
                    .features(parsed(NOMINATIVE_SINGULAR_MASCULINE))
                    .into()
            })
            .collect()
    }

    /// The feature maps expected for the tokens of `token_with_features`.
    fn features_correct() -> Vec<BTreeMap<String, String>> {
        let correct: BTreeMap<String, String> = [
            ("case", "nominative"),
            ("number", "singular"),
            ("gender", "masculine"),
        ]
        .iter()
        .map(|&(key, value)| (key.to_owned(), value.to_owned()))
        .collect();

        vec![correct.clone(), correct]
    }
}