syntaxdot_encoders/depseq/
error.rs

1use std::fmt;
2
3use thiserror::Error;
4use udgraph::graph::{Node, Sentence};
5use udgraph::token::Token;
6
7/// Encoder errors.
8#[derive(Clone, Debug, Eq, Error, PartialEq)]
9pub enum EncodeError {
10    /// The token does not have a head.
11    MissingHead { token: usize, sent: Vec<String> },
12
13    /// The token's head does not have a part-of-speech.
14    MissingPos { sent: Vec<String>, token: usize },
15
16    /// The token does not have a dependency relation.
17    MissingRelation { token: usize, sent: Vec<String> },
18}
19
20impl EncodeError {
21    /// Construct `EncodeError::MissingHead` from a CoNLL-X graph.
22    ///
23    /// Construct an error. `token` is the node index for which the
24    /// error applies in `sentence`.
25    pub fn missing_head(token: usize, sentence: &Sentence) -> EncodeError {
26        EncodeError::MissingHead {
27            sent: Self::sentence_to_forms(sentence),
28            token: token - 1,
29        }
30    }
31
32    /// Construct `EncodeError::MissingPOS` from a CoNLL-X graph.
33    ///
34    /// Construct an error. `token` is the node index for which the
35    /// error applies in `sentence`.
36    pub fn missing_pos(token: usize, sentence: &Sentence) -> EncodeError {
37        EncodeError::MissingPos {
38            sent: Self::sentence_to_forms(sentence),
39            token: token - 1,
40        }
41    }
42
43    /// Construct `EncodeError::MissingRelation` from a CoNLL-X graph.
44    ///
45    /// Construct an error. `token` is the node index for which the
46    /// error applies in `sentence`.
47    pub fn missing_relation(token: usize, sentence: &Sentence) -> EncodeError {
48        EncodeError::MissingRelation {
49            sent: Self::sentence_to_forms(sentence),
50            token: token - 1,
51        }
52    }
53
54    fn format_bracketed(bracket_idx: usize, tokens: &[String]) -> String {
55        let mut tokens = tokens.to_owned();
56        tokens.insert(bracket_idx + 1, "]".to_string());
57        tokens.insert(bracket_idx, "[".to_string());
58
59        tokens.join(" ")
60    }
61
62    fn sentence_to_forms(sentence: &Sentence) -> Vec<String> {
63        sentence
64            .iter()
65            .filter_map(Node::token)
66            .map(Token::form)
67            .map(ToOwned::to_owned)
68            .collect()
69    }
70}
71
72impl fmt::Display for EncodeError {
73    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74        use EncodeError::*;
75
76        match self {
77            MissingHead { token, sent } => write!(
78                f,
79                "Token does not have a head:\n\n{}\n",
80                Self::format_bracketed(*token, sent),
81            ),
82            MissingPos { token, sent } => write!(
83                f,
84                "Head of token '{}' does not have a part-of-speech:\n\n{}\n",
85                sent[*token],
86                Self::format_bracketed(*token, sent),
87            ),
88            MissingRelation { token, sent } => write!(
89                f,
90                "Token does not have a dependency relation:\n\n{}\n",
91                Self::format_bracketed(*token, sent),
92            ),
93        }
94    }
95}
96
97/// Decoder errors.
98#[derive(Clone, Copy, Debug, Eq, Error, PartialEq)]
99pub(crate) enum DecodeError {
100    /// The head position is out of bounds.
101    #[error("position out of bounds")]
102    PositionOutOfBounds,
103
104    /// The head part-of-speech tag does not occur in the sentence.
105    #[error("unknown part-of-speech tag")]
106    InvalidPos,
107}