1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#![deny(missing_docs)]
#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]

//! HTS-style full-context label question parser and matcher.
//!
//! The main structure for parsing and matching is [`AllQuestion`].
//! It can parse most patterns, but it cannot parse some of them.
//! For details, please see [Condition for parsing as AllQuestion].
//!
//! [Condition for parsing as AllQuestion]: #condition-for-parsing-as-allquestion
//!
//! ```rust
//! # use std::error::Error;
//! use jlabel::Label;
//! use jlabel_question::{AllQuestion, QuestionMatcher};
//!
//! use std::str::FromStr;
//!
//! # fn main() -> Result<(), Box<dyn Error>> {
//! let question = AllQuestion::parse(&["*/A:-??+*", "*/A:-?+*"])?;
//! let label_str = concat!(
//!     "sil^n-i+h=o",
//!     "/A:-3+1+7",
//!     "/B:xx-xx_xx",
//!     "/C:02_xx+xx",
//!     "/D:02+xx_xx",
//!     "/E:xx_xx!xx_xx-xx",
//!     "/F:7_4#0_xx@1_3|1_12",
//!     "/G:4_4%0_xx_1",
//!     "/H:xx_xx",
//!     "/I:3-12@1+2&1-8|1+41",
//!     "/J:5_29",
//!     "/K:2+8-41"
//! );
//! assert!(question.test(&label_str.parse()?));
//! #
//! #     Ok(())
//! # }
//! ```
//!
//! ## Condition for parsing as [`AllQuestion`]
//!
//! Here is the necessary condition for the pattern to succeed in parsing as [`AllQuestion`],
//! but some questions may not succeed even if they fulfill these requirements.
//!
//! - The patterns must be valid htsvoice question patterns.
//!   - Using `*` and `?` as wildcards, each pattern must match the entire full-context label.
//!   - Patterns that cannot match a full-context label in any situation (e.g. `*/A:-?????+*`) are not allowed.
//!   - Minus sign (`-`) in numerical field can only be used in the first element of `A` (`A1`).
//! - All the patterns must be about the same position.
//!   - e.g. A first pattern about the first element of Phoneme combined with a second pattern about the last element of field `J` is *not* allowed.
//! - Each pattern must *not* have conditions on two or more positions.
//! - When the pattern is about position of numerical field (except for categorical field such as `B`, `C`, or `D`),
//!   - The pattern must be continuous.
//!
//! ## Fallback
//!
//! As [`AllQuestion`] parsing does not always succeed (even if the pattern is correct),
//! you may need to write a fallback for that.
//!
//! If you just want to ignore those patterns, you can simply return `false` instead of the result of `test()`.
//!
//! If you need to successfully parse patterns which [`AllQuestion`] fails to parse,
//! [`regex::RegexQuestion`] is the best choice.
//!
//! ```rust
//! # #[cfg(feature = "regex")]
//! # {
//! use jlabel::Label;
//! use jlabel_question::{regex::RegexQuestion, AllQuestion, ParseError, QuestionMatcher};
//!
//! enum Pattern {
//!     AllQuestion(AllQuestion),
//!     Regex(RegexQuestion),
//! }
//! impl Pattern {
//!     fn parse(patterns: &[&str]) -> Result<Self, ParseError> {
//!         match AllQuestion::parse(patterns) {
//!             Ok(question) => Ok(Self::AllQuestion(question)),
//!             Err(_) => Ok(Self::Regex(RegexQuestion::parse(patterns)?)),
//!         }
//!     }
//!     fn test(&self, label: &Label) -> bool {
//!         match self {
//!             Self::AllQuestion(question) => question.test(label),
//!             Self::Regex(question) => question.test(label),
//!         }
//!     }
//! }
//! # }
//! ```

pub mod parse_position;
pub mod position;

#[cfg(feature = "regex")]
pub mod regex;

use std::num::ParseIntError;

use position::{
    AllPosition, BooleanPosition, CategoryPosition, PhonePosition, Position, SignedRangePosition,
    UndefinedPotision, UnsignedRangePosition,
};

use jlabel::Label;
use parse_position::{estimate_position, PositionError};

/// Errors that can occur while parsing question patterns in jlabel-question.
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum ParseError {
    /// Some patterns are pointing at a position different from
    /// the one the first pattern is pointing at.
    #[error("Position mismatch")]
    PositionMismatch,

    /// The pattern failed to parse.
    ///
    /// Wraps a [`PositionError`]; the `#[from]` attribute lets `?` convert it automatically.
    #[error("Invalid position")]
    InvalidPosition(#[from] PositionError),

    /// The pattern or range is empty, so jlabel-question cannot parse it.
    ///
    /// For example, [`AllQuestion::parse`] returns this when it is given no patterns at all.
    #[error("Empty patterns or range")]
    Empty,

    /// The range is not continuous, or is not arranged in ascending order.
    #[error("Incontinuous range")]
    IncontinuousRange,

    /// Failed to parse an integer field in a pattern containing a wildcard.
    /// This might result from an incorrect number of wildcards.
    #[error("Failed wildcard: {0}")]
    FailWildcard(ParseIntError),

    /// Failed to parse an integer field in a pattern without a wildcard.
    /// This might result from an incorrectly placed wildcard such as `1?2`.
    #[error("Failed literal: {0}")]
    FailLiteral(ParseIntError),

    /// Failed to parse a boolean field.
    /// Boolean fields must be either `0` or `1` (except for `xx`, which means empty).
    #[error("Invalid boolean: {0}")]
    InvalidBoolean(String),

    #[cfg(feature = "regex")]
    /// Failed to build a regex parser from the pattern.
    ///
    /// Only available when the `regex` feature is enabled.
    #[error("Failed regex")]
    FailRegex,
}

// Expands to a `match` over the listed `AllPosition` variants. Each arm builds a
// `Question` from the variant's payload and the given patterns, then wraps it in
// the `AllQuestion` variant of the same name. The whole expression evaluates to
// `Result<AllQuestion, ParseError>`.
macro_rules! match_position {
    ($pos:expr, $patterns:expr, [$($variant:ident),*]) => {
        match $pos {
            $(
                AllPosition::$variant(inner) => {
                    Question::new(inner, $patterns).map(AllQuestion::$variant)
                }
            )*
        }
    };
}

/// Parses a question and tests it against a given full-context label.
pub trait QuestionMatcher: Sized {
    /// Builds a question from string patterns.
    ///
    /// # Errors
    ///
    /// Returns a [`ParseError`] when the patterns cannot be parsed into this question type.
    fn parse(patterns: &[&str]) -> Result<Self, ParseError>;

    /// Returns `true` if the full-context label matches the question.
    ///
    /// To `test` a label held as a string, parse it with `Label::from_str()` beforehand.
    fn test(&self, label: &Label) -> bool;
}

/// The main structure representing a question.
///
/// Each variant wraps a [`Question`] specialized for one kind of position,
/// so a single value of this type can hold a question about any supported field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AllQuestion {
    /// Question about phone fields of a full-context label
    Phone(Question<PhonePosition>),
    /// Question about signed integer fields of a full-context label
    SignedRange(Question<SignedRangePosition>),
    /// Question about unsigned integer fields of a full-context label
    UnsignedRange(Question<UnsignedRangePosition>),
    /// Question about boolean fields of a full-context label
    Boolean(Question<BooleanPosition>),
    /// Question about numerical categorical fields of a full-context label
    Category(Question<CategoryPosition>),
    /// Question about undefined (always `xx`) fields of a full-context label
    Undefined(Question<UndefinedPotision>),
}

impl QuestionMatcher for AllQuestion {
    /// Estimates the position of every pattern, requires them all to agree,
    /// and builds the question variant matching that shared position.
    fn parse(patterns: &[&str]) -> Result<Self, ParseError> {
        // Position shared by all patterns; fixed by the first pattern seen.
        let mut shared = None;
        let mut ranges = Vec::with_capacity(patterns.len());

        for pattern in patterns {
            let (found, range) = estimate_position(pattern)?;

            match shared {
                None => shared = Some(found),
                Some(first) => {
                    if found != first {
                        return Err(ParseError::PositionMismatch);
                    }
                }
            }

            ranges.push(range);
        }

        // Without any pattern there is no position to build a question for.
        let target_position = shared.ok_or(ParseError::Empty)?;

        match_position!(
            target_position,
            &ranges,
            [
                Phone,
                SignedRange,
                UnsignedRange,
                Boolean,
                Category,
                Undefined
            ]
        )
    }

    /// Delegates to the wrapped [`Question`], whichever variant this is.
    fn test(&self, label: &Label) -> bool {
        match self {
            Self::Phone(question) => question.test(label),
            Self::SignedRange(question) => question.test(label),
            Self::UnsignedRange(question) => question.test(label),
            Self::Boolean(question) => question.test(label),
            Self::Category(question) => question.test(label),
            Self::Undefined(question) => question.test(label),
        }
    }
}

/// An inner structure representing a pair of position and range.
///
/// Used in variants of [`AllQuestion`]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Question<P: Position> {
    /// The position this question matches to.
    pub position: P,
    /// The parsed range.
    ///
    /// `None` when the question was built from the single pattern `xx`;
    /// such a question matches only labels for which the position yields no value.
    pub range: Option<P::Range>,
}

impl<P: Position> Question<P> {
    /// Builds a question for `position` from the range part of each pattern.
    ///
    /// A lone `xx` produces a question without a range; anything else is handed
    /// to the position to be parsed into its range type.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by the position when the ranges cannot be parsed.
    pub fn new(position: P, ranges: &[&str]) -> Result<Self, ParseError> {
        let range = if matches!(ranges, ["xx"]) {
            None
        } else {
            Some(position.range(ranges)?)
        };

        Ok(Self { position, range })
    }

    /// Returns `true` if `label` satisfies this question.
    ///
    /// A question with a range matches when the label has a value at the position
    /// and the position accepts it for that range. A question without a range
    /// matches only when the label has no value at the position.
    pub fn test(&self, label: &Label) -> bool {
        let target = self.position.get(label);

        match &self.range {
            None => target.is_none(),
            Some(range) => match target {
                Some(value) => self.position.test(range, value),
                None => false,
            },
        }
    }
}

#[cfg(test)]
mod tests;