jlabel_question/lib.rs
1#![deny(missing_docs)]
2#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
3
4//! HTS-style full-context label question parser and matcher.
5//!
6//! The main structure for parsing and matching is [`AllQuestion`].
7//! It can parse most patterns, but it cannot parse some of them.
8//! For details, please see [Condition for parsing as AllQuestion].
9//!
10//! [Condition for parsing as AllQuestion]: #condition-for-parsing-as-allquestion
11//!
12//! ```rust
13//! # use std::error::Error;
14//! use jlabel::Label;
15//! use jlabel_question::{AllQuestion, QuestionMatcher};
16//!
17//! use std::str::FromStr;
18//!
19//! # fn main() -> Result<(), Box<dyn Error>> {
20//! let question = AllQuestion::parse(&["*/A:-??+*", "*/A:-?+*"])?;
21//! let label_str = concat!(
22//! "sil^n-i+h=o",
23//! "/A:-3+1+7",
24//! "/B:xx-xx_xx",
25//! "/C:02_xx+xx",
26//! "/D:02+xx_xx",
27//! "/E:xx_xx!xx_xx-xx",
28//! "/F:7_4#0_xx@1_3|1_12",
29//! "/G:4_4%0_xx_1",
30//! "/H:xx_xx",
31//! "/I:3-12@1+2&1-8|1+41",
32//! "/J:5_29",
33//! "/K:2+8-41"
34//! );
35//! assert!(question.test(&label_str.parse()?));
36//! #
37//! # Ok(())
38//! # }
39//! ```
40//!
41//! ## Condition for parsing as [`AllQuestion`]
42//!
43//! Here is the necessary condition for the pattern to succeed in parsing as [`AllQuestion`],
44//! but some questions may not succeed even if they fulfill these requirements.
45//!
//! - The patterns must be valid htsvoice question patterns.
//! - With `*` and `?` as wildcards, each pattern must match the entire full-context label.
//! - Patterns that cannot match a full-context label in any situation (e.g. `*/A:-?????+*`) are not allowed.
//! - A minus sign (`-`) in a numerical field can only be used in the first element of `A` (`A1`).
//! - All the patterns must be about the same position.
//!   - e.g. A first pattern about the first element of Phoneme combined with a second pattern about the last element of field `J` is *not* allowed.
//! - Each pattern must *not* have conditions on two or more positions.
//! - When the pattern is about the position of a numerical field (except for categorical fields such as `B`, `C`, or `D`),
//!   - The pattern must be continuous.
55//!
56//! ## Fallback
57//!
//! As [`AllQuestion`] parsing does not always succeed (even if the pattern is correct),
//! you may need to write a fallback for that.
60//!
//! If you just want to ignore those patterns, you can simply return `false` instead of the result of `test()`.
62//!
63//! If you need to successfully parse pattern which [`AllQuestion`] fails to parse,
64//! [`regex::RegexQuestion`] is the best choice.
65//!
66//! ```rust
67//! # #[cfg(feature = "regex")]
68//! # {
69//! use jlabel::Label;
70//! use jlabel_question::{regex::RegexQuestion, AllQuestion, ParseError, QuestionMatcher};
71//!
//! enum Pattern {
//!     AllQuestion(AllQuestion),
//!     Regex(RegexQuestion),
//! }
//! impl Pattern {
//!     fn parse(patterns: &[&str]) -> Result<Self, ParseError> {
//!         match AllQuestion::parse(patterns) {
//!             Ok(question) => Ok(Self::AllQuestion(question)),
//!             Err(_) => Ok(Self::Regex(RegexQuestion::parse(patterns)?)),
//!         }
//!     }
//!     fn test(&self, label: &Label) -> bool {
//!         match self {
//!             Self::AllQuestion(question) => question.test(label),
//!             Self::Regex(question) => question.test(label),
//!         }
//!     }
//! }
90//! # }
91//! ```
92
93pub mod parse_position;
94pub mod position;
95
96#[cfg(feature = "regex")]
97pub mod regex;
98
99use std::num::ParseIntError;
100
101use position::{
102 AllPosition, BooleanPosition, CategoryPosition, PhonePosition, Position, SignedRangePosition,
103 UndefinedPotision, UnsignedRangePosition,
104};
105
106use jlabel::Label;
107use parse_position::{estimate_position, PositionError};
108
109#[cfg(feature = "serde")]
110use serde::{Deserialize, Serialize};
111
112/// Errors from jlabel-question.
113#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
114pub enum ParseError {
115 /// Some patterns are pointing at position different from
116 /// which the first pattern is pointing at.
117 #[error("Position mismatch")]
118 PositionMismatch,
119
120 /// The pattern failed to parse.
121 #[error("Invalid position")]
122 InvalidPosition(#[from] PositionError),
123
124 /// The pattern or range is empty, so jlabel-question cannot parse it.
125 #[error("Empty patterns or range")]
126 Empty,
127
128 /// The range is incontinuous or not arranged in ascending order.
129 #[error("Incontinuous range")]
130 IncontinuousRange,
131
132 /// Failed to parse integer field in a pattern containing wildcard.
133 /// This might result from incorrect number of wildcards.
134 #[error("Failed wildcard: {0}")]
135 FailWildcard(ParseIntError),
136
137 /// Failed to parse integer field in a pattern without wildcard.
138 /// This might result from incorrect position of wildcard such as `1?2`.
139 #[error("Failed literal: {0}")]
140 FailLiteral(ParseIntError),
141
142 /// Failed to parse boolean field.
143 /// Boolean fields must be either `0` or `1` (except for `xx` which means empty).
144 #[error("Invalid boolean: {0}")]
145 InvalidBoolean(String),
146
147 #[cfg(feature = "regex")]
148 /// Failed to build regex parser from the pattern.
149 #[error("Failed regex")]
150 FailRegex,
151}
152
// Builds the `AllQuestion` variant that corresponds to the given `AllPosition` variant.
//
// For every listed variant name, the position payload is handed to `Question::new`
// together with `$ranges`; a construction error is propagated with `?`, so the macro
// must be expanded inside a function returning a compatible `Result`.
macro_rules! match_position {
    ($position:expr, $ranges:expr, [$($name:ident),*]) => {
        match $position {
            $(
                AllPosition::$name(inner) => {
                    let question = Question::new(inner, $ranges)?;
                    Ok(AllQuestion::$name(question))
                }
            )*
        }
    };
}
162
163/// Parses the question, and tests it aganinst given full-context label.
164pub trait QuestionMatcher
165where
166 Self: Sized,
167{
168 /// Parses question patterns in string, and if succeeds, returns the parsed question.
169 fn parse<S: AsRef<str>>(patterns: &[S]) -> Result<Self, ParseError>;
170
171 /// Checks if the full-context label matches the question.
172 ///
173 /// If you want to `test` on string label, parse it using `Label::from_str()` beforehand.
174 fn test(&self, label: &Label) -> bool;
175}
176
177/// A main structure representing question.
178#[derive(Debug, Clone, PartialEq, Eq)]
179#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
180pub enum AllQuestion {
181 /// Question about phone fields of full-context label
182 Phone(Question<PhonePosition>),
183 /// Question about signed integer fields of full-context label
184 SignedRange(Question<SignedRangePosition>),
185 /// Question about unsigned integer fields of full-context label
186 UnsignedRange(Question<UnsignedRangePosition>),
187 /// Question about boolean fields of full-context label
188 Boolean(Question<BooleanPosition>),
189 /// Question about numerical categorical fields of full-context label
190 Category(Question<CategoryPosition>),
191 /// Question about undefined (always `xx`) fields of full-context label
192 Undefined(Question<UndefinedPotision>),
193}
194
195impl QuestionMatcher for AllQuestion {
196 fn parse<S: AsRef<str>>(patterns: &[S]) -> Result<Self, ParseError> {
197 let mut position = None;
198 let mut ranges = Vec::with_capacity(patterns.len());
199
200 for pattern in patterns {
201 let (pos, range) = estimate_position(pattern.as_ref())?;
202
203 if let Some(position) = position {
204 if pos != position {
205 return Err(ParseError::PositionMismatch);
206 }
207 } else {
208 position = Some(pos);
209 }
210
211 ranges.push(range);
212 }
213
214 match_position!(
215 position.ok_or(ParseError::Empty)?,
216 &ranges,
217 [
218 Phone,
219 SignedRange,
220 UnsignedRange,
221 Boolean,
222 Category,
223 Undefined
224 ]
225 )
226 }
227 fn test(&self, label: &Label) -> bool {
228 match self {
229 Self::Phone(q) => q.test(label),
230 Self::SignedRange(q) => q.test(label),
231 Self::UnsignedRange(q) => q.test(label),
232 Self::Boolean(q) => q.test(label),
233 Self::Category(q) => q.test(label),
234 Self::Undefined(q) => q.test(label),
235 }
236 }
237}
238
239/// An inner structure representing a pair of position and range.
240///
241/// Used in variants of [`AllQuestion`]
242#[derive(Debug, Clone, PartialEq, Eq)]
243#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
244pub struct Question<P: Position> {
245 /// The position this question matches to.
246 pub position: P,
247 /// The parsed range
248 #[cfg_attr(
249 feature = "serde",
250 serde(bound(
251 serialize = "P::Range: Serialize",
252 deserialize = "P::Range: Deserialize<'de>"
253 ))
254 )]
255 pub range: Option<P::Range>,
256}
257
258impl<P: Position> Question<P> {
259 /// Parse question pattern
260 pub fn new(position: P, ranges: &[&str]) -> Result<Self, ParseError> {
261 match ranges {
262 ["xx"] => Ok(Self {
263 range: None,
264 position,
265 }),
266 ranges => Ok(Self {
267 range: Some(position.range(ranges)?),
268 position,
269 }),
270 }
271 }
272
273 /// Check if this question matches
274 pub fn test(&self, label: &Label) -> bool {
275 match (&self.range, self.position.get(label)) {
276 (Some(range), Some(target)) => self.position.test(range, target),
277 (None, None) => true,
278 _ => false,
279 }
280 }
281}
282
283#[cfg(test)]
284mod tests;