jlabel_question/
position.rs

1//! Structures for position
2
3use std::{fmt::Debug, ops::Range};
4
5use crate::Label;
6
7use super::ParseError;
8
9#[cfg(feature = "serde")]
10use serde::{Deserialize, Serialize};
11
/// Enum that represents all positions
///
/// Each variant wraps the position enum of one family of label fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AllPosition {
    /// Phone fields
    Phone(PhonePosition),
    /// Signed integer fields
    SignedRange(SignedRangePosition),
    /// Unsigned integer fields
    UnsignedRange(UnsignedRangePosition),
    /// Boolean fields
    Boolean(BooleanPosition),
    /// Numerical categorical fields
    Category(CategoryPosition),
    /// Undefined (always `xx`) fields
    Undefined(UndefinedPotision),
}
28
/// Expands `label.block.prop` into an `Option<&Prop>`: `None` when the optional
/// `block` is absent, otherwise a reference to its `prop` field.
macro_rules! as_ref_map {
    ($label:ident.$block:ident.$prop:ident) => {{
        let block = $label.$block.as_ref();
        block.map(|inner| &inner.$prop)
    }};
}
34
/// Expands `label.block.prop` into an `Option<&Prop>` when both `block` and `prop`
/// are optional: `None` if either one is absent, otherwise a reference to the
/// value stored in `prop`.
macro_rules! as_ref_and_then {
    ($label:ident.$block:ident.$prop:ident) => {{
        let block = $label.$block.as_ref();
        block.and_then(|inner| inner.$prop.as_ref())
    }};
}
40
/// The trait every position type has to implement
///
/// A position knows how to parse the range strings written for it, how to pick
/// the matching part out of a [`Label`], and how to compare the two.
pub trait Position {
    /// The type of match target
    type Target;
    /// The type of range
    type Range;

    /// Parse range strings
    ///
    /// Returns a [`ParseError`] when the strings cannot be turned into
    /// [`Self::Range`](Position::Range).
    fn range(&self, ranges: &[&str]) -> Result<Self::Range, ParseError>;
    /// Get part of [`Label`] this position matches to.
    ///
    /// Returns `None` when the label has no value at this position.
    fn get<'a>(&self, label: &'a Label) -> Option<&'a Self::Target>;
    /// Check if the range matches target
    fn test(&self, range: &Self::Range, target: &Self::Target) -> bool;
}
55
/// Positions of phone fields
///
/// Each variant selects one field of `label.phoneme`, as implemented in
/// `Position::get` for this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[allow(missing_docs)]
pub enum PhonePosition {
    /// Reads `phoneme.p2`.
    P1,
    /// Reads `phoneme.p1`.
    P2,
    /// Reads `phoneme.c`.
    P3,
    /// Reads `phoneme.n1`.
    P4,
    /// Reads `phoneme.n2`.
    P5,
}
67
68impl Position for PhonePosition {
69    type Target = String;
70    type Range = Vec<String>;
71
72    fn range(&self, ranges: &[&str]) -> Result<Self::Range, ParseError> {
73        Ok(ranges.iter().map(|s| s.to_string()).collect())
74    }
75
76    fn get<'a>(&self, label: &'a Label) -> Option<&'a Self::Target> {
77        match self {
78            Self::P1 => label.phoneme.p2.as_ref(),
79            Self::P2 => label.phoneme.p1.as_ref(),
80            Self::P3 => label.phoneme.c.as_ref(),
81            Self::P4 => label.phoneme.n1.as_ref(),
82            Self::P5 => label.phoneme.n2.as_ref(),
83        }
84    }
85
86    fn test(&self, range: &Self::Range, target: &Self::Target) -> bool {
87        range.contains(target)
88    }
89}
90
/// Positions with signed integer type
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[allow(missing_docs)]
pub enum SignedRangePosition {
    /// Reads `mora.relative_accent_position`.
    A1,
}
98
99impl Position for SignedRangePosition {
100    type Target = i8;
101    type Range = Range<i8>;
102
103    fn range(&self, ranges: &[&str]) -> Result<Self::Range, ParseError> {
104        let parsed_ranges = ranges.iter().map(range_i8).collect::<Result<Vec<_>, _>>()?;
105        merge_ranges(parsed_ranges)
106    }
107
108    fn get<'a>(&self, label: &'a Label) -> Option<&'a Self::Target> {
109        match self {
110            Self::A1 => as_ref_map!(label.mora.relative_accent_position),
111        }
112    }
113
114    fn test(&self, range: &Self::Range, target: &Self::Target) -> bool {
115        range.contains(target)
116    }
117}
118
119fn range_i8<S: AsRef<str>>(s: S) -> Result<Range<i8>, ParseError> {
120    let range = match s.as_ref() {
121        "-??" => -99..-9,
122        "-?" => -9..0,
123        "?" => 0..10,
124        s if s.ends_with('?') => {
125            let d = s[..s.len() - 1]
126                .parse::<i8>()
127                .map_err(ParseError::FailWildcard)?;
128            debug_assert!(d >= 0);
129            d * 10..(d + 1) * 10
130        }
131        s => {
132            let d = s.parse::<i8>().map_err(ParseError::FailLiteral)?;
133            d..d + 1
134        }
135    };
136    Ok(range)
137}
138
/// Positions with unsigned integer type
///
/// The per-variant notes name the field of `Label` that `Position::get`
/// reads for that variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[allow(missing_docs)]
pub enum UnsignedRangePosition {
    /// Reads `mora.position_forward`.
    A2,
    /// Reads `mora.position_backward`.
    A3,

    /// Reads `accent_phrase_prev.mora_count`.
    E1,
    /// Reads `accent_phrase_prev.accent_position`.
    E2,

    /// Reads `accent_phrase_curr.mora_count`.
    F1,
    /// Reads `accent_phrase_curr.accent_position`.
    F2,
    /// Reads `accent_phrase_curr.accent_phrase_position_forward`.
    F5,
    /// Reads `accent_phrase_curr.accent_phrase_position_backward`.
    F6,
    /// Reads `accent_phrase_curr.mora_position_forward`.
    F7,
    /// Reads `accent_phrase_curr.mora_position_backward`.
    F8,

    /// Reads `accent_phrase_next.mora_count`.
    G1,
    /// Reads `accent_phrase_next.accent_position`.
    G2,

    /// Reads `breath_group_prev.accent_phrase_count`.
    H1,
    /// Reads `breath_group_prev.mora_count`.
    H2,

    /// Reads `breath_group_curr.accent_phrase_count`.
    I1,
    /// Reads `breath_group_curr.mora_count`.
    I2,
    /// Reads `breath_group_curr.breath_group_position_forward`.
    I3,
    /// Reads `breath_group_curr.breath_group_position_backward`.
    I4,
    /// Reads `breath_group_curr.accent_phrase_position_forward`.
    I5,
    /// Reads `breath_group_curr.accent_phrase_position_backward`.
    I6,
    /// Reads `breath_group_curr.mora_position_forward`.
    I7,
    /// Reads `breath_group_curr.mora_position_backward`.
    I8,

    /// Reads `breath_group_next.accent_phrase_count`.
    J1,
    /// Reads `breath_group_next.mora_count`.
    J2,

    /// Reads `utterance.breath_group_count`.
    K1,
    /// Reads `utterance.accent_phrase_count`.
    K2,
    /// Reads `utterance.mora_count`.
    K3,
}
179
180impl Position for UnsignedRangePosition {
181    type Target = u8;
182    type Range = Range<u8>;
183
184    fn range(&self, ranges: &[&str]) -> Result<Self::Range, ParseError> {
185        let parsed_ranges = ranges.iter().map(range_u8).collect::<Result<Vec<_>, _>>()?;
186        merge_ranges(parsed_ranges)
187    }
188
189    fn get<'a>(&self, label: &'a Label) -> Option<&'a Self::Target> {
190        match self {
191            Self::A2 => as_ref_map!(label.mora.position_forward),
192            Self::A3 => as_ref_map!(label.mora.position_backward),
193            Self::E1 => as_ref_map!(label.accent_phrase_prev.mora_count),
194            Self::E2 => as_ref_map!(label.accent_phrase_prev.accent_position),
195            Self::F1 => as_ref_map!(label.accent_phrase_curr.mora_count),
196            Self::F2 => as_ref_map!(label.accent_phrase_curr.accent_position),
197            Self::F5 => as_ref_map!(label.accent_phrase_curr.accent_phrase_position_forward),
198            Self::F6 => as_ref_map!(label.accent_phrase_curr.accent_phrase_position_backward),
199            Self::F7 => as_ref_map!(label.accent_phrase_curr.mora_position_forward),
200            Self::F8 => as_ref_map!(label.accent_phrase_curr.mora_position_backward),
201            Self::G1 => as_ref_map!(label.accent_phrase_next.mora_count),
202            Self::G2 => as_ref_map!(label.accent_phrase_next.accent_position),
203            Self::H1 => as_ref_map!(label.breath_group_prev.accent_phrase_count),
204            Self::H2 => as_ref_map!(label.breath_group_prev.mora_count),
205            Self::I1 => as_ref_map!(label.breath_group_curr.accent_phrase_count),
206            Self::I2 => as_ref_map!(label.breath_group_curr.mora_count),
207            Self::I3 => as_ref_map!(label.breath_group_curr.breath_group_position_forward),
208            Self::I4 => as_ref_map!(label.breath_group_curr.breath_group_position_backward),
209            Self::I5 => as_ref_map!(label.breath_group_curr.accent_phrase_position_forward),
210            Self::I6 => as_ref_map!(label.breath_group_curr.accent_phrase_position_backward),
211            Self::I7 => as_ref_map!(label.breath_group_curr.mora_position_forward),
212            Self::I8 => as_ref_map!(label.breath_group_curr.mora_position_backward),
213            Self::J1 => as_ref_map!(label.breath_group_next.accent_phrase_count),
214            Self::J2 => as_ref_map!(label.breath_group_next.mora_count),
215            Self::K1 => Some(&label.utterance.breath_group_count),
216            Self::K2 => Some(&label.utterance.accent_phrase_count),
217            Self::K3 => Some(&label.utterance.mora_count),
218        }
219    }
220
221    fn test(&self, range: &Self::Range, target: &Self::Target) -> bool {
222        range.contains(target)
223    }
224}
225
226fn range_u8<S: AsRef<str>>(s: S) -> Result<Range<u8>, ParseError> {
227    let range = match s.as_ref() {
228        "?" => 1..10,
229        s if s.ends_with('?') => {
230            let d = s[..s.len() - 1]
231                .parse::<u8>()
232                .map_err(ParseError::FailWildcard)?;
233            d * 10..(d + 1) * 10
234        }
235        s => {
236            let d = s.parse::<u8>().map_err(ParseError::FailLiteral)?;
237            d..d + 1
238        }
239    };
240    Ok(range)
241}
242
243fn merge_ranges<Idx>(mut ranges: Vec<Range<Idx>>) -> Result<Range<Idx>, ParseError>
244where
245    Idx: Ord + Copy,
246{
247    ranges.sort_unstable_by_key(|range| range.start);
248    let merged = ranges
249        .into_iter()
250        .try_fold(None, |acc: Option<Range<Idx>>, curr| match acc {
251            // By sorting, always acc.start <= curr.start
252            // Only need to check curr's start is continuous with acc's end
253            Some(mut acc) if curr.start <= acc.end => {
254                acc.end = acc.end.max(curr.end);
255                Ok(Some(acc))
256            }
257            None => Ok(Some(curr)),
258            _ => Err(ParseError::IncontinuousRange),
259        })?;
260    merged.ok_or(ParseError::Empty)
261}
262
/// Positions with boolean type
///
/// For `E5` and `G5` the parsed range is inverted: `Position::range` maps
/// `"0"` to `true` and `"1"` to `false` for these two variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[allow(missing_docs)]
pub enum BooleanPosition {
    /// Reads `accent_phrase_prev.is_interrogative`.
    E3,
    /// Reads `accent_phrase_prev.is_pause_insertion`.
    E5,

    /// Reads `accent_phrase_curr.is_interrogative`.
    F3,

    /// Reads `accent_phrase_next.is_interrogative`.
    G3,
    /// Reads `accent_phrase_next.is_pause_insertion`.
    G5,
}
276
277impl Position for BooleanPosition {
278    type Target = bool;
279    type Range = bool;
280
281    fn range(&self, ranges: &[&str]) -> Result<Self::Range, ParseError> {
282        let first = ranges.first().ok_or(ParseError::Empty)?;
283        // E5/G5's logics are inverted
284        let field_false = matches!(self, Self::E5 | Self::G5);
285        match *first {
286            "0" => Ok(field_false),
287            "1" => Ok(!field_false),
288            _ => Err(ParseError::InvalidBoolean(first.to_string())),
289        }
290    }
291
292    fn get<'a>(&self, label: &'a Label) -> Option<&'a Self::Target> {
293        match self {
294            Self::E3 => as_ref_map!(label.accent_phrase_prev.is_interrogative),
295            Self::E5 => as_ref_and_then!(label.accent_phrase_prev.is_pause_insertion),
296            Self::F3 => as_ref_map!(label.accent_phrase_curr.is_interrogative),
297            Self::G3 => as_ref_map!(label.accent_phrase_next.is_interrogative),
298            Self::G5 => as_ref_and_then!(label.accent_phrase_next.is_pause_insertion),
299        }
300    }
301
302    fn test(&self, range: &Self::Range, target: &Self::Target) -> bool {
303        range == target
304    }
305}
306
/// Positions with numerical representations of categorical value
///
/// The per-variant notes name the field of `Label` that `Position::get`
/// reads for that variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[allow(missing_docs)]
pub enum CategoryPosition {
    /// Reads `word_prev.pos`.
    B1,
    /// Reads `word_prev.ctype`.
    B2,
    /// Reads `word_prev.cform`.
    B3,
    /// Reads `word_curr.pos`.
    C1,
    /// Reads `word_curr.ctype`.
    C2,
    /// Reads `word_curr.cform`.
    C3,
    /// Reads `word_next.pos`.
    D1,
    /// Reads `word_next.ctype`.
    D2,
    /// Reads `word_next.cform`.
    D3,
}
322
323impl Position for CategoryPosition {
324    type Target = u8;
325    type Range = Vec<u8>;
326
327    fn range(&self, ranges: &[&str]) -> Result<Self::Range, ParseError> {
328        ranges
329            .iter()
330            .map(|s| s.parse::<u8>().map_err(ParseError::FailLiteral))
331            .collect()
332    }
333
334    fn get<'a>(&self, label: &'a Label) -> Option<&'a Self::Target> {
335        match self {
336            Self::B1 => as_ref_and_then!(label.word_prev.pos),
337            Self::B2 => as_ref_and_then!(label.word_prev.ctype),
338            Self::B3 => as_ref_and_then!(label.word_prev.cform),
339            Self::C1 => as_ref_and_then!(label.word_curr.pos),
340            Self::C2 => as_ref_and_then!(label.word_curr.ctype),
341            Self::C3 => as_ref_and_then!(label.word_curr.cform),
342            Self::D1 => as_ref_and_then!(label.word_next.pos),
343            Self::D2 => as_ref_and_then!(label.word_next.ctype),
344            Self::D3 => as_ref_and_then!(label.word_next.cform),
345        }
346    }
347
348    fn test(&self, range: &Self::Range, target: &Self::Target) -> bool {
349        range.contains(target)
350    }
351}
352
/// Positions that are always `xx`
///
/// `Position::get` returns `None` for every variant of this type.
// NOTE(review): the type name is spelled `Potision`; it is public, so it is kept as is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[allow(missing_docs)]
pub enum UndefinedPotision {
    E4,
    F4,
    G4,
}
362
363impl Position for UndefinedPotision {
364    type Target = ();
365    type Range = ();
366
367    fn range(&self, _: &[&str]) -> Result<Self::Range, ParseError> {
368        Ok(())
369    }
370
371    fn get<'a>(&self, _: &'a Label) -> Option<&'a Self::Target> {
372        None
373    }
374
375    fn test(&self, _: &Self::Range, _: &Self::Target) -> bool {
376        true
377    }
378}
379
// Unit tests for the range parsers and for `merge_ranges`.
#[cfg(test)]
mod tests {
    use super::*;

    // Literals, one-digit wildcards and prefixed wildcards for signed ranges.
    #[test]
    fn parse_i8_range() {
        assert_eq!(range_i8("12"), Ok(12..13));
        assert_eq!(range_i8("1?"), Ok(10..20));
        assert_eq!(range_i8("?"), Ok(0..10));

        assert_eq!(range_i8("-12"), Ok(-12..-11));
        assert_eq!(range_i8("-?"), Ok(-9..0));
        assert_eq!(range_i8("-??"), Ok(-99..-9));

        // Negative prefixed wildcards are not asserted here; the expected value
        // is kept below for reference.
        // assert_eq!(range_i8("-1?"), Ok(-19..-9));
    }

    // Literals and wildcards for unsigned ranges; note that `?` starts at 1.
    #[test]
    fn parse_u8_range() {
        assert_eq!(range_u8("12"), Ok(12..13));
        assert_eq!(range_u8("1?"), Ok(10..20));
        assert_eq!(range_u8("12?"), Ok(120..130));
        assert_eq!(range_u8("?"), Ok(1..10));
    }

    // Malformed patterns: a `?` that is not at the end is parsed as a literal,
    // while a pattern ending in `?` is parsed as a wildcard.
    #[test]
    fn range_fail() {
        use std::num::IntErrorKind;
        assert!(matches!(
            range_u8("?2"),
            Err(ParseError::FailLiteral(e)) if *e.kind() == IntErrorKind::InvalidDigit
        ));
        assert!(matches!(
            range_i8("?2"),
            Err(ParseError::FailLiteral(e)) if *e.kind() == IntErrorKind::InvalidDigit
        ));

        assert!(matches!(
            range_u8("???"),
            Err(ParseError::FailWildcard(e)) if *e.kind() == IntErrorKind::InvalidDigit
        ));
        assert!(matches!(
            range_i8("???"),
            Err(ParseError::FailWildcard(e)) if *e.kind() == IntErrorKind::InvalidDigit
        ));
    }

    // `vec![0..1]` below is intentionally a vector holding a single range,
    // which is what the allowed clippy lint would otherwise warn about.
    #[test]
    #[allow(clippy::single_range_in_vec_init)]
    fn merge_ranges_1() {
        // Touching, overlapping, nested and unsorted inputs all merge.
        assert_eq!(merge_ranges(vec![0..1]), Ok(0..1));
        assert_eq!(merge_ranges(vec![0..1, 1..3]), Ok(0..3));
        assert_eq!(merge_ranges(vec![1..3, 0..1]), Ok(0..3));
        assert_eq!(merge_ranges(vec![0..2, 1..3]), Ok(0..3));
        assert_eq!(merge_ranges(vec![-6..7, 1..3]), Ok(-6..7));
        assert_eq!(
            merge_ranges(vec![-6..7, 1..3, 2..6, -8..-7, -8..0]),
            Ok(-8..7)
        );

        // Empty input and inputs with a gap are rejected.
        assert_eq!(merge_ranges::<u8>(vec![]), Err(ParseError::Empty));
        assert_eq!(
            merge_ranges(vec![0..1, 5..6]),
            Err(ParseError::IncontinuousRange)
        );
        assert_eq!(
            merge_ranges(vec![3..6, -1..2]),
            Err(ParseError::IncontinuousRange)
        );
        assert_eq!(
            merge_ranges(vec![-6..7, 1..3, 2..6, -8..-7]),
            Err(ParseError::IncontinuousRange)
        );
    }
}