unicode_locale_parser/
subdivision.rs

1use crate::errors::ParserError;
2
3use std::fmt::{self};
4use std::str;
5use std::str::FromStr;
6
/// A Unicode subdivision identifier split into its two components.
///
/// Its `Display` implementation writes `region` immediately followed by
/// `suffix`, with no separator.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct UnicodeSubdivisionIdentifier {
    /// Leading region part of the identifier (e.g. `"us"` in `"ussct"`).
    pub region: String,
    /// Part of the identifier that follows the region (e.g. `"sct"` in `"ussct"`).
    pub suffix: String,
}
12
13impl fmt::Display for UnicodeSubdivisionIdentifier {
14    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
15        write!(f, "{}{}", self.region, self.suffix)?;
16        Ok(())
17    }
18}
19
20impl FromStr for UnicodeSubdivisionIdentifier {
21    type Err = ParserError;
22
23    fn from_str(source: &str) -> Result<Self, Self::Err> {
24        parse_unicode_subdivision_id(source)
25    }
26}
27
28/// Parse the given string as an Unicode Subdivision Identifier.
29///
30/// This function parses according to [`unicode_subdivision_id` EBNF defined in UTS #35](https://unicode.org/reports/tr35/#unicode_subdivision_id)
31///
32/// # Examples
33///
34/// ```
35/// use unicode_locale_parser::parse_subdivision_id;
36///
37/// let subdivision = parse_subdivision_id("ussct").unwrap();
38/// assert_eq!("us", subdivision.region);
39/// assert_eq!("sct", subdivision.suffix);
40/// ```
41///
42/// # Errors
43///
44/// This function returns an error in the following cases:
45///
46/// - [`ParserError::Missing`] if the given subdivision id is empty.
47/// - [`ParserError::InvalidSubdivision`] if the given subdivision id is not a valid subdivision identifier.
48pub fn parse_unicode_subdivision_id(
49    subdivision_id: &str,
50) -> Result<UnicodeSubdivisionIdentifier, ParserError> {
51    // unicode_subdivision_id
52    // https://unicode.org/reports/tr35/#unicode_subdivision_id
53
54    let chunks = subdivision_id.as_bytes();
55
56    if chunks.is_empty() {
57        return Err(ParserError::Missing);
58    }
59
60    let len = chunks.len();
61    if !(2..=7).contains(&len) {
62        return Err(ParserError::InvalidSubdivision);
63    }
64
65    let region_index = region_index(chunks)?;
66    let region = match str::from_utf8(&chunks[0..region_index]) {
67        Ok(s) => s,
68        Err(_) => return Err(ParserError::Unexpected),
69    };
70
71    let suffix_len = len - region_index;
72    if !(3..7).contains(&suffix_len)
73        || !chunks[region_index..]
74            .iter()
75            .all(|b: &u8| b.is_ascii_alphanumeric())
76    {
77        Err(ParserError::InvalidSubdivision)
78    } else {
79        let suffix = match str::from_utf8(&chunks[region_index..]) {
80            Ok(s) => s,
81            Err(_) => return Err(ParserError::Unexpected),
82        };
83        Ok(UnicodeSubdivisionIdentifier {
84            region: String::from(region),
85            suffix: String::from(suffix),
86        })
87    }
88}
89
90fn region_index(chunks: &[u8]) -> Result<usize, ParserError> {
91    if chunks[0..2].iter().all(|b| b.is_ascii_alphabetic()) {
92        Ok(2)
93    } else if chunks[0..3].iter().all(|b| b.is_ascii_digit()) {
94        Ok(3)
95    } else {
96        Err(ParserError::InvalidSubdivision)
97    }
98}
99
100/*
101 * Unit tests
102 */
103
#[test]
fn success_parse_unicode_subdivision_id() {
    // two-letter region followed by a suffix
    let UnicodeSubdivisionIdentifier { region, suffix } =
        parse_unicode_subdivision_id("ussct").unwrap();
    assert_eq!("us", region);
    assert_eq!("sct", suffix);

    // three-digit region followed by a suffix
    let UnicodeSubdivisionIdentifier { region, suffix } =
        parse_unicode_subdivision_id("123abcd").unwrap();
    assert_eq!("123", region);
    assert_eq!("abcd", suffix);

    // Display: the rendered form round-trips to the input
    let rendered = parse_unicode_subdivision_id("123abcd").unwrap().to_string();
    assert_eq!("123abcd", rendered);

    // PartialEq: two parses of the same input compare equal
    let first = parse_unicode_subdivision_id("123abcd").unwrap();
    let second = parse_unicode_subdivision_id("123abcd").unwrap();
    assert_eq!(first, second);

    // FromStr: `str::parse` goes through the same parser
    let parsed = "ussct".parse::<UnicodeSubdivisionIdentifier>().unwrap();
    assert_eq!("us", parsed.region);
    assert_eq!("sct", parsed.suffix);
}
133
#[test]
fn fail_parse_unicode_subdivision_id() {
    // Each entry pairs an invalid input with the error it must produce.
    let cases = [
        // missing
        ("", ParserError::Missing),
        // 2 characters
        ("ab", ParserError::InvalidSubdivision),
        // 8 characters
        ("12312345", ParserError::InvalidSubdivision),
        // invalid region
        ("1b123", ParserError::InvalidSubdivision),
        // invalid suffix
        ("ab{}", ParserError::InvalidSubdivision),
    ];

    for (input, expected) in &cases {
        assert_eq!(
            *expected,
            parse_unicode_subdivision_id(input).unwrap_err(),
            "input: {:?}",
            input
        );
    }
}
165}