icu_experimental/dimension/provider/
pattern_key.rs

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

// Provider structs must be stable
6#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
7
8use zerovec::{
9    maps::ZeroMapKV,
10    ule::{AsULE, UleError, ULE},
11};
12
13use crate::dimension::provider::units_essentials::CompoundCount;
14
/// The power component of a [`PatternKey::Power`] key.
///
/// Only the second and third powers are representable.
#[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_experimental::dimension::provider::pattern_key))]
#[repr(u8)]
pub enum PowerValue {
    /// The second power; packed as `10` in bits b5-b4 of the ULE byte.
    Two,
    /// The third power; packed as `11` in bits b5-b4 of the ULE byte.
    Three,
}
24
25#[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Debug)]
26#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
27#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
28#[cfg_attr(feature = "datagen", databake(path = icu_experimental::dimension::provider::pattern_key))]
29pub enum PatternKey {
30    Binary(u8),
31    Decimal(i8),
32    Power {
33        power: PowerValue,
34        count: CompoundCount,
35    },
36}
37
/// [`PatternKeyULE`] is a type optimized for efficient storage and
/// deserialization of [`PatternKey`] using the `ZeroVec` model.
///
/// The serialization model packs the pattern key into a single byte.
///
/// The first two bits (b7 & b6) determine the variant of the pattern key:
/// - `00`: `Binary`
/// - `01`: `Decimal`
/// - `10`: `Power`
/// - `11`: Forbidden
///
/// The next 6 bits (b5 to b0) determine the value of the pattern key:
/// - For `Binary`, the six bits hold the pattern value directly.
/// - For `Decimal`:
///     - b5 is the sign bit: `0` means the value is positive, `1` means it is negative.
///     - b4 to b0 hold the magnitude of the value.
/// - For `Power`:
///     - b5 and b4 represent the power value, which can be `10` to represent `Two` and `11` to represent `Three`.
///     - b3 to b0 represent the count value, which can be:
///         - `0000`: Zero
///         - `0001`: One
///         - `0010`: Two
///         - `0011`: Few
///         - `0100`: Many
///         - `0101`: Other
///     - Note: In the `Power` case, b3 is always 0, and when b2 is 1, b1 must be 0.
///
/// The type is `#[repr(transparent)]` over `u8`, so the language guarantees
/// it is exactly one byte with alignment 1 and no padding, as a ULE type
/// requires.
#[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Debug)]
#[repr(transparent)]
pub struct PatternKeyULE(u8);
66
67// Safety (based on the safety checklist on the ULE trait):
68//  1. PatternKeyULE does not include any uninitialized or padding bytes.
69//     (achieved by `#[repr(transparent)]` on a ULE type)
70//  2. PatternKeyULE is aligned to 1 byte.
71//     (achieved by `#[repr(transparent)]` on a ULE type)
72//  3. The impl of validate_bytes() returns an error if any byte is not valid.
73//  4. The impl of validate_bytes() returns an error if there are extra bytes.
74//  5. The other ULE methods use the default impl.
75//  6. PatternKeyULE byte equality is semantic equality.
76unsafe impl ULE for PatternKeyULE {
77    fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
78        for &byte in bytes.iter() {
79            // Ensure the first two bits (b7 & b6) are not 11.
80            if (byte & 0b1100_0000) == 0b1100_0000 {
81                return Err(UleError::parse::<Self>());
82            }
83
84            // For the `Power` variant:
85            //      b5 & b4 must be 10 or 11. (this means that b5 must be 1)
86            //      b3 must be 0.
87            //      When b2 is 1, b1 must be 0.
88            if (byte & 0b1100_0000) == 0b1000_0000 {
89                // b5 must be 1
90                if (byte & 0b0010_0000) == 0 {
91                    return Err(UleError::parse::<Self>());
92                }
93
94                // b3 must be 0
95                if (byte & 0b0000_1000) != 0 {
96                    return Err(UleError::parse::<Self>());
97                }
98
99                // If b2 is 1, b1 must be 0
100                if (byte & 0b0000_0100) != 0 && (byte & 0b0000_0010) != 0 {
101                    return Err(UleError::parse::<Self>());
102                }
103            }
104        }
105
106        Ok(())
107    }
108}
109
110impl AsULE for PatternKey {
111    type ULE = PatternKeyULE;
112
113    fn to_unaligned(self) -> Self::ULE {
114        let byte = match self {
115            PatternKey::Binary(value) => value,
116            PatternKey::Decimal(value) => {
117                let sign = if value < 0 { 0b0010_0000 } else { 0 };
118                debug_assert!(value > -32 && value < 32);
119                (0b01 << 6) | sign | (value.unsigned_abs() & 0b0001_1111)
120            }
121            PatternKey::Power { power, count } => {
122                let power_bits = {
123                    match power {
124                        PowerValue::Two => 0b10 << 4,
125                        PowerValue::Three => 0b11 << 4,
126                    }
127                };
128                // Combine the bits to form the final byte
129                (0b10 << 6) | power_bits | count as u8
130            }
131        };
132
133        PatternKeyULE(byte)
134    }
135
136    fn from_unaligned(unaligned: Self::ULE) -> Self {
137        let byte = unaligned.0;
138
139        let variant = (byte & 0b1100_0000) >> 6;
140        let value = byte & 0b0011_1111;
141
142        match variant {
143            0b00 => PatternKey::Binary(value),
144            0b01 => match value & 0b0010_0000 {
145                0b0000_0000 => PatternKey::Decimal(value as i8),
146                0b0010_0000 => PatternKey::Decimal(-((value & 0b0001_1111) as i8)),
147                _ => unreachable!(),
148            },
149            0b10 => {
150                let power = match value & 0b0011_0000 {
151                    0b0010_0000 => PowerValue::Two,
152                    0b0011_0000 => PowerValue::Three,
153                    _ => unreachable!(),
154                };
155                let count = value & 0b0000_1111;
156                PatternKey::Power {
157                    power,
158                    count: count.into(),
159                }
160            }
161            _ => unreachable!(),
162        }
163    }
164}
165
166impl<'a> ZeroMapKV<'a> for PatternKey {
167    type Container = zerovec::ZeroVec<'a, PatternKey>;
168    type Slice = zerovec::ZeroSlice<PatternKey>;
169    type GetType = <PatternKey as AsULE>::ULE;
170    type OwnedType = PatternKey;
171}
172
// Checks the exact packed byte of each `PatternKey` variant, that every packed
// byte validates and round-trips, and that malformed bytes are rejected.
#[test]
fn test_pattern_key_ule() {
    use PowerValue::{Three, Two};

    // (key, expected packed byte)
    let cases = [
        (PatternKey::Binary(0b0000_1111), 0b0000_1111),
        (PatternKey::Decimal(0b0000_1111), 0b0100_1111),
        (PatternKey::Decimal(-1), 0b0110_0001),
        (
            PatternKey::Power {
                power: Two,
                count: CompoundCount::Two,
            },
            0b1010_0010,
        ),
        (
            PatternKey::Power {
                power: Three,
                count: CompoundCount::Two,
            },
            0b1011_0010,
        ),
    ];

    for &(key, expected_byte) in &cases {
        let ule = key.to_unaligned();
        assert_eq!(ule.0, expected_byte);
        PatternKeyULE::validate_bytes(&[ule.0]).unwrap();
        assert_eq!(PatternKey::from_unaligned(ule), key);
    }

    // Test invalid bytes
    let invalid_bytes: [u8; 4] = [
        // Forbidden variant tag `11`.
        0b1100_0000,
        // `Power` with b5 cleared (power bits are neither `10` nor `11`).
        0b1000_0000,
        // `Power` with b3 set.
        0b1010_1000,
        // `Power` with b2 and b1 both set (count 6).
        0b1010_0110,
    ];

    for &byte in &invalid_bytes {
        assert_eq!(
            PatternKeyULE::validate_bytes(&[byte]),
            Err(UleError::parse::<PatternKeyULE>())
        );
    }
}