// wacore_binary/token.rs — binary-protocol token tags and dictionary lookup helpers

// Version of the compiled-in token dictionary.
pub const DICT_VERSION: u8 = 3;

// --- Public Constants for Special Tags ---
// Tag bytes in the reserved range (236..=255, plus LIST_EMPTY at 0) are
// protocol markers, not single-byte token indices; the tests below assert
// that get_single_token returns None for the reserved tags.
pub const LIST_EMPTY: u8 = 0;
// DICTIONARY_0..=DICTIONARY_3 presumably select one of the four double-byte
// token dictionaries (the `dict` argument 0-3 of get_double_token) —
// NOTE(review): confirm against the node encoder/decoder.
pub const DICTIONARY_0: u8 = 236;
pub const DICTIONARY_1: u8 = 237;
pub const DICTIONARY_2: u8 = 238;
pub const DICTIONARY_3: u8 = 239;

// JID and payload tags. Names suggest length-prefix widths (e.g. BINARY_8 =
// 8-bit length, BINARY_20 = 20-bit, BINARY_32 = 32-bit) — assumption from
// naming only; verify against the reader/writer that consumes these tags.
pub const JID_PAIR: u8 = 250;
pub const HEX_8: u8 = 251;
pub const BINARY_8: u8 = 252;
pub const BINARY_20: u8 = 253;
pub const BINARY_32: u8 = 254;
pub const NIBBLE_8: u8 = 255;
pub const INTEROP_JID: u8 = 245;
pub const FB_JID: u8 = 246;
pub const AD_JID: u8 = 247;
pub const LIST_8: u8 = 248;
pub const LIST_16: u8 = 249;

// Upper bound for values encodable in packed (nibble/hex) form — assumption
// from the name; confirm at the packer call sites.
pub const PACKED_MAX: u8 = 127;
// u16 because 256 does not fit in u8; presumably the exclusive upper bound
// for single-byte values — TODO confirm at the call sites.
pub const SINGLE_BYTE_MAX: u16 = 256;

// Include the generated maps from the build script. This brings
// SINGLE_BYTE_MAP / DOUBLE_BYTE_MAP / SINGLE_BYTE_TOKENS / DOUBLE_BYTE_TOKENS
// (used by the lookup functions below) into scope at compile time.
include!(concat!(env!("OUT_DIR"), "/token_maps.rs"));
27
28// The lookup functions now use the compile-time maps
29pub fn index_of_single_token(token: &str) -> Option<u8> {
30    SINGLE_BYTE_MAP.get(token).copied()
31}
32
33pub fn index_of_double_byte_token(token: &str) -> Option<(u8, u8)> {
34    DOUBLE_BYTE_MAP.get(token).copied()
35}
36
37pub fn get_single_token(index: u8) -> Option<&'static str> {
38    SINGLE_BYTE_TOKENS.get(index as usize).copied()
39}
40
41pub fn get_double_token(dict: u8, index: u8) -> Option<&'static str> {
42    DOUBLE_BYTE_TOKENS
43        .get(dict as usize)
44        .and_then(|d| d.get(index as usize))
45        .copied()
46}
47
#[cfg(test)]
mod tests {
    use super::*;

    /// Test single byte token lookup round trip.
    #[test]
    fn test_single_byte_token_roundtrip() {
        // Indices 236..=255 are reserved tags and index 0 is the empty-string
        // token (covered by test_token_boundary_indices), so probe 1..=235.
        for i in 1u8..=235 {
            if let Some(token) = get_single_token(i) {
                let index = index_of_single_token(token);
                assert_eq!(
                    index,
                    Some(i),
                    "Token '{}' at index {} doesn't round-trip",
                    token,
                    i
                );
            }
        }
    }

    /// Test double byte token lookup round trip.
    #[test]
    fn test_double_byte_token_roundtrip() {
        // Test dictionaries 0-3 over the full u8 index space.
        // FIX: the previous loop used `0..255u8`, which silently skipped
        // index 255; `0..=255` covers every possible index byte.
        for dict in 0..4u8 {
            for idx in 0..=255u8 {
                if let Some(token) = get_double_token(dict, idx) {
                    let lookup = index_of_double_byte_token(token);
                    assert_eq!(
                        lookup,
                        Some((dict, idx)),
                        "Token '{}' at dict {} index {} doesn't round-trip",
                        token,
                        dict,
                        idx
                    );
                }
            }
        }
    }

    /// Test that unknown strings return None for token lookups.
    #[test]
    fn test_unknown_string_returns_none() {
        // Completely random strings shouldn't match any token.
        assert!(index_of_single_token("xyzzy_not_a_token_12345").is_none());
        assert!(index_of_double_byte_token("xyzzy_not_a_token_12345").is_none());
    }

    /// Test boundary token indices.
    #[test]
    fn test_token_boundary_indices() {
        // Index 0 is an empty string token (LIST_EMPTY is a special tag, not same as token index 0)
        let token_0 = get_single_token(0);
        assert_eq!(token_0, Some(""), "Index 0 should be empty string token");

        // Test known special indices return None for get_single_token.
        // These are reserved for special tags.
        assert!(get_single_token(LIST_8).is_none()); // 248
        assert!(get_single_token(LIST_16).is_none()); // 249
        assert!(get_single_token(JID_PAIR).is_none()); // 250
        assert!(get_single_token(HEX_8).is_none()); // 251
        assert!(get_single_token(BINARY_8).is_none()); // 252
        assert!(get_single_token(BINARY_20).is_none()); // 253
        assert!(get_single_token(BINARY_32).is_none()); // 254
        assert!(get_single_token(NIBBLE_8).is_none()); // 255
    }

    /// Test strings that almost match tokens but shouldn't be encoded as such.
    #[test]
    fn test_almost_matching_strings() {
        // Get a known token.
        if let Some(token) = get_single_token(1) {
            // Slightly modify it.
            let modified = format!("{}_modified", token);
            // Should not match.
            assert!(index_of_single_token(&modified).is_none());

            // With prefix.
            let prefixed = format!("prefix_{}", token);
            assert!(index_of_single_token(&prefixed).is_none());

            // With suffix.
            let suffixed = format!("{}!", token);
            assert!(index_of_single_token(&suffixed).is_none());
        }
    }

    /// Test out of bounds dictionary lookup.
    #[test]
    fn test_out_of_bounds_dictionary() {
        // Dictionary indices 4+ should return None.
        assert!(get_double_token(4, 0).is_none());
        assert!(get_double_token(5, 100).is_none());
        assert!(get_double_token(255, 0).is_none());
    }
}