Skip to main content

ankurah_core/indexing/
encoding.rs

1use super::key_spec::KeySpec;
2use crate::collation::Collatable;
3use crate::value::{Value, ValueType};
4use thiserror::Error;
5
/// Errors produced while encoding values into index key bytes.
#[derive(Debug, Error)]
pub enum IndexError {
    /// The value could not be cast to, or encoded as, the type the key part expects.
    ///
    /// Fields are `(expected, actual)`: the type required by the key spec, followed by
    /// the type of the value that was supplied.
    #[error("Type mismatch: expected {0:?}, got {1:?}")]
    TypeMismatch(ValueType, ValueType),
}
11
// Type tags for JSON encoding.
// Each encoded JSON value starts with one of these tags, so values of different JSON types
// never compare equal (e.g. the string "9" and the integer 9).
// The tag values are chosen to provide a sensible sort order: null < bool < int < float < string.
// For descending key parts the tag byte is inverted along with the payload, which reverses
// this cross-type order as well.
// Each type uses fixed-width encoding where possible to avoid sentinel issues.
const JSON_TAG_NULL: u8 = 0x00; // no payload; also used for objects, arrays and numbers that fit neither i64 nor f64
const JSON_TAG_BOOL: u8 = 0x10; // single payload byte: 0 = false, 1 = true
const JSON_TAG_INT: u8 = 0x20; // i64: fixed 8 bytes, no sentinel needed
const JSON_TAG_FLOAT: u8 = 0x30; // f64: fixed 8 bytes, no sentinel needed
const JSON_TAG_STRING: u8 = 0x40; // variable length, uses 0x00 sentinel with 0x00→0x00 0xFF escaping
20
21/// Encode a single component (no NULL handling for now - TODO: add NULL support later)
22pub fn encode_component_typed(value: &Value, expected_type: ValueType, descending: bool) -> Result<Vec<u8>, IndexError> {
23    // Cast value to expected type (short-circuits if types already match)
24    let value = value.cast_to(expected_type).map_err(|_| IndexError::TypeMismatch(expected_type, ValueType::of(value)))?;
25
26    encode_value_component(&value, expected_type, descending)
27}
28
29/// Encode a non-NULL value component
30fn encode_value_component(value: &Value, expected_type: ValueType, descending: bool) -> Result<Vec<u8>, IndexError> {
31    match (value, expected_type) {
32        (Value::String(s), ValueType::String) => {
33            if !descending {
34                // ASC: [escaped UTF-8][0x00] - no type tag needed
35                let mut out = Vec::with_capacity(s.len() + 1);
36                for &b in s.as_bytes() {
37                    if b == 0x00 {
38                        out.push(0x00);
39                        out.push(0xFF);
40                    } else {
41                        out.push(b);
42                    }
43                }
44                out.push(0x00);
45                Ok(out)
46            } else {
47                // DESC: [inv(payload) with 0xFF escaped as 0xFF 0x00][0xFF 0xFF]
48                let mut out = Vec::with_capacity(s.len() + 2);
49                for &b in s.as_bytes() {
50                    let inv = 0xFFu8.wrapping_sub(b);
51                    if inv == 0xFF {
52                        out.push(0xFF);
53                        out.push(0x00);
54                    } else {
55                        out.push(inv);
56                    }
57                }
58                out.push(0xFF);
59                out.push(0xFF);
60                Ok(out)
61            }
62        }
63        (Value::I16(_) | Value::I32(_) | Value::I64(_), ValueType::I16 | ValueType::I32 | ValueType::I64) => {
64            // Integers are encoded big-endian (order-preserving). DESC: invert payload bytes.
65            let bytes = value.to_bytes();
66            if !descending {
67                Ok(bytes)
68            } else {
69                Ok(bytes.into_iter().map(|b| 0xFFu8.wrapping_sub(b)).collect())
70            }
71        }
72        (Value::F64(_), ValueType::F64) => {
73            // F64 uses collation ordering (NaN sorts last, proper IEEE 754 ordering). DESC: invert payload bytes.
74            let bytes = value.to_bytes();
75            if !descending {
76                Ok(bytes)
77            } else {
78                Ok(bytes.into_iter().map(|b| 0xFFu8.wrapping_sub(b)).collect())
79            }
80        }
81        (Value::Bool(_), ValueType::Bool) => {
82            // ASC: false(0) < true(1). DESC: invert payload to flip order.
83            let b = value.to_bytes()[0];
84            Ok(vec![if !descending { b } else { 0xFFu8.wrapping_sub(b) }])
85        }
86        (Value::EntityId(entity_id), ValueType::EntityId) => {
87            // Fixed-width EntityId: no terminator needed
88            let bytes = entity_id.to_bytes();
89            if !descending {
90                Ok(bytes.to_vec())
91            } else {
92                Ok(bytes.into_iter().map(|b| 0xFFu8.wrapping_sub(b)).collect())
93            }
94        }
95        (Value::Object(bytes) | Value::Binary(bytes), ValueType::Binary | ValueType::Object) => {
96            if !descending {
97                // ASC: [escaped bytes][0x00] - terminator needed for variable-width
98                let mut out = Vec::with_capacity(bytes.len() + 1);
99                for &b in bytes.iter() {
100                    if b == 0x00 {
101                        out.push(0x00);
102                        out.push(0xFF);
103                    } else {
104                        out.push(b);
105                    }
106                }
107                out.push(0x00);
108                Ok(out)
109            } else {
110                // DESC: [inv(bytes) with 0xFF escaped as 0xFF 0x00][0xFF 0xFF]
111                let mut out = Vec::with_capacity(bytes.len() + 2);
112                for &b in bytes.iter() {
113                    let inv = 0xFFu8.wrapping_sub(b);
114                    if inv == 0xFF {
115                        out.push(0xFF);
116                        out.push(0x00);
117                    } else {
118                        out.push(inv);
119                    }
120                }
121                out.push(0xFF);
122                out.push(0xFF);
123                Ok(out)
124            }
125        }
126        // JSON: type-tagged encoding preserving original type (no cross-type casting)
127        (Value::Json(json), ValueType::Json) => Ok(encode_json_value(json, descending)),
128        _ => Err(IndexError::TypeMismatch(expected_type, ValueType::of(value))),
129    }
130}
131
132/// Encode JSON value with type tag prefix.
133/// Different types get different prefixes, so "9" (string) != 9 (int).
134fn encode_json_value(json: &serde_json::Value, descending: bool) -> Vec<u8> {
135    let (tag, payload) = match json {
136        serde_json::Value::Null => (JSON_TAG_NULL, vec![]),
137        serde_json::Value::Bool(b) => (JSON_TAG_BOOL, vec![if *b { 1 } else { 0 }]),
138        serde_json::Value::Number(n) => {
139            if let Some(i) = n.as_i64() {
140                // i64: fixed 8 bytes big-endian with sign flip for proper ordering
141                (JSON_TAG_INT, Value::I64(i).to_bytes())
142            } else if let Some(f) = n.as_f64() {
143                // f64: fixed 8 bytes with IEEE 754 ordering
144                (JSON_TAG_FLOAT, Value::F64(f).to_bytes())
145            } else {
146                // Fallback for very large numbers
147                (JSON_TAG_NULL, vec![])
148            }
149        }
150        serde_json::Value::String(s) => {
151            // Variable length: escape 0x00 bytes and add 0x00 terminator
152            let mut payload = Vec::with_capacity(s.len() + 1);
153            for &b in s.as_bytes() {
154                if b == 0x00 {
155                    payload.push(0x00);
156                    payload.push(0xFF);
157                } else {
158                    payload.push(b);
159                }
160            }
161            payload.push(0x00); // terminator
162            (JSON_TAG_STRING, payload)
163        }
164        // Objects and arrays are unsortable - encode as null
165        serde_json::Value::Object(_) | serde_json::Value::Array(_) => (JSON_TAG_NULL, vec![]),
166    };
167
168    if !descending {
169        let mut out = Vec::with_capacity(1 + payload.len());
170        out.push(tag);
171        out.extend(payload);
172        out
173    } else {
174        // DESC: invert tag and all payload bytes
175        let mut out = Vec::with_capacity(1 + payload.len());
176        out.push(0xFFu8.wrapping_sub(tag));
177        out.extend(payload.into_iter().map(|b| 0xFFu8.wrapping_sub(b)));
178        out
179    }
180}
181
182/// Type-aware encoding using KeySpec for validation and optimization
183/// TODO: Add NULL handling later
184pub fn encode_tuple_values_with_key_spec(values: &[Value], key_spec: &KeySpec) -> Result<Vec<u8>, IndexError> {
185    let mut out = Vec::new();
186    for (i, v) in values.iter().enumerate() {
187        if i >= key_spec.keyparts.len() {
188            break; // Don't encode more values than key spec defines
189        }
190        let keypart = &key_spec.keyparts[i];
191
192        // Use type-aware encoding without type tags
193        let bytes = encode_component_typed(v, keypart.value_type, keypart.direction.is_desc())?;
194        out.extend_from_slice(&bytes);
195    }
196    Ok(out)
197}
198
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value::Value;

    /// Encode `text` as a String key component in the requested direction.
    fn encode_str(text: &str, descending: bool) -> Vec<u8> {
        encode_component_typed(&Value::String(text.to_string()), ValueType::String, descending).unwrap()
    }

    #[test]
    fn test_desc_ordering() {
        let first = encode_str("a", true);
        let second = encode_str("b", true);

        // DESC: "a" should sort after "b" (reversed)
        assert!(first > second);
    }

    #[test]
    fn test_asc_ordering() {
        let first = encode_str("a", false);
        let second = encode_str("b", false);

        // ASC: "a" should sort before "b"
        assert!(first < second);
    }
}