Skip to main content

grc_20/codec/
value.rs

1//! Value encoding/decoding for GRC-20 binary format.
2//!
3//! Implements the wire format for property values (spec Section 6.5).
4
5use std::borrow::Cow;
6
7use crate::codec::primitives::{Reader, Writer};
8use crate::error::{DecodeError, EncodeError};
9use crate::limits::{MAX_BYTES_LEN, MAX_EMBEDDING_BYTES, MAX_EMBEDDING_DIMS, MAX_POSITION_LEN, MAX_STRING_LEN};
10use crate::model::{
11    DataType, DecimalMantissa, DictionaryBuilder, EmbeddingSubType, PropertyValue, Value,
12    WireDictionaries,
13};
14use crate::util::{
15    format_date_rfc3339, format_datetime_rfc3339, format_time_rfc3339,
16    parse_date_rfc3339, parse_datetime_rfc3339, parse_time_rfc3339,
17};
18
19// =============================================================================
20// DECODING
21// =============================================================================
22
23/// Decodes a Value from the reader based on the data type (zero-copy).
24pub fn decode_value<'a>(
25    reader: &mut Reader<'a>,
26    data_type: DataType,
27    dicts: &WireDictionaries,
28) -> Result<Value<'a>, DecodeError> {
29    match data_type {
30        DataType::Bool => decode_bool(reader),
31        DataType::Int64 => decode_int64(reader, dicts),
32        DataType::Float64 => decode_float64(reader, dicts),
33        DataType::Decimal => decode_decimal(reader, dicts),
34        DataType::Text => decode_text(reader, dicts),
35        DataType::Bytes => decode_bytes(reader),
36        DataType::Date => decode_date(reader),
37        DataType::Time => decode_time(reader),
38        DataType::Datetime => decode_datetime(reader),
39        DataType::Schedule => decode_schedule(reader),
40        DataType::Point => decode_point(reader),
41        DataType::Rect => decode_rect(reader),
42        DataType::Embedding => decode_embedding(reader),
43    }
44}
45
46fn decode_bool<'a>(reader: &mut Reader<'a>) -> Result<Value<'a>, DecodeError> {
47    let byte = reader.read_byte("bool")?;
48    match byte {
49        0x00 => Ok(Value::Bool(false)),
50        0x01 => Ok(Value::Bool(true)),
51        _ => Err(DecodeError::InvalidBool { value: byte }),
52    }
53}
54
55fn decode_int64<'a>(reader: &mut Reader<'a>, dicts: &WireDictionaries) -> Result<Value<'a>, DecodeError> {
56    let value = reader.read_signed_varint("int64")?;
57    let unit_index = reader.read_varint("int64.unit")? as usize;
58    let unit = if unit_index == 0 {
59        None
60    } else {
61        let idx = unit_index - 1;
62        if idx >= dicts.units.len() {
63            return Err(DecodeError::IndexOutOfBounds {
64                dict: "units",
65                index: unit_index,
66                size: dicts.units.len() + 1,
67            });
68        }
69        Some(dicts.units[idx])
70    };
71    Ok(Value::Int64 { value, unit })
72}
73
74fn decode_float64<'a>(reader: &mut Reader<'a>, dicts: &WireDictionaries) -> Result<Value<'a>, DecodeError> {
75    let value = reader.read_f64("float64")?;
76    let unit_index = reader.read_varint("float64.unit")? as usize;
77    let unit = if unit_index == 0 {
78        None
79    } else {
80        let idx = unit_index - 1;
81        if idx >= dicts.units.len() {
82            return Err(DecodeError::IndexOutOfBounds {
83                dict: "units",
84                index: unit_index,
85                size: dicts.units.len() + 1,
86            });
87        }
88        Some(dicts.units[idx])
89    };
90    Ok(Value::Float64 { value, unit })
91}
92
93fn decode_decimal<'a>(reader: &mut Reader<'a>, dicts: &WireDictionaries) -> Result<Value<'a>, DecodeError> {
94    let exponent = reader.read_signed_varint("decimal.exponent")? as i32;
95    let mantissa_type = reader.read_byte("decimal.mantissa_type")?;
96
97    let mantissa = match mantissa_type {
98        0x00 => {
99            let v = reader.read_signed_varint("decimal.mantissa")?;
100            DecimalMantissa::I64(v)
101        }
102        0x01 => {
103            let len = reader.read_varint("decimal.mantissa_len")? as usize;
104            let bytes = reader.read_bytes(len, "decimal.mantissa_bytes")?;
105
106            // Validate minimal encoding
107            if !bytes.is_empty() {
108                let first = bytes[0];
109                // Check for redundant sign extension
110                if bytes.len() > 1 {
111                    let second = bytes[1];
112                    if (first == 0x00 && (second & 0x80) == 0)
113                        || (first == 0xFF && (second & 0x80) != 0) {
114                        return Err(DecodeError::DecimalMantissaNotMinimal);
115                    }
116                }
117            }
118
119            DecimalMantissa::Big(Cow::Borrowed(bytes))
120        }
121        _ => {
122            return Err(DecodeError::MalformedEncoding {
123                context: "invalid decimal mantissa type"
124            });
125        }
126    };
127
128    // Validate normalization
129    match &mantissa {
130        DecimalMantissa::I64(v) => {
131            if *v == 0 {
132                if exponent != 0 {
133                    return Err(DecodeError::DecimalNotNormalized);
134                }
135            } else if *v % 10 == 0 {
136                return Err(DecodeError::DecimalNotNormalized);
137            }
138        }
139        DecimalMantissa::Big(bytes) => {
140            if is_big_mantissa_zero(bytes) {
141                if exponent != 0 {
142                    return Err(DecodeError::DecimalNotNormalized);
143                }
144            } else if is_big_mantissa_divisible_by_10(bytes) {
145                return Err(DecodeError::DecimalNotNormalized);
146            }
147        }
148    }
149
150    let unit_index = reader.read_varint("decimal.unit")? as usize;
151    let unit = if unit_index == 0 {
152        None
153    } else {
154        let idx = unit_index - 1;
155        if idx >= dicts.units.len() {
156            return Err(DecodeError::IndexOutOfBounds {
157                dict: "units",
158                index: unit_index,
159                size: dicts.units.len() + 1,
160            });
161        }
162        Some(dicts.units[idx])
163    };
164
165    Ok(Value::Decimal { exponent, mantissa, unit })
166}
167
/// Returns `true` when a big-endian two's complement mantissa encodes zero,
/// i.e. when it contains no non-zero byte. An empty slice counts as zero.
fn is_big_mantissa_zero(bytes: &[u8]) -> bool {
    !bytes.iter().any(|&b| b != 0)
}
172
173/// Checks if a big-endian two's complement mantissa is divisible by 10.
174///
175/// A number is divisible by 10 if its remainder when divided by 10 is 0.
176/// For big-endian bytes, we compute: sum(byte[i] * 256^(n-1-i)) mod 10.
177/// Since 256 mod 10 = 6, we can compute iteratively: (carry * 6 + byte) mod 10.
178///
179/// For negative numbers (high bit set), we need to handle two's complement.
180fn is_big_mantissa_divisible_by_10(bytes: &[u8]) -> bool {
181    if bytes.is_empty() {
182        return true; // Zero is divisible by 10
183    }
184
185    // Check if negative (high bit set)
186    let is_negative = bytes[0] & 0x80 != 0;
187
188    if is_negative {
189        // For negative two's complement, compute the absolute value first
190        // by inverting bits and adding 1, then check divisibility
191        let abs_mod = twos_complement_abs_mod_10(bytes);
192        abs_mod == 0
193    } else {
194        // Positive: just compute mod 10 directly
195        // 256 mod 10 = 6, so we iterate: remainder = (remainder * 6 + byte) mod 10
196        let mut remainder = 0u32;
197        for &byte in bytes {
198            // remainder * 256 + byte, mod 10
199            // Since 256 = 25 * 10 + 6, we have: (r * 256) mod 10 = (r * 6) mod 10
200            remainder = (remainder * 6 + byte as u32) % 10;
201        }
202        remainder == 0
203    }
204}
205
/// Computes `|x| mod 10` for a negative big-endian two's complement number.
///
/// Negation in two's complement is "invert every bit, then add one", so the
/// magnitude's remainder is `(inverted bytes mod 10 + 1) mod 10`. The inverted
/// bytes are reduced with the same `256 mod 10 == 6` recurrence used for
/// non-negative values.
fn twos_complement_abs_mod_10(bytes: &[u8]) -> u32 {
    let inverted_remainder = bytes
        .iter()
        .map(|&b| u32::from(!b))
        .fold(0u32, |acc, digit| (acc * 6 + digit) % 10);

    // The "+ 1" half of two's complement negation.
    (inverted_remainder + 1) % 10
}
226
227fn decode_text<'a>(reader: &mut Reader<'a>, dicts: &WireDictionaries) -> Result<Value<'a>, DecodeError> {
228    let value = reader.read_str(MAX_STRING_LEN, "text")?;
229    let lang_index = reader.read_varint("text.language")? as usize;
230
231    let language = if lang_index == 0 {
232        None
233    } else {
234        let idx = lang_index - 1;
235        if idx >= dicts.languages.len() {
236            return Err(DecodeError::IndexOutOfBounds {
237                dict: "languages",
238                index: lang_index,
239                size: dicts.languages.len() + 1, // +1 for index 0
240            });
241        }
242        Some(dicts.languages[idx])
243    };
244
245    Ok(Value::Text { value: Cow::Borrowed(value), language })
246}
247
248fn decode_bytes<'a>(reader: &mut Reader<'a>) -> Result<Value<'a>, DecodeError> {
249    let len = reader.read_varint("bytes.len")? as usize;
250    if len > MAX_BYTES_LEN {
251        return Err(DecodeError::LengthExceedsLimit {
252            field: "bytes",
253            len,
254            max: MAX_BYTES_LEN,
255        });
256    }
257    let bytes = reader.read_bytes(len, "bytes")?;
258    Ok(Value::Bytes(Cow::Borrowed(bytes)))
259}
260
261fn decode_date<'a>(reader: &mut Reader<'a>) -> Result<Value<'a>, DecodeError> {
262    // DATE: 6 bytes (int32 days + int16 offset_min), little-endian
263    let bytes = reader.read_bytes(6, "date")?;
264    let days = i32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
265    let offset_min = i16::from_le_bytes([bytes[4], bytes[5]]);
266
267    // Validate offset_min range
268    if offset_min < -1440 || offset_min > 1440 {
269        return Err(DecodeError::MalformedEncoding {
270            context: "DATE offset_min outside range [-1440, +1440]",
271        });
272    }
273
274    // Format as RFC 3339 string
275    let value = format_date_rfc3339(days, offset_min);
276    Ok(Value::Date(Cow::Owned(value)))
277}
278
279fn decode_time<'a>(reader: &mut Reader<'a>) -> Result<Value<'a>, DecodeError> {
280    // TIME: 8 bytes (int48 time_micros + int16 offset_min), little-endian
281    let bytes = reader.read_bytes(8, "time")?;
282
283    // Read int48 as 6 bytes, sign-extend to i64
284    let time_micros_unsigned = u64::from_le_bytes([
285        bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], 0, 0
286    ]);
287    // Sign-extend from 48 bits
288    let time_micros = if time_micros_unsigned & 0x8000_0000_0000 != 0 {
289        (time_micros_unsigned | 0xFFFF_0000_0000_0000) as i64
290    } else {
291        time_micros_unsigned as i64
292    };
293
294    let offset_min = i16::from_le_bytes([bytes[6], bytes[7]]);
295
296    // Validate time_micros range
297    if time_micros < 0 || time_micros > 86_399_999_999 {
298        return Err(DecodeError::MalformedEncoding {
299            context: "TIME time_micros outside range [0, 86399999999]",
300        });
301    }
302
303    // Validate offset_min range
304    if offset_min < -1440 || offset_min > 1440 {
305        return Err(DecodeError::MalformedEncoding {
306            context: "TIME offset_min outside range [-1440, +1440]",
307        });
308    }
309
310    // Format as RFC 3339 string
311    let value = format_time_rfc3339(time_micros, offset_min);
312    Ok(Value::Time(Cow::Owned(value)))
313}
314
315fn decode_datetime<'a>(reader: &mut Reader<'a>) -> Result<Value<'a>, DecodeError> {
316    // DATETIME: 10 bytes (int64 epoch_micros + int16 offset_min), little-endian
317    let bytes = reader.read_bytes(10, "datetime")?;
318    let epoch_micros = i64::from_le_bytes([
319        bytes[0], bytes[1], bytes[2], bytes[3],
320        bytes[4], bytes[5], bytes[6], bytes[7]
321    ]);
322    let offset_min = i16::from_le_bytes([bytes[8], bytes[9]]);
323
324    // Validate offset_min range
325    if offset_min < -1440 || offset_min > 1440 {
326        return Err(DecodeError::MalformedEncoding {
327            context: "DATETIME offset_min outside range [-1440, +1440]",
328        });
329    }
330
331    // Format as RFC 3339 string
332    let value = format_datetime_rfc3339(epoch_micros, offset_min);
333    Ok(Value::Datetime(Cow::Owned(value)))
334}
335
336fn decode_schedule<'a>(reader: &mut Reader<'a>) -> Result<Value<'a>, DecodeError> {
337    let value = reader.read_str(MAX_STRING_LEN, "schedule")?;
338    // RFC 5545 iCalendar format - basic validation
339    // Full validation would require a complete iCalendar parser
340    Ok(Value::Schedule(Cow::Borrowed(value)))
341}
342
343fn decode_point<'a>(reader: &mut Reader<'a>) -> Result<Value<'a>, DecodeError> {
344    let ordinate_count = reader.read_byte("point.ordinate_count")?;
345
346    if ordinate_count != 2 && ordinate_count != 3 {
347        return Err(DecodeError::MalformedEncoding {
348            context: "POINT ordinate_count must be 2 or 3",
349        });
350    }
351
352    // Read in wire order: latitude, longitude, altitude (optional)
353    let lat = reader.read_f64("point.lat")?;
354    let lon = reader.read_f64("point.lon")?;
355    let alt = if ordinate_count == 3 {
356        Some(reader.read_f64("point.alt")?)
357    } else {
358        None
359    };
360
361    // Validate bounds
362    if !(-90.0..=90.0).contains(&lat) {
363        return Err(DecodeError::LatitudeOutOfRange { lat });
364    }
365    if !(-180.0..=180.0).contains(&lon) {
366        return Err(DecodeError::LongitudeOutOfRange { lon });
367    }
368    if lat.is_nan() || lon.is_nan() {
369        return Err(DecodeError::FloatIsNan);
370    }
371    if let Some(a) = alt {
372        if a.is_nan() {
373            return Err(DecodeError::FloatIsNan);
374        }
375    }
376
377    Ok(Value::Point { lat, lon, alt })
378}
379
380fn decode_rect<'a>(reader: &mut Reader<'a>) -> Result<Value<'a>, DecodeError> {
381    // RECT: 32 bytes (4 x float64), little-endian
382    // Wire order: min_lat, min_lon, max_lat, max_lon
383    let min_lat = reader.read_f64("rect.min_lat")?;
384    let min_lon = reader.read_f64("rect.min_lon")?;
385    let max_lat = reader.read_f64("rect.max_lat")?;
386    let max_lon = reader.read_f64("rect.max_lon")?;
387
388    // Validate bounds
389    if !(-90.0..=90.0).contains(&min_lat) || !(-90.0..=90.0).contains(&max_lat) {
390        return Err(DecodeError::LatitudeOutOfRange { lat: if !(-90.0..=90.0).contains(&min_lat) { min_lat } else { max_lat } });
391    }
392    if !(-180.0..=180.0).contains(&min_lon) || !(-180.0..=180.0).contains(&max_lon) {
393        return Err(DecodeError::LongitudeOutOfRange { lon: if !(-180.0..=180.0).contains(&min_lon) { min_lon } else { max_lon } });
394    }
395    if min_lat.is_nan() || min_lon.is_nan() || max_lat.is_nan() || max_lon.is_nan() {
396        return Err(DecodeError::FloatIsNan);
397    }
398
399    Ok(Value::Rect { min_lat, min_lon, max_lat, max_lon })
400}
401
402fn decode_embedding<'a>(reader: &mut Reader<'a>) -> Result<Value<'a>, DecodeError> {
403    let sub_type_byte = reader.read_byte("embedding.sub_type")?;
404    let sub_type = EmbeddingSubType::from_u8(sub_type_byte)
405        .ok_or(DecodeError::InvalidEmbeddingSubType { sub_type: sub_type_byte })?;
406
407    let dims = reader.read_varint("embedding.dims")? as usize;
408    if dims > MAX_EMBEDDING_DIMS {
409        return Err(DecodeError::LengthExceedsLimit {
410            field: "embedding.dims",
411            len: dims,
412            max: MAX_EMBEDDING_DIMS,
413        });
414    }
415
416    let expected_bytes = sub_type.bytes_for_dims(dims);
417    if expected_bytes > MAX_EMBEDDING_BYTES {
418        return Err(DecodeError::LengthExceedsLimit {
419            field: "embedding.data",
420            len: expected_bytes,
421            max: MAX_EMBEDDING_BYTES,
422        });
423    }
424
425    let data = reader.read_bytes(expected_bytes, "embedding.data")?;
426
427    // Validate no NaN in float32 embeddings
428    if sub_type == EmbeddingSubType::Float32 {
429        for chunk in data.chunks_exact(4) {
430            let f = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
431            if f.is_nan() {
432                return Err(DecodeError::FloatIsNan);
433            }
434        }
435    }
436
437    // Validate binary embedding has zeros in unused bits
438    if sub_type == EmbeddingSubType::Binary && dims % 8 != 0 {
439        let last_byte = data[data.len() - 1];
440        let unused_bits = 8 - (dims % 8);
441        let mask = !((1u8 << (8 - unused_bits)) - 1);
442        if last_byte & mask != 0 {
443            return Err(DecodeError::MalformedEncoding {
444                context: "binary embedding has non-zero unused bits",
445            });
446        }
447    }
448
449    Ok(Value::Embedding { sub_type, dims, data: Cow::Borrowed(data) })
450}
451
452/// Decodes a PropertyValue (property index + value + optional language).
453pub fn decode_property_value<'a>(
454    reader: &mut Reader<'a>,
455    dicts: &WireDictionaries,
456) -> Result<PropertyValue<'a>, DecodeError> {
457    let prop_index = reader.read_varint("property")? as usize;
458    if prop_index >= dicts.properties.len() {
459        return Err(DecodeError::IndexOutOfBounds {
460            dict: "properties",
461            index: prop_index,
462            size: dicts.properties.len(),
463        });
464    }
465
466    let (property, data_type) = dicts.properties[prop_index];
467    let value = decode_value(reader, data_type, dicts)?;
468
469    Ok(PropertyValue { property, value })
470}
471
472// =============================================================================
473// ENCODING
474// =============================================================================
475
476/// Encodes a Value to the writer.
477pub fn encode_value(
478    writer: &mut Writer,
479    value: &Value<'_>,
480    dict_builder: &mut DictionaryBuilder,
481) -> Result<(), EncodeError> {
482    match value {
483        Value::Bool(v) => {
484            writer.write_byte(if *v { 0x01 } else { 0x00 });
485        }
486        Value::Int64 { value, unit } => {
487            writer.write_signed_varint(*value);
488            let unit_index = dict_builder.add_unit(*unit);
489            writer.write_varint(unit_index as u64);
490        }
491        Value::Float64 { value, unit } => {
492            if value.is_nan() {
493                return Err(EncodeError::FloatIsNan);
494            }
495            writer.write_f64(*value);
496            let unit_index = dict_builder.add_unit(*unit);
497            writer.write_varint(unit_index as u64);
498        }
499        Value::Decimal { exponent, mantissa, unit } => {
500            encode_decimal(writer, *exponent, mantissa)?;
501            let unit_index = dict_builder.add_unit(*unit);
502            writer.write_varint(unit_index as u64);
503        }
504        Value::Text { value, language } => {
505            writer.write_string(value);
506            let lang_index = dict_builder.add_language(*language);
507            writer.write_varint(lang_index as u64);
508        }
509        Value::Bytes(bytes) => {
510            writer.write_bytes_prefixed(bytes);
511        }
512        Value::Date(s) => {
513            // Parse RFC 3339 date string
514            let (days, offset_min) = parse_date_rfc3339(s).map_err(|_| EncodeError::InvalidInput {
515                context: "Invalid RFC 3339 date format",
516            })?;
517            // DATE: 6 bytes (int32 days + int16 offset_min), little-endian
518            writer.write_bytes(&days.to_le_bytes());
519            writer.write_bytes(&offset_min.to_le_bytes());
520        }
521        Value::Time(s) => {
522            // Parse RFC 3339 time string
523            let (time_micros, offset_min) = parse_time_rfc3339(s).map_err(|_| EncodeError::InvalidInput {
524                context: "Invalid RFC 3339 time format",
525            })?;
526            // Validate time_micros range (should already be validated by parser)
527            if time_micros < 0 || time_micros > 86_399_999_999 {
528                return Err(EncodeError::InvalidInput {
529                    context: "TIME time_micros outside range [0, 86399999999]",
530                });
531            }
532            // TIME: 8 bytes (int48 time_micros + int16 offset_min), little-endian
533            // Write int48 as 6 bytes
534            let time_bytes = time_micros.to_le_bytes();
535            writer.write_bytes(&time_bytes[0..6]);
536            writer.write_bytes(&offset_min.to_le_bytes());
537        }
538        Value::Datetime(s) => {
539            // Parse RFC 3339 datetime string
540            let (epoch_micros, offset_min) = parse_datetime_rfc3339(s).map_err(|_| EncodeError::InvalidInput {
541                context: "Invalid RFC 3339 datetime format",
542            })?;
543            // DATETIME: 10 bytes (int64 epoch_micros + int16 offset_min), little-endian
544            writer.write_bytes(&epoch_micros.to_le_bytes());
545            writer.write_bytes(&offset_min.to_le_bytes());
546        }
547        Value::Schedule(s) => {
548            // RFC 5545 iCalendar format
549            writer.write_string(s);
550        }
551        Value::Point { lat, lon, alt } => {
552            if *lat < -90.0 || *lat > 90.0 {
553                return Err(EncodeError::LatitudeOutOfRange { lat: *lat });
554            }
555            if *lon < -180.0 || *lon > 180.0 {
556                return Err(EncodeError::LongitudeOutOfRange { lon: *lon });
557            }
558            if lat.is_nan() || lon.is_nan() {
559                return Err(EncodeError::FloatIsNan);
560            }
561            if let Some(a) = alt {
562                if a.is_nan() {
563                    return Err(EncodeError::FloatIsNan);
564                }
565            }
566            // Write ordinate_count: 2 for 2D, 3 for 3D
567            let ordinate_count = if alt.is_some() { 3u8 } else { 2u8 };
568            writer.write_byte(ordinate_count);
569            // Write in wire order: latitude, longitude, altitude (optional)
570            writer.write_f64(*lat);
571            writer.write_f64(*lon);
572            if let Some(a) = alt {
573                writer.write_f64(*a);
574            }
575        }
576        Value::Rect { min_lat, min_lon, max_lat, max_lon } => {
577            if *min_lat < -90.0 || *min_lat > 90.0 || *max_lat < -90.0 || *max_lat > 90.0 {
578                return Err(EncodeError::LatitudeOutOfRange { lat: if *min_lat < -90.0 || *min_lat > 90.0 { *min_lat } else { *max_lat } });
579            }
580            if *min_lon < -180.0 || *min_lon > 180.0 || *max_lon < -180.0 || *max_lon > 180.0 {
581                return Err(EncodeError::LongitudeOutOfRange { lon: if *min_lon < -180.0 || *min_lon > 180.0 { *min_lon } else { *max_lon } });
582            }
583            if min_lat.is_nan() || min_lon.is_nan() || max_lat.is_nan() || max_lon.is_nan() {
584                return Err(EncodeError::FloatIsNan);
585            }
586            // RECT: 32 bytes (4 x float64), little-endian
587            // Wire order: min_lat, min_lon, max_lat, max_lon
588            writer.write_f64(*min_lat);
589            writer.write_f64(*min_lon);
590            writer.write_f64(*max_lat);
591            writer.write_f64(*max_lon);
592        }
593        Value::Embedding { sub_type, dims, data } => {
594            let expected = sub_type.bytes_for_dims(*dims);
595            if data.len() != expected {
596                return Err(EncodeError::EmbeddingDimensionMismatch {
597                    sub_type: *sub_type as u8,
598                    dims: *dims,
599                    data_len: data.len(),
600                });
601            }
602            // Check for NaN in float32
603            if *sub_type == EmbeddingSubType::Float32 {
604                for chunk in data.chunks_exact(4) {
605                    let f = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
606                    if f.is_nan() {
607                        return Err(EncodeError::FloatIsNan);
608                    }
609                }
610            }
611            writer.write_byte(*sub_type as u8);
612            writer.write_varint(*dims as u64);
613            writer.write_bytes(data);
614        }
615    }
616    Ok(())
617}
618
619fn encode_decimal(
620    writer: &mut Writer,
621    exponent: i32,
622    mantissa: &DecimalMantissa<'_>,
623) -> Result<(), EncodeError> {
624    // Validate normalization
625    match mantissa {
626        DecimalMantissa::I64(v) => {
627            if *v == 0 {
628                if exponent != 0 {
629                    return Err(EncodeError::DecimalNotNormalized);
630                }
631            } else if *v % 10 == 0 {
632                return Err(EncodeError::DecimalNotNormalized);
633            }
634        }
635        DecimalMantissa::Big(bytes) => {
636            if is_big_mantissa_zero(bytes) {
637                if exponent != 0 {
638                    return Err(EncodeError::DecimalNotNormalized);
639                }
640            } else if is_big_mantissa_divisible_by_10(bytes) {
641                return Err(EncodeError::DecimalNotNormalized);
642            }
643        }
644    }
645
646    writer.write_signed_varint(exponent as i64);
647
648    match mantissa {
649        DecimalMantissa::I64(v) => {
650            writer.write_byte(0x00);
651            writer.write_signed_varint(*v);
652        }
653        DecimalMantissa::Big(bytes) => {
654            writer.write_byte(0x01);
655            writer.write_varint(bytes.len() as u64);
656            writer.write_bytes(bytes);
657        }
658    }
659
660    Ok(())
661}
662
663/// Encodes a PropertyValue (property index + value + optional language).
664pub fn encode_property_value(
665    writer: &mut Writer,
666    pv: &PropertyValue<'_>,
667    dict_builder: &mut DictionaryBuilder,
668    data_type: DataType,
669) -> Result<(), EncodeError> {
670    let prop_index = dict_builder.add_property(pv.property, data_type);
671    writer.write_varint(prop_index as u64);
672    encode_value(writer, &pv.value, dict_builder)?;
673    Ok(())
674}
675
676/// Validates a position string according to spec rules.
677pub fn validate_position(pos: &str) -> Result<(), EncodeError> {
678    if pos.len() > MAX_POSITION_LEN {
679        return Err(EncodeError::PositionTooLong);
680    }
681    for c in pos.chars() {
682        if !c.is_ascii_alphanumeric() {
683            return Err(EncodeError::InvalidPositionChar);
684        }
685    }
686    Ok(())
687}
688
689/// Decodes a position string with validation (zero-copy).
690pub fn decode_position<'a>(reader: &mut Reader<'a>) -> Result<Cow<'a, str>, DecodeError> {
691    let pos = reader.read_str(MAX_POSITION_LEN, "position")?;
692    for c in pos.chars() {
693        if !c.is_ascii_alphanumeric() {
694            return Err(DecodeError::InvalidPositionChar { char: c });
695        }
696    }
697    Ok(Cow::Borrowed(pos))
698}
699
700#[cfg(test)]
701mod tests {
702    use super::*;
703
704    #[test]
705    fn test_bool_roundtrip() {
706        for v in [true, false] {
707            let value = Value::Bool(v);
708            let dicts = WireDictionaries::default();
709            let mut dict_builder = DictionaryBuilder::new();
710
711            let mut writer = Writer::new();
712            encode_value(&mut writer, &value, &mut dict_builder).unwrap();
713
714            let mut reader = Reader::new(writer.as_bytes());
715            let decoded = decode_value(&mut reader, DataType::Bool, &dicts).unwrap();
716
717            assert_eq!(value, decoded);
718        }
719    }
720
721    #[test]
722    fn test_int64_roundtrip() {
723        for v in [0i64, 1, -1, i64::MAX, i64::MIN, 12345678] {
724            let value = Value::Int64 { value: v, unit: None };
725            let mut dict_builder = DictionaryBuilder::new();
726
727            let mut writer = Writer::new();
728            encode_value(&mut writer, &value, &mut dict_builder).unwrap();
729
730            let dicts = dict_builder.build();
731            let mut reader = Reader::new(writer.as_bytes());
732            let decoded = decode_value(&mut reader, DataType::Int64, &dicts).unwrap();
733
734            assert_eq!(value, decoded);
735        }
736    }
737
738    #[test]
739    fn test_float64_roundtrip() {
740        for v in [0.0, 1.0, -1.0, f64::INFINITY, f64::NEG_INFINITY, 3.14159] {
741            let value = Value::Float64 { value: v, unit: None };
742            let mut dict_builder = DictionaryBuilder::new();
743
744            let mut writer = Writer::new();
745            encode_value(&mut writer, &value, &mut dict_builder).unwrap();
746
747            let dicts = dict_builder.build();
748            let mut reader = Reader::new(writer.as_bytes());
749            let decoded = decode_value(&mut reader, DataType::Float64, &dicts).unwrap();
750
751            assert_eq!(value, decoded);
752        }
753    }
754
755    #[test]
756    fn test_text_roundtrip() {
757        let value = Value::Text {
758            value: Cow::Owned("hello world".to_string()),
759            language: None,
760        };
761        let mut dict_builder = DictionaryBuilder::new();
762
763        let mut writer = Writer::new();
764        encode_value(&mut writer, &value, &mut dict_builder).unwrap();
765
766        // Build dicts for decoding
767        let decode_dicts = dict_builder.build();
768
769        let mut reader = Reader::new(writer.as_bytes());
770        let decoded = decode_value(&mut reader, DataType::Text, &decode_dicts).unwrap();
771
772        // Compare inner values since one is Owned and one is Borrowed
773        match (&value, &decoded) {
774            (Value::Text { value: v1, language: l1 }, Value::Text { value: v2, language: l2 }) => {
775                assert_eq!(v1.as_ref(), v2.as_ref());
776                assert_eq!(l1, l2);
777            }
778            _ => panic!("expected Text values"),
779        }
780    }
781
782    #[test]
783    fn test_point_roundtrip() {
784        // 2D point (no altitude)
785        let value = Value::Point { lat: 37.7749, lon: -122.4194, alt: None };
786        let dicts = WireDictionaries::default();
787        let mut dict_builder = DictionaryBuilder::new();
788
789        let mut writer = Writer::new();
790        encode_value(&mut writer, &value, &mut dict_builder).unwrap();
791
792        let mut reader = Reader::new(writer.as_bytes());
793        let decoded = decode_value(&mut reader, DataType::Point, &dicts).unwrap();
794
795        assert_eq!(value, decoded);
796
797        // 3D point (with altitude)
798        let value_3d = Value::Point { lat: 37.7749, lon: -122.4194, alt: Some(100.0) };
799        let mut dict_builder = DictionaryBuilder::new();
800
801        let mut writer = Writer::new();
802        encode_value(&mut writer, &value_3d, &mut dict_builder).unwrap();
803
804        let mut reader = Reader::new(writer.as_bytes());
805        let decoded_3d = decode_value(&mut reader, DataType::Point, &dicts).unwrap();
806
807        assert_eq!(value_3d, decoded_3d);
808    }
809
/// The encoder must refuse points carrying an out-of-range latitude, an
/// out-of-range longitude, or a NaN altitude.
#[test]
fn test_point_validation() {
    // Tries to encode `point` with a fresh writer and dictionary builder and
    // reports whether the encoder returned an error.
    fn is_rejected(point: &Value<'_>) -> bool {
        let mut builder = DictionaryBuilder::new();
        let mut sink = Writer::new();
        encode_value(&mut sink, point, &mut builder).is_err()
    }

    // Latitude out of range
    assert!(is_rejected(&Value::Point { lat: 91.0, lon: 0.0, alt: None }));

    // Longitude out of range
    assert!(is_rejected(&Value::Point { lat: 0.0, lon: 181.0, alt: None }));

    // NaN in altitude
    assert!(is_rejected(&Value::Point { lat: 0.0, lon: 0.0, alt: Some(f64::NAN) }));
}
833
/// A `Rect` must decode to exactly the value that was encoded.
#[test]
fn test_rect_roundtrip() {
    let bounds = Value::Rect {
        min_lat: 24.5,
        min_lon: -125.0,
        max_lat: 49.4,
        max_lon: -66.9,
    };

    // Serialize; the dictionary builder is only needed for the duration of the call.
    let mut encoded = Writer::new();
    encode_value(&mut encoded, &bounds, &mut DictionaryBuilder::new()).unwrap();

    // Deserialize against default wire dictionaries.
    let mut input = Reader::new(encoded.as_bytes());
    let restored = decode_value(&mut input, DataType::Rect, &WireDictionaries::default()).unwrap();

    assert_eq!(bounds, restored);
}
853
/// The encoder must refuse rectangles with an out-of-range latitude or
/// longitude on either corner, and rectangles containing NaN.
#[test]
fn test_rect_validation() {
    // Tries to encode `rect` with a fresh writer and dictionary builder and
    // reports whether the encoder returned an error.
    fn is_rejected(rect: &Value<'_>) -> bool {
        let mut builder = DictionaryBuilder::new();
        let mut sink = Writer::new();
        encode_value(&mut sink, rect, &mut builder).is_err()
    }

    // Latitude out of range (minimum corner, then maximum corner)
    assert!(is_rejected(&Value::Rect { min_lat: -91.0, min_lon: 0.0, max_lat: 0.0, max_lon: 0.0 }));
    assert!(is_rejected(&Value::Rect { min_lat: 0.0, min_lon: 0.0, max_lat: 91.0, max_lon: 0.0 }));

    // Longitude out of range (minimum corner, then maximum corner)
    assert!(is_rejected(&Value::Rect { min_lat: 0.0, min_lon: -181.0, max_lat: 0.0, max_lon: 0.0 }));
    assert!(is_rejected(&Value::Rect { min_lat: 0.0, min_lon: 0.0, max_lat: 0.0, max_lon: 181.0 }));

    // NaN not allowed
    assert!(is_rejected(&Value::Rect { min_lat: f64::NAN, min_lon: 0.0, max_lat: 0.0, max_lon: 0.0 }));
}
889
/// A `Schedule` string must come back from an encode/decode cycle unchanged.
#[test]
fn test_schedule_roundtrip() {
    // Simple iCalendar event (single occurrence)
    let ical = "BEGIN:VEVENT\r\nDTSTART:20240315T090000Z\r\nDTEND:20240315T100000Z\r\nEND:VEVENT";
    let original = Value::Schedule(Cow::Owned(String::from(ical)));

    let mut builder = DictionaryBuilder::new();
    let mut encoded = Writer::new();
    encode_value(&mut encoded, &original, &mut builder).unwrap();

    let wire_dicts = WireDictionaries::default();
    let mut input = Reader::new(encoded.as_bytes());
    let restored = decode_value(&mut input, DataType::Schedule, &wire_dicts).unwrap();

    // Compare the inner strings rather than the `Value`s themselves.
    if let (Value::Schedule(sent), Value::Schedule(received)) = (&original, &restored) {
        assert_eq!(sent.as_ref(), received.as_ref());
    } else {
        panic!("expected Schedule values");
    }
}
911
/// A Float32 `Embedding` must come back from an encode/decode cycle with the
/// same sub-type, dimension count, and payload bytes.
///
/// The payload is built from four distinct, finite f32 components rather than
/// zero-filled bytes: with an all-zero buffer the byte comparison below could
/// not notice a payload that was reordered, shifted, or replaced by zeros.
#[test]
fn test_embedding_roundtrip() {
    let mut payload = Vec::with_capacity(16);
    for component in [1.0f32, -2.5, 0.25, 3.0] {
        payload.extend_from_slice(&component.to_le_bytes());
    }
    assert_eq!(payload.len(), 16); // 4 dims * 4 bytes

    let value = Value::Embedding {
        sub_type: EmbeddingSubType::Float32,
        dims: 4,
        data: Cow::Owned(payload),
    };
    let dicts = WireDictionaries::default();
    let mut dict_builder = DictionaryBuilder::new();

    let mut writer = Writer::new();
    encode_value(&mut writer, &value, &mut dict_builder).unwrap();

    let mut reader = Reader::new(writer.as_bytes());
    let decoded = decode_value(&mut reader, DataType::Embedding, &dicts).unwrap();

    // Compare the fields individually: the decoded payload is checked as raw
    // bytes against the bytes that were handed to the encoder.
    match (&value, &decoded) {
        (
            Value::Embedding { sub_type: s1, dims: d1, data: data1 },
            Value::Embedding { sub_type: s2, dims: d2, data: data2 },
        ) => {
            assert_eq!(s1, s2);
            assert_eq!(d1, d2);
            assert_eq!(data1.as_ref(), data2.as_ref());
        }
        _ => panic!("expected Embedding values"),
    }
}
941
/// The encoder must accept an i64 decimal mantissa without trailing zeros and
/// reject one that has them.
#[test]
fn test_decimal_normalized() {
    // Encodes `mantissa * 10^-2` with a fresh writer and dictionary builder and
    // reports whether the encoder accepted it.
    let accepts = |mantissa| {
        let decimal = Value::Decimal {
            exponent: -2,
            mantissa: DecimalMantissa::I64(mantissa),
            unit: None,
        };
        let mut builder = DictionaryBuilder::new();
        let mut sink = Writer::new();
        encode_value(&mut sink, &decimal, &mut builder).is_ok()
    };

    // Valid: 12.34 = 1234 * 10^-2
    assert!(accepts(1234));

    // Invalid: has trailing zeros
    assert!(!accepts(1230));
}
964
/// Every sample date string must come back from an encode/decode cycle
/// character-for-character identical.
#[test]
fn test_date_roundtrip() {
    // Test various date values (RFC 3339 format)
    const SAMPLES: [&str; 4] = [
        "1970-01-01Z",        // Unix epoch, UTC
        "2024-03-15Z",        // March 15, 2024 UTC
        "2024-03-15+05:30",   // March 15, 2024 +05:30
        "2024-03-15-08:00",   // March 15, 2024 -08:00
    ];

    let wire_dicts = WireDictionaries::default();
    // One builder is shared by all samples; each sample gets its own writer.
    let mut builder = DictionaryBuilder::new();

    for sample in SAMPLES {
        let original = Value::Date(Cow::Owned(sample.to_owned()));

        let mut encoded = Writer::new();
        encode_value(&mut encoded, &original, &mut builder).unwrap();

        let mut input = Reader::new(encoded.as_bytes());
        let restored = decode_value(&mut input, DataType::Date, &wire_dicts).unwrap();

        // Compare the string values
        if let (Value::Date(before), Value::Date(after)) = (&original, &restored) {
            assert_eq!(before.as_ref(), after.as_ref(), "Roundtrip failed for {}", sample);
        } else {
            panic!("expected Date values");
        }
    }
}
996
/// Every sample time string must come back from an encode/decode cycle
/// character-for-character identical.
#[test]
fn test_time_roundtrip() {
    // Test various time values (RFC 3339 format)
    const SAMPLES: [&str; 5] = [
        "00:00:00Z",              // Midnight UTC
        "14:30:00Z",              // 14:30:00 UTC
        "14:30:00.5+05:30",       // 14:30:00.500 +05:30
        "23:59:59.999999Z",       // 23:59:59.999999 UTC
        "00:00:00-05:00",         // Midnight -05:00
    ];

    let wire_dicts = WireDictionaries::default();
    // One builder is shared by all samples; each sample gets its own writer.
    let mut builder = DictionaryBuilder::new();

    for sample in SAMPLES {
        let original = Value::Time(Cow::Owned(sample.to_owned()));

        let mut encoded = Writer::new();
        encode_value(&mut encoded, &original, &mut builder).unwrap();

        let mut input = Reader::new(encoded.as_bytes());
        let restored = decode_value(&mut input, DataType::Time, &wire_dicts).unwrap();

        // Compare the string values
        if let (Value::Time(before), Value::Time(after)) = (&original, &restored) {
            assert_eq!(before.as_ref(), after.as_ref(), "Roundtrip failed for {}", sample);
        } else {
            panic!("expected Time values");
        }
    }
}
1029
/// Every sample datetime string must come back from an encode/decode cycle
/// character-for-character identical.
#[test]
fn test_datetime_roundtrip() {
    // Test various datetime values (RFC 3339 format)
    const SAMPLES: [&str; 4] = [
        "1970-01-01T00:00:00Z",          // Unix epoch UTC
        "2024-03-15T14:30:00Z",          // 2024-03-15T14:30:00Z
        "2024-03-15T14:30:00+05:30",     // 2024-03-15T14:30:00+05:30
        "2024-03-15T14:30:00.123456Z",   // With microseconds
    ];

    let wire_dicts = WireDictionaries::default();
    // One builder is shared by all samples; each sample gets its own writer.
    let mut builder = DictionaryBuilder::new();

    for sample in SAMPLES {
        let original = Value::Datetime(Cow::Owned(sample.to_owned()));

        let mut encoded = Writer::new();
        encode_value(&mut encoded, &original, &mut builder).unwrap();

        let mut input = Reader::new(encoded.as_bytes());
        let restored = decode_value(&mut input, DataType::Datetime, &wire_dicts).unwrap();

        // Compare the string values
        if let (Value::Datetime(before), Value::Datetime(after)) = (&original, &restored) {
            assert_eq!(before.as_ref(), after.as_ref(), "Roundtrip failed for {}", sample);
        } else {
            panic!("expected Datetime values");
        }
    }
}
1061
/// The encoder must refuse DATE strings that are malformed or carry an
/// out-of-range month.
///
/// The invalid-month case is checked both bare and with a `Z` zone designator.
/// Every accepted fixture in `test_date_roundtrip` carries a zone, so the bare
/// form alone could be rejected for the missing zone rather than for the
/// month; the zone-qualified form leaves the month as the only fault.
#[test]
fn test_date_validation() {
    // One builder is shared by all attempts; each attempt gets its own writer.
    let mut dict_builder = DictionaryBuilder::new();
    let mut is_rejected = |text: &'static str| {
        let candidate = Value::Date(Cow::Borrowed(text));
        let mut writer = Writer::new();
        encode_value(&mut writer, &candidate, &mut dict_builder).is_err()
    };

    // DATE should reject invalid RFC 3339 format
    assert!(is_rejected("not-a-date"));

    // Invalid month
    assert!(is_rejected("2024-13-01"));

    // Invalid month, with the zone designator present
    assert!(is_rejected("2024-13-01Z"), "month 13 must be rejected when a zone is supplied");
}
1076
/// The encoder must refuse TIME strings that are malformed or carry an
/// out-of-range hour or minute.
///
/// The out-of-range cases are checked both bare and with a `Z` zone designator.
/// RFC 3339 `full-time` requires a time offset and every accepted fixture in
/// `test_time_roundtrip` carries one, so the bare forms alone could be rejected
/// for the missing offset rather than for the bad field; the zone-qualified
/// forms leave the hour or minute as the only fault.
#[test]
fn test_time_validation() {
    // One builder is shared by all attempts; each attempt gets its own writer.
    let mut dict_builder = DictionaryBuilder::new();
    let mut is_rejected = |text: &'static str| {
        let candidate = Value::Time(Cow::Borrowed(text));
        let mut writer = Writer::new();
        encode_value(&mut writer, &candidate, &mut dict_builder).is_err()
    };

    // TIME should reject invalid RFC 3339 format
    assert!(is_rejected("not:a:time"));

    // Invalid hours
    assert!(is_rejected("24:00:00"));

    // Invalid minutes
    assert!(is_rejected("14:60:00"));

    // Invalid hours / minutes, with the zone designator present
    assert!(is_rejected("24:00:00Z"), "hour 24 must be rejected when a zone is supplied");
    assert!(is_rejected("14:60:00Z"), "minute 60 must be rejected when a zone is supplied");
}
1096
/// The encoder must refuse DATETIME strings that are malformed or whose date
/// or time part is out of range.
#[test]
fn test_datetime_validation() {
    // One builder is shared by all attempts; each attempt gets its own writer.
    let mut builder = DictionaryBuilder::new();
    let mut is_rejected = |text: &'static str| {
        let candidate = Value::Datetime(Cow::Borrowed(text));
        let mut sink = Writer::new();
        encode_value(&mut sink, &candidate, &mut builder).is_err()
    };

    // DATETIME should reject invalid RFC 3339 format
    assert!(is_rejected("not-a-datetime"));

    // Invalid date part
    assert!(is_rejected("2024-13-01T00:00:00Z"));

    // Invalid time part
    assert!(is_rejected("2024-01-01T25:00:00Z"));
}
1116
/// Table-driven checks for the two big-mantissa helpers: the zero test and the
/// divisible-by-ten test (positive and two's-complement negative inputs).
#[test]
fn test_big_decimal_normalization_helpers() {
    // Test is_big_mantissa_zero
    let zero: [&[u8]; 3] = [&[], &[0], &[0, 0, 0]];
    let non_zero: [&[u8]; 2] = [&[1], &[0, 1]];
    for bytes in zero {
        assert!(is_big_mantissa_zero(bytes), "{:?} should count as zero", bytes);
    }
    for bytes in non_zero {
        assert!(!is_big_mantissa_zero(bytes), "{:?} should not count as zero", bytes);
    }

    // Test is_big_mantissa_divisible_by_10 (big-endian bytes)
    let divisible: [&[u8]; 6] = [
        &[0x0A],       // 10
        &[0x14],       // 20
        &[0x64],       // 100
        &[0x01, 0xF4], // 500
        // Negative numbers (two's complement, 1 byte)
        &[0xF6],       // -10
        &[0xEC],       // -20
    ];
    let indivisible: [&[u8]; 6] = [
        &[0x01], // 1
        &[0x07], // 7
        &[0x0B], // 11
        &[0x15], // 21
        // Negative numbers (two's complement, 1 byte)
        &[0xFF], // -1
        &[0xF9], // -7
    ];
    for bytes in divisible {
        assert!(is_big_mantissa_divisible_by_10(bytes), "{:?} should be divisible by 10", bytes);
    }
    for bytes in indivisible {
        assert!(!is_big_mantissa_divisible_by_10(bytes), "{:?} should not be divisible by 10", bytes);
    }
}
1148
/// The encoder must apply the normalization rules to big (byte-array)
/// mantissas: no factor of ten, and a zero mantissa only with a zero exponent.
#[test]
fn test_big_decimal_normalization_encode() {
    // Encodes a big-mantissa decimal with a fresh writer and dictionary builder
    // and reports whether the encoder accepted it.
    let accepts = |exponent, mantissa: &[u8]| {
        let decimal = Value::Decimal {
            exponent,
            mantissa: DecimalMantissa::Big(Cow::Owned(mantissa.to_vec())),
            unit: None,
        };
        let mut builder = DictionaryBuilder::new();
        let mut sink = Writer::new();
        encode_value(&mut sink, &decimal, &mut builder).is_ok()
    };

    // Valid: mantissa not divisible by 10 (7)
    assert!(accepts(0, &[0x07]));

    // Invalid: mantissa is 10 (divisible by 10)
    assert!(!accepts(0, &[0x0A]));

    // Invalid: zero mantissa with non-zero exponent
    assert!(!accepts(1, &[0x00]));

    // Valid: zero mantissa with zero exponent
    assert!(accepts(0, &[0x00]));
}
1191
1192}