Skip to main content

grc_20/model/
value.rs

1//! Value types for GRC-20 properties.
2//!
3//! Values are typed attribute instances on entities and relations.
4
5use std::borrow::Cow;
6
7use crate::model::Id;
8use crate::util::{parse_date_rfc3339, parse_datetime_rfc3339, parse_time_rfc3339};
9
/// Property value data types, as enumerated in spec Section 2.4.
///
/// Each discriminant is the one-byte code accepted by [`DataType::from_u8`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum DataType {
    Bool = 1,
    Int64 = 2,
    Float64 = 3,
    Decimal = 4,
    Text = 5,
    Bytes = 6,
    Date = 7,
    Time = 8,
    Datetime = 9,
    Schedule = 10,
    Point = 11,
    Rect = 12,
    Embedding = 13,
}

impl DataType {
    /// Decodes a wire byte into a `DataType`.
    ///
    /// Returns `None` when `v` is not one of the codes 1 through 13.
    pub fn from_u8(v: u8) -> Option<DataType> {
        // Ordered by wire code, so the variant with code `c` sits at index `c - 1`.
        const BY_CODE: [DataType; 13] = [
            DataType::Bool,
            DataType::Int64,
            DataType::Float64,
            DataType::Decimal,
            DataType::Text,
            DataType::Bytes,
            DataType::Date,
            DataType::Time,
            DataType::Datetime,
            DataType::Schedule,
            DataType::Point,
            DataType::Rect,
            DataType::Embedding,
        ];

        // Code 0 is unassigned; `checked_sub` rejects it before indexing.
        let index = usize::from(v.checked_sub(1)?);
        BY_CODE.get(index).copied()
    }
}
50
/// Embedding sub-types (spec Section 2.4).
///
/// The sub-type determines how each dimension of an embedding vector is
/// encoded in the raw byte payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum EmbeddingSubType {
    /// 32-bit IEEE 754 float, little-endian (4 bytes per dim)
    Float32 = 0,
    /// Signed 8-bit integer (1 byte per dim)
    Int8 = 1,
    /// Bit-packed binary, LSB-first (1/8 byte per dim)
    Binary = 2,
}

impl EmbeddingSubType {
    /// Creates an EmbeddingSubType from its wire representation.
    ///
    /// Returns `None` for any byte other than 0, 1 or 2.
    pub fn from_u8(v: u8) -> Option<EmbeddingSubType> {
        match v {
            0 => Some(EmbeddingSubType::Float32),
            1 => Some(EmbeddingSubType::Int8),
            2 => Some(EmbeddingSubType::Binary),
            _ => None,
        }
    }

    /// Returns the number of bytes needed for the given number of dimensions.
    ///
    /// For `Float32` the product `dims * 4` saturates at `usize::MAX` instead
    /// of overflowing. A plain multiplication would panic in debug builds and
    /// wrap in release builds; a wrapped result could equal the length of a
    /// short buffer and make a bogus dimension count look valid. No slice can
    /// be `usize::MAX` bytes long, so a saturated result never matches a real
    /// payload length.
    pub fn bytes_for_dims(self, dims: usize) -> usize {
        match self {
            EmbeddingSubType::Float32 => dims.saturating_mul(4),
            EmbeddingSubType::Int8 => dims,
            // Bit-packed: round up to a whole number of bytes.
            EmbeddingSubType::Binary => dims.div_ceil(8),
        }
    }
}
83
/// Decimal mantissa representation.
///
/// Most decimals fit in i64; larger values use big-endian two's complement bytes.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DecimalMantissa<'a> {
    /// Mantissa fits in signed 64-bit integer.
    I64(i64),
    /// Arbitrary precision: big-endian two's complement, minimal-length.
    Big(Cow<'a, [u8]>),
}

impl DecimalMantissa<'_> {
    /// Returns whether this mantissa has trailing decimal zeros (not normalized).
    ///
    /// A mantissa has trailing zeros when it is non-zero and divisible by 10.
    /// Zero itself (including an empty or all-zero `Big` byte string) reports
    /// `false` for both representations.
    pub fn has_trailing_zeros(&self) -> bool {
        match self {
            DecimalMantissa::I64(v) => *v != 0 && *v % 10 == 0,
            DecimalMantissa::Big(bytes) => {
                if self.is_zero() {
                    return false;
                }

                // Remainder mod 10 of the bytes read as an unsigned big-endian
                // integer; the accumulator stays below 10 * 256 so u32 is ample.
                let unsigned_rem = bytes
                    .iter()
                    .fold(0u32, |rem, &b| (rem * 256 + u32::from(b)) % 10);

                // In two's complement a set top bit means the real value is
                // `unsigned - 256^n`. Since 256^n mod 10 == 6 for every n >= 1,
                // a negative value is divisible by 10 exactly when the unsigned
                // reading leaves remainder 6.
                let negative = matches!(bytes.first(), Some(b) if b & 0x80 != 0);
                let divisible_rem = if negative { 6 } else { 0 };
                unsigned_rem == divisible_rem
            }
        }
    }

    /// Returns true if this is the zero mantissa.
    ///
    /// For `Big`, every byte must be zero; an empty byte string counts as zero.
    pub fn is_zero(&self) -> bool {
        match self {
            DecimalMantissa::I64(v) => *v == 0,
            DecimalMantissa::Big(bytes) => bytes.iter().all(|b| *b == 0),
        }
    }
}
116
117/// A typed value that can be stored on an entity or relation.
118#[derive(Debug, Clone, PartialEq)]
119pub enum Value<'a> {
120    /// Boolean value.
121    Bool(bool),
122
123    /// 64-bit signed integer with optional unit.
124    Int64 {
125        value: i64,
126        /// Unit entity ID, or None for no unit.
127        unit: Option<Id>,
128    },
129
130    /// 64-bit IEEE 754 float (NaN not allowed) with optional unit.
131    Float64 {
132        value: f64,
133        /// Unit entity ID, or None for no unit.
134        unit: Option<Id>,
135    },
136
137    /// Arbitrary-precision decimal: value = mantissa * 10^exponent, with optional unit.
138    Decimal {
139        exponent: i32,
140        mantissa: DecimalMantissa<'a>,
141        /// Unit entity ID, or None for no unit.
142        unit: Option<Id>,
143    },
144
145    /// UTF-8 text with optional language.
146    Text {
147        value: Cow<'a, str>,
148        /// Language entity ID, or None for default language.
149        language: Option<Id>,
150    },
151
152    /// Opaque byte array.
153    Bytes(Cow<'a, [u8]>),
154
155    /// Calendar date in RFC 3339 format (e.g., "2024-01-15" or "2024-01-15+05:30").
156    Date(Cow<'a, str>),
157
158    /// Time of day in RFC 3339 format (e.g., "14:30:45.123456Z" or "14:30:45+05:30").
159    Time(Cow<'a, str>),
160
161    /// Combined date and time in RFC 3339 format (e.g., "2024-01-15T14:30:45.123456Z").
162    Datetime(Cow<'a, str>),
163
164    /// RFC 5545 iCalendar schedule string.
165    Schedule(Cow<'a, str>),
166
167    /// WGS84 geographic coordinate with optional altitude.
168    Point {
169        /// Latitude in degrees (-90 to +90).
170        lat: f64,
171        /// Longitude in degrees (-180 to +180).
172        lon: f64,
173        /// Altitude in meters above WGS84 ellipsoid (optional).
174        alt: Option<f64>,
175    },
176
177    /// Axis-aligned bounding box in WGS84 coordinates.
178    Rect {
179        /// Southern edge latitude (-90 to +90).
180        min_lat: f64,
181        /// Western edge longitude (-180 to +180).
182        min_lon: f64,
183        /// Northern edge latitude (-90 to +90).
184        max_lat: f64,
185        /// Eastern edge longitude (-180 to +180).
186        max_lon: f64,
187    },
188
189    /// Dense vector for semantic similarity search.
190    Embedding {
191        sub_type: EmbeddingSubType,
192        dims: usize,
193        /// Raw bytes in the format specified by sub_type.
194        data: Cow<'a, [u8]>,
195    },
196}
197
198impl Value<'_> {
199    /// Returns the data type of this value.
200    pub fn data_type(&self) -> DataType {
201        match self {
202            Value::Bool(_) => DataType::Bool,
203            Value::Int64 { .. } => DataType::Int64,
204            Value::Float64 { .. } => DataType::Float64,
205            Value::Decimal { .. } => DataType::Decimal,
206            Value::Text { .. } => DataType::Text,
207            Value::Bytes(_) => DataType::Bytes,
208            Value::Date { .. } => DataType::Date,
209            Value::Time { .. } => DataType::Time,
210            Value::Datetime { .. } => DataType::Datetime,
211            Value::Schedule(_) => DataType::Schedule,
212            Value::Point { .. } => DataType::Point,
213            Value::Rect { .. } => DataType::Rect,
214            Value::Embedding { .. } => DataType::Embedding,
215        }
216    }
217
218    /// Validates this value according to spec rules.
219    ///
220    /// Returns an error description if invalid, None if valid.
221    pub fn validate(&self) -> Option<&'static str> {
222        match self {
223            Value::Float64 { value, .. } => {
224                if value.is_nan() {
225                    return Some("NaN is not allowed in Float64");
226                }
227            }
228            Value::Decimal { exponent, mantissa, .. } => {
229                // Zero must be {0, 0}
230                if mantissa.is_zero() && *exponent != 0 {
231                    return Some("zero DECIMAL must have exponent 0");
232                }
233                // Non-zero must not have trailing zeros
234                if !mantissa.is_zero() && mantissa.has_trailing_zeros() {
235                    return Some("DECIMAL mantissa has trailing zeros (not normalized)");
236                }
237            }
238            Value::Point { lat, lon, alt } => {
239                if *lat < -90.0 || *lat > 90.0 {
240                    return Some("latitude out of range [-90, +90]");
241                }
242                if *lon < -180.0 || *lon > 180.0 {
243                    return Some("longitude out of range [-180, +180]");
244                }
245                if lat.is_nan() || lon.is_nan() {
246                    return Some("NaN is not allowed in Point coordinates");
247                }
248                if let Some(a) = alt {
249                    if a.is_nan() {
250                        return Some("NaN is not allowed in Point altitude");
251                    }
252                }
253            }
254            Value::Rect { min_lat, min_lon, max_lat, max_lon } => {
255                if *min_lat < -90.0 || *min_lat > 90.0 || *max_lat < -90.0 || *max_lat > 90.0 {
256                    return Some("latitude out of range [-90, +90]");
257                }
258                if *min_lon < -180.0 || *min_lon > 180.0 || *max_lon < -180.0 || *max_lon > 180.0 {
259                    return Some("longitude out of range [-180, +180]");
260                }
261                if min_lat.is_nan() || min_lon.is_nan() || max_lat.is_nan() || max_lon.is_nan() {
262                    return Some("NaN is not allowed in Rect coordinates");
263                }
264            }
265            Value::Date(s) => {
266                if parse_date_rfc3339(s).is_err() {
267                    return Some("Invalid RFC 3339 date format");
268                }
269            }
270            Value::Time(s) => {
271                if parse_time_rfc3339(s).is_err() {
272                    return Some("Invalid RFC 3339 time format");
273                }
274            }
275            Value::Datetime(s) => {
276                if parse_datetime_rfc3339(s).is_err() {
277                    return Some("Invalid RFC 3339 datetime format");
278                }
279            }
280            Value::Embedding {
281                sub_type,
282                dims,
283                data,
284            } => {
285                let expected = sub_type.bytes_for_dims(*dims);
286                if data.len() != expected {
287                    return Some("embedding data length doesn't match dims");
288                }
289                // Check for NaN in float32 embeddings
290                if *sub_type == EmbeddingSubType::Float32 {
291                    for chunk in data.chunks_exact(4) {
292                        let f = f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
293                        if f.is_nan() {
294                            return Some("NaN is not allowed in float32 embedding");
295                        }
296                    }
297                }
298            }
299            _ => {}
300        }
301        None
302    }
303}
304
305/// A property-value pair that can be attached to an object.
306#[derive(Debug, Clone, PartialEq)]
307pub struct PropertyValue<'a> {
308    /// The property ID this value is for.
309    pub property: Id,
310    /// The value.
311    pub value: Value<'a>,
312}
313
314/// A property definition in the schema.
315#[derive(Debug, Clone, PartialEq, Eq)]
316pub struct Property {
317    /// The property's unique identifier.
318    pub id: Id,
319    /// The data type for values of this property.
320    pub data_type: DataType,
321}
322
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `value` passes validation.
    fn is_valid(value: Value<'_>) -> bool {
        value.validate().is_none()
    }

    /// Unitless `Float64` value.
    fn float(value: f64) -> Value<'static> {
        Value::Float64 { value, unit: None }
    }

    /// `Point` value from its three components.
    fn point(lat: f64, lon: f64, alt: Option<f64>) -> Value<'static> {
        Value::Point { lat, lon, alt }
    }

    /// `Rect` value from its four edges.
    fn rect(min_lat: f64, min_lon: f64, max_lat: f64, max_lon: f64) -> Value<'static> {
        Value::Rect {
            min_lat,
            min_lon,
            max_lat,
            max_lon,
        }
    }

    /// Unitless `Decimal` value with an i64 mantissa.
    fn decimal(exponent: i32, mantissa: i64) -> Value<'static> {
        Value::Decimal {
            exponent,
            mantissa: DecimalMantissa::I64(mantissa),
            unit: None,
        }
    }

    #[test]
    fn test_embedding_bytes_for_dims() {
        // (sub-type, dims, expected byte count)
        let cases = [
            (EmbeddingSubType::Float32, 10, 40),
            (EmbeddingSubType::Int8, 10, 10),
            (EmbeddingSubType::Binary, 10, 2),
            (EmbeddingSubType::Binary, 8, 1),
            (EmbeddingSubType::Binary, 9, 2),
        ];
        for (sub_type, dims, expected) in cases.iter().copied() {
            assert_eq!(sub_type.bytes_for_dims(dims), expected);
        }
    }

    #[test]
    fn test_value_validation_nan() {
        assert!(!is_valid(float(f64::NAN)));
        // Infinities and ordinary numbers are accepted.
        assert!(is_valid(float(f64::INFINITY)));
        assert!(is_valid(float(-f64::INFINITY)));
        assert!(is_valid(float(42.0)));
    }

    #[test]
    fn test_value_validation_point() {
        // Out-of-range coordinates
        assert!(!is_valid(point(91.0, 0.0, None)));
        assert!(!is_valid(point(-91.0, 0.0, None)));
        assert!(!is_valid(point(0.0, 181.0, None)));
        assert!(!is_valid(point(0.0, -181.0, None)));
        // Boundary values are inclusive
        assert!(is_valid(point(90.0, 180.0, None)));
        assert!(is_valid(point(-90.0, -180.0, None)));
        // With altitude
        assert!(is_valid(point(0.0, 0.0, Some(1000.0))));
        assert!(!is_valid(point(0.0, 0.0, Some(f64::NAN))));
    }

    #[test]
    fn test_value_validation_rect() {
        // Invalid latitudes
        assert!(!is_valid(rect(-91.0, 0.0, 0.0, 0.0)));
        assert!(!is_valid(rect(0.0, 0.0, 91.0, 0.0)));
        // Invalid longitudes
        assert!(!is_valid(rect(0.0, -181.0, 0.0, 0.0)));
        assert!(!is_valid(rect(0.0, 0.0, 0.0, 181.0)));
        // Valid rect
        assert!(is_valid(rect(24.5, -125.0, 49.4, -66.9)));
        // NaN not allowed
        assert!(!is_valid(rect(f64::NAN, 0.0, 0.0, 0.0)));
    }

    #[test]
    fn test_decimal_normalization() {
        // Zero must have exponent 0
        assert!(!is_valid(decimal(1, 0)));

        // Non-zero with trailing zeros is invalid
        assert!(!is_valid(decimal(0, 1230)));

        // Valid decimal
        assert!(is_valid(decimal(-2, 1234)));
    }
}