iceberg_rust_spec/spec/
values.rs

1/*!
2 * Value types and operations for Iceberg data
3 *
4 * This module implements the runtime value system for Iceberg, including:
5 * - Primitive values (boolean, numeric, string, binary, etc.)
6 * - Complex values (structs, lists, maps)
7 * - Value transformations for partitioning
8 * - Serialization/deserialization to/from various formats
9 * - Value comparison and manipulation operations
10 *
11 * The value system provides:
12 * - Type-safe data representation
13 * - Efficient value storage and access
14 * - Support for partition transforms
15 * - JSON/binary format conversions
16 */
17
18use core::panic;
19use std::{
20    any::Any,
21    collections::{btree_map::Keys, BTreeMap, HashMap},
22    fmt,
23    hash::{DefaultHasher, Hash, Hasher},
24    io::Cursor,
25    ops::Sub,
26    slice::Iter,
27};
28
29use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
30use datetime::{
31    date_to_months, date_to_years, datetime_to_days, datetime_to_hours, datetime_to_months,
32    days_to_date, micros_to_datetime,
33};
34use itertools::Itertools;
35use ordered_float::OrderedFloat;
36use rust_decimal::Decimal;
37use serde::{
38    de::{MapAccess, Visitor},
39    ser::SerializeStruct,
40    Deserialize, Deserializer, Serialize,
41};
42use serde_bytes::ByteBuf;
43use serde_json::{Map as JsonMap, Number, Value as JsonValue};
44use uuid::Uuid;
45
46use crate::error::Error;
47
48use super::{
49    partition::{PartitionField, Transform},
50    types::{PrimitiveType, StructType, Type},
51};
52
/// Calendar year of the Unix epoch (1970-01-01).
///
/// Subtracting this from a calendar year yields the number of years since the epoch, which is
/// how the `year` partition transform derives its result for timestamps.
///
/// Declared as a `const` rather than a `static`: the value is a plain immutable scalar that
/// needs neither a fixed address nor interior mutability, and a `const` can additionally be
/// used in constant expressions and patterns.
pub const YEARS_BEFORE_UNIX_EPOCH: i32 = 1970;
54
55/// Values present in iceberg type
56#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
57#[serde(untagged)]
58pub enum Value {
59    /// 0x00 for false, non-zero byte for true
60    Boolean(bool),
61    /// Stored as 4-byte little-endian
62    Int(i32),
63    /// Stored as 8-byte little-endian
64    LongInt(i64),
65    /// Stored as 4-byte little-endian
66    Float(OrderedFloat<f32>),
67    /// Stored as 8-byte little-endian
68    Double(OrderedFloat<f64>),
69    /// Stores days from the 1970-01-01 in an 4-byte little-endian int
70    Date(i32),
71    /// Stores microseconds from midnight in an 8-byte little-endian long
72    Time(i64),
73    /// Stores microseconds from 1970-01-01 00:00:00.000000 in an 8-byte little-endian long
74    Timestamp(i64),
75    /// Stores microseconds from 1970-01-01 00:00:00.000000 in an 8-byte little-endian long
76    TimestampTZ(i64),
77    /// UTF-8 bytes (without length)
78    String(String),
79    /// 16-byte big-endian value
80    UUID(Uuid),
81    /// Binary value
82    Fixed(usize, Vec<u8>),
83    /// Binary value (without length)
84    Binary(Vec<u8>),
85    /// Stores unscaled value as two’s-complement big-endian binary,
86    /// using the minimum number of bytes for the value
87    Decimal(Decimal),
88    /// A struct is a tuple of typed values. Each field in the tuple is named and has an integer id that is unique in the table schema.
89    /// Each field can be either optional or required, meaning that values can (or cannot) be null. Fields may be any type.
90    /// Fields may have an optional comment or doc string. Fields can have default values.
91    Struct(Struct),
92    /// A list is a collection of values with some element type.
93    /// The element field has an integer id that is unique in the table schema.
94    /// Elements can be either optional or required. Element types may be any type.
95    List(Vec<Option<Value>>),
96    /// A map is a collection of key-value pairs with a key type and a value type.
97    /// Both the key field and value field each have an integer id that is unique in the table schema.
98    /// Map keys are required and map values can be either optional or required. Both map keys and map values may be any type, including nested types.
99    Map(BTreeMap<Value, Option<Value>>),
100}
101
102impl From<Value> for ByteBuf {
103    fn from(value: Value) -> Self {
104        match value {
105            Value::Boolean(val) => {
106                if val {
107                    ByteBuf::from([1u8])
108                } else {
109                    ByteBuf::from([0u8])
110                }
111            }
112            Value::Int(val) => ByteBuf::from(val.to_le_bytes()),
113            Value::LongInt(val) => ByteBuf::from(val.to_le_bytes()),
114            Value::Float(val) => ByteBuf::from(val.to_le_bytes()),
115            Value::Double(val) => ByteBuf::from(val.to_le_bytes()),
116            Value::Date(val) => ByteBuf::from(val.to_le_bytes()),
117            Value::Time(val) => ByteBuf::from(val.to_le_bytes()),
118            Value::Timestamp(val) => ByteBuf::from(val.to_le_bytes()),
119            Value::TimestampTZ(val) => ByteBuf::from(val.to_le_bytes()),
120            Value::String(val) => ByteBuf::from(val.as_bytes()),
121            Value::UUID(val) => ByteBuf::from(val.as_u128().to_be_bytes()),
122            Value::Fixed(_, val) => ByteBuf::from(val),
123            Value::Binary(val) => ByteBuf::from(val),
124            Value::Decimal(val) => {
125                // rust_decimal mantissa is 96 bits
126                // so we can remove the first 32 bits of the i128 representation
127                let bytes = val.mantissa().to_be_bytes()[4..].to_vec();
128                ByteBuf::from(bytes)
129            }
130            _ => todo!(),
131        }
132    }
133}
134
135impl fmt::Display for Value {
136    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
137        match self {
138            Value::Boolean(b) => write!(f, "{b}"),
139            Value::Int(i) => write!(f, "{i}"),
140            Value::LongInt(l) => write!(f, "{l}"),
141            Value::Float(fl) => write!(f, "{fl}"),
142            Value::Double(d) => write!(f, "{d}"),
143            Value::Date(d) => write!(f, "{d}"),
144            Value::Time(t) => write!(f, "{t}"),
145            Value::Timestamp(ts) => write!(f, "{ts}"),
146            Value::TimestampTZ(ts) => write!(f, "{ts}"),
147            Value::String(s) => write!(f, "{s}"),
148            Value::UUID(u) => write!(f, "{u}"),
149            Value::Fixed(size, data) => write!(f, "{data:?} ({size} bytes)"),
150            Value::Binary(data) => write!(f, "{:?} ({} bytes)", data, data.len()),
151            Value::Decimal(d) => write!(f, "{d}"),
152            _ => panic!("Printing of compound types is not supported"),
153        }
154    }
155}
156
157/// The partition struct stores the tuple of partition values for each file.
158/// Its type is derived from the partition fields of the partition spec used to write the manifest file.
159/// In v2, the partition struct’s field ids must match the ids from the partition spec.
160#[derive(Debug, Clone, Eq, PartialOrd, Ord)]
161pub struct Struct {
162    /// Vector to store the field values
163    pub fields: Vec<Option<Value>>,
164    /// A lookup that matches the field name to the entry in the vector
165    pub lookup: BTreeMap<String, usize>,
166}
167
168impl Struct {
169    /// Gets a reference to the value associated with the given field name
170    ///
171    /// # Arguments
172    /// * `name` - The name of the field to retrieve
173    ///
174    /// # Returns
175    /// * `Some(&Option<Value>)` if the field exists
176    /// * `None` if the field doesn't exist
177    pub fn get(&self, name: &str) -> Option<&Option<Value>> {
178        self.fields.get(*self.lookup.get(name)?)
179    }
180    /// Gets a mutable reference to the value associated with the given field name
181    ///
182    /// # Arguments
183    /// * `name` - The name of the field to retrieve
184    ///
185    /// # Returns
186    /// * `Some(&mut Option<Value>)` if the field exists
187    /// * `None` if the field doesn't exist
188    pub fn get_mut(&mut self, name: &str) -> Option<&mut Option<Value>> {
189        self.fields.get_mut(*self.lookup.get(name)?)
190    }
191
192    /// Returns an iterator over all field values in this struct
193    ///
194    /// # Returns
195    /// * An iterator yielding references to each optional Value in order
196    pub fn iter(&self) -> Iter<'_, Option<Value>> {
197        self.fields.iter()
198    }
199
200    /// Returns an iterator over all field names in this struct
201    ///
202    /// # Returns
203    /// * An iterator yielding references to each field name in sorted order
204    pub fn keys(&self) -> Keys<'_, String, usize> {
205        self.lookup.keys()
206    }
207
208    /// Casts the struct's values according to a schema and partition specification
209    ///
210    /// # Arguments
211    /// * `schema` - The StructType defining the expected types
212    /// * `partition_spec` - The partition fields specification
213    ///
214    /// # Returns
215    /// * `Ok(Struct)` - A new Struct with values cast to match the schema and partition spec
216    /// * `Err(Error)` - If casting fails or schema references are invalid
217    ///
218    /// This method transforms the struct's values based on the partition specification,
219    /// applying any necessary type conversions to match the target schema.
220    pub(crate) fn cast(
221        self,
222        schema: &StructType,
223        partition_spec: &[PartitionField],
224    ) -> Result<Self, Error> {
225        if self.fields.is_empty() {
226            return Ok(self);
227        }
228        // Returns a HashMap mapping partition field names to transformed types.
229        let map = partition_spec
230            .iter()
231            .map(|partition_field| {
232                let field = schema.get(*partition_field.source_id() as usize).ok_or(
233                    Error::InvalidFormat(format!(
234                        "partition spec references unknown column id {}",
235                        partition_field.source_id()
236                    )),
237                )?;
238
239                Ok((
240                    partition_field.name().clone(),
241                    field.field_type.tranform(partition_field.transform())?,
242                ))
243            })
244            .collect::<Result<HashMap<_, _>, Error>>()?;
245        Ok(Struct::from_iter(
246            self.fields
247                .into_iter()
248                .enumerate()
249                .map(|(idx, field)| {
250                    // Get name of the column
251                    let name = self
252                        .lookup
253                        .iter()
254                        .find(|(_, v)| **v == idx)
255                        .ok_or(Error::InvalidFormat("partition struct".to_string()))?
256                        .0;
257
258                    // Get datatype after tranform
259                    let datatype = map
260                        .get(name)
261                        .ok_or(Error::InvalidFormat("partition_struct".to_string()))?;
262                    // Cast the value to the datatype
263                    let value = field.map(|value| value.cast(datatype)).transpose()?;
264                    Ok((name.clone(), value))
265                })
266                .collect::<Result<Vec<_>, Error>>()?,
267        ))
268    }
269}
270
271impl FromIterator<(String, Option<Value>)> for Struct {
272    fn from_iter<I: IntoIterator<Item = (String, Option<Value>)>>(iter: I) -> Self {
273        let mut fields = Vec::new();
274        let mut lookup = BTreeMap::new();
275
276        for (i, (key, value)) in iter.into_iter().enumerate() {
277            fields.push(value);
278            lookup.insert(key, i);
279        }
280
281        Struct { fields, lookup }
282    }
283}
284
285impl Serialize for Struct {
286    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
287    where
288        S: serde::Serializer,
289    {
290        let mut record = serializer.serialize_struct("r102", self.fields.len())?;
291        for (i, value) in self.fields.iter().enumerate() {
292            let (key, _) = self.lookup.iter().find(|(_, value)| **value == i).unwrap();
293            record.serialize_field(Box::leak(key.clone().into_boxed_str()), value)?;
294        }
295        record.end()
296    }
297}
298
299impl<'de> Deserialize<'de> for Struct {
300    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
301    where
302        D: Deserializer<'de>,
303    {
304        struct PartitionStructVisitor;
305
306        impl<'de> Visitor<'de> for PartitionStructVisitor {
307            type Value = Struct;
308
309            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
310                formatter.write_str("map")
311            }
312
313            fn visit_map<V>(self, mut map: V) -> Result<Struct, V::Error>
314            where
315                V: MapAccess<'de>,
316            {
317                let mut fields: Vec<Option<Value>> = Vec::new();
318                let mut lookup: BTreeMap<String, usize> = BTreeMap::new();
319                let mut index = 0;
320                while let Some(key) = map.next_key()? {
321                    fields.push(map.next_value()?);
322                    lookup.insert(key, index);
323                    index += 1;
324                }
325                Ok(Struct { fields, lookup })
326            }
327        }
328        deserializer.deserialize_struct(
329            "r102",
330            Box::leak(vec![].into_boxed_slice()),
331            PartitionStructVisitor,
332        )
333    }
334}
335
336impl PartialEq for Struct {
337    fn eq(&self, other: &Self) -> bool {
338        self.keys().all(|key| self.get(key).eq(&other.get(key)))
339    }
340}
341
342impl Hash for Struct {
343    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
344        for key in self.keys().sorted() {
345            key.hash(state);
346            self.get(key).hash(state);
347        }
348    }
349}
350
351impl Value {
352    /// Applies a partition transform to the value
353    ///
354    /// # Arguments
355    /// * `transform` - The partition transform to apply
356    ///
357    /// # Returns
358    /// * `Ok(Value)` - The transformed value
359    /// * `Err(Error)` - If the transform cannot be applied to this value type
360    ///
361    /// Supported transforms include:
362    /// * Identity - Returns the value unchanged
363    /// * Bucket - Applies a hash function and returns bucket number
364    /// * Truncate - Truncates numbers or strings
365    /// * Year/Month/Day/Hour - Extracts time components from dates and timestamps
366    pub fn transform(&self, transform: &Transform) -> Result<Value, Error> {
367        match transform {
368            Transform::Identity => Ok(self.clone()),
369            Transform::Bucket(n) => {
370                let mut bytes = Cursor::new(<Value as Into<ByteBuf>>::into(self.clone()));
371                let hash = murmur3::murmur3_32(&mut bytes, 0).unwrap();
372                Ok(Value::Int((hash % n) as i32))
373            }
374            Transform::Truncate(w) => match self {
375                Value::Int(i) => Ok(Value::Int(i - i.rem_euclid(*w as i32))),
376                Value::LongInt(i) => Ok(Value::LongInt(i - i.rem_euclid(*w as i64))),
377                Value::String(s) => {
378                    let mut s = s.clone();
379                    s.truncate(*w as usize);
380                    Ok(Value::String(s))
381                }
382                _ => Err(Error::NotSupported(
383                    "Datatype for truncate partition transform.".to_string(),
384                )),
385            },
386            Transform::Year => match self {
387                Value::Date(date) => Ok(Value::Int(date_to_years(&days_to_date(*date)))),
388                Value::Timestamp(time) => Ok(Value::Int(
389                    micros_to_datetime(*time).year() - YEARS_BEFORE_UNIX_EPOCH,
390                )),
391                Value::TimestampTZ(time) => Ok(Value::Int(
392                    micros_to_datetime(*time).year() - YEARS_BEFORE_UNIX_EPOCH,
393                )),
394                _ => Err(Error::NotSupported(
395                    "Datatype for year partition transform.".to_string(),
396                )),
397            },
398            Transform::Month => match self {
399                Value::Date(date) => Ok(Value::Int(date_to_months(&days_to_date(*date)))),
400                Value::Timestamp(time) => {
401                    Ok(Value::Int(datetime_to_months(&micros_to_datetime(*time))))
402                }
403                Value::TimestampTZ(time) => {
404                    Ok(Value::Int(datetime_to_months(&micros_to_datetime(*time))))
405                }
406                _ => Err(Error::NotSupported(
407                    "Datatype for month partition transform.".to_string(),
408                )),
409            },
410            Transform::Day => match self {
411                Value::Date(date) => Ok(Value::Int(*date)),
412                Value::Timestamp(time) => Ok(Value::Int(
413                    datetime_to_days(&micros_to_datetime(*time)) as i32,
414                )),
415                Value::TimestampTZ(time) => Ok(Value::Int(datetime_to_days(&micros_to_datetime(
416                    *time,
417                )) as i32)),
418                _ => Err(Error::NotSupported(
419                    "Datatype for day partition transform.".to_string(),
420                )),
421            },
422            Transform::Hour => match self {
423                Value::Timestamp(time) => Ok(Value::Int(datetime_to_hours(&micros_to_datetime(
424                    *time,
425                )) as i32)),
426                Value::TimestampTZ(time) => Ok(Value::Int(datetime_to_hours(&micros_to_datetime(
427                    *time,
428                )) as i32)),
429                _ => Err(Error::NotSupported(
430                    "Datatype for hour partition transform.".to_string(),
431                )),
432            },
433            _ => Err(Error::NotSupported(
434                "Partition transform operation".to_string(),
435            )),
436        }
437    }
438
439    /// Attempts to create a Value from raw bytes according to a specified type
440    ///
441    /// # Arguments
442    /// * `bytes` - The raw byte slice to parse
443    /// * `data_type` - The expected type of the value
444    ///
445    /// # Returns
446    /// * `Ok(Value)` - Successfully parsed value of the specified type
447    /// * `Err(Error)` - If the bytes cannot be parsed as the specified type
448    ///
449    /// # Note
450    /// Currently only supports primitive types. Complex types like structs, lists,
451    /// and maps are not supported and will return an error.
452    #[inline]
453    pub fn try_from_bytes(bytes: &[u8], data_type: &Type) -> Result<Self, Error> {
454        match data_type {
455            Type::Primitive(primitive) => match primitive {
456                PrimitiveType::Boolean => {
457                    if bytes.len() == 1 && bytes[0] == 0u8 {
458                        Ok(Value::Boolean(false))
459                    } else {
460                        Ok(Value::Boolean(true))
461                    }
462                }
463                PrimitiveType::Int => Ok(Value::Int(i32::from_le_bytes(bytes.try_into()?))),
464                PrimitiveType::Long => Ok(Value::LongInt(i64::from_le_bytes(bytes.try_into()?))),
465                PrimitiveType::Float => Ok(Value::Float(OrderedFloat(f32::from_le_bytes(
466                    bytes.try_into()?,
467                )))),
468                PrimitiveType::Double => Ok(Value::Double(OrderedFloat(f64::from_le_bytes(
469                    bytes.try_into()?,
470                )))),
471                PrimitiveType::Date => Ok(Value::Date(i32::from_le_bytes(bytes.try_into()?))),
472                PrimitiveType::Time => Ok(Value::Time(i64::from_le_bytes(bytes.try_into()?))),
473                PrimitiveType::Timestamp => {
474                    Ok(Value::Timestamp(i64::from_le_bytes(bytes.try_into()?)))
475                }
476                PrimitiveType::Timestamptz => {
477                    Ok(Value::TimestampTZ(i64::from_le_bytes(bytes.try_into()?)))
478                }
479                PrimitiveType::String => Ok(Value::String(std::str::from_utf8(bytes)?.to_string())),
480                PrimitiveType::Uuid => Ok(Value::UUID(Uuid::from_u128(u128::from_be_bytes(
481                    bytes.try_into()?,
482                )))),
483                PrimitiveType::Fixed(len) => Ok(Value::Fixed(*len as usize, Vec::from(bytes))),
484                PrimitiveType::Binary => Ok(Value::Binary(Vec::from(bytes))),
485                PrimitiveType::Decimal { scale, .. } => {
486                    let val = if bytes.len() <= 16 {
487                        i128::from_be_bytes(sign_extend_be(bytes))
488                    } else {
489                        return Err(Error::Type("decimal".to_string(), "bytes".to_string()));
490                    };
491                    Ok(Value::Decimal(Decimal::from_i128_with_scale(val, *scale)))
492                }
493            },
494            _ => Err(Error::NotSupported("Complex types as bytes".to_string())),
495        }
496    }
497
498    /// Attempts to create a Value from a JSON value according to a specified type
499    ///
500    /// # Arguments
501    /// * `value` - The JSON value to parse
502    /// * `data_type` - The expected Iceberg type
503    ///
504    /// # Returns
505    /// * `Ok(Some(Value))` - Successfully parsed value of the specified type
506    /// * `Ok(None)` - If the JSON value is null
507    /// * `Err(Error)` - If the JSON value cannot be parsed as the specified type
508    ///
509    /// # Note
510    /// Handles all primitive types as well as complex types like structs, lists and maps.
511    /// For complex types, recursively parses their contents according to their type specifications.
512    pub fn try_from_json(value: JsonValue, data_type: &Type) -> Result<Option<Self>, Error> {
513        match data_type {
514            Type::Primitive(primitive) => match (primitive, value) {
515                (PrimitiveType::Boolean, JsonValue::Bool(bool)) => Ok(Some(Value::Boolean(bool))),
516                (PrimitiveType::Int, JsonValue::Number(number)) => Ok(Some(Value::Int(
517                    number
518                        .as_i64()
519                        .ok_or(Error::Conversion(
520                            "json number".to_string(),
521                            "int".to_string(),
522                        ))?
523                        .try_into()?,
524                ))),
525                (PrimitiveType::Long, JsonValue::Number(number)) => {
526                    Ok(Some(Value::LongInt(number.as_i64().ok_or(
527                        Error::Conversion("json number".to_string(), "long".to_string()),
528                    )?)))
529                }
530                (PrimitiveType::Float, JsonValue::Number(number)) => Ok(Some(Value::Float(
531                    OrderedFloat(number.as_f64().ok_or(Error::Conversion(
532                        "json number".to_string(),
533                        "float".to_string(),
534                    ))? as f32),
535                ))),
536                (PrimitiveType::Double, JsonValue::Number(number)) => {
537                    Ok(Some(Value::Double(OrderedFloat(number.as_f64().ok_or(
538                        Error::Conversion("json number".to_string(), "double".to_string()),
539                    )?))))
540                }
541                (PrimitiveType::Date, JsonValue::String(s)) => Ok(Some(Value::Date(
542                    datetime::date_to_days(&NaiveDate::parse_from_str(&s, "%Y-%m-%d")?),
543                ))),
544                (PrimitiveType::Time, JsonValue::String(s)) => Ok(Some(Value::Time(
545                    datetime::time_to_microseconds(&NaiveTime::parse_from_str(&s, "%H:%M:%S%.f")?),
546                ))),
547                (PrimitiveType::Timestamp, JsonValue::String(s)) => {
548                    Ok(Some(Value::Timestamp(datetime::datetime_to_micros(
549                        &NaiveDateTime::parse_from_str(&s, "%Y-%m-%dT%H:%M:%S%.f")?,
550                    ))))
551                }
552                (PrimitiveType::Timestamptz, JsonValue::String(s)) => Ok(Some(Value::TimestampTZ(
553                    datetime::datetimetz_to_micros(&Utc.from_utc_datetime(
554                        &NaiveDateTime::parse_from_str(&s, "%Y-%m-%dT%H:%M:%S%.f+00:00")?,
555                    )),
556                ))),
557                (PrimitiveType::String, JsonValue::String(s)) => Ok(Some(Value::String(s))),
558                (PrimitiveType::Uuid, JsonValue::String(s)) => {
559                    Ok(Some(Value::UUID(Uuid::parse_str(&s)?)))
560                }
561                (PrimitiveType::Fixed(_), JsonValue::String(_)) => todo!(),
562                (PrimitiveType::Binary, JsonValue::String(_)) => todo!(),
563                (
564                    PrimitiveType::Decimal {
565                        precision: _,
566                        scale: _,
567                    },
568                    JsonValue::String(_),
569                ) => todo!(),
570                (_, JsonValue::Null) => Ok(None),
571                (i, j) => Err(Error::Type(i.to_string(), j.to_string())),
572            },
573            Type::Struct(schema) => {
574                if let JsonValue::Object(mut object) = value {
575                    Ok(Some(Value::Struct(Struct::from_iter(schema.iter().map(
576                        |field| {
577                            (
578                                field.name.clone(),
579                                object.remove(&field.name).and_then(|value| {
580                                    Value::try_from_json(value, &field.field_type)
581                                        .and_then(|value| {
582                                            value.ok_or(Error::InvalidFormat(
583                                                "key of map".to_string(),
584                                            ))
585                                        })
586                                        .ok()
587                                }),
588                            )
589                        },
590                    )))))
591                } else {
592                    Err(Error::Type(
593                        "json for a struct".to_string(),
594                        "object".to_string(),
595                    ))
596                }
597            }
598            Type::List(list) => {
599                if let JsonValue::Array(array) = value {
600                    Ok(Some(Value::List(
601                        array
602                            .into_iter()
603                            .map(|value| Value::try_from_json(value, &list.element))
604                            .collect::<Result<Vec<_>, Error>>()?,
605                    )))
606                } else {
607                    Err(Error::Type(
608                        "json for a list".to_string(),
609                        "array".to_string(),
610                    ))
611                }
612            }
613            Type::Map(map) => {
614                if let JsonValue::Object(mut object) = value {
615                    if let (Some(JsonValue::Array(keys)), Some(JsonValue::Array(values))) =
616                        (object.remove("keys"), object.remove("values"))
617                    {
618                        Ok(Some(Value::Map(BTreeMap::from_iter(
619                            keys.into_iter()
620                                .zip(values.into_iter())
621                                .map(|(key, value)| {
622                                    Ok((
623                                        Value::try_from_json(key, &map.key).and_then(|value| {
624                                            value.ok_or(Error::InvalidFormat(
625                                                "key of map".to_string(),
626                                            ))
627                                        })?,
628                                        Value::try_from_json(value, &map.value)?,
629                                    ))
630                                })
631                                .collect::<Result<Vec<_>, Error>>()?,
632                        ))))
633                    } else {
634                        Err(Error::Type(
635                            "json for a list".to_string(),
636                            "array".to_string(),
637                        ))
638                    }
639                } else {
640                    Err(Error::Type(
641                        "json for a list".to_string(),
642                        "array".to_string(),
643                    ))
644                }
645            }
646        }
647    }
648
649    /// Returns the Iceberg Type that corresponds to this Value
650    ///
651    /// # Returns
652    /// * The Type (primitive or complex) that matches this Value's variant
653    ///
654    /// # Note
655    /// Currently only implemented for primitive types. Complex types like
656    /// structs, lists, and maps will cause a panic.
657    pub fn datatype(&self) -> Type {
658        match self {
659            Value::Boolean(_) => Type::Primitive(PrimitiveType::Boolean),
660            Value::Int(_) => Type::Primitive(PrimitiveType::Int),
661            Value::LongInt(_) => Type::Primitive(PrimitiveType::Long),
662            Value::Float(_) => Type::Primitive(PrimitiveType::Float),
663            Value::Double(_) => Type::Primitive(PrimitiveType::Double),
664            Value::Date(_) => Type::Primitive(PrimitiveType::Date),
665            Value::Time(_) => Type::Primitive(PrimitiveType::Time),
666            Value::Timestamp(_) => Type::Primitive(PrimitiveType::Timestamp),
667            Value::TimestampTZ(_) => Type::Primitive(PrimitiveType::Timestamptz),
668            Value::Fixed(len, _) => Type::Primitive(PrimitiveType::Fixed(*len as u64)),
669            Value::Binary(_) => Type::Primitive(PrimitiveType::Binary),
670            Value::String(_) => Type::Primitive(PrimitiveType::String),
671            Value::UUID(_) => Type::Primitive(PrimitiveType::Uuid),
672            Value::Decimal(dec) => Type::Primitive(PrimitiveType::Decimal {
673                precision: 38,
674                scale: dec.scale(),
675            }),
676            _ => unimplemented!(),
677        }
678    }
679
680    /// Converts this Value into a boxed Any trait object
681    ///
682    /// # Returns
683    /// * `Box<dyn Any>` containing the underlying value
684    ///
685    /// # Note
686    /// Currently only implemented for primitive types. Complex types like
687    /// structs, lists, and maps will panic with unimplemented!()
688    pub fn into_any(self) -> Box<dyn Any> {
689        match self {
690            Value::Boolean(any) => Box::new(any),
691            Value::Int(any) => Box::new(any),
692            Value::LongInt(any) => Box::new(any),
693            Value::Float(any) => Box::new(any),
694            Value::Double(any) => Box::new(any),
695            Value::Date(any) => Box::new(any),
696            Value::Time(any) => Box::new(any),
697            Value::Timestamp(any) => Box::new(any),
698            Value::TimestampTZ(any) => Box::new(any),
699            Value::Fixed(_, any) => Box::new(any),
700            Value::Binary(any) => Box::new(any),
701            Value::String(any) => Box::new(any),
702            Value::UUID(any) => Box::new(any),
703            Value::Decimal(any) => Box::new(any),
704            _ => unimplemented!(),
705        }
706    }
707
708    /// Attempts to cast this Value to a different Type
709    ///
710    /// # Arguments
711    /// * `data_type` - The target Type to cast to
712    ///
713    /// # Returns
714    /// * `Ok(Value)` - Successfully cast Value of the target type
715    /// * `Err(Error)` - If the value cannot be cast to the target type
716    ///
717    /// # Note
718    /// Currently supports casting between numeric types (Int -> Long, Int -> Date, etc)
719    /// and temporal types (Long -> Time/Timestamp/TimestampTZ).
720    /// Returns the original value if the target type matches the current type.
721    pub fn cast(self, data_type: &Type) -> Result<Self, Error> {
722        if self.datatype() == *data_type {
723            Ok(self)
724        } else {
725            match (self, data_type) {
726                (Value::Int(input), Type::Primitive(PrimitiveType::Long)) => {
727                    Ok(Value::LongInt(input as i64))
728                }
729                (Value::Int(input), Type::Primitive(PrimitiveType::Date)) => Ok(Value::Date(input)),
730                (Value::LongInt(input), Type::Primitive(PrimitiveType::Time)) => {
731                    Ok(Value::Time(input))
732                }
733                (Value::LongInt(input), Type::Primitive(PrimitiveType::Timestamp)) => {
734                    Ok(Value::Timestamp(input))
735                }
736                (Value::LongInt(input), Type::Primitive(PrimitiveType::Timestamptz)) => {
737                    Ok(Value::TimestampTZ(input))
738                }
739                _ => Err(Error::NotSupported("cast".to_string())),
740            }
741        }
742    }
743}
744
/// Sign-extends a big-endian two's-complement byte slice to `N` bytes
///
/// Adapted from the arrow-rs repo/parquet crate:
/// https://github.com/apache/arrow-rs/blob/b25c441745602c9967b1e3cc4a28bc469cfb1311/parquet/src/arrow/buffer/bit_util.rs#L54
///
/// # Arguments
/// * `b` - Big-endian bytes, at most `N` long
///
/// # Returns
/// * `[u8; N]` holding `b` right-aligned. The leading bytes are filled with
///   `0xFF` when the most significant bit of `b` is set and with `0x00`
///   otherwise. An empty slice yields all zeros.
///
/// # Panics
/// Panics if `b` is longer than `N`.
pub fn sign_extend_be<const N: usize>(b: &[u8]) -> [u8; N] {
    assert!(b.len() <= N, "Array too large, expected less than {N}");
    // `first()` rather than `b[0]`: an empty slice is treated as zero instead
    // of panicking with an out-of-bounds index.
    let is_negative = b.first().is_some_and(|msb| msb & 0x80 != 0);
    let mut result = if is_negative { [0xFFu8; N] } else { [0u8; N] };
    // The assertion above guarantees that `N - b.len()` cannot underflow.
    result[N - b.len()..].copy_from_slice(b);
    result
}
757
758impl From<&Value> for JsonValue {
759    fn from(value: &Value) -> Self {
760        match value {
761            Value::Boolean(val) => JsonValue::Bool(*val),
762            Value::Int(val) => JsonValue::Number((*val).into()),
763            Value::LongInt(val) => JsonValue::Number((*val).into()),
764            Value::Float(val) => match Number::from_f64(val.0 as f64) {
765                Some(number) => JsonValue::Number(number),
766                None => JsonValue::Null,
767            },
768            Value::Double(val) => match Number::from_f64(val.0) {
769                Some(number) => JsonValue::Number(number),
770                None => JsonValue::Null,
771            },
772            Value::Date(val) => JsonValue::String(datetime::days_to_date(*val).to_string()),
773            Value::Time(val) => JsonValue::String(datetime::micros_to_time(*val).to_string()),
774            Value::Timestamp(val) => JsonValue::String(
775                datetime::micros_to_datetime(*val)
776                    .format("%Y-%m-%dT%H:%M:%S%.f")
777                    .to_string(),
778            ),
779            Value::TimestampTZ(val) => JsonValue::String(
780                datetime::micros_to_datetimetz(*val)
781                    .format("%Y-%m-%dT%H:%M:%S%.f+00:00")
782                    .to_string(),
783            ),
784            Value::String(val) => JsonValue::String(val.clone()),
785            Value::UUID(val) => JsonValue::String(val.to_string()),
786            Value::Fixed(_, val) => {
787                JsonValue::String(val.iter().fold(String::new(), |mut acc, x| {
788                    acc.push_str(&format!("{x:x}"));
789                    acc
790                }))
791            }
792            Value::Binary(val) => {
793                JsonValue::String(val.iter().fold(String::new(), |mut acc, x| {
794                    acc.push_str(&format!("{x:x}"));
795                    acc
796                }))
797            }
798            Value::Decimal(_) => todo!(),
799
800            Value::Struct(s) => JsonValue::Object(JsonMap::from_iter(
801                s.lookup
802                    .iter()
803                    .map(|(k, v)| (k, &s.fields[*v]))
804                    .map(|(id, value)| {
805                        let json: JsonValue = match value {
806                            Some(val) => val.into(),
807                            None => JsonValue::Null,
808                        };
809                        (id.to_string(), json)
810                    }),
811            )),
812            Value::List(list) => JsonValue::Array(
813                list.iter()
814                    .map(|opt| match opt {
815                        Some(literal) => literal.into(),
816                        None => JsonValue::Null,
817                    })
818                    .collect(),
819            ),
820            Value::Map(map) => {
821                let mut object = JsonMap::with_capacity(2);
822                object.insert(
823                    "keys".to_string(),
824                    JsonValue::Array(map.keys().map(|literal| literal.into()).collect()),
825                );
826                object.insert(
827                    "values".to_string(),
828                    JsonValue::Array(
829                        map.values()
830                            .map(|literal| match literal {
831                                Some(literal) => literal.into(),
832                                None => JsonValue::Null,
833                            })
834                            .collect(),
835                    ),
836                );
837                JsonValue::Object(object)
838            }
839        }
840    }
841}
842
843mod datetime {
844    #[inline]
845    pub(crate) fn date_to_years(date: &NaiveDate) -> i32 {
846        date.years_since(
847            // This is always the same and shouldn't fail
848            NaiveDate::from_ymd_opt(YEARS_BEFORE_UNIX_EPOCH, 1, 1).unwrap(),
849        )
850        .unwrap() as i32
851    }
852
853    #[inline]
854    pub(crate) fn date_to_months(date: &NaiveDate) -> i32 {
855        let years = date
856            .years_since(
857                // This is always the same and shouldn't fail
858                NaiveDate::from_ymd_opt(YEARS_BEFORE_UNIX_EPOCH, 1, 1).unwrap(),
859            )
860            .unwrap() as i32;
861        let months = date.month();
862        years * 12 + months as i32
863    }
864
865    #[inline]
866    pub(crate) fn datetime_to_months(date: &NaiveDateTime) -> i32 {
867        let years = date.year() - YEARS_BEFORE_UNIX_EPOCH;
868        let months = date.month();
869        years * 12 + months as i32
870    }
871
872    #[inline]
873    pub(crate) fn date_to_days(date: &NaiveDate) -> i32 {
874        date.signed_duration_since(
875            // This is always the same and shouldn't fail
876            NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(),
877        )
878        .num_days() as i32
879    }
880
881    #[inline]
882    pub(crate) fn days_to_date(days: i32) -> NaiveDate {
883        // This shouldn't fail until the year 262000
884        DateTime::from_timestamp(days as i64 * 86_400, 0)
885            .unwrap()
886            .naive_utc()
887            .date()
888    }
889
890    #[inline]
891    pub(crate) fn time_to_microseconds(time: &NaiveTime) -> i64 {
892        time.signed_duration_since(
893            // This is always the same and shouldn't fail
894            NaiveTime::from_num_seconds_from_midnight_opt(0, 0).unwrap(),
895        )
896        .num_microseconds()
897        .unwrap()
898    }
899
900    #[inline]
901    pub(crate) fn micros_to_time(micros: i64) -> NaiveTime {
902        let (secs, rem) = (micros / 1_000_000, micros % 1_000_000);
903
904        NaiveTime::from_num_seconds_from_midnight_opt(secs as u32, rem as u32 * 1_000).unwrap()
905    }
906
907    #[inline]
908    pub(crate) fn datetime_to_micros(time: &NaiveDateTime) -> i64 {
909        time.and_utc().timestamp_micros()
910    }
911
912    #[inline]
913    pub(crate) fn micros_to_datetime(time: i64) -> NaiveDateTime {
914        DateTime::from_timestamp_micros(time).unwrap().naive_utc()
915    }
916
917    #[inline]
918    pub(crate) fn datetime_to_days(time: &NaiveDateTime) -> i64 {
919        time.signed_duration_since(
920            // This is always the same and shouldn't fail
921            DateTime::from_timestamp_micros(0).unwrap().naive_utc(),
922        )
923        .num_days()
924    }
925
926    #[inline]
927    pub(crate) fn datetime_to_hours(time: &NaiveDateTime) -> i64 {
928        time.signed_duration_since(
929            // This is always the same and shouldn't fail
930            DateTime::from_timestamp_micros(0).unwrap().naive_utc(),
931        )
932        .num_hours()
933    }
934
935    use chrono::{DateTime, Datelike, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
936
937    use super::YEARS_BEFORE_UNIX_EPOCH;
938
939    #[inline]
940    pub(crate) fn datetimetz_to_micros(time: &DateTime<Utc>) -> i64 {
941        time.timestamp_micros()
942    }
943
944    #[inline]
945    pub(crate) fn micros_to_datetimetz(micros: i64) -> DateTime<Utc> {
946        let (secs, rem) = (micros / 1_000_000, micros % 1_000_000);
947
948        Utc.from_utc_datetime(
949            // This shouldn't fail until the year 262000
950            &DateTime::from_timestamp(secs, rem as u32 * 1_000)
951                .unwrap()
952                .naive_utc(),
953        )
954    }
955}
956
/// A trait for types that support fallible subtraction
///
/// This trait is similar to the standard `Sub` trait, but it takes both
/// operands by reference and returns a `Result`, so an implementation can
/// report operands it cannot subtract instead of panicking.
///
/// The `Sized` supertrait is required because `try_sub` returns `Self` by
/// value.
pub trait TrySub: Sized {
    /// Computes `self - other`
    ///
    /// # Returns
    /// * `Ok(Self)` - The difference of the two operands
    /// * `Err(Error)` - If the implementation cannot subtract them
    fn try_sub(&self, other: &Self) -> Result<Self, Error>;
}
967
968impl<T: Sub<Output = T> + Copy> TrySub for T {
969    fn try_sub(&self, other: &Self) -> Result<Self, Error> {
970        Ok(*self - *other)
971    }
972}
973
974impl TrySub for Value {
975    fn try_sub(&self, other: &Self) -> Result<Self, Error> {
976        match (self, other) {
977            (Value::Int(own), Value::Int(other)) => Ok(Value::Int(own - other)),
978            (Value::LongInt(own), Value::LongInt(other)) => Ok(Value::LongInt(own - other)),
979            (Value::Float(own), Value::Float(other)) => Ok(Value::Float(*own - *other)),
980            (Value::Double(own), Value::Double(other)) => Ok(Value::Double(*own - *other)),
981            (Value::Date(own), Value::Date(other)) => Ok(Value::Date(own - other)),
982            (Value::Time(own), Value::Time(other)) => Ok(Value::Time(own - other)),
983            (Value::Timestamp(own), Value::Timestamp(other)) => Ok(Value::Timestamp(own - other)),
984            (Value::TimestampTZ(own), Value::TimestampTZ(other)) => {
985                Ok(Value::TimestampTZ(own - other))
986            }
987            (Value::String(own), Value::String(other)) => {
988                Ok(Value::LongInt(sub_string(own, other) as i64))
989            }
990            (Value::UUID(own), Value::UUID(other)) => {
991                let (own1, own2, own3, own4) = own.to_fields_le();
992                let (other1, other2, other3, other4) = other.to_fields_le();
993                let mut sub4 = [0; 8];
994                for i in 0..own4.len() {
995                    sub4[i] = own4[i] - other4[i];
996                }
997                Ok(Value::UUID(Uuid::from_fields_le(
998                    own1 - other1,
999                    own2 - other2,
1000                    own3 - other3,
1001                    &sub4,
1002                )))
1003            }
1004            (Value::Fixed(own_size, own), Value::Fixed(other_size, other)) => Ok(Value::Fixed(
1005                if own_size <= other_size {
1006                    *own_size
1007                } else if own_size > other_size {
1008                    *other_size
1009                } else {
1010                    panic!("Size must be either smaller, equal or larger");
1011                },
1012                own.iter()
1013                    .zip(other.iter())
1014                    .map(|(own, other)| own - other)
1015                    .collect(),
1016            )),
1017            (x, y) => Err(Error::Type(
1018                x.datatype().to_string(),
1019                y.datatype().to_string(),
1020            )),
1021        }
1022    }
1023}
1024
/// Calculates a numeric distance between two strings
///
/// # Arguments
/// * `left` - First string to compare
/// * `right` - Second string to compare
///
/// # Returns
/// * For strings whose differing characters are all base-36 digits, the sum
///   of the squared digit differences
/// * For other strings, the absolute difference of their hash values
///
/// The characters are compared pairwise, for at most 256 pairs and only as
/// far as the shorter string reaches. The common prefix is skipped. If any
/// remaining pair contains a character that is not a base-36 digit, the
/// function falls back to the hash-based distance.
///
/// Each string is hashed with its own fresh hasher, so the fallback depends
/// only on the two inputs and gives the same result in either argument order.
fn sub_string(left: &str, right: &str) -> u64 {
    /// Hash of `text` alone, computed with a new hasher
    fn hash_of(text: &str) -> u64 {
        let mut hasher = DefaultHasher::new();
        hasher.write(text.as_bytes());
        hasher.finish()
    }

    let base36_distance = left
        .chars()
        .zip(right.chars())
        .take(256)
        .skip_while(|(l, r)| l == r)
        .try_fold(0u32, |acc, (l, r)| {
            // `None` from either digit aborts the fold and selects the fallback.
            let (l, r) = (l.to_digit(36)?, r.to_digit(36)?);
            Some(acc + l.abs_diff(r).pow(2))
        });

    match base36_distance {
        Some(distance) => u64::from(distance),
        None => hash_of(left).abs_diff(hash_of(right)),
    }
}
1061
#[cfg(test)]
mod tests {
    //! Unit tests for JSON and Avro-bytes conversions of `Value`, for the
    //! partition transforms and for the string distance helper.

    use super::*;
    use crate::{
        spec::types::{ListType, MapType, StructType},
        types::StructField,
    };

    /// Checks both JSON directions for one literal: parsing `json` as
    /// `expected_type` must yield `expected_literal`, and converting
    /// `expected_literal` back to JSON must reproduce the parsed input.
    fn check_json_serde(json: &str, expected_literal: Value, expected_type: &Type) {
        let raw_json_value = serde_json::from_str::<JsonValue>(json).unwrap();
        let desered_literal = Value::try_from_json(raw_json_value.clone(), expected_type).unwrap();
        assert_eq!(desered_literal, Some(expected_literal.clone()));

        let expected_json_value: JsonValue = (&expected_literal).into();
        let sered_json = serde_json::to_string(&expected_json_value).unwrap();
        let parsed_json_value = serde_json::from_str::<JsonValue>(&sered_json).unwrap();

        assert_eq!(parsed_json_value, raw_json_value);
    }

    /// Checks that `input` decodes to `expected_literal` as `expected_type`,
    /// both directly and after a round trip through an Avro `bytes` writer
    /// and reader.
    fn check_avro_bytes_serde(input: Vec<u8>, expected_literal: Value, expected_type: &Type) {
        let raw_schema = r#""bytes""#;
        let schema = apache_avro::Schema::parse_str(raw_schema).unwrap();

        let bytes = ByteBuf::from(input);
        let literal = Value::try_from_bytes(&bytes, expected_type).unwrap();
        assert_eq!(literal, expected_literal);

        let mut writer = apache_avro::Writer::new(&schema, Vec::new());
        writer.append_ser(bytes).unwrap();
        let encoded = writer.into_inner().unwrap();
        let reader = apache_avro::Reader::new(&*encoded).unwrap();

        for record in reader {
            let result = apache_avro::from_value::<ByteBuf>(&record.unwrap()).unwrap();
            let desered_literal = Value::try_from_bytes(&result, expected_type).unwrap();
            assert_eq!(desered_literal, expected_literal);
        }
    }

    #[test]
    fn json_boolean() {
        let record = r#"true"#;

        check_json_serde(
            record,
            Value::Boolean(true),
            &Type::Primitive(PrimitiveType::Boolean),
        );
    }

    #[test]
    fn json_int() {
        let record = r#"32"#;

        check_json_serde(record, Value::Int(32), &Type::Primitive(PrimitiveType::Int));
    }

    #[test]
    fn json_long() {
        let record = r#"32"#;

        check_json_serde(
            record,
            Value::LongInt(32),
            &Type::Primitive(PrimitiveType::Long),
        );
    }

    #[test]
    fn json_float() {
        let record = r#"1.0"#;

        check_json_serde(
            record,
            Value::Float(OrderedFloat(1.0)),
            &Type::Primitive(PrimitiveType::Float),
        );
    }

    #[test]
    fn json_double() {
        let record = r#"1.0"#;

        check_json_serde(
            record,
            Value::Double(OrderedFloat(1.0)),
            &Type::Primitive(PrimitiveType::Double),
        );
    }

    #[test]
    fn json_date() {
        let record = r#""2017-11-16""#;

        check_json_serde(
            record,
            Value::Date(17486),
            &Type::Primitive(PrimitiveType::Date),
        );
    }

    #[test]
    fn json_time() {
        let record = r#""22:31:08.123456""#;

        check_json_serde(
            record,
            Value::Time(81068123456),
            &Type::Primitive(PrimitiveType::Time),
        );
    }

    #[test]
    fn json_timestamp() {
        let record = r#""2017-11-16T22:31:08.123456""#;

        check_json_serde(
            record,
            Value::Timestamp(1510871468123456),
            &Type::Primitive(PrimitiveType::Timestamp),
        );
    }

    #[test]
    fn json_timestamptz() {
        let record = r#""2017-11-16T22:31:08.123456+00:00""#;

        check_json_serde(
            record,
            Value::TimestampTZ(1510871468123456),
            &Type::Primitive(PrimitiveType::Timestamptz),
        );
    }

    #[test]
    fn json_string() {
        let record = r#""iceberg""#;

        check_json_serde(
            record,
            Value::String("iceberg".to_string()),
            &Type::Primitive(PrimitiveType::String),
        );
    }

    #[test]
    fn json_uuid() {
        let record = r#""f79c3e09-677c-4bbd-a479-3f349cb785e7""#;

        check_json_serde(
            record,
            Value::UUID(Uuid::parse_str("f79c3e09-677c-4bbd-a479-3f349cb785e7").unwrap()),
            &Type::Primitive(PrimitiveType::Uuid),
        );
    }

    #[test]
    fn json_struct() {
        let record = r#"{"id": 1, "name": "bar", "address": null}"#;

        check_json_serde(
            record,
            Value::Struct(Struct::from_iter(vec![
                ("id".to_string(), Some(Value::Int(1))),
                ("name".to_string(), Some(Value::String("bar".to_string()))),
                ("address".to_string(), None),
            ])),
            &Type::Struct(StructType::new(vec![
                StructField {
                    id: 1,
                    name: "id".to_string(),
                    required: true,
                    field_type: Type::Primitive(PrimitiveType::Int),
                    doc: None,
                },
                StructField {
                    id: 2,
                    name: "name".to_string(),
                    required: false,
                    field_type: Type::Primitive(PrimitiveType::String),
                    doc: None,
                },
                StructField {
                    id: 3,
                    name: "address".to_string(),
                    required: false,
                    field_type: Type::Primitive(PrimitiveType::String),
                    doc: None,
                },
            ])),
        );
    }

    #[test]
    fn json_list() {
        let record = r#"[1, 2, 3, null]"#;

        check_json_serde(
            record,
            Value::List(vec![
                Some(Value::Int(1)),
                Some(Value::Int(2)),
                Some(Value::Int(3)),
                None,
            ]),
            &Type::List(ListType {
                element_id: 0,
                element_required: true,
                element: Box::new(Type::Primitive(PrimitiveType::Int)),
            }),
        );
    }

    #[test]
    fn json_map() {
        let record = r#"{ "keys": ["a", "b", "c"], "values": [1, 2, null] }"#;

        check_json_serde(
            record,
            Value::Map(BTreeMap::from([
                (Value::String("a".to_string()), Some(Value::Int(1))),
                (Value::String("b".to_string()), Some(Value::Int(2))),
                (Value::String("c".to_string()), None),
            ])),
            &Type::Map(MapType {
                key_id: 0,
                key: Box::new(Type::Primitive(PrimitiveType::String)),
                value_id: 1,
                value: Box::new(Type::Primitive(PrimitiveType::Int)),
                value_required: true,
            }),
        );
    }

    #[test]
    fn avro_bytes_boolean() {
        let bytes = vec![1u8];

        check_avro_bytes_serde(
            bytes,
            Value::Boolean(true),
            &Type::Primitive(PrimitiveType::Boolean),
        );
    }

    #[test]
    fn avro_bytes_int() {
        // 32 as a 4-byte little-endian integer
        let bytes = vec![32u8, 0u8, 0u8, 0u8];

        check_avro_bytes_serde(bytes, Value::Int(32), &Type::Primitive(PrimitiveType::Int));
    }

    #[test]
    fn avro_bytes_long() {
        // 32 as an 8-byte little-endian integer
        let bytes = vec![32u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];

        check_avro_bytes_serde(
            bytes,
            Value::LongInt(32),
            &Type::Primitive(PrimitiveType::Long),
        );
    }

    #[test]
    fn avro_bytes_float() {
        // Little-endian bytes of 0x3F80_0000, the IEEE 754 encoding of 1.0f32
        let bytes = vec![0u8, 0u8, 128u8, 63u8];

        check_avro_bytes_serde(
            bytes,
            Value::Float(OrderedFloat(1.0)),
            &Type::Primitive(PrimitiveType::Float),
        );
    }

    #[test]
    fn avro_bytes_double() {
        // Little-endian bytes of 0x3FF0_0000_0000_0000, the IEEE 754 encoding of 1.0f64
        let bytes = vec![0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 240u8, 63u8];

        check_avro_bytes_serde(
            bytes,
            Value::Double(OrderedFloat(1.0)),
            &Type::Primitive(PrimitiveType::Double),
        );
    }

    #[test]
    fn avro_bytes_string() {
        // ASCII bytes of "iceberg"
        let bytes = vec![105u8, 99u8, 101u8, 98u8, 101u8, 114u8, 103u8];

        check_avro_bytes_serde(
            bytes,
            Value::String("iceberg".to_string()),
            &Type::Primitive(PrimitiveType::String),
        );
    }

    #[test]
    fn avro_bytes_decimal() {
        let value = Value::Decimal(Decimal::from_str_exact("104899.50").unwrap());

        // Test serialization
        // The trailing bytes 0xA0105E are the big-endian unscaled value 10489950.
        let byte_buf: ByteBuf = value.clone().into();
        let bytes: Vec<u8> = byte_buf.into_vec();
        assert_eq!(
            bytes,
            vec![0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 160u8, 16u8, 94u8]
        );

        // Test deserialization
        check_avro_bytes_serde(
            bytes,
            value,
            &Type::Primitive(PrimitiveType::Decimal {
                precision: 15,
                scale: 2,
            }),
        );
    }

    #[test]
    fn test_transform_identity() {
        let value = Value::Int(42);
        let result = value.transform(&Transform::Identity).unwrap();
        assert_eq!(result, Value::Int(42));
    }

    #[test]
    fn test_transform_bucket() {
        // Only the result variant is checked, not the bucket number itself.
        let value = Value::String("test".to_string());
        let result = value.transform(&Transform::Bucket(10)).unwrap();
        assert!(matches!(result, Value::Int(_)));
    }

    #[test]
    fn test_transform_truncate_int() {
        let value = Value::Int(42);
        let result = value.transform(&Transform::Truncate(10)).unwrap();
        assert_eq!(result, Value::Int(40));
    }

    #[test]
    fn test_transform_truncate_long_int() {
        let value = Value::LongInt(1234567890);
        let result = value.transform(&Transform::Truncate(1000000)).unwrap();
        assert_eq!(result, Value::LongInt(1234000000));
    }

    #[test]
    fn test_transform_truncate_string() {
        let value = Value::String("Hello, World!".to_string());
        let result = value.transform(&Transform::Truncate(5)).unwrap();
        assert_eq!(result, Value::String("Hello".to_string()));
    }

    #[test]
    fn test_transform_truncate_unsupported() {
        let value = Value::Boolean(true);
        let result = value.transform(&Transform::Truncate(5));
        assert!(matches!(result, Err(Error::NotSupported(_))));
    }

    #[test]
    fn test_transform_year_date() {
        // Day 19478 is 2023-05-01: 2023 - 1970 = 53
        let value = Value::Date(19478);
        let result = value.transform(&Transform::Year).unwrap();
        assert_eq!(result, Value::Int(53));

        // Day 19523 is 2023-06-15
        let value = Value::Date(19523);
        let result = value.transform(&Transform::Year).unwrap();
        assert_eq!(result, Value::Int(53));

        // Day 19723 is 2024-01-01: 2024 - 1970 = 54
        let value = Value::Date(19723);
        let result = value.transform(&Transform::Year).unwrap();
        assert_eq!(result, Value::Int(54));
    }

    #[test]
    fn test_transform_year_timestamp() {
        // 2023-05-01T10:30:00Z
        let value = Value::Timestamp(1682937000000000);
        let result = value.transform(&Transform::Year).unwrap();
        assert_eq!(result, Value::Int(53));

        // 2023-06-15T14:45:30Z
        let value = Value::Timestamp(1686840330000000);
        let result = value.transform(&Transform::Year).unwrap();
        assert_eq!(result, Value::Int(53));

        // 2024-01-01T00:00:00Z
        let value = Value::Timestamp(1704067200000000);
        let result = value.transform(&Transform::Year).unwrap();
        assert_eq!(result, Value::Int(54));
    }

    #[test]
    fn test_transform_month_date() {
        let value = Value::Date(19478);
        let result = value.transform(&Transform::Month).unwrap();
        assert_eq!(result, Value::Int(641)); // 2023-05: 53 * 12 + 5 (1-based month)

        let value = Value::Date(19523);
        let result = value.transform(&Transform::Month).unwrap();
        assert_eq!(result, Value::Int(642)); // 2023-06: 53 * 12 + 6 (1-based month)

        let value = Value::Date(19723);
        let result = value.transform(&Transform::Month).unwrap();
        assert_eq!(result, Value::Int(649)); // 2024-01: 54 * 12 + 1 (1-based month)
    }

    #[test]
    fn test_transform_month_timestamp() {
        let value = Value::Timestamp(1682937000000000);
        let result = value.transform(&Transform::Month).unwrap();
        assert_eq!(result, Value::Int(641)); // 2023-05: 53 * 12 + 5 (1-based month)

        let value = Value::Timestamp(1686840330000000);
        let result = value.transform(&Transform::Month).unwrap();
        assert_eq!(result, Value::Int(642)); // 2023-06: 53 * 12 + 6 (1-based month)

        let value = Value::Timestamp(1704067200000000);
        let result = value.transform(&Transform::Month).unwrap();
        assert_eq!(result, Value::Int(649)); // 2024-01: 54 * 12 + 1 (1-based month)
    }

    #[test]
    fn test_transform_month_unsupported() {
        // NOTE(review): the three checks below are identical; they presumably
        // were meant to cover different unsupported value types.
        let value = Value::Boolean(true);
        let result = value.transform(&Transform::Month);
        assert!(matches!(result, Err(Error::NotSupported(_))));

        let value = Value::Boolean(true);
        let result = value.transform(&Transform::Month);
        assert!(matches!(result, Err(Error::NotSupported(_))));

        let value = Value::Boolean(true);
        let result = value.transform(&Transform::Month);
        assert!(matches!(result, Err(Error::NotSupported(_))));
    }

    #[test]
    fn test_transform_day_date() {
        let value = Value::Date(19478);
        let result = value.transform(&Transform::Day).unwrap();
        assert_eq!(result, Value::Int(19478)); // days since 1970-01-01, unchanged

        let value = Value::Date(19523);
        let result = value.transform(&Transform::Day).unwrap();
        assert_eq!(result, Value::Int(19523)); // days since 1970-01-01, unchanged

        let value = Value::Date(19723);
        let result = value.transform(&Transform::Day).unwrap();
        assert_eq!(result, Value::Int(19723)); // days since 1970-01-01, unchanged
    }

    #[test]
    fn test_transform_day_timestamp() {
        let value = Value::Timestamp(1682937000000000);
        let result = value.transform(&Transform::Day).unwrap();
        assert_eq!(result, Value::Int(19478)); // 2023-05-01, in days since 1970-01-01

        let value = Value::Timestamp(1686840330000000);
        let result = value.transform(&Transform::Day).unwrap();
        assert_eq!(result, Value::Int(19523)); // 2023-06-15, in days since 1970-01-01

        let value = Value::Timestamp(1704067200000000);
        let result = value.transform(&Transform::Day).unwrap();
        assert_eq!(result, Value::Int(19723)); // 2024-01-01, in days since 1970-01-01
    }

    #[test]
    fn test_transform_day_unsupported() {
        let value = Value::Boolean(true);
        let result = value.transform(&Transform::Day);
        assert!(matches!(result, Err(Error::NotSupported(_))));
    }

    #[test]
    fn test_transform_hour_timestamp() {
        let value = Value::Timestamp(1682937000000000);
        let result = value.transform(&Transform::Hour).unwrap();
        assert_eq!(result, Value::Int(467482)); // 2023-05-01T10:30:00Z: 19478 * 24 + 10

        let value = Value::Timestamp(1686840330000000);
        let result = value.transform(&Transform::Hour).unwrap();
        assert_eq!(result, Value::Int(468566)); // 2023-06-15T14:45:30Z: 19523 * 24 + 14

        let value = Value::Timestamp(1704067200000000);
        let result = value.transform(&Transform::Hour).unwrap();
        assert_eq!(result, Value::Int(473352)); // 2024-01-01T00:00:00Z: 19723 * 24
    }

    #[test]
    fn test_transform_hour_unsupported() {
        let value = Value::Date(0);
        let result = value.transform(&Transform::Hour);
        assert!(matches!(result, Err(Error::NotSupported(_))));
    }

    #[test]
    fn test_sub_string() {
        // All characters are base-36 digits, so the result is the sum of the
        // squared digit differences; it is the same in both argument orders.
        assert_eq!(
            sub_string("zyxwvutsrqponmlkjihgfedcba", "abcdefghijklmnopqrstuvxyz"),
            5354
        );
        assert_eq!(
            sub_string("abcdefghijklmnopqrstuvxyz", "zyxwvutsrqponmlkjihgfedcba"),
            5354
        );
    }
}