Skip to main content

clickhouse_native_client/column/
column_value.rs

1//! ColumnValue - A value extracted from or to be inserted into a column
2//!
3//! This is similar to C++ clickhouse-cpp's ItemView, providing a type-tagged
4//! byte representation of column values.
5
6use crate::{
7    types::TypeCode,
8    Error,
9    Result,
10};
11use std::{
12    collections::hash_map::DefaultHasher,
13    hash::{
14        Hash,
15        Hasher,
16    },
17    sync::Arc,
18};
19
20/// A value from a column, stored as little-endian bytes with type information.
21/// Similar to C++ clickhouse-cpp's `ItemView` but owned.
22#[derive(Clone, Debug)]
23pub struct ColumnValue {
24    /// The ClickHouse type code for this value.
25    pub type_code: TypeCode,
26    /// The raw little-endian byte representation of the value.
27    pub data: Vec<u8>,
28}
29
30impl ColumnValue {
31    /// Create a `UInt8` value.
32    pub fn from_u8(value: u8) -> Self {
33        Self { type_code: TypeCode::UInt8, data: value.to_le_bytes().to_vec() }
34    }
35
36    /// Create a `UInt16` value.
37    pub fn from_u16(value: u16) -> Self {
38        Self {
39            type_code: TypeCode::UInt16,
40            data: value.to_le_bytes().to_vec(),
41        }
42    }
43
44    /// Create a `UInt32` value.
45    pub fn from_u32(value: u32) -> Self {
46        Self {
47            type_code: TypeCode::UInt32,
48            data: value.to_le_bytes().to_vec(),
49        }
50    }
51
52    /// Create a `UInt64` value.
53    pub fn from_u64(value: u64) -> Self {
54        Self {
55            type_code: TypeCode::UInt64,
56            data: value.to_le_bytes().to_vec(),
57        }
58    }
59
60    /// Create an `Int8` value.
61    pub fn from_i8(value: i8) -> Self {
62        Self { type_code: TypeCode::Int8, data: value.to_le_bytes().to_vec() }
63    }
64
65    /// Create an `Int16` value.
66    pub fn from_i16(value: i16) -> Self {
67        Self { type_code: TypeCode::Int16, data: value.to_le_bytes().to_vec() }
68    }
69
70    /// Create an `Int32` value.
71    pub fn from_i32(value: i32) -> Self {
72        Self { type_code: TypeCode::Int32, data: value.to_le_bytes().to_vec() }
73    }
74
75    /// Create an `Int64` value.
76    pub fn from_i64(value: i64) -> Self {
77        Self { type_code: TypeCode::Int64, data: value.to_le_bytes().to_vec() }
78    }
79
80    /// Create a `Float32` value.
81    pub fn from_f32(value: f32) -> Self {
82        Self {
83            type_code: TypeCode::Float32,
84            data: value.to_le_bytes().to_vec(),
85        }
86    }
87
88    /// Create a `Float64` value.
89    pub fn from_f64(value: f64) -> Self {
90        Self {
91            type_code: TypeCode::Float64,
92            data: value.to_le_bytes().to_vec(),
93        }
94    }
95
96    /// Create a `String` value.
97    pub fn from_string(value: &str) -> Self {
98        Self { type_code: TypeCode::String, data: value.as_bytes().to_vec() }
99    }
100
101    /// Create a Void/NULL value with no data.
102    pub fn void() -> Self {
103        Self { type_code: TypeCode::Void, data: Vec::new() }
104    }
105
106    /// Get as string (for String type)
107    pub fn as_string(&self) -> Result<&str> {
108        if self.type_code != TypeCode::String {
109            return Err(Error::TypeMismatch {
110                expected: "String".to_string(),
111                actual: format!("{:?}", self.type_code),
112            });
113        }
114        std::str::from_utf8(&self.data).map_err(|e| {
115            Error::Protocol(format!("Invalid UTF-8 in string: {}", e))
116        })
117    }
118
119    /// Get raw bytes
120    pub fn as_bytes(&self) -> &[u8] {
121        &self.data
122    }
123}
124
125/// Hash computation for LowCardinality deduplication
126/// Matches C++ computeHashKey using dual hashing
127pub fn compute_hash_key(value: &ColumnValue) -> (u64, u64) {
128    // Void type gets special (0, 0) hash to distinguish NULL from empty string
129    if value.type_code == TypeCode::Void {
130        return (0, 0);
131    }
132
133    // Hash 1: std::hash equivalent
134    let mut hasher = DefaultHasher::new();
135    value.data.hash(&mut hasher);
136    let hash1 = hasher.finish();
137
138    // Hash 2: CityHash64 equivalent (using simple FNV-1a for now)
139    let hash2 = fnv1a_64(&value.data);
140
141    (hash1, hash2)
142}
143
/// 64-bit FNV-1a hash of `data`.
///
/// Starting from the FNV offset basis, each input byte is XOR-ed into the
/// running hash, which is then multiplied by the FNV prime with wrapping
/// arithmetic. Empty input therefore yields the offset basis itself.
///
/// This is a placeholder for a real CityHash64 implementation.
fn fnv1a_64(data: &[u8]) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    data.iter().fold(FNV_OFFSET_BASIS, |acc, &byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
158
// Concrete column types that the item accessors below downcast to.
160use super::{
161    nullable::ColumnNullable,
162    numeric::*,
163    string::ColumnString,
164    Column,
165};
166
167/// Get item from a column by index
168/// Returns ColumnValue representation
169pub fn get_column_item(
170    column: &dyn Column,
171    index: usize,
172) -> Result<ColumnValue> {
173    use crate::types::Type;
174
175    if index >= column.size() {
176        return Err(Error::InvalidArgument(format!(
177            "Index {} out of bounds (size: {})",
178            index,
179            column.size()
180        )));
181    }
182
183    match column.column_type() {
184        Type::Simple(type_code) => match type_code {
185            TypeCode::UInt8 => {
186                if let Some(col) =
187                    column.as_any().downcast_ref::<ColumnUInt8>()
188                {
189                    Ok(ColumnValue::from_u8(col.at(index)))
190                } else {
191                    Err(Error::Protocol(
192                        "Failed to downcast UInt8 column".to_string(),
193                    ))
194                }
195            }
196            TypeCode::UInt16 => {
197                if let Some(col) =
198                    column.as_any().downcast_ref::<ColumnUInt16>()
199                {
200                    Ok(ColumnValue::from_u16(col.at(index)))
201                } else {
202                    Err(Error::Protocol(
203                        "Failed to downcast UInt16 column".to_string(),
204                    ))
205                }
206            }
207            TypeCode::UInt32 => {
208                if let Some(col) =
209                    column.as_any().downcast_ref::<ColumnUInt32>()
210                {
211                    Ok(ColumnValue::from_u32(col.at(index)))
212                } else {
213                    Err(Error::Protocol(
214                        "Failed to downcast UInt32 column".to_string(),
215                    ))
216                }
217            }
218            TypeCode::UInt64 => {
219                if let Some(col) =
220                    column.as_any().downcast_ref::<ColumnUInt64>()
221                {
222                    Ok(ColumnValue::from_u64(col.at(index)))
223                } else {
224                    Err(Error::Protocol(
225                        "Failed to downcast UInt64 column".to_string(),
226                    ))
227                }
228            }
229            TypeCode::Int8 => {
230                if let Some(col) = column.as_any().downcast_ref::<ColumnInt8>()
231                {
232                    Ok(ColumnValue::from_i8(col.at(index)))
233                } else {
234                    Err(Error::Protocol(
235                        "Failed to downcast Int8 column".to_string(),
236                    ))
237                }
238            }
239            TypeCode::Int16 => {
240                if let Some(col) =
241                    column.as_any().downcast_ref::<ColumnInt16>()
242                {
243                    Ok(ColumnValue::from_i16(col.at(index)))
244                } else {
245                    Err(Error::Protocol(
246                        "Failed to downcast Int16 column".to_string(),
247                    ))
248                }
249            }
250            TypeCode::Int32 => {
251                if let Some(col) =
252                    column.as_any().downcast_ref::<ColumnInt32>()
253                {
254                    Ok(ColumnValue::from_i32(col.at(index)))
255                } else {
256                    Err(Error::Protocol(
257                        "Failed to downcast Int32 column".to_string(),
258                    ))
259                }
260            }
261            TypeCode::Int64 => {
262                if let Some(col) =
263                    column.as_any().downcast_ref::<ColumnInt64>()
264                {
265                    Ok(ColumnValue::from_i64(col.at(index)))
266                } else {
267                    Err(Error::Protocol(
268                        "Failed to downcast Int64 column".to_string(),
269                    ))
270                }
271            }
272            TypeCode::Float32 => {
273                if let Some(col) =
274                    column.as_any().downcast_ref::<ColumnFloat32>()
275                {
276                    Ok(ColumnValue::from_f32(col.at(index)))
277                } else {
278                    Err(Error::Protocol(
279                        "Failed to downcast Float32 column".to_string(),
280                    ))
281                }
282            }
283            TypeCode::Float64 => {
284                if let Some(col) =
285                    column.as_any().downcast_ref::<ColumnFloat64>()
286                {
287                    Ok(ColumnValue::from_f64(col.at(index)))
288                } else {
289                    Err(Error::Protocol(
290                        "Failed to downcast Float64 column".to_string(),
291                    ))
292                }
293            }
294            TypeCode::String => {
295                if let Some(col) =
296                    column.as_any().downcast_ref::<ColumnString>()
297                {
298                    Ok(ColumnValue::from_string(&col.at(index)))
299                } else {
300                    Err(Error::Protocol(
301                        "Failed to downcast String column".to_string(),
302                    ))
303                }
304            }
305            _ => Err(Error::Protocol(format!(
306                "get_column_item not implemented for type {:?}",
307                type_code
308            ))),
309        },
310        Type::Nullable { nested_type: _ } => {
311            if let Some(col) = column.as_any().downcast_ref::<ColumnNullable>()
312            {
313                if col.is_null(index) {
314                    Ok(ColumnValue::void())
315                } else {
316                    get_column_item(col.nested_ref().as_ref(), index)
317                }
318            } else {
319                Err(Error::Protocol(
320                    "Failed to downcast Nullable column".to_string(),
321                ))
322            }
323        }
324        _ => Err(Error::Protocol(format!(
325            "get_column_item not implemented for type {}",
326            column.column_type().name()
327        ))),
328    }
329}
330
331/// Append item to a column
332pub fn append_column_item(
333    column: &mut dyn Column,
334    value: &ColumnValue,
335) -> Result<()> {
336    use crate::types::Type;
337
338    match column.column_type() {
339        Type::Simple(type_code) => {
340            if *type_code != value.type_code {
341                return Err(Error::TypeMismatch {
342                    expected: format!("{:?}", type_code),
343                    actual: format!("{:?}", value.type_code),
344                });
345            }
346
347            match type_code {
348                TypeCode::String => {
349                    if let Some(col) =
350                        column.as_any_mut().downcast_mut::<ColumnString>()
351                    {
352                        col.append(value.as_string()?);
353                        Ok(())
354                    } else {
355                        Err(Error::Protocol(
356                            "Failed to downcast String column".to_string(),
357                        ))
358                    }
359                }
360                TypeCode::UInt8 => {
361                    if let Some(col) =
362                        column.as_any_mut().downcast_mut::<ColumnUInt8>()
363                    {
364                        let val = u8::from_le_bytes(
365                            value.data.as_slice().try_into().map_err(
366                                |_| {
367                                    Error::Protocol(
368                                        "Invalid UInt8 data".to_string(),
369                                    )
370                                },
371                            )?,
372                        );
373                        col.append(val);
374                        Ok(())
375                    } else {
376                        Err(Error::Protocol(
377                            "Failed to downcast UInt8 column".to_string(),
378                        ))
379                    }
380                }
381                TypeCode::UInt64 => {
382                    if let Some(col) =
383                        column.as_any_mut().downcast_mut::<ColumnUInt64>()
384                    {
385                        let val = u64::from_le_bytes(
386                            value.data.as_slice().try_into().map_err(
387                                |_| {
388                                    Error::Protocol(
389                                        "Invalid UInt64 data".to_string(),
390                                    )
391                                },
392                            )?,
393                        );
394                        col.append(val);
395                        Ok(())
396                    } else {
397                        Err(Error::Protocol(
398                            "Failed to downcast UInt64 column".to_string(),
399                        ))
400                    }
401                }
402                // Add more types as needed
403                _ => Err(Error::Protocol(format!(
404                    "append_column_item not implemented for type {:?}",
405                    type_code
406                ))),
407            }
408        }
409        Type::Nullable { .. } => {
410            if let Some(col) =
411                column.as_any_mut().downcast_mut::<ColumnNullable>()
412            {
413                if value.type_code == TypeCode::Void {
414                    col.append_null();
415                    Ok(())
416                } else {
417                    // Get mutable access to the nested Arc<dyn Column>
418                    let nested_ref = col.nested_ref_mut();
419                    let nested_mut = Arc::get_mut(nested_ref).ok_or_else(|| {
420                        Error::Protocol(
421                            "Cannot append to shared nullable column - column has multiple references"
422                                .to_string(),
423                        )
424                    })?;
425                    append_column_item(nested_mut, value)?;
426                    col.append_non_null();
427                    Ok(())
428                }
429            } else {
430                Err(Error::Protocol(
431                    "Failed to downcast Nullable column".to_string(),
432                ))
433            }
434        }
435        _ => Err(Error::Protocol(format!(
436            "append_column_item not implemented for type {}",
437            column.column_type().name()
438        ))),
439    }
440}
441
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
    use super::*;

    /// Constructors tag the value and encode the payload as expected.
    #[test]
    fn test_column_value_primitives() {
        let number = ColumnValue::from_u64(42);
        assert_eq!(number.type_code, TypeCode::UInt64);
        assert_eq!(number.data, 42u64.to_le_bytes());

        let text = ColumnValue::from_string("hello");
        assert_eq!(text.type_code, TypeCode::String);
        assert_eq!(text.as_string().unwrap(), "hello");
    }

    /// Equal payloads hash equally; different payloads are expected to
    /// produce different keys.
    #[test]
    fn test_hash_computation() {
        let key_of = |text: &str| {
            compute_hash_key(&ColumnValue::from_string(text))
        };

        let first = key_of("test");
        let repeat = key_of("test");
        let other = key_of("different");

        // Same values should have same hash
        assert_eq!(first, repeat);
        // Different values should (likely) have different hash
        assert_ne!(first, other);
    }

    /// The Void value maps to the reserved (0, 0) key.
    #[test]
    fn test_void_hash() {
        let key = compute_hash_key(&ColumnValue::void());
        assert_eq!(key, (0, 0));
    }
}