Skip to main content

clickhouse_native_client/column/
string.rs

//! String column implementations
//!
//! **ClickHouse Documentation:**
//! - [String](https://clickhouse.com/docs/en/sql-reference/data-types/string)
//!   - Variable-length UTF-8 strings
//! - [FixedString](https://clickhouse.com/docs/en/sql-reference/data-types/fixedstring)
//!   - Fixed-length binary strings
//!
//! ## String Type
//!
//! Variable-length UTF-8 strings. Each string is prefixed with its length
//! (varint encoded).
//!
//! **Wire Format:**
//! ```text
//! For each string: [length:varint][bytes:UInt8 * length]
//! ```
//!
//! ## FixedString Type
//!
//! Fixed-length binary strings, zero-padded if shorter than the specified
//! size. Useful for storing UUIDs, hashes, or other fixed-size binary data.
//!
//! **Wire Format:**
//! ```text
//! [bytes:UInt8 * N]  // N is the FixedString size
//! ```
28
29use super::{
30    Column,
31    ColumnRef,
32};
33use crate::{
34    io::buffer_utils,
35    types::Type,
36    Error,
37    Result,
38};
39use bytes::{
40    Buf,
41    BufMut,
42    BytesMut,
43};
44use std::sync::Arc;
45
46/// Column for fixed-length strings (all strings padded to same length)
47///
48/// Stores binary data of exactly `N` bytes per element, zero-padded if needed.
49///
50/// **ClickHouse Reference:** <https://clickhouse.com/docs/en/sql-reference/data-types/fixedstring>
51pub struct ColumnFixedString {
52    type_: Type,
53    string_size: usize,
54    data: Vec<u8>,
55}
56
57impl ColumnFixedString {
58    /// Creates a new empty FixedString column, extracting the fixed size from
59    /// the type.
60    pub fn new(type_: Type) -> Self {
61        let string_size = match &type_ {
62            Type::FixedString { size } => *size,
63            _ => panic!("Expected FixedString type"),
64        };
65
66        Self { type_, string_size, data: Vec::new() }
67    }
68
69    /// Creates a new empty FixedString column with pre-allocated capacity for
70    /// the given number of elements.
71    pub fn with_capacity(type_: Type, capacity: usize) -> Self {
72        let string_size = match &type_ {
73            Type::FixedString { size } => *size,
74            _ => panic!("Expected FixedString type"),
75        };
76
77        Self {
78            type_,
79            string_size,
80            data: Vec::with_capacity(string_size * capacity),
81        }
82    }
83
84    /// Create a column with initial data (builder pattern)
85    pub fn with_data(mut self, data: Vec<String>) -> Self {
86        for s in data {
87            self.append(s);
88        }
89        self
90    }
91
92    /// Appends a string value, zero-padding it to the fixed size.
93    pub fn append(&mut self, s: String) {
94        let bytes = s.as_bytes();
95
96        if bytes.len() > self.string_size {
97            panic!(
98                "String too long for FixedString({}): got {} bytes",
99                self.string_size,
100                bytes.len()
101            );
102        }
103
104        // Append the string data
105        self.data.extend_from_slice(bytes);
106
107        // Pad with zeros if needed
108        if bytes.len() < self.string_size {
109            self.data
110                .resize(self.data.len() + (self.string_size - bytes.len()), 0);
111        }
112    }
113
114    /// Returns the string at the given index, or `None` if out of bounds.
115    pub fn get(&self, index: usize) -> Option<String> {
116        if index >= self.size() {
117            return None;
118        }
119
120        let start = index * self.string_size;
121        let end = start + self.string_size;
122        let bytes = &self.data[start..end];
123
124        // Trim null bytes from the end
125        let trimmed =
126            bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
127        Some(String::from_utf8_lossy(&bytes[..trimmed]).to_string())
128    }
129
130    /// Get value at index (for tests)
131    pub fn at(&self, index: usize) -> String {
132        self.get(index).unwrap()
133    }
134
135    /// Get the number of elements (alias for size())
136    pub fn len(&self) -> usize {
137        self.size()
138    }
139
140    /// Check if the column is empty
141    pub fn is_empty(&self) -> bool {
142        self.data.is_empty()
143    }
144
145    /// Returns the fixed byte size of each element in the column.
146    pub fn fixed_size(&self) -> usize {
147        self.string_size
148    }
149}
150
151impl Column for ColumnFixedString {
152    fn column_type(&self) -> &Type {
153        &self.type_
154    }
155
156    fn size(&self) -> usize {
157        self.data.len() / self.string_size
158    }
159
160    fn clear(&mut self) {
161        self.data.clear();
162    }
163
164    fn reserve(&mut self, new_cap: usize) {
165        self.data.reserve(self.string_size * new_cap);
166    }
167
168    fn append_column(&mut self, other: ColumnRef) -> Result<()> {
169        let other = other
170            .as_any()
171            .downcast_ref::<ColumnFixedString>()
172            .ok_or_else(|| Error::TypeMismatch {
173                expected: self.type_.name(),
174                actual: other.column_type().name(),
175            })?;
176
177        if self.string_size != other.string_size {
178            return Err(Error::TypeMismatch {
179                expected: format!("FixedString({})", self.string_size),
180                actual: format!("FixedString({})", other.string_size),
181            });
182        }
183
184        self.data.extend_from_slice(&other.data);
185        Ok(())
186    }
187
188    fn load_from_buffer(
189        &mut self,
190        buffer: &mut &[u8],
191        rows: usize,
192    ) -> Result<()> {
193        let total_bytes = self.string_size * rows;
194
195        if buffer.len() < total_bytes {
196            return Err(Error::Protocol(format!(
197                "Not enough data for {} FixedString({}) values: need {}, have {}",
198                rows, self.string_size, total_bytes, buffer.len()
199            )));
200        }
201
202        self.data.extend_from_slice(&buffer[..total_bytes]);
203        buffer.advance(total_bytes);
204        Ok(())
205    }
206
207    fn save_to_buffer(&self, buffer: &mut BytesMut) -> Result<()> {
208        buffer.put_slice(&self.data);
209        Ok(())
210    }
211
212    fn clone_empty(&self) -> ColumnRef {
213        Arc::new(ColumnFixedString::new(self.type_.clone()))
214    }
215
216    fn slice(&self, begin: usize, len: usize) -> Result<ColumnRef> {
217        if begin + len > self.size() {
218            return Err(Error::InvalidArgument(format!(
219                "Slice out of bounds: begin={}, len={}, size={}",
220                begin,
221                len,
222                self.size()
223            )));
224        }
225
226        let start = begin * self.string_size;
227        let end = start + len * self.string_size;
228
229        let mut result = ColumnFixedString::new(self.type_.clone());
230        result.data = self.data[start..end].to_vec();
231
232        Ok(Arc::new(result))
233    }
234
235    fn as_any(&self) -> &dyn std::any::Any {
236        self
237    }
238
239    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
240        self
241    }
242}
243
244/// Column for variable-length strings
245pub struct ColumnString {
246    type_: Type,
247    data: Vec<String>,
248}
249
250impl ColumnString {
251    /// Creates a new empty String column with the given type.
252    pub fn new(type_: Type) -> Self {
253        Self { type_, data: Vec::new() }
254    }
255
256    /// Creates a new empty String column with pre-allocated capacity for the
257    /// given number of elements.
258    pub fn with_capacity(type_: Type, capacity: usize) -> Self {
259        Self { type_, data: Vec::with_capacity(capacity) }
260    }
261
262    /// Creates a String column from an existing vector of strings.
263    pub fn from_vec(type_: Type, data: Vec<String>) -> Self {
264        Self { type_, data }
265    }
266
267    /// Create a column with initial data (builder pattern)
268    pub fn with_data(mut self, data: Vec<String>) -> Self {
269        self.data = data;
270        self
271    }
272
273    /// Appends a string value to the column.
274    pub fn append(&mut self, s: impl Into<String>) {
275        self.data.push(s.into());
276    }
277
278    /// Returns a reference to the string at the given index, or `None` if out
279    /// of bounds.
280    pub fn get(&self, index: usize) -> Option<&str> {
281        self.data.get(index).map(|s| s.as_str())
282    }
283
284    /// Get value at index (for tests)
285    pub fn at(&self, index: usize) -> String {
286        self.data[index].clone()
287    }
288
289    /// Get the number of elements (alias for size())
290    pub fn len(&self) -> usize {
291        self.data.len()
292    }
293
294    /// Check if the column is empty
295    pub fn is_empty(&self) -> bool {
296        self.data.is_empty()
297    }
298
299    /// Returns an iterator over the string values in the column.
300    pub fn iter(&self) -> impl Iterator<Item = &str> {
301        self.data.iter().map(|s| s.as_str())
302    }
303}
304
305impl Default for ColumnString {
306    fn default() -> Self {
307        Self::new(Type::string())
308    }
309}
310
311impl Column for ColumnString {
312    fn column_type(&self) -> &Type {
313        &self.type_
314    }
315
316    fn size(&self) -> usize {
317        self.data.len()
318    }
319
320    fn clear(&mut self) {
321        self.data.clear();
322    }
323
324    fn reserve(&mut self, new_cap: usize) {
325        self.data.reserve(new_cap);
326    }
327
328    fn append_column(&mut self, other: ColumnRef) -> Result<()> {
329        let other = other.as_any().downcast_ref::<ColumnString>().ok_or_else(
330            || Error::TypeMismatch {
331                expected: self.type_.name(),
332                actual: other.column_type().name(),
333            },
334        )?;
335
336        self.data.extend(other.data.iter().cloned());
337        Ok(())
338    }
339
340    fn load_from_buffer(
341        &mut self,
342        buffer: &mut &[u8],
343        rows: usize,
344    ) -> Result<()> {
345        self.data.reserve(rows);
346
347        for _ in 0..rows {
348            // Read varint length
349            let len = buffer_utils::read_varint(buffer)? as usize;
350
351            if buffer.len() < len {
352                return Err(Error::Protocol(format!(
353                    "Not enough data for string: need {}, have {}",
354                    len,
355                    buffer.len()
356                )));
357            }
358
359            // Read string data
360            let string_data = &buffer[..len];
361            let s = String::from_utf8(string_data.to_vec()).map_err(|e| {
362                Error::Protocol(format!("Invalid UTF-8 in string: {}", e))
363            })?;
364
365            self.data.push(s);
366            buffer.advance(len);
367        }
368
369        Ok(())
370    }
371
372    fn save_to_buffer(&self, buffer: &mut BytesMut) -> Result<()> {
373        for s in &self.data {
374            // Write varint length
375            buffer_utils::write_varint(buffer, s.len() as u64);
376            // Write string data
377            buffer.put_slice(s.as_bytes());
378        }
379        Ok(())
380    }
381
382    fn clone_empty(&self) -> ColumnRef {
383        Arc::new(ColumnString::new(self.type_.clone()))
384    }
385
386    fn slice(&self, begin: usize, len: usize) -> Result<ColumnRef> {
387        if begin + len > self.data.len() {
388            return Err(Error::InvalidArgument(format!(
389                "Slice out of bounds: begin={}, len={}, size={}",
390                begin,
391                len,
392                self.data.len()
393            )));
394        }
395
396        let sliced = self.data[begin..begin + len].to_vec();
397        Ok(Arc::new(ColumnString::from_vec(self.type_.clone(), sliced)))
398    }
399
400    fn as_any(&self) -> &dyn std::any::Any {
401        self
402    }
403
404    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
405        self
406    }
407}
408
// Varint helpers are provided by the `buffer_utils` module; no local helper
// functions remain in this file.
410
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
    use super::*;

    /// A fresh FixedString column is empty and reports its element width.
    #[test]
    fn test_fixed_string_creation() {
        let column = ColumnFixedString::new(Type::fixed_string(10));

        assert_eq!(column.size(), 0);
        assert_eq!(column.fixed_size(), 10);
    }

    /// Appended values can be read back by index.
    #[test]
    fn test_fixed_string_append() {
        let expected = ["hello", "world"];
        let mut column = ColumnFixedString::new(Type::fixed_string(10));
        for &value in &expected {
            column.append(value.to_string());
        }

        assert_eq!(column.size(), 2);
        for (row, &value) in expected.iter().enumerate() {
            assert_eq!(column.get(row), Some(value.to_string()));
        }
    }

    /// Short values are stored at full width and read back without padding.
    #[test]
    fn test_fixed_string_padding() {
        let mut column = ColumnFixedString::new(Type::fixed_string(10));
        column.append("hi".to_string());

        // Should be padded to 10 bytes
        assert_eq!(column.data.len(), 10);
        assert_eq!(column.get(0), Some("hi".to_string()));
    }

    /// Values wider than the element width are rejected with a panic.
    #[test]
    #[should_panic(expected = "String too long")]
    fn test_fixed_string_too_long() {
        let mut column = ColumnFixedString::new(Type::fixed_string(5));
        column.append("too long string".to_string());
    }

    /// FixedString values survive a save/load round trip.
    #[test]
    fn test_fixed_string_save_load() {
        let expected = ["hello", "world"];
        let mut source = ColumnFixedString::new(Type::fixed_string(8));
        for &value in &expected {
            source.append(value.to_string());
        }

        let mut wire = BytesMut::new();
        source.save_to_buffer(&mut wire).unwrap();

        let mut restored = ColumnFixedString::new(Type::fixed_string(8));
        let mut cursor = &wire[..];
        restored.load_from_buffer(&mut cursor, 2).unwrap();

        assert_eq!(restored.size(), 2);
        for (row, &value) in expected.iter().enumerate() {
            assert_eq!(restored.get(row), Some(value.to_string()));
        }
    }

    /// A fresh String column is empty.
    #[test]
    fn test_string_creation() {
        let column = ColumnString::new(Type::string());

        assert_eq!(column.size(), 0);
    }

    /// `append` accepts both `&str` and `String` values.
    #[test]
    fn test_string_append() {
        let mut column = ColumnString::new(Type::string());
        column.append("hello");
        column.append("world");
        column.append(String::from("rust"));

        assert_eq!(column.size(), 3);
        for (row, &value) in ["hello", "world", "rust"].iter().enumerate() {
            assert_eq!(column.get(row), Some(value));
        }
    }

    /// Multi-byte UTF-8 values survive a save/load round trip.
    #[test]
    fn test_string_save_load() {
        // ASCII, Unicode, Emoji
        let expected = ["hello", "мир", "🦀"];
        let mut source = ColumnString::new(Type::string());
        for &value in &expected {
            source.append(value);
        }

        let mut wire = BytesMut::new();
        source.save_to_buffer(&mut wire).unwrap();

        let mut restored = ColumnString::new(Type::string());
        let mut cursor = &wire[..];
        restored.load_from_buffer(&mut cursor, 3).unwrap();

        assert_eq!(restored.size(), 3);
        for (row, &value) in expected.iter().enumerate() {
            assert_eq!(restored.get(row), Some(value));
        }
    }

    /// `slice` copies the requested window of rows into a new column.
    #[test]
    fn test_string_slice() {
        let mut column = ColumnString::new(Type::string());
        (0..10).for_each(|i| column.append(format!("str_{}", i)));

        let window = column.slice(2, 5).unwrap();
        let window_col =
            window.as_any().downcast_ref::<ColumnString>().unwrap();

        assert_eq!(window_col.size(), 5);
        assert_eq!(window_col.get(0), Some("str_2"));
        assert_eq!(window_col.get(4), Some("str_6"));
    }

    /// Varints round-trip across byte-width boundaries.
    #[test]
    fn test_varint_encode_decode() {
        let samples = [0u64, 1, 127, 128, 255, 256, 65535, u64::MAX];

        for &value in &samples {
            let mut wire = BytesMut::new();
            buffer_utils::write_varint(&mut wire, value);

            let mut cursor = &wire[..];
            let decoded = buffer_utils::read_varint(&mut cursor).unwrap();

            assert_eq!(value, decoded);
        }
    }
}