Skip to main content

heliosdb_nano/
types.rs

1//! Core data types for HeliosDB Lite
2//!
3//! This module defines the fundamental types used throughout the database:
4//!
5//! - [`DataType`] - SQL data types (PostgreSQL compatible)
6//! - [`Value`] - Runtime values that can be stored and queried
7//! - [`Tuple`] - A row of values
8//! - [`Schema`] - Table schema with column definitions
9//! - [`Column`] - Column metadata (name, type, constraints)
10//!
11//! # Type System
12//!
13//! HeliosDB Lite uses a PostgreSQL-compatible type system with support for:
14//!
15//! - **Numeric types**: Int2, Int4, Int8, Float4, Float8, Numeric
16//! - **String types**: Text, Varchar, Char
17//! - **Binary types**: Bytea
18//! - **Date/Time types**: Date, Time, Timestamp, Timestamptz, Interval
19//! - **Structured types**: Json, Jsonb, Array, Vector (for embeddings)
20//! - **Special types**: Boolean, Uuid
21//!
22//! # Examples
23//!
24//! ```rust
25//! use heliosdb_nano::{DataType, Value, Column, Schema};
26//!
27//! // Define a schema
28//! let schema = Schema::new(vec![
29//!     Column::new("id", DataType::Int4).primary_key(),
30//!     Column::new("name", DataType::Text).not_null(),
31//!     Column::new("email", DataType::Varchar(Some(255))),
32//! ]);
33//!
34//! // Create a value
35//! let name = Value::String("Alice".to_string());
36//! assert_eq!(name.data_type(), DataType::Text);
37//! ```
38
39use serde::{Deserialize, Serialize};
40use std::fmt;
41use std::hash::{Hash, Hasher};
42
/// Column storage mode for per-column storage optimization
///
/// Allows fine-grained control over how individual columns are stored,
/// enabling different compression and deduplication strategies based on
/// the column's data characteristics.
///
/// The enum derives `Default`; the `#[default]` attribute selects
/// [`ColumnStorageMode::Default`], which is therefore also the value a
/// `#[serde(default)]` field of this type receives when it is absent from
/// the serialized input.
///
/// # Storage Modes
///
/// - `Default`: Standard row-oriented storage, inline in tuple
/// - `Dictionary`: Dictionary-encoded strings for low-cardinality columns
/// - `ContentAddressed`: Hash-based deduplication for large values
/// - `Columnar`: Column-grouped storage for analytics workloads
///
/// # Example
///
/// ```sql
/// CREATE TABLE users (
///     id INT PRIMARY KEY,
///     status TEXT STORAGE DICTIONARY,        -- Low cardinality
///     bio TEXT STORAGE CONTENT_ADDRESSED,    -- Large text
///     scores FLOAT8[] STORAGE COLUMNAR       -- Analytics
/// );
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum ColumnStorageMode {
    /// Standard row-oriented storage (the default mode).
    ///
    /// Best for: OLTP, point queries, mixed workloads
    #[default]
    Default,
    /// Dictionary-encoded storage for low-cardinality strings.
    ///
    /// Best for: Enum-like values, status codes, country codes (<64K unique values)
    Dictionary,
    /// Content-addressed storage with hash-based deduplication.
    ///
    /// Best for: Large values (>1KB) with duplicates (documents, blobs)
    ContentAddressed,
    /// Column-grouped storage for analytics.
    ///
    /// Best for: Analytics, aggregations, range scans, time-series data
    Columnar,
}
82
83impl fmt::Display for ColumnStorageMode {
84    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
85        match self {
86            ColumnStorageMode::Default => write!(f, "DEFAULT"),
87            ColumnStorageMode::Dictionary => write!(f, "DICTIONARY"),
88            ColumnStorageMode::ContentAddressed => write!(f, "CONTENT_ADDRESSED"),
89            ColumnStorageMode::Columnar => write!(f, "COLUMNAR"),
90        }
91    }
92}
93
/// SQL data types (PostgreSQL compatible)
///
/// Represents the type of a column or value in the database. These types
/// are designed to be compatible with PostgreSQL for wire protocol support.
///
/// Parameterized variants carry their parameter inline: `Varchar` holds an
/// optional maximum length, `Char` a fixed length, `Vector` a dimension
/// count, and `Array` the boxed element type (so arrays can nest).
///
/// # Type Aliases
///
/// Common PostgreSQL type aliases are supported:
/// - `SERIAL` → Int4 with auto-increment
/// - `BIGSERIAL` → Int8 with auto-increment
/// - `INTEGER` → Int4
/// - `BIGINT` → Int8
/// - `REAL` → Float4
/// - `DOUBLE PRECISION` → Float8
///
/// NOTE(review): this enum has no alias or auto-increment variants, so the
/// alias mapping above is presumably applied by the SQL front end before a
/// `DataType` is produced — confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum DataType {
    /// Boolean type (true/false)
    Boolean,
    /// 16-bit signed integer (-32768 to 32767)
    Int2,
    /// 32-bit signed integer (-2^31 to 2^31-1)
    Int4,
    /// 64-bit signed integer (-2^63 to 2^63-1)
    Int8,
    /// 32-bit IEEE 754 floating point
    Float4,
    /// 64-bit IEEE 754 floating point
    Float8,
    /// Arbitrary precision numeric (stored as string)
    Numeric,
    /// Variable-length string with optional max length
    /// (`None` means no declared limit)
    Varchar(Option<usize>),
    /// Unlimited-length string
    Text,
    /// Fixed-length string (padded with spaces); the payload is the length
    Char(usize),
    /// Binary data (byte array)
    Bytea,
    /// Calendar date (year, month, day)
    Date,
    /// Time of day without timezone
    Time,
    /// Date and time without timezone
    Timestamp,
    /// Date and time with timezone (stored as UTC)
    Timestamptz,
    /// Time interval (duration)
    Interval,
    /// Universally unique identifier (128-bit)
    Uuid,
    /// JSON text (stored as string)
    Json,
    /// Binary JSON (optimized for queries)
    Jsonb,
    /// Array of values of the inner type
    Array(Box<DataType>),
    /// Fixed-dimension vector for ML embeddings; the payload is the dimension count
    Vector(usize),
}
153
154impl fmt::Display for DataType {
155    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156        match self {
157            DataType::Boolean => write!(f, "BOOLEAN"),
158            DataType::Int2 => write!(f, "INT2"),
159            DataType::Int4 => write!(f, "INT4"),
160            DataType::Int8 => write!(f, "INT8"),
161            DataType::Float4 => write!(f, "FLOAT4"),
162            DataType::Float8 => write!(f, "FLOAT8"),
163            DataType::Numeric => write!(f, "NUMERIC"),
164            DataType::Varchar(Some(n)) => write!(f, "VARCHAR({})", n),
165            DataType::Varchar(None) => write!(f, "VARCHAR"),
166            DataType::Text => write!(f, "TEXT"),
167            DataType::Char(n) => write!(f, "CHAR({})", n),
168            DataType::Bytea => write!(f, "BYTEA"),
169            DataType::Date => write!(f, "DATE"),
170            DataType::Time => write!(f, "TIME"),
171            DataType::Timestamp => write!(f, "TIMESTAMP"),
172            DataType::Timestamptz => write!(f, "TIMESTAMPTZ"),
173            DataType::Interval => write!(f, "INTERVAL"),
174            DataType::Uuid => write!(f, "UUID"),
175            DataType::Json => write!(f, "JSON"),
176            DataType::Jsonb => write!(f, "JSONB"),
177            DataType::Array(inner) => write!(f, "{}[]", inner),
178            DataType::Vector(dim) => write!(f, "VECTOR({})", dim),
179        }
180    }
181}
182
/// Runtime value representation
///
/// Values are the concrete data stored in tuples and returned from queries.
/// Each value variant corresponds to a [`DataType`] and can be serialized
/// for storage or transmitted over the wire protocol.
///
/// # Null Handling
///
/// SQL NULL is represented as `Value::Null`. NULL follows SQL semantics:
/// - NULL compared to anything (including NULL) returns NULL
/// - Use `IS NULL` / `IS NOT NULL` for null checks
///
/// Note that this describes SQL-level comparison only. The `PartialEq`
/// derived on this enum is plain structural equality, so in Rust code
/// `Value::Null == Value::Null` evaluates to `true`; three-valued NULL
/// logic has to be applied by whatever evaluates SQL expressions.
///
/// # Equality across variants
///
/// Because equality is derived, values of different variants are never
/// equal: `Value::Int4(1) != Value::Int8(1)`. Floating-point payloads use
/// IEEE comparison (`NaN != NaN`, `0.0 == -0.0`).
///
/// # Type Coercion
///
/// Values can be coerced between compatible types during query execution.
/// For example, Int4 can be promoted to Int8 or Float8 as needed.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Value {
    /// SQL NULL value
    Null,
    /// Boolean value
    Boolean(bool),
    /// 16-bit signed integer
    Int2(i16),
    /// 32-bit signed integer
    Int4(i32),
    /// 64-bit signed integer
    Int8(i64),
    /// 32-bit floating point
    Float4(f32),
    /// 64-bit floating point
    Float8(f64),
    /// Arbitrary precision numeric (stored as string to preserve precision)
    Numeric(String),
    /// Text string
    String(String),
    /// Binary data
    Bytes(Vec<u8>),
    /// UUID value
    Uuid(uuid::Uuid),
    /// Timestamp (stored as UTC)
    Timestamp(chrono::DateTime<chrono::Utc>),
    /// Date (year, month, day)
    Date(chrono::NaiveDate),
    /// Time of day without timezone
    Time(chrono::NaiveTime),
    /// Time interval (duration in microseconds for precision)
    /// Positive for forward, negative for backward
    Interval(i64),
    /// JSON value (stored as string for bincode compatibility)
    Json(String),
    /// Array of values
    Array(Vec<Value>),
    /// Vector for ML embeddings (f32 for efficiency)
    Vector(Vec<f32>),
    /// Dictionary reference - stores dict_id for dictionary-encoded columns
    /// The actual string value is stored in a separate dictionary structure
    DictRef {
        /// Dictionary ID mapping to the original string value
        dict_id: u32,
    },
    /// Content-addressed reference - stores Blake3 hash of the original value
    /// The actual value is stored separately with the hash as the key
    CasRef {
        /// Blake3 hash of the original value (32 bytes)
        hash: [u8; 32],
    },
    /// Columnar reference - placeholder indicating value is in columnar storage
    /// The actual value is retrieved from column-grouped batch storage.
    /// Carries no payload, so all `ColumnarRef` values compare equal.
    ColumnarRef,
}
254
255impl Value {
256    /// Get the data type of this value
257    pub fn data_type(&self) -> DataType {
258        match self {
259            Value::Null => DataType::Text, // Null can be any type, default to Text
260            Value::Boolean(_) => DataType::Boolean,
261            Value::Int2(_) => DataType::Int2,
262            Value::Int4(_) => DataType::Int4,
263            Value::Int8(_) => DataType::Int8,
264            Value::Float4(_) => DataType::Float4,
265            Value::Float8(_) => DataType::Float8,
266            Value::Numeric(_) => DataType::Numeric,
267            Value::String(_) => DataType::Text,
268            Value::Bytes(_) => DataType::Bytea,
269            Value::Uuid(_) => DataType::Uuid,
270            Value::Timestamp(_) => DataType::Timestamp,
271            Value::Date(_) => DataType::Date,
272            Value::Time(_) => DataType::Time,
273            Value::Interval(_) => DataType::Interval,
274            Value::Json(_) => DataType::Jsonb,
275            Value::Array(arr) => {
276                // Get type from first element if available
277                if let Some(first) = arr.first() {
278                    DataType::Array(Box::new(first.data_type()))
279                } else {
280                    DataType::Array(Box::new(DataType::Text))
281                }
282            }
283            Value::Vector(vec) => DataType::Vector(vec.len()),
284            // Storage reference types - return Text as placeholder
285            // Actual type is determined by the column schema
286            Value::DictRef { .. } => DataType::Text,
287            Value::CasRef { .. } => DataType::Text,
288            Value::ColumnarRef => DataType::Text,
289        }
290    }
291}
292
293impl fmt::Display for Value {
294    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
295        match self {
296            Value::Null => write!(f, "NULL"),
297            Value::Boolean(b) => write!(f, "{}", b),
298            Value::Int2(i) => write!(f, "{}", i),
299            Value::Int4(i) => write!(f, "{}", i),
300            Value::Int8(i) => write!(f, "{}", i),
301            Value::Float4(fl) => write!(f, "{}", fl),
302            Value::Float8(fl) => write!(f, "{}", fl),
303            Value::Numeric(n) => write!(f, "{}", n),
304            Value::String(s) => write!(f, "'{}'", s),
305            Value::Bytes(b) => write!(f, "\\x{}", hex::encode(b)),
306            Value::Uuid(u) => write!(f, "'{}'", u),
307            Value::Timestamp(ts) => write!(f, "'{}'", ts.to_rfc3339()),
308            Value::Date(d) => write!(f, "'{}'", d.format("%Y-%m-%d")),
309            Value::Time(t) => write!(f, "'{}'", t.format("%H:%M:%S%.f")),
310            Value::Interval(micros) => {
311                // Format interval in a human-readable way
312                let total_secs = micros / 1_000_000;
313                let days = total_secs / 86400;
314                let hours = (total_secs % 86400) / 3600;
315                let mins = (total_secs % 3600) / 60;
316                let secs = total_secs % 60;
317                if days > 0 {
318                    write!(f, "{} days {:02}:{:02}:{:02}", days, hours, mins, secs)
319                } else {
320                    write!(f, "{:02}:{:02}:{:02}", hours, mins, secs)
321                }
322            }
323            Value::Json(j) => write!(f, "'{}'", j),
324            Value::Array(arr) => {
325                write!(f, "{{")?;
326                for (i, v) in arr.iter().enumerate() {
327                    if i > 0 {
328                        write!(f, ", ")?;
329                    }
330                    // Format array elements without type wrappers for cleaner output
331                    match v {
332                        Value::Int2(n) => write!(f, "{}", n)?,
333                        Value::Int4(n) => write!(f, "{}", n)?,
334                        Value::Int8(n) => write!(f, "{}", n)?,
335                        Value::Float4(n) => write!(f, "{}", n)?,
336                        Value::Float8(n) => write!(f, "{}", n)?,
337                        Value::String(s) => write!(f, "\"{}\"", s)?,
338                        Value::Boolean(b) => write!(f, "{}", b)?,
339                        Value::Null => write!(f, "NULL")?,
340                        other => write!(f, "{}", other)?,
341                    }
342                }
343                write!(f, "}}")
344            }
345            Value::Vector(vec) => write!(f, "[{}]", vec.iter()
346                .map(|v| v.to_string())
347                .collect::<Vec<_>>()
348                .join(", ")),
349            Value::DictRef { dict_id } => write!(f, "<dict:{}>", dict_id),
350            Value::CasRef { hash } => write!(f, "<cas:{}>", hex::encode(&hash[..8])),
351            Value::ColumnarRef => write!(f, "<columnar>"),
352        }
353    }
354}
355
/// A tuple (row) of values
///
/// Tuples are the fundamental unit of data in HeliosDB Lite. Each tuple
/// contains a vector of [`Value`]s corresponding to the columns in a table.
///
/// # Row Tracking
///
/// Tuples carry optional metadata for row identification:
/// - `row_id`: Unique identifier within a table, used for UPDATE/DELETE
/// - `branch_id`: Branch identifier for database branching (experimental)
///
/// # Equality and serialization
///
/// `PartialEq` is derived, so two tuples are equal only if their values
/// *and* both identifiers match. `branch_id` is marked `#[serde(skip)]`: it
/// is never written when serializing and is restored as `None` when
/// deserializing.
///
/// # Example
///
/// ```rust
/// use heliosdb_nano::{Tuple, Value};
///
/// // Create a simple tuple
/// let row = Tuple::new(vec![
///     Value::Int4(1),
///     Value::String("Alice".to_string()),
/// ]);
///
/// assert_eq!(row.len(), 2);
/// assert_eq!(row.get(0), Some(&Value::Int4(1)));
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Tuple {
    /// Column values in schema order
    pub values: Vec<Value>,
    /// Unique row identifier within the table (assigned by storage layer);
    /// `None` for tuples that have not been given one
    pub row_id: Option<u64>,
    /// Branch identifier for copy-on-write branching (experimental).
    /// Excluded from serialization; always `None` after deserialization.
    #[serde(skip)]
    pub branch_id: Option<u64>,
}
391
392impl Default for Tuple {
393    fn default() -> Self {
394        Self { values: vec![], row_id: None, branch_id: None }
395    }
396}
397
398impl Tuple {
399    /// Create a new tuple
400    pub fn new(values: Vec<Value>) -> Self {
401        Self { values, row_id: None, branch_id: None }
402    }
403
404    /// Create a new tuple with row ID
405    pub fn with_row_id(values: Vec<Value>, row_id: u64) -> Self {
406        Self { values, row_id: Some(row_id), branch_id: None }
407    }
408
409    /// Create a new tuple with row ID and branch ID
410    pub fn with_row_and_branch_id(values: Vec<Value>, row_id: u64, branch_id: u64) -> Self {
411        Self { values, row_id: Some(row_id), branch_id: Some(branch_id) }
412    }
413
414    /// Get value at index
415    pub fn get(&self, index: usize) -> Option<&Value> {
416        self.values.get(index)
417    }
418
419    /// Number of values
420    pub fn len(&self) -> usize {
421        self.values.len()
422    }
423
424    /// Check if empty
425    pub fn is_empty(&self) -> bool {
426        self.values.is_empty()
427    }
428
429    /// Get schema inferred from tuple values
430    ///
431    /// Infers a schema by examining the types of values in this tuple.
432    /// This is a runtime type inspection and should be used with care
433    /// as it cannot detect all type nuances (e.g., VARCHAR vs TEXT).
434    pub fn schema(&self) -> Schema {
435        let columns: Vec<Column> = self.values
436            .iter()
437            .enumerate()
438            .map(|(i, val)| {
439                Column {
440                    name: format!("column_{}", i),
441                    data_type: val.data_type(),
442                    nullable: matches!(val, Value::Null),
443                    primary_key: false,
444                    source_table: None,
445                    source_table_name: None,
446                    default_expr: None,
447                    unique: false,
448                    storage_mode: ColumnStorageMode::Default,
449                }
450            })
451            .collect();
452
453        Schema::new(columns)
454    }
455}
456
/// Column definition in a table schema
///
/// Defines the metadata for a single column including its name, type,
/// and constraints. Used to build [`Schema`] definitions.
///
/// # Builder Pattern
///
/// Column supports a builder pattern for setting constraints:
///
/// ```rust
/// use heliosdb_nano::{Column, DataType};
///
/// let id_col = Column::new("id", DataType::Int4)
///     .primary_key();  // Sets primary_key=true, nullable=false
///
/// let name_col = Column::new("name", DataType::Text)
///     .not_null();     // Sets nullable=false
///
/// let bio_col = Column::new("bio", DataType::Text);
///     // Default: nullable=true, primary_key=false
/// ```
///
/// # Serialization
///
/// The fields marked `#[serde(default)]` may be absent from serialized
/// input, in which case they take their type's default (`None`, `false`,
/// or [`ColumnStorageMode::Default`]). Presumably this keeps schemas
/// written before those fields existed readable — confirm.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Column {
    /// Column name (case-insensitive in queries)
    ///
    /// NOTE(review): the lookups on [`Schema`] in this file compare names
    /// with exact `==`, so any case folding has to happen before the name
    /// reaches them — confirm where that is done.
    pub name: String,
    /// SQL data type
    pub data_type: DataType,
    /// Whether NULL values are allowed
    pub nullable: bool,
    /// Whether this column is part of the primary key
    pub primary_key: bool,
    /// Source table alias (for JOIN disambiguation with e.column syntax)
    #[serde(default)]
    pub source_table: Option<String>,
    /// Source table actual name (for JOIN disambiguation with table.column syntax)
    #[serde(default)]
    pub source_table_name: Option<String>,
    /// Default expression (serialized as JSON for storage)
    /// This is evaluated when INSERT doesn't provide a value for this column
    #[serde(default)]
    pub default_expr: Option<String>,
    /// UNIQUE constraint
    #[serde(default)]
    pub unique: bool,
    /// Storage mode for per-column storage optimization
    /// Controls how this column's values are stored (dictionary, CAS, columnar)
    #[serde(default)]
    pub storage_mode: ColumnStorageMode,
}
506
507impl Column {
508    /// Create a new column
509    pub fn new(name: impl Into<String>, data_type: DataType) -> Self {
510        Self {
511            name: name.into(),
512            data_type,
513            nullable: true,
514            primary_key: false,
515            source_table: None,
516            source_table_name: None,
517            default_expr: None,
518            unique: false,
519            storage_mode: ColumnStorageMode::Default,
520        }
521    }
522
523    /// Set the source table (for JOIN disambiguation)
524    pub fn with_source_table(mut self, table: impl Into<String>) -> Self {
525        self.source_table = Some(table.into());
526        self
527    }
528
529    /// Make column non-nullable
530    pub fn not_null(mut self) -> Self {
531        self.nullable = false;
532        self
533    }
534
535    /// Make column a primary key
536    pub fn primary_key(mut self) -> Self {
537        self.primary_key = true;
538        self.nullable = false;
539        self
540    }
541
542    /// Set default expression (as serialized JSON)
543    pub fn with_default(mut self, default_expr: impl Into<String>) -> Self {
544        self.default_expr = Some(default_expr.into());
545        self
546    }
547
548    /// Set UNIQUE constraint
549    pub fn unique(mut self) -> Self {
550        self.unique = true;
551        self
552    }
553
554    /// Set storage mode for per-column optimization
555    pub fn with_storage(mut self, mode: ColumnStorageMode) -> Self {
556        self.storage_mode = mode;
557        self
558    }
559}
560
/// Table schema definition
///
/// A schema defines the structure of a table, including column names,
/// types, and constraints. Schemas are used for:
///
/// - Table creation (`CREATE TABLE`)
/// - Query planning and type checking
/// - Result set metadata
/// - Data serialization/deserialization
///
/// A schema is simply an ordered list of [`Column`]s; nothing in the type
/// itself enforces that column names are unique.
///
/// # Example
///
/// ```rust
/// use heliosdb_nano::{Schema, Column, DataType};
///
/// let users_schema = Schema::new(vec![
///     Column::new("id", DataType::Int4).primary_key(),
///     Column::new("username", DataType::Varchar(Some(50))).not_null(),
///     Column::new("email", DataType::Text).not_null(),
///     Column::new("created_at", DataType::Timestamptz),
/// ]);
///
/// // Find column by name
/// let email_col = users_schema.get_column("email");
/// assert!(email_col.is_some());
///
/// // Get column index for projections
/// let idx = users_schema.get_column_index("username");
/// assert_eq!(idx, Some(1));
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Schema {
    /// Ordered list of column definitions; a column's position here is the
    /// index returned by the lookup methods
    pub columns: Vec<Column>,
}
596
597impl Schema {
598    /// Create a new schema
599    pub fn new(columns: Vec<Column>) -> Self {
600        Self { columns }
601    }
602
603    /// Get column by name
604    pub fn get_column(&self, name: &str) -> Option<&Column> {
605        self.columns.iter().find(|c| c.name == name)
606    }
607
608    /// Get column index by name
609    pub fn get_column_index(&self, name: &str) -> Option<usize> {
610        self.columns.iter().position(|c| c.name == name)
611    }
612
613    /// Get column index with optional table qualifier for disambiguation
614    ///
615    /// If table is provided, matches columns where source_table equals table AND name matches.
616    /// If no table is provided, falls back to simple name lookup.
617    pub fn get_qualified_column_index(&self, table: Option<&str>, name: &str) -> Option<usize> {
618        if let Some(table_name) = table {
619            // Look for column with matching source_table (alias) OR source_table_name (actual name)
620            self.columns.iter().position(|c| {
621                (c.source_table.as_deref() == Some(table_name)
622                    || c.source_table_name.as_deref() == Some(table_name))
623                && c.name == name
624            })
625        } else {
626            // No table qualifier - use simple name lookup
627            self.get_column_index(name)
628        }
629    }
630
631    /// Get column by index (bounds-checked)
632    pub fn get_column_at(&self, index: usize) -> Option<&Column> {
633        self.columns.get(index)
634    }
635
636    /// Get mutable column by index (bounds-checked)
637    pub fn get_column_at_mut(&mut self, index: usize) -> Option<&mut Column> {
638        self.columns.get_mut(index)
639    }
640
641    /// Number of columns
642    pub fn len(&self) -> usize {
643        self.columns.len()
644    }
645
646    /// Check if empty
647    pub fn is_empty(&self) -> bool {
648        self.columns.is_empty()
649    }
650
651    /// Merge two schemas (for JOIN operations)
652    ///
653    /// Combines columns from left and right schemas, handling name conflicts
654    /// by qualifying column names with table names when necessary.
655    pub fn merge(&self, other: &Schema) -> Self {
656        let mut columns = self.columns.clone();
657        columns.extend(other.columns.clone());
658        Self { columns }
659    }
660
661    /// Stamp every column in the schema with a source table (both the
662    /// alias slot and the actual table-name slot), so that qualified
663    /// column references like `"t"."col"` resolve against this schema
664    /// via [`Schema::get_qualified_column_index`].
665    ///
666    /// Use this when constructing an evaluator for a single-table DML
667    /// operation (UPDATE, DELETE, INSERT...RETURNING) — without the
668    /// stamp, the evaluator can only resolve unqualified columns, and
669    /// any `WHERE "t"."col" = …` predicate fails with
670    /// `Column 't.col' not found in schema` (B31).
671    #[must_use]
672    pub fn with_source_table_name(mut self, table: &str) -> Self {
673        for col in &mut self.columns {
674            if col.source_table.is_none() {
675                col.source_table = Some(table.to_string());
676            }
677            if col.source_table_name.is_none() {
678                col.source_table_name = Some(table.to_string());
679            }
680        }
681        self
682    }
683
684    /// Project schema to subset of columns
685    ///
686    /// Returns a new schema containing only the columns at the specified indices.
687    pub fn project(&self, indices: &[usize]) -> Self {
688        let columns = indices
689            .iter()
690            .filter_map(|&i| self.columns.get(i).cloned())
691            .collect();
692        Self { columns }
693    }
694}
695
696/// Hash implementation for Value
697///
698/// Enables using Value as a key in HashMap, which is required for HashJoinOperator.
699/// This implementation follows SQL semantics: NULL values have a consistent hash
700/// but are never equal to anything (handled by PartialEq).
701impl Hash for Value {
702    fn hash<H: Hasher>(&self, state: &mut H) {
703        // Numeric types hash to a common form so Int2(1), Int4(1), Int8(1)
704        // all hash identically. This is critical for hash joins where one
705        // side is SERIAL (Int4) and the other is BIGSERIAL (Int8).
706        match self {
707            Value::Null => {
708                0u8.hash(state); // consistent hash for NULL
709            }
710            Value::Boolean(b) => {
711                1u8.hash(state);
712                b.hash(state);
713            }
714            // All integer types hash as i64 so they match across widths
715            Value::Int2(i) => {
716                2u8.hash(state);
717                (*i as i64).hash(state);
718            }
719            Value::Int4(i) => {
720                2u8.hash(state);
721                (*i as i64).hash(state);
722            }
723            Value::Int8(i) => {
724                2u8.hash(state);
725                i.hash(state);
726            }
727            // All float types hash as f64 bits
728            Value::Float4(f) => {
729                3u8.hash(state);
730                (*f as f64).to_bits().hash(state);
731            }
732            Value::Float8(f) => {
733                3u8.hash(state);
734                f.to_bits().hash(state);
735            }
736            Value::Numeric(n) => {
737                // Hash numeric string representation
738                n.hash(state);
739            }
740            Value::String(s) => s.hash(state),
741            Value::Bytes(b) => b.hash(state),
742            Value::Uuid(u) => u.hash(state),
743            Value::Timestamp(ts) => {
744                // Hash the timestamp's nanosecond representation
745                ts.timestamp_nanos_opt().hash(state);
746            }
747            Value::Date(d) => {
748                // Hash date as string representation
749                d.to_string().hash(state);
750            }
751            Value::Time(t) => {
752                // Hash time as string representation
753                t.to_string().hash(state);
754            }
755            Value::Json(j) => {
756                // Hash JSON string representation
757                // Note: This is not ideal for performance but ensures consistency
758                j.to_string().hash(state);
759            }
760            Value::Array(arr) => {
761                arr.len().hash(state);
762                for val in arr {
763                    val.hash(state);
764                }
765            }
766            Value::Vector(vec) => {
767                vec.len().hash(state);
768                for f in vec {
769                    f.to_bits().hash(state);
770                }
771            }
772            Value::DictRef { dict_id } => {
773                dict_id.hash(state);
774            }
775            Value::CasRef { hash } => {
776                hash.hash(state);
777            }
778            Value::ColumnarRef => {
779                // Columnar references hash to a constant
780                // since the actual value is stored elsewhere
781                255u8.hash(state);
782            }
783            Value::Interval(microseconds) => {
784                microseconds.hash(state);
785            }
786        }
787    }
788}
789
/// Implement Eq for Value to enable HashMap usage
///
/// This is a marker impl only; equality itself comes from the `PartialEq`
/// derived on [`Value`].
///
/// NOTE(review): `Eq` promises that every value equals itself, but the
/// derived `PartialEq` compares `Float4`, `Float8` and `Vector` payloads
/// with IEEE float semantics, under which NaN is not equal to NaN. A value
/// containing NaN can therefore be inserted into a `HashMap` but never
/// looked up again. Confirm that callers never use NaN-bearing values as
/// keys, or normalize them first.
impl Eq for Value {}
795
// Minimal in-file stand-in for the `hex` crate, used when displaying bytes.
// Can be dropped once the real crate is added to Cargo.toml.
mod hex {
    /// Lower-case hexadecimal digits, indexed by nibble value.
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    /// Encode `bytes` as a lower-case hex string, two characters per byte.
    pub fn encode(bytes: &[u8]) -> String {
        let mut encoded = String::with_capacity(bytes.len() * 2);
        for &byte in bytes {
            let high = usize::from(byte >> 4);
            let low = usize::from(byte & 0x0f);
            encoded.push(char::from(DIGITS[high]));
            encoded.push(char::from(DIGITS[low]));
        }
        encoded
    }
}
809
810// Stub types for v3.0.0 API operations
811
/// Vector store information
///
/// Plain serializable description of a vector store. All fields are
/// carried as-is; this type performs no validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorStoreInfo {
    /// Store name
    pub name: String,
    /// Vector dimensions
    pub dimensions: u32,
    /// Number of vectors
    pub vector_count: u64,
    /// Creation timestamp
    /// (string format is not defined here — presumably RFC 3339; confirm)
    pub created_at: String,
    /// Distance metric (e.g., cosine, euclidean)
    pub metric: String,
    /// Index type (e.g., hnsw, flat)
    pub index_type: String,
}
828
/// Agent session
///
/// Plain serializable description of an agent session. No field is
/// optional or defaulted, so all of them must be present when
/// deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentSession {
    /// Session ID
    pub id: String,
    /// Session name
    pub name: String,
    /// Creation timestamp
    /// (string format is not defined here — presumably RFC 3339; confirm)
    pub created_at: String,
    /// Last updated timestamp (same format caveat as `created_at`)
    pub updated_at: String,
    /// Session ID (duplicate field for compatibility)
    ///
    /// NOTE(review): nothing in this type keeps `session_id` in sync with
    /// `id`; producers presumably set both to the same value — confirm.
    pub session_id: String,
    /// Message count in session
    pub message_count: u32,
    /// Token count in session
    pub token_count: u32,
    /// Session metadata (arbitrary JSON)
    pub metadata: serde_json::Value,
}
849
/// Agent message
///
/// Plain serializable description of a single message in an agent session.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentMessage {
    /// Message ID
    pub id: String,
    /// Sender role (user, assistant, system)
    pub role: String,
    /// Message content
    pub content: String,
    /// Timestamp
    /// (string format is not defined here — presumably RFC 3339; confirm)
    pub created_at: String,
    /// Message name
    pub name: String,
    /// Function call if any
    pub function_call: Option<String>,
    /// Tool calls if any (arbitrary JSON)
    pub tool_calls: Option<serde_json::Value>,
    /// Message metadata (arbitrary JSON)
    pub metadata: serde_json::Value,
    /// Message timestamp
    ///
    /// NOTE(review): overlaps with `created_at`; the intended difference
    /// between the two is not visible in this file — confirm.
    pub timestamp: String,
}
872
/// Document data
///
/// Plain serializable representation of a stored document, including its
/// full content and chunk list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentData {
    /// Document ID
    pub id: String,
    /// Document content
    pub content: String,
    /// Document metadata (arbitrary JSON), if any
    pub metadata: Option<serde_json::Value>,
    /// Creation timestamp
    /// (string format is not defined here — presumably RFC 3339; confirm)
    pub created_at: String,
    /// Last updated timestamp (same format caveat as `created_at`)
    pub updated_at: String,
    /// Document chunks
    /// (how `content` is split into chunks is decided elsewhere)
    pub chunks: Vec<String>,
}
889
/// Document metadata
///
/// Plain serializable summary of a stored document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentMetadata {
    /// Document ID
    pub id: String,
    /// Document size (unit is not defined here — presumably bytes; confirm)
    pub size: usize,
    /// Creation timestamp
    /// (string format is not defined here — presumably RFC 3339; confirm)
    pub created_at: String,
    /// Last updated timestamp (same format caveat as `created_at`)
    pub updated_at: String,
    /// Document content preview
    pub content: String,
    /// Document metadata (arbitrary JSON), if any
    pub metadata: Option<serde_json::Value>,
}