heliosdb_nano/types.rs
//! Core data types for HeliosDB Lite
//!
//! This module defines the fundamental types used throughout the database:
//!
//! - [`DataType`] - SQL data types (PostgreSQL compatible)
//! - [`Value`] - Runtime values that can be stored and queried
//! - [`Tuple`] - A row of values
//! - [`Schema`] - Table schema with column definitions
//! - [`Column`] - Column metadata (name, type, constraints)
//!
//! # Type System
//!
//! HeliosDB Lite uses a PostgreSQL-compatible type system with support for:
//!
//! - **Numeric types**: Int2, Int4, Int8, Float4, Float8, Numeric
//! - **String types**: Text, Varchar, Char
//! - **Binary types**: Bytea
//! - **Date/Time types**: Date, Time, Timestamp, Timestamptz, Interval
//! - **Structured types**: Json, Jsonb, Array, Vector (for embeddings)
//! - **Special types**: Boolean, Uuid
//!
//! # Examples
//!
//! ```rust
//! use heliosdb_nano::{DataType, Value, Column, Schema};
//!
//! // Define a schema
//! let schema = Schema::new(vec![
//!     Column::new("id", DataType::Int4).primary_key(),
//!     Column::new("name", DataType::Text).not_null(),
//!     Column::new("email", DataType::Varchar(Some(255))),
//! ]);
//!
//! // Create a value
//! let name = Value::String("Alice".to_string());
//! assert_eq!(name.data_type(), DataType::Text);
//! ```
38
39use serde::{Deserialize, Serialize};
40use std::fmt;
41use std::hash::{Hash, Hasher};
42
/// Column storage mode for per-column storage optimization
///
/// Allows fine-grained control over how individual columns are stored,
/// enabling different compression and deduplication strategies based on
/// the column's data characteristics.
///
/// The mode is recorded per column in [`Column::storage_mode`] and rendered
/// as an upper-case keyword by this type's `Display` implementation.
///
/// # Storage Modes
///
/// - `Default`: Standard row-oriented storage, inline in tuple
/// - `Dictionary`: Dictionary-encoded strings for low-cardinality columns
/// - `ContentAddressed`: Hash-based deduplication for large values
/// - `Columnar`: Column-grouped storage for analytics workloads
///
/// `ColumnStorageMode::Default` is also the value produced by
/// `Default::default()` (it carries the `#[default]` attribute).
///
/// # Example
///
/// ```sql
/// CREATE TABLE users (
///     id INT PRIMARY KEY,
///     status TEXT STORAGE DICTIONARY,       -- Low cardinality
///     bio TEXT STORAGE CONTENT_ADDRESSED,   -- Large text
///     scores FLOAT8[] STORAGE COLUMNAR      -- Analytics
/// );
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum ColumnStorageMode {
    /// Standard row-oriented storage (default)
    /// Best for: OLTP, point queries, mixed workloads
    #[default]
    Default,
    /// Dictionary-encoded storage for low-cardinality strings
    /// Best for: Enum-like values, status codes, country codes (<64K unique values)
    Dictionary,
    /// Content-addressed storage with hash-based deduplication
    /// Best for: Large values (>1KB) with duplicates (documents, blobs)
    ContentAddressed,
    /// Column-grouped storage for analytics
    /// Best for: Analytics, aggregations, range scans, time-series data
    Columnar,
}
82
83impl fmt::Display for ColumnStorageMode {
84 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
85 match self {
86 ColumnStorageMode::Default => write!(f, "DEFAULT"),
87 ColumnStorageMode::Dictionary => write!(f, "DICTIONARY"),
88 ColumnStorageMode::ContentAddressed => write!(f, "CONTENT_ADDRESSED"),
89 ColumnStorageMode::Columnar => write!(f, "COLUMNAR"),
90 }
91 }
92}
93
/// SQL data types (PostgreSQL compatible)
///
/// Represents the type of a column or value in the database. These types
/// are designed to be compatible with PostgreSQL for wire protocol support.
///
/// Parameterised variants carry their parameter inline: an optional maximum
/// length for `Varchar`, a length for `Char`, the element type for `Array`
/// and the dimension for `Vector`.
///
/// # Type Aliases
///
/// Common PostgreSQL type aliases are supported:
/// - `SERIAL` → Int4 with auto-increment
/// - `BIGSERIAL` → Int8 with auto-increment
/// - `INTEGER` → Int4
/// - `BIGINT` → Int8
/// - `REAL` → Float4
/// - `DOUBLE PRECISION` → Float8
///
/// The aliases have no variants of their own in this enum; only the target
/// types listed on the right-hand side are represented here.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum DataType {
    /// Boolean type (true/false)
    Boolean,
    /// 16-bit signed integer (-32768 to 32767)
    Int2,
    /// 32-bit signed integer (-2^31 to 2^31-1)
    Int4,
    /// 64-bit signed integer (-2^63 to 2^63-1)
    Int8,
    /// 32-bit IEEE 754 floating point
    Float4,
    /// 64-bit IEEE 754 floating point
    Float8,
    /// Arbitrary precision numeric (stored as string)
    Numeric,
    /// Variable-length string with optional max length
    /// (`None` means no length was specified)
    Varchar(Option<usize>),
    /// Unlimited-length string
    Text,
    /// Fixed-length string (padded with spaces)
    Char(usize),
    /// Binary data (byte array)
    Bytea,
    /// Calendar date (year, month, day)
    Date,
    /// Time of day without timezone
    Time,
    /// Date and time without timezone
    Timestamp,
    /// Date and time with timezone (stored as UTC)
    Timestamptz,
    /// Time interval (duration)
    Interval,
    /// Universally unique identifier (128-bit)
    Uuid,
    /// JSON text (stored as string)
    Json,
    /// Binary JSON (optimized for queries)
    Jsonb,
    /// Array of values of the inner type
    /// (boxed because the type is recursive)
    Array(Box<DataType>),
    /// Fixed-dimension vector for ML embeddings
    Vector(usize),
}
153
154impl fmt::Display for DataType {
155 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156 match self {
157 DataType::Boolean => write!(f, "BOOLEAN"),
158 DataType::Int2 => write!(f, "INT2"),
159 DataType::Int4 => write!(f, "INT4"),
160 DataType::Int8 => write!(f, "INT8"),
161 DataType::Float4 => write!(f, "FLOAT4"),
162 DataType::Float8 => write!(f, "FLOAT8"),
163 DataType::Numeric => write!(f, "NUMERIC"),
164 DataType::Varchar(Some(n)) => write!(f, "VARCHAR({})", n),
165 DataType::Varchar(None) => write!(f, "VARCHAR"),
166 DataType::Text => write!(f, "TEXT"),
167 DataType::Char(n) => write!(f, "CHAR({})", n),
168 DataType::Bytea => write!(f, "BYTEA"),
169 DataType::Date => write!(f, "DATE"),
170 DataType::Time => write!(f, "TIME"),
171 DataType::Timestamp => write!(f, "TIMESTAMP"),
172 DataType::Timestamptz => write!(f, "TIMESTAMPTZ"),
173 DataType::Interval => write!(f, "INTERVAL"),
174 DataType::Uuid => write!(f, "UUID"),
175 DataType::Json => write!(f, "JSON"),
176 DataType::Jsonb => write!(f, "JSONB"),
177 DataType::Array(inner) => write!(f, "{}[]", inner),
178 DataType::Vector(dim) => write!(f, "VECTOR({})", dim),
179 }
180 }
181}
182
/// Runtime value representation
///
/// Values are the concrete data stored in tuples and returned from queries.
/// Each value variant corresponds to a [`DataType`] and can be serialized
/// for storage or transmitted over the wire protocol.
///
/// # Null Handling
///
/// SQL NULL is represented as `Value::Null`. NULL follows SQL semantics:
/// - NULL compared to anything (including NULL) returns NULL
/// - Use `IS NULL` / `IS NOT NULL` for null checks
///
/// Note that this describes SQL-level comparison only. The `PartialEq`
/// derived on this enum is structural, so in Rust code
/// `Value::Null == Value::Null` is `true`.
/// NOTE(review): three-valued comparison is presumably implemented by the
/// expression evaluator rather than by this type — confirm.
///
/// # Type Coercion
///
/// Values can be coerced between compatible types during query execution.
/// For example, Int4 can be promoted to Int8 or Float8 as needed.
///
/// # Storage References
///
/// `DictRef`, `CasRef` and `ColumnarRef` do not hold user data themselves;
/// they stand in for a value kept in a separate structure.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Value {
    /// SQL NULL value
    Null,
    /// Boolean value
    Boolean(bool),
    /// 16-bit signed integer
    Int2(i16),
    /// 32-bit signed integer
    Int4(i32),
    /// 64-bit signed integer
    Int8(i64),
    /// 32-bit floating point
    Float4(f32),
    /// 64-bit floating point
    Float8(f64),
    /// Arbitrary precision numeric (stored as string to preserve precision)
    Numeric(String),
    /// Text string
    String(String),
    /// Binary data
    Bytes(Vec<u8>),
    /// UUID value
    Uuid(uuid::Uuid),
    /// Timestamp (stored as UTC)
    Timestamp(chrono::DateTime<chrono::Utc>),
    /// Date (year, month, day)
    Date(chrono::NaiveDate),
    /// Time of day without timezone
    Time(chrono::NaiveTime),
    /// Time interval (duration in microseconds for precision)
    /// Positive for forward, negative for backward
    Interval(i64),
    /// JSON value (stored as string for bincode compatibility)
    Json(String),
    /// Array of values
    Array(Vec<Value>),
    /// Vector for ML embeddings (f32 for efficiency)
    Vector(Vec<f32>),
    /// Dictionary reference - stores dict_id for dictionary-encoded columns
    /// The actual string value is stored in a separate dictionary structure
    DictRef {
        /// Dictionary ID mapping to the original string value
        dict_id: u32,
    },
    /// Content-addressed reference - stores Blake3 hash of the original value
    /// The actual value is stored separately with the hash as the key
    CasRef {
        /// Blake3 hash of the original value (32 bytes)
        hash: [u8; 32],
    },
    /// Columnar reference - placeholder indicating value is in columnar storage
    /// The actual value is retrieved from column-grouped batch storage
    ColumnarRef,
}
254
255impl Value {
256 /// Get the data type of this value
257 pub fn data_type(&self) -> DataType {
258 match self {
259 Value::Null => DataType::Text, // Null can be any type, default to Text
260 Value::Boolean(_) => DataType::Boolean,
261 Value::Int2(_) => DataType::Int2,
262 Value::Int4(_) => DataType::Int4,
263 Value::Int8(_) => DataType::Int8,
264 Value::Float4(_) => DataType::Float4,
265 Value::Float8(_) => DataType::Float8,
266 Value::Numeric(_) => DataType::Numeric,
267 Value::String(_) => DataType::Text,
268 Value::Bytes(_) => DataType::Bytea,
269 Value::Uuid(_) => DataType::Uuid,
270 Value::Timestamp(_) => DataType::Timestamp,
271 Value::Date(_) => DataType::Date,
272 Value::Time(_) => DataType::Time,
273 Value::Interval(_) => DataType::Interval,
274 Value::Json(_) => DataType::Jsonb,
275 Value::Array(arr) => {
276 // Get type from first element if available
277 if let Some(first) = arr.first() {
278 DataType::Array(Box::new(first.data_type()))
279 } else {
280 DataType::Array(Box::new(DataType::Text))
281 }
282 }
283 Value::Vector(vec) => DataType::Vector(vec.len()),
284 // Storage reference types - return Text as placeholder
285 // Actual type is determined by the column schema
286 Value::DictRef { .. } => DataType::Text,
287 Value::CasRef { .. } => DataType::Text,
288 Value::ColumnarRef => DataType::Text,
289 }
290 }
291}
292
293impl fmt::Display for Value {
294 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
295 match self {
296 Value::Null => write!(f, "NULL"),
297 Value::Boolean(b) => write!(f, "{}", b),
298 Value::Int2(i) => write!(f, "{}", i),
299 Value::Int4(i) => write!(f, "{}", i),
300 Value::Int8(i) => write!(f, "{}", i),
301 Value::Float4(fl) => write!(f, "{}", fl),
302 Value::Float8(fl) => write!(f, "{}", fl),
303 Value::Numeric(n) => write!(f, "{}", n),
304 Value::String(s) => write!(f, "'{}'", s),
305 Value::Bytes(b) => write!(f, "\\x{}", hex::encode(b)),
306 Value::Uuid(u) => write!(f, "'{}'", u),
307 Value::Timestamp(ts) => write!(f, "'{}'", ts.to_rfc3339()),
308 Value::Date(d) => write!(f, "'{}'", d.format("%Y-%m-%d")),
309 Value::Time(t) => write!(f, "'{}'", t.format("%H:%M:%S%.f")),
310 Value::Interval(micros) => {
311 // Format interval in a human-readable way
312 let total_secs = micros / 1_000_000;
313 let days = total_secs / 86400;
314 let hours = (total_secs % 86400) / 3600;
315 let mins = (total_secs % 3600) / 60;
316 let secs = total_secs % 60;
317 if days > 0 {
318 write!(f, "{} days {:02}:{:02}:{:02}", days, hours, mins, secs)
319 } else {
320 write!(f, "{:02}:{:02}:{:02}", hours, mins, secs)
321 }
322 }
323 Value::Json(j) => write!(f, "'{}'", j),
324 Value::Array(arr) => {
325 write!(f, "{{")?;
326 for (i, v) in arr.iter().enumerate() {
327 if i > 0 {
328 write!(f, ", ")?;
329 }
330 // Format array elements without type wrappers for cleaner output
331 match v {
332 Value::Int2(n) => write!(f, "{}", n)?,
333 Value::Int4(n) => write!(f, "{}", n)?,
334 Value::Int8(n) => write!(f, "{}", n)?,
335 Value::Float4(n) => write!(f, "{}", n)?,
336 Value::Float8(n) => write!(f, "{}", n)?,
337 Value::String(s) => write!(f, "\"{}\"", s)?,
338 Value::Boolean(b) => write!(f, "{}", b)?,
339 Value::Null => write!(f, "NULL")?,
340 other => write!(f, "{}", other)?,
341 }
342 }
343 write!(f, "}}")
344 }
345 Value::Vector(vec) => write!(f, "[{}]", vec.iter()
346 .map(|v| v.to_string())
347 .collect::<Vec<_>>()
348 .join(", ")),
349 Value::DictRef { dict_id } => write!(f, "<dict:{}>", dict_id),
350 Value::CasRef { hash } => write!(f, "<cas:{}>", hex::encode(&hash[..8])),
351 Value::ColumnarRef => write!(f, "<columnar>"),
352 }
353 }
354}
355
/// A tuple (row) of values
///
/// Tuples are the fundamental unit of data in HeliosDB Lite. Each tuple
/// contains a vector of [`Value`]s corresponding to the columns in a table.
///
/// # Row Tracking
///
/// Tuples carry optional metadata for row identification:
/// - `row_id`: Unique identifier within a table, used for UPDATE/DELETE
/// - `branch_id`: Branch identifier for database branching (experimental)
///
/// Both IDs take part in the derived `PartialEq`, so two tuples with the
/// same values but different `row_id`/`branch_id` compare unequal.
///
/// # Example
///
/// ```rust
/// use heliosdb_nano::{Tuple, Value};
///
/// // Create a simple tuple
/// let row = Tuple::new(vec![
///     Value::Int4(1),
///     Value::String("Alice".to_string()),
/// ]);
///
/// assert_eq!(row.len(), 2);
/// assert_eq!(row.get(0), Some(&Value::Int4(1)));
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Tuple {
    /// Column values in schema order
    pub values: Vec<Value>,
    /// Unique row identifier within the table (assigned by storage layer)
    pub row_id: Option<u64>,
    /// Branch identifier for copy-on-write branching (experimental)
    ///
    /// Marked `#[serde(skip)]`: the field is left out when serializing and
    /// comes back as `None` (the `Option` default) when deserializing.
    #[serde(skip)]
    pub branch_id: Option<u64>,
}
391
392impl Default for Tuple {
393 fn default() -> Self {
394 Self { values: vec![], row_id: None, branch_id: None }
395 }
396}
397
398impl Tuple {
399 /// Create a new tuple
400 pub fn new(values: Vec<Value>) -> Self {
401 Self { values, row_id: None, branch_id: None }
402 }
403
404 /// Create a new tuple with row ID
405 pub fn with_row_id(values: Vec<Value>, row_id: u64) -> Self {
406 Self { values, row_id: Some(row_id), branch_id: None }
407 }
408
409 /// Create a new tuple with row ID and branch ID
410 pub fn with_row_and_branch_id(values: Vec<Value>, row_id: u64, branch_id: u64) -> Self {
411 Self { values, row_id: Some(row_id), branch_id: Some(branch_id) }
412 }
413
414 /// Get value at index
415 pub fn get(&self, index: usize) -> Option<&Value> {
416 self.values.get(index)
417 }
418
419 /// Number of values
420 pub fn len(&self) -> usize {
421 self.values.len()
422 }
423
424 /// Check if empty
425 pub fn is_empty(&self) -> bool {
426 self.values.is_empty()
427 }
428
429 /// Get schema inferred from tuple values
430 ///
431 /// Infers a schema by examining the types of values in this tuple.
432 /// This is a runtime type inspection and should be used with care
433 /// as it cannot detect all type nuances (e.g., VARCHAR vs TEXT).
434 pub fn schema(&self) -> Schema {
435 let columns: Vec<Column> = self.values
436 .iter()
437 .enumerate()
438 .map(|(i, val)| {
439 Column {
440 name: format!("column_{}", i),
441 data_type: val.data_type(),
442 nullable: matches!(val, Value::Null),
443 primary_key: false,
444 source_table: None,
445 source_table_name: None,
446 default_expr: None,
447 unique: false,
448 storage_mode: ColumnStorageMode::Default,
449 }
450 })
451 .collect();
452
453 Schema::new(columns)
454 }
455}
456
/// Column definition in a table schema
///
/// Defines the metadata for a single column including its name, type,
/// and constraints. Used to build [`Schema`] definitions.
///
/// # Builder Pattern
///
/// Column supports a builder pattern for setting constraints:
///
/// ```rust
/// use heliosdb_nano::{Column, DataType};
///
/// let id_col = Column::new("id", DataType::Int4)
///     .primary_key(); // Sets primary_key=true, nullable=false
///
/// let name_col = Column::new("name", DataType::Text)
///     .not_null(); // Sets nullable=false
///
/// let bio_col = Column::new("bio", DataType::Text);
/// // Default: nullable=true, primary_key=false
/// ```
///
/// # Serialization
///
/// The fields marked `#[serde(default)]` may be absent from serialized
/// input; they are then filled with their type's default (`None`, `false`,
/// or `ColumnStorageMode::Default`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Column {
    /// Column name (case-insensitive in queries)
    ///
    /// NOTE(review): the lookups on [`Schema`] compare names with exact
    /// `==`, so any case folding presumably happens in the caller — confirm.
    pub name: String,
    /// SQL data type
    pub data_type: DataType,
    /// Whether NULL values are allowed
    pub nullable: bool,
    /// Whether this column is part of the primary key
    pub primary_key: bool,
    /// Source table alias (for JOIN disambiguation with e.column syntax)
    #[serde(default)]
    pub source_table: Option<String>,
    /// Source table actual name (for JOIN disambiguation with table.column syntax)
    #[serde(default)]
    pub source_table_name: Option<String>,
    /// Default expression (serialized as JSON for storage)
    /// This is evaluated when INSERT doesn't provide a value for this column
    #[serde(default)]
    pub default_expr: Option<String>,
    /// UNIQUE constraint
    #[serde(default)]
    pub unique: bool,
    /// Storage mode for per-column storage optimization
    /// Controls how this column's values are stored (dictionary, CAS, columnar)
    #[serde(default)]
    pub storage_mode: ColumnStorageMode,
}
506
507impl Column {
508 /// Create a new column
509 pub fn new(name: impl Into<String>, data_type: DataType) -> Self {
510 Self {
511 name: name.into(),
512 data_type,
513 nullable: true,
514 primary_key: false,
515 source_table: None,
516 source_table_name: None,
517 default_expr: None,
518 unique: false,
519 storage_mode: ColumnStorageMode::Default,
520 }
521 }
522
523 /// Set the source table (for JOIN disambiguation)
524 pub fn with_source_table(mut self, table: impl Into<String>) -> Self {
525 self.source_table = Some(table.into());
526 self
527 }
528
529 /// Make column non-nullable
530 pub fn not_null(mut self) -> Self {
531 self.nullable = false;
532 self
533 }
534
535 /// Make column a primary key
536 pub fn primary_key(mut self) -> Self {
537 self.primary_key = true;
538 self.nullable = false;
539 self
540 }
541
542 /// Set default expression (as serialized JSON)
543 pub fn with_default(mut self, default_expr: impl Into<String>) -> Self {
544 self.default_expr = Some(default_expr.into());
545 self
546 }
547
548 /// Set UNIQUE constraint
549 pub fn unique(mut self) -> Self {
550 self.unique = true;
551 self
552 }
553
554 /// Set storage mode for per-column optimization
555 pub fn with_storage(mut self, mode: ColumnStorageMode) -> Self {
556 self.storage_mode = mode;
557 self
558 }
559}
560
/// Table schema definition
///
/// A schema defines the structure of a table, including column names,
/// types, and constraints. Schemas are used for:
///
/// - Table creation (`CREATE TABLE`)
/// - Query planning and type checking
/// - Result set metadata
/// - Data serialization/deserialization
///
/// A schema is an ordered list of [`Column`]s; column positions are the
/// indices used by the lookup and projection helpers.
///
/// # Example
///
/// ```rust
/// use heliosdb_nano::{Schema, Column, DataType};
///
/// let users_schema = Schema::new(vec![
///     Column::new("id", DataType::Int4).primary_key(),
///     Column::new("username", DataType::Varchar(Some(50))).not_null(),
///     Column::new("email", DataType::Text).not_null(),
///     Column::new("created_at", DataType::Timestamptz),
/// ]);
///
/// // Find column by name
/// let email_col = users_schema.get_column("email");
/// assert!(email_col.is_some());
///
/// // Get column index for projections
/// let idx = users_schema.get_column_index("username");
/// assert_eq!(idx, Some(1));
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Schema {
    /// Ordered list of column definitions
    pub columns: Vec<Column>,
}
596
597impl Schema {
598 /// Create a new schema
599 pub fn new(columns: Vec<Column>) -> Self {
600 Self { columns }
601 }
602
603 /// Get column by name
604 pub fn get_column(&self, name: &str) -> Option<&Column> {
605 self.columns.iter().find(|c| c.name == name)
606 }
607
608 /// Get column index by name
609 pub fn get_column_index(&self, name: &str) -> Option<usize> {
610 self.columns.iter().position(|c| c.name == name)
611 }
612
613 /// Get column index with optional table qualifier for disambiguation
614 ///
615 /// If table is provided, matches columns where source_table equals table AND name matches.
616 /// If no table is provided, falls back to simple name lookup.
617 pub fn get_qualified_column_index(&self, table: Option<&str>, name: &str) -> Option<usize> {
618 if let Some(table_name) = table {
619 // Look for column with matching source_table (alias) OR source_table_name (actual name)
620 self.columns.iter().position(|c| {
621 (c.source_table.as_deref() == Some(table_name)
622 || c.source_table_name.as_deref() == Some(table_name))
623 && c.name == name
624 })
625 } else {
626 // No table qualifier - use simple name lookup
627 self.get_column_index(name)
628 }
629 }
630
631 /// Get column by index (bounds-checked)
632 pub fn get_column_at(&self, index: usize) -> Option<&Column> {
633 self.columns.get(index)
634 }
635
636 /// Get mutable column by index (bounds-checked)
637 pub fn get_column_at_mut(&mut self, index: usize) -> Option<&mut Column> {
638 self.columns.get_mut(index)
639 }
640
641 /// Number of columns
642 pub fn len(&self) -> usize {
643 self.columns.len()
644 }
645
646 /// Check if empty
647 pub fn is_empty(&self) -> bool {
648 self.columns.is_empty()
649 }
650
651 /// Merge two schemas (for JOIN operations)
652 ///
653 /// Combines columns from left and right schemas, handling name conflicts
654 /// by qualifying column names with table names when necessary.
655 pub fn merge(&self, other: &Schema) -> Self {
656 let mut columns = self.columns.clone();
657 columns.extend(other.columns.clone());
658 Self { columns }
659 }
660
661 /// Stamp every column in the schema with a source table (both the
662 /// alias slot and the actual table-name slot), so that qualified
663 /// column references like `"t"."col"` resolve against this schema
664 /// via [`Schema::get_qualified_column_index`].
665 ///
666 /// Use this when constructing an evaluator for a single-table DML
667 /// operation (UPDATE, DELETE, INSERT...RETURNING) — without the
668 /// stamp, the evaluator can only resolve unqualified columns, and
669 /// any `WHERE "t"."col" = …` predicate fails with
670 /// `Column 't.col' not found in schema` (B31).
671 #[must_use]
672 pub fn with_source_table_name(mut self, table: &str) -> Self {
673 for col in &mut self.columns {
674 if col.source_table.is_none() {
675 col.source_table = Some(table.to_string());
676 }
677 if col.source_table_name.is_none() {
678 col.source_table_name = Some(table.to_string());
679 }
680 }
681 self
682 }
683
684 /// Project schema to subset of columns
685 ///
686 /// Returns a new schema containing only the columns at the specified indices.
687 pub fn project(&self, indices: &[usize]) -> Self {
688 let columns = indices
689 .iter()
690 .filter_map(|&i| self.columns.get(i).cloned())
691 .collect();
692 Self { columns }
693 }
694}
695
696/// Hash implementation for Value
697///
698/// Enables using Value as a key in HashMap, which is required for HashJoinOperator.
699/// This implementation follows SQL semantics: NULL values have a consistent hash
700/// but are never equal to anything (handled by PartialEq).
701impl Hash for Value {
702 fn hash<H: Hasher>(&self, state: &mut H) {
703 // Numeric types hash to a common form so Int2(1), Int4(1), Int8(1)
704 // all hash identically. This is critical for hash joins where one
705 // side is SERIAL (Int4) and the other is BIGSERIAL (Int8).
706 match self {
707 Value::Null => {
708 0u8.hash(state); // consistent hash for NULL
709 }
710 Value::Boolean(b) => {
711 1u8.hash(state);
712 b.hash(state);
713 }
714 // All integer types hash as i64 so they match across widths
715 Value::Int2(i) => {
716 2u8.hash(state);
717 (*i as i64).hash(state);
718 }
719 Value::Int4(i) => {
720 2u8.hash(state);
721 (*i as i64).hash(state);
722 }
723 Value::Int8(i) => {
724 2u8.hash(state);
725 i.hash(state);
726 }
727 // All float types hash as f64 bits
728 Value::Float4(f) => {
729 3u8.hash(state);
730 (*f as f64).to_bits().hash(state);
731 }
732 Value::Float8(f) => {
733 3u8.hash(state);
734 f.to_bits().hash(state);
735 }
736 Value::Numeric(n) => {
737 // Hash numeric string representation
738 n.hash(state);
739 }
740 Value::String(s) => s.hash(state),
741 Value::Bytes(b) => b.hash(state),
742 Value::Uuid(u) => u.hash(state),
743 Value::Timestamp(ts) => {
744 // Hash the timestamp's nanosecond representation
745 ts.timestamp_nanos_opt().hash(state);
746 }
747 Value::Date(d) => {
748 // Hash date as string representation
749 d.to_string().hash(state);
750 }
751 Value::Time(t) => {
752 // Hash time as string representation
753 t.to_string().hash(state);
754 }
755 Value::Json(j) => {
756 // Hash JSON string representation
757 // Note: This is not ideal for performance but ensures consistency
758 j.to_string().hash(state);
759 }
760 Value::Array(arr) => {
761 arr.len().hash(state);
762 for val in arr {
763 val.hash(state);
764 }
765 }
766 Value::Vector(vec) => {
767 vec.len().hash(state);
768 for f in vec {
769 f.to_bits().hash(state);
770 }
771 }
772 Value::DictRef { dict_id } => {
773 dict_id.hash(state);
774 }
775 Value::CasRef { hash } => {
776 hash.hash(state);
777 }
778 Value::ColumnarRef => {
779 // Columnar references hash to a constant
780 // since the actual value is stored elsewhere
781 255u8.hash(state);
782 }
783 Value::Interval(microseconds) => {
784 microseconds.hash(state);
785 }
786 }
787 }
788}
789
790/// Implement Eq for Value to enable HashMap usage
791///
792/// This is safe because we already have PartialEq and the types
793/// that don't have perfect equality (floats, JSON) are handled appropriately.
794impl Eq for Value {}
795
// Local stand-in for the `hex` crate (to be added to Cargo.toml), needed
// for the Bytes display. Until then this small implementation is used.
mod hex {
    /// Lower-case hexadecimal digits, indexed by nibble value.
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    /// Encodes `bytes` as a lower-case hexadecimal string, two digits per
    /// input byte (high nibble first).
    pub fn encode(bytes: &[u8]) -> String {
        let mut encoded = String::with_capacity(bytes.len() * 2);
        for &byte in bytes {
            encoded.push(char::from(DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
        }
        encoded
    }
}
809
810// Stub types for v3.0.0 API operations
811
/// Vector store information
///
/// Plain serializable record describing one vector store. All fields are
/// public and carried as-is; this type performs no validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorStoreInfo {
    /// Store name
    pub name: String,
    /// Vector dimensions
    pub dimensions: u32,
    /// Number of vectors
    pub vector_count: u64,
    /// Creation timestamp
    /// (kept as a string; the format is not fixed by this type — TODO confirm)
    pub created_at: String,
    /// Distance metric (e.g., cosine, euclidean)
    pub metric: String,
    /// Index type (e.g., hnsw, flat)
    pub index_type: String,
}
828
/// Agent session
///
/// Plain serializable record describing one agent session. All fields are
/// public and carried as-is; this type performs no validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentSession {
    /// Session ID
    pub id: String,
    /// Session name
    pub name: String,
    /// Creation timestamp
    /// (kept as a string; the format is not fixed by this type — TODO confirm)
    pub created_at: String,
    /// Last updated timestamp (string, same caveat as `created_at`)
    pub updated_at: String,
    /// Session ID (duplicate field for compatibility)
    /// NOTE(review): presumably expected to equal `id`; nothing in this
    /// type enforces that — confirm with producers.
    pub session_id: String,
    /// Message count in session
    pub message_count: u32,
    /// Token count in session
    pub token_count: u32,
    /// Session metadata (arbitrary JSON value)
    pub metadata: serde_json::Value,
}
849
/// Agent message
///
/// Plain serializable record describing one message. All fields are public
/// and carried as-is; this type performs no validation (for example, `role`
/// is a free-form string).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentMessage {
    /// Message ID
    pub id: String,
    /// Sender role (user, assistant, system)
    pub role: String,
    /// Message content
    pub content: String,
    /// Timestamp
    /// (kept as a string; the format is not fixed by this type — TODO confirm)
    pub created_at: String,
    /// Message name
    pub name: String,
    /// Function call if any
    pub function_call: Option<String>,
    /// Tool calls if any (arbitrary JSON value)
    pub tool_calls: Option<serde_json::Value>,
    /// Message metadata (arbitrary JSON value)
    pub metadata: serde_json::Value,
    /// Message timestamp
    /// NOTE(review): how this differs from `created_at` is not visible
    /// here — confirm with producers.
    pub timestamp: String,
}
872
/// Document data
///
/// Plain serializable record holding a document's content together with
/// its chunks. All fields are public and carried as-is; this type performs
/// no validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentData {
    /// Document ID
    pub id: String,
    /// Document content
    pub content: String,
    /// Document metadata (arbitrary JSON value, if any)
    pub metadata: Option<serde_json::Value>,
    /// Creation timestamp
    /// (kept as a string; the format is not fixed by this type — TODO confirm)
    pub created_at: String,
    /// Last updated timestamp (string, same caveat as `created_at`)
    pub updated_at: String,
    /// Document chunks
    pub chunks: Vec<String>,
}
889
/// Document metadata
///
/// Plain serializable record summarising a document. All fields are public
/// and carried as-is; this type performs no validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentMetadata {
    /// Document ID
    pub id: String,
    /// Document size
    /// (unit is not stated here; presumably bytes — TODO confirm)
    pub size: usize,
    /// Creation timestamp
    /// (kept as a string; the format is not fixed by this type — TODO confirm)
    pub created_at: String,
    /// Last updated timestamp (string, same caveat as `created_at`)
    pub updated_at: String,
    /// Document content preview
    pub content: String,
    /// Document metadata (arbitrary JSON value, if any)
    pub metadata: Option<serde_json::Value>,
}