heliosdb-nano 3.30.0

PostgreSQL-compatible embedded database with TDE + ZKE encryption, HNSW vector search, Product Quantization, git-like branching, time-travel queries, materialized views, row-level security, and 50+ enterprise features
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
//! Query Router for Transparent Write Routing (TWR)
//!
//! This module implements Transparent Write Routing (TWR) from standbys to primary.
//! When a standby receives a write operation (DML/DDL), it routes the query to the
//! primary and returns the result to the client transparently.
//!
//! This enables:
//! - Applications to connect to any node (primary or standby)
//! - Automatic write routing to primary (TWR)
//! - Local read execution on standbys for load distribution
//!
//! # Behavior by Sync Mode
//!
//! - **Sync/Semi-Sync**: Writes routed to primary, results returned synchronously
//! - **Async**: Writes rejected (standby is read-only) - configurable

use std::io::{self, Read, Write};
use std::net::TcpStream;
use std::sync::RwLock;
use std::time::Duration;

/// Result from a forwarded query
///
/// This is what the primary answered for a single forwarded statement. Note
/// that an error reported by the primary while executing the query is
/// delivered as the `Error` variant (inside `Ok`), not as a `ForwarderError`.
#[derive(Debug, Clone)]
pub enum ForwardedResult {
    /// Query executed successfully with row data
    Rows {
        /// Column metadata taken from the RowDescription message.
        columns: Vec<ColumnInfo>,
        /// One entry per DataRow message. Each value is the column's bytes
        /// decoded lossily as UTF-8, or `None` for SQL NULL.
        rows: Vec<Vec<Option<String>>>,
    },
    /// Command completed (INSERT, UPDATE, DELETE, etc.)
    Command {
        /// Command tag from CommandComplete (e.g. `INSERT 0 1`), or `OK` when
        /// the primary sent no tag before ReadyForQuery.
        tag: String,
        /// Trailing number of the tag; 0 when the tag does not end in a number.
        rows_affected: u64,
    },
    /// Error from primary
    Error {
        /// ErrorResponse field `S`; defaults to `ERROR` when absent.
        severity: String,
        /// ErrorResponse field `C` (SQLSTATE); defaults to `XX000` when absent.
        code: String,
        /// ErrorResponse field `M`; defaults to `Unknown error` when absent.
        message: String,
        /// ErrorResponse field `D`, if the primary supplied one.
        detail: Option<String>,
        /// ErrorResponse field `H`, if the primary supplied one.
        hint: Option<String>,
    },
}

/// Column information from query result
///
/// Only the name and type OID of each RowDescription field are retained; the
/// other per-field attributes sent by the primary are read and discarded.
#[derive(Debug, Clone)]
pub struct ColumnInfo {
    /// Column name as sent by the primary (decoded lossily as UTF-8).
    pub name: String,
    /// Data type OID of the column as sent by the primary.
    pub type_oid: i32,
}

/// Query forwarder that routes writes to primary
///
/// Holds the primary's address, the timeouts applied to each connection, and
/// a small pool of idle, already-handshaken connections that are reused
/// between forwarded queries.
pub struct QueryForwarder {
    /// Host name or IP address of the primary.
    primary_host: String,
    /// TCP port of the primary.
    primary_port: u16,
    /// Upper bound on establishing the TCP connection.
    connection_timeout: Duration,
    /// Applied as both the socket read timeout and the socket write timeout.
    query_timeout: Duration,
    /// Connection pool (simple implementation)
    connections: RwLock<Vec<TcpStream>>,
    /// Maximum number of idle connections kept in the pool; connections
    /// returned beyond this are dropped.
    max_connections: usize,
}

impl QueryForwarder {
    /// Build a forwarder that targets the primary at `primary_host:primary_port`.
    ///
    /// Defaults: 5 second connection timeout, 30 second query timeout, and an
    /// initially empty pool that keeps at most 10 idle connections. No
    /// connection is opened here.
    pub fn new(primary_host: String, primary_port: u16) -> Self {
        let connection_timeout = Duration::from_secs(5);
        let query_timeout = Duration::from_secs(30);
        let idle_pool = RwLock::new(Vec::new());

        Self {
            primary_host,
            primary_port,
            connection_timeout,
            query_timeout,
            connections: idle_pool,
            max_connections: 10,
        }
    }

    /// Set connection timeout
    pub fn with_connection_timeout(mut self, timeout: Duration) -> Self {
        self.connection_timeout = timeout;
        self
    }

    /// Set query timeout
    pub fn with_query_timeout(mut self, timeout: Duration) -> Self {
        self.query_timeout = timeout;
        self
    }

    /// Send `query` to the primary and hand back whatever it answered.
    ///
    /// A connection is taken from the pool (or opened on demand) and the query
    /// is run over it with the Simple Query protocol. The connection goes back
    /// to the pool only when the exchange completed; after a failure it is
    /// dropped, since the stream may be mid-message.
    ///
    /// # Errors
    /// Returns a `ForwarderError` when no connection could be obtained or the
    /// exchange with the primary failed. An error the primary reports for the
    /// query itself arrives as `Ok(ForwardedResult::Error { .. })`.
    pub fn forward_query(&self, query: &str) -> Result<ForwardedResult, ForwarderError> {
        let mut stream = self.get_connection()?;

        match self.execute_query(&mut stream, query) {
            Ok(outcome) => {
                // The exchange ran to ReadyForQuery, so the stream is reusable.
                self.return_connection(stream);
                Ok(outcome)
            }
            // `stream` is dropped (closed) when this function returns.
            Err(failure) => Err(failure),
        }
    }

    /// Get a connection from pool or create new one
    ///
    /// Pooled connections are popped one at a time until a live one is found;
    /// dead ones are dropped along the way so they do not linger in the pool.
    /// The pool lock is held only for the `pop` itself, not while the socket
    /// is being probed. If the pool is empty (or its lock is poisoned) a new
    /// connection is opened.
    ///
    /// # Errors
    /// Propagates the error from `create_connection` when a fresh connection
    /// is needed and cannot be established.
    fn get_connection(&self) -> Result<TcpStream, ForwarderError> {
        loop {
            // Scope the write guard to this statement so probing happens unlocked.
            let candidate = match self.connections.write() {
                Ok(mut pool) => pool.pop(),
                Err(_) => None,
            };

            match candidate {
                Some(conn) if Self::is_connection_alive(&conn) => return Ok(conn),
                // Stale connection: drop it and try the next pooled one.
                Some(_) => continue,
                None => break,
            }
        }

        // Nothing reusable in the pool.
        self.create_connection()
    }

    /// Hand a connection back for reuse.
    ///
    /// The connection is kept only while the pool holds fewer than
    /// `max_connections` entries. If the pool is full, or its lock cannot be
    /// acquired, the connection is simply dropped.
    fn return_connection(&self, conn: TcpStream) {
        match self.connections.write() {
            Ok(mut idle) if idle.len() < self.max_connections => idle.push(conn),
            // Pool full or lock poisoned: `conn` is dropped on return.
            _ => {}
        };
    }

    /// Check if connection is still alive
    ///
    /// Switches the socket to non-blocking mode and peeks one byte:
    /// - a zero-length read means the peer closed the connection,
    /// - `WouldBlock` means the socket is open with nothing pending,
    /// - pending data is also reported as alive,
    /// - any other error means the connection is unusable.
    ///
    /// The socket is switched back to blocking mode afterwards. If that fails
    /// the connection is reported as dead: later blocking reads on it would
    /// fail with `WouldBlock`, so it must not be reused.
    fn is_connection_alive(conn: &TcpStream) -> bool {
        if conn.set_nonblocking(true).is_err() {
            return false;
        }

        let mut probe = [0u8; 1];
        let alive = match conn.peek(&mut probe) {
            Ok(0) => false, // Connection closed
            Ok(_) => true,  // Data available
            Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => true, // No data, but alive
            Err(_) => false, // Error
        };

        // Always attempt the restore, even for a dead socket.
        let restored = conn.set_nonblocking(false).is_ok();
        alive && restored
    }

    /// Create a new connection to the primary
    fn create_connection(&self) -> Result<TcpStream, ForwarderError> {
        use std::net::ToSocketAddrs;

        let addr_str = format!("{}:{}", self.primary_host, self.primary_port);

        // Resolve hostname to socket address (supports Docker DNS)
        let addr = addr_str
            .to_socket_addrs()
            .map_err(|e| ForwarderError::Connection(format!("Cannot resolve '{}': {}", addr_str, e)))?
            .next()
            .ok_or_else(|| ForwarderError::Connection(format!("No addresses found for '{}'", addr_str)))?;

        let conn = TcpStream::connect_timeout(&addr, self.connection_timeout)
            .map_err(|e| ForwarderError::Connection(format!("Failed to connect to primary at {}: {}", addr_str, e)))?;

        conn.set_read_timeout(Some(self.query_timeout))
            .map_err(|e| ForwarderError::Connection(format!("Failed to set read timeout: {}", e)))?;
        conn.set_write_timeout(Some(self.query_timeout))
            .map_err(|e| ForwarderError::Connection(format!("Failed to set write timeout: {}", e)))?;

        // Perform startup handshake
        let mut conn = conn;
        self.perform_startup(&mut conn)?;

        Ok(conn)
    }

    /// Perform PostgreSQL startup handshake
    ///
    /// Sends a protocol 3.0 StartupMessage with `user` and `database` both set
    /// to `heliosdb`, then consumes backend messages until ReadyForQuery.
    /// ParameterStatus, BackendKeyData and unrecognised messages are skipped.
    ///
    /// Only trust authentication is supported. If the primary requests any
    /// other authentication method the handshake fails immediately; since no
    /// credentials are ever sent, waiting would only block until the read
    /// timeout expires.
    ///
    /// # Errors
    /// - `ForwarderError::Protocol` on an I/O failure, a malformed message
    ///   length, or an unsupported authentication request.
    /// - `ForwarderError::Primary` if the primary answers with ErrorResponse.
    fn perform_startup(&self, conn: &mut TcpStream) -> Result<(), ForwarderError> {
        /// Read and discard `len` payload bytes, labelling I/O failures with `context`.
        fn discard(conn: &mut TcpStream, len: usize, context: &str) -> Result<(), ForwarderError> {
            let mut buf = vec![0u8; len];
            conn.read_exact(&mut buf)
                .map_err(|e| ForwarderError::Protocol(format!("{}: {}", context, e)))
        }

        // Build startup message
        // Protocol version 3.0 (196608 = 3 << 16)
        let mut params: Vec<u8> = Vec::new();

        // user parameter
        params.extend_from_slice(b"user\0");
        params.extend_from_slice(b"heliosdb\0");

        // database parameter
        params.extend_from_slice(b"database\0");
        params.extend_from_slice(b"heliosdb\0");

        // Null terminator for params
        params.push(0);

        let msg_len = 4 + 4 + params.len(); // length + protocol + params
        let mut msg = Vec::with_capacity(msg_len);
        msg.extend_from_slice(&(msg_len as i32).to_be_bytes());
        msg.extend_from_slice(&196608i32.to_be_bytes()); // Protocol 3.0
        msg.extend_from_slice(&params);

        conn.write_all(&msg)
            .map_err(|e| ForwarderError::Protocol(format!("Failed to send startup: {}", e)))?;
        conn.flush()
            .map_err(|e| ForwarderError::Protocol(format!("Failed to flush startup: {}", e)))?;

        // Read response - expect AuthenticationOk followed by ReadyForQuery
        loop {
            let msg_type = self.read_byte(conn)?;

            // The length field counts itself, so anything below 4 is malformed.
            // Reject it instead of letting the subtraction underflow.
            let raw_len = self.read_i32(conn)?;
            if raw_len < 4 {
                return Err(ForwarderError::Protocol(format!(
                    "Invalid length {} for startup message type {}",
                    raw_len, msg_type as char
                )));
            }
            let msg_len = (raw_len - 4) as usize;

            match msg_type {
                b'R' => {
                    // AuthenticationXxx: payload starts with a 4-byte auth type.
                    if msg_len < 4 {
                        return Err(ForwarderError::Protocol(format!(
                            "Authentication message too short ({} payload bytes)",
                            msg_len
                        )));
                    }
                    let auth_type = self.read_i32(conn)?;
                    if auth_type != 0 {
                        // We never send credentials, so the primary would wait
                        // for us while we wait for it. Fail fast instead.
                        return Err(ForwarderError::Protocol(format!(
                            "Unsupported authentication method requested by primary (auth type {}); only trust authentication is supported",
                            auth_type
                        )));
                    }
                    // AuthenticationOk; consume any trailing bytes to stay in sync.
                    discard(conn, msg_len - 4, "Failed to read auth data")?;
                }
                b'S' => {
                    // ParameterStatus - skip
                    discard(conn, msg_len, "Failed to read param status")?;
                }
                b'K' => {
                    // BackendKeyData - skip
                    discard(conn, msg_len, "Failed to read backend key")?;
                }
                b'Z' => {
                    // ReadyForQuery - we're done
                    discard(conn, msg_len, "Failed to read ready")?;
                    return Ok(());
                }
                b'E' => {
                    // ErrorResponse
                    let error = self.parse_error_response(conn, msg_len)?;
                    return Err(ForwarderError::Primary(error));
                }
                _ => {
                    // Skip unknown message
                    discard(conn, msg_len, "Failed to skip message")?;
                }
            }
        }
    }

    /// Execute a query on the connection
    ///
    /// Sends `query` as a Simple Query (`Q`) message and reads backend
    /// messages until ReadyForQuery:
    /// - RowDescription / DataRow are collected into `ForwardedResult::Rows`,
    /// - CommandComplete supplies the tag for `ForwardedResult::Command`,
    /// - ErrorResponse is returned as `Ok(ForwardedResult::Error { .. })` once
    ///   the stream has been drained up to ReadyForQuery,
    /// - notices, EmptyQueryResponse and unrecognised messages are skipped.
    ///
    /// When rows or columns were received the result is `Rows`, even if a
    /// command tag was also seen. With neither rows nor a tag the result is a
    /// `Command` tagged `OK` with zero rows affected.
    ///
    /// # Errors
    /// `ForwarderError::Protocol` if the query contains a NUL byte (the wire
    /// format is NUL-terminated, so the primary would see a truncated
    /// statement), if the query does not fit in one message, on any I/O
    /// failure, or when a message carries a length smaller than the length
    /// field itself.
    fn execute_query(&self, conn: &mut TcpStream, query: &str) -> Result<ForwardedResult, ForwarderError> {
        /// Convert a wire length field (which counts itself) into a payload size.
        fn payload_len(raw_len: i32, msg_type: u8) -> Result<usize, ForwarderError> {
            if raw_len < 4 {
                return Err(ForwarderError::Protocol(format!(
                    "Invalid length {} for message type {}",
                    raw_len, msg_type as char
                )));
            }
            Ok((raw_len - 4) as usize)
        }

        /// Read exactly `len` payload bytes, labelling I/O failures with `context`.
        fn read_payload(conn: &mut TcpStream, len: usize, context: &str) -> Result<Vec<u8>, ForwarderError> {
            let mut buf = vec![0u8; len];
            conn.read_exact(&mut buf)
                .map_err(|e| ForwarderError::Protocol(format!("{}: {}", context, e)))?;
            Ok(buf)
        }

        // Send Simple Query message
        let query_bytes = query.as_bytes();
        if query_bytes.contains(&0) {
            return Err(ForwarderError::Protocol(
                "Query contains an embedded NUL byte".to_string(),
            ));
        }
        let msg_len = 4 + query_bytes.len() + 1; // length + query + null
        if msg_len > i32::MAX as usize {
            return Err(ForwarderError::Protocol(
                "Query is too large for a single protocol message".to_string(),
            ));
        }

        let mut msg = Vec::with_capacity(1 + msg_len);
        msg.push(b'Q');
        msg.extend_from_slice(&(msg_len as i32).to_be_bytes());
        msg.extend_from_slice(query_bytes);
        msg.push(0);

        conn.write_all(&msg)
            .map_err(|e| ForwarderError::Protocol(format!("Failed to send query: {}", e)))?;
        conn.flush()
            .map_err(|e| ForwarderError::Protocol(format!("Failed to flush query: {}", e)))?;

        // Read response
        let mut columns: Vec<ColumnInfo> = Vec::new();
        let mut rows: Vec<Vec<Option<String>>> = Vec::new();
        let mut command_tag: Option<String> = None;

        loop {
            let msg_type = self.read_byte(conn)?;
            let msg_len = payload_len(self.read_i32(conn)?, msg_type)?;

            match msg_type {
                b'T' => {
                    // RowDescription
                    columns = self.parse_row_description(conn, msg_len)?;
                }
                b'D' => {
                    // DataRow
                    let row = self.parse_data_row(conn, msg_len, columns.len())?;
                    rows.push(row);
                }
                b'C' => {
                    // CommandComplete
                    let mut buf = read_payload(conn, msg_len, "Failed to read command complete")?;
                    // Remove null terminator
                    if let Some(0) = buf.last() {
                        buf.pop();
                    }
                    command_tag = Some(String::from_utf8_lossy(&buf).to_string());
                }
                b'Z' => {
                    // ReadyForQuery - done
                    read_payload(conn, msg_len, "Failed to read ready")?;

                    // Return appropriate result
                    if !columns.is_empty() || !rows.is_empty() {
                        return Ok(ForwardedResult::Rows { columns, rows });
                    } else if let Some(tag) = command_tag {
                        let rows_affected = Self::parse_rows_affected(&tag);
                        return Ok(ForwardedResult::Command { tag, rows_affected });
                    } else {
                        return Ok(ForwardedResult::Command {
                            tag: "OK".to_string(),
                            rows_affected: 0,
                        });
                    }
                }
                b'E' => {
                    // ErrorResponse
                    let error = self.parse_error_response(conn, msg_len)?;

                    // Still need to read until ReadyForQuery so the connection
                    // can be reused. A failed read here means the stream is
                    // unusable, so it is reported rather than ignored.
                    loop {
                        let mt = self.read_byte(conn)?;
                        let ml = payload_len(self.read_i32(conn)?, mt)?;
                        read_payload(conn, ml, "Failed to drain message after error")?;
                        if mt == b'Z' {
                            break;
                        }
                    }

                    return Ok(error);
                }
                b'N' => {
                    // NoticeResponse - skip
                    read_payload(conn, msg_len, "Failed to read notice")?;
                }
                b'I' => {
                    // EmptyQueryResponse: normally has no payload, but consume
                    // whatever was announced so framing stays in sync.
                    read_payload(conn, msg_len, "Failed to read empty query response")?;
                }
                _ => {
                    // Skip unknown message
                    let context = format!("Failed to skip message type {}", msg_type as char);
                    read_payload(conn, msg_len, &context)?;
                }
            }
        }
    }

    /// Parse RowDescription message
    fn parse_row_description(&self, conn: &mut TcpStream, _msg_len: usize) -> Result<Vec<ColumnInfo>, ForwarderError> {
        let num_fields = self.read_i16(conn)? as usize;
        let mut columns = Vec::with_capacity(num_fields);

        for _ in 0..num_fields {
            // Read null-terminated column name
            let name = self.read_string(conn)?;

            // Skip: table OID (4), column attr (2), type OID (4), type size (2), type mod (4), format (2)
            let _table_oid = self.read_i32(conn)?;
            let _column_attr = self.read_i16(conn)?;
            let type_oid = self.read_i32(conn)?;
            let _type_size = self.read_i16(conn)?;
            let _type_mod = self.read_i32(conn)?;
            let _format = self.read_i16(conn)?;

            columns.push(ColumnInfo { name, type_oid });
        }

        Ok(columns)
    }

    /// Parse DataRow message
    ///
    /// Reads the value count, then each value as a 4-byte length followed by
    /// that many bytes. A length of -1 denotes SQL NULL and yields `None`;
    /// other values are decoded lossily as UTF-8. `_msg_len` is not used, and
    /// `num_columns` only serves as a capacity hint.
    ///
    /// # Errors
    /// `ForwarderError::Protocol` on an I/O failure, a negative value count,
    /// or a value length below -1. These are rejected explicitly because
    /// casting them to `usize` would request an enormous allocation.
    fn parse_data_row(&self, conn: &mut TcpStream, _msg_len: usize, num_columns: usize) -> Result<Vec<Option<String>>, ForwarderError> {
        let raw_count = self.read_i16(conn)?;
        if raw_count < 0 {
            return Err(ForwarderError::Protocol(format!(
                "Invalid value count {} in DataRow",
                raw_count
            )));
        }
        let num_values = raw_count as usize;
        let mut row = Vec::with_capacity(num_columns.max(num_values));

        for _ in 0..num_values {
            let len = self.read_i32(conn)?;
            if len == -1 {
                row.push(None); // NULL
            } else if len < 0 {
                return Err(ForwarderError::Protocol(format!(
                    "Invalid value length {} in DataRow",
                    len
                )));
            } else {
                let mut buf = vec![0u8; len as usize];
                conn.read_exact(&mut buf)
                    .map_err(|e| ForwarderError::Protocol(format!("Failed to read data: {}", e)))?;
                row.push(Some(String::from_utf8_lossy(&buf).to_string()));
            }
        }

        Ok(row)
    }

    /// Parse ErrorResponse message
    fn parse_error_response(&self, conn: &mut TcpStream, msg_len: usize) -> Result<ForwardedResult, ForwarderError> {
        let mut buf = vec![0u8; msg_len];
        conn.read_exact(&mut buf)
            .map_err(|e| ForwarderError::Protocol(format!("Failed to read error: {}", e)))?;

        let mut severity = String::from("ERROR");
        let mut code = String::from("XX000");
        let mut message = String::from("Unknown error");
        let mut detail = None;
        let mut hint = None;

        let mut i = 0;
        while i < buf.len() {
            let field_type = buf[i];
            i += 1;
            if field_type == 0 {
                break;
            }

            // Read null-terminated string
            let start = i;
            while i < buf.len() && buf[i] != 0 {
                i += 1;
            }
            let value = String::from_utf8_lossy(&buf[start..i]).to_string();
            i += 1; // Skip null terminator

            match field_type {
                b'S' => severity = value,
                b'C' => code = value,
                b'M' => message = value,
                b'D' => detail = Some(value),
                b'H' => hint = Some(value),
                _ => {} // Ignore other fields
            }
        }

        Ok(ForwardedResult::Error {
            severity,
            code,
            message,
            detail,
            hint,
        })
    }

    /// Extract the affected-row count from a command tag.
    ///
    /// The count is the last whitespace-separated token of the tag, as in
    /// `INSERT 0 1`, `UPDATE 5` or `DELETE 3`. Returns 0 when the tag is empty
    /// or its last token is not an unsigned number (e.g. `CREATE TABLE`).
    fn parse_rows_affected(tag: &str) -> u64 {
        tag.split_whitespace()
            .last()
            .and_then(|count| count.parse::<u64>().ok())
            .unwrap_or(0)
    }

    // Helper functions for reading postgres protocol data

    /// Read exactly one byte from `conn`.
    ///
    /// # Errors
    /// `ForwarderError::Protocol` if the byte cannot be read.
    fn read_byte(&self, conn: &mut TcpStream) -> Result<u8, ForwarderError> {
        let mut byte = [0u8; 1];
        match conn.read_exact(&mut byte) {
            Ok(()) => Ok(byte[0]),
            Err(e) => Err(ForwarderError::Protocol(format!("Failed to read byte: {}", e))),
        }
    }

    /// Read a big-endian (network order) 16-bit signed integer from `conn`.
    ///
    /// # Errors
    /// `ForwarderError::Protocol` if the two bytes cannot be read.
    fn read_i16(&self, conn: &mut TcpStream) -> Result<i16, ForwarderError> {
        let mut raw = [0u8; 2];
        match conn.read_exact(&mut raw) {
            Ok(()) => Ok(i16::from_be_bytes(raw)),
            Err(e) => Err(ForwarderError::Protocol(format!("Failed to read i16: {}", e))),
        }
    }

    /// Read a big-endian (network order) 32-bit signed integer from `conn`.
    ///
    /// # Errors
    /// `ForwarderError::Protocol` if the four bytes cannot be read.
    fn read_i32(&self, conn: &mut TcpStream) -> Result<i32, ForwarderError> {
        let mut raw = [0u8; 4];
        match conn.read_exact(&mut raw) {
            Ok(()) => Ok(i32::from_be_bytes(raw)),
            Err(e) => Err(ForwarderError::Protocol(format!("Failed to read i32: {}", e))),
        }
    }

    /// Read a NUL-terminated string from `conn`, one byte at a time.
    ///
    /// The terminator is consumed but not included in the result. Bytes that
    /// are not valid UTF-8 are replaced rather than rejected.
    ///
    /// # Errors
    /// `ForwarderError::Protocol` if the stream fails before a NUL is seen.
    fn read_string(&self, conn: &mut TcpStream) -> Result<String, ForwarderError> {
        let mut raw = Vec::new();
        loop {
            match self.read_byte(conn)? {
                0 => break,
                other => raw.push(other),
            }
        }
        Ok(String::from_utf8_lossy(&raw).into_owned())
    }
}

/// Errors from query forwarding
///
/// These describe failures of the forwarding machinery itself. An error the
/// primary reports for a forwarded query is not represented here; it is
/// returned as `ForwardedResult::Error`.
#[derive(Debug)]
pub enum ForwarderError {
    /// Connection error
    ///
    /// Resolving the primary's address, connecting to it, or configuring the
    /// socket timeouts failed. The string is a human-readable description.
    Connection(String),
    /// Protocol error
    ///
    /// Sending or receiving a wire-protocol message failed. The string is a
    /// human-readable description.
    Protocol(String),
    /// Error from primary
    ///
    /// The primary answered the startup handshake with an ErrorResponse; the
    /// payload holds the parsed error.
    Primary(ForwardedResult),
    /// Not configured
    ///
    /// NOTE(review): not constructed anywhere in this file; presumably used
    /// by callers when no forwarder has been initialised. Confirm against
    /// call sites.
    NotConfigured,
}

impl std::fmt::Display for ForwarderError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ForwarderError::Connection(msg) => write!(f, "Connection error: {}", msg),
            ForwarderError::Protocol(msg) => write!(f, "Protocol error: {}", msg),
            ForwarderError::Primary(result) => {
                if let ForwardedResult::Error { message, .. } = result {
                    write!(f, "Primary error: {}", message)
                } else {
                    write!(f, "Primary error")
                }
            }
            ForwarderError::NotConfigured => write!(f, "Query forwarder not configured"),
        }
    }
}

// Marker impl so `ForwarderError` can be used wherever a standard error type
// is expected; the default trait methods are used (no `source` is exposed).
impl std::error::Error for ForwarderError {}

/// Global query forwarder instance (initialized when standby connects to primary)
///
/// Stored in a `OnceCell`, so it can be set at most once for the lifetime of
/// the process and is empty until `init_query_forwarder` has been called.
static QUERY_FORWARDER: once_cell::sync::OnceCell<QueryForwarder> = once_cell::sync::OnceCell::new();

/// Install the process-wide query forwarder for the given primary address.
///
/// The forwarder is created with default timeouts. Only the first call has
/// any effect: if a forwarder is already installed, the new one is discarded
/// and the existing one stays in place. No error is reported in that case.
pub fn init_query_forwarder(primary_host: String, primary_port: u16) {
    let forwarder = QueryForwarder::new(primary_host, primary_port);
    if QUERY_FORWARDER.set(forwarder).is_err() {
        // Already initialized; the rejected forwarder is dropped here.
    }
}

/// Get the global query forwarder
///
/// Returns `None` until the global instance has been set, and a reference
/// valid for the rest of the process afterwards.
pub fn query_forwarder() -> Option<&'static QueryForwarder> {
    QUERY_FORWARDER.get()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Command tags paired with the row count `parse_rows_affected` must report.
    const CASES: [(&str, u64); 5] = [
        ("INSERT 0 1", 1),
        ("UPDATE 5", 5),
        ("DELETE 10", 10),
        ("SELECT 100", 100),
        // No trailing number: falls back to zero.
        ("CREATE TABLE", 0),
    ];

    #[test]
    fn test_parse_rows_affected() {
        for (tag, expected) in CASES.iter() {
            assert_eq!(QueryForwarder::parse_rows_affected(tag), *expected);
        }
    }
}