lmrc_postgres/
validation.rs

1//! Configuration validation utilities
2//!
3//! This module provides comprehensive validation for PostgreSQL configuration values including:
4//! - Memory size formats (e.g., "256MB", "1GB")
5//! - CIDR notation for network addresses
6//! - Resource limits and constraints
7//! - Conflicting settings detection
8
9use crate::config::PostgresConfig;
10use crate::error::{Error, Result};
11use std::collections::HashMap;
12use std::net::IpAddr;
13use tracing::warn;
14
15/// Parse memory size string to bytes
16///
17/// Supports PostgreSQL memory units: kB, MB, GB, TB
18///
19/// # Examples
20///
21/// ```
22/// use lmrc_postgres::validation::parse_memory_size;
23///
24/// assert_eq!(parse_memory_size("256MB").unwrap(), 256 * 1024 * 1024);
25/// assert_eq!(parse_memory_size("1GB").unwrap(), 1024 * 1024 * 1024);
26/// assert_eq!(parse_memory_size("512kB").unwrap(), 512 * 1024);
27/// ```
28pub fn parse_memory_size(size_str: &str) -> Result<u64> {
29    let size_str = size_str.trim();
30
31    // Find where the number ends and unit begins
32    let split_pos = size_str
33        .char_indices()
34        .find(|(_, c)| c.is_alphabetic())
35        .map(|(i, _)| i)
36        .unwrap_or(size_str.len());
37
38    if split_pos == 0 {
39        return Err(Error::Configuration(format!(
40            "Invalid memory size format: '{}' (no numeric value)",
41            size_str
42        )));
43    }
44
45    let (num_part, unit_part) = size_str.split_at(split_pos);
46
47    // Parse numeric value
48    let value: u64 = num_part.trim().parse().map_err(|_| {
49        Error::Configuration(format!(
50            "Invalid memory size value: '{}' is not a valid number",
51            num_part
52        ))
53    })?;
54
55    // Parse unit (case-insensitive)
56    let unit = unit_part.trim().to_uppercase();
57    let multiplier = match unit.as_str() {
58        "" | "B" => 1,
59        "KB" => 1024,
60        "MB" => 1024 * 1024,
61        "GB" => 1024 * 1024 * 1024,
62        "TB" => 1024 * 1024 * 1024 * 1024,
63        _ => {
64            return Err(Error::Configuration(format!(
65                "Invalid memory unit: '{}'. Valid units are: B, kB, MB, GB, TB",
66                unit_part
67            )));
68        }
69    };
70
71    Ok(value * multiplier)
72}
73
74/// Validate memory size is within reasonable bounds
75///
76/// Checks that the memory size is:
77/// - At least the minimum required
78/// - Not excessively large (sanity check)
79pub fn validate_memory_size(size_str: &str, min_bytes: u64, max_bytes: u64) -> Result<u64> {
80    let bytes = parse_memory_size(size_str)?;
81
82    if bytes < min_bytes {
83        return Err(Error::Configuration(format!(
84            "Memory size '{}' ({} bytes) is below minimum required ({} bytes)",
85            size_str, bytes, min_bytes
86        )));
87    }
88
89    if bytes > max_bytes {
90        return Err(Error::Configuration(format!(
91            "Memory size '{}' ({} bytes) exceeds maximum allowed ({} bytes)",
92            size_str, bytes, max_bytes
93        )));
94    }
95
96    Ok(bytes)
97}
98
99/// Parse and validate CIDR notation
100///
101/// Validates IP addresses and CIDR ranges for PostgreSQL listen_addresses.
102/// Supports:
103/// - Single IP addresses (e.g., "192.168.1.100")
104/// - CIDR notation (e.g., "192.168.1.0/24", "10.0.0.0/8")
105/// - Special values: "*", "0.0.0.0/0", "::/0"
106///
107/// # Examples
108///
109/// ```
110/// use lmrc_postgres::validation::validate_cidr;
111///
112/// assert!(validate_cidr("192.168.1.100").is_ok());
113/// assert!(validate_cidr("10.0.0.0/8").is_ok());
114/// assert!(validate_cidr("0.0.0.0/0").is_ok());
115/// assert!(validate_cidr("*").is_ok());
116/// ```
117pub fn validate_cidr(address: &str) -> Result<()> {
118    let address = address.trim();
119
120    // Special cases
121    if address == "*" || address.is_empty() {
122        return Ok(());
123    }
124
125    // Check for CIDR notation
126    if let Some((ip_part, prefix_part)) = address.split_once('/') {
127        // Validate IP address part
128        let _ip: IpAddr = ip_part.parse().map_err(|_| {
129            Error::Configuration(format!(
130                "Invalid IP address in CIDR notation: '{}'",
131                ip_part
132            ))
133        })?;
134
135        // Validate prefix length
136        let prefix: u8 = prefix_part.parse().map_err(|_| {
137            Error::Configuration(format!(
138                "Invalid CIDR prefix length: '{}' is not a valid number",
139                prefix_part
140            ))
141        })?;
142
143        // Check prefix is in valid range (0-32 for IPv4, 0-128 for IPv6)
144        let max_prefix = if ip_part.contains(':') { 128 } else { 32 };
145
146        if prefix > max_prefix {
147            return Err(Error::Configuration(format!(
148                "Invalid CIDR prefix length: {} exceeds maximum of {}",
149                prefix, max_prefix
150            )));
151        }
152    } else {
153        // No slash, should be a plain IP address
154        let _ip: IpAddr = address
155            .parse()
156            .map_err(|_| Error::Configuration(format!("Invalid IP address: '{}'", address)))?;
157    }
158
159    Ok(())
160}
161
162/// Validate listen_addresses configuration
163///
164/// Supports:
165/// - Single address: "192.168.1.100"
166/// - Multiple addresses: "192.168.1.100,10.0.0.1"
167/// - CIDR ranges: "192.168.1.0/24"
168/// - Special values: "*", "0.0.0.0/0"
169pub fn validate_listen_addresses(addresses: &str) -> Result<()> {
170    if addresses.trim().is_empty() {
171        return Err(Error::Configuration(
172            "listen_addresses cannot be empty".to_string(),
173        ));
174    }
175
176    // Split by comma and validate each address
177    for addr in addresses.split(',') {
178        validate_cidr(addr.trim())?;
179    }
180
181    Ok(())
182}
183
184/// Check for conflicting PostgreSQL settings
185///
186/// Detects common configuration conflicts such as:
187/// - shared_buffers larger than available RAM
188/// - work_mem * max_connections exceeding RAM
189/// - maintenance_work_mem too large
190/// - checkpoint settings conflicts
191pub fn check_conflicting_settings(config: &PostgresConfig) -> Result<Vec<String>> {
192    let mut warnings = Vec::new();
193
194    // Parse memory sizes if available
195    let shared_buffers_bytes = config
196        .shared_buffers
197        .as_ref()
198        .and_then(|s| parse_memory_size(s).ok());
199
200    let work_mem_bytes = config
201        .work_mem
202        .as_ref()
203        .and_then(|s| parse_memory_size(s).ok());
204
205    let maintenance_work_mem_bytes = config
206        .maintenance_work_mem
207        .as_ref()
208        .and_then(|s| parse_memory_size(s).ok());
209
210    let effective_cache_size_bytes = config
211        .effective_cache_size
212        .as_ref()
213        .and_then(|s| parse_memory_size(s).ok());
214
215    // Rule 1: shared_buffers should be 25% of RAM (as a guideline)
216    if let Some(shared_buffers) = shared_buffers_bytes {
217        // Typical minimum is 128MB
218        if shared_buffers < 128 * 1024 * 1024 {
219            warnings.push(format!(
220                "shared_buffers ({}) is very low. Consider at least 128MB",
221                config.shared_buffers.as_ref().unwrap()
222            ));
223        }
224
225        // Typical maximum is 8GB-16GB depending on workload
226        if shared_buffers > 16 * 1024 * 1024 * 1024 {
227            warnings.push(format!(
228                "shared_buffers ({}) is very high. Values above 16GB rarely provide additional benefit",
229                config.shared_buffers.as_ref().unwrap()
230            ));
231        }
232    }
233
234    // Rule 2: work_mem * max_connections should not exceed available RAM
235    if let (Some(work_mem), Some(max_conn)) = (work_mem_bytes, config.max_connections) {
236        let total_work_mem = work_mem * max_conn as u64;
237
238        // If total exceeds 8GB, warn (this is a rough heuristic)
239        if total_work_mem > 8 * 1024 * 1024 * 1024 {
240            warnings.push(format!(
241                "work_mem ({}) * max_connections ({}) = {}MB total. This may exceed available RAM",
242                config.work_mem.as_ref().unwrap(),
243                max_conn,
244                total_work_mem / (1024 * 1024)
245            ));
246        }
247    }
248
249    // Rule 3: maintenance_work_mem should be reasonable
250    if let Some(maint_mem) = maintenance_work_mem_bytes {
251        if maint_mem < 64 * 1024 * 1024 {
252            warnings.push(format!(
253                "maintenance_work_mem ({}) is low. Consider at least 64MB for better maintenance operations",
254                config.maintenance_work_mem.as_ref().unwrap()
255            ));
256        }
257
258        if maint_mem > 2 * 1024 * 1024 * 1024 {
259            warnings.push(format!(
260                "maintenance_work_mem ({}) is very high. Values above 2GB rarely help",
261                config.maintenance_work_mem.as_ref().unwrap()
262            ));
263        }
264    }
265
266    // Rule 4: effective_cache_size should be larger than shared_buffers
267    if let (Some(ecs), Some(sb)) = (effective_cache_size_bytes, shared_buffers_bytes)
268        && ecs < sb
269    {
270        warnings.push(format!(
271            "effective_cache_size ({}) should be larger than shared_buffers ({})",
272            config.effective_cache_size.as_ref().unwrap(),
273            config.shared_buffers.as_ref().unwrap()
274        ));
275    }
276
277    // Rule 5: checkpoint_completion_target
278    if let Some(target) = config.checkpoint_completion_target
279        && target > 0.9
280    {
281        warnings.push(format!(
282            "checkpoint_completion_target ({}) is very high. Values above 0.9 may cause checkpoint spikes",
283            target
284        ));
285    }
286
287    // Rule 6: Port validation
288    if config.port < 1024 {
289        warnings.push(format!(
290            "Port {} requires root privileges. Consider using ports >= 1024",
291            config.port
292        ));
293    }
294
295    Ok(warnings)
296}
297
298/// Validate resource limits
299///
300/// Ensures PostgreSQL configuration respects system limits:
301/// - File descriptor limits (max_connections)
302/// - Shared memory limits (shared_buffers)
303/// - Connection limits
304pub fn validate_resource_limits(config: &PostgresConfig) -> Result<Vec<String>> {
305    let mut warnings = Vec::new();
306
307    // Check max_connections
308    if let Some(max_conn) = config.max_connections {
309        if max_conn < 10 {
310            warnings.push(format!(
311                "max_connections ({}) is very low. Most applications need at least 10-20 connections",
312                max_conn
313            ));
314        }
315
316        if max_conn > 1000 {
317            warnings.push(format!(
318                "max_connections ({}) is very high. Consider using connection pooling (pgBouncer, pgPool)",
319                max_conn
320            ));
321        }
322
323        // Each connection needs ~10MB overhead + work_mem
324        let estimated_overhead = max_conn as u64 * 10 * 1024 * 1024;
325        if estimated_overhead > 10 * 1024 * 1024 * 1024 {
326            warnings.push(format!(
327                "max_connections ({}) implies {}GB connection overhead. This may exceed system limits",
328                max_conn,
329                estimated_overhead / (1024 * 1024 * 1024)
330            ));
331        }
332    }
333
334    Ok(warnings)
335}
336
337/// Comprehensive configuration validation
338///
339/// Runs all validation checks on the PostgreSQL configuration:
340/// 1. Basic validation (from PostgresConfig::validate)
341/// 2. Memory size validation
342/// 3. CIDR notation validation
343/// 4. Conflicting settings detection
344/// 5. Resource limits validation
345///
346/// Returns warnings that don't prevent operation but should be reviewed.
347pub fn validate_comprehensive(config: &PostgresConfig) -> Result<Vec<String>> {
348    // Run basic validation first
349    config.validate()?;
350
351    let mut all_warnings = Vec::new();
352
353    // Validate memory sizes
354    if let Some(ref shared_buffers) = config.shared_buffers {
355        match validate_memory_size(shared_buffers, 1024 * 1024, 128 * 1024 * 1024 * 1024) {
356            Ok(_) => {}
357            Err(e) => return Err(e),
358        }
359    }
360
361    if let Some(ref work_mem) = config.work_mem {
362        match validate_memory_size(work_mem, 64 * 1024, 10 * 1024 * 1024 * 1024) {
363            Ok(_) => {}
364            Err(e) => return Err(e),
365        }
366    }
367
368    if let Some(ref maint_mem) = config.maintenance_work_mem {
369        match validate_memory_size(maint_mem, 1024 * 1024, 10 * 1024 * 1024 * 1024) {
370            Ok(_) => {}
371            Err(e) => return Err(e),
372        }
373    }
374
375    // Validate listen_addresses
376    validate_listen_addresses(&config.listen_addresses)?;
377
378    // Check for conflicting settings
379    let conflict_warnings = check_conflicting_settings(config)?;
380    all_warnings.extend(conflict_warnings);
381
382    // Check resource limits
383    let resource_warnings = validate_resource_limits(config)?;
384    all_warnings.extend(resource_warnings);
385
386    // Log warnings
387    for warning in &all_warnings {
388        warn!("Configuration warning: {}", warning);
389    }
390
391    Ok(all_warnings)
392}
393
394/// Auto-tune configuration based on system resources
395///
396/// Provides intelligent defaults based on available system resources.
397/// This implements PostgreSQL tuning best practices.
398///
399/// # Parameters
400///
401/// - `total_ram_mb`: Total system RAM in MB
402/// - `cpu_cores`: Number of CPU cores
403/// - `workload`: Workload type (Web, Mixed, DataWarehouse, OLTP)
404///
405/// Returns a HashMap of recommended configuration values.
406pub fn auto_tune(
407    total_ram_mb: u64,
408    cpu_cores: u32,
409    workload: WorkloadType,
410) -> HashMap<String, String> {
411    let mut config = HashMap::new();
412
413    // shared_buffers: 25% of RAM (typical recommendation)
414    let shared_buffers_mb = (total_ram_mb / 4).clamp(128, 16 * 1024);
415    config.insert(
416        "shared_buffers".to_string(),
417        format!("{}MB", shared_buffers_mb),
418    );
419
420    // effective_cache_size: 50-75% of RAM
421    let ecs_mb = match workload {
422        WorkloadType::Web => total_ram_mb / 2,
423        WorkloadType::Mixed => (total_ram_mb * 2) / 3,
424        WorkloadType::DataWarehouse => (total_ram_mb * 3) / 4,
425        WorkloadType::Oltp => (total_ram_mb * 2) / 3,
426    };
427    config.insert("effective_cache_size".to_string(), format!("{}MB", ecs_mb));
428
429    // work_mem: depends on workload and connections
430    let work_mem_mb = match workload {
431        WorkloadType::Web => 4,
432        WorkloadType::Mixed => 16,
433        WorkloadType::DataWarehouse => 64,
434        WorkloadType::Oltp => 8,
435    };
436    config.insert("work_mem".to_string(), format!("{}MB", work_mem_mb));
437
438    // maintenance_work_mem: ~5% of RAM
439    let maint_work_mem_mb = (total_ram_mb / 20).clamp(64, 2048);
440    config.insert(
441        "maintenance_work_mem".to_string(),
442        format!("{}MB", maint_work_mem_mb),
443    );
444
445    // max_connections: based on workload
446    let max_connections = match workload {
447        WorkloadType::Web => 200,
448        WorkloadType::Mixed => 100,
449        WorkloadType::DataWarehouse => 20,
450        WorkloadType::Oltp => 300,
451    };
452    config.insert("max_connections".to_string(), max_connections.to_string());
453
454    // max_worker_processes: based on CPU cores
455    let max_workers = cpu_cores.max(8);
456    config.insert("max_worker_processes".to_string(), max_workers.to_string());
457
458    // max_parallel_workers_per_gather: 2-4 per query
459    let parallel_workers = (cpu_cores / 4).clamp(2, 4);
460    config.insert(
461        "max_parallel_workers_per_gather".to_string(),
462        parallel_workers.to_string(),
463    );
464
465    // checkpoint_completion_target
466    config.insert(
467        "checkpoint_completion_target".to_string(),
468        "0.9".to_string(),
469    );
470
471    // wal_buffers: -1 (auto) or 16MB
472    config.insert("wal_buffers".to_string(), "16MB".to_string());
473
474    // random_page_cost: SSD vs HDD
475    let random_page_cost = match workload {
476        WorkloadType::Web => "1.1",
477        WorkloadType::Mixed => "1.1",
478        WorkloadType::DataWarehouse => "1.1",
479        WorkloadType::Oltp => "1.1",
480    };
481    config.insert("random_page_cost".to_string(), random_page_cost.to_string());
482
483    config
484}
485
/// Kind of database workload that `auto_tune` optimises for.
///
/// The variant selects the per-workload recommendations, such as the memory
/// per operation and the connection limit.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WorkloadType {
    /// Web application: moderate connection count, OLTP with many reads.
    Web,
    /// Mixed workload: OLTP combined with analytics.
    Mixed,
    /// Data warehouse: complex queries over fewer connections.
    DataWarehouse,
    /// High-throughput OLTP: many connections running simple queries.
    Oltp,
}
498
#[cfg(test)]
mod tests {
    //! Unit tests for the parsing and validation helpers in this module.

    use super::*;

    const KIB: u64 = 1024;
    const MIB: u64 = 1024 * KIB;
    const GIB: u64 = 1024 * MIB;
    const TIB: u64 = 1024 * GIB;

    /// Every supported unit parses, case is ignored, malformed input fails.
    #[test]
    fn test_parse_memory_size() {
        let accepted = [
            ("256MB", 256 * MIB),
            ("1GB", GIB),
            ("512kB", 512 * KIB),
            ("2TB", 2 * TIB),
            ("100", 100),
            // Case insensitive
            ("256mb", 256 * MIB),
            ("1gb", GIB),
        ];
        for (input, expected) in accepted {
            assert_eq!(parse_memory_size(input).unwrap(), expected);
        }

        // Invalid formats
        for input in ["", "MB", "256XB", "abc"] {
            assert!(parse_memory_size(input).is_err());
        }
    }

    /// Values inside the range pass; values outside either bound fail.
    #[test]
    fn test_validate_memory_size() {
        let min = 100 * MIB;
        let max = 10 * GIB;

        for in_range in ["256MB", "1GB"] {
            assert!(validate_memory_size(in_range, min, max).is_ok());
        }

        // "50MB" is too small, "20GB" is too large
        for out_of_range in ["50MB", "20GB"] {
            assert!(validate_memory_size(out_of_range, min, max).is_err());
        }
    }

    /// Plain addresses, CIDR ranges and the wildcard pass; malformed input fails.
    #[test]
    fn test_validate_cidr() {
        let valid = [
            // Single IPs
            "192.168.1.100",
            "10.0.0.1",
            "::1",
            // CIDR
            "192.168.1.0/24",
            "10.0.0.0/8",
            "0.0.0.0/0",
            "::/0",
            // Special values
            "*",
        ];
        for address in valid {
            assert!(validate_cidr(address).is_ok());
        }

        for address in ["999.999.999.999", "192.168.1.0/33", "invalid"] {
            assert!(validate_cidr(address).is_err());
        }
    }

    /// Single entries and comma-separated lists pass; empty or bad input fails.
    #[test]
    fn test_validate_listen_addresses() {
        let valid = [
            "192.168.1.100",
            "192.168.1.100,10.0.0.1",
            "*",
            "0.0.0.0/0",
        ];
        for addresses in valid {
            assert!(validate_listen_addresses(addresses).is_ok());
        }

        for addresses in ["", "invalid"] {
            assert!(validate_listen_addresses(addresses).is_err());
        }
    }

    /// The tuned map has the core keys and a unit-suffixed shared_buffers.
    #[test]
    fn test_auto_tune() {
        let tuned = auto_tune(16384, 8, WorkloadType::Web);

        for key in [
            "shared_buffers",
            "effective_cache_size",
            "work_mem",
            "max_connections",
        ] {
            assert!(tuned.contains_key(key));
        }

        // Check reasonable values
        let shared_buffers = tuned.get("shared_buffers").unwrap();
        assert!(shared_buffers.contains("MB") || shared_buffers.contains("GB"));
    }
}