Skip to main content

sqry_core/query/security/
audit.rs

//! Query audit logging with durability guarantees
//!
//! This module provides audit logging for CD queries, including:
//! - Flush-on-drop guarantee via `Drop` impl
//! - File rotation when the log size reaches the maximum (default 10 MiB)
//! - Error handling for I/O failures
//! - Configurable flush interval
//!
//! # Durability Guarantees
//!
//! 1. **Flush-on-drop**: Via the logger's `Drop` implementation
//! 2. **Periodic flush**: On the first `log` call after `flush_interval_secs` has elapsed
//! 3. **Immediate flush**: When the buffer reaches `buffer_size` entries
//! 4. **File rotation**: When the log file size reaches `max_file_size`
//! 5. **I/O errors**: Logged but never fail queries
16
17use std::fs::OpenOptions;
18use std::io::{BufWriter, Write};
19use std::path::{Path, PathBuf};
20use std::time::Instant;
21
22use chrono::{DateTime, Utc};
23use parking_lot::Mutex;
24use serde::Serialize;
25
26use super::guard::QueryCompletionStatus;
27
28/// Query audit log entry
29///
30/// Captures all relevant information about a query execution for audit purposes.
31#[derive(Debug, Serialize)]
32pub struct QueryAuditEntry {
33    /// Query text
34    pub query: String,
35
36    /// Timestamp (UTC)
37    pub timestamp: DateTime<Utc>,
38
39    /// Execution duration (ms)
40    pub duration_ms: u64,
41
42    /// Number of results
43    pub result_count: usize,
44
45    /// Peak memory usage (bytes)
46    pub memory_usage: usize,
47
48    /// Query predicates used (e.g., [`impl:Debug`, `lang:rust`])
49    pub predicates: Vec<String>,
50
51    /// Security outcome
52    pub outcome: QueryOutcome,
53
54    /// Applied security limits (for audit trail)
55    pub applied_limits: AppliedLimits,
56
57    /// User context (from environment)
58    pub user: String,
59}
60
61/// Applied security limits recorded in audit entry
62#[derive(Debug, Clone, Serialize)]
63pub struct AppliedLimits {
64    /// Timeout in milliseconds
65    pub timeout_ms: u64,
66    /// Result cap
67    pub result_cap: usize,
68    /// Memory limit in bytes
69    pub memory_limit: usize,
70}
71
72/// Query outcome for audit logging (per Codex iter10)
73///
74/// Captures both the completion status and whether partial results were returned.
75/// This allows audit logs to distinguish between:
76/// - Complete success (all results returned)
77/// - Partial success (some results returned, limit reached)
78/// - Failure (no results due to pre-execution rejection)
79#[derive(Debug, Clone, Serialize)]
80pub enum QueryOutcome {
81    /// Query completed successfully with all results
82    Success,
83    /// Query timed out, partial results may have been returned
84    Timeout {
85        /// Number of results returned before timeout
86        partial_results_count: usize,
87    },
88    /// Result cap exceeded, partial results returned
89    ResultCapExceeded {
90        /// Number of results returned
91        returned: usize,
92        /// The configured limit
93        limit: usize,
94    },
95    /// Memory limit exceeded, partial results returned
96    MemoryLimitExceeded {
97        /// Number of results returned
98        returned: usize,
99        /// Memory usage in bytes when limit was hit
100        memory_bytes: usize,
101    },
102    /// Cost limit exceeded (pre-execution rejection, no results)
103    CostLimitExceeded {
104        /// Estimated cost
105        estimated: usize,
106        /// Configured limit
107        limit: usize,
108    },
109    /// Error during execution
110    Error(String),
111}
112
113impl QueryOutcome {
114    /// Create outcome from `QueryCompletionStatus` (per Codex iter10)
115    #[must_use]
116    pub fn from_completion_status(status: &QueryCompletionStatus, result_count: usize) -> Self {
117        match status {
118            QueryCompletionStatus::Complete => Self::Success,
119            QueryCompletionStatus::TimedOut { .. } => Self::Timeout {
120                partial_results_count: result_count,
121            },
122            QueryCompletionStatus::ResultCapReached { count, limit } => Self::ResultCapExceeded {
123                returned: *count,
124                limit: *limit,
125            },
126            QueryCompletionStatus::MemoryLimitReached { usage_bytes, .. } => {
127                Self::MemoryLimitExceeded {
128                    returned: result_count,
129                    memory_bytes: *usage_bytes,
130                }
131            }
132        }
133    }
134
135    /// Returns true if partial results were returned
136    #[must_use]
137    pub fn has_partial_results(&self) -> bool {
138        match self {
139            Self::Timeout {
140                partial_results_count,
141            } if *partial_results_count > 0 => true,
142            Self::ResultCapExceeded { .. } | Self::MemoryLimitExceeded { .. } => true,
143            _ => false,
144        }
145    }
146
147    /// Returns true if the query completed successfully
148    #[must_use]
149    pub fn is_success(&self) -> bool {
150        matches!(self, Self::Success)
151    }
152}
153
/// Configuration for the query audit logger.
///
/// ## Path Selection Rules
///
/// The audit log path is meant to be resolved in this order:
///
/// 1. **CLI override** (`--audit-log <path>`): a user-specified path wins.
/// 2. **Environment variable** (`SQRY_AUDIT_LOG`): used when no CLI argument is given.
/// 3. **Default**: `.sqry-audit.jsonl` in the current working directory.
///
/// ## Deployment Considerations
///
/// - **Read-only repos**: when running from a read-only directory, pass `--audit-log`
///   with a writable location (e.g., `~/.sqry/audit.jsonl` or `/tmp/sqry-audit.jsonl`).
/// - **Permission denied**: call `validate_path` during startup to warn early
///   when the configured path cannot be written.
/// - **Multi-user**: prefer per-user paths (`~/.sqry/`) to avoid conflicts.
#[derive(Debug, Clone)]
pub struct AuditLogConfig {
    /// Location of the audit log file.
    pub log_path: PathBuf,

    /// Number of buffered entries that triggers an automatic flush.
    pub buffer_size: usize,

    /// Maximum age of the buffer, in seconds, before the next logged entry
    /// triggers a flush.
    pub flush_interval_secs: u64,

    /// Log file size, in bytes, at which the file is rotated.
    pub max_file_size: usize,

    /// Number of rotated files to keep.
    pub rotation_count: usize,

    /// User context recorded in the audit trail.
    /// Detected from the environment: `$SQRY_USER`, `$USER`, `$USERNAME`,
    /// or "unknown" when none of them is available.
    pub user_context: String,
}
192
193impl Default for AuditLogConfig {
194    fn default() -> Self {
195        Self {
196            log_path: PathBuf::from(".sqry-audit.jsonl"),
197            buffer_size: 100,
198            flush_interval_secs: 60,
199            max_file_size: 10 * 1024 * 1024, // 10 MB
200            rotation_count: 5,
201            user_context: Self::detect_user_context(),
202        }
203    }
204}
205
206impl AuditLogConfig {
207    /// Detect user context from environment (per Codex iter10)
208    ///
209    /// Priority: `$SQRY_USER` > `$USER` > `$USERNAME` > "unknown"
210    #[must_use]
211    pub fn detect_user_context() -> String {
212        std::env::var("SQRY_USER")
213            .or_else(|_| std::env::var("USER"))
214            .or_else(|_| std::env::var("USERNAME"))
215            .unwrap_or_else(|_| "unknown".to_string())
216    }
217
218    /// Create config with a custom log path
219    #[must_use]
220    pub fn with_log_path(self, path: PathBuf) -> Self {
221        Self {
222            log_path: path,
223            ..self
224        }
225    }
226
227    /// Create config with a custom buffer size
228    #[must_use]
229    pub fn with_buffer_size(self, size: usize) -> Self {
230        Self {
231            buffer_size: size,
232            ..self
233        }
234    }
235
236    /// Validate that the audit log path is writable (per Codex iter10)
237    ///
238    /// Call this early in CLI startup to warn users if the path is unwritable.
239    /// This prevents silent audit failures that would violate compliance requirements.
240    ///
241    /// # Returns
242    ///
243    /// - `Ok(())` if path is writable (or can be created)
244    /// - `Err(PathValidationError)` with explanation if unwritable
245    ///
246    /// # Errors
247    ///
248    /// Returns error if the path is not writable due to permission or I/O issues.
249    pub fn validate_path(&self) -> Result<(), PathValidationError> {
250        let parent = self.log_path.parent().unwrap_or(Path::new("."));
251
252        // Check if parent directory exists and is writable
253        if parent.exists() {
254            // Try to create a test file to verify write permission
255            let test_path = parent.join(".sqry-audit-test");
256            match std::fs::write(&test_path, b"test") {
257                Ok(()) => {
258                    let _ = std::fs::remove_file(&test_path);
259                    Ok(())
260                }
261                Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
262                    Err(PathValidationError::PermissionDenied {
263                        path: self.log_path.clone(),
264                        suggestion:
265                            "Use --audit-log to specify a writable path (e.g., ~/.sqry/audit.jsonl)"
266                                .into(),
267                    })
268                }
269                Err(e) => Err(PathValidationError::IoError(e)),
270            }
271        } else {
272            // Parent doesn't exist - check if we can create it
273            match std::fs::create_dir_all(parent) {
274                Ok(()) => {
275                    // Created successfully, remove to avoid leaving empty dirs
276                    let _ = std::fs::remove_dir(parent);
277                    Ok(())
278                }
279                Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
280                    Err(PathValidationError::PermissionDenied {
281                        path: self.log_path.clone(),
282                        suggestion:
283                            "Use --audit-log to specify a writable path (e.g., ~/.sqry/audit.jsonl)"
284                                .into(),
285                    })
286                }
287                Err(e) => Err(PathValidationError::IoError(e)),
288            }
289        }
290    }
291}
292
/// Reasons the audit log path failed validation, reported early by the CLI.
#[derive(Debug, thiserror::Error)]
pub enum PathValidationError {
    /// The audit log location could not be written because of missing permissions.
    #[error("Permission denied for audit log path {path:?}: {suggestion}")]
    PermissionDenied {
        /// The audit log path that was rejected.
        path: PathBuf,
        /// Advice on how to resolve the problem.
        suggestion: String,
    },

    /// Any other I/O failure hit while validating the audit log path.
    #[error("I/O error validating audit log path: {0}")]
    IoError(#[from] std::io::Error),
}
309
310/// Query audit logger with durability guarantees
311///
312/// **DURABILITY GUARANTEES** (per Codex review):
313/// 1. Flush-on-drop via `Drop` implementation
314/// 2. Periodic flush every `flush_interval_secs`
315/// 3. Immediate flush when buffer is full
316/// 4. File rotation when size exceeds `max_file_size`
317/// 5. I/O errors are logged but don't fail queries
318pub struct QueryAuditLogger {
319    config: AuditLogConfig,
320    buffer: Mutex<Vec<QueryAuditEntry>>,
321    last_flush: Mutex<Instant>,
322}
323
324impl QueryAuditLogger {
325    /// Create a new audit logger
326    ///
327    /// # Errors
328    ///
329    /// Returns error if log directory cannot be created or isn't writable.
330    ///
331    /// # Directory Creation
332    ///
333    /// **Per Codex iter7 review**: The logger auto-creates parent directories
334    /// if they don't exist. This handles CLI overrides like `--audit-log ~/.sqry/audit.jsonl`
335    /// gracefully without requiring users to pre-create directories.
336    pub fn new(config: AuditLogConfig) -> Result<Self, std::io::Error> {
337        // **Rev 5** (per Codex iter7): Auto-create parent directory if missing
338        if let Some(parent) = config.log_path.parent()
339            && !parent.as_os_str().is_empty()
340            && !parent.exists()
341        {
342            std::fs::create_dir_all(parent).map_err(|e| {
343                std::io::Error::new(
344                    e.kind(),
345                    format!(
346                        "Failed to create audit log directory {}: {e}",
347                        parent.display()
348                    ),
349                )
350            })?;
351        }
352
353        Ok(Self {
354            config,
355            buffer: Mutex::new(Vec::new()),
356            last_flush: Mutex::new(Instant::now()),
357        })
358    }
359
360    /// Log a query execution
361    ///
362    /// This method buffers entries and flushes when:
363    /// - Buffer reaches `buffer_size`
364    /// - Time since last flush exceeds `flush_interval_secs`
365    pub fn log(&self, entry: QueryAuditEntry) {
366        let mut buffer = self.buffer.lock();
367        buffer.push(entry);
368
369        let should_flush = buffer.len() >= self.config.buffer_size
370            || self.last_flush.lock().elapsed().as_secs() >= self.config.flush_interval_secs;
371
372        if should_flush {
373            // Move entries out to release lock during I/O
374            let entries: Vec<_> = buffer.drain(..).collect();
375            drop(buffer);
376
377            self.flush_entries(&entries);
378            *self.last_flush.lock() = Instant::now();
379        }
380    }
381
382    /// Force flush all buffered entries
383    pub fn flush(&self) {
384        let entries: Vec<_> = self.buffer.lock().drain(..).collect();
385        if !entries.is_empty() {
386            self.flush_entries(&entries);
387        }
388    }
389
390    /// Get the number of buffered entries
391    #[must_use]
392    pub fn buffer_len(&self) -> usize {
393        self.buffer.lock().len()
394    }
395
396    /// Get the config
397    #[must_use]
398    pub fn config(&self) -> &AuditLogConfig {
399        &self.config
400    }
401
402    /// Flush entries to disk with rotation
403    fn flush_entries(&self, entries: &[QueryAuditEntry]) {
404        // Check if rotation needed
405        let max_file_size = u64::try_from(self.config.max_file_size).unwrap_or(u64::MAX);
406        if let Ok(metadata) = std::fs::metadata(&self.config.log_path)
407            && metadata.len() >= max_file_size
408            && let Err(e) = self.rotate_log()
409        {
410            eprintln!("Audit log rotation failed: {e}");
411        }
412
413        // Append entries
414        match OpenOptions::new()
415            .create(true)
416            .append(true)
417            .open(&self.config.log_path)
418        {
419            Ok(file) => {
420                let mut writer = BufWriter::new(file);
421                for entry in entries {
422                    if let Ok(json) = serde_json::to_string(entry) {
423                        let _ = writeln!(writer, "{json}");
424                    }
425                }
426                let _ = writer.flush();
427            }
428            Err(e) => {
429                eprintln!("Audit log write failed: {e}");
430            }
431        }
432    }
433
434    /// Rotate log files
435    fn rotate_log(&self) -> std::io::Result<()> {
436        // Rotate existing files: .4 -> .5, .3 -> .4, etc.
437        for i in (1..self.config.rotation_count).rev() {
438            let old_path = self.config.log_path.with_extension(format!("jsonl.{i}"));
439            let new_path = self
440                .config
441                .log_path
442                .with_extension(format!("jsonl.{}", i + 1));
443            if old_path.exists() {
444                std::fs::rename(&old_path, &new_path)?;
445            }
446        }
447
448        // Current -> .1
449        let backup_path = self.config.log_path.with_extension("jsonl.1");
450        if self.config.log_path.exists() {
451            std::fs::rename(&self.config.log_path, backup_path)?;
452        }
453
454        Ok(())
455    }
456}
457
458/// **FLUSH-ON-DROP GUARANTEE**
459impl Drop for QueryAuditLogger {
460    fn drop(&mut self) {
461        self.flush();
462    }
463}
464
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a successful audit entry; only the query text and result count vary.
    fn sample_entry(query: &str, result_count: usize) -> QueryAuditEntry {
        QueryAuditEntry {
            query: query.to_string(),
            timestamp: Utc::now(),
            duration_ms: 10,
            result_count,
            memory_usage: 512,
            predicates: vec![query.to_string()],
            outcome: QueryOutcome::Success,
            applied_limits: AppliedLimits {
                timeout_ms: 30000,
                result_cap: 10000,
                memory_limit: 512 * 1024 * 1024,
            },
            user: "test".to_string(),
        }
    }

    /// Config that never auto-flushes on time and flushes on size at `buffer_size`.
    fn config_at(log_path: &Path, buffer_size: usize) -> AuditLogConfig {
        AuditLogConfig {
            log_path: log_path.to_path_buf(),
            buffer_size,
            flush_interval_secs: 3600,
            ..Default::default()
        }
    }

    /// Number of lines (one JSON document each) currently in the log file.
    fn line_count(path: &Path) -> usize {
        std::fs::read_to_string(path).unwrap().lines().count()
    }

    #[test]
    fn test_query_outcome_from_status() {
        let complete = QueryCompletionStatus::Complete;
        assert!(QueryOutcome::from_completion_status(&complete, 10).is_success());

        let cap_reached = QueryCompletionStatus::ResultCapReached {
            count: 100,
            limit: 100,
        };
        let outcome = QueryOutcome::from_completion_status(&cap_reached, 100);
        assert!(outcome.has_partial_results());
        assert!(!outcome.is_success());
        assert!(matches!(
            outcome,
            QueryOutcome::ResultCapExceeded {
                returned: 100,
                limit: 100
            }
        ));
    }

    #[test]
    fn test_query_outcome_partial_results() {
        assert!(!QueryOutcome::Success.has_partial_results());
        assert!(
            QueryOutcome::Timeout {
                partial_results_count: 5
            }
            .has_partial_results()
        );
        assert!(
            !QueryOutcome::Timeout {
                partial_results_count: 0
            }
            .has_partial_results()
        );
        assert!(
            QueryOutcome::ResultCapExceeded {
                returned: 100,
                limit: 100
            }
            .has_partial_results()
        );
        assert!(
            QueryOutcome::MemoryLimitExceeded {
                returned: 50,
                memory_bytes: 1000
            }
            .has_partial_results()
        );
        assert!(
            !QueryOutcome::CostLimitExceeded {
                estimated: 100,
                limit: 50
            }
            .has_partial_results()
        );
        assert!(!QueryOutcome::Error("boom".to_string()).has_partial_results());
    }

    #[test]
    fn test_default_config() {
        let config = AuditLogConfig::default();
        assert_eq!(config.log_path, PathBuf::from(".sqry-audit.jsonl"));
        assert_eq!(config.buffer_size, 100);
        assert_eq!(config.flush_interval_secs, 60);
        assert_eq!(config.max_file_size, 10 * 1024 * 1024);
        assert_eq!(config.rotation_count, 5);
    }

    #[test]
    fn test_detect_user_context() {
        // The fallback is the non-empty string "unknown", so the result must
        // never be empty whatever the environment looks like.
        let user = AuditLogConfig::detect_user_context();
        assert!(!user.is_empty());
    }

    #[test]
    fn test_logger_creation() {
        let temp = TempDir::new().unwrap();
        let log_path = temp.path().join("audit.jsonl");

        let logger = QueryAuditLogger::new(config_at(&log_path, 100)).unwrap();
        assert_eq!(logger.buffer_len(), 0);
        assert_eq!(logger.config().log_path, log_path);
        // Creating the logger must not create the log file.
        assert!(!log_path.exists());
    }

    #[test]
    fn test_logger_buffering() {
        let temp = TempDir::new().unwrap();
        let log_path = temp.path().join("audit.jsonl");

        // Threshold of 10 so a single entry does not auto-flush.
        let logger = QueryAuditLogger::new(config_at(&log_path, 10)).unwrap();

        logger.log(sample_entry("impl:Debug", 5));
        assert_eq!(logger.buffer_len(), 1);

        // File shouldn't exist yet (not flushed)
        assert!(!log_path.exists());

        // Force flush
        logger.flush();
        assert_eq!(logger.buffer_len(), 0);

        // Exactly the one buffered entry must have been written.
        assert!(log_path.exists());
        assert_eq!(line_count(&log_path), 1);
        assert!(std::fs::read_to_string(&log_path).unwrap().contains("impl:Debug"));

        // Flushing an empty buffer writes nothing more.
        logger.flush();
        assert_eq!(line_count(&log_path), 1);
    }

    #[test]
    fn test_auto_flush_on_buffer_full() {
        let temp = TempDir::new().unwrap();
        let log_path = temp.path().join("audit.jsonl");

        // Small buffer so the second entry triggers the flush.
        let logger = QueryAuditLogger::new(config_at(&log_path, 2)).unwrap();

        logger.log(sample_entry("query:0", 0));
        assert_eq!(logger.buffer_len(), 1);
        assert!(!log_path.exists());

        logger.log(sample_entry("query:1", 1));

        // Buffer should be empty after auto-flush
        assert_eq!(logger.buffer_len(), 0);

        // Both entries must be on disk.
        assert!(log_path.exists());
        assert_eq!(line_count(&log_path), 2);
    }

    #[test]
    fn test_flush_on_drop() {
        let temp = TempDir::new().unwrap();
        let log_path = temp.path().join("audit.jsonl");

        {
            // High threshold so nothing is flushed before the drop.
            let logger = QueryAuditLogger::new(config_at(&log_path, 100)).unwrap();
            logger.log(sample_entry("test", 1));

            // File shouldn't exist yet
            assert!(!log_path.exists());
        } // Logger dropped here, should flush

        // File should now exist due to Drop, holding the single entry.
        assert!(log_path.exists());
        assert_eq!(line_count(&log_path), 1);
    }

    #[test]
    fn test_log_rotation() {
        let temp = TempDir::new().unwrap();
        let log_path = temp.path().join("audit.jsonl");

        let config = AuditLogConfig {
            log_path: log_path.clone(),
            buffer_size: 1,    // Flush every entry
            max_file_size: 50, // Smaller than one entry, so every flush after the first rotates
            rotation_count: 3,
            flush_interval_secs: 0, // Always check flush
            ..Default::default()
        };

        let logger = QueryAuditLogger::new(config).unwrap();

        // Five flushes: the first creates the log, the next four each rotate it.
        for i in 0..5 {
            logger.log(sample_entry(&format!("query:{i}"), i));
        }

        // Main log should exist and hold only the newest entry.
        assert!(log_path.exists());
        assert_eq!(line_count(&log_path), 1);

        // Rotation keeps generations .1 to .3 and nothing beyond `rotation_count`.
        for generation in 1..=3 {
            let rotated = temp.path().join(format!("audit.jsonl.{generation}"));
            assert!(rotated.exists(), "missing rotated file {}", rotated.display());
        }
        assert!(!temp.path().join("audit.jsonl.4").exists());
    }

    #[test]
    fn test_path_validation_writable() {
        let temp = TempDir::new().unwrap();
        let log_path = temp.path().join("audit.jsonl");

        let config = config_at(&log_path, 100);

        assert!(config.validate_path().is_ok());
        // Validation must not create the log file itself.
        assert!(!log_path.exists());
    }

    #[test]
    fn test_path_validation_missing_parent() {
        let temp = TempDir::new().unwrap();
        let log_path = temp.path().join("missing").join("audit.jsonl");

        let config = config_at(&log_path, 100);

        assert!(config.validate_path().is_ok());
        assert!(!log_path.exists());
    }

    #[test]
    fn test_config_builder() {
        let config = AuditLogConfig::default()
            .with_log_path(PathBuf::from("/tmp/test.jsonl"))
            .with_buffer_size(50);

        assert_eq!(config.log_path, PathBuf::from("/tmp/test.jsonl"));
        assert_eq!(config.buffer_size, 50);
    }

    #[test]
    fn test_auto_create_parent_directory() {
        let temp = TempDir::new().unwrap();
        let nested_path = temp
            .path()
            .join("subdir")
            .join("nested")
            .join("audit.jsonl");

        let logger = QueryAuditLogger::new(config_at(&nested_path, 100)).unwrap();

        // Parent directories should have been created
        assert!(nested_path.parent().unwrap().exists());

        // Add an entry and flush to verify it works
        logger.log(sample_entry("test", 1));
        logger.flush();

        assert!(nested_path.exists());
        assert_eq!(line_count(&nested_path), 1);
    }
}