oxify_connect_vision/
logging.rs

1//! Enhanced logging module for structured and secure logging.
2//!
3//! This module provides:
4//! - Structured logging with contextual metadata
5//! - Log sampling for high-volume operations
6//! - Sensitive data redaction
7//! - Log level filtering
8//! - Performance metrics logging
9
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::sync::atomic::{AtomicU64, Ordering};
13use std::sync::Arc;
14use tracing::{debug, error, info, trace, warn};
15
16/// Log level enumeration
17#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
18pub enum LogLevel {
19    /// Trace level (most verbose)
20    Trace,
21    /// Debug level
22    Debug,
23    /// Info level
24    Info,
25    /// Warning level
26    Warn,
27    /// Error level
28    Error,
29}
30
31impl LogLevel {
32    /// Check if this level should be logged given a minimum level
33    pub fn should_log(&self, min_level: LogLevel) -> bool {
34        *self >= min_level
35    }
36}
37
38impl std::fmt::Display for LogLevel {
39    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40        match self {
41            LogLevel::Trace => write!(f, "TRACE"),
42            LogLevel::Debug => write!(f, "DEBUG"),
43            LogLevel::Info => write!(f, "INFO"),
44            LogLevel::Warn => write!(f, "WARN"),
45            LogLevel::Error => write!(f, "ERROR"),
46        }
47    }
48}
49
50/// Structured log entry
51#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct LogEntry {
53    /// Timestamp (Unix timestamp in milliseconds)
54    pub timestamp: u64,
55
56    /// Log level
57    pub level: LogLevel,
58
59    /// Log message
60    pub message: String,
61
62    /// Structured fields
63    pub fields: HashMap<String, String>,
64
65    /// Optional request ID for correlation
66    pub request_id: Option<String>,
67
68    /// Optional user ID
69    pub user_id: Option<String>,
70}
71
72impl LogEntry {
73    /// Create a new log entry
74    pub fn new(level: LogLevel, message: impl Into<String>) -> Self {
75        Self {
76            timestamp: current_timestamp_ms(),
77            level,
78            message: message.into(),
79            fields: HashMap::new(),
80            request_id: None,
81            user_id: None,
82        }
83    }
84
85    /// Add a field to the log entry
86    pub fn with_field(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
87        self.fields.insert(key.into(), value.into());
88        self
89    }
90
91    /// Set request ID
92    pub fn with_request_id(mut self, request_id: impl Into<String>) -> Self {
93        self.request_id = Some(request_id.into());
94        self
95    }
96
97    /// Set user ID
98    pub fn with_user_id(mut self, user_id: impl Into<String>) -> Self {
99        self.user_id = Some(user_id.into());
100        self
101    }
102
103    /// Convert to JSON string
104    pub fn to_json(&self) -> String {
105        serde_json::to_string(self)
106            .unwrap_or_else(|_| "{\"error\":\"failed to serialize log entry\"}".to_string())
107    }
108}
109
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// This function never panics: if the system clock reports a time before the epoch the
/// result is `0`, and a millisecond count too large for `u64` saturates at `u64::MAX`.
fn current_timestamp_ms() -> u64 {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        // A clock set before 1970 yields `Err`; fall back to a zero duration
        // rather than aborting the caller just because it tried to log.
        .unwrap_or_default();

    // Clamp before narrowing so the cast below is lossless.
    since_epoch.as_millis().min(u128::from(u64::MAX)) as u64
}
117
/// Settings that control how many log entries a sampler lets through.
#[derive(Debug, Clone)]
pub struct SamplingConfig {
    /// Fraction of entries to keep, from `0.0` (none) to `1.0` (all).
    pub rate: f64,

    /// When `true`, error-level entries are kept regardless of `rate`.
    pub always_sample_errors: bool,

    /// Optional upper bound on the number of samples per second; `None` means no bound.
    pub max_samples_per_second: Option<u64>,
}
130
131impl Default for SamplingConfig {
132    fn default() -> Self {
133        Self {
134            rate: 1.0, // Sample everything by default
135            always_sample_errors: true,
136            max_samples_per_second: None,
137        }
138    }
139}
140
141impl SamplingConfig {
142    /// Create a sampling config with a specific rate
143    pub fn with_rate(rate: f64) -> Self {
144        Self {
145            rate: rate.clamp(0.0, 1.0),
146            always_sample_errors: true,
147            max_samples_per_second: None,
148        }
149    }
150
151    /// Sample 1% of logs
152    pub fn low() -> Self {
153        Self::with_rate(0.01)
154    }
155
156    /// Sample 10% of logs
157    pub fn medium() -> Self {
158        Self::with_rate(0.10)
159    }
160
161    /// Sample 50% of logs
162    pub fn high() -> Self {
163        Self::with_rate(0.50)
164    }
165
166    /// Sample all logs
167    pub fn all() -> Self {
168        Self::with_rate(1.0)
169    }
170}
171
172/// Log sampler for rate-limiting logs
173pub struct LogSampler {
174    config: SamplingConfig,
175    sample_count: Arc<AtomicU64>,
176    last_reset: Arc<AtomicU64>,
177}
178
179impl LogSampler {
180    /// Create a new log sampler
181    pub fn new(config: SamplingConfig) -> Self {
182        Self {
183            config,
184            sample_count: Arc::new(AtomicU64::new(0)),
185            last_reset: Arc::new(AtomicU64::new(current_timestamp_ms())),
186        }
187    }
188
189    /// Check if a log should be sampled
190    pub fn should_sample(&self, level: LogLevel) -> bool {
191        // Always sample errors if configured
192        if self.config.always_sample_errors && level >= LogLevel::Error {
193            return true;
194        }
195
196        // Check rate-based sampling
197        if self.config.rate >= 1.0 {
198            return true;
199        }
200
201        if self.config.rate <= 0.0 {
202            return false;
203        }
204
205        // Check max samples per second
206        if let Some(max_samples) = self.config.max_samples_per_second {
207            let now = current_timestamp_ms();
208            let last = self.last_reset.load(Ordering::Relaxed);
209
210            // Reset counter every second
211            if now - last >= 1000 {
212                self.sample_count.store(0, Ordering::Relaxed);
213                self.last_reset.store(now, Ordering::Relaxed);
214            }
215
216            let count = self.sample_count.fetch_add(1, Ordering::Relaxed);
217            if count >= max_samples {
218                return false;
219            }
220        }
221
222        // Random sampling based on rate
223        use std::collections::hash_map::RandomState;
224        use std::hash::BuildHasher;
225
226        let hash = RandomState::new().hash_one(current_timestamp_ms());
227
228        (hash as f64 / u64::MAX as f64) < self.config.rate
229    }
230
231    /// Get sample statistics
232    pub fn stats(&self) -> SamplingStats {
233        SamplingStats {
234            sample_count: self.sample_count.load(Ordering::Relaxed),
235            sample_rate: self.config.rate,
236        }
237    }
238}
239
/// Snapshot of a sampler's state.
#[derive(Debug, Clone)]
pub struct SamplingStats {
    /// Value of the sampler's counter when the snapshot was taken.
    pub sample_count: u64,
    /// Sample rate the sampler is configured with.
    pub sample_rate: f64,
}
246
/// Regular-expression patterns for sensitive data, each paired with its replacement label.
///
/// Entries are `(pattern, replacement)` tuples written in regex syntax.
// Not referenced yet: `redact_sensitive_data` currently uses simple word heuristics
// instead of a regex engine, so the table is kept for a future regex-based redactor.
#[allow(dead_code)]
static SENSITIVE_PATTERNS: &[(&str, &str)] = &[
    // Credit card numbers: four groups of four digits, optionally separated.
    (r"\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b", "[CARD]"),
    // Email addresses. The TLD class is `[A-Za-z]`; inside a character class `|` is a
    // literal pipe, not alternation, so it must not appear there.
    (
        r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
        "[EMAIL]",
    ),
    // Phone numbers: 3-3-4 digits with optional `-` or `.` separators.
    (r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b", "[PHONE]"),
    // Social security numbers: 3-2-4 digits separated by `-`.
    (r"\b\d{3}-\d{2}-\d{4}\b", "[SSN]"),
    // IPv4-style dotted quads.
    (r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b", "[IP]"),
    // API keys: runs of 32 or more ASCII alphanumerics.
    (r"\b[A-Za-z0-9]{32,}\b", "[KEY]"),
];
266
/// Redacts sensitive-looking tokens from `input`.
///
/// The input is scanned as whitespace-delimited tokens using simple heuristics (no regex):
/// - a token containing both `@` and `.` is replaced by `[EMAIL]`;
/// - a token that, after stripping leading and trailing ASCII punctuation, consists of at
///   least 32 alphanumeric characters is treated as an API key: the key becomes `[KEY]`
///   and the surrounding punctuation is kept (so `"<key>."` becomes `"[KEY]."`).
///
/// All whitespace (newlines, tabs, repeated and leading/trailing spaces) is copied through
/// unchanged, so text without sensitive tokens is returned exactly as given.
pub fn redact_sensitive_data(input: &str) -> String {
    let mut redacted = String::with_capacity(input.len());
    let mut rest = input;

    while !rest.is_empty() {
        // Copy the whitespace run in front of the next token verbatim.
        let token_start = rest
            .find(|c: char| !c.is_whitespace())
            .unwrap_or(rest.len());
        redacted.push_str(&rest[..token_start]);
        rest = &rest[token_start..];

        // Everything up to the next whitespace character is one token (possibly empty
        // when the input ends in whitespace).
        let token_end = rest.find(char::is_whitespace).unwrap_or(rest.len());
        push_redacted_token(&rest[..token_end], &mut redacted);
        rest = &rest[token_end..];
    }

    redacted
}

/// Appends `token` to `out`, substituting a placeholder when it looks sensitive.
fn push_redacted_token(token: &str, out: &mut String) {
    fn is_wrapper(c: char) -> bool {
        c.is_ascii_punctuation()
    }

    if token.is_empty() {
        return;
    }

    if token.contains('@') && token.contains('.') {
        out.push_str("[EMAIL]");
        return;
    }

    // Peel punctuation off both ends so a key followed by `.`/`,` or wrapped in
    // quotes or brackets is still recognised.
    let without_prefix = token.trim_start_matches(is_wrapper);
    let core = without_prefix.trim_end_matches(is_wrapper);

    if core.len() >= 32 && core.chars().all(char::is_alphanumeric) {
        let prefix = &token[..token.len() - without_prefix.len()];
        let suffix = &without_prefix[core.len()..];
        out.push_str(prefix);
        out.push_str("[KEY]");
        out.push_str(suffix);
    } else {
        out.push_str(token);
    }
}
289
290/// Structured logger with enhanced features
291pub struct StructuredLogger {
292    min_level: LogLevel,
293    sampler: Option<LogSampler>,
294    redact_sensitive: bool,
295    default_fields: HashMap<String, String>,
296}
297
298impl StructuredLogger {
299    /// Create a new structured logger
300    pub fn new(min_level: LogLevel) -> Self {
301        Self {
302            min_level,
303            sampler: None,
304            redact_sensitive: true,
305            default_fields: HashMap::new(),
306        }
307    }
308
309    /// Enable log sampling
310    pub fn with_sampling(mut self, config: SamplingConfig) -> Self {
311        self.sampler = Some(LogSampler::new(config));
312        self
313    }
314
315    /// Enable/disable sensitive data redaction
316    pub fn with_redaction(mut self, enabled: bool) -> Self {
317        self.redact_sensitive = enabled;
318        self
319    }
320
321    /// Add a default field to all log entries
322    pub fn with_default_field(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
323        self.default_fields.insert(key.into(), value.into());
324        self
325    }
326
327    /// Log a message
328    pub fn log(&self, mut entry: LogEntry) {
329        // Check log level
330        if !entry.level.should_log(self.min_level) {
331            return;
332        }
333
334        // Check sampling
335        if let Some(sampler) = &self.sampler {
336            if !sampler.should_sample(entry.level) {
337                return;
338            }
339        }
340
341        // Redact sensitive data
342        if self.redact_sensitive {
343            entry.message = redact_sensitive_data(&entry.message);
344        }
345
346        // Add default fields
347        for (key, value) in &self.default_fields {
348            entry
349                .fields
350                .entry(key.clone())
351                .or_insert_with(|| value.clone());
352        }
353
354        // Log using tracing
355        let json = entry.to_json();
356        match entry.level {
357            LogLevel::Trace => trace!("{}", json),
358            LogLevel::Debug => debug!("{}", json),
359            LogLevel::Info => info!("{}", json),
360            LogLevel::Warn => warn!("{}", json),
361            LogLevel::Error => error!("{}", json),
362        }
363    }
364
365    /// Log at trace level
366    pub fn trace(&self, message: impl Into<String>) {
367        self.log(LogEntry::new(LogLevel::Trace, message));
368    }
369
370    /// Log at debug level
371    pub fn debug(&self, message: impl Into<String>) {
372        self.log(LogEntry::new(LogLevel::Debug, message));
373    }
374
375    /// Log at info level
376    pub fn info(&self, message: impl Into<String>) {
377        self.log(LogEntry::new(LogLevel::Info, message));
378    }
379
380    /// Log at warn level
381    pub fn warn(&self, message: impl Into<String>) {
382        self.log(LogEntry::new(LogLevel::Warn, message));
383    }
384
385    /// Log at error level
386    pub fn error(&self, message: impl Into<String>) {
387        self.log(LogEntry::new(LogLevel::Error, message));
388    }
389}
390
391impl Default for StructuredLogger {
392    fn default() -> Self {
393        Self::new(LogLevel::Info)
394    }
395}
396
#[cfg(test)]
mod tests {
    use super::*;

    // ---- LogLevel -------------------------------------------------------

    #[test]
    fn test_log_level_ordering() {
        // Each level must be strictly more severe than the one after it.
        let descending = [
            LogLevel::Error,
            LogLevel::Warn,
            LogLevel::Info,
            LogLevel::Debug,
            LogLevel::Trace,
        ];
        for pair in descending.windows(2) {
            assert!(pair[0] > pair[1]);
        }
    }

    #[test]
    fn test_log_level_should_log() {
        let threshold = LogLevel::Info;
        for passing in [LogLevel::Error, LogLevel::Warn] {
            assert!(passing.should_log(threshold));
        }
        for filtered in [LogLevel::Debug, LogLevel::Trace] {
            assert!(!filtered.should_log(threshold));
        }
    }

    #[test]
    fn test_log_level_display() {
        let expected = [
            (LogLevel::Trace, "TRACE"),
            (LogLevel::Debug, "DEBUG"),
            (LogLevel::Info, "INFO"),
            (LogLevel::Warn, "WARN"),
            (LogLevel::Error, "ERROR"),
        ];
        for (level, name) in expected {
            assert_eq!(format!("{}", level), name);
        }
    }

    // ---- LogEntry -------------------------------------------------------

    #[test]
    fn test_log_entry_creation() {
        let entry = LogEntry::new(LogLevel::Info, "test message");
        assert_eq!(entry.level, LogLevel::Info);
        assert_eq!(entry.message, "test message");
        assert!(entry.fields.is_empty());
    }

    #[test]
    fn test_log_entry_with_fields() {
        let entry = LogEntry::new(LogLevel::Info, "test")
            .with_field("key1", "value1")
            .with_field("key2", "value2");

        assert_eq!(entry.fields.get("key1").map(String::as_str), Some("value1"));
        assert_eq!(entry.fields.get("key2").map(String::as_str), Some("value2"));
    }

    #[test]
    fn test_log_entry_with_request_id() {
        let entry = LogEntry::new(LogLevel::Info, "test").with_request_id("req-123");

        assert_eq!(entry.request_id.as_deref(), Some("req-123"));
    }

    #[test]
    fn test_log_entry_to_json() {
        let json = LogEntry::new(LogLevel::Info, "test")
            .with_field("key", "value")
            .to_json();

        assert!(json.contains("\"message\":\"test\""));
        assert!(json.contains("\"level\":\"Info\""));
    }

    // ---- SamplingConfig / LogSampler ------------------------------------

    #[test]
    fn test_sampling_config_default() {
        let config = SamplingConfig::default();
        assert_eq!(config.rate, 1.0);
        assert!(config.always_sample_errors);
    }

    #[test]
    fn test_sampling_config_presets() {
        let presets = [
            (SamplingConfig::low(), 0.01),
            (SamplingConfig::medium(), 0.10),
            (SamplingConfig::high(), 0.50),
            (SamplingConfig::all(), 1.0),
        ];
        for (config, expected_rate) in presets {
            assert_eq!(config.rate, expected_rate);
        }
    }

    #[test]
    fn test_log_sampler_always_sample_errors() {
        // A zero rate would drop everything, but errors are exempt by default.
        let sampler = LogSampler::new(SamplingConfig::with_rate(0.0));

        assert!(sampler.should_sample(LogLevel::Error));
    }

    #[test]
    fn test_log_sampler_rate_zero() {
        let config = SamplingConfig {
            always_sample_errors: false,
            ..SamplingConfig::with_rate(0.0)
        };
        let sampler = LogSampler::new(config);

        assert!(!sampler.should_sample(LogLevel::Info));
    }

    #[test]
    fn test_log_sampler_rate_one() {
        let sampler = LogSampler::new(SamplingConfig::with_rate(1.0));

        for level in [LogLevel::Info, LogLevel::Debug] {
            assert!(sampler.should_sample(level));
        }
    }

    #[test]
    fn test_sampling_stats() {
        let stats = LogSampler::new(SamplingConfig::high()).stats();
        assert_eq!(stats.sample_rate, 0.50);
    }

    // ---- Redaction ------------------------------------------------------

    #[test]
    fn test_redact_sensitive_data_email() {
        let redacted = redact_sensitive_data("Contact me at user@example.com for details");
        assert!(redacted.contains("[EMAIL]"));
        assert!(!redacted.contains("user@example.com"));
    }

    #[test]
    fn test_redact_sensitive_data_api_key() {
        let redacted = redact_sensitive_data("API key: abcdef1234567890abcdef1234567890abcdef12");
        assert!(redacted.contains("[KEY]"));
    }

    #[test]
    fn test_redact_sensitive_data_no_sensitive() {
        let input = "This is a normal message";
        assert_eq!(redact_sensitive_data(input), input);
    }

    // ---- StructuredLogger -----------------------------------------------

    #[test]
    fn test_structured_logger_creation() {
        let logger = StructuredLogger::new(LogLevel::Info);
        assert_eq!(logger.min_level, LogLevel::Info);
        assert!(logger.redact_sensitive);
    }

    #[test]
    fn test_structured_logger_with_sampling() {
        let logger = StructuredLogger::new(LogLevel::Info).with_sampling(SamplingConfig::low());
        assert!(logger.sampler.is_some());
    }

    #[test]
    fn test_structured_logger_with_redaction() {
        let logger = StructuredLogger::new(LogLevel::Info).with_redaction(false);
        assert!(!logger.redact_sensitive);
    }

    #[test]
    fn test_structured_logger_with_default_field() {
        let logger = StructuredLogger::new(LogLevel::Info).with_default_field("service", "ocr");
        assert_eq!(
            logger.default_fields.get("service").map(String::as_str),
            Some("ocr")
        );
    }
}