kindly_guard_server/scanner/mod.rs
1// Copyright 2025 Kindly Software Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//! Security scanner module for threat detection
15//!
16//! This module provides comprehensive security scanning capabilities for detecting
17//! various types of threats in text and JSON inputs. It combines multiple specialized
18//! scanners to provide defense-in-depth protection.
19//!
20//! # Architecture
21//!
22//! The scanner module follows a modular architecture with specialized sub-scanners:
23//!
24//! - **Unicode Scanner** (`unicode`): Detects Unicode-based attacks including:
25//! - Invisible characters (zero-width spaces, joiners)
26//! - BiDi override attacks for text spoofing
27//! - Homograph attacks using similar-looking characters
28//! - Dangerous control characters
29//!
30//! - **Injection Scanner** (`injection`): Detects various injection attacks:
31//! - SQL injection patterns
32//! - Command injection attempts
33//! - Prompt injection for LLMs
34//! - Path traversal attempts
35//! - LDAP, XML, and NoSQL injections
36//!
37//! - **XSS Scanner** (`xss_scanner`): Detects cross-site scripting:
38//! - Script tags and event handlers
39//! - JavaScript URLs and data URIs
40//! - HTML entity encoding bypasses
41//!
42//! - **Pattern Scanner** (`patterns`): Customizable threat patterns:
43//! - MCP-specific threats (session IDs, tokens)
44//! - Tool poisoning attempts
45//! - Custom patterns from configuration
46//!
47//! - **Crypto Scanner** (`crypto`): Detects cryptographic security issues:
48//! - Deprecated algorithms (MD5, SHA1, DES)
49//! - Insecure random number generation
50//! - Weak key sizes for 2025 standards
51//! - Insecure encryption modes (ECB)
52//! - Bad key derivation practices
53//!
54//! # Configuration
55//!
56//! The scanner behavior is controlled through `ScannerConfig`:
57//!
58//! ```toml
59//! [scanner]
60//! # Enable/disable specific threat detection types
61//! unicode_detection = true # Detect Unicode-based attacks
62//! injection_detection = true # Detect injection attempts
63//! path_traversal_detection = true # Detect directory traversal
64//! xss_detection = true # Detect XSS patterns
65//! crypto_detection = true # Detect weak crypto patterns
66//!
67//! # Performance and limits
68//! max_scan_depth = 20 # Max recursion for JSON scanning
69//! enhanced_mode = false # Enable advanced detection algorithms
70//! enable_event_buffer = false # Enable event correlation (requires enhanced feature)
71//!
72//! # Custom patterns
73//! custom_patterns = "/etc/kindly-guard/patterns.toml" # Optional custom patterns file
74//! ```
75//!
76//! # Security Principles
77//!
78//! 1. **Defense in Depth**: Multiple scanners provide overlapping protection
79//! 2. **Type Safety**: All threats are represented as typed enums, not strings
80//! 3. **Fail Safe**: Errors in one scanner don't affect others
81//! 4. **Performance**: Zero-copy scanning where possible, SIMD optimizations available
82//! 5. **Extensibility**: Plugin system allows custom threat detection
83//!
84//! # Usage Example
85//!
86//! ```no_run
87//! use kindly_guard_server::config::ScannerConfig;
88//! use kindly_guard_server::scanner::{SecurityScanner, Severity};
89//!
90//! // Configure scanner
91//! let config = ScannerConfig {
92//! unicode_detection: true,
93//! injection_detection: true,
94//! xss_detection: Some(true),
95//! max_scan_depth: 20,
96//! ..Default::default()
97//! };
98//!
99//! // Create scanner instance
100//! let scanner = SecurityScanner::new(config)?;
101//!
102//! // Scan text input
103//! let threats = scanner.scan_text("SELECT * FROM users WHERE id = '1' OR '1'='1'")?;
104//!
105//! // Handle detected threats
106//! for threat in threats {
107//! if threat.severity >= Severity::High {
108//! // Block the request
109//! return Err("Security threat detected".into());
110//! }
111//! }
112//! # Ok::<(), Box<dyn std::error::Error>>(())
113//! ```
114
115use serde::{Deserialize, Serialize};
116use std::fmt;
117use std::sync::Arc;
118use thiserror::Error;
119
120pub mod crypto;
121pub mod injection;
122pub mod patterns;
123pub mod sync_wrapper;
124pub mod unicode;
125pub mod xss_scanner;
126
127pub use crypto::CryptoScanner;
128pub use injection::InjectionScanner;
129pub use patterns::ThreatPatterns;
130pub use unicode::UnicodeScanner;
131pub use xss_scanner::{create_xss_scanner, XssScanner};
132
/// Main security scanner combining all threat detection
///
/// The `SecurityScanner` is the central component for detecting security threats in text and JSON inputs.
/// It combines multiple specialized scanners (Unicode, Injection, XSS, Crypto) and supports plugin-based extensions.
///
/// # Architecture
/// - **Unicode Scanner**: Detects invisible characters, BiDi spoofing, homograph attacks
/// - **Injection Scanner**: Detects SQL, command, prompt, and other injection attempts
/// - **XSS Scanner**: Detects cross-site scripting patterns
/// - **Crypto Scanner**: Detects weak cryptographic patterns and insecure implementations
/// - **Plugin System**: Allows custom threat detection via external plugins
///
/// # Security Considerations
/// - All scanners run with configurable depth limits to prevent DoS attacks
/// - Pattern matching uses size-limited regex to prevent ReDoS attacks
/// - Results are type-safe using enums, never raw strings for security decisions
/// - Enhanced mode provides additional correlation and pattern analysis
///
/// # Performance
/// - Scanners use zero-copy operations where possible
/// - Text is scanned in a single pass per scanner
/// - JSON scanning uses recursive descent with depth limiting
/// - Enhanced mode may use SIMD optimizations when available
pub struct SecurityScanner {
    /// Unicode sub-scanner; consulted only when `config.unicode_detection` is set.
    unicode_scanner: UnicodeScanner,
    /// Injection sub-scanner; its findings are filtered by `config.injection_detection`
    /// and `config.path_traversal_detection`.
    injection_scanner: InjectionScanner,
    /// XSS scanner behind a trait object; its `scan_xss` is async and is driven
    /// from synchronous code via a temporary tokio runtime.
    xss_scanner: Arc<dyn XssScanner>,
    /// Crypto sub-scanner; consulted only when `config.crypto_detection` is set.
    crypto_scanner: CryptoScanner,
    /// Threat patterns loaded at construction (custom file or defaults). Public.
    pub patterns: ThreatPatterns,
    /// Configuration captured at construction; read on every scan.
    config: crate::config::ScannerConfig,
    /// Optional plugin manager; `None` until `set_plugin_manager` is called.
    plugin_manager: Option<Arc<dyn crate::plugins::PluginManagerTrait>>,
    /// Optional event processor; the field only exists when the `enhanced`
    /// feature is compiled in.
    #[allow(dead_code)]
    #[cfg(feature = "enhanced")]
    event_processor: Option<Arc<dyn crate::traits::SecurityEventProcessor>>,
}
168
/// Represents a detected security threat
///
/// Produced by the sub-scanners and plugins and returned in a [`ScanResult`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Threat {
    /// Category of the detected threat.
    pub threat_type: ThreatType,
    /// Severity assigned by the scanner that reported the threat.
    pub severity: Severity,
    /// Where in the input the threat was found.
    pub location: Location,
    /// Human-readable explanation of the finding.
    pub description: String,
    /// Optional advice on how to resolve the finding.
    pub remediation: Option<String>,
}
178
/// Types of security threats that can be detected
///
/// This enum categorizes different attack vectors and malicious patterns
/// that the security scanner can identify. Each variant represents a specific
/// threat type with unique characteristics and security implications.
///
/// # Serialization
/// Variant names are serialized in `snake_case` (for example `SqlInjection`
/// becomes `"sql_injection"`), so renaming a variant changes the wire format.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum ThreatType {
    // Unicode threats
    /// Invisible Unicode characters used to hide malicious content
    ///
    /// **Detection**: Zero-width spaces, joiners, and other invisible characters
    /// **Security Impact**: Can hide malicious code or bypass filters
    /// **Common Vectors**: Comments, usernames, file names
    /// **Recommended Response**: Remove or escape invisible characters
    UnicodeInvisible,

    /// Bidirectional (BiDi) text spoofing attacks
    ///
    /// **Detection**: Right-to-left override (U+202E) and related BiDi control characters
    /// **Security Impact**: Can reverse text display to mislead users (e.g., "txt.exe" appears as "exe.txt")
    /// **Common Vectors**: File names, URLs, email addresses
    /// **Recommended Response**: Block or escape BiDi control characters
    UnicodeBiDi,

    /// Homograph attacks using similar-looking Unicode characters
    ///
    /// **Detection**: Characters that visually resemble ASCII but have different code points
    /// **Security Impact**: Phishing attacks, domain spoofing (e.g., "аpple.com" with Cyrillic 'а')
    /// **Common Vectors**: URLs, domain names, usernames
    /// **Recommended Response**: Convert to ASCII or warn users about mixed scripts
    UnicodeHomograph,

    /// Dangerous Unicode control characters
    ///
    /// **Detection**: Format control, line/paragraph separators, other control characters
    /// **Security Impact**: Can break parsing, cause unexpected behavior, or bypass validation
    /// **Common Vectors**: Any text input, configuration files
    /// **Recommended Response**: Strip or escape control characters
    UnicodeControl,

    // Injection threats
    /// Prompt injection attacks against LLMs and AI systems
    ///
    /// **Detection**: Instructions attempting to override system prompts or context
    /// **Security Impact**: Can manipulate AI behavior, extract training data, or bypass restrictions
    /// **Common Vectors**: Chat interfaces, AI-powered features, automated responses
    /// **Recommended Response**: Wrap user input in safety context, escape control sequences
    PromptInjection,

    /// Command injection attacks targeting system shells
    ///
    /// **Detection**: Shell metacharacters, command separators, backticks
    /// **Security Impact**: Remote code execution, system compromise
    /// **Common Vectors**: System calls, file operations, process spawning
    /// **Recommended Response**: Use parameterized commands, escape shell metacharacters
    CommandInjection,

    /// Path traversal attacks attempting directory traversal
    ///
    /// **Detection**: "../", "..\\", absolute paths, null bytes in paths
    /// **Security Impact**: Unauthorized file access, information disclosure
    /// **Common Vectors**: File uploads, include statements, template paths
    /// **Recommended Response**: Normalize paths, validate against whitelist
    PathTraversal,

    /// SQL injection attacks against databases
    ///
    /// **Detection**: SQL keywords with quotes, UNION statements, comment sequences
    /// **Security Impact**: Data breach, data manipulation, authentication bypass
    /// **Common Vectors**: Search fields, login forms, URL parameters
    /// **Recommended Response**: Use parameterized queries, escape special characters
    SqlInjection,

    /// Cross-site scripting (XSS) attacks
    ///
    /// **Detection**: JavaScript code, event handlers, script tags
    /// **Security Impact**: Session hijacking, defacement, malware distribution
    /// **Common Vectors**: User comments, profile fields, search results
    /// **Recommended Response**: HTML encode output, use Content Security Policy
    CrossSiteScripting,

    /// LDAP injection attacks against directory services
    ///
    /// **Detection**: LDAP filter metacharacters, DN manipulation attempts
    /// **Security Impact**: Authentication bypass, information disclosure
    /// **Common Vectors**: Login systems, user lookups, group membership checks
    /// **Recommended Response**: Escape LDAP metacharacters, use parameterized filters
    LdapInjection,

    /// XML injection and XXE (XML External Entity) attacks
    ///
    /// **Detection**: DTD declarations, ENTITY definitions, SYSTEM keywords
    /// **Security Impact**: File disclosure, SSRF, denial of service
    /// **Common Vectors**: XML APIs, SOAP services, configuration files
    /// **Recommended Response**: Disable external entities, use safe XML parsers
    XmlInjection,

    /// NoSQL injection attacks against document databases
    ///
    /// **Detection**: MongoDB operators ($where, $ne), JavaScript in queries
    /// **Security Impact**: Data breach, authentication bypass, denial of service
    /// **Common Vectors**: REST APIs, search interfaces, user profiles
    /// **Recommended Response**: Validate input types, avoid string concatenation in queries
    NoSqlInjection,

    // MCP-specific threats
    /// Exposure of MCP session identifiers
    ///
    /// **Detection**: Session IDs in logs, URLs, or error messages
    /// **Security Impact**: Session hijacking, unauthorized access to MCP resources
    /// **Common Vectors**: Debug output, error messages, URLs
    /// **Recommended Response**: Redact session IDs, use secure session management
    SessionIdExposure,

    /// Tool poisoning attacks against MCP tools
    ///
    /// **Detection**: Malicious tool definitions, backdoored implementations
    /// **Security Impact**: Compromised tool execution, data exfiltration
    /// **Common Vectors**: Tool repositories, dynamic tool loading
    /// **Recommended Response**: Verify tool signatures, use tool allowlists
    ToolPoisoning,

    /// Token theft attempts targeting authentication tokens
    ///
    /// **Detection**: Token patterns in unexpected locations, extraction attempts
    /// **Security Impact**: Account takeover, unauthorized API access
    /// **Common Vectors**: Log files, error messages, client-side storage
    /// **Recommended Response**: Implement token rotation, use secure storage
    TokenTheft,

    /// Denial of Service attempt through oversized content
    ///
    /// **Detection**: Content exceeding configured size limits
    /// **Security Impact**: Resource exhaustion, service unavailability
    /// **Common Vectors**: Large file uploads, oversized API payloads
    /// **Recommended Response**: Reject oversized content, implement rate limiting
    DosPotential,

    // Plugin-detected threats
    /// Custom threat detected by a security plugin
    ///
    /// The payload is the plugin-supplied threat name.
    ///
    /// **Detection**: Varies by plugin implementation
    /// **Security Impact**: Depends on the specific threat
    /// **Common Vectors**: Plugin-specific
    /// **Recommended Response**: Consult plugin documentation for remediation
    Custom(String),
}
327
/// Threat severity levels
///
/// Indicates the potential impact and urgency of a detected threat.
/// The ordering is important: Low < Medium < High < Critical.
///
/// The ordering comes from the derived `PartialOrd`/`Ord`, which follow the
/// declaration order of the variants. Do not reorder them: comparisons such as
/// `severity >= Severity::High` depend on it.
///
/// Variants serialize as lowercase strings (`"low"`, `"medium"`, `"high"`, `"critical"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    /// Low severity threats
    ///
    /// **Characteristics**: Minimal immediate risk, informational findings
    /// **Examples**: Suspicious but likely benign patterns, deprecated practices
    /// **Response Time**: Can be addressed during regular maintenance
    /// **Action**: Log and monitor, fix in next update cycle
    Low,

    /// Medium severity threats
    ///
    /// **Characteristics**: Moderate risk, potential for escalation
    /// **Examples**: Weak encoding, information leakage, misconfiguration
    /// **Response Time**: Should be addressed within days
    /// **Action**: Prioritize for next release, implement compensating controls
    Medium,

    /// High severity threats
    ///
    /// **Characteristics**: Significant risk, likely exploitable
    /// **Examples**: SQL injection, XSS, authentication bypass attempts
    /// **Response Time**: Address within hours to days
    /// **Action**: Immediate remediation, notify security team
    High,

    /// Critical severity threats
    ///
    /// **Characteristics**: Severe risk, actively exploitable, system compromise
    /// **Examples**: Remote code execution, complete authentication bypass, data breach
    /// **Response Time**: Immediate response required
    /// **Action**: Emergency patching, incident response activation
    Critical,
}
367
/// Location of a threat in the input
///
/// Provides precise information about where a threat was detected,
/// enabling accurate reporting and targeted remediation.
///
/// When text is scanned in chunks, `Text` offsets are shifted by the scanner
/// so that they refer to the whole input rather than to the chunk.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum Location {
    /// Threat found in plain text content
    ///
    /// **Fields**:
    /// - `offset`: Byte offset from start of text
    /// - `length`: Length of the threat in bytes
    ///
    /// **Usage**: For threats in plain text, source code, or string literals
    /// **Example**: SQL injection at offset 142, length 25
    Text { offset: usize, length: usize },

    /// Threat found in JSON structure
    ///
    /// **Fields**:
    /// - `path`: JSONPath-style location (e.g., "$.user.name", "$.items[2].value")
    ///
    /// **Usage**: For threats in JSON documents, API payloads, configuration files
    /// **Example**: XSS attempt at path "$.comments[0].text"
    Json { path: String },

    /// Threat found in binary data
    ///
    /// **Fields**:
    /// - `offset`: Byte offset in binary stream
    ///
    /// **Usage**: For threats in binary protocols, encoded data, file uploads
    /// **Example**: Malicious pattern at byte offset 0x1A4F
    Binary { offset: usize },
}
402
/// Scanner errors
///
/// Error type carried by [`ScanResult`] and returned by the scanner constructors.
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
#[derive(Error, Debug)]
pub enum ScanError {
    /// The configured maximum scan depth was exceeded.
    #[error("Maximum scan depth exceeded")]
    MaxDepthExceeded,

    /// The input could not be processed; the payload holds the detail message.
    /// Also used when the tokio runtime for plugin scanning cannot be created.
    #[error("Invalid input format: {0}")]
    InvalidInput(String),

    /// A threat pattern failed to compile; the payload holds the detail message.
    #[error("Pattern compilation failed: {0}")]
    PatternError(String),

    /// A runtime-level failure, e.g. a tokio runtime could not be built for the
    /// XSS scan or the helper thread running it panicked.
    #[error("Runtime error: {0}")]
    RuntimeError(String),
}
418
/// Result type for scanner operations
///
/// `Ok` carries every threat found (an empty vector means nothing was detected);
/// `Err` carries the [`ScanError`] that aborted the scan.
pub type ScanResult = Result<Vec<Threat>, ScanError>;
421
422impl SecurityScanner {
423 /// Set the plugin manager for this scanner
424 ///
425 /// Enables plugin-based threat detection by attaching a plugin manager to the scanner.
426 /// Plugins can detect custom threats specific to your application or domain.
427 ///
428 /// # Arguments
429 /// * `plugin_manager` - The plugin manager that will coordinate plugin scanning
430 ///
431 /// # Plugin Security
432 /// - Plugins run in isolated contexts with limited permissions
433 /// - Plugin errors are logged but don't fail the main scan
434 /// - Each plugin has configurable timeouts to prevent DoS
435 ///
436 /// # Example
437 /// ```no_run
438 /// # use std::sync::Arc;
439 /// # use kindly_guard_server::scanner::SecurityScanner;
440 /// # use kindly_guard_server::plugins::PluginManagerTrait;
441 /// # let plugin_manager: Arc<dyn PluginManagerTrait> = todo!();
442 /// let mut scanner = SecurityScanner::new(Default::default())?;
443 /// scanner.set_plugin_manager(plugin_manager);
444 /// # Ok::<(), Box<dyn std::error::Error>>(())
445 /// ```
446 pub fn set_plugin_manager(
447 &mut self,
448 plugin_manager: Arc<dyn crate::plugins::PluginManagerTrait>,
449 ) {
450 self.plugin_manager = Some(plugin_manager);
451 }
452
453 /// Create a new security scanner with the given configuration
454 ///
455 /// Initializes all sub-scanners and loads threat patterns based on the provided configuration.
456 ///
457 /// # Arguments
458 /// * `config` - Scanner configuration controlling detection features and limits
459 ///
460 /// # Configuration Options
461 /// - `unicode_detection`: Enable/disable Unicode threat detection
462 /// - `injection_detection`: Enable/disable injection attack detection
463 /// - `xss_detection`: Enable/disable XSS detection
464 /// - `max_scan_depth`: Maximum recursion depth for JSON scanning (default: 20)
465 /// - `custom_patterns`: Optional path to custom threat pattern file
466 /// - `enhanced_mode`: Enable enhanced detection with advanced correlation
467 ///
468 /// # Returns
469 /// - `Ok(SecurityScanner)` - Configured scanner ready for threat detection
470 /// - `Err(ScanError)` - If pattern loading or scanner initialization fails
471 ///
472 /// # Errors
473 /// - `ScanError::PatternError` - If custom patterns file is invalid
474 /// - `ScanError::InvalidInput` - If scanner initialization fails
475 ///
476 /// # Security Best Practices
477 /// - Always validate the custom patterns file path if provided
478 /// - Set appropriate `max_scan_depth` to prevent stack exhaustion
479 /// - Enable all detection types unless you have specific requirements
480 ///
481 /// # Example
482 /// ```no_run
483 /// use kindly_guard_server::config::ScannerConfig;
484 /// use kindly_guard_server::scanner::SecurityScanner;
485 ///
486 /// // Basic configuration with all protections enabled
487 /// let config = ScannerConfig {
488 /// unicode_detection: true,
489 /// injection_detection: true,
490 /// xss_detection: Some(true),
491 /// max_scan_depth: 20,
492 /// custom_patterns: None,
493 /// enhanced_mode: Some(false),
494 /// enable_event_buffer: false,
495 /// crypto_detection: true,
496 /// max_content_size: 5_242_880, // 5MB
497 /// };
498 ///
499 /// let scanner = SecurityScanner::new(config)?;
500 /// # Ok::<(), Box<dyn std::error::Error>>(())
501 /// ```
502 ///
503 /// # Performance Notes
504 /// - Scanner initialization is relatively expensive due to pattern compilation
505 /// - Reuse scanner instances across multiple scans for better performance
506 /// - Enhanced mode may increase memory usage but improves detection accuracy
507 pub fn new(config: crate::config::ScannerConfig) -> Result<Self, ScanError> {
508 Self::with_processor(config, None)
509 }
510
511 /// Create a new security scanner with an optional event processor
512 ///
513 /// This method allows creation of a scanner with enhanced capabilities through
514 /// an event processor for advanced threat correlation and pattern analysis.
515 ///
516 /// # Arguments
517 /// * `config` - Scanner configuration
518 /// * `event_processor` - Optional processor for enhanced threat analysis
519 ///
520 /// # Enhanced Mode Features
521 /// When an event processor is provided and `enable_event_buffer` is true:
522 /// - Real-time threat correlation across multiple scans
523 /// - Pattern analysis for identifying attack campaigns
524 /// - Performance optimizations through event batching
525 /// - Advanced metrics and analytics
526 ///
527 /// # Implementation Note
528 /// This follows the trait-based architecture pattern where enhanced implementations
529 /// are hidden behind trait abstractions, allowing for both standard and optimized
530 /// scanning modes without exposing implementation details.
531 pub fn with_processor(
532 config: crate::config::ScannerConfig,
533 #[allow(unused_variables)] event_processor: Option<
534 Arc<dyn crate::traits::SecurityEventProcessor>,
535 >,
536 ) -> Result<Self, ScanError> {
537 let patterns = if let Some(path) = &config.custom_patterns {
538 ThreatPatterns::load_from_file(path)?
539 } else {
540 ThreatPatterns::default()
541 };
542
543 // Use provided event processor if available and enabled
544 #[cfg(feature = "enhanced")]
545 let event_processor = if config.enable_event_buffer {
546 event_processor
547 } else {
548 None
549 };
550
551 #[cfg(not(feature = "enhanced"))]
552 let _event_processor: Option<Arc<dyn crate::traits::SecurityEventProcessor>> = None;
553
554 // Create scanners with optional enhancement
555 #[cfg(feature = "enhanced")]
556 let mut unicode_scanner = UnicodeScanner::with_config(config.allow_text_control_chars);
557 #[cfg(not(feature = "enhanced"))]
558 let unicode_scanner = UnicodeScanner::with_config(config.allow_text_control_chars);
559
560 #[cfg(feature = "enhanced")]
561 let mut injection_scanner = InjectionScanner::new(&patterns)?;
562 #[cfg(not(feature = "enhanced"))]
563 let injection_scanner = InjectionScanner::new(&patterns)?;
564 let crypto_scanner = CryptoScanner::new();
565
566 // Create XSS scanner with trait-based architecture
567 let xss_scanner = create_xss_scanner(
568 patterns.xss_patterns().to_vec(),
569 config.enhanced_mode.unwrap_or(false),
570 )?;
571
572 // Enhance scanners when processor is available
573 #[cfg(feature = "enhanced")]
574 if event_processor.is_some() {
575 unicode_scanner.enable_enhancement();
576 injection_scanner.enable_enhancement();
577 tracing::debug!("Scanner optimization enabled");
578 }
579
580 Ok(Self {
581 unicode_scanner,
582 injection_scanner,
583 xss_scanner,
584 crypto_scanner,
585 patterns,
586 config,
587 plugin_manager: None, // Will be set later
588 #[cfg(feature = "enhanced")]
589 event_processor,
590 })
591 }
592
593 /// Scan text for threats
594 pub fn scan_text(&self, text: &str) -> ScanResult {
595 let mut threats = Vec::new();
596
597 // Check for oversized content to prevent DoS
598 // Use max_input_size if set, otherwise fall back to max_content_size
599 let max_size = self
600 .config
601 .max_input_size
602 .unwrap_or(self.config.max_content_size);
603 if text.len() > max_size {
604 threats.push(Threat {
605 threat_type: ThreatType::DosPotential,
606 severity: Severity::High,
607 location: Location::Text {
608 offset: 0,
609 length: text.len(),
610 },
611 description: format!(
612 "Content size ({} bytes) exceeds maximum allowed size ({} bytes)",
613 text.len(),
614 max_size
615 ),
616 remediation: Some(
617 "Reduce content size or increase max_content_size configuration".to_string(),
618 ),
619 });
620 return Ok(threats);
621 }
622
623 // For large content, use chunk-based scanning with timeout
624 const CHUNK_SIZE: usize = 1024 * 1024; // 1MB chunks
625 const MAX_SCAN_TIME: std::time::Duration = std::time::Duration::from_secs(5);
626
627 // If content is large, scan in chunks with early termination
628 if text.len() > CHUNK_SIZE {
629 return self.scan_text_chunked(text, CHUNK_SIZE, MAX_SCAN_TIME);
630 }
631
632 // For smaller content, use regular scanning
633 self.scan_text_regular(text)
634 }
635
636 /// Scan large text in chunks with timeout protection
637 fn scan_text_chunked(
638 &self,
639 text: &str,
640 chunk_size: usize,
641 max_scan_time: std::time::Duration,
642 ) -> ScanResult {
643 let mut all_threats = Vec::new();
644 let scan_start = std::time::Instant::now();
645
646 // Process text in chunks
647 for (chunk_offset, chunk) in text.as_bytes().chunks(chunk_size).enumerate() {
648 // Check timeout
649 if scan_start.elapsed() > max_scan_time {
650 tracing::warn!(
651 "Scan timeout reached after {} seconds, processed {} bytes of {}",
652 max_scan_time.as_secs(),
653 chunk_offset * chunk_size,
654 text.len()
655 );
656 all_threats.push(Threat {
657 threat_type: ThreatType::DosPotential,
658 severity: Severity::Medium,
659 location: Location::Text {
660 offset: chunk_offset * chunk_size,
661 length: text.len() - (chunk_offset * chunk_size),
662 },
663 description: "Scan timeout - content too large to scan completely".to_string(),
664 remediation: Some(
665 "Consider reducing content size or increasing scan timeout".to_string(),
666 ),
667 });
668 break;
669 }
670
671 // Convert chunk back to str safely
672 let chunk_str = match std::str::from_utf8(chunk) {
673 Ok(s) => s,
674 Err(e) => {
675 // If chunk boundary splits a UTF-8 sequence, try to find a valid boundary
676 let valid_up_to = e.valid_up_to();
677 if valid_up_to == 0 {
678 continue; // Skip this chunk if we can't find any valid UTF-8
679 }
680 match std::str::from_utf8(&chunk[..valid_up_to]) {
681 Ok(s) => s,
682 Err(_) => continue, // Skip invalid chunk
683 }
684 },
685 };
686
687 // Scan this chunk
688 let chunk_threats = self.scan_text_regular(chunk_str)?;
689
690 // Adjust threat locations to account for chunk offset
691 let byte_offset = chunk_offset * chunk_size;
692 for mut threat in chunk_threats {
693 if let Location::Text { ref mut offset, .. } = threat.location {
694 *offset += byte_offset;
695 }
696 all_threats.push(threat);
697 }
698 }
699
700 Ok(all_threats)
701 }
702
703 /// Regular scan implementation (extracted from original scan_text)
704 fn scan_text_regular(&self, text: &str) -> ScanResult {
705 let mut threats = Vec::new();
706
707 // Use enhanced scanning when available
708 #[cfg(feature = "enhanced")]
709 if let Some(processor) = &self.event_processor {
710 // Process scan event for correlation
711 let event = crate::traits::SecurityEvent {
712 event_type: "scan".to_string(),
713 client_id: "scanner".to_string(),
714 timestamp: std::time::SystemTime::now()
715 .duration_since(std::time::UNIX_EPOCH)
716 .unwrap_or_default()
717 .as_secs(),
718 metadata: serde_json::json!({
719 "preview": &text[..text.len().min(100)]
720 }),
721 };
722 // Check if we're already in a runtime context
723 if let Ok(_handle) = tokio::runtime::Handle::try_current() {
724 // We're in a runtime, use std::thread::spawn to avoid blocking
725 let processor_clone = processor.clone();
726 std::thread::spawn(move || {
727 let rt = tokio::runtime::Builder::new_current_thread()
728 .enable_all()
729 .build();
730 if let Ok(rt) = rt {
731 let _ = rt.block_on(processor_clone.process_event(event));
732 }
733 })
734 .join()
735 .ok();
736 } else {
737 // Not in a runtime, create a new one
738 let rt = tokio::runtime::Builder::new_current_thread()
739 .enable_all()
740 .build();
741 if let Ok(rt) = rt {
742 let _ = rt.block_on(processor.process_event(event));
743 }
744 }
745
746 tracing::trace!("Optimized scanning active");
747 }
748
749 if self.config.unicode_detection {
750 threats.extend(self.unicode_scanner.scan_text(text)?);
751 }
752
753 if self.config.injection_detection || self.config.path_traversal_detection {
754 // Get all threats from injection scanner
755 let injection_threats = self.injection_scanner.scan_text(text)?;
756
757 // Filter based on configuration
758 for threat in injection_threats {
759 match threat.threat_type {
760 ThreatType::PathTraversal => {
761 if self.config.path_traversal_detection {
762 threats.push(threat);
763 }
764 },
765 _ => {
766 if self.config.injection_detection {
767 threats.push(threat);
768 }
769 },
770 }
771 }
772 }
773
774 // Run crypto scanner
775 if self.config.crypto_detection {
776 threats.extend(self.crypto_scanner.scan_text(text)?);
777 }
778
779 // Run XSS scanner (async scanner in sync context)
780 if self.config.xss_detection.unwrap_or(true) {
781 // Check if we're already in a runtime context
782 let xss_threats = if let Ok(_handle) = tokio::runtime::Handle::try_current() {
783 // We're in a runtime, use std::thread::spawn to run in a separate thread
784 let text_clone = text.to_string();
785 let xss_scanner = self.xss_scanner.clone();
786 std::thread::spawn(move || {
787 let rt = tokio::runtime::Builder::new_current_thread()
788 .enable_all()
789 .build()
790 .map_err(|e| {
791 ScanError::RuntimeError(format!("Failed to create runtime: {}", e))
792 })?;
793
794 rt.block_on(xss_scanner.scan_xss(&text_clone))
795 })
796 .join()
797 .map_err(|_| ScanError::RuntimeError("Thread panic".to_string()))??
798 } else {
799 // Not in a runtime, create a new one
800 let rt = tokio::runtime::Builder::new_current_thread()
801 .enable_all()
802 .build()
803 .map_err(|e| {
804 ScanError::RuntimeError(format!("Failed to create runtime: {}", e))
805 })?;
806
807 rt.block_on(self.xss_scanner.scan_xss(text))?
808 };
809 threats.extend(xss_threats);
810 }
811
812 // Run plugin scanners if available
813 if let Some(plugin_manager) = &self.plugin_manager {
814 // Note: Plugin scanning is currently only supported when called from
815 // non-async contexts (e.g., from the MCP server). The CLI uses async
816 // and cannot call plugins from within its runtime.
817 if tokio::runtime::Handle::try_current().is_err() {
818 use crate::plugins::{ScanContext, ScanOptions};
819 use tokio::runtime::Runtime;
820
821 let context = ScanContext {
822 data: text.as_bytes(),
823 content_type: Some("text/plain"),
824 client_id: "scanner",
825 metadata: &std::collections::HashMap::new(),
826 options: ScanOptions::default(),
827 };
828
829 // Create runtime for async plugin calls
830 let rt = Runtime::new().map_err(|e| ScanError::InvalidInput(e.to_string()))?;
831
832 match rt.block_on(plugin_manager.scan_all(context)) {
833 Ok(plugin_results) => {
834 for (_plugin_id, plugin_threats) in plugin_results {
835 threats.extend(plugin_threats);
836 }
837 },
838 Err(e) => {
839 tracing::warn!("Plugin scan error: {}", e);
840 },
841 }
842 } else {
843 tracing::debug!("Plugin scanning skipped in async context");
844 }
845 }
846
847 // Track threats through processor for pattern analysis
848 #[cfg(feature = "enhanced")]
849 if !threats.is_empty() {
850 if let Some(processor) = &self.event_processor {
851 for threat in &threats {
852 let event = crate::traits::SecurityEvent {
853 event_type: "threat_detected".to_string(),
854 client_id: "scanner".to_string(),
855 timestamp: std::time::SystemTime::now()
856 .duration_since(std::time::UNIX_EPOCH)
857 .unwrap_or_default()
858 .as_secs(),
859 metadata: serde_json::json!({
860 "threat_type": match &threat.threat_type {
861 ThreatType::Custom(name) => name.clone(),
862 _ => format!("{:?}", threat.threat_type),
863 },
864 "severity": format!("{:?}", threat.severity)
865 }),
866 };
867 // Check if we're already in a runtime context
868 if let Ok(_handle) = tokio::runtime::Handle::try_current() {
869 // We're in a runtime, use std::thread::spawn to avoid blocking
870 let processor_clone = processor.clone();
871 std::thread::spawn(move || {
872 let rt = tokio::runtime::Builder::new_current_thread()
873 .enable_all()
874 .build();
875 if let Ok(rt) = rt {
876 let _ = rt.block_on(processor_clone.process_event(event));
877 }
878 })
879 .join()
880 .ok();
881 } else {
882 // Not in a runtime, create a new one
883 let rt = tokio::runtime::Builder::new_current_thread()
884 .enable_all()
885 .build();
886 if let Ok(rt) = rt {
887 let _ = rt.block_on(processor.process_event(event));
888 }
889 }
890 }
891 }
892 }
893
894 Ok(threats)
895 }
896
897 /// Scan JSON value for security threats
898 ///
899 /// Recursively scans a JSON structure for threats in all string values and object keys.
900 /// This method is essential for securing API endpoints that accept JSON payloads.
901 ///
902 /// # Arguments
903 /// * `value` - The JSON value to scan (can be any valid JSON type)
904 ///
905 /// # Returns
906 /// - `Ok(Vec<Threat>)` - List of detected threats with JSON path locations
907 /// - `Err(ScanError)` - If scanning fails or depth limit is exceeded
908 ///
909 /// # Security Considerations
910 /// - **Depth Limiting**: Prevents stack exhaustion from deeply nested JSON
911 /// - **Key Scanning**: Object keys are scanned as they can contain payloads
912 /// - **Path Tracking**: Each threat includes the JSON path for precise location
913 /// - **Type Safety**: Only string values are scanned (numbers/bools are safe)
914 ///
915 /// # JSON Path Format
916 /// Threats are reported with JSON paths for easy identification:
917 /// - Root: `$`
918 /// - Object field: `$.field` or `$.parent.child`
919 /// - Array element: `$[0]` or `$.array[2]`
920 /// - Nested: `$.users[0].name`
921 ///
922 /// # Error Handling
923 /// - `ScanError::MaxDepthExceeded` - If nesting exceeds `max_scan_depth`
924 /// - `ScanError::InvalidInput` - If JSON serialization fails
925 /// - Plugin errors are logged but don't fail the scan
926 ///
927 /// # Example
928 /// ```no_run
929 /// use kindly_guard_server::scanner::{SecurityScanner, Location};
930 /// use serde_json::json;
931 ///
932 /// # let scanner = SecurityScanner::new(Default::default())?;
933 /// // Scan a JSON API request
934 /// let request = json!({
935 /// "user": {
936 /// "name": "admin' OR '1'='1",
937 /// "bio": "Hello\u{202E}World", // BiDi override
938 /// "tags": ["safe", "<script>alert(1)</script>"]
939 /// }
940 /// });
941 ///
942 /// let threats = scanner.scan_json(&request)?;
943 ///
944 /// for threat in threats {
945 /// if let Location::Json { path } = &threat.location {
946 /// eprintln!("Threat at {}: {}", path, threat.description);
947 /// // Outputs:
948 /// // Threat at $.user.name: SQL Injection
949 /// // Threat at $.user.bio: BiDi Text Spoofing
950 /// // Threat at $.user.tags[1]: Cross-Site Scripting
951 /// }
952 /// }
953 /// # Ok::<(), Box<dyn std::error::Error>>(())
954 /// ```
955 ///
956 /// # Performance and Security Trade-offs
957 /// - **Depth vs Security**: Lower `max_scan_depth` prevents DoS but may miss threats
958 /// - **Memory Usage**: Large JSON structures consume memory proportional to depth
959 /// - **Scan Time**: O(n) where n is total number of string values in JSON
960 /// - **Recommendation**: Set depth limit based on expected legitimate nesting
961 ///
962 /// # Best Practices
963 /// - Validate JSON schema before scanning for structural attacks
964 /// - Consider rate limiting based on JSON size/complexity
965 /// - Log scan results for security monitoring and pattern analysis
966 /// - Implement allowlists for known-safe patterns to reduce false positives
967 pub fn scan_json(&self, value: &serde_json::Value) -> ScanResult {
968 let mut threats = self.scan_json_recursive(value, "$", 0)?;
969
970 // Run plugin scanners if available
971 if let Some(plugin_manager) = &self.plugin_manager {
972 // Note: Plugin scanning is currently only supported when called from
973 // non-async contexts (e.g., from the MCP server). The CLI uses async
974 // and cannot call plugins from within its runtime.
975 if tokio::runtime::Handle::try_current().is_err() {
976 use crate::plugins::{ScanContext, ScanOptions};
977 use tokio::runtime::Runtime;
978
979 // Convert JSON to bytes for plugin scanning
980 let json_bytes = serde_json::to_vec(value)
981 .map_err(|e| ScanError::InvalidInput(e.to_string()))?;
982
983 let context = ScanContext {
984 data: &json_bytes,
985 content_type: Some("application/json"),
986 client_id: "scanner",
987 metadata: &std::collections::HashMap::new(),
988 options: ScanOptions::default(),
989 };
990
991 // Create runtime for async plugin calls
992 let rt = Runtime::new().map_err(|e| ScanError::InvalidInput(e.to_string()))?;
993
994 match rt.block_on(plugin_manager.scan_all(context)) {
995 Ok(plugin_results) => {
996 for (_plugin_id, plugin_threats) in plugin_results {
997 // Convert plugin threats to have JSON location
998 for mut threat in plugin_threats {
999 if matches!(threat.location, Location::Text { .. }) {
1000 threat.location = Location::Json {
1001 path: "$".to_string(),
1002 };
1003 }
1004 threats.push(threat);
1005 }
1006 }
1007 },
1008 Err(e) => {
1009 tracing::warn!("Plugin scan error: {}", e);
1010 },
1011 }
1012 } else {
1013 tracing::debug!("Plugin scanning skipped in async context");
1014 }
1015 }
1016
1017 Ok(threats)
1018 }
1019
1020 /// Recursively scan JSON values with depth tracking
1021 ///
1022 /// # Implementation Details
1023 /// - Depth is tracked to prevent stack exhaustion from malicious deeply nested JSON
1024 /// - Object keys are scanned as they can contain injection payloads
1025 /// - Arrays are indexed numerically in the path (e.g., `$[0]`, `$[1]`)
1026 /// - Only string values are scanned; numbers, booleans, and null are inherently safe
1027 ///
1028 /// # Security Note
1029 /// This method is private to ensure depth tracking is always enforced. Public API
1030 /// must use `scan_json()` which initializes depth tracking correctly.
1031 fn scan_json_recursive(
1032 &self,
1033 value: &serde_json::Value,
1034 path: &str,
1035 depth: usize,
1036 ) -> ScanResult {
1037 if depth > self.config.max_scan_depth {
1038 return Err(ScanError::MaxDepthExceeded);
1039 }
1040
1041 let mut threats = Vec::new();
1042
1043 match value {
1044 serde_json::Value::String(s) => {
1045 let text_threats = self.scan_text(s)?;
1046 for mut threat in text_threats {
1047 threat.location = Location::Json {
1048 path: path.to_string(),
1049 };
1050 threats.push(threat);
1051 }
1052 },
1053 serde_json::Value::Object(map) => {
1054 for (key, val) in map {
1055 // Check the key itself
1056 if let Ok(key_threats) = self.scan_text(key) {
1057 for mut threat in key_threats {
1058 threat.location = Location::Json {
1059 path: format!("{path}.{key}"),
1060 };
1061 threats.push(threat);
1062 }
1063 }
1064
1065 // Recursively check the value
1066 let sub_path = format!("{path}.{key}");
1067 threats.extend(self.scan_json_recursive(val, &sub_path, depth + 1)?);
1068 }
1069 },
1070 serde_json::Value::Array(arr) => {
1071 for (i, val) in arr.iter().enumerate() {
1072 let sub_path = format!("{path}[{i}]");
1073 threats.extend(self.scan_json_recursive(val, &sub_path, depth + 1)?);
1074 }
1075 },
1076 _ => {}, // Numbers, booleans, null are safe
1077 }
1078
1079 Ok(threats)
1080 }
1081
1082 /// Get scanner statistics for monitoring and analysis
1083 ///
1084 /// Returns current statistics from all enabled scanners including:
1085 /// - Total number of scans performed
1086 /// - Number of threats detected by type
1087 /// - Performance metrics when enhanced mode is enabled
1088 ///
1089 /// # Thread Safety
1090 /// Statistics are collected using atomic operations and are safe to read
1091 /// while scanning is in progress on other threads.
1092 ///
1093 /// # Example
1094 /// ```no_run
1095 /// # use kindly_guard_server::scanner::SecurityScanner;
1096 /// # let scanner = SecurityScanner::new(Default::default())?;
1097 /// let stats = scanner.stats();
1098 /// println!("Total scans: {}", stats.total_scans);
1099 /// println!("Unicode threats: {}", stats.unicode_threats_detected);
1100 /// println!("Injection threats: {}", stats.injection_threats_detected);
1101 /// # Ok::<(), Box<dyn std::error::Error>>(())
1102 /// ```
1103 pub fn stats(&self) -> ScannerStats {
1104 #[cfg(feature = "enhanced")]
1105 let mut stats = ScannerStats {
1106 unicode_threats_detected: self.unicode_scanner.threats_detected(),
1107 injection_threats_detected: self.injection_scanner.threats_detected(),
1108 total_scans: self.unicode_scanner.total_scans() + self.injection_scanner.total_scans(),
1109 };
1110
1111 #[cfg(not(feature = "enhanced"))]
1112 let stats = ScannerStats {
1113 unicode_threats_detected: self.unicode_scanner.threats_detected(),
1114 injection_threats_detected: self.injection_scanner.threats_detected(),
1115 total_scans: self.unicode_scanner.total_scans() + self.injection_scanner.total_scans(),
1116 };
1117
1118 // Enhance stats with processor metrics
1119 #[cfg(feature = "enhanced")]
1120 if let Some(processor) = &self.event_processor {
1121 let processor_stats = processor.get_stats();
1122 // Add processed events to total scans for more accurate metrics
1123 stats.total_scans += processor_stats.events_processed / 10; // Approximate scan count
1124 tracing::trace!("Analytics enhanced");
1125 }
1126
1127 stats
1128 }
1129}
1130
/// Scanner statistics
///
/// Snapshot of scanner counters as returned by `SecurityScanner::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScannerStats {
    /// Threat count reported by the Unicode scanner.
    pub unicode_threats_detected: u64,
    /// Threat count reported by the injection scanner.
    pub injection_threats_detected: u64,
    /// Sum of the Unicode and injection scanners' scan counts; with the
    /// `enhanced` feature, `stats()` also adds an estimate derived from the
    /// event processor's processed-event count.
    pub total_scans: u64,
}
1138
1139impl fmt::Display for ThreatType {
1140 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1141 match self {
1142 Self::UnicodeInvisible => write!(f, "Invisible Unicode Character"),
1143 Self::UnicodeBiDi => write!(f, "BiDi Text Spoofing"),
1144 Self::UnicodeHomograph => write!(f, "Homograph Attack"),
1145 Self::UnicodeControl => write!(f, "Dangerous Control Character"),
1146 Self::PromptInjection => write!(f, "Prompt Injection"),
1147 Self::CommandInjection => write!(f, "Command Injection"),
1148 Self::PathTraversal => write!(f, "Path Traversal"),
1149 Self::SqlInjection => write!(f, "SQL Injection"),
1150 Self::CrossSiteScripting => write!(f, "Cross-Site Scripting"),
1151 Self::LdapInjection => write!(f, "LDAP Injection"),
1152 Self::XmlInjection => write!(f, "XML Injection/XXE"),
1153 Self::NoSqlInjection => write!(f, "NoSQL Injection"),
1154 Self::SessionIdExposure => write!(f, "Session ID Exposure"),
1155 Self::ToolPoisoning => write!(f, "Tool Poisoning"),
1156 Self::TokenTheft => write!(f, "Token Theft Risk"),
1157 Self::DosPotential => write!(f, "Denial of Service Potential"),
1158 Self::Custom(name) => write!(f, "{name}"),
1159 }
1160 }
1161}
1162
1163impl fmt::Display for Severity {
1164 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1165 match self {
1166 Self::Low => write!(f, "Low"),
1167 Self::Medium => write!(f, "Medium"),
1168 Self::High => write!(f, "High"),
1169 Self::Critical => write!(f, "Critical"),
1170 }
1171 }
1172}
1173
1174impl fmt::Display for Threat {
1175 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1176 write!(
1177 f,
1178 "{} [{}] at {}: {}",
1179 self.threat_type, self.severity, self.location, self.description
1180 )
1181 }
1182}
1183
1184impl fmt::Display for Location {
1185 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1186 match self {
1187 Self::Text { offset, length } => write!(f, "offset {}, length {}", offset, length),
1188 Self::Json { path } => write!(f, "JSON path '{}'", path),
1189 Self::Binary { offset } => write!(f, "binary offset {}", offset),
1190 }
1191 }
1192}
1193
1194// Implement the trait for compatibility with the trait-based architecture
1195/// Create a security scanner wrapped in Arc for thread-safe usage
1196pub fn create_security_scanner(
1197 config: &crate::config::ScannerConfig,
1198) -> Arc<dyn crate::traits::SecurityScannerTrait> {
1199 match SecurityScanner::new(config.clone()) {
1200 Ok(scanner) => Arc::new(scanner),
1201 Err(e) => {
1202 tracing::error!("Failed to create security scanner: {}", e);
1203 // Try to create scanner with default config
1204 match SecurityScanner::new(Default::default()) {
1205 Ok(scanner) => {
1206 tracing::warn!("Created scanner with default configuration as fallback");
1207 Arc::new(scanner)
1208 },
1209 Err(default_err) => {
1210 tracing::error!(
1211 "FATAL: Cannot create default scanner: {}. Using no-op scanner that denies all requests",
1212 default_err
1213 );
1214 // Return a no-op scanner that denies everything for safety
1215 Arc::new(NoOpScanner::new())
1216 },
1217 }
1218 },
1219 }
1220}
1221
1222impl crate::traits::SecurityScannerTrait for SecurityScanner {
1223 fn scan_text(&self, text: &str) -> Vec<Threat> {
1224 self.scan_text(text).unwrap_or_default()
1225 }
1226
1227 fn scan_json(&self, value: &serde_json::Value) -> Vec<Threat> {
1228 self.scan_json(value).unwrap_or_default()
1229 }
1230
1231 fn scan_with_depth(&self, text: &str, _max_depth: usize) -> Vec<Threat> {
1232 // TODO: Implement depth-limited scanning
1233 self.scan_text(text).unwrap_or_default()
1234 }
1235
1236 fn get_stats(&self) -> crate::traits::ScannerStats {
1237 crate::traits::ScannerStats {
1238 texts_scanned: 0, // TODO: Track this
1239 threats_found: 0, // TODO: Track this
1240 unicode_threats: 0, // TODO: Track this
1241 injection_threats: 0, // TODO: Track this
1242 pattern_threats: 0, // TODO: Track this
1243 avg_scan_time_us: 0, // TODO: Track this
1244 }
1245 }
1246
1247 fn reset_stats(&self) {
1248 // TODO: Implement stats reset when tracking is added
1249 }
1250}
1251
/// Fail-closed fallback scanner.
///
/// Despite the name it performs no real analysis: its trait implementation
/// flags non-empty input as a critical threat. It is used as a last resort
/// when scanner creation fails.
struct NoOpScanner;

impl NoOpScanner {
    /// Create the fallback scanner; it carries no state.
    fn new() -> Self {
        NoOpScanner
    }
}
1261
1262impl crate::traits::SecurityScannerTrait for NoOpScanner {
1263 fn scan_text(&self, text: &str) -> Vec<Threat> {
1264 // Return a critical threat for any non-empty input
1265 if !text.is_empty() {
1266 vec![Threat {
1267 threat_type: ThreatType::Custom("Scanner initialization failed".to_string()),
1268 severity: Severity::Critical,
1269 location: Location::Text {
1270 offset: 0,
1271 length: text.len(),
1272 },
1273 description:
1274 "Security scanner failed to initialize. All requests denied for safety."
1275 .to_string(),
1276 remediation: Some(
1277 "Contact system administrator to fix scanner initialization".to_string(),
1278 ),
1279 }]
1280 } else {
1281 vec![]
1282 }
1283 }
1284
1285 fn scan_json(&self, value: &serde_json::Value) -> Vec<Threat> {
1286 // Return a critical threat for any non-null JSON
1287 if !value.is_null() {
1288 vec![Threat {
1289 threat_type: ThreatType::Custom("Scanner initialization failed".to_string()),
1290 severity: Severity::Critical,
1291 location: Location::Json {
1292 path: "$".to_string(),
1293 },
1294 description:
1295 "Security scanner failed to initialize. All requests denied for safety."
1296 .to_string(),
1297 remediation: Some(
1298 "Contact system administrator to fix scanner initialization".to_string(),
1299 ),
1300 }]
1301 } else {
1302 vec![]
1303 }
1304 }
1305
1306 fn scan_with_depth(&self, text: &str, _max_depth: usize) -> Vec<Threat> {
1307 // Use the same logic as scan_text
1308 self.scan_text(text)
1309 }
1310
1311 fn get_stats(&self) -> crate::traits::ScannerStats {
1312 crate::traits::ScannerStats {
1313 texts_scanned: 0,
1314 threats_found: 0,
1315 unicode_threats: 0,
1316 injection_threats: 0,
1317 pattern_threats: 0,
1318 avg_scan_time_us: 0,
1319 }
1320 }
1321
1322 fn reset_stats(&self) {
1323 // No-op scanner has no stats to reset
1324 }
1325}
1326
#[cfg(test)]
mod tests {
    use super::*;

    /// `Display` for `ThreatType` yields the human-readable label.
    #[test]
    fn test_threat_type_display() {
        let invisible = format!("{}", ThreatType::UnicodeInvisible);
        let prompt = format!("{}", ThreatType::PromptInjection);

        assert_eq!(invisible, "Invisible Unicode Character");
        assert_eq!(prompt, "Prompt Injection");
    }

    /// Severity levels compare in ascending order of seriousness.
    #[test]
    fn test_severity_ordering() {
        let ascending = [
            Severity::Low,
            Severity::Medium,
            Severity::High,
            Severity::Critical,
        ];

        for pair in ascending.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }
}