Skip to main content

ralph_workflow/common/
utils.rs

1//! Common utility functions.
2//!
3//! This module provides utility functions for command-line interface operations:
4//! - Shell command parsing
5//! - Text truncation for display
6//! - Secret redaction for logging
7
8use std::io;
9
10use regex::Regex;
11
12/// Split a shell-like command string into argv parts.
13///
14/// Supports quotes and backslash escapes (e.g. `cmd --flag "a b"`).
15///
16/// # Example
17///
18/// ```ignore
19/// let argv = split_command("echo 'hello world'")?;
20/// assert_eq!(argv, vec!["echo", "hello world"]);
21/// ```
22///
23/// # Errors
24///
25/// Returns an error if the command string has unmatched quotes.
26pub fn split_command(cmd: &str) -> io::Result<Vec<String>> {
27    let cmd = cmd.trim();
28    if cmd.is_empty() {
29        return Ok(vec![]);
30    }
31
32    shell_words::split(cmd).map_err(|err| {
33        io::Error::new(
34            io::ErrorKind::InvalidInput,
35            format!("Failed to parse command string: {err}"),
36        )
37    })
38}
39
40static SECRET_LIKE_RE: std::sync::LazyLock<Option<Regex>> = std::sync::LazyLock::new(|| {
41    // Fixed ReDoS vulnerability by:
42    // 1. Using \b (word boundary) anchors to prevent overlapping matches
43    // 2. Making patterns more specific with exact length ranges
44    // 3. Limiting max character class repetition to 100
45    Regex::new(
46        r"(?ix)
47        \b(
48          # OpenAI API keys
49          sk-[a-z0-9]{20,100} |
50          # GitHub tokens
51          ghp_[a-z0-9]{20,100} |
52          github_pat_[a-z0-9_]{20,100} |
53          # Slack tokens
54          xox[baprs]-[a-z0-9-]{10,100} |
55          # AWS access keys
56          AKIA[0-9A-Z]{16} |
57          # AWS session tokens
58          (?:Aws)?[A-Z0-9]{40,100} |
59          # Stripe keys
60          sk_live_[a-zA-Z0-9]{24,100} |
61          sk_test_[a-zA-Z0-9]{24,100} |
62          # Firebase tokens
63          [a-zA-Z0-9_/+-]{40,100}\.firebaseio\.com |
64          [a-z0-9:_-]{40,100}@apps\.googleusercontent\.com |
65          # Generic JWT patterns
66          ey[a-zA-Z0-9_-]{1,100}\.[a-zA-Z0-9_-]{1,100}\.[a-zA-Z0-9_-]{1,100}
67        )\b
68        ",
69    )
70    .ok()
71});
72
/// Report whether `key` names an option that conventionally carries a secret.
///
/// The key is normalized before comparison: surrounding whitespace and any
/// leading dashes are dropped, anything from the first `=` (or, if there is no
/// `=`, the first `:`) onward is discarded, the rest is ASCII-lowercased, and
/// underscores become dashes. So `--API_KEY=abc` and `Authorization: x` both
/// count as sensitive.
fn is_sensitive_key(key: &str) -> bool {
    /// Normalized option names whose values must never be logged.
    const SENSITIVE_NAMES: &[&str] = &[
        "token",
        "access-token",
        "api-key",
        "apikey",
        "auth",
        "authorization",
        "bearer",
        "client-secret",
        "password",
        "pass",
        "passwd",
        "private-key",
        "secret",
    ];

    // `trim_start_matches` already strips every leading dash in one call.
    let stripped = key.trim().trim_start_matches('-');

    // Keep only the name part of `name=value` / `name:value`; `=` wins over `:`.
    let name = match stripped
        .split_once('=')
        .or_else(|| stripped.split_once(':'))
    {
        Some((head, _)) => head,
        None => stripped,
    };

    let normalized = name.trim().to_ascii_lowercase().replace('_', "-");
    SENSITIVE_NAMES.contains(&normalized.as_str())
}
100
101fn redact_arg_value(key: &str, value: &str) -> String {
102    if is_sensitive_key(key) {
103        return "<redacted>".to_string();
104    }
105    SECRET_LIKE_RE.as_ref().map_or_else(
106        || value.to_string(),
107        |re| re.replace_all(value, "<redacted>").to_string(),
108    )
109}
110
/// Quote a single argument so a logged command line reads unambiguously.
///
/// - An empty argument is rendered as `''`.
/// - An argument without whitespace, quotes, or backslashes is returned as-is.
/// - Anything else is wrapped in single quotes, with each embedded single
///   quote written as `'"'"'` (close the quoted run, emit a double-quoted `'`,
///   reopen the quoted run), which is the POSIX-shell way to embed a single
///   quote inside a single-quoted word.
///
/// Only whitespace, `"`, `'`, and `\` trigger quoting; other shell
/// metacharacters are passed through untouched.
fn shell_quote_for_log(arg: &str) -> String {
    if arg.is_empty() {
        return "''".to_string();
    }

    let needs_quoting = arg
        .chars()
        .any(|c| c.is_whitespace() || matches!(c, '"' | '\'' | '\\'));
    if !needs_quoting {
        return arg.to_string();
    }

    // The replacement must be exactly `'"'"'`. A raw string keeps backslashes
    // literally, so none may appear here.
    let escaped = arg.replace('\'', r#"'"'"'"#);
    format!("'{escaped}'")
}
124
125/// Format argv for logs, redacting likely secrets.
126pub fn format_argv_for_log(argv: &[String]) -> String {
127    let mut out = Vec::with_capacity(argv.len());
128    let mut redact_next_value = false;
129
130    for arg in argv {
131        if redact_next_value {
132            out.push("<redacted>".to_string());
133            redact_next_value = false;
134            continue;
135        }
136        redact_next_value = false;
137
138        if let Some((k, v)) = arg.split_once('=') {
139            // Flag-style (--token=...) or env-style (GITHUB_TOKEN=...)
140            let env_key = k.to_ascii_uppercase();
141            let looks_like_secret_env = env_key.contains("TOKEN")
142                || env_key.contains("SECRET")
143                || env_key.contains("PASSWORD")
144                || env_key.contains("PASS")
145                || env_key.contains("KEY");
146            if is_sensitive_key(k) || looks_like_secret_env {
147                out.push(format!("{}=<redacted>", shell_quote_for_log(k)));
148                continue;
149            }
150            let redacted = redact_arg_value(k, v);
151            out.push(shell_quote_for_log(&format!("{k}={redacted}")));
152            continue;
153        }
154
155        if is_sensitive_key(arg) {
156            out.push(shell_quote_for_log(arg));
157            redact_next_value = true;
158            continue;
159        }
160
161        let redacted = SECRET_LIKE_RE.as_ref().map_or_else(
162            || arg.clone(),
163            |re| re.replace_all(arg, "<redacted>").to_string(),
164        );
165        out.push(shell_quote_for_log(&redacted));
166    }
167
168    out.join(" ")
169}
170
/// Shorten `text` to at most `limit` characters, marking cuts with `...`.
///
/// Lengths are measured in `char`s, not bytes, so multi-byte UTF-8 text is
/// always cut on a character boundary. Text that already fits is returned
/// unchanged. When `limit` is 3 or less there is no room for an ellipsis, so
/// the text is simply cut to `limit` characters.
///
/// # Example
///
/// ```ignore
/// assert_eq!(truncate_text("hello world", 8), "hello...");
/// assert_eq!(truncate_text("short", 10), "short");
/// ```
pub fn truncate_text(text: &str, limit: usize) -> String {
    const ELLIPSIS: &str = "...";

    // Too small to fit the ellipsis: hard cut, no marker.
    if limit <= ELLIPSIS.len() {
        return text.chars().take(limit).collect();
    }

    // No character at index `limit` means the text has at most `limit` chars.
    if text.chars().nth(limit).is_none() {
        return text.to_owned();
    }

    // Reserve room for the ellipsis within the limit.
    let keep = limit - ELLIPSIS.len();
    let mut shortened: String = text.chars().take(keep).collect();
    shortened.push_str(ELLIPSIS);
    shortened
}
198
#[cfg(test)]
mod tests {
    use super::{split_command, truncate_text};

    /// Parse `input`, failing the test if the parser rejects it.
    fn parse(input: &str) -> Vec<String> {
        split_command(input).expect("command should parse")
    }

    #[test]
    fn test_split_command_simple() {
        assert_eq!(parse("echo hello"), ["echo", "hello"]);
    }

    #[test]
    fn test_split_command_with_quotes() {
        assert_eq!(parse("echo 'hello world'"), ["echo", "hello world"]);
    }

    #[test]
    fn test_split_command_empty() {
        assert!(parse("").is_empty());
    }

    #[test]
    fn test_split_command_whitespace() {
        assert!(parse("   ").is_empty());
    }

    #[test]
    fn test_truncate_text_no_truncation() {
        // Text at or under the limit comes back untouched.
        for limit in [10, 5] {
            assert_eq!(truncate_text("hello", limit), "hello");
        }
    }

    #[test]
    fn test_truncate_text_with_ellipsis() {
        // 11 chars with a limit of 8 keeps 5 chars and appends "...".
        assert_eq!(truncate_text("hello world", 8), "hello...");
    }

    #[test]
    fn test_truncate_text_unicode() {
        // Multibyte UTF-8 characters must not cause a panic.
        let six_chars = "日本語テスト";
        let cases = [(10, "日本語テスト"), (6, "日本語テスト"), (5, "日本...")];
        for (limit, expected) in cases {
            assert_eq!(truncate_text(six_chars, limit), expected);
        }
    }

    #[test]
    fn test_truncate_text_emoji() {
        // An emoji is several bytes but counts as a single character.
        let greeting = "Hello 👋 World";
        assert_eq!(truncate_text(greeting, 20), "Hello 👋 World");
        assert_eq!(truncate_text(greeting, 10), "Hello 👋...");
    }

    #[test]
    fn test_truncate_text_edge_cases() {
        let cases = [
            ("abc", 3, "abc"),
            ("abcd", 3, "abc"), // limit too small for an ellipsis
            ("ab", 1, "a"),
            ("", 5, ""),
        ];
        for (input, limit, expected) in cases {
            assert_eq!(truncate_text(input, limit), expected);
        }
    }
}