Skip to main content

ralph_workflow/common/
utils.rs

1//! Common utility functions.
2//!
3//! This module provides utility functions for command-line interface operations:
4//! - Shell command parsing
5//! - Text truncation for display
6//! - Secret redaction for logging
7
8use std::io;
9
10use regex::Regex;
11
12/// Split a shell-like command string into argv parts.
13///
14/// Supports quotes and backslash escapes (e.g. `cmd --flag "a b"`).
15///
16/// # Example
17///
18/// ```ignore
19/// let argv = split_command("echo 'hello world'")?;
20/// assert_eq!(argv, vec!["echo", "hello world"]);
21/// ```
22///
23/// # Errors
24///
25/// Returns an error if the command string has unmatched quotes.
26pub fn split_command(cmd: &str) -> io::Result<Vec<String>> {
27    let cmd = cmd.trim();
28    if cmd.is_empty() {
29        return Ok(vec![]);
30    }
31
32    shell_words::split(cmd).map_err(|err| {
33        io::Error::new(
34            io::ErrorKind::InvalidInput,
35            format!("Failed to parse command string: {err}"),
36        )
37    })
38}
39
40static SECRET_LIKE_RE: std::sync::LazyLock<Option<Regex>> = std::sync::LazyLock::new(|| {
41    // Fixed ReDoS vulnerability by:
42    // 1. Using \b (word boundary) anchors to prevent overlapping matches
43    // 2. Making patterns more specific with exact length ranges
44    // 3. Limiting max character class repetition to 100
45    Regex::new(
46        r"(?ix)
47        \b(
48          # OpenAI API keys
49          sk-[a-z0-9]{20,100} |
50          # GitHub tokens
51          ghp_[a-z0-9]{20,100} |
52          github_pat_[a-z0-9_]{20,100} |
53          # Slack tokens
54          xox[baprs]-[a-z0-9-]{10,100} |
55          # AWS access keys
56          AKIA[0-9A-Z]{16} |
57          # AWS session tokens
58          (?:Aws)?[A-Z0-9]{40,100} |
59          # Stripe keys
60          sk_live_[a-zA-Z0-9]{24,100} |
61          sk_test_[a-zA-Z0-9]{24,100} |
62          # Firebase tokens
63          [a-zA-Z0-9_/+-]{40,100}\.firebaseio\.com |
64          [a-z0-9:_-]{40,100}@apps\.googleusercontent\.com |
65          # Generic JWT patterns
66          ey[a-zA-Z0-9_-]{1,100}\.[a-zA-Z0-9_-]{1,100}\.[a-zA-Z0-9_-]{1,100}
67        )\b
68        ",
69    )
70    .ok()
71});
72
/// Report whether `key` names an option that carries a credential.
///
/// Normalisation applied before the lookup:
/// 1. surrounding whitespace and all leading `-` characters are removed;
/// 2. everything from the first `=` onward is dropped (or, when there is no
///    `=`, everything from the first `:` onward);
/// 3. the remainder is trimmed, ASCII-lower-cased, and `_` becomes `-`.
///
/// The result must equal one of the fixed names below, so `--API_KEY`,
/// `client_secret=abc` and `Authorization: x` are all sensitive, while
/// `GITHUB_TOKEN` (normalised to `github-token`) is not.
fn is_sensitive_key(key: &str) -> bool {
    const SENSITIVE_NAMES: &[&str] = &[
        "token",
        "access-token",
        "api-key",
        "apikey",
        "auth",
        "authorization",
        "bearer",
        "client-secret",
        "password",
        "pass",
        "passwd",
        "private-key",
        "secret",
    ];

    // `trim_start_matches` already strips every leading dash in one call.
    let stripped = key.trim().trim_start_matches('-');
    let name = match stripped
        .split_once('=')
        .or_else(|| stripped.split_once(':'))
    {
        Some((before, _)) => before,
        None => stripped,
    };
    let normalized = name.trim().to_ascii_lowercase().replace('_', "-");

    SENSITIVE_NAMES.contains(&normalized.as_str())
}
100
101fn redact_arg_value(key: &str, value: &str) -> String {
102    if is_sensitive_key(key) {
103        return "<redacted>".to_string();
104    }
105    SECRET_LIKE_RE.as_ref().map_or_else(
106        || value.to_string(),
107        |re| re.replace_all(value, "<redacted>").to_string(),
108    )
109}
110
/// Quote `arg` for display in a log line using POSIX single-quote rules.
///
/// * An empty argument is rendered as `''`.
/// * An argument containing no whitespace, no quote character (`"` or `'`)
///   and no backslash is returned unchanged.
/// * Anything else is wrapped in single quotes. Each embedded `'` is written
///   as `'"'"'` (close the quote, emit a double-quoted `'`, reopen the
///   quote), so the result parses back to the original argument.
///
/// Only the characters listed above trigger quoting; other shell
/// metacharacters (`$`, `;`, `<`, ...) pass through untouched, so treat the
/// result as log output rather than something safe to re-execute blindly.
fn shell_quote_for_log(arg: &str) -> String {
    if arg.is_empty() {
        return "''".to_string();
    }

    let needs_quoting = arg
        .chars()
        .any(|c| c.is_whitespace() || matches!(c, '"' | '\'' | '\\'));
    if !needs_quoting {
        return arg.to_string();
    }

    // The replacement must not contain backslashes: in a raw string they are
    // literal and would show up in the output, unbalancing the quotes.
    let escaped = arg.replace('\'', r#"'"'"'"#);
    format!("'{escaped}'")
}
124
125/// Format argv for logs, redacting likely secrets.
126pub fn format_argv_for_log(argv: &[String]) -> String {
127    let mut out = Vec::with_capacity(argv.len());
128    let mut redact_next_value = false;
129
130    for arg in argv {
131        if redact_next_value {
132            out.push("<redacted>".to_string());
133            redact_next_value = false;
134            continue;
135        }
136        redact_next_value = false;
137
138        if let Some((k, v)) = arg.split_once('=') {
139            // Flag-style (--token=...) or env-style (GITHUB_TOKEN=...)
140            let env_key = k.to_ascii_uppercase();
141            let looks_like_secret_env = env_key.contains("TOKEN")
142                || env_key.contains("SECRET")
143                || env_key.contains("PASSWORD")
144                || env_key.contains("PASS")
145                || env_key.contains("KEY");
146            if is_sensitive_key(k) || looks_like_secret_env {
147                out.push(format!("{}=<redacted>", shell_quote_for_log(k)));
148                continue;
149            }
150            let redacted = redact_arg_value(k, v);
151            out.push(shell_quote_for_log(&format!("{k}={redacted}")));
152            continue;
153        }
154
155        if is_sensitive_key(arg) {
156            out.push(shell_quote_for_log(arg));
157            redact_next_value = true;
158            continue;
159        }
160
161        let redacted = SECRET_LIKE_RE.as_ref().map_or_else(
162            || arg.clone(),
163            |re| re.replace_all(arg, "<redacted>").to_string(),
164        );
165        out.push(shell_quote_for_log(&redacted));
166    }
167
168    out.join(" ")
169}
170
/// Shorten `text` to at most `limit` characters, marking a cut with `...`.
///
/// Lengths are counted in `char`s rather than bytes, and cuts are made at
/// `char` boundaries, so multi-byte UTF-8 text never causes a panic.
///
/// * Text of `limit` characters or fewer is returned unchanged.
/// * Longer text becomes its first `limit - 3` characters followed by `...`.
/// * When `limit` is 3 or less there is no room for the ellipsis, so the
///   first `limit` characters are returned without it.
///
/// # Example
///
/// ```ignore
/// assert_eq!(truncate_text("hello world", 8), "hello...");
/// assert_eq!(truncate_text("short", 10), "short");
/// ```
#[must_use]
pub fn truncate_text(text: &str, limit: usize) -> String {
    const ELLIPSIS: &str = "...";

    // First `n` characters of `s`, or all of `s` when it is shorter.
    fn leading_chars(s: &str, n: usize) -> &str {
        s.char_indices().nth(n).map_or(s, |(end, _)| &s[..end])
    }

    // Too narrow to fit the ellipsis: hard cut.
    if limit <= ELLIPSIS.len() {
        return leading_chars(text, limit).to_owned();
    }

    // No character at index `limit` means the text already fits.
    if text.chars().nth(limit).is_none() {
        return text.to_owned();
    }

    let mut shortened = String::from(leading_chars(text, limit - ELLIPSIS.len()));
    shortened.push_str(ELLIPSIS);
    shortened
}
199
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `truncate_text` over `(input, limit, expected)` rows.
    fn assert_truncations(cases: &[(&str, usize, &str)]) {
        for &(input, limit, expected) in cases {
            assert_eq!(truncate_text(input, limit), expected);
        }
    }

    #[test]
    fn test_split_command_simple() {
        let argv = split_command("echo hello").unwrap();
        assert_eq!(argv, ["echo", "hello"]);
    }

    #[test]
    fn test_split_command_with_quotes() {
        // The quoted span stays a single argument with the quotes removed.
        let argv = split_command("echo 'hello world'").unwrap();
        assert_eq!(argv, ["echo", "hello world"]);
    }

    #[test]
    fn test_split_command_empty() {
        let argv = split_command("").unwrap();
        assert!(argv.is_empty());
    }

    #[test]
    fn test_split_command_whitespace() {
        let argv = split_command("   ").unwrap();
        assert!(argv.is_empty());
    }

    #[test]
    fn test_truncate_text_no_truncation() {
        assert_truncations(&[("hello", 10, "hello"), ("hello", 5, "hello")]);
    }

    #[test]
    fn test_truncate_text_with_ellipsis() {
        // 11 chars against a limit of 8: 5 chars survive, then "...".
        assert_truncations(&[("hello world", 8, "hello...")]);
    }

    #[test]
    fn test_truncate_text_unicode() {
        // Six Japanese characters; must not panic on multi-byte UTF-8.
        let text = "日本語テスト";
        assert_truncations(&[
            (text, 10, "日本語テスト"),
            (text, 6, "日本語テスト"),
            (text, 5, "日本..."),
        ]);
    }

    #[test]
    fn test_truncate_text_emoji() {
        // The emoji is several bytes but counts as one character.
        let text = "Hello 👋 World";
        assert_truncations(&[(text, 20, "Hello 👋 World"), (text, 10, "Hello 👋...")]);
    }

    #[test]
    fn test_truncate_text_edge_cases() {
        assert_truncations(&[
            ("abc", 3, "abc"),
            // Limit too small for an ellipsis: plain cut.
            ("abcd", 3, "abc"),
            ("ab", 1, "a"),
            ("", 5, ""),
        ]);
    }

    #[test]
    fn test_truncate_text_cjk_characters() {
        // Each CJK character is 3 bytes in UTF-8 (6 chars, 18 bytes), so this
        // only passes when truncation counts characters rather than bytes.
        let text = "日本語テスト";
        assert_truncations(&[
            // limit=4 leaves room for exactly one character plus "...".
            (text, 4, "日..."),
            // The full six characters fit when limit=6.
            (text, 6, "日本語テスト"),
        ]);
    }

    #[test]
    fn test_truncate_text_mixed_multibyte() {
        // 13 chars: "Hello " (6) + "世界" (2) + " test" (5).
        let text = "Hello 世界 test";
        assert_truncations(&[
            (text, 20, "Hello 世界 test"),
            // limit=10 keeps 7 chars and appends "...".
            (text, 10, "Hello 世..."),
        ]);
    }

    #[test]
    fn test_truncate_text_exact_boundary() {
        // 5 chars, with a 3-byte character in the middle.
        let text = "ab日cd";
        assert_truncations(&[
            // Fits exactly: no truncation.
            (text, 5, "ab日cd"),
            // limit=4 keeps 1 char and appends "...".
            (text, 4, "a..."),
        ]);
    }

    #[test]
    fn test_truncate_text_error_message_style() {
        // Long multi-byte payload behind an ASCII prefix, cut to 50 chars.
        let text = format!("Error: {}", "日".repeat(200));
        let result = truncate_text(&text, 50);
        assert!(result.ends_with("..."), "Result should end with '...'");
        // The limit applies to characters, not bytes.
        let char_len = result.chars().count();
        assert!(
            char_len <= 50,
            "Result char count {} exceeds limit 50",
            char_len
        );
    }

    #[test]
    fn test_truncate_text_4byte_emoji() {
        // Three emoji: 3 chars, 12 bytes in UTF-8.
        let text = "🎉🎊🎈";
        assert_truncations(&[
            // Limits at or above the character count leave the text intact.
            (text, 3, "🎉🎊🎈"),
            (text, 4, "🎉🎊🎈"),
            (text, 5, "🎉🎊🎈"),
            // limit=2 is too small for an ellipsis, so it is a plain cut.
            (text, 2, "🎉🎊"),
        ]);
    }

    #[test]
    fn test_truncate_text_combining_characters() {
        // "café" in decomposed form: 'e' followed by a combining accent,
        // which makes 5 chars in total.
        let text = "cafe\u{0301}";
        assert_truncations(&[(text, 10, "cafe\u{0301}")]);
    }
}