Skip to main content

ralph_workflow/common/
utils.rs

1//! Common utility functions.
2//!
3//! This module provides utility functions for command-line interface operations:
4//! - Shell command parsing
5//! - Text truncation for display
6//! - Secret redaction for logging
7
8mod io;
9
10/// Split a shell-like command string into argv parts.
11///
12/// Supports quotes and backslash escapes (e.g. `cmd --flag "a b"`).
13///
14/// # Example
15///
16/// ```ignore
17/// let argv = split_command("echo 'hello world'")?;
18/// assert_eq!(argv, vec!["echo", "hello world"]);
19/// ```
20///
21/// # Errors
22///
23/// Returns an error if the command string has unmatched quotes.
24pub fn split_command(cmd: &str) -> std::io::Result<Vec<String>> {
25    let cmd = cmd.trim();
26    if cmd.is_empty() {
27        return Ok(vec![]);
28    }
29
30    shell_words::split(cmd).map_err(|err| {
31        std::io::Error::new(
32            std::io::ErrorKind::InvalidInput,
33            format!("Failed to parse command string: {err}"),
34        )
35    })
36}
37
/// Report whether `key` names an option whose value should be treated as a secret.
///
/// The input is normalized before comparison:
/// - surrounding whitespace and any leading `-` characters are removed;
/// - if the text contains `=`, only the part before the first `=` is kept,
///   otherwise only the part before the first `:` (if any);
/// - the remainder is trimmed, lower-cased (ASCII) and `_` becomes `-`.
///
/// The normalized name must then equal one of a fixed list of
/// credential-like names (e.g. `token`, `api-key`, `password`).
pub(crate) fn is_sensitive_key(key: &str) -> bool {
    const SENSITIVE_NAMES: &[&str] = &[
        "token",
        "access-token",
        "api-key",
        "apikey",
        "auth",
        "authorization",
        "bearer",
        "client-secret",
        "password",
        "pass",
        "passwd",
        "private-key",
        "secret",
    ];

    // `trim_start_matches` removes every leading dash, so a single call
    // covers `-k`, `--key` and longer runs alike.
    let without_dashes = key.trim().trim_start_matches('-');

    // '=' takes precedence over ':' as the key/value separator.
    let name = match without_dashes.split_once('=') {
        Some((before, _)) => before,
        None => match without_dashes.split_once(':') {
            Some((before, _)) => before,
            None => without_dashes,
        },
    };

    let normalized = name.trim().to_ascii_lowercase().replace('_', "-");
    SENSITIVE_NAMES.contains(&normalized.as_str())
}
65
66fn redact_arg_value(key: &str, value: &str) -> String {
67    if is_sensitive_key(key) {
68        return "<redacted>".to_string();
69    }
70    io::secret_like_regex().map_or_else(
71        || value.to_string(),
72        |re| re.replace_all(value, "<redacted>").to_string(),
73    )
74}
75
76fn shell_quote_for_log(arg: &str) -> String {
77    if arg.is_empty() {
78        return "''".to_string();
79    }
80    if !arg
81        .chars()
82        .any(|c| c.is_whitespace() || matches!(c, '"' | '\'' | '\\'))
83    {
84        return arg.to_string();
85    }
86    let escaped = arg.replace('\'', r#"'\"'\"'"#);
87    format!("'{escaped}'")
88}
89
90/// Format argv for logs, redacting likely secrets.
91pub fn format_argv_for_log(argv: &[String]) -> String {
92    // Use indices to track state across iterations - simpler than scan for this use case
93    let indices = 0..argv.len();
94    let out: Vec<String> = indices
95        .map(|i| {
96            let arg = &argv[i];
97            // Check if previous arg was a sensitive key that should trigger redaction
98            let prev_was_sensitive = i > 0 && is_sensitive_key(&argv[i - 1]);
99
100            if prev_was_sensitive {
101                return "<redacted>".to_string();
102            }
103
104            if let Some((k, v)) = arg.split_once('=') {
105                // Flag-style (--token=...) or env-style (GITHUB_TOKEN=...)
106                let env_key = k.to_ascii_uppercase();
107                let looks_like_secret_env = env_key.contains("TOKEN")
108                    || env_key.contains("SECRET")
109                    || env_key.contains("PASSWORD")
110                    || env_key.contains("PASS")
111                    || env_key.contains("KEY");
112                if is_sensitive_key(k) || looks_like_secret_env {
113                    return format!("{}=<redacted>", shell_quote_for_log(k));
114                }
115                let redacted = redact_arg_value(k, v);
116                return shell_quote_for_log(&format!("{k}={redacted}"));
117            }
118
119            if is_sensitive_key(arg) {
120                // Return as-is, next iteration will redact
121                return arg.to_string();
122            }
123
124            let redacted = io::secret_like_regex().map_or_else(
125                || arg.clone(),
126                |re| re.replace_all(arg, "<redacted>").to_string(),
127            );
128            shell_quote_for_log(&redacted)
129        })
130        .collect();
131
132    out.join(" ")
133}
134
/// Shorten `text` to at most `limit` characters, marking the cut with `...`.
///
/// Lengths are measured in `char`s, not bytes, so multi-byte UTF-8 text is
/// always cut on a character boundary and never causes a panic.
///
/// - If `text` has at most `limit` characters it is returned unchanged.
/// - If `limit` is 3 or less there is no room for an ellipsis, so the first
///   `limit` characters are returned as-is.
/// - Otherwise the first `limit - 3` characters are kept and `...` appended.
///
/// # Example
///
/// ```ignore
/// assert_eq!(truncate_text("hello world", 8), "hello...");
/// assert_eq!(truncate_text("short", 10), "short");
/// ```
#[must_use]
pub fn truncate_text(text: &str, limit: usize) -> String {
    // Too narrow for an ellipsis: plain prefix, no marker.
    if limit <= 3 {
        return text.chars().take(limit).collect();
    }

    // No character at index `limit` means the text already fits.
    if text.chars().nth(limit).is_none() {
        return text.to_owned();
    }

    // Byte offset of the first character that has to go; everything before it
    // is exactly `limit - 3` characters long.
    let kept_chars = limit - 3;
    let cut = text
        .char_indices()
        .nth(kept_chars)
        .map_or(text.len(), |(byte_idx, _)| byte_idx);

    let mut shortened = String::with_capacity(cut + 3);
    shortened.push_str(&text[..cut]);
    shortened.push_str("...");
    shortened
}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `truncate_text` over `(text, limit, expected)` rows.
    fn check_truncations(cases: &[(&str, usize, &str)]) {
        for &(text, limit, expected) in cases {
            assert_eq!(truncate_text(text, limit), expected);
        }
    }

    #[test]
    fn test_split_command_simple() {
        let argv = split_command("echo hello").unwrap();
        assert_eq!(argv, ["echo", "hello"]);
    }

    #[test]
    fn test_split_command_with_quotes() {
        // The quoted span stays together as one argument.
        let argv = split_command("echo 'hello world'").unwrap();
        assert_eq!(argv, ["echo", "hello world"]);
    }

    #[test]
    fn test_split_command_empty() {
        assert!(split_command("").unwrap().is_empty());
    }

    #[test]
    fn test_split_command_whitespace() {
        assert!(split_command("   ").unwrap().is_empty());
    }

    #[test]
    fn test_truncate_text_no_truncation() {
        check_truncations(&[("hello", 10, "hello"), ("hello", 5, "hello")]);
    }

    #[test]
    fn test_truncate_text_with_ellipsis() {
        // 11 chars with limit 8: keep 5 chars, then "...".
        check_truncations(&[("hello world", 8, "hello...")]);
    }

    #[test]
    fn test_truncate_text_unicode() {
        // Six Japanese characters; multibyte input must not panic.
        let text = "日本語テスト";
        check_truncations(&[
            (text, 10, "日本語テスト"),
            (text, 6, "日本語テスト"),
            (text, 5, "日本..."),
        ]);
    }

    #[test]
    fn test_truncate_text_emoji() {
        // The emoji is several bytes but counts as a single character.
        let text = "Hello 👋 World";
        check_truncations(&[(text, 20, "Hello 👋 World"), (text, 10, "Hello 👋...")]);
    }

    #[test]
    fn test_truncate_text_edge_cases() {
        check_truncations(&[
            ("abc", 3, "abc"),
            // Limit too small for an ellipsis: plain prefix.
            ("abcd", 3, "abc"),
            ("ab", 1, "a"),
            ("", 5, ""),
        ]);
    }

    #[test]
    fn test_truncate_text_cjk_characters() {
        // Each CJK character is 3 bytes in UTF-8 (6 chars, 18 bytes), so this
        // checks that truncation counts characters rather than bytes.
        let text = "日本語テスト";
        check_truncations(&[
            // limit=4 leaves room for 1 char + "...".
            (text, 4, "日..."),
            // The full 6-char string fits in limit=6.
            (text, 6, "日本語テスト"),
        ]);
    }

    #[test]
    fn test_truncate_text_mixed_multibyte() {
        // 13 chars: "Hello " (6) + "世界" (2) + " test" (5).
        let text = "Hello 世界 test";
        check_truncations(&[
            (text, 20, "Hello 世界 test"),
            // limit=10: 7 chars + "...".
            (text, 10, "Hello 世..."),
        ]);
    }

    #[test]
    fn test_truncate_text_exact_boundary() {
        // 5 chars; '日' is 3 bytes but a single char.
        let text = "ab日cd";
        check_truncations(&[
            // limit=5 fits all 5 chars.
            (text, 5, "ab日cd"),
            // limit=4: 1 char + "...".
            (text, 4, "a..."),
        ]);
    }

    #[test]
    fn test_truncate_text_error_message_style() {
        // Long multibyte content cut to a 50-character preview.
        let text = format!("Error: {}", "日".repeat(200));
        let result = truncate_text(&text, 50);
        assert!(result.ends_with("..."), "Result should end with '...'");
        assert!(
            result.chars().count() <= 50,
            "Result char count {} exceeds limit 50",
            result.chars().count()
        );
    }

    #[test]
    fn test_truncate_text_4byte_emoji() {
        // Three emojis: 3 chars, 12 bytes.
        let text = "🎉🎊🎈";
        check_truncations(&[
            // Limits of 3 or more hold all 3 chars, so nothing is cut.
            (text, 3, "🎉🎊🎈"),
            (text, 4, "🎉🎊🎈"),
            (text, 5, "🎉🎊🎈"),
            // limit=2 is too small for an ellipsis: first 2 chars only.
            (text, 2, "🎉🎊"),
        ]);
    }

    #[test]
    fn test_truncate_text_combining_characters() {
        // "cafe" + combining acute accent: 5 chars, well within the limit.
        let text = "cafe\u{0301}";
        assert_eq!(truncate_text(text, 10), "cafe\u{0301}");
    }
}