1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
//! Helpers for rendering user-supplied commands for display.
//!
//! Commands passed to `nono` (e.g. after `--`) preserve each argument as a
//! separate `String`. When we echo those commands back to the user — in the
//! `nono learn` "Run with:" hint, the dry-run banner, `nono ps` details,
//! audit/rollback listings — we want the rendered line to round-trip: a user
//! copy-pasting it into a shell must execute the exact same argv that was
//! learned or recorded.
//!
//! A naive `command.join(" ")` breaks that contract as soon as any argument
//! contains whitespace, quotes, `$`, backslashes, etc. `echo 'foo bar' baz`
//! becomes `echo foo bar baz` (three args instead of two). See issue #660.
//!
//! This module centralises shell-quoting via [`shlex::try_quote`] so all
//! display sites stay consistent.
use std::borrow::Cow;
/// Quote a single argument for POSIX shell display.
///
/// Returns the input unchanged when it is already safe to display unquoted
/// (e.g. a simple identifier like `echo`). Falls back to a single-quoted
/// form when the argument contains a NUL byte, which `shlex::try_quote`
/// rejects. NUL cannot appear in a real shell argument, so this fallback
/// is only about keeping display infallible — we still want the user to
/// see *something* if a recorded command contains corrupt data.
fn quote_arg(arg: &str) -> Cow<'_, str> {
match shlex::try_quote(arg) {
Ok(quoted) => quoted,
Err(_) => Cow::Owned(format!("'{}'", arg.replace('\'', "'\\''"))),
}
}
/// Render a command (program + args) as a single shell-quoted line suitable
/// for display or copy-paste back into a terminal.
///
/// Each element is quoted independently with [`shlex::try_quote`] and joined
/// with spaces. Empty `command` returns an empty string.
pub(crate) fn format_command_line(command: &[String]) -> String {
command
.iter()
.map(|a| quote_arg(a))
.collect::<Vec<_>>()
.join(" ")
}
/// Truncate a string to at most `max_len` characters, appending an
/// ellipsis (`...`) when truncated.
///
/// Truncation operates on Unicode scalar values (`char`s), not bytes, so it
/// will never split a multi-byte UTF-8 sequence — byte slicing arbitrary
/// strings is a panic hazard whenever the input contains non-ASCII text.
/// Grapheme-cluster width (e.g. emoji ZWJ sequences, combining marks) is
/// out of scope; callers that render to a fixed-width terminal column
/// should size `max_len` conservatively.
///
/// Edge case: when `max_len < 3` and truncation is needed, the result is
/// just `"..."` (3 chars), which technically exceeds `max_len`. This
/// preserves the prior behaviour and matches the only sensible thing to
/// do — there is no shorter way to signal truncation. Callers should pass
/// `max_len >= 3`.
pub(crate) fn truncate_chars(s: &str, max_len: usize) -> String {
// Fast path: byte length >= char count always, so if bytes fit, chars fit
// too. When bytes exceed max_len we still need to check char count —
// a short string of multi-byte chars may have fewer chars than max_len
// and must not be truncated.
if s.len() <= max_len || s.chars().count() <= max_len {
return s.to_string();
}
let keep = max_len.saturating_sub(3);
let mut truncated: String = s.chars().take(keep).collect();
truncated.push_str("...");
truncated
}
/// Render a command via [`format_command_line`] and truncate it to at most
/// `max_len` characters, appending an ellipsis (`...`) when truncated.
///
/// Thin wrapper over [`truncate_chars`]; see that function for truncation
/// semantics.
pub(crate) fn truncate_command(command: &[String], max_len: usize) -> String {
truncate_chars(&format_command_line(command), max_len)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn plain_args_unquoted() {
assert_eq!(
format_command_line(&["echo".to_string(), "hello".to_string()]),
"echo hello"
);
}
#[test]
fn args_with_spaces_are_quoted() {
let out =
format_command_line(&["echo".to_string(), "foo bar".to_string(), "baz".to_string()]);
// Must preserve "foo bar" as a single argument when re-parsed.
let reparsed = shlex::split(&out).expect("round-trips through shlex");
assert_eq!(reparsed, vec!["echo", "foo bar", "baz"]);
}
#[test]
fn args_with_single_quotes_are_quoted() {
let out = format_command_line(&["echo".to_string(), "it's".to_string()]);
let reparsed = shlex::split(&out).expect("round-trips through shlex");
assert_eq!(reparsed, vec!["echo", "it's"]);
}
#[test]
fn args_with_double_quotes_are_quoted() {
let out = format_command_line(&["echo".to_string(), "a\"b".to_string()]);
let reparsed = shlex::split(&out).expect("round-trips through shlex");
assert_eq!(reparsed, vec!["echo", "a\"b"]);
}
#[test]
fn args_with_dollar_and_backslash_are_quoted() {
let out =
format_command_line(&["echo".to_string(), "$HOME".to_string(), "a\\b".to_string()]);
let reparsed = shlex::split(&out).expect("round-trips through shlex");
assert_eq!(reparsed, vec!["echo", "$HOME", "a\\b"]);
}
#[test]
fn empty_arg_is_quoted() {
let out = format_command_line(&["echo".to_string(), String::new()]);
let reparsed = shlex::split(&out).expect("round-trips through shlex");
assert_eq!(reparsed, vec!["echo", ""]);
}
#[test]
fn empty_command_returns_empty_string() {
assert_eq!(format_command_line(&[]), "");
}
#[test]
fn issue_660_repro() {
// From issue #660: `nono learn -- echo 'foo bar' 'baz'` must not
// render as `echo foo bar baz`.
let rendered =
format_command_line(&["echo".to_string(), "foo bar".to_string(), "baz".to_string()]);
let naive = ["echo", "foo bar", "baz"].join(" ");
assert_eq!(naive, "echo foo bar baz"); // what the bug produced
assert_ne!(rendered, naive);
let reparsed = shlex::split(&rendered).expect("round-trips through shlex");
assert_eq!(reparsed, vec!["echo", "foo bar", "baz"]);
}
#[test]
fn truncate_command_short_passes_through() {
let cmd = vec!["echo".to_string(), "hello".to_string()];
assert_eq!(truncate_command(&cmd, 40), "echo hello");
}
#[test]
fn truncate_command_handles_multibyte_utf8() {
// shlex single-quotes non-ASCII args, so the rendered line is
// `'éééééé'` — each `é` is 2 bytes in UTF-8. With `max_len = 5`,
// byte slicing at index `max_len - 3 = 2` lands inside the first
// `é` (between its two bytes, which is *not* a char boundary)
// and would panic under the previous implementation.
let cmd = vec!["éééééé".to_string()];
let result = truncate_command(&cmd, 5);
// Char-aware: keep `max_len - 3 = 2` chars (`'é`), append "...".
assert_eq!(result, "'é...");
assert!(result.chars().count() <= 5);
}
#[test]
fn truncate_command_max_len_smaller_than_ellipsis() {
// `max_len.saturating_sub(3)` underflows to 0 — we should still
// produce a non-panicking result.
let cmd = vec!["echo".to_string(), "hello world".to_string()];
let result = truncate_command(&cmd, 2);
assert_eq!(result, "...");
}
#[test]
fn truncate_chars_short_passes_through() {
assert_eq!(truncate_chars("hello", 40), "hello");
}
#[test]
fn truncate_chars_handles_multibyte_utf8_at_byte_boundary() {
// Regression: `print_side_by_side_diff` calls `truncate_chars` on
// diff lines with `col_width = (120-3)/2 = 58`. Byte-slicing at
// index `max_len - 3 = 54` panics when a 4-byte UTF-8 sequence
// straddles that boundary.
//
// Build a 58-char / 61-byte line where byte 54 is inside an emoji:
// 53 ASCII bytes + 😀 (bytes 53..57) + "tail" → 58 chars, 61 bytes.
// Byte 54 is a continuation byte (0x9F) inside the emoji.
let prefix: String = "x".repeat(53);
let line = format!("{prefix}\u{1F600}tail");
assert_eq!(line.chars().count(), 58);
assert_eq!(line.len(), 61);
assert!(!line.is_char_boundary(54)); // byte 54 is mid-emoji
// max_len = 57 < 58 chars → truncation must fire.
// Old byte-slicing code slices at byte 54 (= 57-3) → panic.
// New char-aware code takes 54 chars safely.
let result = truncate_chars(&line, 57);
assert!(result.ends_with("..."));
assert!(result.chars().count() <= 57);
}
#[test]
fn truncate_chars_max_len_smaller_than_ellipsis() {
assert_eq!(truncate_chars("hello world", 2), "...");
}
#[test]
fn truncate_chars_no_spurious_truncation_of_multibyte_string() {
// Regression for the Gemini review finding: a string of 5 emojis is
// 20 bytes but only 5 chars. With max_len = 10, the byte fast-path
// (20 > 10) falls through — but the char check (5 <= 10) must catch
// it and return the string unmodified. Without the `|| chars().count()
// <= max_len` guard, this would be spuriously truncated.
let s = "\u{1F600}".repeat(5); // 5 chars, 20 bytes
assert_eq!(s.len(), 20);
assert_eq!(s.chars().count(), 5);
assert_eq!(truncate_chars(&s, 10), s); // must NOT truncate
}
}