1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
//! Management subcommand handlers: index, status, update, and type listing.
use std::collections::HashSet;
use std::io::{self, Write};
use std::path::PathBuf;
use crate::index::Index;
use crate::path_util::path_from_bytes;
use crate::Config;
use crate::git_util::{is_safe_git_path, resolve_git_binary};
/// Handle `st index`: build the index and optionally print its statistics.
///
/// `_force` is accepted only for rg/ug command-line compatibility and is
/// never read, because `Index::build` always rebuilds. `quiet` turns the
/// library's progress output off; without it progress output is turned on.
///
/// Returns 0 on success and 2 when the build fails. A failure while printing
/// the statistics is mapped to an exit code by `handle_output`.
pub(super) fn cmd_index(mut config: Config, _force: bool, stats: bool, quiet: bool) -> i32 {
    // The CLI is verbose unless --quiet was given, so the final value of
    // `verbose` depends on `quiet` alone.
    config.verbose = !quiet;

    let index = match Index::build(config) {
        Err(e) => {
            eprintln!("st index: {e}");
            return 2;
        }
        Ok(built) => built,
    };

    if stats {
        let summary = index.stats();
        let stdout = io::stdout();
        let mut sink = stdout.lock();
        // Stop at the first failed write; later lines are not attempted.
        let written = writeln!(sink, "Documents: {}", summary.total_documents)
            .and_then(|()| writeln!(sink, "Segments: {}", summary.total_segments))
            .and_then(|()| writeln!(sink, "Grams: {}", summary.total_grams));
        if let Err(err) = written {
            return handle_output(err);
        }
    }

    drop(index);
    0
}
/// Handle `st status`: open the existing index and print its statistics.
///
/// With `json` set, a single JSON object (`documents`, `segments`, `grams`,
/// `index_dir`) is written. Otherwise a human-readable listing is written,
/// followed by a `Commit:` line when the index records a base commit.
///
/// Returns 0 on success and 2 when the index cannot be opened. A failure
/// while writing to stdout is mapped to an exit code by `handle_output`.
pub(super) fn cmd_status(config: Config, json: bool) -> i32 {
    // Only the index directory is needed after opening, so keep a copy of
    // that and hand the configuration itself to `Index::open`.
    let index_dir = config.index_dir.clone();
    let index = match Index::open(config) {
        Err(e) => {
            eprintln!("st status: {e}");
            return 2;
        }
        Ok(opened) => opened,
    };
    let summary = index.stats();

    let stdout = io::stdout();
    let mut out = stdout.lock();

    let written = if json {
        // serde_json handles escaping, so an index_dir containing quotes or
        // backslashes still produces well-formed JSON.
        let obj = serde_json::json!({
            "documents": summary.total_documents,
            "segments": summary.total_segments,
            "grams": summary.total_grams,
            "index_dir": index_dir.display().to_string(),
        });
        writeln!(out, "{obj}")
    } else {
        writeln!(out, "Index: {}", index_dir.display())
            .and_then(|()| writeln!(out, "Documents: {}", summary.total_documents))
            .and_then(|()| writeln!(out, "Segments: {}", summary.total_segments))
            .and_then(|()| writeln!(out, "Grams: {}", summary.total_grams))
            .and_then(|()| match &summary.base_commit {
                Some(commit) => writeln!(out, "Commit: {commit}"),
                None => Ok(()),
            })
    };

    match written {
        Ok(()) => {
            drop(index);
            0
        }
        Err(err) => handle_output(err),
    }
}
pub(super) fn cmd_update(config: Config, _flush: bool, quiet: bool) -> i32 {
let index = match Index::open(config.clone()) {
Ok(idx) => idx,
Err(e) => {
eprintln!("st update: {e}");
return 2;
}
};
// Security audit (command injection): no user-controlled data is interpolated
// as shell arguments. `resolve_git_binary()` resolves the git path via PATH
// with canonicalize (see its doc comment). `canonical_root` below is
// canonicalized before passing to `git -C`. All other arguments are static
// string literals. The only injection surface would be `--repo-root`, which
// is documented as trusted input.
let git = resolve_git_binary();
// The fallback path (/usr/bin/git on Unix) may not exist; verify before spawning.
if !git.is_file() {
eprintln!(
"st update: git not found (looked for {}); install git to detect changed files",
git.display()
);
drop(index);
return 2;
}
let mut changed: HashSet<PathBuf> = HashSet::new();
// Security: canonicalize repo_root before passing it to `git -C`.
//
// `git -C <path>` changes into the given directory before running. If
// <path> points to an attacker-controlled directory (e.g. --repo-root
// sourced from an untrusted environment variable or container bind-mount),
// git will execute hooks in that directory's .git/config (core.hooksPath,
// post-checkout, etc.) with the invoking user's privileges. Canonicalize
// resolves symlinks and produces an absolute path, eliminating relative-path
// tricks and final-component symlink redirections.
//
// Note: this does not prevent a user who deliberately passes a malicious
// path as --repo-root from triggering git hooks in that directory;
// --repo-root is trusted input and must not be sourced from untrusted data
// (e.g. artifact paths from untrusted CI jobs, user-supplied config).
let canonical_root = match config.repo_root.canonicalize() {
Ok(p) => p,
Err(e) => {
eprintln!(
"st update: invalid repo root \'{}\': {e}",
config.repo_root.display()
);
return 2;
}
};
// Parse NUL-terminated git output into changed paths.
//
// Using -z / -z causes git to use NUL instead of newline as the record
// separator, which is the only safe choice: filenames on Linux/macOS can
// contain literal newline bytes. Splitting on '\n' would produce two tokens
// from such a name, treating the spurious second token as a changed path
// and yielding exit code 1 on every update, masking real errors.
let parse_nul_paths = |bytes: &[u8]| -> Vec<PathBuf> {
bytes
.split(|&b| b == 0)
.map(path_from_bytes)
.filter(|path| is_safe_git_path(path))
.collect()
};
// Detect changed files via git diff against HEAD.
// This fails on repos with no commits, which is fine -- we fall through
// to untracked file detection below.
if let Ok(diff_output) = std::process::Command::new(&git)
.arg("-C")
.arg(&canonical_root)
.args(["diff", "-z", "--name-only", "HEAD"])
.output()
{
if diff_output.status.success() {
changed.extend(parse_nul_paths(&diff_output.stdout));
}
}
// Pick up staged changes (covers initial commit scenario where HEAD
// doesn't exist yet).
if let Ok(staged_output) = std::process::Command::new(&git)
.arg("-C")
.arg(&canonical_root)
.args(["diff", "-z", "--name-only", "--cached"])
.output()
{
if staged_output.status.success() {
changed.extend(parse_nul_paths(&staged_output.stdout));
}
}
// Pick up new untracked files that git-diff doesn't report.
if let Ok(ut_output) = std::process::Command::new(&git)
.arg("-C")
.arg(&canonical_root)
.args(["ls-files", "-z", "--others", "--exclude-standard"])
.output()
{
if ut_output.status.success() {
changed.extend(parse_nul_paths(&ut_output.stdout));
}
}
if changed.is_empty() {
if !quiet {
let stdout = io::stdout();
let mut out = stdout.lock();
if let Err(err) = writeln!(out, "st: no changes detected") {
return handle_output(err);
}
}
return 0;
}
let mut count = 0;
let mut notify_errors = 0usize;
for path in &changed {
// Join with canonical_root (not config.repo_root) so symlinked
// repo roots don't produce paths outside the resolved tree.
let abs = canonical_root.join(path);
if abs.exists() {
// Canonicalize and verify the resolved path is still under
// canonical_root. A compromised git binary could emit paths
// that exploit OS-specific resolution (e.g. symlinks inside
// the repo, Windows junctions) to escape the repo boundary.
match abs.canonicalize() {
Ok(resolved) if resolved.starts_with(&canonical_root) => {
if let Err(e) = index.notify_change(&resolved) {
eprintln!("st update: {}: {e}", path.display());
notify_errors += 1;
} else {
count += 1;
}
}
Ok(resolved) => {
eprintln!(
"st update: {}: resolves outside repo root ({})",
path.display(),
resolved.display()
);
notify_errors += 1;
}
Err(e) => {
eprintln!("st update: {}: {e}", path.display());
notify_errors += 1;
}
}
} else if let Err(e) = index.notify_delete(&abs) {
eprintln!("st update: {}: {e}", path.display());
notify_errors += 1;
} else {
count += 1;
}
}
if let Err(e) = index.commit_batch() {
eprintln!("st update: commit failed: {e}");
return 2;
}
if !quiet {
let stdout = io::stdout();
let mut out = stdout.lock();
if let Err(err) = writeln!(out, "st: updated {} file(s)", count) {
return handle_output(err);
}
}
if notify_errors > 0 {
1
} else {
drop(index);
0
}
}
/// Translate a failed stdout write into a process exit code.
///
/// A broken pipe (the reader went away, e.g. `st ... | head`) is not treated
/// as a failure and yields 0 without printing anything. Any other error is
/// reported on stderr and yields 2.
fn handle_output(err: io::Error) -> i32 {
    match err.kind() {
        io::ErrorKind::BrokenPipe => 0,
        _ => {
            eprintln!("st: {err}");
            2
        }
    }
}
/// Print supported file types in ripgrep-compatible format.
pub(super) fn cmd_type_list() -> i32 {
use ignore::types::TypesBuilder;
let mut builder = TypesBuilder::new();
builder.add_defaults();
let mut entries: Vec<(String, Vec<String>)> = Vec::new();
for def in builder.definitions() {
let globs: Vec<String> = def.globs().iter().map(|g| g.to_string()).collect();
entries.push((def.name().to_string(), globs));
}
entries.sort_by(|a, b| a.0.cmp(&b.0));
let stdout = io::stdout();
let mut out = stdout.lock();
for (name, globs) in &entries {
let joined = globs.join(", ");
if writeln!(out, "{name}: {joined}").is_err() {
return 0; // broken pipe
}
}
0
}