1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
//! Secret redaction classifier for `apr login` / `apr pull` (CRUX-A-18).
//!
//! Contract: `contracts/crux-A-18-v1.yaml`.
//!
//! Pure redactor — takes any string the CLI is about to emit (stderr, log
//! line, error message) and a slice of known-secret strings, and returns
//! a string with every occurrence of every secret replaced by the literal
//! `<redacted>`. No I/O, no global state.
//!
//! The algorithm-level sub-claim we DO discharge here is exactly the
//! invariant FALSIFY-CRUX-A-18-003 demands: "apr pull / login stderr
//! does NOT contain the literal token". If every stderr write goes
//! through `redact_secrets`, and `redact_secrets` provably removes every
//! occurrence of every known secret, then the write-site invariant
//! reduces to a single call-site audit (caller must pipe through us).
//!
//! The actual wiring of every `eprintln!` through the redactor, the
//! `~/.apr/token` file-mode-0600 check, and the real HTTP 403 retry flow
//! are all discharged by separate integration harnesses (follow-up).
/// The literal token written in place of any detected secret.
///
/// Chosen to be short, obviously non-secret, and grep-friendly.
pub const REDACTED_MARKER: &str = "<redacted>";

/// Replace every occurrence of every secret in `input` with
/// [`REDACTED_MARKER`].
///
/// Every occurrence is located in the *original* input first; overlapping
/// byte ranges are then merged and each merged range is replaced by a
/// single marker. Consequently:
///
/// - The result does not depend on the order of `secrets`.
/// - Secrets that overlap in the input (one is a prefix or substring of
///   another, or the tail of one is the head of the next) are scrubbed in
///   full; no fragment of either secret is left behind.
/// - Occurrences that do not overlap, including directly adjacent ones,
///   each get their own marker.
///
/// Guard-rails:
///
/// - Secrets shorter than 4 bytes (`str::len`), which includes the empty
///   string, are ignored: we refuse to redact ambient short strings that
///   are almost certainly not real tokens (e.g. `"ok"`, `"hf"`). An empty
///   pattern would additionally match at every character boundary.
///
/// Limitation: the output consists of untouched input text interleaved
/// with markers, so a secret can only be present in the output if it
/// overlaps the marker text itself (e.g. a "secret" such as `"redacted"`).
/// Such values cannot be hidden by this marker.
///
/// This is a pure function: same inputs → same output, no I/O.
pub fn redact_secrets(input: &str, secrets: &[&str]) -> String {
    // Byte ranges `[start, end)` of every occurrence of every eligible secret.
    let mut spans: Vec<(usize, usize)> = Vec::new();
    for secret in secrets.iter().copied().filter(|s| s.len() >= 4) {
        let mut from = 0;
        while let Some(offset) = input[from..].find(secret) {
            let start = from + offset;
            spans.push((start, start + secret.len()));
            // Resume one character past the match start (not past its end)
            // so that self-overlapping occurrences are found as well. The
            // match starts on a char boundary, so adding that char's width
            // lands on a boundary too.
            let step = input[start..].chars().next().map_or(1, char::len_utf8);
            from = start + step;
        }
    }
    spans.sort_unstable();

    let mut out = String::with_capacity(input.len());
    let mut copied_to = 0; // input bytes before this index are already emitted
    let mut pending: Option<(usize, usize)> = None; // merged range being built
    for (start, end) in spans {
        pending = match pending {
            // Strict overlap: grow the current range. Ranges that merely
            // touch stay separate so adjacent secrets keep distinct markers.
            Some((cur_start, cur_end)) if start < cur_end => Some((cur_start, cur_end.max(end))),
            Some((cur_start, cur_end)) => {
                out.push_str(&input[copied_to..cur_start]);
                out.push_str(REDACTED_MARKER);
                copied_to = cur_end;
                Some((start, end))
            }
            None => Some((start, end)),
        };
    }
    if let Some((cur_start, cur_end)) = pending {
        out.push_str(&input[copied_to..cur_start]);
        out.push_str(REDACTED_MARKER);
        copied_to = cur_end;
    }
    out.push_str(&input[copied_to..]);
    out
}
/// Report whether `input` contains at least one of the given secrets.
///
/// Secrets shorter than 4 bytes (`str::len`), including the empty string,
/// are skipped. This is the same floor `redact_secrets` applies, so the
/// two functions agree on which secrets count.
///
/// This is the direct observational inverse of FALSIFY-003: after a
/// write is routed through `redact_secrets`, `contains_secret` on the
/// result MUST be false.
pub fn contains_secret(input: &str, secrets: &[&str]) -> bool {
    secrets
        .iter()
        .filter(|candidate| candidate.len() >= 4)
        .any(|candidate| input.contains(*candidate))
}
/// Advisory shape check for a HuggingFace access token.
///
/// Used by `apr login --stdin` to reject obvious non-tokens before they
/// are persisted, so a typo doesn't write garbage to `~/.apr/token`.
///
/// Surrounding whitespace is trimmed first. The remainder is accepted when
/// it starts with the lowercase prefix `hf_` and is at least 32 bytes long
/// in total (prefix included); everything else is rejected. Passing this
/// check does not mean the token works — the real 403 retry is the actual
/// authority on that.
pub fn looks_like_hf_token(s: &str) -> bool {
    let candidate = s.trim();
    if candidate.len() < 32 {
        return false;
    }
    candidate.starts_with("hf_")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two distinct token-shaped secrets shared by the multi-secret tests.
    const TOKEN_A: &str = "hf_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    const TOKEN_B: &str = "hf_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";

    /// A token-shaped secret that never appears in the inputs it is used with.
    const UNUSED_TOKEN: &str = "hf_something_big_enough_to_matter_XX";

    #[test]
    fn redact_replaces_known_secret() {
        let secret = "hf_abcdefghijklmnopqrstuvwxyz123456";
        let header = ["Authorization: Bearer ", secret].concat();
        let scrubbed = redact_secrets(&header, &[secret]);
        assert_eq!(scrubbed, "Authorization: Bearer <redacted>");
        assert!(!scrubbed.contains(secret));
    }

    #[test]
    fn redact_is_identity_when_no_secret_present() {
        let plain = "no secret here";
        let scrubbed = redact_secrets(plain, &["hf_absent_token_12345678901234567890"]);
        assert_eq!(scrubbed, "no secret here");
    }

    #[test]
    fn redact_handles_multiple_secrets() {
        let line = [TOKEN_A, " then ", TOKEN_B].concat();
        let scrubbed = redact_secrets(&line, &[TOKEN_A, TOKEN_B]);
        for secret in [TOKEN_A, TOKEN_B] {
            assert!(!scrubbed.contains(secret));
        }
        assert_eq!(scrubbed, "<redacted> then <redacted>");
    }

    #[test]
    fn redact_removes_every_occurrence() {
        let secret = "hf_repeat_repeat_repeat_repeat_repeat";
        let line = [secret, " and again ", secret, " and ", secret].concat();
        let out = redact_secrets(&line, &[secret]);
        assert!(
            !out.contains(secret),
            "every occurrence must be removed: {out}"
        );
    }

    #[test]
    fn redact_is_idempotent() {
        let secret = "hf_idemp_idemp_idemp_idemp_idemp_XXX";
        let original = format!("foo {secret} bar");
        let first_pass = redact_secrets(&original, &[secret]);
        let second_pass = redact_secrets(&first_pass, &[secret]);
        assert_eq!(first_pass, second_pass);
    }

    #[test]
    fn redact_ignores_empty_and_short_secrets() {
        // Empty and under-length secrets must be skipped entirely, so the
        // input comes back untouched.
        let ignored = ["", "hi", "a"];
        assert_eq!(redact_secrets("nothing to see", &ignored), "nothing to see");
    }

    #[test]
    fn contains_secret_is_true_iff_redact_changes_output() {
        let secret = "hf_present_present_present_present";
        let line = ["see ", secret, " there"].concat();
        assert!(contains_secret(&line, &[secret]));

        let scrubbed = redact_secrets(&line, &[secret]);
        assert!(!contains_secret(&scrubbed, &[secret]));
    }

    #[test]
    fn contains_secret_ignores_short_secrets() {
        // Under-length "secrets" must not trigger a hit, even though they
        // do occur in the text (guard against ambient-string false positives).
        let found = contains_secret("hi there friend", &["hi", "a"]);
        assert!(!found);
    }

    #[test]
    fn redact_then_contains_is_false_on_any_input() {
        // CRUX-A-18 ALGO-003 sub-claim of FALSIFY-003: whatever mix of
        // secrets an input holds, none of them is detectable once the
        // input has been through redact_secrets. The full
        // stderr-scrubbing invariant reduces to this property, provided
        // every write is routed through redact_secrets.
        let secrets = [TOKEN_A, TOKEN_B];
        let inputs = [
            "just a prefix".to_string(),
            TOKEN_A.to_string(),
            format!("prefix {TOKEN_A} suffix"),
            format!("both {TOKEN_A} and {TOKEN_B}"),
            format!("multi {TOKEN_A} {TOKEN_A}"),
        ];
        for input in inputs.iter().map(String::as_str) {
            let redacted = redact_secrets(input, &secrets);
            assert!(
                !contains_secret(&redacted, &secrets),
                "redaction failed to scrub all secrets from {input:?}: {redacted:?}",
            );
        }
    }

    #[test]
    fn redact_is_deterministic() {
        let secret = "hf_determ_determ_determ_determ_det";
        let line = format!("x {secret} y");
        let first = redact_secrets(&line, &[secret]);
        let second = redact_secrets(&line, &[secret]);
        assert_eq!(first, second);
    }

    #[test]
    fn looks_like_hf_token_accepts_canonical_shape() {
        let padded = [" ", TOKEN_A, " "].concat();
        for candidate in [TOKEN_A, padded.as_str()] {
            assert!(looks_like_hf_token(candidate));
        }
    }

    #[test]
    fn looks_like_hf_token_rejects_garbage() {
        let rejected = [
            "",
            "not_a_token",
            "hf_short",
            "HF_CAPS_INSTEAD_OF_LOWER_12345678901234",
        ];
        for candidate in rejected {
            assert!(!looks_like_hf_token(candidate));
        }
    }

    #[test]
    fn redact_marker_is_stable() {
        // Downstream log consumers grep for the marker; it must not drift.
        assert_eq!(REDACTED_MARKER, "<redacted>");
    }

    #[test]
    fn redact_empty_input_is_empty() {
        let scrubbed = redact_secrets("", &[UNUSED_TOKEN]);
        assert_eq!(scrubbed, "");
    }

    #[test]
    fn redact_does_not_panic_on_weird_inputs() {
        // Smoke test only: unicode, control characters, a very long run
        // and the empty string must all pass through without panicking.
        let long_run = "x".repeat(10_000);
        let samples = ["🎉🎉🎉", "\x00\x01\x02", long_run.as_str(), ""];
        for sample in samples {
            let _ = redact_secrets(sample, &[UNUSED_TOKEN]);
            let _ = contains_secret(sample, &[UNUSED_TOKEN]);
        }
    }
}