1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
//! Xet opt-out detection for `apr pull` under CRUX-A-07.
//!
//! Contract: `contracts/crux-A-07-v1.yaml`.
//!
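//! Pure classifier — takes the parsed `--no-xet` flag and an iterator of
//! `(name, value)` environment pairs and returns `bool` for "is the Xet
//! fast path enabled". The classifier itself performs no I/O, no network
//! access, and touches no process-global state, so it is unit-testable
//! offline; the only environment read lives in the thin `read_xet_env`
//! wrapper.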
//!
//! The actual parallel-range HTTP fetch, CAS endpoint contact, and
//! byte-parity with the plain HTTPS path are discharged by separate
//! network-gated harnesses (follow-up).
/// Name of the environment variable that toggles the Xet backend.
///
/// This APR-native variable is the only one the classifier in this module
/// consults (per CRUX-A-07 equations); other variables, including ones
/// from the `HF_XET_*` family, are ignored here. Downstream shell tests
/// and goldens grep for this exact name, so treat it as a stable
/// identifier.
pub const XET_ENV_VAR: &str = "APR_XET";
/// Returns `true` when `value` is one of the spellings that explicitly
/// switch Xet OFF: `0`, `false`, `no`, or `off`.
///
/// Surrounding whitespace is ignored and the comparison is ASCII
/// case-insensitive. Anything else — including the empty string — is not
/// considered falsy. The spelling set follows the usual boolean-env
/// convention (the HuggingFace `_is_true` helper was the stated model).
fn env_is_falsy(value: &str) -> bool {
    const FALSY_SPELLINGS: [&str; 4] = ["0", "false", "no", "off"];

    let candidate = value.trim();
    FALSY_SPELLINGS
        .iter()
        .any(|spelling| candidate.eq_ignore_ascii_case(spelling))
}
/// Returns `true` when `value` is one of the spellings that explicitly
/// switch Xet ON: `1`, `true`, `yes`, or `on`.
///
/// Surrounding whitespace is ignored and the comparison is ASCII
/// case-insensitive. Anything else — including the empty string — is not
/// considered truthy.
fn env_is_truthy(value: &str) -> bool {
    const TRUTHY_SPELLINGS: [&str; 4] = ["1", "true", "yes", "on"];

    let candidate = value.trim();
    TRUTHY_SPELLINGS
        .iter()
        .any(|spelling| candidate.eq_ignore_ascii_case(spelling))
}
/// Decide whether the Xet fast path is enabled, given the CLI flag and a
/// snapshot of environment variables.
///
/// Decision rules, strongest first:
/// 1. `no_xet_flag == true` → Xet OFF (the environment is not inspected)
/// 2. an `APR_XET` entry with a falsy value → Xet OFF
/// 3. an `APR_XET` entry with a truthy value → Xet ON
/// 4. nothing decisive found → Xet ON (default opt-in)
///
/// Entries are examined in iterator order and the first `APR_XET` entry
/// whose value is recognized as truthy or falsy settles the outcome.
/// Entries with other names, and `APR_XET` entries with unrecognized
/// values (for example an empty string or a typo), are skipped.
///
/// CRUX-A-07 ALGO-003 sub-claim of FALSIFY-003: `APR_XET=0` MUST
/// classify as off and an unset `APR_XET` MUST classify as on. This
/// classification is the precondition for the integration-level "no xet
/// CAS requests emitted" strace check.
///
/// The snapshot is supplied by the caller instead of being read from
/// `std::env`, which keeps the function deterministic and lets tests
/// exercise it without mutating process-global state.
pub fn is_xet_enabled<'a, I>(no_xet_flag: bool, env: I) -> bool
where
    I: IntoIterator<Item = (&'a str, &'a str)>,
{
    // The CLI flag is absolute: nothing in the environment can re-enable Xet.
    if no_xet_flag {
        return false;
    }

    env.into_iter()
        .filter(|(name, _)| *name == XET_ENV_VAR)
        .find_map(|(_, raw)| {
            if env_is_falsy(raw) {
                Some(false)
            } else if env_is_truthy(raw) {
                Some(true)
            } else {
                // Unrecognized value: keep scanning rather than guessing.
                None
            }
        })
        // No decisive entry at all means the default: Xet stays on.
        .unwrap_or(true)
}
/// Snapshot `APR_XET` from the real process environment.
///
/// Returns a one-element vector holding the `(name, value)` pair when
/// `std::env::var` yields a value, and an empty vector when it reports an
/// error (variable absent, or its value is not valid Unicode). Exists so
/// that callers have a single place that touches `std::env::var` for this
/// setting.
pub fn read_xet_env() -> Vec<(String, String)> {
    match std::env::var(XET_ENV_VAR) {
        Ok(value) => vec![(XET_ENV_VAR.to_string(), value)],
        Err(_) => Vec::new(),
    }
}
#[cfg(test)]
mod tests {
    // Offline unit tests for the Xet opt-out classifier. Every test feeds
    // an explicit environment snapshot to `is_xet_enabled`; none of them
    // reads or mutates the real process environment.
    use super::*;

    #[test]
    fn default_no_signal_is_enabled() {
        assert!(is_xet_enabled(false, std::iter::empty::<(&str, &str)>()));
    }

    #[test]
    fn flag_alone_disables_xet() {
        assert!(!is_xet_enabled(true, std::iter::empty::<(&str, &str)>()));
    }

    #[test]
    fn apr_xet_zero_disables_xet() {
        // CRUX-A-07 ALGO-003 sub-claim of FALSIFY-003: `APR_XET=0`
        // MUST flip the classifier to off so the download path can
        // take the plain HTTPS branch.
        assert!(!is_xet_enabled(false, [("APR_XET", "0")]));
    }

    #[test]
    fn apr_xet_one_enables_xet() {
        assert!(is_xet_enabled(false, [("APR_XET", "1")]));
    }

    #[test]
    fn flag_overrides_truthy_env() {
        // CLI flag wins: even with `APR_XET=1` set in the env, the
        // explicit `--no-xet` flag MUST disable the Xet path.
        assert!(!is_xet_enabled(true, [("APR_XET", "1")]));
    }

    #[test]
    fn flag_overrides_falsy_and_ambiguous_env() {
        // The flag is checked before the environment, so the result is
        // off no matter what the snapshot contains.
        assert!(!is_xet_enabled(true, [("APR_XET", "0")]));
        assert!(!is_xet_enabled(true, [("APR_XET", "maybe")]));
        assert!(!is_xet_enabled(true, [("SOME_OTHER_VAR", "1")]));
    }

    #[test]
    fn falsy_variants_all_disable() {
        for v in ["0", "false", "FALSE", "no", "off", " 0 "] {
            assert!(
                !is_xet_enabled(false, [("APR_XET", v)]),
                "APR_XET={v:?} must disable xet",
            );
        }
    }

    #[test]
    fn truthy_variants_all_enable() {
        for v in ["1", "true", "TRUE", "yes", "on", " 1 "] {
            assert!(
                is_xet_enabled(false, [("APR_XET", v)]),
                "APR_XET={v:?} must enable xet",
            );
        }
    }

    #[test]
    fn ambiguous_env_value_defers_to_default_enabled() {
        // Values that are neither recognized-truthy nor recognized-falsy
        // MUST NOT silently flip to off — they defer to the default
        // (enabled). Avoids a surprise opt-out on a typo like
        // `APR_XET=nope` when the user wasn't actually trying to
        // disable Xet.
        assert!(is_xet_enabled(false, [("APR_XET", "maybe")]));
        assert!(is_xet_enabled(false, [("APR_XET", "random-string")]));
    }

    #[test]
    fn empty_env_value_is_ambiguous_defers_to_default() {
        // `APR_XET=` (empty) is ambiguous; HF's own `_is_true` treats
        // it as false but we keep the default-enabled semantic to
        // avoid flipping Xet off on accidental empty exports.
        assert!(is_xet_enabled(false, [("APR_XET", "")]));
    }

    #[test]
    fn unrelated_env_var_ignored() {
        assert!(is_xet_enabled(false, [("SOME_OTHER_VAR", "0")]));
        assert!(is_xet_enabled(false, [("HF_XET_PARALLEL", "8")]));
    }

    #[test]
    fn unrelated_env_var_does_not_mask_apr_xet() {
        // Non-matching names are skipped, not treated as a decision, so
        // an `APR_XET` entry later in the snapshot is still honored.
        assert!(!is_xet_enabled(
            false,
            [("SOME_OTHER_VAR", "1"), ("APR_XET", "off")]
        ));
    }

    #[test]
    fn last_apr_xet_wins_on_duplicate_keys() {
        // NOTE: despite this test's historical name, the classifier does
        // NOT let the last occurrence win. It walks the snapshot in
        // iterator order and short-circuits on the FIRST `APR_XET` entry
        // whose value is recognized as truthy or falsy. This test pins
        // that behavior so any future change is deliberate. The name is
        // kept unchanged in case external tooling refers to it.
        let first_falsy_wins = is_xet_enabled(false, [("APR_XET", "0"), ("APR_XET", "1")]);
        assert!(!first_falsy_wins);
    }

    #[test]
    fn first_decisive_apr_xet_entry_wins_in_both_directions() {
        // Symmetric companion to the duplicate-key test above: whichever
        // decisive value comes first in iterator order settles the result.
        assert!(is_xet_enabled(false, [("APR_XET", "1"), ("APR_XET", "0")]));
        assert!(!is_xet_enabled(false, [("APR_XET", "0"), ("APR_XET", "1")]));
    }

    #[test]
    fn ambiguous_entry_does_not_stop_the_scan() {
        // An unrecognized value is skipped rather than treated as a
        // decision, so a later decisive `APR_XET` entry still applies.
        assert!(!is_xet_enabled(
            false,
            [("APR_XET", "maybe"), ("APR_XET", "0")]
        ));
        assert!(is_xet_enabled(false, [("APR_XET", ""), ("APR_XET", "yes")]));
    }

    #[test]
    fn is_deterministic() {
        let a = is_xet_enabled(false, [("APR_XET", "1")]);
        let b = is_xet_enabled(false, [("APR_XET", "1")]);
        assert_eq!(a, b);
    }

    #[test]
    fn xet_env_var_is_stable_apr_xet() {
        // Downstream shell tests and goldens grep for this exact name.
        assert_eq!(XET_ENV_VAR, "APR_XET");
    }

    #[test]
    fn falsify_003_sub_claim_apr_xet_zero_opts_out() {
        // CRUX-A-07 ALGO-003 sub-claim of FALSIFY-003: setting
        // `APR_XET=0` MUST produce a deterministic "xet disabled"
        // classification, which is the algorithm-level precondition
        // for the "no xet CAS requests emitted" strace assertion.
        for v in ["0", "false", "no", "off"] {
            assert!(
                !is_xet_enabled(false, [("APR_XET", v)]),
                "APR_XET={v:?} must disable xet per FALSIFY-003",
            );
        }
        // And the complement: default + `APR_XET=1` MUST keep it on,
        // so the same harness can also assert the opt-in path.
        assert!(is_xet_enabled(false, std::iter::empty::<(&str, &str)>()));
        assert!(is_xet_enabled(false, [("APR_XET", "1")]));
    }
}