Skip to main content

xet_runtime/utils/
configuration_utils.rs

1use std::str::FromStr;
2
3use tracing::{Level, event, info, warn};
4
5use super::ByteSize;
6#[cfg(not(target_family = "wasm"))]
7use super::TemplatedPathBuf;
8
9#[cfg(not(feature = "elevated_information_level"))]
10pub const INFORMATION_LOG_LEVEL: Level = Level::DEBUG;
11#[cfg(feature = "elevated_information_level")]
12pub const INFORMATION_LOG_LEVEL: Level = Level::INFO;
13
14/// A trait to control how a value is parsed from an environment string or other config source
15/// if it's present. Also provides serialization back to a string representation that
16/// roundtrips with `parse_user_value`.
17pub trait ParsableConfigValue: std::fmt::Debug + Sized {
18    fn parse_user_value(value: &str) -> Option<Self>;
19
20    /// Serialize this value to a string that can be parsed back via `parse_user_value`.
21    fn to_config_string(&self) -> String;
22
23    /// Try to update this value in place from a string. Returns true on success.
24    /// The default implementation delegates to `parse_user_value`, but types like
25    /// `ConfigEnum` override this to use context-aware validation.
26    fn try_update_in_place(&mut self, value: &str) -> bool {
27        if let Some(v) = Self::parse_user_value(value) {
28            *self = v;
29            true
30        } else {
31            false
32        }
33    }
34
35    /// Parse the value, returning the default if it can't be parsed or the string is empty.  
36    /// Issue a warning if it can't be parsed.
37    fn parse_config_value(variable_name: &str, value: Option<String>, default: Self) -> Self {
38        match value {
39            Some(v) => match Self::parse_user_value(&v) {
40                Some(v) => {
41                    info!("Config: {variable_name} = {v:?} (user set)");
42                    v
43                },
44                None => {
45                    warn!(
46                        "Configuration value {v} for {variable_name} cannot be parsed into correct type; reverting to default."
47                    );
48                    info!("Config: {variable_name} = {default:?} (default due to parse error)");
49                    default
50                },
51            },
52            None => {
53                event!(INFORMATION_LOG_LEVEL, "Config: {variable_name} = {default:?} (default)");
54                default
55            },
56        }
57    }
58}
59
60/// Most values work with the FromStr implementation, but we want to override the behavior for some types
61/// (e.g. Option<T> and bool) to have custom parsing behavior.
62pub trait FromStrParseable: FromStr + std::fmt::Debug + std::fmt::Display {}
63
64impl<T: FromStrParseable> ParsableConfigValue for T {
65    fn parse_user_value(value: &str) -> Option<Self> {
66        value.parse::<T>().ok()
67    }
68
69    fn to_config_string(&self) -> String {
70        self.to_string()
71    }
72}
73
74// Implement FromStrParseable for all the base types where the FromStr parsing method just works.
75impl FromStrParseable for usize {}
76impl FromStrParseable for u8 {}
77impl FromStrParseable for u16 {}
78impl FromStrParseable for u32 {}
79impl FromStrParseable for u64 {}
80impl FromStrParseable for isize {}
81impl FromStrParseable for i8 {}
82impl FromStrParseable for i16 {}
83impl FromStrParseable for i32 {}
84impl FromStrParseable for i64 {}
85impl FromStrParseable for f32 {}
86impl FromStrParseable for f64 {}
87impl FromStrParseable for String {}
88impl FromStrParseable for ByteSize {}
89
/// Interprets a user-supplied string as a boolean flag.
///
/// Surrounding whitespace is ignored and matching is ASCII case-insensitive:
/// - "1", "true", "yes", "y", "on"   -> `Some(true)`
/// - "0", "false", "no", "n", "off"  -> `Some(false)`
/// - anything else, including ""     -> `None`
fn parse_bool_value(value: &str) -> Option<bool> {
    const FALSY: [&str; 5] = ["0", "false", "no", "n", "off"];
    const TRUTHY: [&str; 5] = ["1", "true", "yes", "y", "on"];

    let token = value.trim();
    let is_one_of = |spellings: &[&str]| spellings.iter().any(|s| token.eq_ignore_ascii_case(s));

    if is_one_of(&FALSY) {
        Some(false)
    } else if is_one_of(&TRUTHY) {
        Some(true)
    } else {
        None
    }
}
102
103impl ParsableConfigValue for bool {
104    fn parse_user_value(value: &str) -> Option<Self> {
105        parse_bool_value(value)
106    }
107
108    fn to_config_string(&self) -> String {
109        if *self { "true" } else { "false" }.to_owned()
110    }
111}
112
113/// Enable Option<T> to allow the default value to be None if nothing is set and appear as
114/// Some(Value) if the user specifies the value.
115impl<T: ParsableConfigValue> ParsableConfigValue for Option<T> {
116    fn parse_user_value(value: &str) -> Option<Self> {
117        if value.trim().is_empty() {
118            return Some(None);
119        }
120        T::parse_user_value(value).map(Some)
121    }
122
123    fn to_config_string(&self) -> String {
124        match self {
125            Some(v) => v.to_config_string(),
126            None => String::new(),
127        }
128    }
129}
130
131/// Implement proper parsing for Duration types as well.
132///
133/// Now the following suffixes are supported: s, ms, us, ns, m, h, d, etc.;
134/// see the humantime crate for the full list.
135impl ParsableConfigValue for std::time::Duration {
136    fn parse_user_value(value: &str) -> Option<Self> {
137        humantime::parse_duration(value).ok()
138    }
139
140    fn to_config_string(&self) -> String {
141        let total_ms = self.as_millis();
142        if self.subsec_nanos() == 0 {
143            format!("{}s", self.as_secs())
144        } else if self.subsec_nanos().is_multiple_of(1_000_000) {
145            format!("{total_ms}ms")
146        } else if self.subsec_nanos().is_multiple_of(1_000) {
147            format!("{}us", self.as_micros())
148        } else {
149            format!("{}ns", self.as_nanos())
150        }
151    }
152}
153
154#[cfg(not(target_family = "wasm"))]
155impl ParsableConfigValue for TemplatedPathBuf {
156    fn parse_user_value(value: &str) -> Option<Self> {
157        Some(Self::new(value))
158    }
159
160    fn to_config_string(&self) -> String {
161        self.template_string()
162    }
163}
164
165// Reexport this so that dependencies don't have weird other dependencies
166pub use lazy_static::lazy_static;
167
/// Declares lazily-initialized `pub static` constants that can be overridden through the
/// environment in builds with `debug_assertions`.
///
/// Every `ref NAME: Type = default;` entry becomes its own `lazy_static!` static:
/// - with `debug_assertions`, the initializer reads `HF_XET_<NAME>` from the environment and
///   passes it, together with `default`, to `parse_config_value`;
/// - without `debug_assertions`, the static is simply `default` and the environment is not
///   consulted.
#[macro_export]
macro_rules! test_configurable_constants {
    ($(
        $(#[$attr:meta])*
        ref $name:ident : $type:ty = $value:expr;
    )+) => {
        // Brings `lazy_static!` and the parsing trait into scope for the expansion below.
        #[allow(unused_imports)]
        use $crate::configuration_utils::*;

        $(
            lazy_static! {
                $(#[$attr])*
                pub static ref $name: $type = {
                    #[cfg(debug_assertions)]
                    {
                        let fallback = $value;
                        let env_override = std::env::var(concat!("HF_XET_", stringify!($name))).ok();
                        <$type>::parse_config_value(stringify!($name), env_override, fallback)
                    }
                    #[cfg(not(debug_assertions))]
                    {
                        $value
                    }
                };
            }
        )+
    };
}
196
197pub use ctor as ctor_reexport;
198
/// A macro for **tests** that overrides constants declared with
/// `test_configurable_constants!`.
///
/// It generates a `ctor` function (run on load) that, for each `NAME = value;` entry:
/// 1. sets the environment variable `HF_XET_<NAME>` to the `Debug` rendering of `value`,
/// 2. forces the constant to be read, and
/// 3. panics if the constant's `Debug` rendering differs from the one that was set. That
///    is what happens when the constant was already initialized with another value, or
///    when the override could not be parsed.
///
/// The constant is read with `*NAME`, so its type must be `Copy`.
///
/// # Example
/// ```rust
/// use xet_runtime::{test_configurable_constants, test_set_constants};
/// test_configurable_constants! {
///    /// Target chunk size
///    ref CHUNK_TARGET_SIZE: u64 = 1024;
///
///    /// Max Chunk size, only adjustable in testing mode.
///    ref MAX_CHUNK_SIZE: u64 = 4096;
/// }
///
/// test_set_constants! {
///    CHUNK_TARGET_SIZE = 2048;
/// }
/// assert_eq!(*CHUNK_TARGET_SIZE, 2048);
/// ```
#[cfg(not(doctest))]
#[macro_export]
macro_rules! test_set_constants {
    ($(
        $var_name:ident = $val:expr;
    )+) => {
        use $crate::configuration_utils::ctor_reexport as ctor;

        #[ctor::ctor]
        fn set_constants_on_load() {
            $(
                let val = $val;

                // Environment variable backing the constant, e.g. "HF_XET_MAX_NUM_CHUNKS".
                let env_key = concat!("HF_XET_", stringify!($var_name));
                let val_str = format!("{val:?}");

                // SAFETY: assumes no other thread touches the process environment while
                // this load-time constructor runs. TODO confirm.
                unsafe {
                    std::env::set_var(env_key, &val_str);
                }

                // Dereferencing forces the lazy static to initialize right now.
                let actual_value = *$var_name;

                assert!(
                    format!("{actual_value:?}") == val_str,
                    "test_set_constants! failed: wanted {} to be {:?}, but got {:?}",
                    stringify!($var_name),
                    val,
                    actual_value
                );
                eprintln!("> Set {} to {:?}",
                        stringify!($var_name),
                        val);
            )+
        }
    }
}
264
/// A macro for **tests** that sets config group environment variables **before**
/// XetRuntime is initialized.
///
/// For every `group { field = value; }` entry the variable
/// `HF_XET_{GROUP_NAME}_{FIELD_NAME}` (both names upper-cased) is set to the `Debug`
/// rendering of `value`, and a line describing the assignment is written to stderr.
///
/// The assignments happen inside a `ctor` function, so they run on module load, before any
/// config values are read.
///
/// # Example
/// ```rust
/// use xet_runtime::config::XetConfig;
/// use xet_runtime::test_set_config;
///
/// test_set_config! {
///     data {
///         max_concurrent_uploads = 16;
///         max_concurrent_downloads = 20;
///     }
///     client {
///         upload_reporting_block_size = 1024000;
///     }
/// }
///
/// // Now XetConfig::new() will pick up these values
/// let config = XetConfig::new();
/// assert_eq!(config.data.max_concurrent_uploads, 16);
/// ```
#[cfg(not(doctest))]
#[macro_export]
macro_rules! test_set_config {
    ($(
        $group_name:ident {
            $(
                $field_name:ident = $val:expr;
            )+
        }
    )+) => {
        use $crate::configuration_utils::ctor_reexport as config_ctor;

        #[config_ctor::ctor]
        fn set_config_on_load() {
            $(
                // Upper-cased once per group and reused for each of its fields.
                let group_upper = stringify!($group_name).to_uppercase();
                $(
                    let val = $val;
                    let val_str = format!("{val:?}");
                    let field_upper = stringify!($field_name).to_uppercase();

                    // Environment variable name: HF_XET_{GROUP_NAME}_{FIELD_NAME}
                    let env_name = format!("HF_XET_{}_{}", group_upper, field_upper);

                    // SAFETY: assumes no other thread touches the process environment
                    // while this load-time constructor runs. TODO confirm.
                    unsafe {
                        std::env::set_var(&env_name, &val_str);
                    }

                    eprintln!("> Set config {}.{} to {:?} (env: {})",
                            stringify!($group_name),
                            stringify!($field_name),
                            val,
                            env_name);
                )+
            )+
        }
    }
}
330
331fn get_high_performance_flag() -> bool {
332    if let Ok(val) = std::env::var("HF_XET_HIGH_PERFORMANCE") {
333        parse_bool_value(&val).unwrap_or(false)
334    } else if let Ok(val) = std::env::var("HF_XET_HP") {
335        parse_bool_value(&val).unwrap_or(false)
336    } else {
337        false
338    }
339}
340
341lazy_static! {
342    /// To set the high performance mode to true, set either of the following environment variables to 1 or true:
343    ///  - HF_XET_HIGH_PERFORMANCE
344    ///  - HF_XET_HP
345    pub static ref HIGH_PERFORMANCE: bool = get_high_performance_flag();
346}
347
348#[inline]
349pub fn is_high_performance() -> bool {
350    *HIGH_PERFORMANCE
351}
352
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;

    /// Serializes `value` with `to_config_string`, parses the result back with
    /// `parse_user_value`, and asserts that the restored value equals the original.
    fn assert_roundtrip<T: ParsableConfigValue + PartialEq + std::fmt::Debug>(value: T) {
        let s = value.to_config_string();
        let restored = T::parse_user_value(&s).unwrap_or_else(|| {
            panic!("Failed to parse config string '{s}' back into {:?}", std::any::type_name::<T>())
        });
        assert_eq!(value, restored);
    }

    /// Runs `assert_roundtrip` on every listed value; the values may have different types.
    macro_rules! assert_roundtrips {
        ($($value:expr),+ $(,)?) => {
            $(assert_roundtrip($value);)+
        };
    }

    #[test]
    fn test_roundtrip_numeric_primitives() {
        assert_roundtrips!(
            0usize,
            42usize,
            usize::MAX,
            0u8,
            255u8,
            0u16,
            65535u16,
            0u32,
            123456u32,
            0u64,
            u64::MAX,
            0isize,
            -42isize,
            isize::MAX,
            -128i8,
            127i8,
            -32768i16,
            32767i16,
            0i32,
            -123456i32,
            0i64,
            i64::MIN,
            0.0f32,
            std::f32::consts::PI,
            -1.5f32,
            0.0f64,
            std::f64::consts::PI,
            -1e10f64
        );
    }

    #[test]
    fn test_roundtrip_bool_and_string() {
        assert_roundtrips!(true, false, String::new(), "hello world".to_owned(), "http://localhost:8080".to_owned());
    }

    #[test]
    fn test_parse_bool_accepted_spellings() {
        // Matching is case-insensitive and ignores surrounding whitespace.
        for s in ["1", "true", "TRUE", " yes ", "Y", "on"] {
            assert_eq!(bool::parse_user_value(s), Some(true), "input: {s:?}");
        }
        for s in ["0", "false", "False", " no ", "N", "OFF"] {
            assert_eq!(bool::parse_user_value(s), Some(false), "input: {s:?}");
        }
        for s in ["2", "maybe", "tr ue"] {
            assert_eq!(bool::parse_user_value(s), None, "input: {s:?}");
        }
    }

    #[test]
    fn test_roundtrip_byte_size() {
        assert_roundtrips!(
            ByteSize::new(0),
            ByteSize::new(1000),
            ByteSize::new(1_000_000),
            ByteSize::new(8_000_000),
            ByteSize::new(10_000_000_000)
        );
    }

    #[test]
    fn test_roundtrip_duration() {
        assert_roundtrips!(
            Duration::from_secs(0),
            Duration::from_secs(60),
            Duration::from_secs(120),
            Duration::from_millis(200),
            // Whole seconds plus a millisecond fraction.
            Duration::from_millis(1500),
            Duration::from_millis(3000),
            Duration::from_secs(360),
            Duration::from_micros(123),
            Duration::from_nanos(123)
        );
    }

    #[test]
    fn test_roundtrip_option_some() {
        assert_roundtrips!(Some(42usize), Some("hello".to_owned()));
    }

    #[test]
    fn test_roundtrip_option_none_string() {
        let none_val: Option<String> = None;
        let s = none_val.to_config_string();
        assert_eq!(s, "");

        // The empty string must also parse back to `None`, for any inner type.
        assert_roundtrips!(None::<String>, None::<usize>, None::<bool>);
    }

    #[test]
    fn test_parse_option_empty_string_as_none() {
        assert_eq!(Option::<String>::parse_user_value(""), Some(None));
        assert_eq!(Option::<String>::parse_user_value("   "), Some(None));
        assert_eq!(Option::<usize>::parse_user_value(""), Some(None));
    }

    #[test]
    fn test_parse_bool_empty_string_as_none() {
        assert_eq!(bool::parse_user_value(""), None);
        assert_eq!(bool::parse_user_value("   "), None);
    }

    #[test]
    fn test_try_update_in_place() {
        let mut value = 5usize;

        assert!(value.try_update_in_place("7"));
        assert_eq!(value, 7);

        // A failed parse must leave the previous value untouched.
        assert!(!value.try_update_in_place("not a number"));
        assert_eq!(value, 7);
    }

    #[test]
    fn test_parse_config_value_fallbacks() {
        assert_eq!(usize::parse_config_value("TEST_VALUE", Some("12".to_owned()), 3), 12);
        assert_eq!(usize::parse_config_value("TEST_VALUE", Some("twelve".to_owned()), 3), 3);
        assert_eq!(usize::parse_config_value("TEST_VALUE", None, 3), 3);
    }

    #[cfg(not(target_family = "wasm"))]
    #[test]
    fn test_roundtrip_templated_path_buf() {
        let path = TemplatedPathBuf::new("/some/simple/path");
        let s = path.to_config_string();
        assert_eq!(s, "/some/simple/path");
        let restored = TemplatedPathBuf::parse_user_value(&s).unwrap();
        assert_eq!(path.template_string(), restored.template_string());
    }
}