// xet_runtime/utils/file_paths.rs

1use std::ffi::OsString;
2use std::path::{Path, PathBuf};
3
4use chrono::Local;
5
/// A filesystem path that may contain template placeholders such as `{PID}` and `{TIMESTAMP}`.
///
/// Both the raw template and its most recent evaluation are kept, so the path is
/// evaluated once on construction and can be refreshed later to pick up new dynamic values.
///
/// # Placeholders
///
/// Placeholder names are matched ASCII case-insensitively:
/// - `{PID}` (also `{pid}`, `{Pid}`, ...): the ID of the current process
/// - `{TIMESTAMP}` (also `{timestamp}`, `{TimeStamp}`, ...): the current local time followed by its UTC
///   offset, in an ISO 8601-style layout whose time fields are separated by `-` instead of `:` (e.g.
///   `2024-02-05T14-30-45-0500` for EST, `2024-02-05T19-30-45+0000` for UTC)
///
/// Braced text that does not name a known placeholder is left in the path unchanged.
///
/// # Usage
///
/// - `new()` builds a `TemplatedPathBuf` holding both the template and the evaluated path
/// - `as_path()` borrows the evaluated path
/// - `re_evaluate()` refreshes dynamic values like `{TIMESTAMP}`
/// - `evaluate()` is a static one-shot helper for callers that do not need to keep the template
///
/// # Path normalization
///
/// Evaluating a template applies these steps in order:
/// - Replaces placeholders with their current values
/// - Expands a leading `~` to the user's home directory
/// - Converts the result to an absolute path
#[derive(Debug, Clone)]
pub struct TemplatedPathBuf {
    /// The path exactly as supplied by the caller, placeholders included.
    template: PathBuf,
    /// The result of the most recent evaluation of `template`.
    evaluated: PathBuf,
}
34
35impl TemplatedPathBuf {
36    /// Creates a new `TemplatedPathBuf` from a path-like value.
37    pub fn new(path: impl Into<PathBuf>) -> Self {
38        let template = path.into();
39        let evaluated = Self::eval_impl(&template, &Self::default_substitutes());
40
41        Self { template, evaluated }
42    }
43
44    /// Convenience function to create and evaluate a template in one step.
45    ///
46    /// This is equivalent to calling `TemplatedPathBuf::new(path).as_path().to_path_buf()`.
47    /// Use this when you don't need to keep the template around.
48    pub fn evaluate(path: impl Into<PathBuf>) -> PathBuf {
49        Self::new(path).as_path().into()
50    }
51
52    /// Returns a reference to the evaluated path.
53    ///
54    /// The evaluated path has all template variables substituted with their actual values,
55    /// tilde expansion applied, and is converted to an absolute path.
56    pub fn as_path(&self) -> &Path {
57        &self.evaluated
58    }
59
60    /// Returns the original template path as a string.
61    pub fn template_string(&self) -> String {
62        self.template.to_string_lossy().into_owned()
63    }
64
65    /// Re-evaluates the template by replacing all placeholders with fresh values and expanding paths.
66    ///
67    /// This method updates the internal evaluated path and returns a reference to it.
68    /// Call this if you need to refresh dynamic values like `{PID}` or `{TIMESTAMP}`.
69    ///
70    /// # Examples
71    ///
72    /// ```
73    /// use xet_runtime::utils::TemplatedPathBuf;
74    ///
75    /// let mut template = TemplatedPathBuf::new("~/logs/app_{PID}_{TIMESTAMP}.txt");
76    /// let path = template.as_path();
77    /// // Returns an absolute path like "/home/user/logs/app_12345_2024-01-15T10-30-45-0500.txt"
78    /// // (timestamp in local timezone with offset appended)
79    ///
80    /// // Later, re-evaluate to get a fresh timestamp
81    /// template.re_evaluate();
82    /// let new_path = template.as_path();
83    /// ```
84    pub fn re_evaluate(&mut self) -> &Path {
85        self.evaluated = Self::eval_impl(&self.template, &Self::default_substitutes());
86        &self.evaluated
87    }
88
89    fn default_substitutes() -> [Substitute; 2] {
90        [
91            ("pid", Box::new(|| std::process::id().to_string())),
92            ("timestamp", Box::new(|| Local::now().fixed_offset().format("%Y-%m-%dT%H-%M-%S%z").to_string())),
93        ]
94    }
95
96    /// Note: Templates can come from environment variables or other non-UTF-8 sources.
97    /// We work at the byte level using `into_encoded_bytes()` to preserve all path data
98    /// across all platforms (Unix raw bytes, Windows WTF-8), only interpreting ASCII
99    /// patterns like {PID} and {TIMESTAMP} which are guaranteed to be ASCII.
100    fn eval_impl(template: &Path, substitutes: &[Substitute]) -> PathBuf {
101        // Get platform-specific encoded bytes (Unix: raw bytes, Windows WTF-8)
102        let path_bytes = template.as_os_str().as_encoded_bytes();
103
104        // One-pass scan to replace placeholders (ASCII patterns)
105        let mut result = Vec::with_capacity(path_bytes.len());
106        let mut i = 0;
107
108        while i < path_bytes.len() {
109            if path_bytes[i] == b'{' {
110                // Try to find closing '}'
111                if let Some(close_offset) = path_bytes[i + 1..].iter().position(|&b| b == b'}') {
112                    let pattern_bytes = &path_bytes[i + 1..i + 1 + close_offset];
113
114                    // Try to match any substitute pattern (ASCII only, case-insensitive)
115                    let mut matched = false;
116                    if let Ok(pattern_str) = std::str::from_utf8(pattern_bytes) {
117                        for sub in substitutes {
118                            if pattern_str.eq_ignore_ascii_case(sub.0) {
119                                // Found a placeholder, replace it
120                                result.extend_from_slice(sub.1().as_bytes());
121                                i += close_offset + 2; // Skip past {pattern}
122                                matched = true;
123                                break;
124                            }
125                        }
126                    }
127
128                    if matched {
129                        continue;
130                    }
131                }
132            }
133            result.push(path_bytes[i]);
134            i += 1;
135        }
136
137        // Convert back to OsString preserving all bytes
138        // SAFETY: The input was a valid OsString, and we only substituted
139        // ASCII placeholders with ASCII text, so the output is also a valid OsString.
140        let substituted_path = unsafe { OsString::from_encoded_bytes_unchecked(result) };
141
142        // Expand tilde to home directory (preserves non-UTF-8 path data with path feature)
143        let expanded = shellexpand::path::tilde(Path::new(&substituted_path));
144        let expanded_path = expanded.as_ref();
145
146        // Convert to absolute path
147        std::path::absolute(expanded_path).unwrap_or_else(|_| expanded_path.to_path_buf())
148    }
149}
150
151// Implement From traits for easy conversion
152impl From<PathBuf> for TemplatedPathBuf {
153    fn from(path: PathBuf) -> Self {
154        Self::new(path)
155    }
156}
157
158impl From<&Path> for TemplatedPathBuf {
159    fn from(path: &Path) -> Self {
160        Self::new(path.to_path_buf())
161    }
162}
163
164impl From<String> for TemplatedPathBuf {
165    fn from(s: String) -> Self {
166        Self::new(PathBuf::from(s))
167    }
168}
169
170impl From<&str> for TemplatedPathBuf {
171    fn from(s: &str) -> Self {
172        Self::new(PathBuf::from(s))
173    }
174}
175
/// Boxed closure that produces the replacement text for a placeholder when called.
type SubstituteFn = Box<dyn Fn() -> String>;

/// A placeholder definition: the name looked for between `{` and `}` (compared ASCII
/// case-insensitively), paired with the closure that generates its replacement.
type Substitute = (&'static str, SubstituteFn);
177
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use serial_test::serial;
    use tempfile::tempdir;

    use super::*;
    use crate::utils::CwdGuard;
    #[cfg(unix)]
    use crate::utils::EnvVarGuard;

    #[cfg(unix)]
    const HOME_VAR: &str = "HOME";

    /// Builds a substitute that always expands `name` to the fixed text `value`.
    fn fixed(name: &'static str, value: &'static str) -> Substitute {
        (name, Box::new(move || value.to_string()))
    }

    /// Builds a `timestamp` substitute pinned to 2009-02-13T23:31:30Z so expected paths are stable.
    fn fixed_timestamp() -> Substitute {
        let instant = chrono::DateTime::parse_from_rfc3339("2009-02-13T23:31:30Z").unwrap();
        ("timestamp", Box::new(move || instant.format("%Y-%m-%dT%H-%M-%S%z").to_string()))
    }

    #[test]
    fn test_pid_substitution_case_insensitive() {
        // {PID}, {pid} and {Pid} must all expand to the same value.
        const PATTERNS: [&str; 3] = ["log_{PID}.txt", "log_{pid}.txt", "log_{Pid}.txt"];

        let substitutes = [fixed("pid", "12345")];
        for pattern in PATTERNS {
            let evaluated = TemplatedPathBuf::eval_impl(Path::new(pattern), &substitutes);
            assert!(evaluated.ends_with("log_12345.txt"));
        }
    }

    #[test]
    fn test_timestamp_substitution_case_insensitive() {
        // {TIMESTAMP}, {timestamp} and {TimeStamp} must all expand to the same value.
        const PATTERNS: [&str; 3] = ["log_{TIMESTAMP}.txt", "log_{timestamp}.txt", "log_{TimeStamp}.txt"];

        let substitutes = [fixed_timestamp()];
        for pattern in PATTERNS {
            let evaluated = TemplatedPathBuf::eval_impl(Path::new(pattern), &substitutes);
            assert!(evaluated.ends_with("log_2009-02-13T23-31-30+0000.txt"));
        }
    }

    #[test]
    fn test_multiple_substitutions() {
        // Every occurrence of every placeholder gets replaced, including repeats.
        let substitutes = [fixed("pid", "999"), fixed_timestamp()];

        let single_pid = TemplatedPathBuf::eval_impl(Path::new("/var/log/app_{pid}_{TIMESTAMP}.log"), &substitutes);
        #[cfg(unix)]
        assert_eq!(single_pid, PathBuf::from("/var/log/app_999_2009-02-13T23-31-30+0000.log"));
        #[cfg(windows)]
        assert!(single_pid.ends_with("var\\log\\app_999_2009-02-13T23-31-30+0000.log"));

        let repeated_pid =
            TemplatedPathBuf::eval_impl(Path::new("/var/log_{pid}/app_{pid}_{TIMESTAMP}.log"), &substitutes);
        #[cfg(unix)]
        assert_eq!(repeated_pid, PathBuf::from("/var/log_999/app_999_2009-02-13T23-31-30+0000.log"));
        #[cfg(windows)]
        assert!(repeated_pid.ends_with("var\\log_999\\app_999_2009-02-13T23-31-30+0000.log"));
    }

    #[test]
    fn test_non_ascii_paths_substitutions() {
        // Non-ASCII text around a placeholder must survive substitution untouched.
        let substitutes = [fixed("pid", "566")];
        let template = Path::new("-Me {pid} encantan los 🌶️ jalapeños . -我也喜欢");

        let evaluated = TemplatedPathBuf::eval_impl(template, &substitutes);
        assert!(evaluated.ends_with("-Me 566 encantan los 🌶️ jalapeños . -我也喜欢"));
    }

    #[test]
    fn leaves_unrecognized_patterns_unsubstituted() {
        // Braced names that are not placeholders (ASCII or not) stay in the path verbatim.
        for raw in ["path_with_{unrecognized}_{patterns}.txt", "path_with_{未识别的}_{patterns}.txt"] {
            let template = Path::new(raw);
            let evaluated = TemplatedPathBuf::evaluate(template);
            assert!(evaluated.ends_with(template));
        }
    }

    #[test]
    fn test_as_path_and_re_evaluate() {
        let mut templated = TemplatedPathBuf::new("/var/log/app_{PID}_{TIMESTAMP}.log");

        // The initial evaluation is absolute and has both placeholders filled in.
        let initial = templated.as_path();
        assert!(initial.is_absolute(), "Path should be absolute");

        let rendered = initial.to_string_lossy();
        let pid_fragment = format!("app_{}", std::process::id());
        assert!(rendered.contains(&pid_fragment));
        assert!(!rendered.contains("{TIMESTAMP}"), "TIMESTAMP placeholder should be substituted");

        // Without re-evaluation, as_path() keeps returning the same path.
        let snapshot = templated.as_path().to_path_buf();
        assert_eq!(initial, &snapshot);

        // The timestamp has one-second resolution, so wait for it to tick over.
        std::thread::sleep(Duration::from_secs(1));

        // re_evaluate() produces a new path and returns it ...
        let refreshed = templated.re_evaluate().to_path_buf();
        assert_ne!(refreshed, snapshot, "re_evaluate() didn't return new result");

        // ... and as_path() now reports that refreshed path.
        let current = templated.as_path();
        assert_eq!(refreshed, current);
    }

    #[test]
    #[serial(default_config_env)]
    fn makes_relative_path_absolute() {
        let scratch = tempdir().unwrap();
        let cwd = scratch.path().canonicalize().unwrap();
        let _cwd_guard = CwdGuard::set(&cwd).unwrap();

        let substitutes = [fixed("pid", "2563")];
        let actual = TemplatedPathBuf::eval_impl(Path::new("subdir/{pid}file.txt"), &substitutes);
        let wanted = std::path::absolute(cwd.join("subdir/2563file.txt")).unwrap();

        assert!(actual.is_absolute(), "result should be absolute");
        assert_eq!(actual, wanted);
    }

    #[test]
    fn leaves_absolute_path_absolute() {
        let scratch = tempdir().expect("temp dir");
        let root = scratch.path().canonicalize().unwrap();

        let input = root.join("a").join("b.txt");
        let wanted = std::path::absolute(&input).unwrap();

        let actual = TemplatedPathBuf::evaluate(&input);
        assert!(actual.is_absolute(), "result should be absolute");
        assert_eq!(actual, wanted);
    }

    #[cfg(unix)] // Windows doesn't work with HOME_VAR
    #[test]
    #[serial(default_config_env)]
    fn expands_tilde_prefix_using_env_home() {
        let fake_home = tempdir().expect("temp home");
        let _home_guard = EnvVarGuard::set(HOME_VAR, fake_home.path());

        let _cwd_guard = CwdGuard::set(fake_home.path()).expect("set cwd");

        // A bare "~" resolves to the home directory itself.
        let bare = TemplatedPathBuf::evaluate("~");
        assert_eq!(bare, std::path::absolute(fake_home.path()).unwrap());

        // "~" followed by further components resolves beneath the home directory.
        let substitutes = [fixed("pid", "123")];
        let nested = TemplatedPathBuf::eval_impl(Path::new("~/projects/demo_{pid}"), &substitutes);
        let wanted = fake_home.path().join("projects").join("demo_123");
        assert_eq!(nested, wanted);
        assert!(nested.is_absolute());
    }

    #[test]
    #[serial(default_config_env)]
    fn nonexistent_paths_are_still_absolutized() {
        let scratch = tempdir().expect("temp dir");
        let cwd = scratch.path().canonicalize().unwrap();

        let _cwd_guard = CwdGuard::set(&cwd).expect("set cwd");

        let relative = "does/not/exist/yet";
        let actual = TemplatedPathBuf::evaluate(relative);
        let wanted = std::path::absolute(cwd.join(relative)).unwrap();

        assert!(actual.is_absolute());
        assert_eq!(actual, wanted);
    }

    // "~no-such-user" stays literal and is made absolute relative to CWD.
    #[test]
    #[serial(default_config_env)]
    fn unknown_tilde_user_is_literal_relative() {
        let scratch = tempdir().unwrap();
        let cwd = scratch.path().canonicalize().unwrap();
        let _cwd_guard = CwdGuard::set(&cwd).unwrap();

        let input = "~user_that_definitely_does_not_exist_1234";
        let actual = TemplatedPathBuf::evaluate(input);
        let wanted = std::path::absolute(cwd.join(input)).unwrap();

        assert!(actual.is_absolute());
        assert_eq!(actual, wanted);
    }
}
366}