Skip to main content

tryaudex_core/
rotation.rs

1use std::path::PathBuf;
2use std::sync::atomic::AtomicBool;
3use std::sync::Arc;
4use std::time::Duration;
5
6use crate::credentials::TempCredentials;
7use crate::error::Result;
8
/// Configuration for automatic credential rotation.
///
/// Both thresholds are whole seconds. The type is plain data, so it derives
/// `PartialEq`/`Eq` to let callers and tests compare configurations directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RotationConfig {
    /// Rotate credentials this many seconds before they expire.
    pub rotate_before_secs: u64,
    /// Minimum session TTL (in seconds) to enable rotation.
    /// Sessions shorter than this use static credentials.
    pub min_ttl_for_rotation_secs: u64,
}
18
19impl Default for RotationConfig {
20    fn default() -> Self {
21        Self {
22            rotate_before_secs: 300,        // 5 minutes before expiry
23            min_ttl_for_rotation_secs: 900, // only rotate if TTL >= 15 minutes
24        }
25    }
26}
27
/// A file that holds provider credentials, updated atomically.
/// Automatically cleaned up when dropped.
///
/// The derived `Debug` output contains only the file path, never the
/// credential contents, so it is safe to include in logs.
#[derive(Debug)]
pub struct CredentialFile {
    /// Location of the credential file on disk.
    path: PathBuf,
}
33
34impl CredentialFile {
35    /// Create a new credential file in a temp directory.
36    pub fn new() -> Result<Self> {
37        let dir = std::env::temp_dir().join("audex");
38        std::fs::create_dir_all(&dir)?;
39        // Restrict directory to owner-only to prevent other users from listing
40        // credential files on shared machines.
41        // R6-M58: propagate permission errors instead of silently
42        // discarding them. A credential file in a world-readable temp
43        // directory is a credential leak on shared machines.
44        #[cfg(unix)]
45        {
46            use std::os::unix::fs::PermissionsExt;
47            std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o700))?;
48        }
49        // R6-M14: include PID in the filename so concurrent audex processes
50        // owned by the same UID can be distinguished in the shared temp
51        // directory (and so a stale file from a crashed process can be
52        // correlated to the process that left it behind). The UUIDv4 is
53        // retained to prevent collision if the same PID issues multiple
54        // credential files in the same second.
55        let pid = std::process::id();
56        let path = dir.join(format!("creds-{}-{}.ini", pid, uuid::Uuid::new_v4()));
57        Ok(Self { path })
58    }
59
60    /// Path to the credential file.
61    pub fn path(&self) -> &std::path::Path {
62        &self.path
63    }
64
65    /// Write credentials in AWS shared credentials file format.
66    /// Uses atomic write (write to tmp, then rename) to avoid partial reads.
67    pub fn write_aws(&self, creds: &TempCredentials) -> Result<()> {
68        let content = format!(
69            "[default]\naws_access_key_id = {}\naws_secret_access_key = {}\naws_session_token = {}\n",
70            creds.access_key_id, creds.secret_access_key, creds.session_token
71        );
72        atomic_write(&self.path, content.as_bytes())
73    }
74
75    /// Write a GCP access token as plain text.
76    /// GCP SDKs don't support reading pre-fetched tokens from a credential file,
77    /// so we write the raw token for tools that can read a token file directly.
78    /// The subprocess should use `CLOUDSDK_AUTH_ACCESS_TOKEN` (set at launch)
79    /// and `AUDEX_GCP_TOKEN_FILE` (for tools that re-read on rotation).
80    pub fn write_gcp(&self, token: &str) -> Result<()> {
81        atomic_write(&self.path, token.as_bytes())
82    }
83
84    /// Write an Azure access token to the credential file.
85    pub fn write_azure(&self, token: &str) -> Result<()> {
86        let json = serde_json::json!({
87            "type": "azure_access_token",
88            "token": token,
89        });
90        atomic_write(&self.path, json.to_string().as_bytes())
91    }
92
93    /// Remove the credential file from disk.
94    pub fn cleanup(&self) {
95        let _ = std::fs::remove_file(&self.path);
96        let _ = std::fs::remove_file(self.path.with_extension("tmp"));
97    }
98}
99
100impl Drop for CredentialFile {
101    fn drop(&mut self) {
102        self.cleanup();
103    }
104}
105
106/// Atomic write: write to a .tmp sibling then rename over the target.
107/// Sets file permissions to 0600 (owner-only) on Unix to prevent other
108/// users from reading credentials on shared machines.
109///
110/// # Windows file permission limitation
111///
112/// On Windows, this function sets the hidden attribute via `attrib +H` but
113/// does **not** enforce ACL-based access control. The credential file remains
114/// readable by other users with access to the same filesystem path. For
115/// production use on Windows, replace the `attrib` call with proper DACL
116/// manipulation using the [`windows-acl`](https://crates.io/crates/windows-acl)
117/// crate (e.g. `SecurityDescriptor::new` + `set_dacl`).
118fn atomic_write(path: &std::path::Path, data: &[u8]) -> Result<()> {
119    use std::io::Write;
120
121    let tmp = path.with_extension("tmp");
122    // R6-M21: open the file explicitly so we can fsync before rename.
123    // Previously std::fs::write + rename left a window where power loss
124    // could commit a rename over an empty/partial data block because the
125    // rename only guarantees metadata durability — the data pages may
126    // still be in the page cache. fsync the data, then rename; a
127    // crash-consistent filesystem will either retain the old contents
128    // or present the fully-written new contents.
129    {
130        let mut file = std::fs::OpenOptions::new()
131            .write(true)
132            .create(true)
133            .truncate(true)
134            .open(&tmp)?;
135        file.write_all(data)?;
136        file.sync_all()?;
137    }
138    #[cfg(unix)]
139    {
140        use std::os::unix::fs::PermissionsExt;
141        std::fs::set_permissions(&tmp, std::fs::Permissions::from_mode(0o600))?;
142    }
143    #[cfg(windows)]
144    {
145        // On Windows, mark the file as hidden and restrict via read-only attribute.
146        // Full ACL restriction would require the windows-acl crate; this is a
147        // best-effort guard for shared machines.
148        tracing::warn!(
149            path = %tmp.display(),
150            "Windows credential file permissions are not enforced: the file may be \
151             readable by other users on this machine. Use the windows-acl crate for \
152             production deployments to apply a restrictive DACL."
153        );
154        let _ = std::process::Command::new("attrib")
155            .args(["+H", &tmp.to_string_lossy()])
156            .status();
157    }
158    std::fs::rename(&tmp, path)?;
159    Ok(())
160}
161
/// Handle to a running credential rotation background thread.
/// Dropping the handle signals the thread to stop.
///
/// Because dropping stops rotation, the type is `#[must_use]`: a handle that
/// is discarded immediately ends rotation before it has done anything.
///
/// R6-M22: exposes a `failed` flag that the background thread sets when it
/// gives up after `MAX_RETRY_FAILURES` consecutive rotation attempts.
/// Callers should check `has_failed()` on session end so operators learn
/// that the subprocess was running against a stale credential file; the
/// previous behaviour was to silently stop the rotation thread and leave
/// the main thread unaware that scoping had effectively lapsed.
#[derive(Debug)]
#[must_use = "dropping the handle stops credential rotation"]
pub struct RotationHandle {
    /// Set to `true` to ask the background thread to exit.
    stop: Arc<AtomicBool>,
    /// Set by the background thread when it gives up rotating.
    failed: Arc<AtomicBool>,
    /// Join handle of the background thread; `None` once it has been joined.
    thread: Option<std::thread::JoinHandle<()>>,
}
176
177impl RotationHandle {
178    /// Signal the rotation thread to stop and wait for it to finish.
179    pub fn stop(mut self) {
180        self.stop.store(true, std::sync::atomic::Ordering::Release);
181        if let Some(handle) = self.thread.take() {
182            let _ = handle.join();
183        }
184    }
185
186    /// Returns true if the background rotation thread gave up after the
187    /// configured retry ceiling, meaning the credential file has not been
188    /// updated since the first failure and the subprocess has been running
189    /// with stale credentials.
190    pub fn has_failed(&self) -> bool {
191        self.failed.load(std::sync::atomic::Ordering::Acquire)
192    }
193}
194
195impl Drop for RotationHandle {
196    fn drop(&mut self) {
197        self.stop.store(true, std::sync::atomic::Ordering::Release);
198        if let Some(handle) = self.thread.take() {
199            let _ = handle.join();
200        }
201    }
202}
203
204/// Start a background credential rotation thread.
205///
206/// The `refresh_and_write` closure is called whenever new credentials are needed.
207/// It must fetch new credentials, write them to disk, and return the new expiry time.
208/// This design is provider-agnostic: the caller decides how to fetch and persist
209/// credentials (AWS via write_aws, GCP via write_gcp, Azure via write_azure).
210/// The rotation loop checks every 10 seconds whether it's time to rotate.
211/// R6-M60: returns Result so a thread-spawn failure surfaces as an error
212/// to the caller instead of panicking the entire process. In constrained
213/// environments (cgroup thread limits, seccomp filters) spawn can fail.
214pub fn start_rotation<F>(
215    initial_expires_at: chrono::DateTime<chrono::Utc>,
216    rotate_before: Duration,
217    refresh_and_write: F,
218) -> Result<RotationHandle>
219where
220    F: Fn() -> std::result::Result<chrono::DateTime<chrono::Utc>, String> + Send + 'static,
221{
222    let stop = Arc::new(AtomicBool::new(false));
223    let stop_clone = stop.clone();
224    let failed = Arc::new(AtomicBool::new(false));
225    let failed_clone = failed.clone();
226
227    let thread = std::thread::Builder::new()
228        .name("audex-rotation".into())
229        .spawn(move || {
230            let rotate_chrono =
231                chrono::Duration::from_std(rotate_before).unwrap_or(chrono::Duration::seconds(300));
232            // R6-M59: use checked subtraction. If rotate_before is larger
233            // than the distance from epoch to initial_expires_at (an
234            // impossible-in-practice but defensible edge case), the
235            // previous unchecked subtraction would wrap to a far-future
236            // timestamp and skip the first rotation entirely.
237            let mut next_rotation = initial_expires_at
238                .checked_sub_signed(rotate_chrono)
239                .unwrap_or(initial_expires_at);
240            let mut consecutive_failures: u32 = 0;
241            const MAX_RETRY_FAILURES: u32 = 10;
242            const MAX_BACKOFF_SECS: i64 = 300; // 5 minutes
243
244            loop {
245                if stop_clone.load(std::sync::atomic::Ordering::Acquire) {
246                    break;
247                }
248
249                let now = chrono::Utc::now();
250                if now >= next_rotation {
251                    if consecutive_failures >= MAX_RETRY_FAILURES {
252                        tracing::error!(
253                            "Credential rotation gave up after {} consecutive failures — \
254                             subprocess is running with stale credentials",
255                            consecutive_failures
256                        );
257                        // R6-M22: signal the main thread so it can surface
258                        // the rotation failure to the operator instead of
259                        // silently continuing to read the stale credential
260                        // file.
261                        failed_clone.store(true, std::sync::atomic::Ordering::Release);
262                        break;
263                    }
264
265                    tracing::info!("Credential rotation triggered");
266                    match refresh_and_write() {
267                        Ok(new_expires_at) => {
268                            tracing::info!(
269                                "Credentials rotated successfully, new expiry: {}",
270                                new_expires_at
271                            );
272                            next_rotation = new_expires_at - rotate_chrono;
273                            consecutive_failures = 0;
274                        }
275                        Err(e) => {
276                            consecutive_failures += 1;
277                            let backoff = 30i64
278                                .saturating_mul(2i64.saturating_pow(consecutive_failures - 1))
279                                .min(MAX_BACKOFF_SECS);
280                            tracing::error!(
281                                "Failed to rotate credentials (attempt {}/{}): {}",
282                                consecutive_failures,
283                                MAX_RETRY_FAILURES,
284                                e
285                            );
286                            next_rotation = now + chrono::Duration::seconds(backoff);
287                        }
288                    }
289                }
290
291                // R6-M57: sleep in 1-second ticks and re-check the stop
292                // flag each tick. The previous 10-second sleep meant the
293                // process hung for up to 10 seconds after stop() was called,
294                // delaying shutdown and credential cleanup.
295                for _ in 0..10 {
296                    if stop_clone.load(std::sync::atomic::Ordering::Acquire) {
297                        break;
298                    }
299                    std::thread::sleep(Duration::from_secs(1));
300                }
301            }
302
303            tracing::debug!("Credential rotation thread stopped");
304        })
305        .map_err(|e| {
306            crate::error::AvError::InvalidPolicy(format!(
307                "Failed to spawn credential rotation thread: {}",
308                e
309            ))
310        })?;
311
312    Ok(RotationHandle {
313        stop,
314        failed,
315        thread: Some(thread),
316    })
317}
318
319/// Check if rotation should be enabled for a given TTL.
320pub fn should_rotate(ttl: Duration, config: &RotationConfig) -> bool {
321    ttl.as_secs() >= config.min_ttl_for_rotation_secs
322}
323
#[cfg(test)]
mod tests {
    use super::*;

    /// Build AWS-style credentials that expire one hour from now.
    fn creds_expiring_in_an_hour(
        access_key_id: &str,
        secret_access_key: &str,
        session_token: &str,
    ) -> TempCredentials {
        TempCredentials {
            access_key_id: access_key_id.to_string(),
            secret_access_key: secret_access_key.to_string(),
            session_token: session_token.to_string(),
            expires_at: chrono::Utc::now() + chrono::Duration::hours(1),
        }
    }

    #[test]
    fn test_should_rotate_long_ttl() {
        let config = RotationConfig::default();
        assert!(should_rotate(Duration::from_secs(3600), &config)); // 1 hour
        assert!(should_rotate(Duration::from_secs(900), &config)); // exactly 15 min
    }

    #[test]
    fn test_should_not_rotate_short_ttl() {
        let config = RotationConfig::default();
        assert!(!should_rotate(Duration::from_secs(600), &config)); // 10 min
        assert!(!should_rotate(Duration::from_secs(300), &config)); // 5 min
        assert!(!should_rotate(Duration::from_secs(60), &config)); // 1 min
    }

    #[test]
    fn test_credential_file_write_aws() {
        let cred_file = CredentialFile::new().unwrap();
        let creds = creds_expiring_in_an_hour(
            "AKIAIOSFODNN7EXAMPLE",
            "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
            "FwoGZXtoken123",
        );
        cred_file.write_aws(&creds).unwrap();

        let content = std::fs::read_to_string(cred_file.path()).unwrap();
        assert!(content.contains("[default]"));
        assert!(content.contains("AKIAIOSFODNN7EXAMPLE"));
        assert!(content.contains("wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"));
        assert!(content.contains("FwoGZXtoken123"));
    }

    #[test]
    fn test_credential_file_write_gcp() {
        let cred_file = CredentialFile::new().unwrap();
        cred_file.write_gcp("ya29.test-token-123").unwrap();

        let content = std::fs::read_to_string(cred_file.path()).unwrap();
        assert_eq!(content, "ya29.test-token-123");
    }

    #[test]
    fn test_credential_file_write_azure() {
        let cred_file = CredentialFile::new().unwrap();
        cred_file.write_azure("eyJ.test-token").unwrap();

        // The Azure payload is a JSON object carrying a type tag and the token.
        let content = std::fs::read_to_string(cred_file.path()).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&content).unwrap();
        assert_eq!(parsed["type"], "azure_access_token");
        assert_eq!(parsed["token"], "eyJ.test-token");
    }

    #[test]
    fn test_credential_file_cleanup_on_drop() {
        let path;
        {
            let cred_file = CredentialFile::new().unwrap();
            cred_file.write_gcp("test").unwrap();
            path = cred_file.path().to_path_buf();
            assert!(path.exists());
        }
        // After drop, file should be cleaned up
        assert!(!path.exists());
    }

    #[test]
    fn test_rotation_handle_stop() {
        use std::sync::atomic::{AtomicU32, Ordering};

        let cred_file = Arc::new(CredentialFile::new().unwrap());
        let call_count = Arc::new(AtomicU32::new(0));
        let call_count_clone = call_count.clone();

        // Set expiry in the past so rotation triggers immediately
        let expires_at = chrono::Utc::now() - chrono::Duration::seconds(10);

        // Write initial credentials so the file exists
        let initial_creds = creds_expiring_in_an_hour("AKIATEST", "secret", "token");
        cred_file.write_aws(&initial_creds).unwrap();

        let cred_file_clone = cred_file.clone();
        let handle = start_rotation(expires_at, Duration::from_secs(60), move || {
            call_count_clone.fetch_add(1, Ordering::Relaxed);
            let new_creds = creds_expiring_in_an_hour("AKIAROTATED", "new-secret", "new-token");
            let expires = new_creds.expires_at;
            cred_file_clone
                .write_aws(&new_creds)
                .map_err(|e| e.to_string())?;
            Ok(expires)
        })
        .unwrap();

        // Wait for the first rotation by polling with a generous deadline
        // instead of a fixed sleep, so a slow or loaded machine does not make
        // the test flaky.
        let deadline = std::time::Instant::now() + Duration::from_secs(5);
        while call_count.load(Ordering::Relaxed) == 0 && std::time::Instant::now() < deadline {
            std::thread::sleep(Duration::from_millis(10));
        }
        // stop() joins the thread, so any in-flight refresh has finished
        // writing by the time it returns.
        handle.stop();

        // refresh_fn should have been called at least once
        assert!(call_count.load(Ordering::Relaxed) >= 1);

        // Credential file should have rotated credentials
        let content = std::fs::read_to_string(cred_file.path()).unwrap();
        assert!(content.contains("AKIAROTATED"));
    }

    #[test]
    fn test_rotation_config_defaults() {
        let config = RotationConfig::default();
        assert_eq!(config.rotate_before_secs, 300);
        assert_eq!(config.min_ttl_for_rotation_secs, 900);
    }

    #[test]
    fn test_atomic_write_is_atomic() {
        // Per-process scratch directory: concurrent test runs cannot collide,
        // and the whole directory is removed at the end.
        let dir = std::env::temp_dir().join(format!("audex-test-atomic-{}", std::process::id()));
        std::fs::create_dir_all(&dir).unwrap();
        let path = dir.join("test-atomic.txt");

        atomic_write(&path, b"hello world").unwrap();
        assert_eq!(std::fs::read_to_string(&path).unwrap(), "hello world");

        // Overwrite
        atomic_write(&path, b"updated").unwrap();
        assert_eq!(std::fs::read_to_string(&path).unwrap(), "updated");

        // tmp file should not exist
        assert!(!path.with_extension("tmp").exists());

        let _ = std::fs::remove_dir_all(&dir);
    }
}