Skip to main content

nika_engine/io/
writer.rs

1//! Artifact Writer - File Persistence for Task Outputs
2//!
3//! The `ArtifactWriter` is the main entry point for writing task outputs to disk.
4//! It combines atomic writes, path security, and template resolution.
5//!
6//! # Features
7//!
8//! - **Atomic Writes**: Uses temp file + rename pattern for crash safety
9//! - **Path Security**: Validates paths to prevent traversal attacks
10//! - **Template Resolution**: Supports `{{task_id}}`, `{{date}}`, etc. in paths
11//! - **Size Limits**: Enforces configurable maximum file sizes
12//! - **Format Support**: JSON, YAML, and raw text output
13//!
//! # Example
//!
//! ```ignore
//! use nika::io::writer::{ArtifactWriter, WriteRequest};
//! use nika::ast::artifact::OutputFormat;
//!
//! // `ArtifactWriter::new` is infallible, so no `?` is needed here.
//! let writer = ArtifactWriter::new("/project/.nika/artifacts", "my-workflow");
//! let request = WriteRequest::new("generate_page", "pages/{{task_id}}.json")
//!     .with_content(r#"{"title": "Hello"}"#)
//!     .with_format(OutputFormat::Json);
//! writer.write(request).await?;
//! ```
26
27use std::collections::HashMap;
28use std::path::{Path, PathBuf};
29
30use tokio::fs;
31
32use crate::ast::OutputFormat;
33use crate::error::NikaError;
34use crate::io::atomic::write_atomic;
35use crate::io::security::validate_artifact_path;
36use crate::io::template::TemplateResolver;
37
/// Size limit applied to artifacts unless overridden: 10 MiB (10 × 2^20 bytes).
pub const DEFAULT_MAX_SIZE: u64 = 10 << 20;
40
/// Where the bytes of a binary artifact are taken from.
#[derive(Clone, Debug)]
pub(crate) enum BinarySource {
    /// The bytes live in a file of the CAS store, identified by its path.
    CasPath(PathBuf),
}
47
48/// Request to write a binary artifact
49#[derive(Debug, Clone)]
50pub(crate) struct BinaryWriteRequest {
51    /// Task ID that produced this binary
52    pub task_id: String,
53    /// Output path template (may contain `{{var}}` placeholders)
54    pub output_path: String,
55    /// Source of binary data
56    pub source: BinarySource,
57    /// Expected file size (for size limit check before copy)
58    pub expected_size: u64,
59}
60
61/// Result of a successful write operation
62#[derive(Debug, Clone)]
63pub struct WriteResult {
64    /// Final resolved path where artifact was written
65    pub path: PathBuf,
66    /// Size in bytes of the written content
67    pub size: u64,
68    /// Format used for output
69    pub format: OutputFormat,
70}
71
72/// Request to write an artifact
73#[derive(Debug, Clone)]
74pub struct WriteRequest {
75    /// Task ID that produced this output
76    pub task_id: String,
77    /// Output path template (may contain `{{var}}` placeholders)
78    pub output_path: String,
79    /// Content to write (serialized)
80    pub content: String,
81    /// Output format (affects serialization validation)
82    pub format: OutputFormat,
83    /// Custom template variables
84    pub vars: HashMap<String, String>,
85}
86
87impl WriteRequest {
88    /// Create a new write request
89    pub fn new(task_id: impl Into<String>, output_path: impl Into<String>) -> Self {
90        Self {
91            task_id: task_id.into(),
92            output_path: output_path.into(),
93            content: String::new(),
94            format: OutputFormat::Text,
95            vars: HashMap::new(),
96        }
97    }
98
99    /// Set the content to write
100    pub fn with_content(mut self, content: impl Into<String>) -> Self {
101        self.content = content.into();
102        self
103    }
104
105    /// Set the output format
106    pub fn with_format(mut self, format: OutputFormat) -> Self {
107        self.format = format;
108        self
109    }
110
111    /// Add a custom template variable
112    #[cfg(test)]
113    pub fn with_var(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
114        self.vars.insert(key.into(), value.into());
115        self
116    }
117
118    /// Add multiple template variables
119    #[cfg(test)]
120    pub fn with_vars(mut self, vars: HashMap<String, String>) -> Self {
121        self.vars.extend(vars);
122        self
123    }
124}
125
/// Persists task outputs as files underneath a single artifact directory.
#[derive(Debug)]
pub struct ArtifactWriter {
    /// Root directory that every written artifact must stay inside
    artifact_dir: PathBuf,
    /// Name of the workflow, made available to path templates
    workflow_name: String,
    /// Upper bound, in bytes, on the size of a single artifact
    max_size: u64,
}
136
137impl ArtifactWriter {
138    /// Create a new artifact writer
139    ///
140    /// # Arguments
141    ///
142    /// * `artifact_dir` - Base directory for artifacts (must be absolute)
143    /// * `workflow_name` - Workflow name for template variables
144    ///
145    /// # Errors
146    ///
147    /// Returns `NikaError::ArtifactPathError` if the directory path is invalid
148    pub fn new(artifact_dir: impl Into<PathBuf>, workflow_name: impl Into<String>) -> Self {
149        Self {
150            artifact_dir: artifact_dir.into(),
151            workflow_name: workflow_name.into(),
152            max_size: DEFAULT_MAX_SIZE,
153        }
154    }
155
156    /// Set the maximum artifact size
157    pub fn with_max_size(mut self, max_size: u64) -> Self {
158        self.max_size = max_size;
159        self
160    }
161
162    /// Write an artifact to disk
163    ///
164    /// This method:
165    /// 1. Validates JSON format if `OutputFormat::Json`
166    /// 2. Resolves template variables in the output path
167    /// 3. Validates the path stays within the artifact directory
168    /// 4. Validates the content size
169    /// 5. Creates parent directories as needed
170    /// 6. Writes atomically (temp + rename)
171    /// 7. Final path validation before atomic commit
172    ///
173    /// # Arguments
174    ///
175    /// * `request` - The write request with content and metadata
176    ///
177    /// # Returns
178    ///
179    /// `WriteResult` with the final path and metadata
180    ///
181    /// # Errors
182    ///
183    /// - `NikaError::ArtifactPathError` if path validation fails
184    /// - `NikaError::ArtifactSizeExceeded` if content exceeds max_size
185    /// - `NikaError::ArtifactWriteError` if write fails or JSON is invalid
186    pub async fn write(&self, request: WriteRequest) -> Result<WriteResult, NikaError> {
187        // Check size limit
188        let content_size = request.content.len() as u64;
189        if content_size > self.max_size {
190            return Err(NikaError::ArtifactSizeExceeded {
191                path: request.output_path.clone(),
192                size: content_size,
193                max_size: self.max_size,
194            });
195        }
196
197        // Validate JSON format if specified
198        if matches!(request.format, OutputFormat::Json) && !request.content.is_empty() {
199            if let Err(e) = serde_json::from_str::<serde_json::Value>(&request.content) {
200                return Err(NikaError::ArtifactWriteError {
201                    path: request.output_path.clone(),
202                    reason: format!("Invalid JSON content: {}", e),
203                });
204            }
205        }
206
207        // Resolve template variables (validates custom vars for path traversal)
208        let resolver = TemplateResolver::new(&request.task_id, &self.workflow_name)
209            .with_vars(request.vars.clone())?;
210        let resolved_path = resolver.resolve(&request.output_path)?;
211
212        // Validate the path stays within artifact directory
213        let full_path = validate_artifact_path(&self.artifact_dir, Path::new(&resolved_path))?;
214
215        // Create parent directories
216        if let Some(parent) = full_path.parent() {
217            fs::create_dir_all(parent)
218                .await
219                .map_err(|e| NikaError::ArtifactWriteError {
220                    path: parent.display().to_string(),
221                    reason: format!("Failed to create parent directories: {}", e),
222                })?;
223        }
224
225        // Final path validation after directory creation (mitigates TOCTOU)
226        // Re-validate now that parent directories exist and can be canonicalized
227        let final_path = validate_artifact_path(&self.artifact_dir, Path::new(&resolved_path))?;
228
229        // Write atomically
230        write_atomic(&final_path, request.content.as_bytes())
231            .await
232            .map_err(|e| NikaError::ArtifactWriteError {
233                path: final_path.display().to_string(),
234                reason: format!("Atomic write failed: {}", e),
235            })?;
236
237        Ok(WriteResult {
238            path: final_path,
239            size: content_size,
240            format: request.format,
241        })
242    }
243
244    /// Write a binary artifact by copying from CAS store path.
245    ///
246    /// Reads CAS file data through `CasStore::read_raw()` which transparently
247    /// strips compression framing, then writes the original user data to the
248    /// artifact path using atomic temp+rename.
249    ///
250    /// # Arguments
251    ///
252    /// * `request` - Binary write request with source path and metadata
253    ///
254    /// # Errors
255    ///
256    /// - `NikaError::ArtifactSizeExceeded` if source exceeds max_size
257    /// - `NikaError::ArtifactWriteError` if copy fails or source missing
258    /// - `NikaError::ArtifactPathError` if output path validation fails
259    pub(crate) async fn write_binary(
260        &self,
261        request: BinaryWriteRequest,
262    ) -> Result<WriteResult, NikaError> {
263        // Check size limit before copy
264        if request.expected_size > self.max_size {
265            return Err(NikaError::ArtifactSizeExceeded {
266                path: request.output_path.clone(),
267                size: request.expected_size,
268                max_size: self.max_size,
269            });
270        }
271
272        // Resolve template variables in output path
273        let resolver = TemplateResolver::new(&request.task_id, &self.workflow_name);
274        let resolved_path = resolver.resolve(&request.output_path)?;
275
276        // Validate path stays within artifact directory
277        let full_path = validate_artifact_path(&self.artifact_dir, Path::new(&resolved_path))?;
278
279        // Create parent directories
280        if let Some(parent) = full_path.parent() {
281            fs::create_dir_all(parent)
282                .await
283                .map_err(|e| NikaError::ArtifactWriteError {
284                    path: parent.display().to_string(),
285                    reason: format!("Failed to create parent directories: {}", e),
286                })?;
287        }
288
289        // Final path validation after directory creation (mitigates TOCTOU)
290        let final_path = validate_artifact_path(&self.artifact_dir, Path::new(&resolved_path))?;
291
292        // Write binary from CAS source using atomic temp+rename pattern.
293        //
294        // CAS files may contain compression framing bytes (media-compression feature).
295        // We read through CasStore::read_raw() which transparently decompresses,
296        // then write the original user data to the artifact path. A raw file copy
297        // would leak the framing byte prefix ([0x00] or [0x01][zstd...]) into
298        // the output artifact.
299        let size = match request.source {
300            BinarySource::CasPath(ref src) => {
301                // Read CAS file and strip compression framing if present.
302                // This calls transparent_decompress under media-compression.
303                let data = crate::media::CasStore::read_raw(src).await.map_err(|e| {
304                    NikaError::ArtifactWriteError {
305                        path: final_path.display().to_string(),
306                        reason: format!("CAS read failed: {}", e),
307                    }
308                })?;
309
310                let size = data.len() as u64;
311
312                // Atomic write: temp file + rename
313                write_atomic(&final_path, &data).await.map_err(|e| {
314                    NikaError::ArtifactWriteError {
315                        path: final_path.display().to_string(),
316                        reason: format!("Atomic write failed: {}", e),
317                    }
318                })?;
319
320                size
321            }
322        };
323
324        Ok(WriteResult {
325            path: final_path,
326            size,
327            format: OutputFormat::Binary,
328        })
329    }
330
331    /// Validate a path without writing
332    ///
333    /// Useful for pre-validation before expensive operations
334    pub fn validate_path(&self, task_id: &str, output_path: &str) -> Result<PathBuf, NikaError> {
335        let resolver = TemplateResolver::new(task_id, &self.workflow_name);
336        let resolved_path = resolver.resolve(output_path)?;
337        validate_artifact_path(&self.artifact_dir, Path::new(&resolved_path))
338    }
339}
340
341#[cfg(test)]
342mod tests {
343    use super::*;
344    use tempfile::tempdir;
345
346    fn test_writer() -> (ArtifactWriter, tempfile::TempDir) {
347        let temp = tempdir().unwrap();
348        let artifact_dir = temp.path().join("artifacts");
349        std::fs::create_dir_all(&artifact_dir).unwrap();
350        let canonical_dir = artifact_dir.canonicalize().unwrap();
351        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
352        (writer, temp)
353    }
354
355    #[tokio::test]
356    async fn test_write_simple() {
357        let (writer, _temp) = test_writer();
358        let request = WriteRequest::new("task1", "output.json")
359            .with_content(r#"{"key": "value"}"#)
360            .with_format(OutputFormat::Json);
361
362        let result = writer.write(request).await.unwrap();
363        assert!(result.path.ends_with("output.json"));
364        assert_eq!(result.size, 16);
365        assert!(matches!(result.format, OutputFormat::Json));
366    }
367
368    #[tokio::test]
369    async fn test_write_with_template() {
370        let (writer, _temp) = test_writer();
371        let request = WriteRequest::new("generate_page", "{{task_id}}/output.json")
372            .with_content("test content");
373
374        let result = writer.write(request).await.unwrap();
375        assert!(result.path.to_string_lossy().contains("generate_page"));
376    }
377
378    #[tokio::test]
379    async fn test_write_nested_path() {
380        let (writer, _temp) = test_writer();
381        let request =
382            WriteRequest::new("task1", "deep/nested/path/output.txt").with_content("hello");
383
384        let result = writer.write(request).await.unwrap();
385        assert!(result.path.ends_with("deep/nested/path/output.txt"));
386    }
387
388    #[tokio::test]
389    async fn test_write_size_exceeded() {
390        let temp = tempdir().unwrap();
391        let artifact_dir = temp.path().join("artifacts");
392        std::fs::create_dir_all(&artifact_dir).unwrap();
393        let canonical_dir = artifact_dir.canonicalize().unwrap();
394
395        let writer = ArtifactWriter::new(canonical_dir, "test").with_max_size(10);
396        let request = WriteRequest::new("task1", "output.txt")
397            .with_content("this content is longer than 10 bytes");
398
399        let result = writer.write(request).await;
400        assert!(result.is_err());
401        let err = result.unwrap_err();
402        assert!(matches!(err, NikaError::ArtifactSizeExceeded { .. }));
403    }
404
405    #[tokio::test]
406    async fn test_write_path_traversal_blocked() {
407        let (writer, _temp) = test_writer();
408        let request =
409            WriteRequest::new("task1", "../../../etc/passwd").with_content("malicious content");
410
411        let result = writer.write(request).await;
412        assert!(result.is_err());
413        let err = result.unwrap_err();
414        assert!(matches!(err, NikaError::ArtifactPathError { .. }));
415    }
416
417    #[tokio::test]
418    async fn test_write_absolute_path_blocked() {
419        let (writer, _temp) = test_writer();
420        let request = WriteRequest::new("task1", "/etc/passwd").with_content("test");
421
422        let result = writer.write(request).await;
423        assert!(result.is_err());
424    }
425
426    #[tokio::test]
427    async fn test_write_custom_vars() {
428        let (writer, _temp) = test_writer();
429        // Note: custom vars cannot contain '/' - use separate path segments in template
430        let request = WriteRequest::new("task1", "locales/{{locale}}/{{entity}}.json")
431            .with_content("{}")
432            .with_var("locale", "fr-FR")
433            .with_var("entity", "qr-code");
434
435        let result = writer.write(request).await.unwrap();
436        assert!(result.path.to_string_lossy().contains("fr-FR"));
437        assert!(result.path.to_string_lossy().contains("qr-code"));
438    }
439
440    #[tokio::test]
441    async fn test_write_invalid_json_rejected() {
442        let (writer, _temp) = test_writer();
443        let request = WriteRequest::new("task1", "output.json")
444            .with_content("{ invalid json }")
445            .with_format(OutputFormat::Json);
446
447        let result = writer.write(request).await;
448        assert!(result.is_err());
449        let err = result.unwrap_err();
450        if let NikaError::ArtifactWriteError { reason, .. } = err {
451            assert!(reason.contains("Invalid JSON"));
452        } else {
453            panic!("Expected ArtifactWriteError");
454        }
455    }
456
457    #[tokio::test]
458    async fn test_write_valid_json_accepted() {
459        let (writer, _temp) = test_writer();
460        let request = WriteRequest::new("task1", "output.json")
461            .with_content(r#"{"valid": true, "nested": {"key": 123}}"#)
462            .with_format(OutputFormat::Json);
463
464        let result = writer.write(request).await;
465        assert!(result.is_ok());
466    }
467
468    #[tokio::test]
469    async fn test_write_var_path_traversal_blocked() {
470        let (writer, _temp) = test_writer();
471        let request = WriteRequest::new("task1", "{{entity}}/output.json")
472            .with_content("{}")
473            .with_var("entity", "../escape");
474
475        let result = writer.write(request).await;
476        assert!(result.is_err());
477        let err = result.unwrap_err();
478        assert!(matches!(err, NikaError::TemplateError { .. }));
479    }
480
481    #[test]
482    fn test_validate_path() {
483        let (writer, _temp) = test_writer();
484        let result = writer.validate_path("task1", "output.json");
485        assert!(result.is_ok());
486    }
487
488    #[test]
489    fn test_validate_path_traversal() {
490        let (writer, _temp) = test_writer();
491        let result = writer.validate_path("task1", "../escape.txt");
492        assert!(result.is_err());
493    }
494
495    #[test]
496    fn test_writer_max_size() {
497        let temp = tempdir().unwrap();
498        let writer = ArtifactWriter::new(temp.path(), "test").with_max_size(1024);
499        assert_eq!(writer.max_size, 1024);
500    }
501
502    #[test]
503    fn test_write_request_builder() {
504        let mut vars = HashMap::new();
505        vars.insert("key1".to_string(), "val1".to_string());
506        vars.insert("key2".to_string(), "val2".to_string());
507
508        let request = WriteRequest::new("task", "path.txt")
509            .with_content("content")
510            .with_format(OutputFormat::Json)
511            .with_vars(vars);
512
513        assert_eq!(request.task_id, "task");
514        assert_eq!(request.output_path, "path.txt");
515        assert_eq!(request.content, "content");
516        assert!(matches!(request.format, OutputFormat::Json));
517        assert_eq!(request.vars.len(), 2);
518    }
519
520    #[tokio::test]
521    async fn test_write_binary_from_cas_path() {
522        let temp = tempdir().unwrap();
523        let artifact_dir = temp.path().join("artifacts");
524        std::fs::create_dir_all(&artifact_dir).unwrap();
525        let canonical_dir = artifact_dir.canonicalize().unwrap();
526        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
527
528        // Create a fake CAS file
529        let cas_dir = temp.path().join("cas");
530        std::fs::create_dir_all(&cas_dir).unwrap();
531        let cas_file = cas_dir.join("testfile");
532        let data = b"binary image data here";
533        std::fs::write(&cas_file, data).unwrap();
534
535        let request = BinaryWriteRequest {
536            task_id: "gen_img".to_string(),
537            output_path: "images/result.bin".to_string(),
538            source: BinarySource::CasPath(cas_file),
539            expected_size: data.len() as u64,
540        };
541
542        let result = writer.write_binary(request).await.unwrap();
543        assert!(result.path.ends_with("images/result.bin"));
544        assert_eq!(result.size, data.len() as u64);
545
546        // Verify file content
547        let written = std::fs::read(&result.path).unwrap();
548        assert_eq!(written, data);
549    }
550
551    #[tokio::test]
552    async fn test_write_binary_always_overwrites() {
553        // Binary artifacts always use overwrite semantics (no append mode)
554        let temp = tempdir().unwrap();
555        let artifact_dir = temp.path().join("artifacts");
556        std::fs::create_dir_all(&artifact_dir).unwrap();
557        let canonical_dir = artifact_dir.canonicalize().unwrap();
558        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
559
560        let cas_file = temp.path().join("dummy");
561        std::fs::write(&cas_file, b"test").unwrap();
562
563        let request = BinaryWriteRequest {
564            task_id: "task1".to_string(),
565            output_path: "output.bin".to_string(),
566            source: BinarySource::CasPath(cas_file),
567            expected_size: 4,
568        };
569
570        // write_binary always overwrites — append is not supported for binary
571        let result = writer.write_binary(request).await;
572        assert!(
573            result.is_ok(),
574            "Binary write should succeed (overwrite mode)"
575        );
576    }
577
578    #[tokio::test]
579    async fn test_write_binary_size_limit() {
580        let temp = tempdir().unwrap();
581        let artifact_dir = temp.path().join("artifacts");
582        std::fs::create_dir_all(&artifact_dir).unwrap();
583        let canonical_dir = artifact_dir.canonicalize().unwrap();
584        let writer = ArtifactWriter::new(canonical_dir, "test-workflow").with_max_size(10);
585
586        let cas_file = temp.path().join("bigfile");
587        let data = vec![0u8; 100];
588        std::fs::write(&cas_file, &data).unwrap();
589
590        let request = BinaryWriteRequest {
591            task_id: "task1".to_string(),
592            output_path: "output.bin".to_string(),
593            source: BinarySource::CasPath(cas_file),
594            expected_size: 100,
595        };
596
597        let result = writer.write_binary(request).await;
598        assert!(result.is_err());
599        let err = result.unwrap_err();
600        assert!(matches!(err, NikaError::ArtifactSizeExceeded { .. }));
601    }
602
603    #[tokio::test]
604    async fn test_write_binary_missing_source() {
605        let temp = tempdir().unwrap();
606        let artifact_dir = temp.path().join("artifacts");
607        std::fs::create_dir_all(&artifact_dir).unwrap();
608        let canonical_dir = artifact_dir.canonicalize().unwrap();
609        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
610
611        let request = BinaryWriteRequest {
612            task_id: "task1".to_string(),
613            output_path: "output.bin".to_string(),
614            source: BinarySource::CasPath(PathBuf::from("/nonexistent/cas/file")),
615            expected_size: 42,
616        };
617
618        let result = writer.write_binary(request).await;
619        assert!(result.is_err());
620    }
621
622    #[tokio::test]
623    async fn test_write_binary_overwrite_existing() {
624        let temp = tempdir().unwrap();
625        let artifact_dir = temp.path().join("artifacts");
626        std::fs::create_dir_all(&artifact_dir).unwrap();
627        let canonical_dir = artifact_dir.canonicalize().unwrap();
628        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
629
630        // Create CAS files with different content
631        let cas_dir = temp.path().join("cas");
632        std::fs::create_dir_all(&cas_dir).unwrap();
633        let cas_file_v1 = cas_dir.join("v1");
634        let cas_file_v2 = cas_dir.join("v2");
635        std::fs::write(&cas_file_v1, b"version 1 data").unwrap();
636        std::fs::write(&cas_file_v2, b"version 2 data -- longer").unwrap();
637
638        // First write
639        let request1 = BinaryWriteRequest {
640            task_id: "task1".to_string(),
641            output_path: "output.bin".to_string(),
642            source: BinarySource::CasPath(cas_file_v1),
643            expected_size: 14,
644        };
645        let result1 = writer.write_binary(request1).await.unwrap();
646        assert_eq!(std::fs::read(&result1.path).unwrap(), b"version 1 data");
647
648        // Second write to same path (overwrite) — must succeed
649        let request2 = BinaryWriteRequest {
650            task_id: "task1".to_string(),
651            output_path: "output.bin".to_string(),
652            source: BinarySource::CasPath(cas_file_v2),
653            expected_size: 24,
654        };
655        let result2 = writer.write_binary(request2).await.unwrap();
656        assert_eq!(
657            std::fs::read(&result2.path).unwrap(),
658            b"version 2 data -- longer",
659            "Overwrite should replace file content"
660        );
661    }
662
663    #[tokio::test]
664    async fn test_write_binary_format_is_binary() {
665        let temp = tempdir().unwrap();
666        let artifact_dir = temp.path().join("artifacts");
667        std::fs::create_dir_all(&artifact_dir).unwrap();
668        let canonical_dir = artifact_dir.canonicalize().unwrap();
669        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
670
671        let cas_file = temp.path().join("data");
672        std::fs::write(&cas_file, b"test").unwrap();
673
674        let request = BinaryWriteRequest {
675            task_id: "task1".to_string(),
676            output_path: "output.bin".to_string(),
677            source: BinarySource::CasPath(cas_file),
678            expected_size: 4,
679        };
680
681        let result = writer.write_binary(request).await.unwrap();
682        assert_eq!(
683            result.format,
684            OutputFormat::Binary,
685            "Binary write should report Binary format"
686        );
687    }
688
689    // ═══════════════════════════════════════════════════════════════
690    // SECURITY: Binary artifact output path traversal tests
691    // Verify that write_binary blocks output paths containing ".."
692    // or absolute paths, matching the same protections as text write.
693    // ═══════════════════════════════════════════════════════════════
694
695    #[tokio::test]
696    async fn test_write_binary_output_path_traversal_blocked() {
697        let temp = tempdir().unwrap();
698        let artifact_dir = temp.path().join("artifacts");
699        std::fs::create_dir_all(&artifact_dir).unwrap();
700        let canonical_dir = artifact_dir.canonicalize().unwrap();
701        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
702
703        let cas_file = temp.path().join("data");
704        std::fs::write(&cas_file, b"secret").unwrap();
705
706        let request = BinaryWriteRequest {
707            task_id: "task1".to_string(),
708            output_path: "../../../etc/shadow".to_string(),
709            source: BinarySource::CasPath(cas_file),
710            expected_size: 6,
711        };
712
713        let result = writer.write_binary(request).await;
714        assert!(
715            result.is_err(),
716            "Path traversal in binary output must be blocked"
717        );
718        let err = result.unwrap_err();
719        assert!(
720            matches!(err, NikaError::ArtifactPathError { .. }),
721            "Expected ArtifactPathError, got: {:?}",
722            err
723        );
724    }
725
726    #[tokio::test]
727    async fn test_write_binary_output_absolute_path_blocked() {
728        let temp = tempdir().unwrap();
729        let artifact_dir = temp.path().join("artifacts");
730        std::fs::create_dir_all(&artifact_dir).unwrap();
731        let canonical_dir = artifact_dir.canonicalize().unwrap();
732        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
733
734        let cas_file = temp.path().join("data");
735        std::fs::write(&cas_file, b"test").unwrap();
736
737        let request = BinaryWriteRequest {
738            task_id: "task1".to_string(),
739            output_path: "/tmp/escape.bin".to_string(),
740            source: BinarySource::CasPath(cas_file),
741            expected_size: 4,
742        };
743
744        let result = writer.write_binary(request).await;
745        assert!(
746            result.is_err(),
747            "Absolute output path in binary write must be blocked"
748        );
749    }
750
751    #[tokio::test]
752    async fn test_write_binary_output_hidden_traversal_blocked() {
753        let temp = tempdir().unwrap();
754        let artifact_dir = temp.path().join("artifacts");
755        std::fs::create_dir_all(&artifact_dir).unwrap();
756        let canonical_dir = artifact_dir.canonicalize().unwrap();
757        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
758
759        let cas_file = temp.path().join("data");
760        std::fs::write(&cas_file, b"test").unwrap();
761
762        let request = BinaryWriteRequest {
763            task_id: "task1".to_string(),
764            output_path: "a/../../escape.bin".to_string(),
765            source: BinarySource::CasPath(cas_file),
766            expected_size: 4,
767        };
768
769        let result = writer.write_binary(request).await;
770        assert!(
771            result.is_err(),
772            "Hidden traversal in binary output path must be blocked"
773        );
774    }
775
776    #[tokio::test]
777    async fn test_write_binary_output_null_byte_blocked() {
778        let temp = tempdir().unwrap();
779        let artifact_dir = temp.path().join("artifacts");
780        std::fs::create_dir_all(&artifact_dir).unwrap();
781        let canonical_dir = artifact_dir.canonicalize().unwrap();
782        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
783
784        let cas_file = temp.path().join("data");
785        std::fs::write(&cas_file, b"test").unwrap();
786
787        let request = BinaryWriteRequest {
788            task_id: "task1".to_string(),
789            output_path: "output\0.bin".to_string(),
790            source: BinarySource::CasPath(cas_file),
791            expected_size: 4,
792        };
793
794        let result = writer.write_binary(request).await;
795        assert!(
796            result.is_err(),
797            "Null bytes in binary output path must be blocked"
798        );
799    }
800
801    // ═══════════════════════════════════════════════════════════════
802    // SECURITY: Binary artifact source path validation tests
803    // ═══════════════════════════════════════════════════════════════
804
805    #[tokio::test]
806    async fn test_write_binary_source_outside_cas_fails_gracefully() {
807        let temp = tempdir().unwrap();
808        let artifact_dir = temp.path().join("artifacts");
809        std::fs::create_dir_all(&artifact_dir).unwrap();
810        let canonical_dir = artifact_dir.canonicalize().unwrap();
811        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
812
813        let request = BinaryWriteRequest {
814            task_id: "task1".to_string(),
815            output_path: "output.bin".to_string(),
816            source: BinarySource::CasPath(PathBuf::from("/nonexistent/fake/cas/ab/cdef")),
817            expected_size: 42,
818        };
819
820        let result = writer.write_binary(request).await;
821        assert!(
822            result.is_err(),
823            "Missing CAS source file must produce an error"
824        );
825    }
826
827    #[cfg(unix)]
828    #[tokio::test]
829    async fn test_write_binary_symlink_source_reads_target() {
830        // Verify behavior when CAS path is a symlink: reflink_or_copy follows
831        // symlinks by default. This test documents the behavior.
832        use std::os::unix::fs::symlink;
833
834        let temp = tempdir().unwrap();
835        let artifact_dir = temp.path().join("artifacts");
836        std::fs::create_dir_all(&artifact_dir).unwrap();
837        let canonical_dir = artifact_dir.canonicalize().unwrap();
838        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
839
840        let real_file = temp.path().join("real_data");
841        std::fs::write(&real_file, b"real content").unwrap();
842        let symlink_path = temp.path().join("cas_symlink");
843        symlink(&real_file, &symlink_path).unwrap();
844
845        let request = BinaryWriteRequest {
846            task_id: "task1".to_string(),
847            output_path: "output.bin".to_string(),
848            source: BinarySource::CasPath(symlink_path),
849            expected_size: 12,
850        };
851
852        let result = writer.write_binary(request).await.unwrap();
853        let written = std::fs::read(&result.path).unwrap();
854        assert_eq!(
855            written, b"real content",
856            "Symlink source is followed by reflink_or_copy (documented behavior)"
857        );
858    }
859
860    #[cfg(unix)]
861    #[tokio::test]
862    async fn test_write_binary_symlink_in_output_parent_dir() {
863        // Security test: if a parent directory in the output path is a symlink
864        // that points outside the artifact directory, validate_artifact_path
865        // uses normalize_path (logical, not filesystem), so it cannot detect this.
866        // This test documents the known limitation.
867        use std::os::unix::fs::symlink;
868
869        let temp = tempdir().unwrap();
870        let artifact_dir = temp.path().join("artifacts");
871        std::fs::create_dir_all(&artifact_dir).unwrap();
872        let canonical_dir = artifact_dir.canonicalize().unwrap();
873        let writer = ArtifactWriter::new(&canonical_dir, "test-workflow");
874
875        let cas_file = temp.path().join("data");
876        std::fs::write(&cas_file, b"test data").unwrap();
877
878        // Create a symlink inside the artifact dir that points outside
879        let escape_target = temp.path().join("escape_target");
880        std::fs::create_dir_all(&escape_target).unwrap();
881        let symlink_dir = canonical_dir.join("evil_link");
882        symlink(&escape_target, &symlink_dir).unwrap();
883
884        let request = BinaryWriteRequest {
885            task_id: "task1".to_string(),
886            output_path: "evil_link/file.bin".to_string(),
887            source: BinarySource::CasPath(cas_file),
888            expected_size: 9,
889        };
890
891        // This currently SUCCEEDS because normalize_path doesn't resolve symlinks.
892        // Documented as a known limitation -- defense relies on the artifact_dir
893        // being created by nika itself (not user-controlled).
894        let result = writer.write_binary(request).await;
895        assert!(
896            result.is_ok(),
897            "Symlink-in-parent currently passes logical validation (known limitation)"
898        );
899
900        let escaped_file = escape_target.join("file.bin");
901        assert!(
902            escaped_file.exists(),
903            "File was written through symlink to escape target (known limitation)"
904        );
905    }
906
907    // ═══════════════════════════════════════════════════════════════════════
908    // Edge case tests for reflink-copy behavior
909    // ═══════════════════════════════════════════════════════════════════════
910
911    #[tokio::test]
912    async fn test_write_binary_empty_source_file() {
913        // Edge case: source file is 0 bytes
914        // reflink_or_copy should handle this gracefully
915        let temp = tempdir().unwrap();
916        let artifact_dir = temp.path().join("artifacts");
917        std::fs::create_dir_all(&artifact_dir).unwrap();
918        let canonical_dir = artifact_dir.canonicalize().unwrap();
919        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
920
921        let cas_file = temp.path().join("empty");
922        std::fs::write(&cas_file, b"").unwrap();
923
924        let request = BinaryWriteRequest {
925            task_id: "task1".to_string(),
926            output_path: "output.bin".to_string(),
927            source: BinarySource::CasPath(cas_file),
928            expected_size: 0,
929        };
930
931        let result = writer.write_binary(request).await.unwrap();
932        assert_eq!(result.size, 0, "Empty source should produce empty output");
933
934        let content = std::fs::read(&result.path).unwrap();
935        assert!(content.is_empty(), "Output file should be empty");
936    }
937
938    #[tokio::test]
939    async fn test_write_binary_source_is_symlink() {
940        // Edge case: source file is a symlink — reflink should follow it
941        // On macOS, clonefile handles symlinks. The fallback fs::copy also follows symlinks.
942        let temp = tempdir().unwrap();
943        let artifact_dir = temp.path().join("artifacts");
944        std::fs::create_dir_all(&artifact_dir).unwrap();
945        let canonical_dir = artifact_dir.canonicalize().unwrap();
946        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
947
948        // Create real file and a symlink to it
949        let real_file = temp.path().join("real_data");
950        std::fs::write(&real_file, b"symlink target data").unwrap();
951
952        let symlink_path = temp.path().join("link_to_real");
953        #[cfg(unix)]
954        std::os::unix::fs::symlink(&real_file, &symlink_path).unwrap();
955        #[cfg(windows)]
956        std::os::windows::fs::symlink_file(&real_file, &symlink_path).unwrap();
957
958        let request = BinaryWriteRequest {
959            task_id: "task1".to_string(),
960            output_path: "from_symlink.bin".to_string(),
961            source: BinarySource::CasPath(symlink_path),
962            expected_size: 19,
963        };
964
965        let result = writer.write_binary(request).await.unwrap();
966        let content = std::fs::read(&result.path).unwrap();
967        assert_eq!(
968            content, b"symlink target data",
969            "Should copy the symlink target content"
970        );
971        assert_eq!(result.size, 19);
972    }
973
974    #[tokio::test]
975    async fn test_write_binary_source_deleted_before_copy() {
976        // Edge case: CAS file deleted between request creation and copy execution
977        let temp = tempdir().unwrap();
978        let artifact_dir = temp.path().join("artifacts");
979        std::fs::create_dir_all(&artifact_dir).unwrap();
980        let canonical_dir = artifact_dir.canonicalize().unwrap();
981        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
982
983        // Create then immediately delete the source file
984        let cas_file = temp.path().join("ephemeral");
985        std::fs::write(&cas_file, b"will be deleted").unwrap();
986        std::fs::remove_file(&cas_file).unwrap();
987
988        let request = BinaryWriteRequest {
989            task_id: "task1".to_string(),
990            output_path: "output.bin".to_string(),
991            source: BinarySource::CasPath(cas_file),
992            expected_size: 15,
993        };
994
995        let result = writer.write_binary(request).await;
996        assert!(result.is_err(), "Should fail when source file is missing");
997
998        let err = result.unwrap_err();
999        if let NikaError::ArtifactWriteError { reason, .. } = &err {
1000            assert!(
1001                reason.contains("CAS read failed"),
1002                "Error should mention CAS read failure: {}",
1003                reason
1004            );
1005        } else {
1006            panic!("Expected ArtifactWriteError, got: {:?}", err);
1007        }
1008    }
1009
1010    #[tokio::test]
1011    async fn test_write_binary_read_only_source_works() {
1012        // Edge case: source file is read-only — copy should still work
1013        let temp = tempdir().unwrap();
1014        let artifact_dir = temp.path().join("artifacts");
1015        std::fs::create_dir_all(&artifact_dir).unwrap();
1016        let canonical_dir = artifact_dir.canonicalize().unwrap();
1017        let writer = ArtifactWriter::new(canonical_dir, "test-workflow");
1018
1019        let cas_file = temp.path().join("readonly");
1020        std::fs::write(&cas_file, b"read-only content").unwrap();
1021
1022        // Make the source file read-only
1023        let mut perms = std::fs::metadata(&cas_file).unwrap().permissions();
1024        perms.set_readonly(true);
1025        std::fs::set_permissions(&cas_file, perms).unwrap();
1026
1027        let request = BinaryWriteRequest {
1028            task_id: "task1".to_string(),
1029            output_path: "output.bin".to_string(),
1030            source: BinarySource::CasPath(cas_file.clone()),
1031            expected_size: 17,
1032        };
1033
1034        let result = writer.write_binary(request).await.unwrap();
1035        assert_eq!(result.size, 17);
1036        let content = std::fs::read(&result.path).unwrap();
1037        assert_eq!(content, b"read-only content");
1038
1039        // Restore permissions for cleanup
1040        use std::os::unix::fs::PermissionsExt;
1041        let perms = std::fs::Permissions::from_mode(0o644);
1042        std::fs::set_permissions(&cas_file, perms).unwrap();
1043    }
1044
1045    #[tokio::test]
1046    async fn test_write_binary_no_partial_file_on_missing_source() {
1047        // Verify that when reflink_or_copy fails, no partial destination file is left
1048        let temp = tempdir().unwrap();
1049        let artifact_dir = temp.path().join("artifacts");
1050        std::fs::create_dir_all(&artifact_dir).unwrap();
1051        let canonical_dir = artifact_dir.canonicalize().unwrap();
1052        let writer = ArtifactWriter::new(&canonical_dir, "test-workflow");
1053
1054        let request = BinaryWriteRequest {
1055            task_id: "task1".to_string(),
1056            output_path: "should_not_exist.bin".to_string(),
1057            source: BinarySource::CasPath(PathBuf::from("/nonexistent/path/file")),
1058            expected_size: 100,
1059        };
1060
1061        let _ = writer.write_binary(request).await;
1062
1063        // Verify no partial file was left at the destination
1064        let ghost = canonical_dir.join("should_not_exist.bin");
1065        assert!(
1066            !ghost.exists(),
1067            "Failed copy should not leave a partial file at: {}",
1068            ghost.display()
1069        );
1070    }
1071
1072    /// Stress test: 10 concurrent binary writes to the SAME output path.
1073    ///
1074    /// Verifies the retry loop handles the TOCTOU race between remove_file
1075    /// and reflink_or_copy (which uses create_new / O_EXCL internally).
1076    /// All writes must succeed and the final file must be consistent.
1077    #[tokio::test]
1078    async fn test_write_binary_concurrent_same_path() {
1079        use std::sync::Arc as StdArc;
1080        use tokio::task::JoinSet;
1081
1082        let temp = tempdir().unwrap();
1083        let artifact_dir = temp.path().join("artifacts");
1084        std::fs::create_dir_all(&artifact_dir).unwrap();
1085        let canonical_dir = artifact_dir.canonicalize().unwrap();
1086        let writer = StdArc::new(ArtifactWriter::new(&canonical_dir, "test-workflow"));
1087
1088        // Create 10 distinct CAS source files with identifiable content
1089        let cas_dir = temp.path().join("cas");
1090        std::fs::create_dir_all(&cas_dir).unwrap();
1091
1092        let mut cas_files = Vec::new();
1093        for i in 0..10u8 {
1094            let cas_file = cas_dir.join(format!("source_{}", i));
1095            // Each file has unique content: 1KB of repeated byte value
1096            let data = vec![i; 1024];
1097            std::fs::write(&cas_file, &data).unwrap();
1098            cas_files.push(cas_file);
1099        }
1100
1101        // Spawn 10 concurrent write_binary calls to the SAME output path
1102        let mut join_set = JoinSet::new();
1103        for (i, cas_file) in cas_files.into_iter().enumerate() {
1104            let writer = StdArc::clone(&writer);
1105            join_set.spawn(async move {
1106                let request = BinaryWriteRequest {
1107                    task_id: format!("task_{}", i),
1108                    output_path: "shared_output.bin".to_string(),
1109                    source: BinarySource::CasPath(cas_file),
1110                    expected_size: 1024,
1111                };
1112                (i, writer.write_binary(request).await)
1113            });
1114        }
1115
1116        // Collect results — all should succeed (no errors)
1117        let mut successes = 0;
1118        let mut errors = 0;
1119        while let Some(join_result) = join_set.join_next().await {
1120            let (idx, write_result) = join_result.expect("task should not panic");
1121            match write_result {
1122                Ok(result) => {
1123                    assert_eq!(result.size, 1024, "task_{} wrote wrong size", idx);
1124                    successes += 1;
1125                }
1126                Err(e) => {
1127                    eprintln!("task_{} failed: {}", idx, e);
1128                    errors += 1;
1129                }
1130            }
1131        }
1132
1133        // ALL writes must succeed — no TOCTOU errors
1134        assert_eq!(
1135            errors, 0,
1136            "All concurrent binary writes should succeed, but {} failed",
1137            errors
1138        );
1139        assert_eq!(successes, 10, "All 10 writes should succeed");
1140
1141        // The final file must contain valid data (1024 bytes of a single repeated value)
1142        let final_path = canonical_dir.join("shared_output.bin");
1143        let content = std::fs::read(&final_path).unwrap();
1144        assert_eq!(content.len(), 1024, "File should be exactly 1024 bytes");
1145
1146        // All bytes should be the same value (from one consistent writer)
1147        let first_byte = content[0];
1148        assert!(
1149            content.iter().all(|&b| b == first_byte),
1150            "File content should be consistent (all bytes from one writer), \
1151             but found mixed data — indicates corruption from concurrent writes"
1152        );
1153    }
1154}