Skip to main content

nika_engine/runtime/
context_loader.rs

1//! Context Loader - Load files at workflow start
2//!
3//! Loads files declared in the `context:` block at workflow start.
4//! Files are made available via `{{context.files.alias}}` bindings.
5//!
6//! # Supported file types
7//!
8//! - `.json`: Parsed as JSON object
9//! - `.yaml`, `.yml`: Parsed as YAML object
10//! - `.md`, `.txt`, etc.: Loaded as string
11//! - Glob patterns (`*.md`): Loaded as array of strings
12//!
13//! # Example
14//!
15//! ```yaml
16//! context:
17//!   files:
18//!     brand: ./context/brand.md        # String
19//!     persona: ./context/persona.json  # JSON object
20//!     examples: ./context/*.md         # Array of strings
21//!   session: .nika/sessions/prev.json  # Session restore
22//! ```
23
24use crate::serde_yaml;
25use std::path::Path;
26
27use globset::GlobBuilder;
28use ignore::WalkBuilder;
29use serde_json::Value;
30
31use crate::ast::analyzed::AnalyzedContextFile;
32use crate::ast::context::ContextConfig;
33use crate::error::NikaError;
34
35/// Validate that a path stays within the project boundary
36///
37/// Prevents path traversal attacks where context file paths could escape
38/// the project directory using `../` or symlinks.
39///
40/// # Security
41///
42/// This is a security-critical function. The canonical path of the loaded
43/// file must be under the canonical base path to prevent:
44/// - Reading sensitive files outside project (e.g., `/etc/passwd`)
45/// - Loading files from parent directories
46/// - Symlink attacks pointing outside project
47fn validate_path_boundary(base_path: &Path, target_path: &Path) -> Result<(), NikaError> {
48    // SECURITY: Use centralized path validation from io::security
49    // This ensures consistent security checks across all file loading operations
50    crate::io::security::validate_canonicalized_boundary(base_path, target_path).map_err(|e| {
51        NikaError::ContextLoadError {
52            alias: String::new(),
53            path: e.target_path.display().to_string(),
54            reason: e.reason,
55        }
56    })
57}
58
59// Re-export LoadedContext from store (canonical location)
60pub use crate::store::LoadedContext;
61
62// ═══════════════════════════════════════════════════════════════════════════
63// CONTEXT LOADER
64// ═══════════════════════════════════════════════════════════════════════════
65
66/// Load context files at workflow start
67///
68/// # Arguments
69///
70/// * `config` - Context configuration from workflow
71/// * `base_path` - Base directory for resolving relative paths
72///
73/// # Returns
74///
75/// Loaded context with all files resolved
76///
77/// # Errors
78///
79/// Returns `NikaError` if:
80/// - File not found
81/// - Invalid JSON/YAML
82/// - Invalid glob pattern
83pub async fn load_context(
84    config: &ContextConfig,
85    base_path: &Path,
86) -> Result<LoadedContext, NikaError> {
87    let mut context = LoadedContext::new();
88
89    // Load each file entry
90    for (alias, path_pattern) in &config.files {
91        let value = if is_glob_pattern(path_pattern) {
92            // Glob pattern → array of strings (validation happens inside)
93            load_glob_files(path_pattern, base_path).await?
94        } else {
95            // Single file - validate path boundary
96            let full_path = base_path.join(path_pattern);
97            validate_path_boundary(base_path, &full_path)?;
98            load_single_file(&full_path).await?
99        };
100        context.files.insert(alias.to_string(), value);
101    }
102
103    // Load session if specified
104    if let Some(session_path) = &config.session {
105        let full_path = base_path.join(session_path);
106        // Security: Validate session path stays within project
107        if full_path.exists() {
108            validate_path_boundary(base_path, &full_path)?;
109            let content = tokio::fs::read_to_string(&full_path).await.map_err(|e| {
110                NikaError::ContextLoadError {
111                    alias: "session".to_string(),
112                    path: full_path.display().to_string(),
113                    reason: e.to_string(),
114                }
115            })?;
116            let session: Value =
117                serde_json::from_str(&content).map_err(|e| NikaError::ContextLoadError {
118                    alias: "session".to_string(),
119                    path: full_path.display().to_string(),
120                    reason: format!("Invalid JSON: {}", e),
121                })?;
122            context.session = Some(session);
123        }
124    }
125
126    Ok(context)
127}
128
129/// Load context files from AnalyzedWorkflow's context_files vec.
130///
131/// AnalyzedWorkflow stores context as `Vec<AnalyzedContextFile>` instead of
132/// `ContextConfig { files: HashMap<String, String>, session }`. This function
133/// adapts to the analyzed shape.
134///
135/// Each `AnalyzedContextFile` has:
136/// - `path`: file path (may contain globs)
137/// - `alias`: optional alias for the context key (defaults to filename stem)
138/// - `max_bytes`: optional size limit (not yet enforced)
139pub async fn load_context_analyzed(
140    context_files: &[AnalyzedContextFile],
141    base_path: &Path,
142) -> Result<LoadedContext, NikaError> {
143    let mut context = LoadedContext::new();
144
145    for cf in context_files {
146        let alias = cf.alias.clone().unwrap_or_else(|| path_to_alias(&cf.path));
147
148        let value = if is_glob_pattern(&cf.path) {
149            load_glob_files(&cf.path, base_path).await?
150        } else {
151            let full_path = base_path.join(&cf.path);
152            validate_path_boundary(base_path, &full_path)?;
153            load_single_file(&full_path).await?
154        };
155        context.files.insert(alias, value);
156    }
157
158    Ok(context)
159}
160
/// Derive a context alias from a file path.
///
/// The alias is the final path component with its last extension removed
/// (e.g. `./context/brand.md` → `brand`). When the path has no usable final
/// component (such as an empty string or `..`), the literal `"file"` is used.
fn path_to_alias(path: &str) -> String {
    let stem = Path::new(path).file_stem().and_then(|stem| stem.to_str());

    match stem {
        Some(name) => name.to_owned(),
        None => String::from("file"),
    }
}
169
/// Report whether a path pattern should be treated as a glob.
///
/// A pattern counts as a glob when it contains any of `*`, `?` or `[`.
/// Other characters (including `{`) do not trigger glob handling.
fn is_glob_pattern(pattern: &str) -> bool {
    pattern.chars().any(|c| matches!(c, '*' | '?' | '['))
}
174
175/// Load a single file
176async fn load_single_file(path: &Path) -> Result<Value, NikaError> {
177    let content =
178        tokio::fs::read_to_string(path)
179            .await
180            .map_err(|e| NikaError::ContextLoadError {
181                alias: String::new(),
182                path: path.display().to_string(),
183                reason: e.to_string(),
184            })?;
185
186    // Parse based on extension
187    match path.extension().and_then(|e| e.to_str()) {
188        Some("json") => serde_json::from_str(&content).map_err(|e| NikaError::ContextLoadError {
189            alias: String::new(),
190            path: path.display().to_string(),
191            reason: format!("Invalid JSON: {}", e),
192        }),
193        Some("yaml") | Some("yml") => {
194            serde_yaml::from_str(&content).map_err(|e| NikaError::ContextLoadError {
195                alias: String::new(),
196                path: path.display().to_string(),
197                reason: format!("Invalid YAML: {}", e),
198            })
199        }
200        _ => Ok(Value::String(content)),
201    }
202}
203
204/// Load files matching a glob pattern
205async fn load_glob_files(pattern: &str, base_path: &Path) -> Result<Value, NikaError> {
206    // Extract directory and pattern parts
207    // e.g., "./context/*.md" → base = "./context", pattern = "*.md"
208    let pattern_path = Path::new(pattern);
209    let parent = pattern_path.parent().unwrap_or(Path::new("."));
210    let file_pattern = pattern_path
211        .file_name()
212        .and_then(|n| n.to_str())
213        .unwrap_or("*");
214
215    let search_dir = base_path.join(parent);
216
217    // Security: Validate glob search directory stays within project
218    if search_dir.exists() {
219        validate_path_boundary(base_path, &search_dir)?;
220    }
221
222    // Build glob matcher
223    let glob = GlobBuilder::new(file_pattern)
224        .literal_separator(true)
225        .build()
226        .map_err(|e| NikaError::ContextLoadError {
227            alias: String::new(),
228            path: pattern.to_string(),
229            reason: format!("Invalid glob pattern: {}", e),
230        })?
231        .compile_matcher();
232
233    // Walk directory and collect matches
234    let mut results = Vec::new();
235
236    if !search_dir.exists() {
237        return Ok(Value::Array(Vec::new()));
238    }
239
240    let walker = WalkBuilder::new(&search_dir)
241        .hidden(false)
242        .max_depth(Some(1)) // Only immediate directory
243        .build();
244
245    for entry in walker {
246        let entry = match entry {
247            Ok(e) => e,
248            Err(_) => continue,
249        };
250
251        let path = entry.path();
252        if !path.is_file() {
253            continue;
254        }
255
256        let file_name = match path.file_name().and_then(|n| n.to_str()) {
257            Some(n) => n,
258            None => continue,
259        };
260
261        if glob.is_match(file_name) {
262            let content =
263                tokio::fs::read_to_string(path)
264                    .await
265                    .map_err(|e| NikaError::ContextLoadError {
266                        alias: String::new(),
267                        path: path.display().to_string(),
268                        reason: e.to_string(),
269                    })?;
270            results.push(Value::String(content));
271        }
272    }
273
274    Ok(Value::Array(results))
275}
276
277// ═══════════════════════════════════════════════════════════════════════════
278// TESTS
279// ═══════════════════════════════════════════════════════════════════════════
280
#[cfg(test)]
mod tests {
    use super::*;
    use rustc_hash::FxHashMap;
    use tempfile::TempDir;
    use tokio::fs;

    /// Build a `ContextConfig` with the given `(alias, path)` entries and no session.
    fn files_config(entries: &[(&str, &str)]) -> ContextConfig {
        let mut files = FxHashMap::default();
        for &(alias, path) in entries {
            files.insert(alias.to_string(), path.to_string());
        }
        ContextConfig {
            files,
            session: None,
        }
    }

    /// Build a `ContextConfig` with no file entries and the given session path.
    fn session_config(session_path: &str) -> ContextConfig {
        ContextConfig {
            files: FxHashMap::default(),
            session: Some(session_path.to_string()),
        }
    }

    // =========================================================================
    // LoadedContext and pattern detection
    // =========================================================================

    #[test]
    fn test_loaded_context_default() {
        let empty = LoadedContext::default();
        assert!(empty.is_empty());
        assert_eq!(empty.file_count(), 0);
    }

    #[test]
    fn test_loaded_context_get_file() {
        let mut loaded = LoadedContext::new();
        loaded
            .files
            .insert(String::from("test"), Value::String(String::from("content")));

        assert!(loaded.get_file("test").is_some());
        assert!(loaded.get_file("nonexistent").is_none());
    }

    #[test]
    fn test_is_glob_pattern() {
        for glob in ["*.md", "**/*.rs", "file?.txt", "[abc].txt"] {
            assert!(is_glob_pattern(glob));
        }
        for literal in ["file.txt", "./context/brand.md"] {
            assert!(!is_glob_pattern(literal));
        }
    }

    // =========================================================================
    // Single file loading
    // =========================================================================

    #[tokio::test]
    async fn test_load_single_file_text() {
        let root = TempDir::new().unwrap();
        let markdown = root.path().join("test.md");
        fs::write(&markdown, "# Hello World").await.unwrap();

        let loaded = load_single_file(&markdown).await.unwrap();
        assert_eq!(loaded, Value::String("# Hello World".to_string()));
    }

    #[tokio::test]
    async fn test_load_single_file_json() {
        let root = TempDir::new().unwrap();
        let json_file = root.path().join("test.json");
        fs::write(&json_file, r#"{"name": "test", "value": 42}"#)
            .await
            .unwrap();

        let loaded = load_single_file(&json_file).await.unwrap();
        assert!(loaded.is_object());
        assert_eq!(loaded["name"], "test");
        assert_eq!(loaded["value"], 42);
    }

    #[tokio::test]
    async fn test_load_single_file_yaml() {
        let root = TempDir::new().unwrap();
        let yaml_file = root.path().join("test.yaml");
        fs::write(&yaml_file, "name: test\nvalue: 42")
            .await
            .unwrap();

        let loaded = load_single_file(&yaml_file).await.unwrap();
        assert!(loaded.is_object());
        assert_eq!(loaded["name"], "test");
        assert_eq!(loaded["value"], 42);
    }

    // =========================================================================
    // Glob and full context loading
    // =========================================================================

    #[tokio::test]
    async fn test_load_glob_files() {
        let root = TempDir::new().unwrap();
        let context_dir = root.path().join("context");
        fs::create_dir(&context_dir).await.unwrap();

        // Two files match the pattern, one does not
        let fixtures = [
            ("file1.md", "# File 1"),
            ("file2.md", "# File 2"),
            ("other.txt", "Other file"),
        ];
        for (name, body) in fixtures {
            fs::write(context_dir.join(name), body).await.unwrap();
        }

        let loaded = load_glob_files("context/*.md", root.path())
            .await
            .unwrap();
        let items = loaded.as_array().unwrap();
        assert_eq!(items.len(), 2);
    }

    #[tokio::test]
    async fn test_load_context_full() {
        let root = TempDir::new().unwrap();

        // Create files
        fs::write(root.path().join("brand.md"), "# Brand Guide")
            .await
            .unwrap();
        fs::write(root.path().join("persona.json"), r#"{"name": "Agent"}"#)
            .await
            .unwrap();

        let config = files_config(&[("brand", "brand.md"), ("persona", "persona.json")]);

        let loaded = load_context(&config, root.path()).await.unwrap();
        assert_eq!(loaded.file_count(), 2);
        assert!(loaded.get_file("brand").is_some());
        assert!(loaded.get_file("persona").is_some());
    }

    #[tokio::test]
    async fn test_load_context_with_session() {
        let root = TempDir::new().unwrap();
        let sessions_dir = root.path().join(".nika/sessions");
        fs::create_dir_all(&sessions_dir).await.unwrap();
        fs::write(
            sessions_dir.join("prev.json"),
            r#"{"focus_areas": ["rust", "ai"]}"#,
        )
        .await
        .unwrap();

        let config = session_config(".nika/sessions/prev.json");

        let loaded = load_context(&config, root.path()).await.unwrap();
        assert!(loaded.session.is_some());
        let session = loaded.session.as_ref().unwrap();
        assert!(session["focus_areas"].is_array());
    }

    #[tokio::test]
    async fn test_load_context_missing_session_ok() {
        let root = TempDir::new().unwrap();
        let config = session_config(".nika/sessions/nonexistent.json");

        // Missing session file is OK (not an error)
        let loaded = load_context(&config, root.path()).await.unwrap();
        assert!(loaded.session.is_none());
    }

    #[tokio::test]
    async fn test_load_context_missing_file_error() {
        let root = TempDir::new().unwrap();
        let config = files_config(&[("missing", "nonexistent.md")]);

        let outcome = load_context(&config, root.path()).await;
        assert!(outcome.is_err());
    }

    // =========================================================================
    // Path boundary checks
    // =========================================================================

    #[tokio::test]
    async fn test_load_context_path_traversal_detection() {
        let root = TempDir::new().unwrap();

        // Create a project subdirectory
        let project_dir = root.path().join("project");
        fs::create_dir(&project_dir).await.unwrap();

        // Create a secret file outside project
        fs::write(root.path().join("secret.md"), "# Secret content")
            .await
            .unwrap();

        // Try to load file using path traversal
        let config = files_config(&[("secret", "../secret.md")]);

        let outcome = load_context(&config, &project_dir).await;
        assert!(outcome.is_err());
        let err_str = outcome.unwrap_err().to_string();
        assert!(
            err_str.contains("Path traversal") || err_str.contains("outside project"),
            "Expected path traversal error, got: {}",
            err_str
        );
    }

    #[test]
    fn test_validate_path_boundary() {
        let root = TempDir::new().unwrap();
        let project = root.path().join("project");
        std::fs::create_dir_all(&project).unwrap();

        // One file inside the project, one next to it
        let inside = project.join("valid.md");
        let outside = root.path().join("outside.md");
        for file in [&inside, &outside] {
            std::fs::write(file, "test").unwrap();
        }

        // Valid path within boundary
        assert!(validate_path_boundary(&project, &inside).is_ok());

        // Invalid path outside boundary
        assert!(validate_path_boundary(&project, &outside).is_err());
    }

    // =========================================================================
    // Additional Error Path Tests
    // =========================================================================

    #[tokio::test]
    async fn test_load_single_file_invalid_json() {
        let root = TempDir::new().unwrap();
        let broken = root.path().join("invalid.json");
        fs::write(&broken, "{ not valid json ]").await.unwrap();

        let outcome = load_single_file(&broken).await;
        assert!(outcome.is_err());
        let err_str = outcome.unwrap_err().to_string();
        assert!(
            err_str.contains("Invalid JSON"),
            "Expected Invalid JSON error, got: {}",
            err_str
        );
    }

    #[tokio::test]
    async fn test_load_single_file_invalid_yaml() {
        let root = TempDir::new().unwrap();
        let broken = root.path().join("invalid.yaml");
        // Invalid YAML: indentation error
        fs::write(&broken, "name: test\n  bad: indent\n key: value")
            .await
            .unwrap();

        let outcome = load_single_file(&broken).await;
        assert!(outcome.is_err());
        let err_str = outcome.unwrap_err().to_string();
        assert!(
            err_str.contains("Invalid YAML"),
            "Expected Invalid YAML error, got: {}",
            err_str
        );
    }

    #[tokio::test]
    async fn test_load_single_file_not_found() {
        let root = TempDir::new().unwrap();
        let absent = root.path().join("nonexistent.txt");

        let outcome = load_single_file(&absent).await;
        assert!(outcome.is_err());
        let err_str = outcome.unwrap_err().to_string();
        assert!(
            err_str.contains("No such file") || err_str.contains("nonexistent"),
            "Expected file not found error, got: {}",
            err_str
        );
    }

    #[tokio::test]
    async fn test_load_context_invalid_session_json() {
        let root = TempDir::new().unwrap();
        let sessions_dir = root.path().join(".nika/sessions");
        fs::create_dir_all(&sessions_dir).await.unwrap();
        fs::write(sessions_dir.join("bad.json"), "{ invalid json }")
            .await
            .unwrap();

        let config = session_config(".nika/sessions/bad.json");

        let outcome = load_context(&config, root.path()).await;
        assert!(outcome.is_err());
        let err_str = outcome.unwrap_err().to_string();
        assert!(
            err_str.contains("Invalid JSON"),
            "Expected Invalid JSON error for session, got: {}",
            err_str
        );
    }

    #[tokio::test]
    async fn test_load_glob_files_nonexistent_directory() {
        let root = TempDir::new().unwrap();

        // Try to glob in a directory that doesn't exist
        let outcome = load_glob_files("nonexistent_dir/*.md", root.path()).await;
        // Should return empty array, not error
        assert!(outcome.is_ok());
        let value = outcome.unwrap();
        assert!(value.is_array());
        assert_eq!(value.as_array().unwrap().len(), 0);
    }

    #[tokio::test]
    async fn test_load_glob_files_no_matches() {
        let root = TempDir::new().unwrap();
        let context_dir = root.path().join("context");
        fs::create_dir(&context_dir).await.unwrap();

        // Create a file that doesn't match the pattern
        fs::write(context_dir.join("file.txt"), "content")
            .await
            .unwrap();

        let outcome = load_glob_files("context/*.md", root.path()).await;
        assert!(outcome.is_ok());
        let value = outcome.unwrap();
        assert_eq!(value.as_array().unwrap().len(), 0);
    }

    #[test]
    fn test_validate_path_boundary_nonexistent_target() {
        let root = TempDir::new().unwrap();
        let project = root.path().join("project");
        std::fs::create_dir_all(&project).unwrap();

        // Try to validate a path that doesn't exist
        let absent = project.join("does_not_exist.txt");
        let outcome = validate_path_boundary(&project, &absent);
        assert!(outcome.is_err());
        let err_str = outcome.unwrap_err().to_string();
        assert!(
            err_str.contains("Cannot resolve target path"),
            "Expected cannot resolve error, got: {}",
            err_str
        );
    }

    #[tokio::test]
    async fn test_load_context_error_contains_alias() {
        let root = TempDir::new().unwrap();
        let config = files_config(&[("my_alias", "nonexistent.md")]);

        let outcome = load_context(&config, root.path()).await;
        assert!(outcome.is_err());
        // The error should include the file path for debugging
        let err_str = outcome.unwrap_err().to_string();
        assert!(
            err_str.contains("nonexistent.md"),
            "Error should include file path, got: {}",
            err_str
        );
    }
}