cuenv_core/tasks/
discovery.rs

1//! Task discovery across monorepo workspaces
2//!
3//! This module provides functionality to discover tasks across a monorepo,
4//! supporting TaskRef resolution and TaskMatcher-based task discovery.
5
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8
9use ignore::WalkBuilder;
10use regex::Regex;
11
12use crate::manifest::{ArgMatcher, Project, TaskMatcher, TaskRef};
13use crate::tasks::{Task, TaskIndex};
14
/// A discovered project in the workspace
///
/// One value exists per project known to [`TaskDiscovery`], whether it was found by
/// [`TaskDiscovery::discover`] or registered through [`TaskDiscovery::add_project`].
#[derive(Debug, Clone)]
pub struct DiscoveredProject {
    /// Path to the env.cue file
    pub env_cue_path: PathBuf,
    /// Path to the project root (directory containing env.cue)
    pub project_root: PathBuf,
    /// The parsed manifest
    pub manifest: Project,
}
25
/// Result of matching a task
///
/// Returned by [`TaskDiscovery::resolve_ref`] and [`TaskDiscovery::match_tasks`].
/// The task definition is an owned clone, independent of the discovery state.
#[derive(Debug, Clone)]
pub struct MatchedTask {
    /// Path to the project containing this task
    pub project_root: PathBuf,
    /// Name of the task
    pub task_name: String,
    /// The task definition
    pub task: Task,
    /// Project name (from env.cue name field); `None` when that name is empty
    /// or consists only of whitespace
    pub project_name: Option<String>,
}
38
/// Function type for evaluating env.cue files
///
/// The callback receives the project root, i.e. the directory that contains the
/// `env.cue` file (not the path of the file itself), and returns the parsed
/// manifest. An `Err` message is wrapped into [`DiscoveryError::EvalError`].
pub type EvalFn = Box<dyn Fn(&Path) -> Result<Project, String> + Send + Sync>;
41
/// Discovers tasks across a monorepo workspace
///
/// Projects are collected either by scanning the workspace with
/// [`discover`](TaskDiscovery::discover) or by registering manifests directly with
/// [`add_project`](TaskDiscovery::add_project).
pub struct TaskDiscovery {
    /// Root directory of the workspace
    workspace_root: PathBuf,
    /// Cached project index: name -> project
    ///
    /// Keys are the whitespace-trimmed manifest names; projects whose name is
    /// empty after trimming are not indexed.
    name_index: HashMap<String, DiscoveredProject>,
    /// All discovered projects
    projects: Vec<DiscoveredProject>,
    /// Function to evaluate env.cue files
    eval_fn: Option<EvalFn>,
}
53
54impl TaskDiscovery {
55    /// Create a new TaskDiscovery for the given workspace root
56    pub fn new(workspace_root: PathBuf) -> Self {
57        Self {
58            workspace_root,
59            name_index: HashMap::new(),
60            projects: Vec::new(),
61            eval_fn: None,
62        }
63    }
64
65    /// Set the evaluation function for loading env.cue files
66    pub fn with_eval_fn(mut self, eval_fn: EvalFn) -> Self {
67        self.eval_fn = Some(eval_fn);
68        self
69    }
70
71    /// Discover all projects in the workspace
72    ///
73    /// This scans for env.cue files using the ignore crate to respect .gitignore
74    /// and builds the name -> project index.
75    /// Requires an eval function to be set via `with_eval_fn`.
76    ///
77    /// Projects that fail to load are logged as warnings but don't stop discovery.
78    /// A summary of failures is logged at the end if any occurred.
79    pub fn discover(&mut self) -> Result<(), DiscoveryError> {
80        self.projects.clear();
81        self.name_index.clear();
82
83        let eval_fn = self
84            .eval_fn
85            .as_ref()
86            .ok_or(DiscoveryError::NoEvalFunction)?;
87
88        // Build a walker that respects gitignore
89        // We start from workspace_root
90        let walker = WalkBuilder::new(&self.workspace_root)
91            .follow_links(true)
92            .standard_filters(true) // Enable .gitignore, .ignore, hidden file filtering
93            .build();
94
95        // Track failures for summary
96        let mut load_failures: Vec<(PathBuf, String)> = Vec::new();
97
98        for result in walker {
99            match result {
100                Ok(entry) => {
101                    let path = entry.path();
102                    if path.file_name() == Some("env.cue".as_ref()) {
103                        match self.load_project(path, eval_fn) {
104                            Ok(project) => {
105                                // Build name index
106                                let name = project.manifest.name.trim();
107                                if !name.is_empty() {
108                                    self.name_index.insert(name.to_string(), project.clone());
109                                }
110                                self.projects.push(project);
111                            }
112                            Err(e) => {
113                                let error_msg = e.to_string();
114                                tracing::warn!(
115                                    path = %path.display(),
116                                    error = %error_msg,
117                                    "Failed to load project - tasks from this project will not be available"
118                                );
119                                load_failures.push((path.to_path_buf(), error_msg));
120                            }
121                        }
122                    }
123                }
124                Err(err) => {
125                    tracing::warn!(
126                        error = %err,
127                        "Error during workspace scan - some projects may not be discovered"
128                    );
129                }
130            }
131        }
132
133        // Log summary of failures
134        if !load_failures.is_empty() {
135            tracing::warn!(
136                count = load_failures.len(),
137                "Some projects failed to load during discovery. \
138                 Fix CUE errors in these projects or add them to .gitignore to exclude. \
139                 Run with RUST_LOG=debug for details."
140            );
141        }
142
143        tracing::debug!(
144            discovered = self.projects.len(),
145            named = self.name_index.len(),
146            failures = load_failures.len(),
147            "Workspace discovery complete"
148        );
149
150        Ok(())
151    }
152
153    /// Add a pre-loaded project to the discovery
154    ///
155    /// This is useful when you already have a Project manifest loaded.
156    pub fn add_project(&mut self, project_root: PathBuf, manifest: Project) {
157        let env_cue_path = project_root.join("env.cue");
158        let project = DiscoveredProject {
159            env_cue_path,
160            project_root,
161            manifest: manifest.clone(),
162        };
163
164        // Build name index
165        let name = manifest.name.trim();
166        if !name.is_empty() {
167            self.name_index.insert(name.to_string(), project.clone());
168        }
169        self.projects.push(project);
170    }
171
172    /// Load a single project from its env.cue path
173    fn load_project(
174        &self,
175        env_cue_path: &Path,
176        eval_fn: &EvalFn,
177    ) -> Result<DiscoveredProject, DiscoveryError> {
178        let project_root = env_cue_path
179            .parent()
180            .ok_or_else(|| DiscoveryError::InvalidPath(env_cue_path.to_path_buf()))?
181            .to_path_buf();
182
183        // Use provided eval function to evaluate the env.cue file
184        let manifest = eval_fn(&project_root)
185            .map_err(|e| DiscoveryError::EvalError(env_cue_path.to_path_buf(), e))?;
186
187        Ok(DiscoveredProject {
188            env_cue_path: env_cue_path.to_path_buf(),
189            project_root,
190            manifest,
191        })
192    }
193
194    /// Resolve a TaskRef to its actual task definition
195    ///
196    /// Returns the project root and the task if found
197    pub fn resolve_ref(&self, task_ref: &TaskRef) -> Result<MatchedTask, DiscoveryError> {
198        let (project_name, task_name) = task_ref
199            .parse()
200            .ok_or_else(|| DiscoveryError::InvalidTaskRef(task_ref.ref_.clone()))?;
201
202        let project = self
203            .name_index
204            .get(&project_name)
205            .ok_or_else(|| DiscoveryError::ProjectNotFound(project_name.clone()))?;
206
207        let task_def =
208            project.manifest.tasks.get(&task_name).ok_or_else(|| {
209                DiscoveryError::TaskNotFound(project_name.clone(), task_name.clone())
210            })?;
211
212        // We only support single tasks, not task groups, for TaskRef
213        let task = task_def
214            .as_single()
215            .ok_or_else(|| DiscoveryError::TaskIsGroup(project_name.clone(), task_name.clone()))?
216            .clone();
217
218        Ok(MatchedTask {
219            project_root: project.project_root.clone(),
220            task_name,
221            task,
222            project_name: Some(project.manifest.name.clone()).filter(|s| !s.trim().is_empty()),
223        })
224    }
225
226    /// Find all tasks matching a TaskMatcher
227    ///
228    /// Returns an error if any regex pattern in the matcher is invalid.
229    pub fn match_tasks(&self, matcher: &TaskMatcher) -> Result<Vec<MatchedTask>, DiscoveryError> {
230        // Pre-compile arg matchers to catch regex errors early and avoid recompilation
231        let compiled_arg_matchers = match &matcher.args {
232            Some(arg_matchers) => Some(compile_arg_matchers(arg_matchers)?),
233            None => None,
234        };
235
236        let mut matches = Vec::new();
237
238        for project in &self.projects {
239            // Filter by workspace membership if specified
240            if let Some(required_workspaces) = &matcher.workspaces {
241                if let Some(project_workspaces) = &project.manifest.workspaces {
242                    let in_workspace = required_workspaces
243                        .iter()
244                        .any(|ws| project_workspaces.contains_key(ws));
245                    if !in_workspace {
246                        continue;
247                    }
248                } else {
249                    // Project has no workspaces defined, skip if we require specific ones
250                    continue;
251                }
252            }
253
254            // Use the canonical TaskIndex to include tasks nested in parallel groups.
255            let index = TaskIndex::build(&project.manifest.tasks).map_err(|e| {
256                DiscoveryError::TaskIndexError(project.env_cue_path.clone(), e.to_string())
257            })?;
258
259            // Check each addressable single task in the project
260            for entry in index.list() {
261                let Some(task) = entry.definition.as_single() else {
262                    continue;
263                };
264
265                // Match by labels
266                if let Some(required_labels) = &matcher.labels {
267                    let has_all_labels = required_labels
268                        .iter()
269                        .all(|label| task.labels.contains(label));
270                    if !has_all_labels {
271                        continue;
272                    }
273                }
274
275                // Match by command
276                if let Some(required_command) = &matcher.command
277                    && &task.command != required_command
278                {
279                    continue;
280                }
281
282                // Match by args using pre-compiled matchers
283                if let Some(ref compiled) = compiled_arg_matchers
284                    && !matches_args_compiled(&task.args, compiled)
285                {
286                    continue;
287                }
288
289                matches.push(MatchedTask {
290                    project_root: project.project_root.clone(),
291                    task_name: entry.name.clone(),
292                    task: task.clone(),
293                    project_name: Some(project.manifest.name.clone())
294                        .filter(|s| !s.trim().is_empty()),
295                });
296            }
297        }
298
299        Ok(matches)
300    }
301
302    /// Get all discovered projects
303    pub fn projects(&self) -> &[DiscoveredProject] {
304        &self.projects
305    }
306
307    /// Get a project by name
308    pub fn get_project(&self, name: &str) -> Option<&DiscoveredProject> {
309        self.name_index.get(name)
310    }
311}
312
/// Compiled version of ArgMatcher for efficient matching
///
/// Holds the optional substring as-is and the optional pattern as an already
/// built [`Regex`], so matching never has to compile anything.
#[derive(Debug)]
struct CompiledArgMatcher {
    /// Substring that an argument may contain to count as a match
    contains: Option<String>,
    /// Compiled pattern that an argument may match to count as a match
    regex: Option<Regex>,
}
319
320impl CompiledArgMatcher {
321    /// Compile an ArgMatcher, validating regex patterns
322    fn compile(matcher: &ArgMatcher) -> Result<Self, DiscoveryError> {
323        let regex = match &matcher.matches {
324            Some(pattern) => {
325                // Use regex with size limits to prevent ReDoS
326                let regex = regex::RegexBuilder::new(pattern)
327                    .size_limit(1024 * 1024) // 1MB compiled size limit
328                    .build()
329                    .map_err(|e| DiscoveryError::InvalidRegex(pattern.clone(), e.to_string()))?;
330                Some(regex)
331            }
332            None => None,
333        };
334        Ok(Self {
335            contains: matcher.contains.clone(),
336            regex,
337        })
338    }
339
340    /// Check if any argument matches this matcher
341    fn matches(&self, args: &[String]) -> bool {
342        // If both are None, this matcher matches nothing (conservative behavior)
343        if self.contains.is_none() && self.regex.is_none() {
344            return false;
345        }
346
347        args.iter().any(|arg| {
348            if let Some(substring) = &self.contains
349                && arg.contains(substring)
350            {
351                return true;
352            }
353            if let Some(regex) = &self.regex
354                && regex.is_match(arg)
355            {
356                return true;
357            }
358            false
359        })
360    }
361}
362
363/// Pre-compile all arg matchers, returning errors for invalid patterns
364fn compile_arg_matchers(
365    matchers: &[ArgMatcher],
366) -> Result<Vec<CompiledArgMatcher>, DiscoveryError> {
367    matchers.iter().map(CompiledArgMatcher::compile).collect()
368}
369
370/// Check if task args match all arg matchers (using pre-compiled matchers)
371fn matches_args_compiled(args: &[String], matchers: &[CompiledArgMatcher]) -> bool {
372    matchers.iter().all(|matcher| matcher.matches(args))
373}
374
/// Errors that can occur during task discovery
#[derive(Debug, thiserror::Error)]
pub enum DiscoveryError {
    /// The env.cue path has no parent directory to use as project root.
    #[error("Invalid path: {0}")]
    InvalidPath(PathBuf),

    /// The eval function failed; holds the env.cue path and the reported message.
    #[error("Failed to evaluate {0}: {1}")]
    EvalError(PathBuf, String),

    /// The TaskRef string could not be parsed; holds the raw reference.
    #[error("Invalid TaskRef format: {0}")]
    InvalidTaskRef(String),

    /// No project is indexed under the given name.
    #[error("Project not found: {0}")]
    ProjectNotFound(String),

    /// The project exists but has no task with that name (project, task).
    #[error("Task not found: {0}:{1}")]
    TaskNotFound(String, String),

    /// The referenced task is a group where a single task is required (project, task).
    #[error("Task {0}:{1} is a group, not a single task")]
    TaskIsGroup(String, String),

    /// `discover` was called without an eval function configured.
    #[error("No evaluation function provided - use with_eval_fn()")]
    NoEvalFunction,

    /// An arg matcher pattern failed to compile; holds the pattern and the reason.
    #[error("Invalid regex pattern '{0}': {1}")]
    InvalidRegex(String, String),

    /// Building the task index for a project failed; holds the env.cue path and the reason.
    #[error("Failed to index tasks in {0}: {1}")]
    TaskIndexError(PathBuf, String),

    /// Wrapped I/O error, convertible via `From<std::io::Error>`.
    // NOTE(review): not constructed anywhere in this file - presumably produced by
    // callers through `?`; confirm it is still needed.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
}
408
// Unit tests for TaskRef parsing, arg matcher compilation/matching, and
// TaskMatcher-based discovery over nested parallel groups.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tasks::{ParallelGroup, TaskDefinition, TaskGroup};
    use std::collections::HashMap;
    use std::path::PathBuf;

    // A well-formed `#project:task` reference splits into project and task name.
    #[test]
    fn test_task_ref_parse() {
        let task_ref = TaskRef {
            ref_: "#projen-generator:bun.install".to_string(),
        };
        let (project, task) = task_ref.parse().unwrap();
        assert_eq!(project, "projen-generator");
        assert_eq!(task, "bun.install");
    }

    // Malformed references are rejected with `None`.
    #[test]
    fn test_task_ref_parse_invalid() {
        // Neither a `#` prefix nor a `:` separator
        let task_ref = TaskRef {
            ref_: "invalid".to_string(),
        };
        assert!(task_ref.parse().is_none());

        // `#` prefix present, but no `:task` part
        let task_ref = TaskRef {
            ref_: "#no-task".to_string(),
        };
        assert!(task_ref.parse().is_none());
    }

    /// Helper to compile and match for tests
    fn matches_args(args: &[String], matchers: &[ArgMatcher]) -> bool {
        let compiled = compile_arg_matchers(matchers).expect("test matchers should be valid");
        matches_args_compiled(args, &compiled)
    }

    // `contains` matches when any argument includes the substring.
    #[test]
    fn test_matches_args_contains() {
        let args = vec!["run".to_string(), ".projenrc.ts".to_string()];
        let matchers = vec![ArgMatcher {
            contains: Some(".projenrc".to_string()),
            matches: None,
        }];
        assert!(matches_args(&args, &matchers));
    }

    // `matches` is applied as a regex against each argument.
    #[test]
    fn test_matches_args_regex() {
        let args = vec!["run".to_string(), "test.ts".to_string()];
        let matchers = vec![ArgMatcher {
            contains: None,
            matches: Some(r"\.ts$".to_string()),
        }];
        assert!(matches_args(&args, &matchers));
    }

    // No argument contains the substring, so the matcher fails.
    #[test]
    fn test_matches_args_no_match() {
        let args = vec!["build".to_string()];
        let matchers = vec![ArgMatcher {
            contains: Some("test".to_string()),
            matches: None,
        }];
        assert!(!matches_args(&args, &matchers));
    }

    // Invalid patterns surface as `InvalidRegex` at compile time instead of panicking.
    #[test]
    fn test_invalid_regex_returns_error() {
        let matchers = vec![ArgMatcher {
            contains: None,
            matches: Some(r"[invalid".to_string()), // Unclosed bracket
        }];
        let result = compile_arg_matchers(&matchers);
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err, DiscoveryError::InvalidRegex(_, _)));
    }

    // A matcher with neither `contains` nor `matches` set is deliberately strict.
    #[test]
    fn test_empty_matcher_matches_nothing() {
        let args = vec!["anything".to_string()];
        let matchers = vec![ArgMatcher {
            contains: None,
            matches: None,
        }];
        // Empty matcher should not match anything
        assert!(!matches_args(&args, &matchers));
    }

    // Tasks nested inside a parallel group are found and reported under
    // `<group>.<task>` names.
    #[test]
    fn test_match_tasks_includes_parallel_group_children() {
        let mut discovery = TaskDiscovery::new(PathBuf::from("/tmp"));

        // Both children carry the label the matcher asks for.
        let make_task = || Task {
            command: "echo".into(),
            labels: vec!["projen".into()],
            ..Default::default()
        };

        let mut parallel_tasks = HashMap::new();
        parallel_tasks.insert(
            "generate".into(),
            TaskDefinition::Single(Box::new(make_task())),
        );
        parallel_tasks.insert(
            "types".into(),
            TaskDefinition::Single(Box::new(make_task())),
        );

        // The only top-level entry is the group itself.
        let mut manifest = Project::new("test");
        manifest.tasks.insert(
            "projen".into(),
            TaskDefinition::Group(TaskGroup::Parallel(ParallelGroup {
                tasks: parallel_tasks,
                depends_on: Vec::new(),
            })),
        );

        // Register the manifest directly; no eval function or filesystem scan needed.
        discovery.add_project(PathBuf::from("/tmp/proj"), manifest);

        let matcher = TaskMatcher {
            workspaces: None,
            labels: Some(vec!["projen".into()]),
            command: None,
            args: None,
            parallel: true,
        };

        let matches = discovery.match_tasks(&matcher).unwrap();
        let names: Vec<String> = matches.into_iter().map(|m| m.task_name).collect();
        assert_eq!(names.len(), 2);
        assert!(names.contains(&"projen.generate".to_string()));
        assert!(names.contains(&"projen.types".to_string()));
    }
}