Skip to main content

apcore_cli/
fs_discoverer.rs

1// apcore-cli -- Filesystem-based module discoverer.
2// Scans a directory recursively for module.json descriptor files and
3// produces DiscoveredModule entries for registration in the apcore Registry.
4
5use std::collections::HashMap;
6use std::path::{Path, PathBuf};
7use std::sync::Arc;
8
9use async_trait::async_trait;
10
11use apcore::context::Context;
12use apcore::errors::{ErrorCode, ModuleError};
13use apcore::module::{Module, ModuleAnnotations};
14use apcore::registry::registry::{DiscoveredModule, Discoverer, ModuleDescriptor};
15
/// Placeholder `Module` carried in `DiscoveredModule.module` for subprocess-based
/// modules discovered on the filesystem. It holds the module's schemas so the
/// registry can report them for validation and description, but `execute()`
/// intentionally fails — actual invocation goes through the subprocess dispatch
/// path in `main.rs` which resolves the executable via
/// [`FsDiscoverer::executables_snapshot`].
struct SubprocessPlaceholderModule {
    /// Module identifier; quoted in the error returned by `execute()`.
    module_id: String,
    /// Schema handed back (cloned) by `input_schema()`.
    input_schema: serde_json::Value,
    /// Schema handed back (cloned) by `output_schema()`.
    output_schema: serde_json::Value,
    /// Text handed back by `description()`.
    description: String,
}
28
29#[async_trait]
30impl Module for SubprocessPlaceholderModule {
31    fn description(&self) -> &str {
32        &self.description
33    }
34
35    fn input_schema(&self) -> serde_json::Value {
36        self.input_schema.clone()
37    }
38
39    fn output_schema(&self) -> serde_json::Value {
40        self.output_schema.clone()
41    }
42
43    async fn execute(
44        &self,
45        _inputs: serde_json::Value,
46        _ctx: &Context<serde_json::Value>,
47    ) -> Result<serde_json::Value, ModuleError> {
48        Err(ModuleError::new(
49            ErrorCode::ModuleExecuteError,
50            format!(
51                "Module '{}' is a subprocess module; in-process execute() is \
52                 unsupported. Invoke via the CLI subprocess dispatcher.",
53                self.module_id
54            ),
55        ))
56    }
57}
58
/// Intermediate struct for deserializing module.json files.
///
/// Fields that are optional in the JSON map to defaults suitable for
/// constructing a full `ModuleDescriptor`.
#[derive(Debug, serde::Deserialize)]
struct ModuleJson {
    /// Module name. The only field without a serde default, so a descriptor
    /// that omits it fails to parse. `discover()` uses it as the module id.
    name: String,
    /// Free-form description; an absent key deserializes to the empty string.
    #[serde(default)]
    description: String,
    /// Tags copied into the descriptor; an absent key deserializes to an empty list.
    #[serde(default)]
    tags: Vec<String>,
    /// Input schema; an absent key falls back to `default_schema()`.
    #[serde(default = "default_schema")]
    input_schema: serde_json::Value,
    /// Output schema; an absent key falls back to `default_schema()`.
    #[serde(default = "default_schema")]
    output_schema: serde_json::Value,
    /// Optional relative path to an executable script (e.g. "run.sh").
    /// `discover()` joins it onto the directory containing the module.json.
    #[serde(default)]
    executable: Option<String>,
}
78
79fn default_schema() -> serde_json::Value {
80    serde_json::json!({})
81}
82
/// Filesystem-based module discoverer.
///
/// Recursively walks `root` looking for files named `module.json`, parses each
/// one into a `DiscoveredModule`, and returns them all from `discover()`.
pub struct FsDiscoverer {
    /// Directory that is walked for `module.json` files; declared executables
    /// are also required to resolve to a path underneath it.
    root: PathBuf,
    /// Map of module name to resolved executable path (built during discovery).
    /// Wrapped in a `Mutex` because `discover()` fills it through `&self`.
    executables: std::sync::Mutex<HashMap<String, PathBuf>>,
}
92
93impl FsDiscoverer {
94    /// Create a new discoverer rooted at the given directory path.
95    pub fn new(root: impl Into<PathBuf>) -> Self {
96        Self {
97            root: root.into(),
98            executables: std::sync::Mutex::new(HashMap::new()),
99        }
100    }
101
102    /// Return the resolved executable path for a module, if one was declared.
103    pub fn get_executable(&self, module_name: &str) -> Option<PathBuf> {
104        match self.executables.lock() {
105            Ok(map) => map.get(module_name).cloned(),
106            Err(_poisoned) => {
107                tracing::warn!("Executables mutex poisoned — returning None for '{module_name}'");
108                None
109            }
110        }
111    }
112
113    /// Return a snapshot of all executable paths discovered so far.
114    pub fn executables_snapshot(&self) -> HashMap<String, PathBuf> {
115        self.executables
116            .lock()
117            .map(|map| map.clone())
118            .unwrap_or_default()
119    }
120
121    /// Scan the extensions directory and return a map of module name to description.
122    ///
123    /// This is a convenience method for populating description metadata that
124    /// `ModuleDescriptor` does not carry. Non-parseable files are silently skipped.
125    pub fn load_descriptions(&self) -> std::collections::HashMap<String, String> {
126        let paths = Self::collect_module_jsons(&self.root);
127        let mut map = std::collections::HashMap::new();
128        for path in paths {
129            if let Ok(content) = std::fs::read_to_string(&path) {
130                if let Ok(mj) = serde_json::from_str::<ModuleJson>(&content) {
131                    if !mj.description.is_empty() {
132                        map.insert(mj.name, mj.description);
133                    }
134                }
135            }
136        }
137        map
138    }
139
140    /// Recursively collect all `module.json` paths under `dir`.
141    ///
142    /// Skips symlinked directories to prevent infinite recursion when the
143    /// extensions tree contains a symlink that points back into an ancestor.
144    fn collect_module_jsons(dir: &Path) -> Vec<PathBuf> {
145        let mut result = Vec::new();
146        let entries = match std::fs::read_dir(dir) {
147            Ok(e) => e,
148            Err(_) => return result,
149        };
150        for entry in entries.flatten() {
151            let path = entry.path();
152            // Do not follow symlinked directories — avoids infinite recursion
153            // when a symlink under the extensions root points back into an
154            // ancestor directory (common in monorepo / workspace layouts).
155            let is_symlink = entry.file_type().map(|t| t.is_symlink()).unwrap_or(false);
156            if path.is_dir() && !is_symlink {
157                result.extend(Self::collect_module_jsons(&path));
158            } else if path.file_name().and_then(|n| n.to_str()) == Some("module.json") {
159                result.push(path);
160            }
161        }
162        result
163    }
164}
165
166#[async_trait]
167impl Discoverer for FsDiscoverer {
168    async fn discover(&self, _roots: &[String]) -> Result<Vec<DiscoveredModule>, ModuleError> {
169        let paths = Self::collect_module_jsons(&self.root);
170        let mut modules = Vec::new();
171
172        for path in paths {
173            // Skip a single unreadable / malformed module.json with a warning
174            // rather than aborting the whole pass — the sibling
175            // load_descriptions() already tolerates the same failures, and
176            // dropping every later module on the floor because of one typo
177            // produces a confusing "registry shrink" symptom for users.
178            let content = match std::fs::read_to_string(&path) {
179                Ok(c) => c,
180                Err(e) => {
181                    tracing::warn!(
182                        "Failed to read module.json '{}': {} — skipping",
183                        path.display(),
184                        e
185                    );
186                    continue;
187                }
188            };
189
190            let mj: ModuleJson = match serde_json::from_str(&content) {
191                Ok(m) => m,
192                Err(e) => {
193                    tracing::warn!(
194                        "Failed to parse module.json '{}': {} — skipping",
195                        path.display(),
196                        e
197                    );
198                    continue;
199                }
200            };
201
202            // Resolve executable path relative to module.json directory.
203            // Security: validate the resolved path stays within the extensions root.
204            if let Some(ref exec_rel) = mj.executable {
205                if let Some(parent) = path.parent() {
206                    let exec_path = parent.join(exec_rel);
207                    if exec_path.exists() {
208                        // Canonicalize both paths to prevent traversal via ../../
209                        // Store the canonicalized form so consumers hold the
210                        // vetted, symlink-resolved path rather than the raw one.
211                        let (exec_canon_res, root_canon_res) =
212                            (exec_path.canonicalize(), self.root.canonicalize());
213                        let safe = match (&exec_canon_res, &root_canon_res) {
214                            (Ok(ec), Ok(rc)) => ec.starts_with(rc),
215                            _ => false,
216                        };
217                        if safe {
218                            let exec_canon = exec_canon_res.unwrap();
219                            match self.executables.lock() {
220                                Ok(mut map) => {
221                                    map.insert(mj.name.clone(), exec_canon);
222                                }
223                                Err(_poisoned) => {
224                                    tracing::warn!(
225                                        "Executables mutex poisoned during discover() — '{}' not registered",
226                                        mj.name
227                                    );
228                                }
229                            }
230                        } else {
231                            tracing::warn!(
232                                "Executable '{}' for module '{}' escapes extensions root — skipped",
233                                exec_path.display(),
234                                mj.name
235                            );
236                        }
237                    }
238                }
239            }
240
241            let module_id = mj.name.clone();
242            let descriptor = ModuleDescriptor {
243                module_id: module_id.clone(),
244                name: None,
245                description: mj.description.clone(),
246                documentation: None,
247                input_schema: mj.input_schema.clone(),
248                output_schema: mj.output_schema.clone(),
249                version: "1.0.0".to_string(),
250                tags: mj.tags,
251                annotations: Some(ModuleAnnotations::default()),
252                examples: vec![],
253                metadata: HashMap::new(),
254                display: None,
255                sunset_date: None,
256                dependencies: vec![],
257                enabled: true,
258            };
259
260            let module: Arc<dyn Module> = Arc::new(SubprocessPlaceholderModule {
261                module_id: module_id.clone(),
262                input_schema: mj.input_schema,
263                output_schema: mj.output_schema,
264                description: mj.description,
265            });
266
267            modules.push(DiscoveredModule {
268                name: module_id,
269                source: path.display().to_string(),
270                descriptor,
271                module,
272            });
273        }
274
275        Ok(modules)
276    }
277}
278
279// ---------------------------------------------------------------------------
280// Unit tests
281// ---------------------------------------------------------------------------
282
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Create `dir` (and parents) and write a well-formed `module.json` into it.
    fn write_module_json(dir: &Path, name: &str, description: &str, tags: &[&str]) {
        let tag_list = tags
            .iter()
            .map(|tag| format!("\"{tag}\""))
            .collect::<Vec<String>>()
            .join(", ");
        let content = format!(
            r#"{{
  "name": "{name}",
  "description": "{description}",
  "tags": [{tag_list}],
  "input_schema": {{"type": "object"}},
  "output_schema": {{"type": "object"}}
}}"#
        );
        fs::create_dir_all(dir).unwrap();
        fs::write(dir.join("module.json"), content).unwrap();
    }

    /// Names of the discovered modules, in discovery order.
    fn names_of(modules: &[DiscoveredModule]) -> Vec<&str> {
        modules.iter().map(|m| m.name.as_str()).collect()
    }

    #[tokio::test]
    async fn test_discover_finds_modules() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        write_module_json(&root.join("math/add"), "math.add", "Add numbers", &["math"]);
        write_module_json(
            &root.join("text/upper"),
            "text.upper",
            "Uppercase text",
            &["text"],
        );

        let found = FsDiscoverer::new(root).discover(&[]).await.unwrap();

        assert_eq!(found.len(), 2);
        let names = names_of(&found);
        assert!(names.contains(&"math.add"));
        assert!(names.contains(&"text.upper"));
    }

    #[tokio::test]
    async fn test_discover_empty_dir() {
        let tmp = TempDir::new().unwrap();
        let found = FsDiscoverer::new(tmp.path()).discover(&[]).await.unwrap();
        assert!(found.is_empty());
    }

    #[tokio::test]
    async fn test_discover_nonexistent_dir() {
        let found = FsDiscoverer::new("/nonexistent/path/xxx")
            .discover(&[])
            .await
            .unwrap();
        assert!(found.is_empty());
    }

    #[tokio::test]
    async fn test_discover_invalid_json_is_skipped_not_aborting() {
        // Per review #14: a malformed module.json must produce a tracing
        // warning and be skipped, not abort the whole discovery pass.
        let tmp = TempDir::new().unwrap();
        let bad_dir = tmp.path().join("bad");
        fs::create_dir_all(&bad_dir).unwrap();
        fs::write(bad_dir.join("module.json"), "not valid json").unwrap();
        write_module_json(
            &tmp.path().join("good"),
            "good.mod",
            "still loads",
            &["demo"],
        );

        let found = FsDiscoverer::new(tmp.path())
            .discover(&[])
            .await
            .expect("malformed sibling must not abort the pass");

        let names = names_of(&found);
        assert!(
            names.contains(&"good.mod"),
            "well-formed module must still load alongside malformed sibling, got {names:?}"
        );
    }

    #[tokio::test]
    async fn test_discover_sets_descriptor_fields() {
        let tmp = TempDir::new().unwrap();
        write_module_json(
            &tmp.path().join("a"),
            "test.mod",
            "A test module",
            &["demo", "test"],
        );

        let found = FsDiscoverer::new(tmp.path()).discover(&[]).await.unwrap();
        assert_eq!(found.len(), 1);

        let only = &found[0];
        assert_eq!(only.name, "test.mod");
        let descriptor = &only.descriptor;
        assert_eq!(descriptor.module_id, "test.mod");
        assert!(descriptor.enabled);
        assert_eq!(descriptor.tags, vec!["demo", "test"]);
        assert!(descriptor.dependencies.is_empty());
    }

    #[tokio::test]
    async fn test_discover_and_register_populates_registry() {
        let tmp = TempDir::new().unwrap();
        write_module_json(
            &tmp.path().join("math/add"),
            "math.add",
            "Add numbers",
            &["math"],
        );

        let discoverer = FsDiscoverer::new(tmp.path());
        let registry = apcore::Registry::new();
        let registered = registry.discover(&discoverer).await.unwrap();

        assert_eq!(registered, 1);
        assert!(registry.get_definition("math.add").is_some());
    }
}