Skip to main content

assay/
discovery.rs

1//! Module discovery and search index builder.
2//!
3//! Discovers Assay modules from three sources (in priority order):
4//! 1. Project — `./modules/` relative to CWD
5//! 2. Global  — `$ASSAY_MODULES_PATH` or `~/.assay/modules/`
6//! 3. BuiltIn — embedded stdlib + hardcoded Rust builtins
7
8use crate::search::{SearchEngine, SearchResult};
9use include_dir::{Dir, include_dir};
10
11use crate::metadata::{self, ModuleMetadata};
12#[cfg(not(feature = "db"))]
13use crate::search::BM25Index;
14#[cfg(feature = "db")]
15use crate::search_fts5::FTS5Index;
16
17static STDLIB_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/stdlib");
18
/// Where a discovered module originates from.
///
/// Note that the declaration order below is not the priority order:
/// `discover_modules` emits `Project` modules first, then `Global`, then
/// `BuiltIn`.
#[derive(Debug, Clone, PartialEq)]
pub enum ModuleSource {
    /// Shipped with the binary: either an embedded stdlib `.lua` file
    /// (via `include_dir!`) or one of the hardcoded Rust builtins.
    BuiltIn,
    /// Found in `./modules/` relative to CWD
    Project,
    /// Found in `$ASSAY_MODULES_PATH` or `~/.assay/modules/`
    Global,
}
29
/// A module discovered during the discovery phase.
///
/// One value is produced per `.lua` file found on disk or in the embedded
/// stdlib, and one per hardcoded Rust builtin.
#[derive(Debug, Clone)]
pub struct DiscoveredModule {
    /// Name the module is registered under: `assay.<name>` for Lua modules,
    /// the bare builtin name (e.g. `http`) for Rust builtins.
    pub module_name: String,
    /// Which discovery source produced this entry.
    pub source: ModuleSource,
    /// Metadata parsed from the Lua source, or built from the builtin table
    /// for Rust builtins.
    pub metadata: ModuleMetadata,
    /// Full Lua source text; empty for Rust builtins, which have no Lua file.
    pub lua_source: String,
}
38
/// Hardcoded Rust builtins with their descriptions and search keywords.
///
/// Each entry is a `(name, description, keywords)` tuple:
/// - `name` is used verbatim as the module name (no `assay.` prefix),
/// - `description` is the one-line summary stored in the module metadata,
/// - `keywords` are the extra search terms stored in the module metadata.
///
/// These modules are implemented in Rust, so they have no Lua source to
/// parse metadata from; this table is the only source of their metadata.
const BUILTINS: &[(&str, &str, &[&str])] = &[
    (
        "http",
        "HTTP client and server: get, post, put, patch, delete, serve",
        &[
            "http", "client", "server", "request", "response", "headers", "endpoint", "api",
            "webhook", "rest",
        ],
    ),
    (
        "json",
        "JSON serialization: parse and encode",
        &[
            "json",
            "serialization",
            "deserialize",
            "stringify",
            "parse",
            "encode",
            "format",
        ],
    ),
    (
        "yaml",
        "YAML serialization: parse and encode",
        &[
            "yaml",
            "serialization",
            "deserialize",
            "parse",
            "encode",
            "format",
        ],
    ),
    (
        "toml",
        "TOML serialization: parse and encode",
        &[
            "toml",
            "serialization",
            "deserialize",
            "parse",
            "encode",
            "configuration",
        ],
    ),
    (
        "fs",
        "Filesystem: read and write files",
        &["fs", "filesystem", "file", "read", "write", "io", "path"],
    ),
    (
        "crypto",
        "Cryptography: jwt_sign, hash, hmac, random",
        &[
            "crypto",
            "jwt",
            "signature",
            "hash",
            "hmac",
            "encryption",
            "random",
            "security",
            "password",
            "signing",
            "rsa",
            "sha256",
        ],
    ),
    (
        "base64",
        "Base64 encoding and decoding",
        &["base64", "encoding", "decode", "encode", "binary"],
    ),
    (
        "regex",
        "Regular expressions: match, find, find_all, replace",
        &[
            "regex",
            "pattern",
            "match",
            "find",
            "replace",
            "regular-expression",
            "regexp",
        ],
    ),
    (
        "db",
        "Database: connect, query, execute, close (Postgres, MySQL, SQLite)",
        &[
            "db",
            "database",
            "sql",
            "postgres",
            "mysql",
            "sqlite",
            "connection",
            "query",
            "execute",
        ],
    ),
    (
        "ws",
        "WebSocket: connect, send, recv, close",
        &[
            "ws",
            "websocket",
            "connection",
            "message",
            "streaming",
            "realtime",
            "socket",
        ],
    ),
    (
        "template",
        "Jinja2-compatible templates: render file or string",
        &[
            "template",
            "jinja2",
            "rendering",
            "string-template",
            "mustache",
            "render",
        ],
    ),
    (
        "async",
        "Async tasks: spawn, spawn_interval, await, cancel",
        &[
            "async",
            "asynchronous",
            "task",
            "coroutine",
            "concurrent",
            "spawn",
            "interval",
        ],
    ),
    (
        "assert",
        "Assertions: eq, gt, lt, contains, not_nil, matches",
        &[
            "assert",
            "assertion",
            "test",
            "validation",
            "comparison",
            "check",
            "verify",
        ],
    ),
    (
        "log",
        "Logging: info, warn, error",
        &[
            "log", "logging", "output", "debug", "error", "warning", "info", "trace",
        ],
    ),
    (
        "env",
        "Environment variables: get",
        &["env", "environment", "variable", "configuration", "config"],
    ),
    (
        "sleep",
        "Sleep for N seconds",
        &["sleep", "delay", "pause", "wait", "time"],
    ),
    (
        "time",
        "Unix timestamp in seconds",
        &["time", "timestamp", "unix", "epoch", "clock", "datetime"],
    ),
    (
        "compress",
        "Decompression: gunzip, unxz, unzstd. Pure binary in/out.",
        &["compress", "decompress", "gunzip", "gzip", "xz", "lzma", "zstd"],
    ),
];
221
222/// Discover all modules: embedded stdlib + `./modules/` + `~/.assay/modules/` (or `$ASSAY_MODULES_PATH`).
223///
224/// Returns modules ordered by priority: Project first, then Global, then BuiltIn.
225/// Callers can deduplicate by name, keeping the highest-priority (first) occurrence.
226pub fn discover_modules() -> Vec<DiscoveredModule> {
227    let mut modules = Vec::new();
228
229    // Priority 1: Project modules (./modules/)
230    discover_filesystem_modules(
231        std::path::Path::new("./modules"),
232        ModuleSource::Project,
233        &mut modules,
234    );
235
236    // Priority 2: Global modules ($ASSAY_MODULES_PATH or ~/.assay/modules/)
237    let global_path = resolve_global_modules_path();
238    if let Some(path) = global_path {
239        discover_filesystem_modules(&path, ModuleSource::Global, &mut modules);
240    }
241
242    // Priority 3: Embedded stdlib .lua files
243    discover_embedded_stdlib(&mut modules);
244
245    // Priority 3 (continued): Hardcoded Rust builtins
246    discover_rust_builtins(&mut modules);
247
248    modules
249}
250
251/// Build a search index from discovered modules.
252///
253/// When feature `db` is enabled: uses `FTS5Index`.
254/// When feature `db` is disabled: uses `BM25Index`.
255pub fn build_index(modules: &[DiscoveredModule]) -> Box<dyn SearchEngine> {
256    #[cfg(feature = "db")]
257    {
258        let mut idx = FTS5Index::new();
259        for m in modules {
260            idx.add_document(
261                &m.module_name,
262                &[
263                    ("keywords", &m.metadata.keywords.join(" "), 3.0),
264                    ("module_name", &m.module_name, 2.0),
265                    ("description", &m.metadata.description, 1.0),
266                    ("functions", &m.metadata.auto_functions.join(" "), 1.0),
267                ],
268            );
269        }
270        Box::new(idx)
271    }
272    #[cfg(not(feature = "db"))]
273    {
274        let mut idx = BM25Index::new();
275        for m in modules {
276            idx.add_document(
277                &m.module_name,
278                &[
279                    ("keywords", &m.metadata.keywords.join(" "), 3.0),
280                    ("module_name", &m.module_name, 2.0),
281                    ("description", &m.metadata.description, 1.0),
282                    ("functions", &m.metadata.auto_functions.join(" "), 1.0),
283                ],
284            );
285        }
286        Box::new(idx)
287    }
288}
289
290/// Convenience: discover all modules, build index, search, return results.
291pub fn search_modules(query: &str, limit: usize) -> Vec<SearchResult> {
292    let modules = discover_modules();
293    let index = build_index(&modules);
294    index.search(query, limit)
295}
296
297/// Resolve the global modules directory path.
298///
299/// Checks `$ASSAY_MODULES_PATH` first, then falls back to `~/.assay/modules/`.
300/// Returns `None` if neither is available.
301fn resolve_global_modules_path() -> Option<std::path::PathBuf> {
302    if let Ok(custom) = std::env::var(crate::lua::MODULES_PATH_ENV) {
303        return Some(std::path::PathBuf::from(custom));
304    }
305    if let Ok(home) = std::env::var("HOME") {
306        return Some(std::path::Path::new(&home).join(".assay/modules"));
307    }
308    None
309}
310
311/// Discover `.lua` files from a filesystem directory.
312///
313/// Silently skips if the directory does not exist.
314fn discover_filesystem_modules(
315    dir: &std::path::Path,
316    source: ModuleSource,
317    modules: &mut Vec<DiscoveredModule>,
318) {
319    let entries = match std::fs::read_dir(dir) {
320        Ok(entries) => entries,
321        Err(_) => return, // Directory doesn't exist or can't be read — skip silently
322    };
323
324    for entry in entries.flatten() {
325        let path = entry.path();
326        if path.extension().and_then(|e| e.to_str()) != Some("lua") {
327            continue;
328        }
329
330        let lua_source = match std::fs::read_to_string(&path) {
331            Ok(s) => s,
332            Err(_) => continue,
333        };
334
335        let stem = path
336            .file_stem()
337            .and_then(|s| s.to_str())
338            .unwrap_or_default();
339        let module_name = format!("assay.{stem}");
340        let meta = metadata::parse_metadata(&lua_source);
341
342        modules.push(DiscoveredModule {
343            module_name,
344            source: source.clone(),
345            metadata: meta,
346            lua_source,
347        });
348    }
349}
350
351/// Discover embedded stdlib `.lua` files from `include_dir!`.
352///
353/// Recurses into subdirectories so nested namespaces like
354/// `engine/vault.lua` register as `assay.engine.vault`. The path-to-
355/// module-name mapping replaces the OS separator with `.`. Both
356/// `engine.lua` (the facade) and `engine/vault.lua` (a submodule) get
357/// registered as separate `assay.engine` and `assay.engine.vault`
358/// entries respectively, matching what `require()` already resolves.
359fn discover_embedded_stdlib(modules: &mut Vec<DiscoveredModule>) {
360    fn walk(dir: &include_dir::Dir<'_>, modules: &mut Vec<DiscoveredModule>) {
361        for file in dir.files() {
362            let path = file.path();
363            if path.extension().and_then(|e| e.to_str()) != Some("lua") {
364                continue;
365            }
366            let Some(lua_source) = file.contents_utf8() else {
367                continue;
368            };
369            // Build dotted path from "stdlib"-relative components,
370            // dropping the trailing `.lua`.
371            let segments: Vec<&str> = path
372                .iter()
373                .filter_map(|c| c.to_str())
374                .collect();
375            if segments.is_empty() {
376                continue;
377            }
378            let mut joined = segments.join(".");
379            if joined.ends_with(".lua") {
380                joined.truncate(joined.len() - 4);
381            }
382            let module_name = format!("assay.{joined}");
383            let meta = metadata::parse_metadata(lua_source);
384            modules.push(DiscoveredModule {
385                module_name,
386                source: ModuleSource::BuiltIn,
387                metadata: meta,
388                lua_source: lua_source.to_string(),
389            });
390        }
391        for sub in dir.dirs() {
392            walk(sub, modules);
393        }
394    }
395    walk(&STDLIB_DIR, modules);
396}
397
398/// Add hardcoded Rust builtins (not Lua files) to the module list.
399fn discover_rust_builtins(modules: &mut Vec<DiscoveredModule>) {
400    for &(name, description, kw) in BUILTINS {
401        modules.push(DiscoveredModule {
402            module_name: name.to_string(),
403            source: ModuleSource::BuiltIn,
404            lua_source: String::new(),
405            metadata: ModuleMetadata {
406                module_name: name.to_string(),
407                description: description.to_string(),
408                keywords: kw.iter().map(|k| k.to_string()).collect(),
409                ..Default::default()
410            },
411        });
412    }
413}