Skip to main content

apcore_toolkit/
scanner.rs

// Scanner trait and shared utilities for framework scanners.
//
// Provides filtering, deduplication, and annotation inference.
// Framework-specific implementations live in separate crates
// (e.g., axum-apcore, actix-apcore).
6
7use apcore::module::ModuleAnnotations;
8use async_trait::async_trait;
9use regex::Regex;
10
11use crate::types::ScannedModule;
12
13/// Abstract interface for framework scanners.
14///
15/// Implementors provide `scan()` for framework-specific endpoint scanning
16/// and `source_name()` for identification. The `App` type parameter allows
17/// each framework adapter to accept its own application type.
18///
19/// # Language-specific API shape
20///
21/// The Rust `BaseScanner` trait intentionally contains only the primitive `scan()` and
22/// `source_name()` operations, keeping it object-safe (usable as `Box<dyn BaseScanner>`).
23///
24/// Helper utilities — [`filter_modules`], [`deduplicate_ids`], [`infer_annotations_from_method`] —
25/// are free functions in the `scanner` module rather than trait default methods.
26/// This differs from Python and TypeScript where these are instance methods on the class.
27///
28/// Usage:
29/// ```ignore
30/// let filtered = scanner::filter_modules(&modules, Some("my.*"), None)?;
31/// let deduplicated = scanner::deduplicate_ids(filtered);
32/// ```
33///
34/// # Note: ConventionScanner
35///
36/// `ConventionScanner` (available in Python as `apcore_toolkit.ConventionScanner`) is
37/// **Python-only**. It relies on Python's `importlib` module introspection for plain-function
38/// discovery, which has no equivalent in Rust. Rust consumers should use `BaseScanner`
39/// implementations that work with Rust's type system directly.
40///
41/// ```ignore
42/// // Example: Axum adapter
43/// struct AxumScanner;
44///
45/// #[async_trait]
46/// impl BaseScanner<axum::Router> for AxumScanner {
47///     async fn scan(&self, app: &axum::Router) -> Vec<ScannedModule> { /* ... */ }
48///     fn source_name(&self) -> &str { "axum" }
49/// }
50///
51/// // Example: Actix adapter
52/// struct ActixScanner;
53///
54/// #[async_trait]
55/// impl BaseScanner<()> for ActixScanner {
56///     async fn scan(&self, _app: &()) -> Vec<ScannedModule> { /* ... */ }
57///     fn source_name(&self) -> &str { "actix-web" }
58/// }
59/// ```
60#[async_trait]
61pub trait BaseScanner<App: Send + Sync = ()> {
62    /// Scan endpoints and return module definitions.
63    ///
64    /// The `app` parameter receives framework-specific state (e.g., `axum::Router`,
65    /// `actix_web::App`). Use `()` if no app context is needed.
66    async fn scan(&self, app: &App) -> Vec<ScannedModule>;
67
68    /// Return human-readable scanner name (e.g., "axum", "actix-web").
69    fn source_name(&self) -> &str;
70}
71
72/// Apply include/exclude regex filters to scanned modules.
73///
74/// - `include`: If set, only modules whose `module_id` matches are kept.
75/// - `exclude`: If set, modules whose `module_id` matches are removed.
76///
77/// Returns an error if either pattern is not a valid regex.
78///
79/// # Rust API note
80///
81/// In Python and TypeScript, `filter_modules` / `filterModules` is an *instance method*
82/// on `BaseScanner`. In Rust it is a free function because the `BaseScanner` trait must
83/// remain object-safe — adding `Self`-independent helpers as default methods would prevent
84/// trait object usage. Call this function directly with your module slice:
85///
86/// ```ignore
87/// let filtered = scanner::filter_modules(&modules, Some("my_app.*"), None)?;
88/// ```
89///
90/// # Errors
91///
92/// Returns `Err(regex::Error)` if `include` or `exclude` contain invalid regex patterns.
93pub fn filter_modules(
94    modules: &[ScannedModule],
95    include: Option<&str>,
96    exclude: Option<&str>,
97) -> Result<Vec<ScannedModule>, regex::Error> {
98    let mut result: Vec<ScannedModule> = modules.to_vec();
99
100    if let Some(pattern) = include {
101        let re = Regex::new(pattern)?;
102        result.retain(|m| re.is_match(&m.module_id));
103    }
104
105    if let Some(pattern) = exclude {
106        let re = Regex::new(pattern)?;
107        result.retain(|m| !re.is_match(&m.module_id));
108    }
109
110    Ok(result)
111}
112
113/// Resolve duplicate module IDs by appending `_2`, `_3`, etc.
114///
115/// A warning is appended to the module's warnings list when a rename occurs.
116pub fn deduplicate_ids(modules: Vec<ScannedModule>) -> Vec<ScannedModule> {
117    // Pre-scan to build the full set of original IDs, so that generated suffixed
118    // names skip any ID that already exists in the input (prevents forward collisions
119    // like `[a, a, a_2]` producing two `a_2` entries).
120    let original_ids: std::collections::HashSet<String> =
121        modules.iter().map(|m| m.module_id.clone()).collect();
122    let mut occurrence_count: std::collections::HashMap<String, usize> =
123        std::collections::HashMap::new();
124    let mut assigned: std::collections::HashSet<String> = std::collections::HashSet::new();
125    let mut result: Vec<ScannedModule> = Vec::with_capacity(modules.len());
126
127    for mut module in modules {
128        let mid = module.module_id.clone();
129        let count = occurrence_count.entry(mid.clone()).or_insert(0);
130        *count += 1;
131
132        if *count == 1 {
133            assigned.insert(mid.clone());
134        } else {
135            // Find the smallest suffix that doesn't collide with any original or
136            // already-assigned ID.
137            let mut suffix = *count;
138            let mut new_id = format!("{}_{}", mid, suffix);
139            while assigned.contains(&new_id) || original_ids.contains(&new_id) {
140                suffix += 1;
141                new_id = format!("{}_{}", mid, suffix);
142            }
143            assigned.insert(new_id.clone());
144            module.warnings.push(format!(
145                "Module ID renamed from '{}' to '{}' to avoid collision",
146                mid, new_id
147            ));
148            module.module_id = new_id;
149        }
150
151        result.push(module);
152    }
153
154    result
155}
156
157/// Infer behavioral annotations from an HTTP method.
158///
159/// Mapping:
160/// - GET     -> readonly=true, cacheable=true
161/// - HEAD    -> readonly=true (inspection-only, no body)
162/// - OPTIONS -> readonly=true (metadata query, no mutation)
163/// - DELETE  -> destructive=true
164/// - PUT     -> idempotent=true
165/// - POST    -> default (all false; creates resources, not idempotent by spec)
166/// - PATCH   -> default (partial update, not standardly idempotent)
167pub fn infer_annotations_from_method(method: &str) -> ModuleAnnotations {
168    match method.to_uppercase().as_str() {
169        "GET" => ModuleAnnotations {
170            readonly: true,
171            cacheable: true,
172            ..Default::default()
173        },
174        "HEAD" | "OPTIONS" => ModuleAnnotations {
175            readonly: true,
176            ..Default::default()
177        },
178        "DELETE" => ModuleAnnotations {
179            destructive: true,
180            ..Default::default()
181        },
182        "PUT" => ModuleAnnotations {
183            idempotent: true,
184            ..Default::default()
185        },
186        _ => ModuleAnnotations::default(),
187    }
188}
189
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::collections::HashSet;

    /// Build a minimal module that is distinguished only by its ID.
    fn make_module(id: &str) -> ScannedModule {
        ScannedModule::new(
            id.into(),
            "test".into(),
            json!({}),
            json!({}),
            vec![],
            "app:func".into(),
        )
    }

    /// Build one minimal module per ID, in the given order.
    fn make_modules(ids: &[&str]) -> Vec<ScannedModule> {
        ids.iter().copied().map(make_module).collect()
    }

    /// Project a module list onto its IDs, preserving order.
    fn ids_of(modules: &[ScannedModule]) -> Vec<&str> {
        modules.iter().map(|m| m.module_id.as_str()).collect()
    }

    // ---- filter_modules ----------------------------------------------------

    #[test]
    fn test_filter_modules_include() {
        let modules = make_modules(&["users.get", "users.create", "tasks.list"]);
        let kept = filter_modules(&modules, Some("users"), None).unwrap();
        assert_eq!(kept.len(), 2);
        assert!(kept.iter().all(|m| m.module_id.starts_with("users")));
    }

    #[test]
    fn test_filter_modules_exclude() {
        let modules = make_modules(&["users.get", "users.create", "tasks.list"]);
        let kept = filter_modules(&modules, None, Some("users")).unwrap();
        assert_eq!(ids_of(&kept), ["tasks.list"]);
    }

    #[test]
    fn test_filter_modules_both() {
        let modules = make_modules(&["users.get", "users.admin.create", "tasks.list"]);
        let kept = filter_modules(&modules, Some("users"), Some("admin")).unwrap();
        assert_eq!(ids_of(&kept), ["users.get"]);
    }

    #[test]
    fn test_filter_modules_no_filters() {
        let modules = make_modules(&["users.get", "tasks.list"]);
        let kept = filter_modules(&modules, None, None).unwrap();
        assert_eq!(kept.len(), 2);
    }

    #[test]
    fn test_filter_modules_include_matches_none() {
        let modules = make_modules(&["users.get", "tasks.list"]);
        let kept = filter_modules(&modules, Some("^zzz$"), None).unwrap();
        assert!(kept.is_empty());
    }

    #[test]
    fn test_filter_modules_exclude_matches_all() {
        let modules = make_modules(&["users.get", "users.create"]);
        let kept = filter_modules(&modules, None, Some("users")).unwrap();
        assert!(kept.is_empty());
    }

    #[test]
    fn test_filter_modules_invalid_include_regex() {
        let modules = make_modules(&["a"]);
        assert!(filter_modules(&modules, Some("[invalid"), None).is_err());
    }

    #[test]
    fn test_filter_modules_invalid_exclude_regex() {
        let modules = make_modules(&["a"]);
        assert!(filter_modules(&modules, None, Some("[invalid")).is_err());
    }

    // ---- deduplicate_ids ---------------------------------------------------

    #[test]
    fn test_deduplicate_ids_no_duplicates() {
        let result = deduplicate_ids(make_modules(&["a", "b"]));
        assert_eq!(ids_of(&result), ["a", "b"]);
        assert!(result[0].warnings.is_empty());
    }

    #[test]
    fn test_deduplicate_ids_with_duplicates() {
        let result = deduplicate_ids(make_modules(&["a", "a", "a"]));
        assert_eq!(ids_of(&result), ["a", "a_2", "a_3"]);
        assert!(result[1].warnings[0].contains("renamed"));
    }

    #[test]
    fn test_deduplicate_ids_empty_list() {
        assert!(deduplicate_ids(Vec::new()).is_empty());
    }

    #[test]
    fn test_deduplicate_ids_original_unchanged() {
        let input = make_modules(&["a", "a"]);
        let snapshot = input.clone();
        // `deduplicate_ids` takes ownership of `input`; the snapshot taken before
        // the call must stay independent and unmodified.
        let result = deduplicate_ids(input);

        for untouched in &snapshot {
            assert_eq!(untouched.module_id, "a");
            assert!(untouched.warnings.is_empty());
        }

        // The returned list has been deduplicated.
        assert_eq!(result[1].module_id, "a_2");
    }

    #[test]
    fn test_deduplicate_ids_mixed() {
        let result = deduplicate_ids(make_modules(&["a", "b", "a", "c", "b"]));
        assert_eq!(ids_of(&result), ["a", "b", "a_2", "c", "b_2"]);
    }

    #[test]
    fn test_deduplicate_warnings_first_no_warning() {
        let result = deduplicate_ids(make_modules(&["x", "x"]));
        assert!(
            result[0].warnings.is_empty(),
            "First occurrence should have no warning"
        );
        assert!(
            !result[1].warnings.is_empty(),
            "Duplicate should have a warning"
        );
    }

    #[test]
    fn test_deduplicate_warnings_preserved() {
        let mut second = make_module("dup");
        second.warnings.push("existing warning".into());
        let result = deduplicate_ids(vec![make_module("dup"), second]);

        // The pre-existing warning stays first; the rename warning is appended.
        let warnings = &result[1].warnings;
        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0], "existing warning");
        assert!(warnings[1].contains("renamed"));
    }

    #[test]
    fn test_deduplicate_ids_no_collision_with_preexisting_suffixed_id() {
        // [a, a, a_2]: the second 'a' must not take 'a_2', which already exists
        // later in the input, so it moves on to 'a_3'.
        let result = deduplicate_ids(make_modules(&["a", "a", "a_2"]));
        assert_eq!(result[0].module_id, "a", "first 'a' keeps its ID");
        assert_eq!(
            result[1].module_id, "a_3",
            "second 'a' skips 'a_2' (pre-existing) and picks 'a_3'"
        );
        assert_eq!(result[2].module_id, "a_2", "original 'a_2' keeps its ID");

        let distinct: HashSet<&str> = ids_of(&result).into_iter().collect();
        assert_eq!(distinct.len(), 3, "all three IDs must be distinct");
    }

    // ---- infer_annotations_from_method --------------------------------------

    #[test]
    fn test_infer_annotations_get() {
        let annotations = infer_annotations_from_method("GET");
        assert!(annotations.readonly);
        assert!(annotations.cacheable);
        assert!(!annotations.destructive);
    }

    #[test]
    fn test_infer_annotations_delete() {
        let annotations = infer_annotations_from_method("DELETE");
        assert!(annotations.destructive);
        assert!(!annotations.readonly);
    }

    #[test]
    fn test_infer_annotations_put() {
        let annotations = infer_annotations_from_method("PUT");
        assert!(annotations.idempotent);
        assert!(!annotations.readonly);
    }

    #[test]
    fn test_infer_annotations_post() {
        let annotations = infer_annotations_from_method("POST");
        assert!(!annotations.readonly);
        assert!(!annotations.destructive);
        assert!(!annotations.idempotent);
    }

    #[test]
    fn test_infer_annotations_patch() {
        let annotations = infer_annotations_from_method("PATCH");
        assert!(!annotations.readonly);
        assert!(!annotations.destructive);
        assert!(!annotations.idempotent);
        assert!(!annotations.cacheable);
    }

    #[test]
    fn test_infer_annotations_head() {
        let annotations = infer_annotations_from_method("HEAD");
        assert!(annotations.readonly, "HEAD should be readonly");
        assert!(
            !annotations.cacheable,
            "HEAD should not be cacheable (no body)"
        );
        assert!(!annotations.destructive);
        assert!(!annotations.idempotent);
    }

    #[test]
    fn test_infer_annotations_options() {
        let annotations = infer_annotations_from_method("OPTIONS");
        assert!(annotations.readonly, "OPTIONS should be readonly");
        assert!(!annotations.destructive);
        assert!(!annotations.idempotent);
    }

    #[test]
    fn test_infer_annotations_case_insensitive() {
        assert!(infer_annotations_from_method("get").readonly);
    }

    #[test]
    fn test_infer_annotations_head_case_insensitive() {
        assert!(infer_annotations_from_method("head").readonly);
    }
}
448}