// apcore_toolkit/scanner.rs

// Scanner trait and shared utilities for framework scanners.
//
// Provides filtering, deduplication, and annotation inference.
// Framework-specific implementations live in separate crates
// (e.g., axum-apcore, actix-apcore).
6
7use apcore::module::ModuleAnnotations;
8use async_trait::async_trait;
9use regex::Regex;
10
11use crate::types::ScannedModule;
12
13/// Abstract interface for framework scanners.
14///
15/// Implementors provide `scan()` for framework-specific endpoint scanning
16/// and `source_name()` for identification. The `App` type parameter allows
17/// each framework adapter to accept its own application type.
18///
19/// # Language-specific API shape
20///
21/// The Rust `BaseScanner` trait intentionally contains only the primitive `scan()` and
22/// `source_name()` operations, keeping it object-safe (usable as `Box<dyn BaseScanner>`).
23///
24/// Helper utilities — [`filter_modules`], [`deduplicate_ids`], [`infer_annotations_from_method`] —
25/// are free functions in the `scanner` module rather than trait default methods.
26/// This differs from Python and TypeScript where these are instance methods on the class.
27///
28/// Usage:
29/// ```ignore
30/// let filtered = scanner::filter_modules(&modules, Some("my.*"), None)?;
31/// let deduplicated = scanner::deduplicate_ids(filtered);
32/// ```
33///
34/// # Note: ConventionScanner
35///
36/// `ConventionScanner` (available in Python as `apcore_toolkit.ConventionScanner`) is
37/// **Python-only**. It relies on Python's `importlib` module introspection for plain-function
38/// discovery, which has no equivalent in Rust. Rust consumers should use `BaseScanner`
39/// implementations that work with Rust's type system directly.
40///
41/// ```ignore
42/// // Example: Axum adapter
43/// struct AxumScanner;
44///
45/// #[async_trait]
46/// impl BaseScanner<axum::Router> for AxumScanner {
47///     async fn scan(&self, app: &axum::Router) -> Vec<ScannedModule> { /* ... */ }
48///     fn source_name(&self) -> &str { "axum" }
49/// }
50///
51/// // Example: Actix adapter
52/// struct ActixScanner;
53///
54/// #[async_trait]
55/// impl BaseScanner<()> for ActixScanner {
56///     async fn scan(&self, _app: &()) -> Vec<ScannedModule> { /* ... */ }
57///     fn source_name(&self) -> &str { "actix-web" }
58/// }
59/// ```
60#[async_trait]
61pub trait BaseScanner<App: Send + Sync = ()> {
62    /// Scan endpoints and return module definitions.
63    ///
64    /// The `app` parameter receives framework-specific state (e.g., `axum::Router`,
65    /// `actix_web::App`). Use `()` if no app context is needed.
66    async fn scan(&self, app: &App) -> Vec<ScannedModule>;
67
68    /// Return human-readable scanner name (e.g., "axum", "actix-web").
69    fn source_name(&self) -> &str;
70}
71
72/// Apply include/exclude regex filters to scanned modules.
73///
74/// - `include`: If set, only modules whose `module_id` matches are kept.
75/// - `exclude`: If set, modules whose `module_id` matches are removed.
76///
77/// Returns an error if either pattern is not a valid regex.
78///
79/// # Rust API note
80///
81/// In Python and TypeScript, `filter_modules` / `filterModules` is an *instance method*
82/// on `BaseScanner`. In Rust it is a free function because the `BaseScanner` trait must
83/// remain object-safe — adding `Self`-independent helpers as default methods would prevent
84/// trait object usage. Call this function directly with your module slice:
85///
86/// ```ignore
87/// let filtered = scanner::filter_modules(&modules, Some("my_app.*"), None)?;
88/// ```
89///
90/// # Errors
91///
92/// Returns `Err(regex::Error)` if `include` or `exclude` contain invalid regex patterns.
93pub fn filter_modules(
94    modules: &[ScannedModule],
95    include: Option<&str>,
96    exclude: Option<&str>,
97) -> Result<Vec<ScannedModule>, regex::Error> {
98    let mut result: Vec<ScannedModule> = modules.to_vec();
99
100    if let Some(pattern) = include {
101        let re = Regex::new(pattern)?;
102        result.retain(|m| re.is_match(&m.module_id));
103    }
104
105    if let Some(pattern) = exclude {
106        let re = Regex::new(pattern)?;
107        result.retain(|m| !re.is_match(&m.module_id));
108    }
109
110    Ok(result)
111}
112
113/// Resolve duplicate module IDs by appending `_2`, `_3`, etc.
114///
115/// A warning is appended to the module's warnings list when a rename occurs.
116pub fn deduplicate_ids(modules: Vec<ScannedModule>) -> Vec<ScannedModule> {
117    // Pre-scan to build the full set of original IDs, so that generated suffixed
118    // names skip any ID that already exists in the input (prevents forward collisions
119    // like `[a, a, a_2]` producing two `a_2` entries).
120    let original_ids: std::collections::HashSet<String> =
121        modules.iter().map(|m| m.module_id.clone()).collect();
122    let mut occurrence_count: std::collections::HashMap<String, usize> =
123        std::collections::HashMap::new();
124    let mut assigned: std::collections::HashSet<String> = std::collections::HashSet::new();
125    let mut result: Vec<ScannedModule> = Vec::with_capacity(modules.len());
126
127    for mut module in modules {
128        let mid = module.module_id.clone();
129        let count = occurrence_count.entry(mid.clone()).or_insert(0);
130        *count += 1;
131
132        if *count == 1 {
133            assigned.insert(mid.clone());
134        } else {
135            // Find the smallest suffix that doesn't collide with any original or
136            // already-assigned ID.
137            let mut suffix = *count;
138            let mut new_id = format!("{}_{}", mid, suffix);
139            while assigned.contains(&new_id) || original_ids.contains(&new_id) {
140                suffix += 1;
141                new_id = format!("{}_{}", mid, suffix);
142            }
143            assigned.insert(new_id.clone());
144            module.warnings.push(format!(
145                "Module ID renamed from '{}' to '{}' to avoid collision",
146                mid, new_id
147            ));
148            module.module_id = new_id;
149        }
150
151        result.push(module);
152    }
153
154    result
155}
156
157/// Infer behavioral annotations from an HTTP method.
158///
159/// Mapping:
160/// - GET    -> readonly=true, cacheable=true
161/// - DELETE -> destructive=true
162/// - PUT    -> idempotent=true
163/// - POST   -> default (all false; creates resources, not idempotent by spec)
164/// - PATCH  -> default (partial update, not standardly idempotent)
165/// - HEAD / OPTIONS / unknown -> default (all false)
166///
167/// Note: HEAD and OPTIONS intentionally return all-false defaults to match
168/// the Python and TypeScript implementations. They do NOT receive readonly=true.
169pub fn infer_annotations_from_method(method: &str) -> ModuleAnnotations {
170    match method.to_uppercase().as_str() {
171        "GET" => ModuleAnnotations {
172            readonly: true,
173            cacheable: true,
174            ..Default::default()
175        },
176        "DELETE" => ModuleAnnotations {
177            destructive: true,
178            ..Default::default()
179        },
180        "PUT" => ModuleAnnotations {
181            idempotent: true,
182            ..Default::default()
183        },
184        _ => ModuleAnnotations::default(),
185    }
186}
187
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Build a minimal module with the given ID and placeholder values for everything else.
    fn make_module(id: &str) -> ScannedModule {
        ScannedModule::new(
            id.into(),
            "test".into(),
            json!({}),
            json!({}),
            vec![],
            "app:func".into(),
        )
    }

    /// Build one placeholder module per ID, in the given order.
    fn make_modules(ids: &[&str]) -> Vec<ScannedModule> {
        ids.iter().copied().map(make_module).collect()
    }

    /// Collect the module IDs in order, for whole-list comparisons.
    fn ids_of(modules: &[ScannedModule]) -> Vec<&str> {
        modules.iter().map(|m| m.module_id.as_str()).collect()
    }

    // ---- filter_modules -------------------------------------------------

    #[test]
    fn test_filter_modules_include() {
        let modules = make_modules(&["users.get", "users.create", "tasks.list"]);
        let kept = filter_modules(&modules, Some("users"), None).unwrap();
        assert_eq!(kept.len(), 2);
        for module in &kept {
            assert!(module.module_id.starts_with("users"));
        }
    }

    #[test]
    fn test_filter_modules_exclude() {
        let modules = make_modules(&["users.get", "users.create", "tasks.list"]);
        let kept = filter_modules(&modules, None, Some("users")).unwrap();
        assert_eq!(ids_of(&kept), ["tasks.list"]);
    }

    #[test]
    fn test_filter_modules_both() {
        let modules = make_modules(&["users.get", "users.admin.create", "tasks.list"]);
        let kept = filter_modules(&modules, Some("users"), Some("admin")).unwrap();
        assert_eq!(ids_of(&kept), ["users.get"]);
    }

    // ---- deduplicate_ids: basics ----------------------------------------

    #[test]
    fn test_deduplicate_ids_no_duplicates() {
        let result = deduplicate_ids(make_modules(&["a", "b"]));
        assert_eq!(ids_of(&result), ["a", "b"]);
        assert!(result[0].warnings.is_empty());
    }

    #[test]
    fn test_deduplicate_ids_with_duplicates() {
        let result = deduplicate_ids(make_modules(&["a", "a", "a"]));
        assert_eq!(ids_of(&result), ["a", "a_2", "a_3"]);
        assert!(result[1].warnings[0].contains("renamed"));
    }

    // ---- infer_annotations_from_method: mapped methods ------------------

    #[test]
    fn test_infer_annotations_get() {
        let inferred = infer_annotations_from_method("GET");
        assert!(inferred.readonly);
        assert!(inferred.cacheable);
        assert!(!inferred.destructive);
    }

    #[test]
    fn test_infer_annotations_delete() {
        let inferred = infer_annotations_from_method("DELETE");
        assert!(inferred.destructive);
        assert!(!inferred.readonly);
    }

    #[test]
    fn test_infer_annotations_put() {
        let inferred = infer_annotations_from_method("PUT");
        assert!(inferred.idempotent);
        assert!(!inferred.readonly);
    }

    #[test]
    fn test_infer_annotations_post() {
        let inferred = infer_annotations_from_method("POST");
        assert!(!inferred.readonly);
        assert!(!inferred.destructive);
        assert!(!inferred.idempotent);
    }

    #[test]
    fn test_infer_annotations_case_insensitive() {
        let inferred = infer_annotations_from_method("get");
        assert!(inferred.readonly);
    }

    // ---- filter_modules: edge cases -------------------------------------

    #[test]
    fn test_filter_modules_no_filters() {
        let modules = make_modules(&["users.get", "tasks.list"]);
        let kept = filter_modules(&modules, None, None).unwrap();
        assert_eq!(kept.len(), 2);
    }

    #[test]
    fn test_filter_modules_include_matches_none() {
        let modules = make_modules(&["users.get", "tasks.list"]);
        let kept = filter_modules(&modules, Some("^zzz$"), None).unwrap();
        assert!(kept.is_empty());
    }

    #[test]
    fn test_filter_modules_exclude_matches_all() {
        let modules = make_modules(&["users.get", "users.create"]);
        let kept = filter_modules(&modules, None, Some("users")).unwrap();
        assert!(kept.is_empty());
    }

    #[test]
    fn test_filter_modules_invalid_include_regex() {
        let modules = make_modules(&["a"]);
        assert!(filter_modules(&modules, Some("[invalid"), None).is_err());
    }

    #[test]
    fn test_filter_modules_invalid_exclude_regex() {
        let modules = make_modules(&["a"]);
        assert!(filter_modules(&modules, None, Some("[invalid")).is_err());
    }

    // ---- deduplicate_ids: edge cases ------------------------------------

    #[test]
    fn test_deduplicate_ids_empty_list() {
        assert!(deduplicate_ids(Vec::new()).is_empty());
    }

    #[test]
    fn test_deduplicate_ids_original_unchanged() {
        let original = make_modules(&["a", "a"]);
        let snapshot = original.clone();
        let result = deduplicate_ids(original);

        // `original` was moved into deduplicate_ids, so the check runs against a
        // clone taken beforehand: it must be independent and unmodified.
        for untouched in &snapshot[..2] {
            assert_eq!(untouched.module_id, "a");
        }
        for untouched in &snapshot[..2] {
            assert!(untouched.warnings.is_empty());
        }

        // The returned list is the deduplicated one.
        assert_eq!(result[1].module_id, "a_2");
    }

    #[test]
    fn test_deduplicate_ids_mixed() {
        let result = deduplicate_ids(make_modules(&["a", "b", "a", "c", "b"]));
        assert_eq!(ids_of(&result), ["a", "b", "a_2", "c", "b_2"]);
    }

    #[test]
    fn test_deduplicate_warnings_first_no_warning() {
        let result = deduplicate_ids(make_modules(&["x", "x"]));
        let (first, duplicate) = (&result[0], &result[1]);
        assert!(
            first.warnings.is_empty(),
            "First occurrence should have no warning"
        );
        assert!(
            !duplicate.warnings.is_empty(),
            "Duplicate should have a warning"
        );
    }

    #[test]
    fn test_deduplicate_warnings_preserved() {
        let mut with_warning = make_module("dup");
        with_warning.warnings.push("existing warning".into());
        let result = deduplicate_ids(vec![make_module("dup"), with_warning]);

        // The pre-existing warning on the second module must survive, followed by
        // the rename warning added during deduplication.
        let warnings = &result[1].warnings;
        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0], "existing warning");
        assert!(warnings[1].contains("renamed"));
    }

    // ---- infer_annotations_from_method: default-only methods ------------

    #[test]
    fn test_infer_annotations_patch() {
        let inferred = infer_annotations_from_method("PATCH");
        assert!(!inferred.readonly);
        assert!(!inferred.destructive);
        assert!(!inferred.idempotent);
        assert!(!inferred.cacheable);
    }

    #[test]
    fn test_infer_annotations_head() {
        // Regression test (D10-001): HEAD used to return readonly=true. It must
        // return all-false defaults to match the Python and TypeScript behavior.
        let inferred = infer_annotations_from_method("HEAD");
        assert!(
            !inferred.readonly,
            "HEAD should NOT be readonly (matches Python/TS)"
        );
        assert!(!inferred.cacheable);
        assert!(!inferred.destructive);
        assert!(!inferred.idempotent);
    }

    #[test]
    fn test_infer_annotations_options() {
        // Regression test (D10-001): OPTIONS used to return readonly=true. It must
        // return all-false defaults to match the Python and TypeScript behavior.
        let inferred = infer_annotations_from_method("OPTIONS");
        assert!(
            !inferred.readonly,
            "OPTIONS should NOT be readonly (matches Python/TS)"
        );
        assert!(!inferred.cacheable);
        assert!(!inferred.destructive);
        assert!(!inferred.idempotent);
    }

    #[test]
    fn test_infer_annotations_head_case_insensitive() {
        let inferred = infer_annotations_from_method("head");
        assert!(
            !inferred.readonly,
            "HEAD (lowercase) should also return all-false defaults"
        );
    }

    // ---- deduplicate_ids: forward collisions ----------------------------

    #[test]
    fn test_deduplicate_ids_no_collision_with_preexisting_suffixed_id() {
        // Input [a, a, a_2]: the pre-scan reserves {"a", "a_2"}, so the second 'a'
        // cannot take 'a_2' and moves on to 'a_3'.
        let result = deduplicate_ids(make_modules(&["a", "a", "a_2"]));
        assert_eq!(result[0].module_id, "a", "first 'a' keeps its ID");
        assert_eq!(
            result[1].module_id, "a_3",
            "second 'a' skips 'a_2' (pre-existing) and picks 'a_3'"
        );
        assert_eq!(result[2].module_id, "a_2", "original 'a_2' keeps its ID");

        // No two modules may share an ID.
        let distinct: std::collections::HashSet<&String> =
            result.iter().map(|m| &m.module_id).collect();
        assert_eq!(distinct.len(), 3, "all three IDs must be distinct");
    }
}