Skip to main content

apcore_toolkit/
scanner.rs

// Scanner trait and shared utilities for framework scanners.
//
// Provides filtering, deduplication, and annotation inference.
// Framework-specific implementations live in separate crates
// (e.g., axum-apcore, actix-apcore).
6
7use apcore::module::ModuleAnnotations;
8use async_trait::async_trait;
9use regex::Regex;
10
11use crate::types::ScannedModule;
12
13/// Abstract interface for framework scanners.
14///
15/// Implementors provide `scan()` for framework-specific endpoint scanning
16/// and `source_name()` for identification. The `App` type parameter allows
17/// each framework adapter to accept its own application type.
18///
19/// # Language-specific API shape
20///
21/// The Rust `BaseScanner` trait intentionally contains only the primitive `scan()` and
22/// `source_name()` operations, keeping it object-safe (usable as `Box<dyn BaseScanner>`).
23///
24/// Helper utilities — [`filter_modules`], [`deduplicate_ids`], [`infer_annotations_from_method`] —
25/// are free functions in the `scanner` module rather than trait default methods.
26/// This differs from Python and TypeScript where these are instance methods on the class.
27///
28/// Usage:
29/// ```ignore
30/// let filtered = scanner::filter_modules(&modules, Some("my.*"), None)?;
31/// let deduplicated = scanner::deduplicate_ids(filtered);
32/// ```
33///
34/// # Note: ConventionScanner
35///
36/// `ConventionScanner` (available in Python as `apcore_toolkit.ConventionScanner`) is
37/// **Python-only**. It relies on Python's `importlib` module introspection for plain-function
38/// discovery, which has no equivalent in Rust. Rust consumers should use `BaseScanner`
39/// implementations that work with Rust's type system directly.
40///
41/// ```ignore
42/// // Example: Axum adapter
43/// struct AxumScanner;
44///
45/// #[async_trait]
46/// impl BaseScanner<axum::Router> for AxumScanner {
47///     async fn scan(&self, app: &axum::Router) -> Vec<ScannedModule> { /* ... */ }
48///     fn source_name(&self) -> &str { "axum" }
49/// }
50///
51/// // Example: Actix adapter
52/// struct ActixScanner;
53///
54/// #[async_trait]
55/// impl BaseScanner<()> for ActixScanner {
56///     async fn scan(&self, _app: &()) -> Vec<ScannedModule> { /* ... */ }
57///     fn source_name(&self) -> &str { "actix-web" }
58/// }
59/// ```
60#[async_trait]
61pub trait BaseScanner<App: Send + Sync = ()> {
62    /// Scan endpoints and return module definitions.
63    ///
64    /// The `app` parameter receives framework-specific state (e.g., `axum::Router`,
65    /// `actix_web::App`). Use `()` if no app context is needed.
66    async fn scan(&self, app: &App) -> Vec<ScannedModule>;
67
68    /// Return human-readable scanner name (e.g., "axum", "actix-web").
69    fn source_name(&self) -> &str;
70}
71
72/// Apply include/exclude regex filters to scanned modules.
73///
74/// - `include`: If set, only modules whose `module_id` matches are kept.
75/// - `exclude`: If set, modules whose `module_id` matches are removed.
76///
77/// Returns an error if either pattern is not a valid regex.
78///
79/// # Rust API note
80///
81/// In Python and TypeScript, `filter_modules` / `filterModules` is an *instance method*
82/// on `BaseScanner`. In Rust it is a free function because the `BaseScanner` trait must
83/// remain object-safe — adding `Self`-independent helpers as default methods would prevent
84/// trait object usage. Call this function directly with your module slice:
85///
86/// ```ignore
87/// let filtered = scanner::filter_modules(&modules, Some("my_app.*"), None)?;
88/// ```
89///
90/// # Errors
91///
92/// Returns `Err(regex::Error)` if `include` or `exclude` contain invalid regex patterns.
93pub fn filter_modules(
94    modules: &[ScannedModule],
95    include: Option<&str>,
96    exclude: Option<&str>,
97) -> Result<Vec<ScannedModule>, regex::Error> {
98    let mut result: Vec<ScannedModule> = modules.to_vec();
99
100    if let Some(pattern) = include {
101        let re = Regex::new(pattern)?;
102        result.retain(|m| re.is_match(&m.module_id));
103    }
104
105    if let Some(pattern) = exclude {
106        let re = Regex::new(pattern)?;
107        result.retain(|m| !re.is_match(&m.module_id));
108    }
109
110    Ok(result)
111}
112
113/// Resolve duplicate module IDs by appending `_2`, `_3`, etc.
114///
115/// A warning is appended to the module's warnings list when a rename occurs.
116pub fn deduplicate_ids(modules: Vec<ScannedModule>) -> Vec<ScannedModule> {
117    // Pre-scan to build the full set of original IDs, so that generated suffixed
118    // names skip any ID that already exists in the input (prevents forward collisions
119    // like `[a, a, a_2]` producing two `a_2` entries).
120    let original_ids: std::collections::HashSet<String> =
121        modules.iter().map(|m| m.module_id.clone()).collect();
122    let mut occurrence_count: std::collections::HashMap<String, usize> =
123        std::collections::HashMap::new();
124    let mut assigned: std::collections::HashSet<String> = std::collections::HashSet::new();
125    let mut result: Vec<ScannedModule> = Vec::with_capacity(modules.len());
126
127    for mut module in modules {
128        let mid = module.module_id.clone();
129        let count = occurrence_count.entry(mid.clone()).or_insert(0);
130        *count += 1;
131
132        if *count == 1 {
133            assigned.insert(mid.clone());
134        } else {
135            // Find the smallest suffix that doesn't collide with any original or
136            // already-assigned ID.
137            let mut suffix = *count;
138            let mut new_id = format!("{}_{}", mid, suffix);
139            while assigned.contains(&new_id) || original_ids.contains(&new_id) {
140                suffix += 1;
141                new_id = format!("{}_{}", mid, suffix);
142            }
143            assigned.insert(new_id.clone());
144            module.warnings.push(format!(
145                "Module ID renamed from '{}' to '{}' to avoid collision",
146                mid, new_id
147            ));
148            module.module_id = new_id;
149        }
150
151        result.push(module);
152    }
153
154    result
155}
156
157/// Infer behavioral annotations from an HTTP method.
158///
159/// Canonical RFC 9110 mapping (aligned with Python and TypeScript SDKs and
160/// the spec at apcore-toolkit/docs/features/scanning.md):
161/// - GET            -> readonly=true, cacheable=true
162/// - HEAD           -> readonly=true (no cacheable)
163/// - OPTIONS        -> readonly=true (no cacheable)
164/// - DELETE         -> destructive=true
165/// - PUT            -> idempotent=true
166/// - POST           -> default (all false; creates resources, not idempotent by spec)
167/// - PATCH          -> default (partial update, not standardly idempotent)
168/// - unknown method -> default (all false)
169pub fn infer_annotations_from_method(method: &str) -> ModuleAnnotations {
170    match method.to_uppercase().as_str() {
171        "GET" => ModuleAnnotations {
172            readonly: true,
173            cacheable: true,
174            ..Default::default()
175        },
176        "HEAD" | "OPTIONS" => ModuleAnnotations {
177            readonly: true,
178            ..Default::default()
179        },
180        "DELETE" => ModuleAnnotations {
181            destructive: true,
182            ..Default::default()
183        },
184        "PUT" => ModuleAnnotations {
185            idempotent: true,
186            ..Default::default()
187        },
188        _ => ModuleAnnotations::default(),
189    }
190}
191
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Build a minimal module that differs from its siblings only by ID.
    fn make_module(id: &str) -> ScannedModule {
        ScannedModule::new(
            id.into(),
            "test".into(),
            json!({}),
            json!({}),
            vec![],
            "app:func".into(),
        )
    }

    /// Build one minimal module per ID, in the given order.
    fn make_modules(ids: &[&str]) -> Vec<ScannedModule> {
        ids.iter().copied().map(make_module).collect()
    }

    #[test]
    fn test_filter_modules_include() {
        let modules = make_modules(&["users.get", "users.create", "tasks.list"]);
        let kept = filter_modules(&modules, Some("users"), None).unwrap();
        assert_eq!(kept.len(), 2);
        assert!(kept.iter().all(|m| m.module_id.starts_with("users")));
    }

    #[test]
    fn test_filter_modules_exclude() {
        let modules = make_modules(&["users.get", "users.create", "tasks.list"]);
        let kept = filter_modules(&modules, None, Some("users")).unwrap();
        assert_eq!(kept.len(), 1);
        assert_eq!(kept[0].module_id, "tasks.list");
    }

    #[test]
    fn test_filter_modules_both() {
        let modules = make_modules(&["users.get", "users.admin.create", "tasks.list"]);
        let kept = filter_modules(&modules, Some("users"), Some("admin")).unwrap();
        assert_eq!(kept.len(), 1);
        assert_eq!(kept[0].module_id, "users.get");
    }

    #[test]
    fn test_deduplicate_ids_no_duplicates() {
        let result = deduplicate_ids(make_modules(&["a", "b"]));
        assert_eq!(result[0].module_id, "a");
        assert_eq!(result[1].module_id, "b");
        assert!(result[0].warnings.is_empty());
    }

    #[test]
    fn test_deduplicate_ids_with_duplicates() {
        let result = deduplicate_ids(make_modules(&["a", "a", "a"]));
        assert_eq!(result[0].module_id, "a");
        assert_eq!(result[1].module_id, "a_2");
        assert_eq!(result[2].module_id, "a_3");
        assert!(result[1].warnings[0].contains("renamed"));
    }

    #[test]
    fn test_infer_annotations_get() {
        let annotations = infer_annotations_from_method("GET");
        assert!(annotations.readonly);
        assert!(annotations.cacheable);
        assert!(!annotations.destructive);
    }

    #[test]
    fn test_infer_annotations_delete() {
        let annotations = infer_annotations_from_method("DELETE");
        assert!(annotations.destructive);
        assert!(!annotations.readonly);
    }

    #[test]
    fn test_infer_annotations_put() {
        let annotations = infer_annotations_from_method("PUT");
        assert!(annotations.idempotent);
        assert!(!annotations.readonly);
    }

    #[test]
    fn test_infer_annotations_post() {
        let annotations = infer_annotations_from_method("POST");
        assert!(!annotations.readonly);
        assert!(!annotations.destructive);
        assert!(!annotations.idempotent);
    }

    #[test]
    fn test_infer_annotations_case_insensitive() {
        let annotations = infer_annotations_from_method("get");
        assert!(annotations.readonly);
    }

    #[test]
    fn test_filter_modules_no_filters() {
        let modules = make_modules(&["users.get", "tasks.list"]);
        let kept = filter_modules(&modules, None, None).unwrap();
        assert_eq!(kept.len(), 2);
    }

    #[test]
    fn test_filter_modules_include_matches_none() {
        let modules = make_modules(&["users.get", "tasks.list"]);
        let kept = filter_modules(&modules, Some("^zzz$"), None).unwrap();
        assert!(kept.is_empty());
    }

    #[test]
    fn test_filter_modules_exclude_matches_all() {
        let modules = make_modules(&["users.get", "users.create"]);
        let kept = filter_modules(&modules, None, Some("users")).unwrap();
        assert!(kept.is_empty());
    }

    #[test]
    fn test_filter_modules_invalid_include_regex() {
        let modules = make_modules(&["a"]);
        let outcome = filter_modules(&modules, Some("[invalid"), None);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_filter_modules_invalid_exclude_regex() {
        let modules = make_modules(&["a"]);
        let outcome = filter_modules(&modules, None, Some("[invalid"));
        assert!(outcome.is_err());
    }

    #[test]
    fn test_deduplicate_ids_empty_list() {
        let result = deduplicate_ids(Vec::new());
        assert!(result.is_empty());
    }

    #[test]
    fn test_deduplicate_ids_original_unchanged() {
        let input = make_modules(&["a", "a"]);
        let snapshot = input.clone();
        let result = deduplicate_ids(input);

        // `deduplicate_ids` takes ownership of its input, so the independent
        // clone taken beforehand is what must remain unmodified.
        assert_eq!(snapshot[0].module_id, "a");
        assert_eq!(snapshot[1].module_id, "a");
        assert!(snapshot[0].warnings.is_empty());
        assert!(snapshot[1].warnings.is_empty());

        // The returned modules carry the deduplicated IDs.
        assert_eq!(result[1].module_id, "a_2");
    }

    #[test]
    fn test_deduplicate_ids_mixed() {
        let result = deduplicate_ids(make_modules(&["a", "b", "a", "c", "b"]));
        let expected = ["a", "b", "a_2", "c", "b_2"];
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(result[index].module_id, *want);
        }
    }

    #[test]
    fn test_deduplicate_warnings_first_no_warning() {
        let result = deduplicate_ids(make_modules(&["x", "x"]));
        assert!(
            result[0].warnings.is_empty(),
            "First occurrence should have no warning"
        );
        assert!(
            !result[1].warnings.is_empty(),
            "Duplicate should have a warning"
        );
    }

    #[test]
    fn test_deduplicate_warnings_preserved() {
        let mut second = make_module("dup");
        second.warnings.push("existing warning".into());
        let result = deduplicate_ids(vec![make_module("dup"), second]);

        // The pre-existing warning on the duplicate must survive, followed by
        // the newly added rename warning.
        assert_eq!(result[1].warnings.len(), 2);
        assert_eq!(result[1].warnings[0], "existing warning");
        assert!(result[1].warnings[1].contains("renamed"));
    }

    #[test]
    fn test_infer_annotations_patch() {
        let annotations = infer_annotations_from_method("PATCH");
        assert!(!annotations.readonly);
        assert!(!annotations.destructive);
        assert!(!annotations.idempotent);
        assert!(!annotations.cacheable);
    }

    #[test]
    fn test_infer_annotations_head() {
        // Canonical RFC 9110 mapping: HEAD is readonly but not cacheable, in line
        // with the Python and TypeScript implementations and the spec in
        // apcore-toolkit/docs/features/scanning.md.
        let annotations = infer_annotations_from_method("HEAD");
        assert!(annotations.readonly, "HEAD must be readonly (matches Python/TS)");
        assert!(!annotations.cacheable, "HEAD must NOT be cacheable");
        assert!(!annotations.destructive);
        assert!(!annotations.idempotent);
    }

    #[test]
    fn test_infer_annotations_options() {
        // Canonical RFC 9110 mapping: OPTIONS is readonly but not cacheable, in
        // line with the Python and TypeScript implementations and the spec in
        // apcore-toolkit/docs/features/scanning.md.
        let annotations = infer_annotations_from_method("OPTIONS");
        assert!(annotations.readonly, "OPTIONS must be readonly (matches Python/TS)");
        assert!(!annotations.cacheable, "OPTIONS must NOT be cacheable");
        assert!(!annotations.destructive);
        assert!(!annotations.idempotent);
    }

    #[test]
    fn test_infer_annotations_head_case_insensitive() {
        let annotations = infer_annotations_from_method("head");
        assert!(
            annotations.readonly,
            "HEAD (lowercase) should also return readonly=true"
        );
        assert!(!annotations.cacheable);
    }

    #[test]
    fn test_deduplicate_ids_no_collision_with_preexisting_suffixed_id() {
        use std::collections::HashSet;

        // Input [a, a, a_2]: the pre-scan reserves {"a", "a_2"}, so the second
        // 'a' cannot take 'a_2' and moves on to 'a_3'.
        let result = deduplicate_ids(make_modules(&["a", "a", "a_2"]));
        assert_eq!(result[0].module_id, "a", "first 'a' keeps its ID");
        assert_eq!(
            result[1].module_id, "a_3",
            "second 'a' skips 'a_2' (pre-existing) and picks 'a_3'"
        );
        assert_eq!(result[2].module_id, "a_2", "original 'a_2' keeps its ID");

        // Every resulting ID must be distinct.
        let distinct: HashSet<_> = result.iter().map(|m| &m.module_id).collect();
        assert_eq!(distinct.len(), 3, "all three IDs must be distinct");
    }
}