apcore-toolkit 0.7.0

Shared scanner, schema extraction, and output toolkit for apcore framework adapters
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
// Registry writer for direct module registration.
//
// Converts ScannedModule instances into apcore Module implementations
// and registers them directly into an apcore Registry.
//
// Framework adapters provide a `HandlerFactory` to resolve targets to real
// async handlers. Without a factory, modules are registered with a passthrough
// handler that echoes inputs (useful for schema-only registration).

use std::pin::Pin;
use std::sync::Arc;

use tracing::{debug, warn};

use apcore::context::Context;
use apcore::errors::ModuleError;
use apcore::Registry;

use crate::output::types::{Verifier, WriteResult};
use crate::output::verifiers::{run_verifier_chain, RegistryVerifier};
use crate::types::ScannedModule;

/// Async handler function type for registered modules.
///
/// A shared (`Arc`), `Send + Sync` callable that receives the module inputs as
/// a JSON value together with a borrowed [`Context`], and returns a boxed,
/// pinned, `Send` future resolving to either the JSON output or a
/// [`ModuleError`].
///
/// The higher-ranked lifetime `'a` ties the returned future to the context
/// borrow, so a handler may keep using the context inside the future it
/// returns.
pub type HandlerFn = Arc<
    dyn for<'a> Fn(
            serde_json::Value,
            &'a Context<serde_json::Value>,
        ) -> Pin<
            Box<
                dyn std::future::Future<Output = Result<serde_json::Value, ModuleError>>
                    + Send
                    + 'a,
            >,
        > + Send
        + Sync,
>;

/// Factory that resolves a `target` string to an async handler.
///
/// Framework adapters implement this to map target strings (e.g., `"myapp:get_user"`)
/// to actual handler functions. For example, an Axum adapter might look up the
/// handler in a route table; a generic adapter might use a dynamic dispatch map.
///
/// Returning `None` means the target could not be resolved; `RegistryWriter`
/// then registers the module with its passthrough handler, which returns the
/// inputs unchanged.
///
/// ```ignore
/// let factory: HandlerFactory = Arc::new(|target: &str| {
///     let handler = lookup_handler(target);
///     Some(Arc::new(move |inputs, _ctx| {
///         let h = handler.clone();
///         Box::pin(async move { h.call(inputs).await })
///     }))
/// });
/// let writer = RegistryWriter::with_handler_factory(factory);
/// ```
pub type HandlerFactory = Arc<dyn Fn(&str) -> Option<HandlerFn> + Send + Sync>;

/// Registers ScannedModule instances directly into an apcore Registry.
///
/// This is the default writer used when no output_format is specified.
/// Instead of writing files, it registers modules directly for immediate use.
///
/// ## Handler Resolution
///
/// By default (`RegistryWriter::new()`), modules are registered with a passthrough
/// handler that returns inputs unchanged — useful for schema-only registration
/// where execution is handled elsewhere.
///
/// For executable modules, use `RegistryWriter::with_handler_factory(factory)` to
/// provide a [`HandlerFactory`] that resolves target strings to real handlers.
pub struct RegistryWriter {
    /// Optional resolver from a module's `target` string to a real handler.
    /// When absent, or when it returns `None` for a target, the module is
    /// registered with the passthrough handler.
    handler_factory: Option<HandlerFactory>,
    /// Optional allow-list of module-path prefixes. When set, a module is
    /// rejected with a failed `WriteResult` — before any handler factory is
    /// invoked — unless the module-path part of its `target` (everything
    /// before the first `:`) equals one of these prefixes or is a dotted
    /// descendant of one. Mirrors the `allowed_prefixes` parameter on the
    /// Python and TypeScript SDKs and provides a defence-in-depth boundary on
    /// dynamically-supplied targets.
    allowed_prefixes: Option<Vec<String>>,
}

impl Default for RegistryWriter {
    fn default() -> Self {
        Self::new()
    }
}

impl RegistryWriter {
    /// Create a RegistryWriter with passthrough handlers (schema-only registration).
    ///
    /// # Handler resolution
    ///
    /// Unlike the Python and TypeScript implementations which dynamically import
    /// the target function at write time (`resolve_target`), the Rust implementation
    /// registers a passthrough handler that echoes its inputs when no HandlerFactory
    /// is configured. This means calling a module registered by this writer will
    /// succeed but will not execute real business logic. To register real handlers,
    /// use the HandlerFactory integration.
    ///
    /// # Panics
    ///
    /// This constructor does not panic. However, note that without a `HandlerFactory`,
    /// all registered modules will use a passthrough handler that echoes inputs unchanged.
    /// This is suitable for schema-only registration. For real execution, use
    /// [`RegistryWriter::with_handler_factory`] to supply a factory that resolves targets
    /// to actual async handlers.
    pub fn new() -> Self {
        Self {
            handler_factory: None,
            allowed_prefixes: None,
        }
    }

    /// Create a RegistryWriter with a custom handler factory for target resolution.
    pub fn with_handler_factory(factory: HandlerFactory) -> Self {
        Self {
            handler_factory: Some(factory),
            allowed_prefixes: None,
        }
    }

    /// Restrict registration to modules whose `target` starts with one of the
    /// supplied prefixes. Modules with a non-matching target are rejected with
    /// a failed `WriteResult` and never reach the handler factory.
    ///
    /// Matches the `allowed_prefixes` parameter on the Python `RegistryWriter`
    /// and the TypeScript `allowedPrefixes` option. Use it to bound the set of
    /// callable Python/Rust paths a binding YAML may resolve to (defence in
    /// depth against forged or attacker-controlled `target` strings).
    pub fn with_allowed_prefixes(mut self, prefixes: Vec<String>) -> Self {
        self.allowed_prefixes = Some(prefixes);
        self
    }

    /// Returns `true` when the module target is permitted by the configured
    /// `allowed_prefixes` (or when no allow-list is configured).
    ///
    /// Performs boundary-aware module-path matching: the module path component
    /// of `target` (everything before the `:` separator) must equal the prefix
    /// or be a dotted descendant of it. Mirrors Python's
    /// `_module_path_matches_prefix` — `"myapp"` does NOT permit `"myappx"`.
    fn target_allowed(&self, target: &str) -> bool {
        match self.allowed_prefixes.as_ref() {
            None => true,
            Some(prefixes) => {
                let module_path = target.split(':').next().unwrap_or(target);
                prefixes
                    .iter()
                    .any(|p| module_path_matches_prefix(module_path, p))
            }
        }
    }
}

/// Prefix test on dotted module paths that respects segment boundaries.
///
/// `module_path` matches when it is identical to `prefix` or continues it
/// with a `.` (a dotted descendant). Trailing dots on `prefix` are ignored,
/// and a prefix that is empty once they are removed matches nothing.
/// Counterpart of the Python `_module_path_matches_prefix` helper in
/// `apcore-toolkit-python/src/apcore_toolkit/resolve_target.py`.
fn module_path_matches_prefix(module_path: &str, prefix: &str) -> bool {
    let base = prefix.trim_end_matches('.');
    if base.is_empty() {
        return false;
    }
    match module_path.strip_prefix(base) {
        // Either nothing is left (exact match) or the remainder starts a new
        // dotted segment; anything else (e.g. "myappx") shares characters only.
        Some(rest) => rest.is_empty() || rest.starts_with('.'),
        None => false,
    }
}

impl RegistryWriter {
    /// Register `modules` into `registry`, returning one [`WriteResult`] per
    /// module in input order.
    ///
    /// - `registry`: the apcore Registry that receives the modules.
    /// - `dry_run`: when true nothing is registered or checked; every module
    ///   simply yields a fresh `WriteResult`.
    /// - `verify`: when true, each module is looked up again after
    ///   registration to confirm it is retrievable.
    /// - `verifiers`: optional custom verifiers, run after the built-in check
    ///   as long as the result is still marked verified.
    ///
    /// A module whose target is refused by the allow-list, or whose
    /// registration fails, produces a failed result; the remaining modules
    /// are still processed.
    ///
    /// # Verifier contract for registry-based modules
    ///
    /// There is no output file for a registry module, so custom verifiers are
    /// called with `path = ""`. The built-in file-based verifiers
    /// (`YAMLVerifier`, `JSONVerifier`, etc.) skip gracefully in that case;
    /// custom verifiers must likewise accept an empty path without erroring
    /// and rely on `module_id` for registry-based checks.
    pub fn write(
        &self,
        modules: &[ScannedModule],
        registry: &mut Registry,
        dry_run: bool,
        verify: bool,
        verifiers: Option<&[&dyn Verifier]>,
    ) -> Vec<WriteResult> {
        modules
            .iter()
            .map(|module| {
                if dry_run {
                    WriteResult::new(module.module_id.clone())
                } else {
                    self.register_one(module, registry, verify, verifiers)
                }
            })
            .collect()
    }

    /// Run the allow-list check, registration and verification for a single
    /// module and report the outcome.
    fn register_one(
        &self,
        module: &ScannedModule,
        registry: &mut Registry,
        verify: bool,
        verifiers: Option<&[&dyn Verifier]>,
    ) -> WriteResult {
        // Refuse disallowed targets before the handler factory is consulted.
        if !self.target_allowed(&module.target) {
            warn!(
                module_id = %module.module_id,
                target = %module.target,
                "RegistryWriter: target rejected by allowed_prefixes"
            );
            let reason = format!(
                "target '{}' is not in allowed_prefixes — registration refused",
                module.target
            );
            return WriteResult::failed(module.module_id.clone(), None, reason);
        }

        let function_module = self.to_function_module(module);
        let descriptor = apcore::registry::registry::ModuleDescriptor {
            module_id: module.module_id.clone(),
            name: Some(module.module_id.clone()),
            description: module.description.clone(),
            documentation: module.documentation.clone(),
            input_schema: module.input_schema.clone(),
            output_schema: module.output_schema.clone(),
            version: module.version.clone(),
            tags: module.tags.clone(),
            annotations: module.annotations.clone(),
            examples: module.examples.clone(),
            metadata: module.metadata.clone(),
            display: module.display.clone(),
            sunset_date: None,
            dependencies: vec![],
            enabled: true,
        };

        // Unlike Python/TypeScript, a registration error is recorded for this
        // module only instead of aborting the batch: partial registration is
        // preferred so callers can inspect and handle each failure.
        let registration =
            registry.register(&module.module_id, Box::new(function_module), descriptor);
        if let Err(e) = registration {
            warn!(
                module_id = %module.module_id,
                error = %e,
                "RegistryWriter registration failed"
            );
            return WriteResult::failed(
                module.module_id.clone(),
                None,
                format!("Registration failed: {e}"),
            );
        }
        debug!("Registered module: {}", module.module_id);

        let fresh = WriteResult::new(module.module_id.clone());
        let outcome = if verify {
            verify_registry(&fresh, &module.module_id, registry)
        } else {
            fresh
        };

        // Custom verifiers only run while the result is still verified.
        match verifiers {
            Some(chain) if outcome.verified => {
                let chain_result = run_verifier_chain(chain, "", &module.module_id);
                if chain_result.ok {
                    outcome
                } else {
                    WriteResult::failed(
                        outcome.module_id,
                        outcome.path,
                        chain_result.error.unwrap_or_default(),
                    )
                }
            }
            _ => outcome,
        }
    }
}

impl RegistryWriter {
    /// Convert a ScannedModule to an apcore FunctionModule.
    ///
    /// The module's annotations (defaulted when absent) and its input/output
    /// schemas are copied into the resulting `FunctionModule`.
    ///
    /// If a handler factory is configured and resolves the target, the
    /// resolved handler is used. Otherwise — no factory, or the factory
    /// returned `None` for this target — the module gets a passthrough
    /// handler that returns inputs unchanged. The fallback is logged at debug
    /// level, with a message that distinguishes the two causes.
    fn to_function_module(&self, module: &ScannedModule) -> apcore::decorator::FunctionModule {
        let annotations = module.annotations.clone().unwrap_or_default();
        let input_schema = module.input_schema.clone();
        let output_schema = module.output_schema.clone();

        // Try to resolve the target via the handler factory
        if let Some(factory) = &self.handler_factory {
            if let Some(handler) = factory(&module.target) {
                return apcore::decorator::FunctionModule::new::<_, ()>(
                    annotations,
                    input_schema,
                    output_schema,
                    move |inputs: serde_json::Value,
                          ctx: &Context<serde_json::Value>|
                          -> Pin<
                        Box<
                            dyn std::future::Future<Output = Result<serde_json::Value, ModuleError>>
                                + Send
                                + '_,
                        >,
                    > { handler(inputs, ctx) },
                );
            }
            // A factory exists but declined this target: say so explicitly,
            // otherwise the log would wrongly suggest no factory was set.
            debug!(
                module_id = %module.module_id,
                target = %module.target,
                "RegistryWriter using passthrough handler (HandlerFactory did not resolve target)"
            );
        } else {
            debug!(
                module_id = %module.module_id,
                "RegistryWriter using passthrough handler (no HandlerFactory configured)"
            );
        }

        // Fallback: passthrough handler (schema-only registration)
        fn passthrough<'a>(
            inputs: serde_json::Value,
            _ctx: &'a Context<serde_json::Value>,
        ) -> Pin<
            Box<
                dyn std::future::Future<Output = Result<serde_json::Value, ModuleError>>
                    + Send
                    + 'a,
            >,
        > {
            Box::pin(async move { Ok(inputs) })
        }

        apcore::decorator::FunctionModule::new::<_, ()>(
            annotations,
            input_schema,
            output_schema,
            passthrough,
        )
    }
}

/// Verify that a module was successfully registered and is retrievable.
fn verify_registry(result: &WriteResult, module_id: &str, registry: &Registry) -> WriteResult {
    let verifier = RegistryVerifier::new(registry);
    let vr = verifier.verify("", module_id);
    if vr.ok {
        result.clone()
    } else {
        WriteResult::failed(module_id.into(), None, vr.error.unwrap_or_default())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Build a tag-less module with trivial object schemas.
    fn module_with(module_id: &str, description: &str, target: &str) -> ScannedModule {
        ScannedModule::new(
            module_id.into(),
            description.into(),
            json!({"type": "object"}),
            json!({"type": "object"}),
            vec![],
            target.into(),
        )
    }

    /// The canonical fixture: module `users.get` targeting `app:get_user`.
    fn sample_module() -> ScannedModule {
        ScannedModule::new(
            "users.get".into(),
            "Get user".into(),
            json!({"type": "object"}),
            json!({"type": "object"}),
            vec!["users".into()],
            "app:get_user".into(),
        )
    }

    /// Writer restricted to the given module-path prefixes.
    fn writer_allowing(prefixes: &[&str]) -> RegistryWriter {
        let owned = prefixes.iter().map(|p| (*p).into()).collect();
        RegistryWriter::new().with_allowed_prefixes(owned)
    }

    #[test]
    fn test_write_dry_run() {
        let mut registry = Registry::new();
        let results =
            RegistryWriter::new().write(&[sample_module()], &mut registry, true, false, None);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].module_id, "users.get");
        // A dry run must leave the registry untouched.
        assert!(!registry.has("users.get"));
    }

    #[test]
    fn test_write_registers_module() {
        let mut registry = Registry::new();
        let results =
            RegistryWriter::new().write(&[sample_module()], &mut registry, false, false, None);
        assert_eq!(results.len(), 1);
        assert!(registry.has("users.get"));
    }

    #[test]
    fn test_write_with_verify() {
        let mut registry = Registry::new();
        let results =
            RegistryWriter::new().write(&[sample_module()], &mut registry, false, true, None);
        assert_eq!(results.len(), 1);
        assert!(results[0].verified);
    }

    #[test]
    fn test_write_empty_list() {
        let mut registry = Registry::new();
        let results = RegistryWriter::new().write(&[], &mut registry, false, false, None);
        assert!(results.is_empty());
    }

    #[test]
    fn test_custom_verifier_runs_even_when_verify_false() {
        // D11-011: with verify=false the built-in registry check is skipped,
        // yet custom verifiers still have to run. A failing custom verifier
        // must therefore yield verified=false.
        use crate::output::types::{Verifier, VerifyResult};

        struct AlwaysFail;
        impl Verifier for AlwaysFail {
            fn verify(&self, _path: &str, _module_id: &str) -> VerifyResult {
                VerifyResult::fail("custom verifier failed".into())
            }
        }

        let failing_verifier = AlwaysFail;
        let verifiers: &[&dyn Verifier] = &[&failing_verifier];
        let mut registry = Registry::new();
        // verify=false: no built-in check, custom verifier still executes.
        let results = RegistryWriter::new().write(
            &[sample_module()],
            &mut registry,
            false,
            false,
            Some(verifiers),
        );
        assert_eq!(results.len(), 1);
        // Registration itself succeeded...
        assert!(registry.has("users.get"));
        // ...but the custom verifier ran and failed.
        let outcome = &results[0];
        assert!(
            !outcome.verified,
            "custom verifier must run even when verify=false; result: {:?}",
            outcome
        );
        let message = outcome.verification_error.as_deref().unwrap_or("");
        assert!(
            message.contains("custom verifier failed"),
            "verification_error should contain the custom verifier message"
        );
    }

    #[test]
    fn test_write_multiple_modules() {
        let modules = vec![
            module_with("mod.a", "A", "app:a"),
            module_with("mod.b", "B", "app:b"),
        ];
        let mut registry = Registry::new();
        let results = RegistryWriter::new().write(&modules, &mut registry, false, false, None);
        assert_eq!(results.len(), 2);
        for module_id in ["mod.a", "mod.b"] {
            assert!(registry.has(module_id));
        }
        assert!(results[0].verified);
        assert!(results[1].verified);
    }

    // D11-2 regression: allowed_prefixes is a defence-in-depth allow-list on
    // the `target` field. A module whose target matches no prefix has to be
    // rejected with a failed WriteResult and must never be registered.
    #[test]
    fn test_allowed_prefixes_rejects_non_matching_target() {
        // Prefixes are bare module paths (no trailing colon), matching the
        // canonical Python/TypeScript behavior where prefixes are dotted
        // module paths rather than targets with a `:callable` suffix.
        let writer = writer_allowing(&["app", "myapp"]);
        let allowed = sample_module(); // target = "app:get_user"
        let denied = module_with("evil.module", "Forged target", "evil:run_attacker_code");
        let mut registry = Registry::new();
        let results = writer.write(&[allowed, denied], &mut registry, false, false, None);
        assert_eq!(results.len(), 2);
        // app:get_user is covered by the allow-list, so it is registered.
        assert!(registry.has("users.get"));
        assert!(results[0].verified);
        // evil:* is not covered: rejected and NOT registered.
        assert!(!registry.has("evil.module"));
        assert!(!results[1].verified);
        let err = results[1].verification_error.as_deref().unwrap_or("");
        assert!(
            err.contains("allowed_prefixes"),
            "rejection message should mention allowed_prefixes: got {err:?}"
        );
    }

    // D11-002 regression: boundary-aware module-path matching. Prefix `"myapp"`
    // has to reject `"myappx.evil:fn"` (peer SDKs already do; Rust used to
    // accept it because of a bare `starts_with`). Mirrors Python's
    // `_module_path_matches_prefix`.
    #[test]
    fn test_target_allowed_boundary_aware() {
        let top_level = writer_allowing(&["myapp"]);
        // Exact match
        assert!(top_level.target_allowed("myapp:fn"));
        // Dotted descendants
        assert!(top_level.target_allowed("myapp.foo:fn"));
        assert!(top_level.target_allowed("myapp.foo.bar:fn"));
        // Shared leading characters but no dotted boundary
        assert!(!top_level.target_allowed("myappx.evil:fn"));
        assert!(!top_level.target_allowed("myappx:fn"));
        // Unrelated module path
        assert!(!top_level.target_allowed("other:fn"));

        // Nested prefix
        let nested = writer_allowing(&["myapp.foo"]);
        assert!(nested.target_allowed("myapp.foo:fn"));
        assert!(nested.target_allowed("myapp.foo.bar:fn"));
        assert!(!nested.target_allowed("myapp.foobar:fn"));
        assert!(!nested.target_allowed("myapp:fn"));

        // Trailing-dot tolerance and empty-prefix rejection
        let trailing_dot = writer_allowing(&["myapp."]);
        assert!(trailing_dot.target_allowed("myapp:fn"));
        let empty_prefix = writer_allowing(&[""]);
        assert!(!empty_prefix.target_allowed("anything:fn"));
    }

    #[test]
    fn test_allowed_prefixes_default_none_admits_everything() {
        // With no allowed_prefixes configured every target is admitted, which
        // preserves the behaviour for callers that have not opted in.
        let module = module_with("any.module", "Any target", "anything-goes:func");
        let mut registry = Registry::new();
        let results = RegistryWriter::new().write(&[module], &mut registry, false, false, None);
        assert_eq!(results.len(), 1);
        assert!(registry.has("any.module"));
    }
}