1use serde::{Deserialize, Serialize};
12use std::collections::{BTreeMap, BTreeSet};
13
14use crate::extension_inclusion::{ExtensionCategory, InclusionEntry, InclusionList};
15
/// Host capability an extension may exercise; one axis of the conformance
/// matrix (the other axis is the extension category).
///
/// Serialized by serde in `snake_case` (for example `Http` <-> `"http"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum HostCapability {
 /// Reading files.
 Read,
 /// Writing or editing files.
 Write,
 /// Executing commands.
 Exec,
 /// Making HTTP requests.
 Http,
 /// Accessing session state.
 Session,
 /// Prompting the user or rendering UI.
 Ui,
 /// Emitting structured logs.
 Log,
 /// Accessing environment variables.
 Env,
 /// Calling a non-core tool via `pi.tool()`.
 Tool,
}
36
37impl HostCapability {
38 #[must_use]
40 pub fn from_str_loose(s: &str) -> Option<Self> {
41 match s.to_ascii_lowercase().as_str() {
42 "read" => Some(Self::Read),
43 "write" => Some(Self::Write),
44 "exec" => Some(Self::Exec),
45 "http" => Some(Self::Http),
46 "session" => Some(Self::Session),
47 "ui" => Some(Self::Ui),
48 "log" => Some(Self::Log),
49 "env" => Some(Self::Env),
50 "tool" => Some(Self::Tool),
51 _ => None,
52 }
53 }
54
55 #[must_use]
57 pub const fn all() -> &'static [Self] {
58 &[
59 Self::Read,
60 Self::Write,
61 Self::Exec,
62 Self::Http,
63 Self::Session,
64 Self::Ui,
65 Self::Log,
66 Self::Env,
67 Self::Tool,
68 ]
69 }
70}
71
/// One expected behavior attached to a matrix cell.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExpectedBehavior {
 /// Human-readable summary of what the extension does.
 pub description: String,
 /// Protocol surface exercised (for example `host_call(method=http)`).
 pub protocol_surface: String,
 /// Condition under which the behavior counts as passing.
 pub pass_criteria: String,
 /// Condition under which the behavior counts as failing.
 pub fail_criteria: String,
}
88
/// A single (category, capability) cell of the conformance matrix.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConformanceCell {
 /// Extension category axis of the cell.
 pub category: ExtensionCategory,
 /// Host capability axis of the cell.
 pub capability: HostCapability,
 /// Whether the cell is required (as decided by `is_required_cell`).
 pub required: bool,
 /// Behaviors expected for this cell (never empty for cells emitted by
 /// `build_test_plan`, which skips cells without behaviors).
 pub behaviors: Vec<ExpectedBehavior>,
 /// Ids of extensions whose category matches and whose derived
 /// capabilities include `capability`.
 pub exemplar_extensions: Vec<String>,
}
103
/// Fixture coverage record for one matrix cell.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FixtureAssignment {
 /// Cell identifier; `build_test_plan` formats it as
 /// `"{category:?}:{capability:?}"`.
 pub cell_key: String,
 /// Extension ids assigned as fixtures for the cell.
 pub fixture_extensions: Vec<String>,
 /// Minimum number of fixtures needed; `build_test_plan` uses 2 for
 /// required cells and 1 otherwise.
 pub min_fixtures: usize,
 /// `true` when `fixture_extensions.len() >= min_fixtures`.
 pub coverage_met: bool,
}
120
/// Pass/fail criteria that apply to a whole extension category.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategoryCriteria {
 /// Category the criteria apply to.
 pub category: ExtensionCategory,
 /// Conditions that must hold for the category to pass.
 pub must_pass: Vec<String>,
 /// Conditions that count as a failure.
 pub failure_conditions: Vec<String>,
 /// Aspects explicitly not covered by conformance testing.
 pub out_of_scope: Vec<String>,
}
132
/// Complete conformance test plan as produced by `build_test_plan`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConformanceTestPlan {
 /// Schema identifier; `build_test_plan` writes
 /// `"pi.ext.conformance-matrix.v1"`.
 pub schema: String,
 /// Generation timestamp string (presumably ISO-8601, judging by the
 /// `chrono_now_iso` helper name; confirm against that helper).
 pub generated_at: String,
 /// Task identifier passed to `build_test_plan`.
 pub task: String,
 /// All matrix cells that have at least one expected behavior.
 pub matrix: Vec<ConformanceCell>,
 /// One fixture assignment per matrix cell, in the same order as `matrix`.
 pub fixture_assignments: Vec<FixtureAssignment>,
 /// Category-level pass/fail criteria.
 pub category_criteria: Vec<CategoryCriteria>,
 /// Aggregate coverage counters.
 pub coverage: CoverageSummary,
}
148
/// Aggregate coverage counters computed by `build_test_plan`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoverageSummary {
 /// Number of cells in the matrix.
 pub total_cells: usize,
 /// Number of cells marked as required.
 pub required_cells: usize,
 /// Number of cells whose fixture coverage is met.
 pub covered_cells: usize,
 /// Number of required cells whose fixture coverage is not met.
 pub uncovered_required_cells: usize,
 /// Number of distinct extension ids considered.
 pub total_exemplar_extensions: usize,
 /// Number of distinct categories among the considered extensions.
 pub categories_covered: usize,
 /// Number of distinct capabilities derived across the considered extensions.
 pub capabilities_covered: usize,
}
160
/// Upper bound on the number of behaviors `push_behavior_bounded` will store
/// in a single behavior list.
const MAX_BEHAVIORS_PER_CELL: usize = 16;
167
168fn push_behavior_bounded(behaviors: &mut Vec<ExpectedBehavior>, behavior: ExpectedBehavior) {
170 if behaviors.len() < MAX_BEHAVIORS_PER_CELL {
171 behaviors.push(behavior);
172 }
173 }
175
/// Per-extension API usage record deserialized from the API matrix.
#[derive(Debug, Clone, Deserialize)]
pub struct ApiMatrixEntry {
 /// Registration type names (for example `"tool"` or `"event_hook"`).
 pub registration_types: Vec<String>,
 /// Hostcall names; scanned by substring in `capabilities_from_api_entry`.
 pub hostcalls: Vec<String>,
 /// Capability names, parsed with `HostCapability::from_str_loose`.
 pub capabilities_required: Vec<String>,
 /// Events listened to (not read anywhere in this part of the file).
 pub events_listened: Vec<String>,
 /// Node API module names (for example `"fs"` or `"child_process"`).
 pub node_apis: Vec<String>,
 /// Third-party dependencies (not read anywhere in this part of the file).
 pub third_party_deps: Vec<String>,
}
186
/// Deserialized API matrix document.
#[derive(Debug, Clone, Deserialize)]
pub struct ApiMatrix {
 /// Schema identifier of the document.
 pub schema: String,
 /// API usage entries; `build_test_plan` looks them up by extension id.
 pub extensions: BTreeMap<String, ApiMatrixEntry>,
}
193
194#[must_use]
196#[allow(clippy::too_many_lines)]
197fn build_behaviors(
198 category: &ExtensionCategory,
199 capability: HostCapability,
200) -> Vec<ExpectedBehavior> {
201 let mut behaviors = Vec::new();
202
203 if matches!(capability, HostCapability::Log) {
205 push_behavior_bounded(
206 &mut behaviors,
207 ExpectedBehavior {
208 description: "Extension load emits structured log".into(),
209 protocol_surface: "pi.ext.log.v1".into(),
210 pass_criteria: "Load event logged with correct extension_id and schema".into(),
211 fail_criteria: "Missing load log or wrong extension_id".into(),
212 },
213 );
214 return behaviors;
215 }
216
217 match category {
218 ExtensionCategory::Tool => match capability {
219 HostCapability::Read => behaviors.push(ExpectedBehavior {
220 description: "Tool reads files via pi.tool(read/grep/find/ls)".into(),
221 protocol_surface: "host_call(method=tool, name∈{read,grep,find,ls})".into(),
222 pass_criteria:
223 "Hostcall completes with correct file content; capability derived as read"
224 .into(),
225 fail_criteria: "Hostcall denied, wrong capability derivation, or incorrect content"
226 .into(),
227 }),
228 HostCapability::Write => behaviors.push(ExpectedBehavior {
229 description: "Tool writes/edits files via pi.tool(write/edit)".into(),
230 protocol_surface: "host_call(method=tool, name∈{write,edit})".into(),
231 pass_criteria: "Hostcall completes; file mutation applied correctly".into(),
232 fail_criteria: "Hostcall denied or file not mutated".into(),
233 }),
234 HostCapability::Exec => behaviors.push(ExpectedBehavior {
235 description: "Tool executes commands via pi.exec() or pi.tool(bash)".into(),
236 protocol_surface: "host_call(method=exec) or host_call(method=tool, name=bash)"
237 .into(),
238 pass_criteria: "Command runs, stdout/stderr/exitCode returned".into(),
239 fail_criteria: "Execution denied, timeout without error, or wrong exit code".into(),
240 }),
241 HostCapability::Http => behaviors.push(ExpectedBehavior {
242 description: "Tool makes HTTP requests via pi.http()".into(),
243 protocol_surface: "host_call(method=http)".into(),
244 pass_criteria: "Request sent, response returned with status/body".into(),
245 fail_criteria: "HTTP denied or malformed response".into(),
246 }),
247 _ => {}
248 },
249 ExtensionCategory::Command => match capability {
250 HostCapability::Ui => behaviors.push(ExpectedBehavior {
251 description: "Slash command prompts user via pi.ui.*".into(),
252 protocol_surface: "host_call(method=ui, op∈{select,input,confirm})".into(),
253 pass_criteria: "UI prompt dispatched and response routed back to handler".into(),
254 fail_criteria: "UI call denied in interactive mode or response lost".into(),
255 }),
256 HostCapability::Session => behaviors.push(ExpectedBehavior {
257 description: "Command accesses session state via pi.session.*".into(),
258 protocol_surface: "host_call(method=session)".into(),
259 pass_criteria: "Session data read/written correctly".into(),
260 fail_criteria: "Session call denied or data corrupted".into(),
261 }),
262 HostCapability::Exec => behaviors.push(ExpectedBehavior {
263 description: "Command executes shell commands".into(),
264 protocol_surface: "host_call(method=exec)".into(),
265 pass_criteria: "Execution succeeds with correct output".into(),
266 fail_criteria: "Execution denied or wrong output".into(),
267 }),
268 _ => {}
269 },
270 ExtensionCategory::Provider => match capability {
271 HostCapability::Http => behaviors.push(ExpectedBehavior {
272 description: "Provider streams LLM responses via pi.http()".into(),
273 protocol_surface: "host_call(method=http) + streamSimple streaming".into(),
274 pass_criteria: "HTTP request to LLM API succeeds; streaming chunks delivered"
275 .into(),
276 fail_criteria: "HTTP denied, stream broken, or chunks lost".into(),
277 }),
278 HostCapability::Read => behaviors.push(ExpectedBehavior {
279 description: "Provider reads local config files".into(),
280 protocol_surface: "host_call(method=tool, name=read) or pi.fs.read".into(),
281 pass_criteria: "Config file read succeeds".into(),
282 fail_criteria: "Read denied or file not found".into(),
283 }),
284 HostCapability::Env => behaviors.push(ExpectedBehavior {
285 description: "Provider accesses API keys via process.env".into(),
286 protocol_surface: "process.env access (capability=env)".into(),
287 pass_criteria: "Environment variable accessible when env capability granted".into(),
288 fail_criteria: "Env access denied when capability should be granted".into(),
289 }),
290 _ => {}
291 },
292 ExtensionCategory::EventHook => match capability {
293 HostCapability::Session => behaviors.push(ExpectedBehavior {
294 description: "Event hook reads/modifies session on lifecycle events".into(),
295 protocol_surface: "event_hook dispatch + host_call(method=session)".into(),
296 pass_criteria: "Hook fires on correct event; session mutations applied".into(),
297 fail_criteria: "Hook not fired, wrong event, or session mutation lost".into(),
298 }),
299 HostCapability::Ui => behaviors.push(ExpectedBehavior {
300 description: "Event hook renders UI elements".into(),
301 protocol_surface: "event_hook dispatch + host_call(method=ui)".into(),
302 pass_criteria: "UI elements rendered after hook fires".into(),
303 fail_criteria: "UI call fails or hook not dispatched".into(),
304 }),
305 HostCapability::Exec => behaviors.push(ExpectedBehavior {
306 description: "Event hook executes commands on events".into(),
307 protocol_surface: "event_hook dispatch + host_call(method=exec)".into(),
308 pass_criteria: "Command execution triggered by event".into(),
309 fail_criteria: "Execution denied or event not dispatched".into(),
310 }),
311 HostCapability::Http => behaviors.push(ExpectedBehavior {
312 description: "Event hook makes HTTP requests on events".into(),
313 protocol_surface: "event_hook dispatch + host_call(method=http)".into(),
314 pass_criteria: "HTTP request sent when event fires".into(),
315 fail_criteria: "HTTP denied or event not dispatched".into(),
316 }),
317 _ => {}
318 },
319 ExtensionCategory::UiComponent => {
320 if matches!(capability, HostCapability::Ui) {
321 push_behavior_bounded(
322 &mut behaviors,
323 ExpectedBehavior {
324 description: "UI component registers message renderer".into(),
325 protocol_surface: "registerMessageRenderer in register payload".into(),
326 pass_criteria: "Renderer registered and callable".into(),
327 fail_criteria: "Renderer not found in registration snapshot".into(),
328 },
329 );
330 }
331 }
332 ExtensionCategory::Configuration => match capability {
333 HostCapability::Ui => behaviors.push(ExpectedBehavior {
334 description: "Flag/shortcut activation triggers UI".into(),
335 protocol_surface: "register(flags/shortcuts) + host_call(method=ui)".into(),
336 pass_criteria: "Flag/shortcut registered; activation dispatches correctly".into(),
337 fail_criteria: "Registration missing or activation fails".into(),
338 }),
339 HostCapability::Session => behaviors.push(ExpectedBehavior {
340 description: "Flag modifies session configuration".into(),
341 protocol_surface: "register(flags) + host_call(method=session)".into(),
342 pass_criteria: "Flag value reflected in session state".into(),
343 fail_criteria: "Session state not updated after flag set".into(),
344 }),
345 _ => {}
346 },
347 ExtensionCategory::Multi => {
348 push_behavior_bounded(
351 &mut behaviors,
352 ExpectedBehavior {
353 description: format!(
354 "Multi-type extension uses {capability:?} across registrations"
355 ),
356 protocol_surface: format!(
357 "Multiple register types + host_call using {capability:?}"
358 ),
359 pass_criteria: "All registration types load; capability dispatched correctly"
360 .into(),
361 fail_criteria: "Any registration type fails or capability mismatch".into(),
362 },
363 );
364 }
365 ExtensionCategory::General => {
366 if matches!(capability, HostCapability::Session | HostCapability::Ui) {
367 push_behavior_bounded(
368 &mut behaviors,
369 ExpectedBehavior {
370 description: format!(
371 "General extension uses {capability:?} via export default"
372 ),
373 protocol_surface: format!(
374 "export default + host_call(method={capability:?})"
375 ),
376 pass_criteria: "Extension loads; hostcall dispatched and returns".into(),
377 fail_criteria: "Load failure or hostcall error".into(),
378 },
379 );
380 }
381 }
382 }
383
384 if matches!(capability, HostCapability::Tool) && !matches!(category, ExtensionCategory::Tool) {
386 push_behavior_bounded(
388 &mut behaviors,
389 ExpectedBehavior {
390 description: "Extension calls non-core tool via pi.tool()".into(),
391 protocol_surface: "host_call(method=tool, name=<non-core>)".into(),
392 pass_criteria: "Tool capability check applied; prompt/deny in strict mode".into(),
393 fail_criteria: "Tool call bypasses capability check".into(),
394 },
395 );
396 }
397
398 behaviors
399}
400
401#[must_use]
403const fn is_required_cell(category: &ExtensionCategory, capability: HostCapability) -> bool {
404 match category {
405 ExtensionCategory::Tool => matches!(
406 capability,
407 HostCapability::Read
408 | HostCapability::Write
409 | HostCapability::Exec
410 | HostCapability::Http
411 ),
412 ExtensionCategory::Command => {
413 matches!(capability, HostCapability::Ui | HostCapability::Session)
414 }
415 ExtensionCategory::Provider => {
416 matches!(capability, HostCapability::Http | HostCapability::Env)
417 }
418 ExtensionCategory::EventHook => matches!(
419 capability,
420 HostCapability::Session | HostCapability::Ui | HostCapability::Exec
421 ),
422 ExtensionCategory::UiComponent => matches!(capability, HostCapability::Ui),
423 ExtensionCategory::Configuration => {
424 matches!(capability, HostCapability::Ui | HostCapability::Session)
425 }
426 ExtensionCategory::Multi => true, ExtensionCategory::General => {
428 matches!(capability, HostCapability::Session | HostCapability::Ui)
429 }
430 }
431}
432
433#[must_use]
435#[allow(clippy::too_many_lines)]
436fn build_category_criteria() -> Vec<CategoryCriteria> {
437 vec![
438 CategoryCriteria {
439 category: ExtensionCategory::Tool,
440 must_pass: vec![
441 "registerTool present in registration snapshot".into(),
442 "Tool definition includes name, description, and JSON Schema parameters".into(),
443 "tool_call dispatch reaches handler and returns tool_result".into(),
444 "Hostcalls use correct capability derivation (read/write/exec per tool name)"
445 .into(),
446 ],
447 failure_conditions: vec![
448 "registerTool missing from snapshot".into(),
449 "Tool schema validation fails".into(),
450 "tool_call dispatch error or timeout".into(),
451 "Capability mismatch between declared and derived".into(),
452 ],
453 out_of_scope: vec![
454 "Tool output correctness beyond protocol conformance".into(),
455 "Performance benchmarks (covered by perf harness)".into(),
456 ],
457 },
458 CategoryCriteria {
459 category: ExtensionCategory::Command,
460 must_pass: vec![
461 "registerCommand/registerSlashCommand in registration snapshot".into(),
462 "Command definition includes name and description".into(),
463 "slash_command dispatch reaches handler and returns slash_result".into(),
464 "UI hostcalls (select/input/confirm) dispatch correctly".into(),
465 ],
466 failure_conditions: vec![
467 "Command missing from snapshot".into(),
468 "slash_command dispatch fails".into(),
469 "UI hostcall denied in interactive mode".into(),
470 ],
471 out_of_scope: vec!["Command business logic correctness".into()],
472 },
473 CategoryCriteria {
474 category: ExtensionCategory::Provider,
475 must_pass: vec![
476 "registerProvider in registration snapshot with model entries".into(),
477 "streamSimple callable and returns AsyncIterable<string>".into(),
478 "HTTP hostcalls dispatched with correct capability".into(),
479 "Stream cancellation propagates correctly".into(),
480 ],
481 failure_conditions: vec![
482 "Provider missing from snapshot".into(),
483 "streamSimple throws or hangs".into(),
484 "HTTP capability not derived correctly".into(),
485 "Cancellation does not terminate stream".into(),
486 ],
487 out_of_scope: vec![
488 "LLM response quality".into(),
489 "OAuth token refresh (separate test suite)".into(),
490 ],
491 },
492 CategoryCriteria {
493 category: ExtensionCategory::EventHook,
494 must_pass: vec![
495 "Event hooks registered for declared events".into(),
496 "Hook fires when event dispatched".into(),
497 "Hook can access session/UI/exec hostcalls as declared".into(),
498 "Hook errors do not crash the host".into(),
499 ],
500 failure_conditions: vec![
501 "Event hook not registered".into(),
502 "Hook does not fire on matching event".into(),
503 "Hostcall denied when capability is granted".into(),
504 "Hook error propagates as host crash".into(),
505 ],
506 out_of_scope: vec!["Hook side-effect correctness".into()],
507 },
508 CategoryCriteria {
509 category: ExtensionCategory::UiComponent,
510 must_pass: vec![
511 "registerMessageRenderer in registration snapshot".into(),
512 "Renderer callable with message content".into(),
513 "Rendered output is a valid string/markup".into(),
514 ],
515 failure_conditions: vec![
516 "Renderer missing from snapshot".into(),
517 "Renderer throws on valid input".into(),
518 ],
519 out_of_scope: vec!["Visual rendering correctness (requires UI testing)".into()],
520 },
521 CategoryCriteria {
522 category: ExtensionCategory::Configuration,
523 must_pass: vec![
524 "registerFlag/registerShortcut in registration snapshot".into(),
525 "Flag value readable after registration".into(),
526 "Shortcut activation dispatches correctly".into(),
527 ],
528 failure_conditions: vec![
529 "Flag/shortcut missing from snapshot".into(),
530 "Flag value not persisted".into(),
531 "Shortcut activation does not trigger handler".into(),
532 ],
533 out_of_scope: vec!["Configuration persistence across sessions".into()],
534 },
535 CategoryCriteria {
536 category: ExtensionCategory::Multi,
537 must_pass: vec![
538 "All declared registration types present in snapshot".into(),
539 "Each registration type independently functional".into(),
540 "Capabilities correctly derived for each registration type".into(),
541 ],
542 failure_conditions: vec![
543 "Any declared registration type missing".into(),
544 "Cross-type interaction causes error".into(),
545 ],
546 out_of_scope: vec!["Interaction semantics between registration types".into()],
547 },
548 CategoryCriteria {
549 category: ExtensionCategory::General,
550 must_pass: vec![
551 "Extension loads via export default without error".into(),
552 "Hostcalls dispatched correctly when used".into(),
553 ],
554 failure_conditions: vec![
555 "Load throws an error".into(),
556 "Hostcall denied when capability is granted".into(),
557 ],
558 out_of_scope: vec![
559 "Extensions with no hostcalls (load-only test is sufficient)".into(),
560 ],
561 },
562 ]
563}
564
565#[must_use]
567fn capabilities_from_api_entry(entry: &ApiMatrixEntry) -> BTreeSet<HostCapability> {
568 let mut caps = BTreeSet::new();
569 for cap_str in &entry.capabilities_required {
570 if let Some(cap) = HostCapability::from_str_loose(cap_str) {
571 caps.insert(cap);
572 }
573 }
574 for hc in &entry.hostcalls {
576 if hc.contains("http") {
577 caps.insert(HostCapability::Http);
578 }
579 if hc.contains("exec") {
580 caps.insert(HostCapability::Exec);
581 }
582 if hc.contains("session") {
583 caps.insert(HostCapability::Session);
584 }
585 if hc.contains("ui") {
586 caps.insert(HostCapability::Ui);
587 }
588 if hc.contains("events") {
589 caps.insert(HostCapability::Session);
590 }
591 }
592 for api in &entry.node_apis {
594 match api.as_str() {
595 "fs" | "path" => {
596 caps.insert(HostCapability::Read);
597 }
598 "child_process" | "process" => {
599 caps.insert(HostCapability::Exec);
600 }
601 "os" => {
602 caps.insert(HostCapability::Env);
603 }
604 _ => {}
606 }
607 }
608 caps
609}
610
611#[must_use]
616fn category_for_extension(
617 entry: &InclusionEntry,
618 api_entry: Option<&ApiMatrixEntry>,
619) -> ExtensionCategory {
620 if let Some(api) = api_entry {
621 let registrations = api
622 .registration_types
623 .iter()
624 .map(|registration| registration_type_to_classifier_name(registration))
625 .filter(|registration| !registration.is_empty())
626 .collect::<Vec<_>>();
627 if !registrations.is_empty() {
628 return crate::extension_inclusion::classify_registrations(®istrations);
629 }
630 }
631 entry.category.clone()
632}
633
634fn registration_type_to_classifier_name(registration: &str) -> String {
635 let trimmed = registration.trim();
636 if trimmed.is_empty() {
637 return String::new();
638 }
639 if trimmed.starts_with("register") {
640 return trimmed.to_string();
641 }
642
643 match trimmed.replace('-', "_").as_str() {
644 "tool" => "registerTool".to_string(),
645 "command" => "registerCommand".to_string(),
646 "slash_command" => "registerSlashCommand".to_string(),
647 "provider" => "registerProvider".to_string(),
648 "event" => "registerEvent".to_string(),
649 "event_hook" => "registerEventHook".to_string(),
650 "message_renderer" | "ui" => "registerMessageRenderer".to_string(),
651 "flag" => "registerFlag".to_string(),
652 "shortcut" => "registerShortcut".to_string(),
653 normalized => {
654 let suffix = normalized
655 .split('_')
656 .filter(|part| !part.is_empty())
657 .map(capitalize_first)
658 .collect::<String>();
659 if suffix.is_empty() {
660 String::new()
661 } else {
662 format!("register{suffix}")
663 }
664 }
665 }
666}
667
/// Upper-cases the first character of `s` (Unicode-aware, so one character
/// may expand to several) and leaves the remainder untouched.
fn capitalize_first(s: &str) -> String {
    let mut rest = s.chars();
    match rest.next() {
        None => String::new(),
        Some(first) => {
            let mut capitalized: String = first.to_uppercase().collect();
            capitalized.push_str(rest.as_str());
            capitalized
        }
    }
}
674
675#[must_use]
677#[allow(clippy::too_many_lines)]
678pub fn build_test_plan(
679 inclusion: &InclusionList,
680 api_matrix: Option<&ApiMatrix>,
681 task_id: &str,
682) -> ConformanceTestPlan {
683 let all_entries: Vec<&InclusionEntry> = inclusion
685 .tier0
686 .iter()
687 .chain(inclusion.tier1.iter())
688 .chain(inclusion.tier1_review.iter())
689 .chain(inclusion.tier2.iter())
690 .collect();
691
692 let mut ext_map: BTreeMap<String, (ExtensionCategory, BTreeSet<HostCapability>)> =
694 BTreeMap::new();
695
696 for entry in &all_entries {
697 let api_entry = api_matrix.and_then(|m| m.extensions.get(&entry.id));
698 let cat = category_for_extension(entry, api_entry);
699 let caps = api_entry.map_or_else(BTreeSet::new, capabilities_from_api_entry);
700 ext_map.insert(entry.id.clone(), (cat, caps));
701 }
702
703 let categories = [
705 ExtensionCategory::Tool,
706 ExtensionCategory::Command,
707 ExtensionCategory::Provider,
708 ExtensionCategory::EventHook,
709 ExtensionCategory::UiComponent,
710 ExtensionCategory::Configuration,
711 ExtensionCategory::Multi,
712 ExtensionCategory::General,
713 ];
714
715 let mut matrix = Vec::new();
716 let mut fixture_assignments = Vec::new();
717
718 for category in &categories {
719 for capability in HostCapability::all() {
720 let behaviors = build_behaviors(category, *capability);
721 if behaviors.is_empty() {
722 continue;
723 }
724
725 let required = is_required_cell(category, *capability);
726
727 let exemplars: Vec<String> = ext_map
729 .iter()
730 .filter(|(_, (cat, caps))| cat == category && caps.contains(capability))
731 .map(|(id, _)| id.clone())
732 .collect();
733
734 let cell_key = format!("{category:?}:{capability:?}");
735
736 let min_fixtures = if required { 2 } else { 1 };
737 let coverage_met = exemplars.len() >= min_fixtures;
738
739 matrix.push(ConformanceCell {
740 category: category.clone(),
741 capability: *capability,
742 required,
743 behaviors,
744 exemplar_extensions: exemplars.clone(),
745 });
746
747 fixture_assignments.push(FixtureAssignment {
748 cell_key,
749 fixture_extensions: exemplars,
750 min_fixtures,
751 coverage_met,
752 });
753 }
754 }
755
756 let total_cells = matrix.len();
758 let required_cells = matrix.iter().filter(|c| c.required).count();
759 let covered_cells = fixture_assignments
760 .iter()
761 .filter(|a| a.coverage_met)
762 .count();
763 let uncovered_required_cells = fixture_assignments
764 .iter()
765 .filter(|a| {
766 !a.coverage_met
767 && matrix.iter().any(|c| {
768 format!("{:?}:{:?}", c.category, c.capability) == a.cell_key && c.required
769 })
770 })
771 .count();
772 let total_exemplars: BTreeSet<&str> = ext_map.keys().map(String::as_str).collect();
773 let categories_covered: std::collections::HashSet<String> = ext_map
774 .values()
775 .map(|(cat, _)| format!("{cat:?}"))
776 .collect();
777 let capabilities_covered: BTreeSet<&HostCapability> =
778 ext_map.values().flat_map(|(_, caps)| caps.iter()).collect();
779
780 let coverage = CoverageSummary {
781 total_cells,
782 required_cells,
783 covered_cells,
784 uncovered_required_cells,
785 total_exemplar_extensions: total_exemplars.len(),
786 categories_covered: categories_covered.len(),
787 capabilities_covered: capabilities_covered.len(),
788 };
789
790 let category_criteria = build_category_criteria();
791
792 ConformanceTestPlan {
793 schema: "pi.ext.conformance-matrix.v1".to_string(),
794 generated_at: crate::extension_validation::chrono_now_iso(),
795 task: task_id.to_string(),
796 matrix,
797 fixture_assignments,
798 category_criteria,
799 coverage,
800 }
801}
802
803#[cfg(test)]
808mod tests {
809 use super::*;
810
811 #[test]
812 fn host_capability_from_str_all_variants() {
813 assert_eq!(
814 HostCapability::from_str_loose("read"),
815 Some(HostCapability::Read)
816 );
817 assert_eq!(
818 HostCapability::from_str_loose("WRITE"),
819 Some(HostCapability::Write)
820 );
821 assert_eq!(
822 HostCapability::from_str_loose("Exec"),
823 Some(HostCapability::Exec)
824 );
825 assert_eq!(
826 HostCapability::from_str_loose("http"),
827 Some(HostCapability::Http)
828 );
829 assert_eq!(
830 HostCapability::from_str_loose("session"),
831 Some(HostCapability::Session)
832 );
833 assert_eq!(
834 HostCapability::from_str_loose("ui"),
835 Some(HostCapability::Ui)
836 );
837 assert_eq!(HostCapability::from_str_loose("unknown"), None);
838 }
839
840 #[test]
841 fn build_behaviors_tool_read() {
842 let behaviors = build_behaviors(&ExtensionCategory::Tool, HostCapability::Read);
843 assert_eq!(behaviors.len(), 1);
844 assert!(behaviors[0].description.contains("reads files"));
845 }
846
847 #[test]
848 fn build_behaviors_provider_http() {
849 let behaviors = build_behaviors(&ExtensionCategory::Provider, HostCapability::Http);
850 assert_eq!(behaviors.len(), 1);
851 assert!(behaviors[0].description.contains("streams LLM"));
852 }
853
854 #[test]
855 fn build_behaviors_empty_for_irrelevant_cell() {
856 let behaviors = build_behaviors(&ExtensionCategory::UiComponent, HostCapability::Exec);
857 assert!(behaviors.is_empty());
858 }
859
860 #[test]
861 fn is_required_tool_read() {
862 assert!(is_required_cell(
863 &ExtensionCategory::Tool,
864 HostCapability::Read
865 ));
866 }
867
868 #[test]
869 fn is_required_provider_http() {
870 assert!(is_required_cell(
871 &ExtensionCategory::Provider,
872 HostCapability::Http
873 ));
874 }
875
876 #[test]
877 fn not_required_tool_session() {
878 assert!(!is_required_cell(
879 &ExtensionCategory::Tool,
880 HostCapability::Session
881 ));
882 }
883
884 #[test]
885 fn capabilities_from_api_entry_basic() {
886 let entry = ApiMatrixEntry {
887 registration_types: vec!["tool".into()],
888 hostcalls: vec!["pi.http()".into()],
889 capabilities_required: vec!["read".into(), "write".into()],
890 events_listened: vec![],
891 node_apis: vec!["fs".into()],
892 third_party_deps: vec![],
893 };
894 let caps = capabilities_from_api_entry(&entry);
895 assert!(caps.contains(&HostCapability::Read));
896 assert!(caps.contains(&HostCapability::Write));
897 assert!(caps.contains(&HostCapability::Http));
898 }
899
900 #[test]
901 fn category_criteria_all_categories_covered() {
902 let criteria = build_category_criteria();
903 assert_eq!(criteria.len(), 8); let cats: Vec<_> = criteria.iter().map(|c| &c.category).collect();
905 assert!(cats.contains(&&ExtensionCategory::Tool));
906 assert!(cats.contains(&&ExtensionCategory::Provider));
907 assert!(cats.contains(&&ExtensionCategory::General));
908 }
909
910 #[test]
911 fn build_test_plan_empty_inclusion() {
912 let inclusion = InclusionList {
913 schema: "pi.ext.inclusion.v1".into(),
914 generated_at: "2026-01-01T00:00:00Z".into(),
915 task: Some("test".into()),
916 stats: Some(crate::extension_inclusion::InclusionStats {
917 total_included: 0,
918 tier0_count: 0,
919 tier1_count: 0,
920 tier2_count: 0,
921 excluded_count: 0,
922 pinned_npm: 0,
923 pinned_git: 0,
924 pinned_url: 0,
925 pinned_checksum_only: 0,
926 }),
927 tier0: vec![],
928 tier1: vec![],
929 tier2: vec![],
930 exclusions: vec![],
931 category_coverage: std::collections::HashMap::new(),
932 summary: None,
933 tier1_review: vec![],
934 coverage: None,
935 exclusion_notes: vec![],
936 };
937
938 let plan = build_test_plan(&inclusion, None, "test-task");
939 assert_eq!(plan.schema, "pi.ext.conformance-matrix.v1");
940 assert!(!plan.matrix.is_empty()); assert_eq!(plan.coverage.total_exemplar_extensions, 0);
942 }
943
944 #[test]
945 fn capitalize_first_works() {
946 assert_eq!(capitalize_first("tool"), "Tool");
947 assert_eq!(capitalize_first(""), "");
948 assert_eq!(capitalize_first("a"), "A");
949 }
950
951 #[test]
952 fn registration_type_to_classifier_name_handles_snake_case_event_hook() {
953 assert_eq!(
954 registration_type_to_classifier_name("event_hook"),
955 "registerEventHook"
956 );
957 }
958
959 #[test]
960 fn category_for_extension_uses_api_matrix_event_hook_category() {
961 let entry = InclusionEntry {
962 id: "event-hook-ext".into(),
963 name: Some("event-hook-ext".into()),
964 tier: Some("tier-1".into()),
965 score: Some(50.0),
966 category: ExtensionCategory::General,
967 registrations: Vec::new(),
968 version_pin: None,
969 sha256: None,
970 artifact_path: None,
971 license: None,
972 source_tier: None,
973 rationale: None,
974 directory: None,
975 provenance: None,
976 capabilities: None,
977 risk_level: None,
978 inclusion_rationale: None,
979 };
980 let api_entry = ApiMatrixEntry {
981 registration_types: vec!["event_hook".into()],
982 hostcalls: Vec::new(),
983 capabilities_required: Vec::new(),
984 events_listened: Vec::new(),
985 node_apis: Vec::new(),
986 third_party_deps: Vec::new(),
987 };
988
989 assert_eq!(
990 category_for_extension(&entry, Some(&api_entry)),
991 ExtensionCategory::EventHook
992 );
993 }
994
995 #[test]
996 fn category_for_extension_uses_api_matrix_multi_type_category() {
997 let entry = InclusionEntry {
998 id: "event-provider-ext".into(),
999 name: Some("event-provider-ext".into()),
1000 tier: Some("tier-1".into()),
1001 score: Some(70.0),
1002 category: ExtensionCategory::Provider,
1003 registrations: Vec::new(),
1004 version_pin: None,
1005 sha256: None,
1006 artifact_path: None,
1007 license: None,
1008 source_tier: None,
1009 rationale: None,
1010 directory: None,
1011 provenance: None,
1012 capabilities: None,
1013 risk_level: None,
1014 inclusion_rationale: None,
1015 };
1016 let api_entry = ApiMatrixEntry {
1017 registration_types: vec!["event_hook".into(), "provider".into()],
1018 hostcalls: Vec::new(),
1019 capabilities_required: Vec::new(),
1020 events_listened: Vec::new(),
1021 node_apis: Vec::new(),
1022 third_party_deps: Vec::new(),
1023 };
1024
1025 assert_eq!(
1026 category_for_extension(&entry, Some(&api_entry)),
1027 ExtensionCategory::Multi
1028 );
1029 }
1030
1031 #[test]
1032 fn host_capability_all_count() {
1033 assert_eq!(HostCapability::all().len(), 9);
1034 }
1035
1036 #[test]
1037 fn serde_roundtrip_host_capability() {
1038 let cap = HostCapability::Http;
1039 let json = serde_json::to_string(&cap).expect("serialize HostCapability");
1040 assert_eq!(json, "\"http\"");
1041 let back: HostCapability = serde_json::from_str(&json).expect("deserialize HostCapability");
1042 assert_eq!(back, cap);
1043 }
1044
1045 #[test]
1046 fn serde_roundtrip_conformance_cell() {
1047 let cell = ConformanceCell {
1048 category: ExtensionCategory::Tool,
1049 capability: HostCapability::Read,
1050 required: true,
1051 behaviors: vec![ExpectedBehavior {
1052 description: "test".into(),
1053 protocol_surface: "test".into(),
1054 pass_criteria: "test".into(),
1055 fail_criteria: "test".into(),
1056 }],
1057 exemplar_extensions: vec!["hello".into()],
1058 };
1059 let json = serde_json::to_string(&cell).expect("serialize ConformanceCell");
1060 let back: ConformanceCell =
1061 serde_json::from_str(&json).expect("deserialize ConformanceCell");
1062 assert_eq!(back.category, ExtensionCategory::Tool);
1063 assert!(back.required);
1064 }
1065
1066 mod proptest_conformance_matrix {
1067 use super::*;
1068 use proptest::prelude::*;
1069
/// Canonical lowercase capability names accepted by
/// `HostCapability::from_str_loose`, listed in the same order as
/// `HostCapability::all()`.
const ALL_CAP_NAMES: &[&str] = &[
    "read",
    "write",
    "exec",
    "http",
    "session",
    "ui",
    "log",
    "env",
    "tool",
];
1073
1074 const fn category_from_index(index: usize) -> ExtensionCategory {
1075 match index {
1076 0 => ExtensionCategory::Tool,
1077 1 => ExtensionCategory::Command,
1078 2 => ExtensionCategory::Provider,
1079 3 => ExtensionCategory::EventHook,
1080 4 => ExtensionCategory::UiComponent,
1081 5 => ExtensionCategory::Configuration,
1082 6 => ExtensionCategory::Multi,
1083 _ => ExtensionCategory::General,
1084 }
1085 }
1086
/// Returns `input` with the characters selected by `upper_mask` converted to
/// ASCII uppercase.
///
/// `upper_mask[i]` controls the `i`-th character of `input`. A mask shorter
/// than the input is treated as padded with `false`, so the remaining
/// characters are copied unchanged rather than dropped; surplus mask entries
/// are ignored.
fn mask_case(input: &str, upper_mask: &[bool]) -> String {
    // `zip` stops at the shorter iterator, so pad the mask with `false` to
    // guarantee every input character is emitted.
    let padded_mask = upper_mask.iter().copied().chain(std::iter::repeat(false));

    input
        .chars()
        .zip(padded_mask)
        .map(|(ch, upper)| if upper { ch.to_ascii_uppercase() } else { ch })
        .collect()
}
1098
1099 fn make_inclusion_entry(id: String, category: ExtensionCategory) -> InclusionEntry {
1100 InclusionEntry {
1101 id,
1102 name: None,
1103 tier: None,
1104 score: None,
1105 category,
1106 registrations: Vec::new(),
1107 version_pin: None,
1108 sha256: None,
1109 artifact_path: None,
1110 license: None,
1111 source_tier: None,
1112 rationale: None,
1113 directory: None,
1114 provenance: None,
1115 capabilities: None,
1116 risk_level: None,
1117 inclusion_rationale: None,
1118 }
1119 }
1120
1121 fn build_synthetic_plan(
1122 specs: &[(usize, Vec<usize>)],
1123 reverse_tier_order: bool,
1124 ) -> ConformanceTestPlan {
1125 let mut tier0 = specs
1126 .iter()
1127 .enumerate()
1128 .map(|(idx, (cat_idx, _))| {
1129 make_inclusion_entry(format!("ext-{idx}"), category_from_index(*cat_idx))
1130 })
1131 .collect::<Vec<_>>();
1132
1133 if reverse_tier_order {
1134 tier0.reverse();
1135 }
1136
1137 let inclusion = InclusionList {
1138 schema: "pi.ext.inclusion.v1".to_string(),
1139 generated_at: "2026-01-01T00:00:00Z".to_string(),
1140 task: Some("prop-generated".to_string()),
1141 stats: None,
1142 tier0,
1143 tier1: Vec::new(),
1144 tier2: Vec::new(),
1145 exclusions: Vec::new(),
1146 category_coverage: std::collections::HashMap::new(),
1147 summary: None,
1148 tier1_review: Vec::new(),
1149 coverage: None,
1150 exclusion_notes: Vec::new(),
1151 };
1152
1153 let extensions = specs
1154 .iter()
1155 .enumerate()
1156 .map(|(idx, (_, cap_indices))| {
1157 let id = format!("ext-{idx}");
1158 let entry = ApiMatrixEntry {
1159 registration_types: Vec::new(),
1160 hostcalls: Vec::new(),
1161 capabilities_required: cap_indices
1162 .iter()
1163 .map(|cap_idx| ALL_CAP_NAMES[*cap_idx].to_string())
1164 .collect(),
1165 events_listened: Vec::new(),
1166 node_apis: Vec::new(),
1167 third_party_deps: Vec::new(),
1168 };
1169 (id, entry)
1170 })
1171 .collect::<BTreeMap<_, _>>();
1172
1173 let api_matrix = ApiMatrix {
1174 schema: "pi.ext.api-matrix.v1".to_string(),
1175 extensions,
1176 };
1177
1178 build_test_plan(&inclusion, Some(&api_matrix), "prop-generated")
1179 }
1180
1181 proptest! {
1182 #[test]
1184 fn from_str_loose_case_insensitive(idx in 0..ALL_CAP_NAMES.len()) {
1185 let name = ALL_CAP_NAMES[idx];
1186 let lower = HostCapability::from_str_loose(name);
1187 let upper = HostCapability::from_str_loose(&name.to_uppercase());
1188 let mixed = HostCapability::from_str_loose(&capitalize_first(name));
1189 assert_eq!(lower, upper);
1190 assert_eq!(lower, mixed);
1191 assert!(lower.is_some());
1192 }
1193
1194 #[test]
1196 fn from_str_loose_arbitrary_case_masks(
1197 idx in 0..ALL_CAP_NAMES.len(),
1198 upper_mask in prop::collection::vec(any::<bool>(), 0..64usize),
1199 ) {
1200 let canonical = ALL_CAP_NAMES[idx];
1201 let mut effective_mask = upper_mask;
1202 effective_mask.resize(canonical.len(), false);
1203 effective_mask.truncate(canonical.len());
1204 let variant = mask_case(canonical, &effective_mask);
1205
1206 assert_eq!(
1207 HostCapability::from_str_loose(canonical),
1208 HostCapability::from_str_loose(&variant)
1209 );
1210 }
1211
1212 #[test]
1214 fn from_str_loose_unknown(s in "[a-z]{10,20}") {
1215 if !ALL_CAP_NAMES.contains(&s.as_str()) {
1216 assert!(HostCapability::from_str_loose(&s).is_none());
1217 }
1218 }
1219
1220 #[test]
1222 fn all_count(_dummy in 0..1u8) {
1223 assert_eq!(HostCapability::all().len(), 9);
1224 }
1225
1226 #[test]
1228 fn capability_serde_roundtrip(idx in 0..9usize) {
1229 let cap = HostCapability::all()[idx];
1230 let json = serde_json::to_string(&cap).expect("serialize HostCapability in proptest");
1231 let back: HostCapability = serde_json::from_str(&json).expect("deserialize HostCapability in proptest");
1232 assert_eq!(cap, back);
1233 }
1234
1235 #[test]
1237 fn multi_requires_all(idx in 0..9usize) {
1238 let cap = HostCapability::all()[idx];
1239 assert!(is_required_cell(&ExtensionCategory::Multi, cap));
1240 }
1241
1242 #[test]
1244 fn required_cell_deterministic(cat_idx in 0..8usize, cap_idx in 0..9usize) {
1245 let cats = [
1246 ExtensionCategory::Tool,
1247 ExtensionCategory::Command,
1248 ExtensionCategory::Provider,
1249 ExtensionCategory::EventHook,
1250 ExtensionCategory::UiComponent,
1251 ExtensionCategory::Configuration,
1252 ExtensionCategory::Multi,
1253 ExtensionCategory::General,
1254 ];
1255 let cap = HostCapability::all()[cap_idx];
1256 let first = is_required_cell(&cats[cat_idx], cap);
1257 let second = is_required_cell(&cats[cat_idx], cap);
1258 assert_eq!(first, second);
1259 }
1260
1261 #[test]
1263 fn capitalize_first_empty(_dummy in 0..1u8) {
1264 assert_eq!(capitalize_first(""), "");
1265 }
1266
1267 #[test]
1269 fn capitalize_first_works(s in "[a-z]{1,20}") {
1270 let result = capitalize_first(&s);
1271 let first = result.chars().next().expect("capitalize_first should return non-empty string");
1272 assert!(first.is_uppercase());
1273 assert_eq!(&result[first.len_utf8()..], &s[1..]);
1274 }
1275
1276 #[test]
1278 fn capitalize_first_idempotent(s in "[A-Z][a-z]{0,15}") {
1279 assert_eq!(capitalize_first(&s), s);
1280 }
1281
1282 #[test]
1284 fn build_behaviors_never_panics(cat_idx in 0..8usize, cap_idx in 0..9usize) {
1285 let cats = [
1286 ExtensionCategory::Tool,
1287 ExtensionCategory::Command,
1288 ExtensionCategory::Provider,
1289 ExtensionCategory::EventHook,
1290 ExtensionCategory::UiComponent,
1291 ExtensionCategory::Configuration,
1292 ExtensionCategory::Multi,
1293 ExtensionCategory::General,
1294 ];
1295 let cap = HostCapability::all()[cap_idx];
1296 let behaviors = build_behaviors(&cats[cat_idx], cap);
1297 for b in &behaviors {
1299 assert!(!b.description.is_empty());
1300 assert!(!b.protocol_surface.is_empty());
1301 assert!(!b.pass_criteria.is_empty());
1302 assert!(!b.fail_criteria.is_empty());
1303 }
1304 }
1305
1306 #[test]
1308 fn build_test_plan_coverage_invariants(task_id in "[a-z0-9_-]{1,32}") {
1309 let inclusion = InclusionList {
1310 schema: "pi.ext.inclusion.v1".to_string(),
1311 generated_at: "2026-01-01T00:00:00Z".to_string(),
1312 task: Some(task_id.clone()),
1313 stats: None,
1314 tier0: Vec::new(),
1315 tier1: Vec::new(),
1316 tier2: Vec::new(),
1317 exclusions: Vec::new(),
1318 category_coverage: std::collections::HashMap::new(),
1319 summary: None,
1320 tier1_review: Vec::new(),
1321 coverage: None,
1322 exclusion_notes: Vec::new(),
1323 };
1324
1325 let plan = build_test_plan(&inclusion, None, &task_id);
1326 assert_eq!(plan.task, task_id);
1327 assert_eq!(plan.coverage.total_cells, plan.matrix.len());
1328 assert_eq!(plan.fixture_assignments.len(), plan.matrix.len());
1329 assert!(plan.coverage.required_cells <= plan.coverage.total_cells);
1330 assert!(plan.coverage.covered_cells <= plan.coverage.total_cells);
1331 assert!(plan.coverage.uncovered_required_cells <= plan.coverage.required_cells);
1332
1333 for assignment in &plan.fixture_assignments {
1334 let matches = plan
1335 .matrix
1336 .iter()
1337 .filter(|cell| format!("{:?}:{:?}", cell.category, cell.capability) == assignment.cell_key)
1338 .count();
1339 assert_eq!(matches, 1);
1340 }
1341 }
1342
1343 #[test]
1345 fn build_test_plan_fixture_thresholds_align_with_required_cells(
1346 specs in prop::collection::vec(
1347 (
1348 0usize..8usize,
1349 prop::collection::vec(0usize..ALL_CAP_NAMES.len(), 0..12usize),
1350 ),
1351 0..24usize
1352 )
1353 ) {
1354 let plan = build_synthetic_plan(&specs, false);
1355 let required_by_key = plan
1356 .matrix
1357 .iter()
1358 .map(|cell| {
1359 (
1360 format!("{:?}:{:?}", cell.category, cell.capability),
1361 cell.required,
1362 )
1363 })
1364 .collect::<std::collections::BTreeMap<_, _>>();
1365
1366 for assignment in &plan.fixture_assignments {
1367 let required = required_by_key.get(&assignment.cell_key);
1368 prop_assert!(required.is_some());
1369 let min_expected = if *required.expect("present") { 2 } else { 1 };
1370 prop_assert_eq!(assignment.min_fixtures, min_expected);
1371 prop_assert_eq!(
1372 assignment.coverage_met,
1373 assignment.fixture_extensions.len() >= assignment.min_fixtures
1374 );
1375 }
1376
1377 let uncovered_required = plan
1378 .fixture_assignments
1379 .iter()
1380 .filter(|assignment| {
1381 !assignment.coverage_met
1382 && required_by_key
1383 .get(&assignment.cell_key)
1384 .is_some_and(|required| *required)
1385 })
1386 .count();
1387 prop_assert_eq!(plan.coverage.uncovered_required_cells, uncovered_required);
1388 }
1389
1390 #[test]
1392 fn build_test_plan_shape_is_stable_under_tier_reordering(
1393 specs in prop::collection::vec(
1394 (
1395 0usize..8usize,
1396 prop::collection::vec(0usize..ALL_CAP_NAMES.len(), 0..12usize),
1397 ),
1398 0..24usize
1399 )
1400 ) {
1401 let forward = build_synthetic_plan(&specs, false);
1402 let reversed = build_synthetic_plan(&specs, true);
1403
1404 let forward_matrix = serde_json::to_string(&forward.matrix).expect("serialize matrix");
1405 let reversed_matrix = serde_json::to_string(&reversed.matrix).expect("serialize matrix");
1406 prop_assert_eq!(forward_matrix, reversed_matrix);
1407
1408 let forward_assignments =
1409 serde_json::to_string(&forward.fixture_assignments).expect("serialize assignments");
1410 let reversed_assignments =
1411 serde_json::to_string(&reversed.fixture_assignments).expect("serialize assignments");
1412 prop_assert_eq!(forward_assignments, reversed_assignments);
1413
1414 let forward_coverage =
1415 serde_json::to_string(&forward.coverage).expect("serialize coverage");
1416 let reversed_coverage =
1417 serde_json::to_string(&reversed.coverage).expect("serialize coverage");
1418 prop_assert_eq!(forward_coverage, reversed_coverage);
1419 }
1420
1421 #[test]
1423 fn capabilities_from_api_entry_includes_declared_valid_capabilities(
1424 cap_indices in proptest::collection::vec(0usize..ALL_CAP_NAMES.len(), 0..24usize)
1425 ) {
1426 let declared = cap_indices
1427 .iter()
1428 .map(|idx| ALL_CAP_NAMES[*idx].to_string())
1429 .collect::<Vec<_>>();
1430 let entry = ApiMatrixEntry {
1431 registration_types: vec!["tool".to_string()],
1432 hostcalls: Vec::new(),
1433 capabilities_required: declared.clone(),
1434 events_listened: Vec::new(),
1435 node_apis: Vec::new(),
1436 third_party_deps: Vec::new(),
1437 };
1438 let computed = capabilities_from_api_entry(&entry);
1439 for cap in declared {
1440 let parsed = HostCapability::from_str_loose(&cap).expect("declared capability must parse");
1441 assert!(computed.contains(&parsed));
1442 }
1443 }
1444 }
1445 }
1446}