Skip to main content

pi/
extension_conformance_matrix.rs

//! Conformance test matrix for Pi extensions.
//!
//! Maps `ExtensionCategory × HostCapability → ExpectedBehavior` to produce
//! a concrete test plan. Each cell in the matrix is a `ConformanceCell`
//! that specifies what the runtime MUST validate for that combination.
//!
//! The matrix is populated from the inclusion list, the API matrix, and the
//! validated manifest so that every extension shape and capability requirement
//! has an explicit test target.
10
11use serde::{Deserialize, Serialize};
12use std::collections::{BTreeMap, BTreeSet, HashMap};
13
14use crate::extension_inclusion::{ExtensionCategory, InclusionEntry, InclusionList};
15
16// ────────────────────────────────────────────────────────────────────────────
17// Host capabilities (canonical)
18// ────────────────────────────────────────────────────────────────────────────
19
20/// Host capabilities that extensions may require.
21///
22/// These map 1:1 to the capability taxonomy in EXTENSIONS.md §3.2A.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
24#[serde(rename_all = "snake_case")]
25pub enum HostCapability {
26    Read,
27    Write,
28    Exec,
29    Http,
30    Session,
31    Ui,
32    Log,
33    Env,
34    Tool,
35}
36
37impl HostCapability {
38    /// Parse a capability string (case-insensitive).
39    #[must_use]
40    pub fn from_str_loose(s: &str) -> Option<Self> {
41        match s.to_ascii_lowercase().as_str() {
42            "read" => Some(Self::Read),
43            "write" => Some(Self::Write),
44            "exec" => Some(Self::Exec),
45            "http" => Some(Self::Http),
46            "session" => Some(Self::Session),
47            "ui" => Some(Self::Ui),
48            "log" => Some(Self::Log),
49            "env" => Some(Self::Env),
50            "tool" => Some(Self::Tool),
51            _ => None,
52        }
53    }
54
55    /// All defined capabilities (sorted).
56    #[must_use]
57    pub const fn all() -> &'static [Self] {
58        &[
59            Self::Read,
60            Self::Write,
61            Self::Exec,
62            Self::Http,
63            Self::Session,
64            Self::Ui,
65            Self::Log,
66            Self::Env,
67            Self::Tool,
68        ]
69    }
70}
71
72// ────────────────────────────────────────────────────────────────────────────
73// Expected behaviors
74// ────────────────────────────────────────────────────────────────────────────
75
76/// What the conformance harness MUST verify for a given cell.
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct ExpectedBehavior {
79    /// Short description of what is being tested.
80    pub description: String,
81    /// The specific protocol message or hostcall being validated.
82    pub protocol_surface: String,
83    /// Pass/fail criteria (human-readable).
84    pub pass_criteria: String,
85    /// Fail criteria (what constitutes a failure).
86    pub fail_criteria: String,
87}
88
89/// A single cell in the conformance matrix.
90#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct ConformanceCell {
92    /// Extension category (row).
93    pub category: ExtensionCategory,
94    /// Host capability (column).
95    pub capability: HostCapability,
96    /// Whether this combination is required (must test) vs optional.
97    pub required: bool,
98    /// Expected behaviors to validate.
99    pub behaviors: Vec<ExpectedBehavior>,
100    /// Extensions from the inclusion list that exercise this cell.
101    pub exemplar_extensions: Vec<String>,
102}
103
104// ────────────────────────────────────────────────────────────────────────────
105// Test plan
106// ────────────────────────────────────────────────────────────────────────────
107
108/// A fixture assignment linking a conformance cell to concrete test fixtures.
109#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct FixtureAssignment {
111    /// Cell key: `"{category}:{capability}"`.
112    pub cell_key: String,
113    /// Extension IDs that serve as test fixtures for this cell.
114    pub fixture_extensions: Vec<String>,
115    /// Minimum number of fixtures required for adequate coverage.
116    pub min_fixtures: usize,
117    /// Whether the minimum is met.
118    pub coverage_met: bool,
119}
120
121/// Pass/fail criteria for an extension category.
122#[derive(Debug, Clone, Serialize, Deserialize)]
123pub struct CategoryCriteria {
124    pub category: ExtensionCategory,
125    /// What MUST happen for any extension of this category to pass.
126    pub must_pass: Vec<String>,
127    /// What constitutes a failure.
128    pub failure_conditions: Vec<String>,
129    /// What is not tested (out of scope).
130    pub out_of_scope: Vec<String>,
131}
132
133/// The complete test plan document.
134#[derive(Debug, Clone, Serialize, Deserialize)]
135pub struct ConformanceTestPlan {
136    pub schema: String,
137    pub generated_at: String,
138    pub task: String,
139    /// The matrix: category × capability → cell.
140    pub matrix: Vec<ConformanceCell>,
141    /// Fixture assignments: which extensions validate which cells.
142    pub fixture_assignments: Vec<FixtureAssignment>,
143    /// Per-category pass/fail criteria.
144    pub category_criteria: Vec<CategoryCriteria>,
145    /// Coverage summary.
146    pub coverage: CoverageSummary,
147}
148
149/// Coverage summary for the test plan.
150#[derive(Debug, Clone, Serialize, Deserialize)]
151pub struct CoverageSummary {
152    pub total_cells: usize,
153    pub required_cells: usize,
154    pub covered_cells: usize,
155    pub uncovered_required_cells: usize,
156    pub total_exemplar_extensions: usize,
157    pub categories_covered: usize,
158    pub capabilities_covered: usize,
159}
160
161// ────────────────────────────────────────────────────────────────────────────
162// Matrix builder
163// ────────────────────────────────────────────────────────────────────────────
164
165/// API matrix entry from `docs/extension-api-matrix.json`.
166#[derive(Debug, Clone, Deserialize)]
167pub struct ApiMatrixEntry {
168    pub registration_types: Vec<String>,
169    pub hostcalls: Vec<String>,
170    pub capabilities_required: Vec<String>,
171    pub events_listened: Vec<String>,
172    pub node_apis: Vec<String>,
173    pub third_party_deps: Vec<String>,
174}
175
176/// The top-level API matrix document.
177#[derive(Debug, Clone, Deserialize)]
178pub struct ApiMatrix {
179    pub schema: String,
180    pub extensions: HashMap<String, ApiMatrixEntry>,
181}
182
183/// Build the canonical expected behaviors for a category × capability pair.
184#[must_use]
185#[allow(clippy::too_many_lines)]
186fn build_behaviors(
187    category: &ExtensionCategory,
188    capability: HostCapability,
189) -> Vec<ExpectedBehavior> {
190    let mut behaviors = Vec::new();
191
192    // Registration behaviors (universal for all categories)
193    if matches!(capability, HostCapability::Log) {
194        behaviors.push(ExpectedBehavior {
195            description: "Extension load emits structured log".into(),
196            protocol_surface: "pi.ext.log.v1".into(),
197            pass_criteria: "Load event logged with correct extension_id and schema".into(),
198            fail_criteria: "Missing load log or wrong extension_id".into(),
199        });
200        return behaviors;
201    }
202
203    match category {
204        ExtensionCategory::Tool => match capability {
205            HostCapability::Read => behaviors.push(ExpectedBehavior {
206                description: "Tool reads files via pi.tool(read/grep/find/ls)".into(),
207                protocol_surface: "host_call(method=tool, name∈{read,grep,find,ls})".into(),
208                pass_criteria:
209                    "Hostcall completes with correct file content; capability derived as read"
210                        .into(),
211                fail_criteria: "Hostcall denied, wrong capability derivation, or incorrect content"
212                    .into(),
213            }),
214            HostCapability::Write => behaviors.push(ExpectedBehavior {
215                description: "Tool writes/edits files via pi.tool(write/edit)".into(),
216                protocol_surface: "host_call(method=tool, name∈{write,edit})".into(),
217                pass_criteria: "Hostcall completes; file mutation applied correctly".into(),
218                fail_criteria: "Hostcall denied or file not mutated".into(),
219            }),
220            HostCapability::Exec => behaviors.push(ExpectedBehavior {
221                description: "Tool executes commands via pi.exec() or pi.tool(bash)".into(),
222                protocol_surface: "host_call(method=exec) or host_call(method=tool, name=bash)"
223                    .into(),
224                pass_criteria: "Command runs, stdout/stderr/exitCode returned".into(),
225                fail_criteria: "Execution denied, timeout without error, or wrong exit code".into(),
226            }),
227            HostCapability::Http => behaviors.push(ExpectedBehavior {
228                description: "Tool makes HTTP requests via pi.http()".into(),
229                protocol_surface: "host_call(method=http)".into(),
230                pass_criteria: "Request sent, response returned with status/body".into(),
231                fail_criteria: "HTTP denied or malformed response".into(),
232            }),
233            _ => {}
234        },
235        ExtensionCategory::Command => match capability {
236            HostCapability::Ui => behaviors.push(ExpectedBehavior {
237                description: "Slash command prompts user via pi.ui.*".into(),
238                protocol_surface: "host_call(method=ui, op∈{select,input,confirm})".into(),
239                pass_criteria: "UI prompt dispatched and response routed back to handler".into(),
240                fail_criteria: "UI call denied in interactive mode or response lost".into(),
241            }),
242            HostCapability::Session => behaviors.push(ExpectedBehavior {
243                description: "Command accesses session state via pi.session.*".into(),
244                protocol_surface: "host_call(method=session)".into(),
245                pass_criteria: "Session data read/written correctly".into(),
246                fail_criteria: "Session call denied or data corrupted".into(),
247            }),
248            HostCapability::Exec => behaviors.push(ExpectedBehavior {
249                description: "Command executes shell commands".into(),
250                protocol_surface: "host_call(method=exec)".into(),
251                pass_criteria: "Execution succeeds with correct output".into(),
252                fail_criteria: "Execution denied or wrong output".into(),
253            }),
254            _ => {}
255        },
256        ExtensionCategory::Provider => match capability {
257            HostCapability::Http => behaviors.push(ExpectedBehavior {
258                description: "Provider streams LLM responses via pi.http()".into(),
259                protocol_surface: "host_call(method=http) + streamSimple streaming".into(),
260                pass_criteria: "HTTP request to LLM API succeeds; streaming chunks delivered"
261                    .into(),
262                fail_criteria: "HTTP denied, stream broken, or chunks lost".into(),
263            }),
264            HostCapability::Read => behaviors.push(ExpectedBehavior {
265                description: "Provider reads local config files".into(),
266                protocol_surface: "host_call(method=tool, name=read) or pi.fs.read".into(),
267                pass_criteria: "Config file read succeeds".into(),
268                fail_criteria: "Read denied or file not found".into(),
269            }),
270            HostCapability::Env => behaviors.push(ExpectedBehavior {
271                description: "Provider accesses API keys via process.env".into(),
272                protocol_surface: "process.env access (capability=env)".into(),
273                pass_criteria: "Environment variable accessible when env capability granted".into(),
274                fail_criteria: "Env access denied when capability should be granted".into(),
275            }),
276            _ => {}
277        },
278        ExtensionCategory::EventHook => match capability {
279            HostCapability::Session => behaviors.push(ExpectedBehavior {
280                description: "Event hook reads/modifies session on lifecycle events".into(),
281                protocol_surface: "event_hook dispatch + host_call(method=session)".into(),
282                pass_criteria: "Hook fires on correct event; session mutations applied".into(),
283                fail_criteria: "Hook not fired, wrong event, or session mutation lost".into(),
284            }),
285            HostCapability::Ui => behaviors.push(ExpectedBehavior {
286                description: "Event hook renders UI elements".into(),
287                protocol_surface: "event_hook dispatch + host_call(method=ui)".into(),
288                pass_criteria: "UI elements rendered after hook fires".into(),
289                fail_criteria: "UI call fails or hook not dispatched".into(),
290            }),
291            HostCapability::Exec => behaviors.push(ExpectedBehavior {
292                description: "Event hook executes commands on events".into(),
293                protocol_surface: "event_hook dispatch + host_call(method=exec)".into(),
294                pass_criteria: "Command execution triggered by event".into(),
295                fail_criteria: "Execution denied or event not dispatched".into(),
296            }),
297            HostCapability::Http => behaviors.push(ExpectedBehavior {
298                description: "Event hook makes HTTP requests on events".into(),
299                protocol_surface: "event_hook dispatch + host_call(method=http)".into(),
300                pass_criteria: "HTTP request sent when event fires".into(),
301                fail_criteria: "HTTP denied or event not dispatched".into(),
302            }),
303            _ => {}
304        },
305        ExtensionCategory::UiComponent => {
306            if matches!(capability, HostCapability::Ui) {
307                behaviors.push(ExpectedBehavior {
308                    description: "UI component registers message renderer".into(),
309                    protocol_surface: "registerMessageRenderer in register payload".into(),
310                    pass_criteria: "Renderer registered and callable".into(),
311                    fail_criteria: "Renderer not found in registration snapshot".into(),
312                });
313            }
314        }
315        ExtensionCategory::Configuration => match capability {
316            HostCapability::Ui => behaviors.push(ExpectedBehavior {
317                description: "Flag/shortcut activation triggers UI".into(),
318                protocol_surface: "register(flags/shortcuts) + host_call(method=ui)".into(),
319                pass_criteria: "Flag/shortcut registered; activation dispatches correctly".into(),
320                fail_criteria: "Registration missing or activation fails".into(),
321            }),
322            HostCapability::Session => behaviors.push(ExpectedBehavior {
323                description: "Flag modifies session configuration".into(),
324                protocol_surface: "register(flags) + host_call(method=session)".into(),
325                pass_criteria: "Flag value reflected in session state".into(),
326                fail_criteria: "Session state not updated after flag set".into(),
327            }),
328            _ => {}
329        },
330        ExtensionCategory::Multi => {
331            // Multi-category extensions: behaviors are the union of their constituent types.
332            // We add a cross-cutting behavior.
333            behaviors.push(ExpectedBehavior {
334                description: format!(
335                    "Multi-type extension uses {capability:?} across registrations"
336                ),
337                protocol_surface: format!(
338                    "Multiple register types + host_call using {capability:?}"
339                ),
340                pass_criteria: "All registration types load; capability dispatched correctly"
341                    .into(),
342                fail_criteria: "Any registration type fails or capability mismatch".into(),
343            });
344        }
345        ExtensionCategory::General => {
346            if matches!(capability, HostCapability::Session | HostCapability::Ui) {
347                behaviors.push(ExpectedBehavior {
348                    description: format!(
349                        "General extension uses {capability:?} via export default"
350                    ),
351                    protocol_surface: format!("export default + host_call(method={capability:?})"),
352                    pass_criteria: "Extension loads; hostcall dispatched and returns".into(),
353                    fail_criteria: "Load failure or hostcall error".into(),
354                });
355            }
356        }
357    }
358
359    // Universal registration behavior for all categories
360    if matches!(capability, HostCapability::Tool) && !matches!(category, ExtensionCategory::Tool) {
361        // Non-tool extensions that still call tools
362        behaviors.push(ExpectedBehavior {
363            description: "Extension calls non-core tool via pi.tool()".into(),
364            protocol_surface: "host_call(method=tool, name=<non-core>)".into(),
365            pass_criteria: "Tool capability check applied; prompt/deny in strict mode".into(),
366            fail_criteria: "Tool call bypasses capability check".into(),
367        });
368    }
369
370    behaviors
371}
372
373/// Determine whether a category × capability cell is required.
374#[must_use]
375const fn is_required_cell(category: &ExtensionCategory, capability: HostCapability) -> bool {
376    match category {
377        ExtensionCategory::Tool => matches!(
378            capability,
379            HostCapability::Read
380                | HostCapability::Write
381                | HostCapability::Exec
382                | HostCapability::Http
383        ),
384        ExtensionCategory::Command => {
385            matches!(capability, HostCapability::Ui | HostCapability::Session)
386        }
387        ExtensionCategory::Provider => {
388            matches!(capability, HostCapability::Http | HostCapability::Env)
389        }
390        ExtensionCategory::EventHook => matches!(
391            capability,
392            HostCapability::Session | HostCapability::Ui | HostCapability::Exec
393        ),
394        ExtensionCategory::UiComponent => matches!(capability, HostCapability::Ui),
395        ExtensionCategory::Configuration => {
396            matches!(capability, HostCapability::Ui | HostCapability::Session)
397        }
398        ExtensionCategory::Multi => true, // All cells required for multi-type
399        ExtensionCategory::General => {
400            matches!(capability, HostCapability::Session | HostCapability::Ui)
401        }
402    }
403}
404
405/// Build per-category pass/fail criteria.
406#[must_use]
407#[allow(clippy::too_many_lines)]
408fn build_category_criteria() -> Vec<CategoryCriteria> {
409    vec![
410        CategoryCriteria {
411            category: ExtensionCategory::Tool,
412            must_pass: vec![
413                "registerTool present in registration snapshot".into(),
414                "Tool definition includes name, description, and JSON Schema parameters".into(),
415                "tool_call dispatch reaches handler and returns tool_result".into(),
416                "Hostcalls use correct capability derivation (read/write/exec per tool name)"
417                    .into(),
418            ],
419            failure_conditions: vec![
420                "registerTool missing from snapshot".into(),
421                "Tool schema validation fails".into(),
422                "tool_call dispatch error or timeout".into(),
423                "Capability mismatch between declared and derived".into(),
424            ],
425            out_of_scope: vec![
426                "Tool output correctness beyond protocol conformance".into(),
427                "Performance benchmarks (covered by perf harness)".into(),
428            ],
429        },
430        CategoryCriteria {
431            category: ExtensionCategory::Command,
432            must_pass: vec![
433                "registerCommand/registerSlashCommand in registration snapshot".into(),
434                "Command definition includes name and description".into(),
435                "slash_command dispatch reaches handler and returns slash_result".into(),
436                "UI hostcalls (select/input/confirm) dispatch correctly".into(),
437            ],
438            failure_conditions: vec![
439                "Command missing from snapshot".into(),
440                "slash_command dispatch fails".into(),
441                "UI hostcall denied in interactive mode".into(),
442            ],
443            out_of_scope: vec!["Command business logic correctness".into()],
444        },
445        CategoryCriteria {
446            category: ExtensionCategory::Provider,
447            must_pass: vec![
448                "registerProvider in registration snapshot with model entries".into(),
449                "streamSimple callable and returns AsyncIterable<string>".into(),
450                "HTTP hostcalls dispatched with correct capability".into(),
451                "Stream cancellation propagates correctly".into(),
452            ],
453            failure_conditions: vec![
454                "Provider missing from snapshot".into(),
455                "streamSimple throws or hangs".into(),
456                "HTTP capability not derived correctly".into(),
457                "Cancellation does not terminate stream".into(),
458            ],
459            out_of_scope: vec![
460                "LLM response quality".into(),
461                "OAuth token refresh (separate test suite)".into(),
462            ],
463        },
464        CategoryCriteria {
465            category: ExtensionCategory::EventHook,
466            must_pass: vec![
467                "Event hooks registered for declared events".into(),
468                "Hook fires when event dispatched".into(),
469                "Hook can access session/UI/exec hostcalls as declared".into(),
470                "Hook errors do not crash the host".into(),
471            ],
472            failure_conditions: vec![
473                "Event hook not registered".into(),
474                "Hook does not fire on matching event".into(),
475                "Hostcall denied when capability is granted".into(),
476                "Hook error propagates as host crash".into(),
477            ],
478            out_of_scope: vec!["Hook side-effect correctness".into()],
479        },
480        CategoryCriteria {
481            category: ExtensionCategory::UiComponent,
482            must_pass: vec![
483                "registerMessageRenderer in registration snapshot".into(),
484                "Renderer callable with message content".into(),
485                "Rendered output is a valid string/markup".into(),
486            ],
487            failure_conditions: vec![
488                "Renderer missing from snapshot".into(),
489                "Renderer throws on valid input".into(),
490            ],
491            out_of_scope: vec!["Visual rendering correctness (requires UI testing)".into()],
492        },
493        CategoryCriteria {
494            category: ExtensionCategory::Configuration,
495            must_pass: vec![
496                "registerFlag/registerShortcut in registration snapshot".into(),
497                "Flag value readable after registration".into(),
498                "Shortcut activation dispatches correctly".into(),
499            ],
500            failure_conditions: vec![
501                "Flag/shortcut missing from snapshot".into(),
502                "Flag value not persisted".into(),
503                "Shortcut activation does not trigger handler".into(),
504            ],
505            out_of_scope: vec!["Configuration persistence across sessions".into()],
506        },
507        CategoryCriteria {
508            category: ExtensionCategory::Multi,
509            must_pass: vec![
510                "All declared registration types present in snapshot".into(),
511                "Each registration type independently functional".into(),
512                "Capabilities correctly derived for each registration type".into(),
513            ],
514            failure_conditions: vec![
515                "Any declared registration type missing".into(),
516                "Cross-type interaction causes error".into(),
517            ],
518            out_of_scope: vec!["Interaction semantics between registration types".into()],
519        },
520        CategoryCriteria {
521            category: ExtensionCategory::General,
522            must_pass: vec![
523                "Extension loads via export default without error".into(),
524                "Hostcalls dispatched correctly when used".into(),
525            ],
526            failure_conditions: vec![
527                "Load throws an error".into(),
528                "Hostcall denied when capability is granted".into(),
529            ],
530            out_of_scope: vec![
531                "Extensions with no hostcalls (load-only test is sufficient)".into(),
532            ],
533        },
534    ]
535}
536
537/// Determine capabilities for an extension based on its API matrix entry.
538#[must_use]
539fn capabilities_from_api_entry(entry: &ApiMatrixEntry) -> BTreeSet<HostCapability> {
540    let mut caps = BTreeSet::new();
541    for cap_str in &entry.capabilities_required {
542        if let Some(cap) = HostCapability::from_str_loose(cap_str) {
543            caps.insert(cap);
544        }
545    }
546    // Infer from hostcalls
547    for hc in &entry.hostcalls {
548        if hc.contains("http") {
549            caps.insert(HostCapability::Http);
550        }
551        if hc.contains("exec") {
552            caps.insert(HostCapability::Exec);
553        }
554        if hc.contains("session") {
555            caps.insert(HostCapability::Session);
556        }
557        if hc.contains("ui") {
558            caps.insert(HostCapability::Ui);
559        }
560        if hc.contains("events") {
561            caps.insert(HostCapability::Session);
562        }
563    }
564    // Infer from node APIs
565    for api in &entry.node_apis {
566        match api.as_str() {
567            "fs" | "path" => {
568                caps.insert(HostCapability::Read);
569            }
570            "child_process" | "process" => {
571                caps.insert(HostCapability::Exec);
572            }
573            "os" => {
574                caps.insert(HostCapability::Env);
575            }
576            // Pure computation or unknown — no capability needed
577            _ => {}
578        }
579    }
580    caps
581}
582
583/// Map an extension from the inclusion list to its category.
584///
585/// Uses the registration types from the API matrix if available, otherwise
586/// falls back to the inclusion list category.
587#[must_use]
588fn category_for_extension(
589    entry: &InclusionEntry,
590    api_entry: Option<&ApiMatrixEntry>,
591) -> ExtensionCategory {
592    if let Some(api) = api_entry {
593        if !api.registration_types.is_empty() {
594            return crate::extension_inclusion::classify_registrations(
595                &api.registration_types
596                    .iter()
597                    .map(|r| format!("register{}", capitalize_first(r)))
598                    .collect::<Vec<_>>(),
599            );
600        }
601    }
602    entry.category.clone()
603}
604
/// Return `s` with its first character upper-cased and the rest unchanged.
///
/// An empty input yields an empty string. Upper-casing follows
/// `char::to_uppercase`, so a single character may expand to several
/// (e.g. `ß` → `SS`).
fn capitalize_first(s: &str) -> String {
    let mut chars = s.chars();
    match chars.next() {
        // Chain the upper-cased head with the untouched tail and collect once.
        Some(first) => first.to_uppercase().chain(chars).collect(),
        None => String::new(),
    }
}
611
612/// Build the full conformance test plan from inclusion list + API matrix.
613#[must_use]
614#[allow(clippy::too_many_lines)]
615pub fn build_test_plan(
616    inclusion: &InclusionList,
617    api_matrix: Option<&ApiMatrix>,
618    task_id: &str,
619) -> ConformanceTestPlan {
620    // Collect all included extensions (supports both v1 and v2 formats)
621    let all_entries: Vec<&InclusionEntry> = inclusion
622        .tier0
623        .iter()
624        .chain(inclusion.tier1.iter())
625        .chain(inclusion.tier1_review.iter())
626        .chain(inclusion.tier2.iter())
627        .collect();
628
629    // Build extension → category + capabilities map
630    let mut ext_map: BTreeMap<String, (ExtensionCategory, BTreeSet<HostCapability>)> =
631        BTreeMap::new();
632
633    for entry in &all_entries {
634        let api_entry = api_matrix.and_then(|m| m.extensions.get(&entry.id));
635        let cat = category_for_extension(entry, api_entry);
636        let caps = api_entry.map_or_else(BTreeSet::new, capabilities_from_api_entry);
637        ext_map.insert(entry.id.clone(), (cat, caps));
638    }
639
640    // Build the matrix: for each category × capability, collect exemplars
641    let categories = [
642        ExtensionCategory::Tool,
643        ExtensionCategory::Command,
644        ExtensionCategory::Provider,
645        ExtensionCategory::EventHook,
646        ExtensionCategory::UiComponent,
647        ExtensionCategory::Configuration,
648        ExtensionCategory::Multi,
649        ExtensionCategory::General,
650    ];
651
652    let mut matrix = Vec::new();
653    let mut fixture_assignments = Vec::new();
654
655    for category in &categories {
656        for capability in HostCapability::all() {
657            let behaviors = build_behaviors(category, *capability);
658            if behaviors.is_empty() {
659                continue;
660            }
661
662            let required = is_required_cell(category, *capability);
663
664            // Find exemplar extensions
665            let exemplars: Vec<String> = ext_map
666                .iter()
667                .filter(|(_, (cat, caps))| cat == category && caps.contains(capability))
668                .map(|(id, _)| id.clone())
669                .collect();
670
671            let cell_key = format!("{category:?}:{capability:?}");
672
673            let min_fixtures = if required { 2 } else { 1 };
674            let coverage_met = exemplars.len() >= min_fixtures;
675
676            matrix.push(ConformanceCell {
677                category: category.clone(),
678                capability: *capability,
679                required,
680                behaviors,
681                exemplar_extensions: exemplars.clone(),
682            });
683
684            fixture_assignments.push(FixtureAssignment {
685                cell_key,
686                fixture_extensions: exemplars,
687                min_fixtures,
688                coverage_met,
689            });
690        }
691    }
692
693    // Build coverage summary
694    let total_cells = matrix.len();
695    let required_cells = matrix.iter().filter(|c| c.required).count();
696    let covered_cells = fixture_assignments
697        .iter()
698        .filter(|a| a.coverage_met)
699        .count();
700    let uncovered_required_cells = fixture_assignments
701        .iter()
702        .filter(|a| {
703            !a.coverage_met
704                && matrix.iter().any(|c| {
705                    format!("{:?}:{:?}", c.category, c.capability) == a.cell_key && c.required
706                })
707        })
708        .count();
709    let total_exemplars: BTreeSet<&str> = ext_map.keys().map(String::as_str).collect();
710    let categories_covered: std::collections::HashSet<String> = ext_map
711        .values()
712        .map(|(cat, _)| format!("{cat:?}"))
713        .collect();
714    let capabilities_covered: BTreeSet<&HostCapability> =
715        ext_map.values().flat_map(|(_, caps)| caps.iter()).collect();
716
717    let coverage = CoverageSummary {
718        total_cells,
719        required_cells,
720        covered_cells,
721        uncovered_required_cells,
722        total_exemplar_extensions: total_exemplars.len(),
723        categories_covered: categories_covered.len(),
724        capabilities_covered: capabilities_covered.len(),
725    };
726
727    let category_criteria = build_category_criteria();
728
729    ConformanceTestPlan {
730        schema: "pi.ext.conformance-matrix.v1".to_string(),
731        generated_at: crate::extension_validation::chrono_now_iso(),
732        task: task_id.to_string(),
733        matrix,
734        fixture_assignments,
735        category_criteria,
736        coverage,
737    }
738}
739
740// ────────────────────────────────────────────────────────────────────────────
741// Tests
742// ────────────────────────────────────────────────────────────────────────────
743
744#[cfg(test)]
745mod tests {
746    use super::*;
747
748    #[test]
749    fn host_capability_from_str_all_variants() {
750        assert_eq!(
751            HostCapability::from_str_loose("read"),
752            Some(HostCapability::Read)
753        );
754        assert_eq!(
755            HostCapability::from_str_loose("WRITE"),
756            Some(HostCapability::Write)
757        );
758        assert_eq!(
759            HostCapability::from_str_loose("Exec"),
760            Some(HostCapability::Exec)
761        );
762        assert_eq!(
763            HostCapability::from_str_loose("http"),
764            Some(HostCapability::Http)
765        );
766        assert_eq!(
767            HostCapability::from_str_loose("session"),
768            Some(HostCapability::Session)
769        );
770        assert_eq!(
771            HostCapability::from_str_loose("ui"),
772            Some(HostCapability::Ui)
773        );
774        assert_eq!(HostCapability::from_str_loose("unknown"), None);
775    }
776
777    #[test]
778    fn build_behaviors_tool_read() {
779        let behaviors = build_behaviors(&ExtensionCategory::Tool, HostCapability::Read);
780        assert_eq!(behaviors.len(), 1);
781        assert!(behaviors[0].description.contains("reads files"));
782    }
783
784    #[test]
785    fn build_behaviors_provider_http() {
786        let behaviors = build_behaviors(&ExtensionCategory::Provider, HostCapability::Http);
787        assert_eq!(behaviors.len(), 1);
788        assert!(behaviors[0].description.contains("streams LLM"));
789    }
790
791    #[test]
792    fn build_behaviors_empty_for_irrelevant_cell() {
793        let behaviors = build_behaviors(&ExtensionCategory::UiComponent, HostCapability::Exec);
794        assert!(behaviors.is_empty());
795    }
796
797    #[test]
798    fn is_required_tool_read() {
799        assert!(is_required_cell(
800            &ExtensionCategory::Tool,
801            HostCapability::Read
802        ));
803    }
804
805    #[test]
806    fn is_required_provider_http() {
807        assert!(is_required_cell(
808            &ExtensionCategory::Provider,
809            HostCapability::Http
810        ));
811    }
812
813    #[test]
814    fn not_required_tool_session() {
815        assert!(!is_required_cell(
816            &ExtensionCategory::Tool,
817            HostCapability::Session
818        ));
819    }
820
821    #[test]
822    fn capabilities_from_api_entry_basic() {
823        let entry = ApiMatrixEntry {
824            registration_types: vec!["tool".into()],
825            hostcalls: vec!["pi.http()".into()],
826            capabilities_required: vec!["read".into(), "write".into()],
827            events_listened: vec![],
828            node_apis: vec!["fs".into()],
829            third_party_deps: vec![],
830        };
831        let caps = capabilities_from_api_entry(&entry);
832        assert!(caps.contains(&HostCapability::Read));
833        assert!(caps.contains(&HostCapability::Write));
834        assert!(caps.contains(&HostCapability::Http));
835    }
836
837    #[test]
838    fn category_criteria_all_categories_covered() {
839        let criteria = build_category_criteria();
840        assert_eq!(criteria.len(), 8); // All 8 categories
841        let cats: Vec<_> = criteria.iter().map(|c| &c.category).collect();
842        assert!(cats.contains(&&ExtensionCategory::Tool));
843        assert!(cats.contains(&&ExtensionCategory::Provider));
844        assert!(cats.contains(&&ExtensionCategory::General));
845    }
846
847    #[test]
848    fn build_test_plan_empty_inclusion() {
849        let inclusion = InclusionList {
850            schema: "pi.ext.inclusion.v1".into(),
851            generated_at: "2026-01-01T00:00:00Z".into(),
852            task: Some("test".into()),
853            stats: Some(crate::extension_inclusion::InclusionStats {
854                total_included: 0,
855                tier0_count: 0,
856                tier1_count: 0,
857                tier2_count: 0,
858                excluded_count: 0,
859                pinned_npm: 0,
860                pinned_git: 0,
861                pinned_url: 0,
862                pinned_checksum_only: 0,
863            }),
864            tier0: vec![],
865            tier1: vec![],
866            tier2: vec![],
867            exclusions: vec![],
868            category_coverage: std::collections::HashMap::new(),
869            summary: None,
870            tier1_review: vec![],
871            coverage: None,
872            exclusion_notes: vec![],
873        };
874
875        let plan = build_test_plan(&inclusion, None, "test-task");
876        assert_eq!(plan.schema, "pi.ext.conformance-matrix.v1");
877        assert!(!plan.matrix.is_empty()); // Cells defined even without extensions
878        assert_eq!(plan.coverage.total_exemplar_extensions, 0);
879    }
880
881    #[test]
882    fn capitalize_first_works() {
883        assert_eq!(capitalize_first("tool"), "Tool");
884        assert_eq!(capitalize_first(""), "");
885        assert_eq!(capitalize_first("a"), "A");
886    }
887
888    #[test]
889    fn host_capability_all_count() {
890        assert_eq!(HostCapability::all().len(), 9);
891    }
892
893    #[test]
894    fn serde_roundtrip_host_capability() {
895        let cap = HostCapability::Http;
896        let json = serde_json::to_string(&cap).unwrap();
897        assert_eq!(json, "\"http\"");
898        let back: HostCapability = serde_json::from_str(&json).unwrap();
899        assert_eq!(back, cap);
900    }
901
902    #[test]
903    fn serde_roundtrip_conformance_cell() {
904        let cell = ConformanceCell {
905            category: ExtensionCategory::Tool,
906            capability: HostCapability::Read,
907            required: true,
908            behaviors: vec![ExpectedBehavior {
909                description: "test".into(),
910                protocol_surface: "test".into(),
911                pass_criteria: "test".into(),
912                fail_criteria: "test".into(),
913            }],
914            exemplar_extensions: vec!["hello".into()],
915        };
916        let json = serde_json::to_string(&cell).unwrap();
917        let back: ConformanceCell = serde_json::from_str(&json).unwrap();
918        assert_eq!(back.category, ExtensionCategory::Tool);
919        assert!(back.required);
920    }
921
922    mod proptest_conformance_matrix {
923        use super::*;
924        use proptest::prelude::*;
925
926        const ALL_CAP_NAMES: &[&str] = &[
927            "read", "write", "exec", "http", "session", "ui", "log", "env", "tool",
928        ];
929
930        const fn category_from_index(index: usize) -> ExtensionCategory {
931            match index {
932                0 => ExtensionCategory::Tool,
933                1 => ExtensionCategory::Command,
934                2 => ExtensionCategory::Provider,
935                3 => ExtensionCategory::EventHook,
936                4 => ExtensionCategory::UiComponent,
937                5 => ExtensionCategory::Configuration,
938                6 => ExtensionCategory::Multi,
939                _ => ExtensionCategory::General,
940            }
941        }
942
943        fn mask_case(input: &str, upper_mask: &[bool]) -> String {
944            input
945                .chars()
946                .zip(upper_mask.iter().copied())
947                .map(
948                    |(ch, upper)| {
949                        if upper { ch.to_ascii_uppercase() } else { ch }
950                    },
951                )
952                .collect()
953        }
954
955        fn make_inclusion_entry(id: String, category: ExtensionCategory) -> InclusionEntry {
956            InclusionEntry {
957                id,
958                name: None,
959                tier: None,
960                score: None,
961                category,
962                registrations: Vec::new(),
963                version_pin: None,
964                sha256: None,
965                artifact_path: None,
966                license: None,
967                source_tier: None,
968                rationale: None,
969                directory: None,
970                provenance: None,
971                capabilities: None,
972                risk_level: None,
973                inclusion_rationale: None,
974            }
975        }
976
977        fn build_synthetic_plan(
978            specs: &[(usize, Vec<usize>)],
979            reverse_tier_order: bool,
980        ) -> ConformanceTestPlan {
981            let mut tier0 = specs
982                .iter()
983                .enumerate()
984                .map(|(idx, (cat_idx, _))| {
985                    make_inclusion_entry(format!("ext-{idx}"), category_from_index(*cat_idx))
986                })
987                .collect::<Vec<_>>();
988
989            if reverse_tier_order {
990                tier0.reverse();
991            }
992
993            let inclusion = InclusionList {
994                schema: "pi.ext.inclusion.v1".to_string(),
995                generated_at: "2026-01-01T00:00:00Z".to_string(),
996                task: Some("prop-generated".to_string()),
997                stats: None,
998                tier0,
999                tier1: Vec::new(),
1000                tier2: Vec::new(),
1001                exclusions: Vec::new(),
1002                category_coverage: std::collections::HashMap::new(),
1003                summary: None,
1004                tier1_review: Vec::new(),
1005                coverage: None,
1006                exclusion_notes: Vec::new(),
1007            };
1008
1009            let extensions = specs
1010                .iter()
1011                .enumerate()
1012                .map(|(idx, (_, cap_indices))| {
1013                    let id = format!("ext-{idx}");
1014                    let entry = ApiMatrixEntry {
1015                        registration_types: Vec::new(),
1016                        hostcalls: Vec::new(),
1017                        capabilities_required: cap_indices
1018                            .iter()
1019                            .map(|cap_idx| ALL_CAP_NAMES[*cap_idx].to_string())
1020                            .collect(),
1021                        events_listened: Vec::new(),
1022                        node_apis: Vec::new(),
1023                        third_party_deps: Vec::new(),
1024                    };
1025                    (id, entry)
1026                })
1027                .collect::<std::collections::HashMap<_, _>>();
1028
1029            let api_matrix = ApiMatrix {
1030                schema: "pi.ext.api-matrix.v1".to_string(),
1031                extensions,
1032            };
1033
1034            build_test_plan(&inclusion, Some(&api_matrix), "prop-generated")
1035        }
1036
1037        proptest! {
1038            /// `from_str_loose` is case-insensitive for valid names.
1039            #[test]
1040            fn from_str_loose_case_insensitive(idx in 0..ALL_CAP_NAMES.len()) {
1041                let name = ALL_CAP_NAMES[idx];
1042                let lower = HostCapability::from_str_loose(name);
1043                let upper = HostCapability::from_str_loose(&name.to_uppercase());
1044                let mixed = HostCapability::from_str_loose(&capitalize_first(name));
1045                assert_eq!(lower, upper);
1046                assert_eq!(lower, mixed);
1047                assert!(lower.is_some());
1048            }
1049
1050            /// Arbitrary mixed-case variants still parse identically.
1051            #[test]
1052            fn from_str_loose_arbitrary_case_masks(
1053                idx in 0..ALL_CAP_NAMES.len(),
1054                upper_mask in prop::collection::vec(any::<bool>(), 0..64usize),
1055            ) {
1056                let canonical = ALL_CAP_NAMES[idx];
1057                let mut effective_mask = upper_mask;
1058                effective_mask.resize(canonical.len(), false);
1059                effective_mask.truncate(canonical.len());
1060                let variant = mask_case(canonical, &effective_mask);
1061
1062                assert_eq!(
1063                    HostCapability::from_str_loose(canonical),
1064                    HostCapability::from_str_loose(&variant)
1065                );
1066            }
1067
1068            /// `from_str_loose` returns None for unknown strings.
1069            #[test]
1070            fn from_str_loose_unknown(s in "[a-z]{10,20}") {
1071                if !ALL_CAP_NAMES.contains(&s.as_str()) {
1072                    assert!(HostCapability::from_str_loose(&s).is_none());
1073                }
1074            }
1075
1076            /// `all()` always returns exactly 9 capabilities.
1077            #[test]
1078            fn all_count(_dummy in 0..1u8) {
1079                assert_eq!(HostCapability::all().len(), 9);
1080            }
1081
1082            /// `HostCapability` serde roundtrip for all variants.
1083            #[test]
1084            fn capability_serde_roundtrip(idx in 0..9usize) {
1085                let cap = HostCapability::all()[idx];
1086                let json = serde_json::to_string(&cap).unwrap();
1087                let back: HostCapability = serde_json::from_str(&json).unwrap();
1088                assert_eq!(cap, back);
1089            }
1090
1091            /// `is_required_cell` — Multi category requires all capabilities.
1092            #[test]
1093            fn multi_requires_all(idx in 0..9usize) {
1094                let cap = HostCapability::all()[idx];
1095                assert!(is_required_cell(&ExtensionCategory::Multi, cap));
1096            }
1097
1098            /// `is_required_cell` is deterministic.
1099            #[test]
1100            fn required_cell_deterministic(cat_idx in 0..8usize, cap_idx in 0..9usize) {
1101                let cats = [
1102                    ExtensionCategory::Tool,
1103                    ExtensionCategory::Command,
1104                    ExtensionCategory::Provider,
1105                    ExtensionCategory::EventHook,
1106                    ExtensionCategory::UiComponent,
1107                    ExtensionCategory::Configuration,
1108                    ExtensionCategory::Multi,
1109                    ExtensionCategory::General,
1110                ];
1111                let cap = HostCapability::all()[cap_idx];
1112                let first = is_required_cell(&cats[cat_idx], cap);
1113                let second = is_required_cell(&cats[cat_idx], cap);
1114                assert_eq!(first, second);
1115            }
1116
1117            /// `capitalize_first` on empty string returns empty.
1118            #[test]
1119            fn capitalize_first_empty(_dummy in 0..1u8) {
1120                assert_eq!(capitalize_first(""), "");
1121            }
1122
1123            /// `capitalize_first` capitalizes first char.
1124            #[test]
1125            fn capitalize_first_works(s in "[a-z]{1,20}") {
1126                let result = capitalize_first(&s);
1127                let first = result.chars().next().unwrap();
1128                assert!(first.is_uppercase());
1129                assert_eq!(&result[first.len_utf8()..], &s[1..]);
1130            }
1131
1132            /// `capitalize_first` is idempotent on already-capitalized.
1133            #[test]
1134            fn capitalize_first_idempotent(s in "[A-Z][a-z]{0,15}") {
1135                assert_eq!(capitalize_first(&s), s);
1136            }
1137
1138            /// `build_behaviors` never panics for any category/capability combo.
1139            #[test]
1140            fn build_behaviors_never_panics(cat_idx in 0..8usize, cap_idx in 0..9usize) {
1141                let cats = [
1142                    ExtensionCategory::Tool,
1143                    ExtensionCategory::Command,
1144                    ExtensionCategory::Provider,
1145                    ExtensionCategory::EventHook,
1146                    ExtensionCategory::UiComponent,
1147                    ExtensionCategory::Configuration,
1148                    ExtensionCategory::Multi,
1149                    ExtensionCategory::General,
1150                ];
1151                let cap = HostCapability::all()[cap_idx];
1152                let behaviors = build_behaviors(&cats[cat_idx], cap);
1153                // All behaviors should have non-empty fields
1154                for b in &behaviors {
1155                    assert!(!b.description.is_empty());
1156                    assert!(!b.protocol_surface.is_empty());
1157                    assert!(!b.pass_criteria.is_empty());
1158                    assert!(!b.fail_criteria.is_empty());
1159                }
1160            }
1161
1162            /// `build_test_plan` maintains internal matrix/coverage consistency.
1163            #[test]
1164            fn build_test_plan_coverage_invariants(task_id in "[a-z0-9_-]{1,32}") {
1165                let inclusion = InclusionList {
1166                    schema: "pi.ext.inclusion.v1".to_string(),
1167                    generated_at: "2026-01-01T00:00:00Z".to_string(),
1168                    task: Some(task_id.clone()),
1169                    stats: None,
1170                    tier0: Vec::new(),
1171                    tier1: Vec::new(),
1172                    tier2: Vec::new(),
1173                    exclusions: Vec::new(),
1174                    category_coverage: std::collections::HashMap::new(),
1175                    summary: None,
1176                    tier1_review: Vec::new(),
1177                    coverage: None,
1178                    exclusion_notes: Vec::new(),
1179                };
1180
1181                let plan = build_test_plan(&inclusion, None, &task_id);
1182                assert_eq!(plan.task, task_id);
1183                assert_eq!(plan.coverage.total_cells, plan.matrix.len());
1184                assert_eq!(plan.fixture_assignments.len(), plan.matrix.len());
1185                assert!(plan.coverage.required_cells <= plan.coverage.total_cells);
1186                assert!(plan.coverage.covered_cells <= plan.coverage.total_cells);
1187                assert!(plan.coverage.uncovered_required_cells <= plan.coverage.required_cells);
1188
1189                for assignment in &plan.fixture_assignments {
1190                    let matches = plan
1191                        .matrix
1192                        .iter()
1193                        .filter(|cell| format!("{:?}:{:?}", cell.category, cell.capability) == assignment.cell_key)
1194                        .count();
1195                    assert_eq!(matches, 1);
1196                }
1197            }
1198
1199            /// Fixture assignment coverage thresholds always match required-ness.
1200            #[test]
1201            fn build_test_plan_fixture_thresholds_align_with_required_cells(
1202                specs in prop::collection::vec(
1203                    (
1204                        0usize..8usize,
1205                        prop::collection::vec(0usize..ALL_CAP_NAMES.len(), 0..12usize),
1206                    ),
1207                    0..24usize
1208                )
1209            ) {
1210                let plan = build_synthetic_plan(&specs, false);
1211                let required_by_key = plan
1212                    .matrix
1213                    .iter()
1214                    .map(|cell| {
1215                        (
1216                            format!("{:?}:{:?}", cell.category, cell.capability),
1217                            cell.required,
1218                        )
1219                    })
1220                    .collect::<std::collections::BTreeMap<_, _>>();
1221
1222                for assignment in &plan.fixture_assignments {
1223                    let required = required_by_key.get(&assignment.cell_key);
1224                    prop_assert!(required.is_some());
1225                    let min_expected = if *required.expect("present") { 2 } else { 1 };
1226                    prop_assert_eq!(assignment.min_fixtures, min_expected);
1227                    prop_assert_eq!(
1228                        assignment.coverage_met,
1229                        assignment.fixture_extensions.len() >= assignment.min_fixtures
1230                    );
1231                }
1232
1233                let uncovered_required = plan
1234                    .fixture_assignments
1235                    .iter()
1236                    .filter(|assignment| {
1237                        !assignment.coverage_met
1238                            && required_by_key
1239                                .get(&assignment.cell_key)
1240                                .is_some_and(|required| *required)
1241                    })
1242                    .count();
1243                prop_assert_eq!(plan.coverage.uncovered_required_cells, uncovered_required);
1244            }
1245
1246            /// Matrix and fixture shape should be deterministic regardless of tier ordering.
1247            #[test]
1248            fn build_test_plan_shape_is_stable_under_tier_reordering(
1249                specs in prop::collection::vec(
1250                    (
1251                        0usize..8usize,
1252                        prop::collection::vec(0usize..ALL_CAP_NAMES.len(), 0..12usize),
1253                    ),
1254                    0..24usize
1255                )
1256            ) {
1257                let forward = build_synthetic_plan(&specs, false);
1258                let reversed = build_synthetic_plan(&specs, true);
1259
1260                let forward_matrix = serde_json::to_string(&forward.matrix).expect("serialize matrix");
1261                let reversed_matrix = serde_json::to_string(&reversed.matrix).expect("serialize matrix");
1262                prop_assert_eq!(forward_matrix, reversed_matrix);
1263
1264                let forward_assignments =
1265                    serde_json::to_string(&forward.fixture_assignments).expect("serialize assignments");
1266                let reversed_assignments =
1267                    serde_json::to_string(&reversed.fixture_assignments).expect("serialize assignments");
1268                prop_assert_eq!(forward_assignments, reversed_assignments);
1269
1270                let forward_coverage =
1271                    serde_json::to_string(&forward.coverage).expect("serialize coverage");
1272                let reversed_coverage =
1273                    serde_json::to_string(&reversed.coverage).expect("serialize coverage");
1274                prop_assert_eq!(forward_coverage, reversed_coverage);
1275            }
1276
1277            /// Declared capability names always map into the computed capability set.
1278            #[test]
1279            fn capabilities_from_api_entry_includes_declared_valid_capabilities(
1280                cap_indices in proptest::collection::vec(0usize..ALL_CAP_NAMES.len(), 0..24usize)
1281            ) {
1282                let declared = cap_indices
1283                    .iter()
1284                    .map(|idx| ALL_CAP_NAMES[*idx].to_string())
1285                    .collect::<Vec<_>>();
1286                let entry = ApiMatrixEntry {
1287                    registration_types: vec!["tool".to_string()],
1288                    hostcalls: Vec::new(),
1289                    capabilities_required: declared.clone(),
1290                    events_listened: Vec::new(),
1291                    node_apis: Vec::new(),
1292                    third_party_deps: Vec::new(),
1293                };
1294                let computed = capabilities_from_api_entry(&entry);
1295                for cap in declared {
1296                    let parsed = HostCapability::from_str_loose(&cap).expect("declared capability must parse");
1297                    assert!(computed.contains(&parsed));
1298                }
1299            }
1300        }
1301    }
1302}