Skip to main content

sim_lib_standard_core/
matrix.rs

1//! Shared language conformance matrix data structures.
2
3use indexmap::IndexMap;
4use sim_kernel::{Cx, Error, Expr, Result, Symbol, Value};
5
6use crate::{
7    ConformanceOutcome, LanguageProfile, matrix_claims::publish_matrix_cell_claim,
8    standard_test_capability,
9};
10
/// Expected outcome for a source-level conformance case.
///
/// Stored in [`SourceConformanceCase::expectation`] and paired with a
/// [`SourceObservation`] by [`compare_source_observation`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SourceExpectation {
    /// The source lowers to the described shared expression form.
    ///
    /// The string is compared for exact equality with the observed lowering.
    LowersTo(String),
    /// The source is an explicit known gap with a machine-readable code.
    ExpectedGap {
        /// Gap code; the observed gap code must equal it for the case to be
        /// reported as a gap rather than a failure.
        code: Symbol,
        /// Human-readable reason; copied into the gap outcome when the
        /// observed code matches.
        reason: String,
    },
}
24
/// Observation produced by a language-specific source-case runner.
///
/// Has one variant for each [`SourceExpectation`] variant;
/// [`compare_source_observation`] pairs the two to decide the outcome.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SourceObservation {
    /// Source lowered to the displayed shared form.
    LowersTo(String),
    /// Source is a declared gap with a machine-readable code and reason.
    Gap {
        /// Gap code; compared against the expected gap code.
        code: Symbol,
        /// Human-readable reason; only appears in failure messages when the
        /// observation does not match the expectation.
        reason: String,
    },
}
38
/// One source-language conformance case.
///
/// [`MatrixRunner::run_row`] hands each case to the caller-supplied runner
/// and turns the comparison result into one [`MatrixCellResult`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SourceConformanceCase {
    /// Stable symbol identifying this case; copied into
    /// [`MatrixCellResult::case_symbol`].
    pub symbol: Symbol,
    /// Organ exercised by this case; copied into [`MatrixCellResult::organ`].
    pub organ: Symbol,
    /// Source filename or display name.
    pub source_name: String,
    /// Source text.
    pub source: String,
    /// Expected result that the runner's observation is compared against.
    pub expectation: SourceExpectation,
    /// Fidelity badge affected by this case, if any.
    pub affects_badge: Option<Symbol>,
}
55
/// Codec-faithful source case that decodes to the shared `Expr` graph.
///
/// The case records source text plus the canonical display expected from the
/// decoded expression. A missing expected display means successful decoding is
/// enough; a language-specific decoder returns `Ok(None)` for an explicit gap.
///
/// The display that is compared is the literal prefix `Expr::` followed by
/// the decoded expression's `Debug` rendering.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ExprRoundTripCase {
    /// Stable symbol identifying this case.
    pub symbol: Symbol,
    /// Language exercised by this case.
    pub language: Symbol,
    /// Source text passed verbatim to the decoder.
    pub source: String,
    /// Expected canonical display of the decoded expression.
    ///
    /// Compared for exact string equality; `None` accepts any successful
    /// decode.
    pub expected_display: Option<String>,
    /// Fidelity badge affected by this case, if any.
    pub affects_badge: Option<Symbol>,
}
74
/// Observation produced by running an [`ExprRoundTripCase`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ExprRoundTripObservation {
    /// Decoded and matched the expected display, or no display was required.
    ///
    /// Carries the display of the decoded expression.
    RoundTripped(String),
    /// Decoded but did not match the expected display.
    Mismatch {
        /// Expected expression display.
        expected: String,
        /// Actual expression display.
        got: String,
    },
    /// Codec returned a diagnostic code.
    ///
    /// [`ExprRoundTripCase::run_expr_round_trip`] produces a symbol qualified
    /// under `codec` with the slug `unsupported` or `error`.
    Diagnostic(Symbol),
    /// Known gap.
    ///
    /// [`ExprRoundTripCase::run_expr_round_trip`] produces this when the
    /// decoder returns `Ok(None)`, using the symbol `declared-gap` qualified
    /// under `codec`.
    Gap(Symbol),
}
92
93impl ExprRoundTripCase {
94    /// Runs this case using `decode_fn` to decode source into an expression.
95    pub fn run_expr_round_trip(
96        &self,
97        cx: &mut Cx,
98        decode_fn: impl Fn(&mut Cx, &str) -> Result<Option<Expr>>,
99    ) -> ExprRoundTripObservation {
100        match decode_fn(cx, &self.source) {
101            Err(err) => ExprRoundTripObservation::Diagnostic(Symbol::qualified(
102                "codec",
103                diagnostic_slug(&err),
104            )),
105            Ok(None) => ExprRoundTripObservation::Gap(Symbol::qualified("codec", "declared-gap")),
106            Ok(Some(expr)) => {
107                let got = expr_display(&expr);
108                match &self.expected_display {
109                    None => ExprRoundTripObservation::RoundTripped(got),
110                    Some(expected) if expected == &got => {
111                        ExprRoundTripObservation::RoundTripped(got)
112                    }
113                    Some(expected) => ExprRoundTripObservation::Mismatch {
114                        expected: expected.clone(),
115                        got,
116                    },
117                }
118            }
119        }
120    }
121
122    /// Runs this case using `decode_fn` to decode source into an expression.
123    pub fn run(
124        &self,
125        cx: &mut Cx,
126        decode_fn: impl Fn(&mut Cx, &str) -> Result<Option<Expr>>,
127    ) -> ExprRoundTripObservation {
128        self.run_expr_round_trip(cx, decode_fn)
129    }
130}
131
/// A single language surface registered in the shared conformance matrix.
///
/// The row contains current conformance evidence for one language profile. Each
/// row uses a stable language symbol, owns the profile metadata for that row,
/// and carries only explicit source or expression cases. An empty row is a
/// declared language entry without scored evidence.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LanguageRow {
    /// Language symbol, for example `scheme` or `lua`.
    ///
    /// Used as the key when the row is registered in a [`ConformanceMatrix`].
    pub language: Symbol,
    /// Profile supplied by the language crate.
    ///
    /// [`MatrixRunner::run_row`] copies `profile.symbol` into every cell.
    pub profile: LanguageProfile,
    /// Source cases registered for this language; these are the cases that
    /// [`MatrixRunner::run_row`] executes.
    pub cases: Vec<SourceConformanceCase>,
    /// Expression round-trip cases registered for this language.
    ///
    /// [`MatrixRunner::run_row`] does not execute these.
    pub expr_cases: Vec<ExprRoundTripCase>,
}
149
150impl LanguageRow {
151    /// Declares a language row with no source cases.
152    pub fn declared_empty(language: Symbol, profile: LanguageProfile) -> Self {
153        Self {
154            language,
155            profile,
156            cases: Vec::new(),
157            expr_cases: Vec::new(),
158        }
159    }
160
161    /// Returns whether this row currently has no cases.
162    pub fn is_empty(&self) -> bool {
163        self.cases.is_empty() && self.expr_cases.is_empty()
164    }
165
166    /// Replaces expression round-trip cases for this row.
167    pub fn with_expr_cases(mut self, expr_cases: Vec<ExprRoundTripCase>) -> Self {
168        self.expr_cases = expr_cases;
169        self
170    }
171}
172
/// Builder for [`LanguageRow`] values.
///
/// Cases are accumulated in the order they are added and handed to the row
/// unchanged by [`LanguageRowBuilder::build`].
#[derive(Clone, Debug)]
pub struct LanguageRowBuilder {
    // Language symbol the finished row will carry.
    language: Symbol,
    // Profile metadata the finished row will carry.
    profile: LanguageProfile,
    // Source cases accumulated so far, in insertion order.
    cases: Vec<SourceConformanceCase>,
    // Expression round-trip cases accumulated so far, in insertion order.
    expr_cases: Vec<ExprRoundTripCase>,
}
181
182impl LanguageRowBuilder {
183    /// Starts a row builder for `language` and `profile`.
184    pub fn new(language: Symbol, profile: LanguageProfile) -> Self {
185        Self {
186            language,
187            profile,
188            cases: Vec::new(),
189            expr_cases: Vec::new(),
190        }
191    }
192
193    /// Appends one source case.
194    pub fn with_case(mut self, case: SourceConformanceCase) -> Self {
195        self.cases.push(case);
196        self
197    }
198
199    /// Appends source cases from an iterator.
200    pub fn with_cases<I>(mut self, cases: I) -> Self
201    where
202        I: IntoIterator<Item = SourceConformanceCase>,
203    {
204        self.cases.extend(cases);
205        self
206    }
207
208    /// Appends expression round-trip cases from an iterator.
209    pub fn with_expr_cases<I>(mut self, cases: I) -> Self
210    where
211        I: IntoIterator<Item = ExprRoundTripCase>,
212    {
213        self.expr_cases.extend(cases);
214        self
215    }
216
217    /// Builds the row.
218    pub fn build(self) -> LanguageRow {
219        LanguageRow {
220            language: self.language,
221            profile: self.profile,
222            cases: self.cases,
223            expr_cases: self.expr_cases,
224        }
225    }
226}
227
/// Outcome for a single language/case cell in a matrix run.
///
/// [`MatrixRunner::run_row`] creates one cell per source case, copying the
/// identifying symbols from the row and the case.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MatrixCellResult {
    /// Language symbol for this row.
    pub language: Symbol,
    /// Profile symbol for this row (the row profile's `symbol`).
    pub profile: Symbol,
    /// Organ exercised by this case.
    pub organ: Symbol,
    /// Stable case symbol.
    pub case_symbol: Symbol,
    /// Compared conformance outcome: pass, gap, or fail.
    pub outcome: ConformanceOutcome,
}
242
/// Accumulated results for one matrix run.
///
/// The report is evidence produced by a runner invocation. Gaps remain visible
/// as cells, while fidelity counts only pass and fail cells so declared gaps do
/// not inflate or reduce the score.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MatrixRunReport {
    /// Matrix cells produced by the run.
    ///
    /// A report from [`MatrixRunner::run_row`] holds one cell per source case,
    /// in the row's case order.
    pub cells: Vec<MatrixCellResult>,
}
253
254impl MatrixRunReport {
255    /// Number of passing cells.
256    pub fn pass_count(&self) -> usize {
257        self.cells
258            .iter()
259            .filter(|cell| cell.outcome.is_pass())
260            .count()
261    }
262
263    /// Number of declared gap cells.
264    pub fn gap_count(&self) -> usize {
265        self.cells
266            .iter()
267            .filter(|cell| cell.outcome.is_gap())
268            .count()
269    }
270
271    /// Number of failing cells.
272    pub fn fail_count(&self) -> usize {
273        self.cells
274            .iter()
275            .filter(|cell| cell.outcome.is_fail())
276            .count()
277    }
278
279    /// Fidelity for one language: passes divided by passes plus failures,
280    /// ignoring declared gaps. Returns `None` when no pass-or-fail cells exist.
281    pub fn language_fidelity(&self, language: &Symbol) -> Option<f32> {
282        let pass = self
283            .cells
284            .iter()
285            .filter(|cell| &cell.language == language && cell.outcome.is_pass())
286            .count();
287        let fail = self
288            .cells
289            .iter()
290            .filter(|cell| &cell.language == language && cell.outcome.is_fail())
291            .count();
292        if pass + fail == 0 {
293            None
294        } else {
295            Some(pass as f32 / (pass + fail) as f32)
296        }
297    }
298
299    /// Produces Card fields for one language's browseable conformance surface.
300    ///
301    /// These fields answer how much of a language profile is backed by current
302    /// matrix evidence for agents and humans browsing the Card.
303    pub fn conformance_card_fields(
304        &self,
305        cx: &mut Cx,
306        language: &Symbol,
307    ) -> Result<Vec<(Symbol, Value)>> {
308        let pass = self.language_outcome_count(language, ConformanceOutcome::is_pass);
309        let gap = self.language_outcome_count(language, ConformanceOutcome::is_gap);
310        let fail = self.language_outcome_count(language, ConformanceOutcome::is_fail);
311        let fidelity = self
312            .language_fidelity(language)
313            .map(|value| format!("{:.0}%", value * 100.0))
314            .unwrap_or_else(|| "unscored".to_owned());
315        conformance_card_fields(cx, pass, gap, fail, fidelity)
316    }
317
318    /// Produces zero-count conformance Card fields with unscored fidelity.
319    pub fn unscored_conformance_card_fields(cx: &mut Cx) -> Result<Vec<(Symbol, Value)>> {
320        conformance_card_fields(cx, 0, 0, 0, "unscored".to_owned())
321    }
322
323    /// Writes one evidence claim per cell into the claim store.
324    pub fn publish_claims(&self, cx: &mut Cx) -> Result<()> {
325        cx.require(&standard_test_capability())?;
326        for cell in &self.cells {
327            publish_matrix_cell_claim(cx, cell)?;
328        }
329        Ok(())
330    }
331
332    fn language_outcome_count(
333        &self,
334        language: &Symbol,
335        matches: impl Fn(&ConformanceOutcome) -> bool,
336    ) -> usize {
337        self.cells
338            .iter()
339            .filter(|cell| &cell.language == language && matches(&cell.outcome))
340            .count()
341    }
342}
343
/// Runs language rows through caller-supplied source-case runners.
///
/// The runner compares the row's expected source outcomes with observations
/// from the caller. It does not depend on a concrete language codec; each
/// language crate supplies its own execution closure and publishes the report
/// when evidence claims are needed.
///
/// This is a field-less unit struct; it only namespaces the associated
/// function [`MatrixRunner::run_row`].
pub struct MatrixRunner;
351
352impl MatrixRunner {
353    /// Runs a single language row, using `run_case` to execute each source case.
354    pub fn run_row<F>(cx: &mut Cx, row: &LanguageRow, run_case: F) -> MatrixRunReport
355    where
356        F: Fn(&mut Cx, &SourceConformanceCase) -> Result<SourceObservation>,
357    {
358        let mut cells = Vec::with_capacity(row.cases.len());
359        for case in &row.cases {
360            let outcome = match run_case(cx, case) {
361                Ok(observation) => compare_source_observation(case, observation),
362                Err(err) => ConformanceOutcome::fail_with(err.to_string()),
363            };
364            cells.push(MatrixCellResult {
365                language: row.language.clone(),
366                profile: row.profile.symbol.clone(),
367                organ: case.organ.clone(),
368                case_symbol: case.symbol.clone(),
369                outcome,
370            });
371        }
372        MatrixRunReport { cells }
373    }
374}
375
376/// Compares a source observation against its expected result.
377pub fn compare_source_observation(
378    case: &SourceConformanceCase,
379    observation: SourceObservation,
380) -> ConformanceOutcome {
381    match (&case.expectation, observation) {
382        (SourceExpectation::LowersTo(expected), SourceObservation::LowersTo(got)) => {
383            if expected == &got {
384                ConformanceOutcome::pass()
385            } else {
386                ConformanceOutcome::fail(format!("expected {expected}, got {got}"))
387            }
388        }
389        (
390            SourceExpectation::ExpectedGap { code, reason },
391            SourceObservation::Gap {
392                code: got,
393                reason: got_reason,
394            },
395        ) => {
396            if code == &got {
397                ConformanceOutcome::gap(reason.clone())
398            } else {
399                ConformanceOutcome::fail(format!(
400                    "expected gap {code}, got gap {got}: {got_reason}"
401                ))
402            }
403        }
404        (SourceExpectation::ExpectedGap { code, .. }, SourceObservation::LowersTo(got)) => {
405            ConformanceOutcome::fail(format!("expected gap {code}, got {got}"))
406        }
407        (SourceExpectation::LowersTo(expected), SourceObservation::Gap { code, reason }) => {
408            ConformanceOutcome::fail(format!("expected {expected}, got gap {code}: {reason}"))
409        }
410    }
411}
412
/// Shared conformance matrix keyed by language symbol.
///
/// Rows preserve registration order and are unique by language symbol. The
/// matrix owns row metadata and case definitions only; execution lives in
/// [`MatrixRunner`] and language-specific runners.
#[derive(Default)]
pub struct ConformanceMatrix {
    // Registered rows keyed by each row's `language` symbol; the map is
    // iterated in registration order.
    rows: IndexMap<Symbol, LanguageRow>,
}
422
423impl ConformanceMatrix {
424    /// Creates an empty matrix.
425    pub fn new() -> Self {
426        Self::default()
427    }
428
429    /// Registers a language row.
430    ///
431    /// # Panics
432    ///
433    /// Panics when the language symbol is already registered.
434    pub fn register(&mut self, row: LanguageRow) {
435        let language = row.language.clone();
436        assert!(
437            self.rows.insert(language.clone(), row).is_none(),
438            "language already registered in matrix: {language}",
439        );
440    }
441
442    /// Number of registered languages.
443    pub fn language_count(&self) -> usize {
444        self.rows.len()
445    }
446
447    /// Returns the row for `language`, if registered.
448    pub fn row(&self, language: &Symbol) -> Option<&LanguageRow> {
449        self.rows.get(language)
450    }
451
452    /// Iterates rows in registration order.
453    pub fn iter_rows(&self) -> impl Iterator<Item = &LanguageRow> {
454        self.rows.values()
455    }
456
457    /// Total source cases across all registered languages.
458    pub fn total_cases(&self) -> usize {
459        self.rows.values().map(|row| row.cases.len()).sum()
460    }
461
462    /// Total expression round-trip cases across all registered languages.
463    pub fn total_expr_cases(&self) -> usize {
464        self.rows.values().map(|row| row.expr_cases.len()).sum()
465    }
466}
467
468fn expr_display(expr: &Expr) -> String {
469    format!("Expr::{expr:?}")
470}
471
472fn diagnostic_slug(err: &Error) -> &'static str {
473    if err.to_string().to_ascii_lowercase().contains("unsupported") {
474        "unsupported"
475    } else {
476        "error"
477    }
478}
479
480fn conformance_card_fields(
481    cx: &mut Cx,
482    pass: usize,
483    gap: usize,
484    fail: usize,
485    fidelity: String,
486) -> Result<Vec<(Symbol, Value)>> {
487    Ok(vec![
488        (conformance_field("pass"), count_value(cx, pass)?),
489        (conformance_field("gap"), count_value(cx, gap)?),
490        (conformance_field("fail"), count_value(cx, fail)?),
491        (
492            conformance_field("fidelity"),
493            cx.factory().string(fidelity)?,
494        ),
495    ])
496}
497
498fn conformance_field(name: &str) -> Symbol {
499    Symbol::new(format!("conformance.{name}"))
500}
501
502fn count_value(cx: &mut Cx, count: usize) -> Result<Value> {
503    cx.factory()
504        .number_literal(Symbol::qualified("numbers", "u64"), count.to_string())
505}