Function compare_languages

Source

pub fn compare_languages(a: Language, b: Language) -> LanguageComparison

Expand description

Compare language `a` against baseline `b` across all four axes (token efficiency, determinism, reliability, safety).

Examples found in repository?

examples/swe_languages.rs (line 80)

25fn main() {
26    println!("=== Agentic-SWE language comparison ===\n");
27
28    // ── 1. Canonical agentic fitness (unweighted mean) ────────────────────────
29    println!("CANONICAL agentic fitness (unweighted mean of the four axes)");
30    println!(
31        "{:<12} {:>7}   {:>6} {:>6} {:>6} {:>6}",
32        "language", "fitness", "token", "determ", "reliab", "safety"
33    );
34    for p in rank_languages() {
35        let tag = match p.language {
36            Language::Ideal => "  ← design-target ceiling (not a real language)",
37            Language::MechGen => "  ← this project's language (bias-audited)",
38            _ => "",
39        };
40        println!(
41            "{:<12} {:>7.3}   {:>6.2} {:>6.2} {:>6.2} {:>6.2}{}",
42            p.language.name(),
43            p.fitness(),
44            p.token_efficiency,
45            p.determinism,
46            p.reliability,
47            p.safety,
48            tag,
49        );
50    }
51
52    // ── 2. SWE-weighted ranking ───────────────────────────────────────────────
53    println!("\nSWE-WEIGHTED ranking (reliability .35, determinism .30, safety .20, token .15)");
54    let mut ranked: Vec<LanguageProfile> = Language::all().iter().map(|&l| profile(l)).collect();
55    ranked.sort_by(|a, b| swe_score(b).partial_cmp(&swe_score(a)).unwrap());
56    println!("{:<12} {:>9}   vs canonical", "language", "swe-score");
57    for p in &ranked {
58        let delta = swe_score(p) - p.fitness();
59        let note = if p.language == Language::Ideal {
60            "  (design target)"
61        } else {
62            ""
63        };
64        println!(
65            "{:<12} {:>9.3}   {:+.3}{}",
66            p.language.name(),
67            swe_score(p),
68            delta,
69            note,
70        );
71    }
72    println!(
73        "  (the SWE lens lifts strongly-typed/reproducible languages — Rust, MechGen, Go —\n   \
74         and demotes terse-but-unsafe ones — Python, Bash — vs the unweighted mean.)"
75    );
76
77    // ── 3. Head-to-head: MechGen vs the popular real languages ─────────────────
78    println!("\nHEAD-TO-HEAD (positive = MechGen fits agentic SWE better)");
79    for other in [Language::Rust, Language::Python, Language::Go, Language::TypeScript] {
80        let c = compare_languages(Language::MechGen, other);
81        print!("{c}");
82    }
83
84    // ── 4. Reading + honesty ──────────────────────────────────────────────────
85    let mg = profile(Language::MechGen);
86    let rust = profile(Language::Rust);
87    let py = profile(Language::Python);
88    println!("READING");
89    println!(
90        "  Among IMPLEMENTED languages MechGen ranks #1 ({:.3}); only the `ideal` DESIGN TARGET\n  \
91         ({:.3}, token-floored, unreachable for any text language) sits above it.",
92        mg.fitness(),
93        profile(Language::Ideal).fitness()
94    );
95    println!(
96        "  Under the SWE weighting it stays #1 among real languages: its reliability ({:.2}) and\n  \
97         determinism ({:.2}) — sound effects, exhaustiveness, machine-readable fixes, byte-stable IR —\n  \
98         are exactly what the build→test→debug loop rewards.",
99        mg.reliability, mg.determinism
100    );
101    println!("\nHONESTY (this is the project's own language — bias is the risk):");
102    println!(
103        "  • Scores move on EVIDENCE, both ways: token was corrected DOWN 0.73→0.60 (a C/Go\n    \
104         head-to-head exposed the old surface as MORE verbose), then RAISED 0.60→0.80 after the\n    \
105         ab-initio migration LANDED type inference + `;`-removal (1166 tests green) and measured\n    \
106         #1 of six on the real-BPE swe_token_benchmark. Composite was also corrected 0.95→0.865.");
107    println!(
108        "  • Falsifiable guards still hold: token ({:.2}) ≤ Python ({:.2}) — measured tersest but\n    \
109         scored at parity, not above; reliability ({:.2}) ≤ Rust ({:.2}); no axis ≥ 0.98 (prototype).",
110        mg.token_efficiency, py.token_efficiency, mg.reliability, rust.reliability
111    );
112    println!(
113        "  • The advantage is real on the axes SWE cares about (reliability/determinism/safety),\n    \
114         NOT on tokens — and it is a young prototype vs battle-tested Rust on correctness maturity."
115    );
116}

compare_languages

Function compare_languages Copy item path

Examples found in repository?

Function compare_languages

Examples found in repository ?