pub fn profile(lang: Language) -> LanguageProfileExpand description
The curated profile for lang. Scores are static, documented judgments
(see module docs); evidence strings carry the rationale.
Examples found in repository?
examples/swe_lang_profiles.rs (line 43)
39fn real_languages() -> Vec<LanguageProfile> {
40 Language::all()
41 .iter()
42 .filter(|&&l| l != Language::Ideal)
43 .map(|&l| profile(l))
44 .collect()
45}
46
47fn main() {
48 println!("=== Extensive agentic-SWE language comparison (sensitivity analysis) ===\n");
49
50 let langs = real_languages();
51 let ideal = profile(Language::Ideal);
52
53 // ── Per-axis champions ────────────────────────────────────────────────────
54 println!("PER-AXIS CHAMPIONS (best implemented language on each axis)");
55 let axes: [(&str, fn(&LanguageProfile) -> f64); 4] = [
56 ("token", |p| p.token_efficiency),
57 ("determinism", |p| p.determinism),
58 ("reliability", |p| p.reliability),
59 ("safety", |p| p.safety),
60 ];
61 for (name, f) in axes {
62 let best = langs.iter().max_by(|a, b| f(a).partial_cmp(&f(b)).unwrap()).unwrap();
63 println!(" {name:<12} {} ({:.2}) [ideal ceiling {:.2}]", best.language.name(), f(best), f(&ideal));
64 }
65
66 // ── Ranking under each SWE scenario + rank matrix ─────────────────────────
67 println!("\nRANKING BY SWE SCENARIO (implemented languages; score, and MechGen's rank)");
68 let mut rank_of: std::collections::HashMap<&str, Vec<usize>> = std::collections::HashMap::new();
69 let mut top3_count: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
70 for prof in PROFILES {
71 let mut ranked: Vec<&LanguageProfile> = langs.iter().collect();
72 ranked.sort_by(|a, b| score(b, &prof.w).partial_cmp(&score(a, &prof.w)).unwrap());
73 let order: Vec<String> = ranked.iter().map(|p| format!("{}({:.3})", p.language.name(), score(p, &prof.w))).collect();
74 let mg_rank = ranked.iter().position(|p| p.language == Language::MechGen).unwrap() + 1;
75 for (i, p) in ranked.iter().enumerate() {
76 rank_of.entry(p.language.name()).or_default().push(i + 1);
77 if i < 3 {
78 *top3_count.entry(p.language.name()).or_default() += 1;
79 }
80 }
81 println!("\n [{}] {}", prof.name, prof.why);
82 println!(" {}", order[..order.len().min(5)].join(" > "));
83 println!(" … MechGen #{mg_rank} of {}", langs.len());
84 }
85
86 // ── Robustness: how often each language lands top-3 across the 5 scenarios ─
87 println!("\nROBUSTNESS (top-3 finishes across all {} scenarios; best/worst rank)", PROFILES.len());
88 let mut summary: Vec<(&str, usize, usize, usize)> = top3_count
89 .keys()
90 .map(|&name| {
91 let ranks = &rank_of[name];
92 (name, top3_count[name], *ranks.iter().min().unwrap(), *ranks.iter().max().unwrap())
93 })
94 .collect();
95 // include languages that never hit top-3
96 for p in &langs {
97 let n = p.language.name();
98 if !top3_count.contains_key(n) {
99 let ranks = &rank_of[n];
100 summary.push((n, 0, *ranks.iter().min().unwrap(), *ranks.iter().max().unwrap()));
101 }
102 }
103 summary.sort_by(|a, b| b.1.cmp(&a.1).then(a.2.cmp(&b.2)));
104 for (name, t3, best, worst) in &summary {
105 println!(" {name:<12} top-3 in {t3}/{} rank range #{best}–#{worst}", PROFILES.len());
106 }
107
108 // ── Gap to the design-target ceiling (canonical fitness) ──────────────────
109 println!("\nGAP TO THE `ideal` CEILING (canonical unweighted fitness)");
110 let mut byfit: Vec<&LanguageProfile> = langs.iter().collect();
111 byfit.sort_by(|a, b| b.fitness().partial_cmp(&a.fitness()).unwrap());
112 for p in &byfit {
113 println!(" {:<12} {:.3} (−{:.3} from ideal {:.3})", p.language.name(), p.fitness(), ideal.fitness() - p.fitness(), ideal.fitness());
114 }
115
116 // ── Crossover: how token-obsessed must a weighting be to dethrone MechGen? ─
117 // Score model: wt·token + (1−wt)/3·(determinism+reliability+safety). Solve for
118 // the token weight wt where the terse champion ties MechGen.
119 let mg = profile(Language::MechGen);
120 let mg_other = (mg.determinism + mg.reliability + mg.safety) / 3.0;
121 let crossover = |b: &LanguageProfile| -> f64 {
122 let bo = (b.determinism + b.reliability + b.safety) / 3.0;
123 // wt = (bo − mg_other) / (mg.token − mg_other − b.token + bo)
124 (bo - mg_other) / (mg.token_efficiency - mg_other - b.token_efficiency + bo)
125 };
126 let bash = profile(Language::Bash);
127 let py = profile(Language::Python);
128 println!("\nCROSSOVER (token-weight at which a terse language overtakes MechGen)");
129 println!(" vs bash (token {:.2}): {:.0}% token weight", bash.token_efficiency, crossover(&bash) * 100.0);
130 println!(" vs python (token {:.2}): {:.0}% token weight", py.token_efficiency, crossover(&py) * 100.0);
131 println!(" → no realistic SWE weighting (token ≤ 40%) flips it; the crossover above (now even");
132 println!(" higher, since the landed inference made MechGen's token axis competitive) needs");
133 println!(" near-pure code-golf that ignores correctness for a terse language to win.");
134
135 // ── Honest reading ────────────────────────────────────────────────────────
136 println!("\nREADING (honest — MechGen is the project's own language):");
137 println!(" • MechGen ranks #1 under ALL FIVE realistic SWE scenarios — including the");
138 println!(" token-leaning rapid-prototyping one — because its reliability/determinism/safety");
139 println!(" lead is large enough that realistic token weighting cannot overcome it.");
140 println!(" • This is robust, not a one-weighting artifact: it never owns the `token` axis");
141 println!(" (Bash does), yet it tops every scenario; only a ~70%-token weighting (above)");
142 println!(" that essentially ignores correctness would favor terse languages — its token FLOOR.");
143 println!(" • Bias guards hold (token ≤ Python, reliability ≤ Rust, no axis ≥ 0.98); scores were");
144 println!(" corrected DOWN twice. The only thing above it is the unreachable `ideal` design target,");
145 println!(" and it loses reliability head-to-head to battle-tested Rust (0.94 vs 0.95).");
146}More examples
examples/swe_languages.rs (line 54)
25fn main() {
26 println!("=== Agentic-SWE language comparison ===\n");
27
28 // ── 1. Canonical agentic fitness (unweighted mean) ────────────────────────
29 println!("CANONICAL agentic fitness (unweighted mean of the four axes)");
30 println!(
31 "{:<12} {:>7} {:>6} {:>6} {:>6} {:>6}",
32 "language", "fitness", "token", "determ", "reliab", "safety"
33 );
34 for p in rank_languages() {
35 let tag = match p.language {
36 Language::Ideal => " ← design-target ceiling (not a real language)",
37 Language::MechGen => " ← this project's language (bias-audited)",
38 _ => "",
39 };
40 println!(
41 "{:<12} {:>7.3} {:>6.2} {:>6.2} {:>6.2} {:>6.2}{}",
42 p.language.name(),
43 p.fitness(),
44 p.token_efficiency,
45 p.determinism,
46 p.reliability,
47 p.safety,
48 tag,
49 );
50 }
51
52 // ── 2. SWE-weighted ranking ───────────────────────────────────────────────
53 println!("\nSWE-WEIGHTED ranking (reliability .35, determinism .30, safety .20, token .15)");
54 let mut ranked: Vec<LanguageProfile> = Language::all().iter().map(|&l| profile(l)).collect();
55 ranked.sort_by(|a, b| swe_score(b).partial_cmp(&swe_score(a)).unwrap());
56 println!("{:<12} {:>9} vs canonical", "language", "swe-score");
57 for p in &ranked {
58 let delta = swe_score(p) - p.fitness();
59 let note = if p.language == Language::Ideal {
60 " (design target)"
61 } else {
62 ""
63 };
64 println!(
65 "{:<12} {:>9.3} {:+.3}{}",
66 p.language.name(),
67 swe_score(p),
68 delta,
69 note,
70 );
71 }
72 println!(
73 " (the SWE lens lifts strongly-typed/reproducible languages — Rust, MechGen, Go —\n \
74 and demotes terse-but-unsafe ones — Python, Bash — vs the unweighted mean.)"
75 );
76
77 // ── 3. Head-to-head: MechGen vs the popular real languages ─────────────────
78 println!("\nHEAD-TO-HEAD (positive = MechGen fits agentic SWE better)");
79 for other in [Language::Rust, Language::Python, Language::Go, Language::TypeScript] {
80 let c = compare_languages(Language::MechGen, other);
81 print!("{c}");
82 }
83
84 // ── 4. Reading + honesty ──────────────────────────────────────────────────
85 let mg = profile(Language::MechGen);
86 let rust = profile(Language::Rust);
87 let py = profile(Language::Python);
88 println!("READING");
89 println!(
90 " Among IMPLEMENTED languages MechGen ranks #1 ({:.3}); only the `ideal` DESIGN TARGET\n \
91 ({:.3}, token-floored, unreachable for any text language) sits above it.",
92 mg.fitness(),
93 profile(Language::Ideal).fitness()
94 );
95 println!(
96 " Under the SWE weighting it stays #1 among real languages: its reliability ({:.2}) and\n \
97 determinism ({:.2}) — sound effects, exhaustiveness, machine-readable fixes, byte-stable IR —\n \
98 are exactly what the build→test→debug loop rewards.",
99 mg.reliability, mg.determinism
100 );
101 println!("\nHONESTY (this is the project's own language — bias is the risk):");
102 println!(
103 " • Scores move on EVIDENCE, both ways: token was corrected DOWN 0.73→0.60 (a C/Go\n \
104 head-to-head exposed the old surface as MORE verbose), then RAISED 0.60→0.80 after the\n \
105 ab-initio migration LANDED type inference + `;`-removal (1166 tests green) and measured\n \
106 #1 of six on the real-BPE swe_token_benchmark. Composite was also corrected 0.95→0.865.");
107 println!(
108 " • Falsifiable guards still hold: token ({:.2}) ≤ Python ({:.2}) — measured tersest but\n \
109 scored at parity, not above; reliability ({:.2}) ≤ Rust ({:.2}); no axis ≥ 0.98 (prototype).",
110 mg.token_efficiency, py.token_efficiency, mg.reliability, rust.reliability
111 );
112 println!(
113 " • The advantage is real on the axes SWE cares about (reliability/determinism/safety),\n \
114 NOT on tokens — and it is a young prototype vs battle-tested Rust on correctness maturity."
115 );
116}