Skip to main content

profile

Function profile 

Source
pub fn profile(lang: Language) -> LanguageProfile
Expand description

The curated profile for `lang`. Scores are static, documented judgments (see module docs); evidence strings carry the rationale.

Examples found in repository?
examples/swe_lang_profiles.rs (line 43)
39fn real_languages() -> Vec<LanguageProfile> {
40    Language::all()
41        .iter()
42        .filter(|&&l| l != Language::Ideal)
43        .map(|&l| profile(l))
44        .collect()
45}
46
47fn main() {
48    println!("=== Extensive agentic-SWE language comparison (sensitivity analysis) ===\n");
49
50    let langs = real_languages();
51    let ideal = profile(Language::Ideal);
52
53    // ── Per-axis champions ────────────────────────────────────────────────────
54    println!("PER-AXIS CHAMPIONS (best implemented language on each axis)");
55    let axes: [(&str, fn(&LanguageProfile) -> f64); 4] = [
56        ("token", |p| p.token_efficiency),
57        ("determinism", |p| p.determinism),
58        ("reliability", |p| p.reliability),
59        ("safety", |p| p.safety),
60    ];
61    for (name, f) in axes {
62        let best = langs.iter().max_by(|a, b| f(a).partial_cmp(&f(b)).unwrap()).unwrap();
63        println!("  {name:<12} {} ({:.2})   [ideal ceiling {:.2}]", best.language.name(), f(best), f(&ideal));
64    }
65
66    // ── Ranking under each SWE scenario + rank matrix ─────────────────────────
67    println!("\nRANKING BY SWE SCENARIO (implemented languages; score, and MechGen's rank)");
68    let mut rank_of: std::collections::HashMap<&str, Vec<usize>> = std::collections::HashMap::new();
69    let mut top3_count: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
70    for prof in PROFILES {
71        let mut ranked: Vec<&LanguageProfile> = langs.iter().collect();
72        ranked.sort_by(|a, b| score(b, &prof.w).partial_cmp(&score(a, &prof.w)).unwrap());
73        let order: Vec<String> = ranked.iter().map(|p| format!("{}({:.3})", p.language.name(), score(p, &prof.w))).collect();
74        let mg_rank = ranked.iter().position(|p| p.language == Language::MechGen).unwrap() + 1;
75        for (i, p) in ranked.iter().enumerate() {
76            rank_of.entry(p.language.name()).or_default().push(i + 1);
77            if i < 3 {
78                *top3_count.entry(p.language.name()).or_default() += 1;
79            }
80        }
81        println!("\n  [{}] {}", prof.name, prof.why);
82        println!("    {}", order[..order.len().min(5)].join("  >  "));
83        println!("    … MechGen #{mg_rank} of {}", langs.len());
84    }
85
86    // ── Robustness: how often each language lands top-3 across the 5 scenarios ─
87    println!("\nROBUSTNESS (top-3 finishes across all {} scenarios; best/worst rank)", PROFILES.len());
88    let mut summary: Vec<(&str, usize, usize, usize)> = top3_count
89        .keys()
90        .map(|&name| {
91            let ranks = &rank_of[name];
92            (name, top3_count[name], *ranks.iter().min().unwrap(), *ranks.iter().max().unwrap())
93        })
94        .collect();
95    // include languages that never hit top-3
96    for p in &langs {
97        let n = p.language.name();
98        if !top3_count.contains_key(n) {
99            let ranks = &rank_of[n];
100            summary.push((n, 0, *ranks.iter().min().unwrap(), *ranks.iter().max().unwrap()));
101        }
102    }
103    summary.sort_by(|a, b| b.1.cmp(&a.1).then(a.2.cmp(&b.2)));
104    for (name, t3, best, worst) in &summary {
105        println!("  {name:<12} top-3 in {t3}/{}   rank range #{best}–#{worst}", PROFILES.len());
106    }
107
108    // ── Gap to the design-target ceiling (canonical fitness) ──────────────────
109    println!("\nGAP TO THE `ideal` CEILING (canonical unweighted fitness)");
110    let mut byfit: Vec<&LanguageProfile> = langs.iter().collect();
111    byfit.sort_by(|a, b| b.fitness().partial_cmp(&a.fitness()).unwrap());
112    for p in &byfit {
113        println!("  {:<12} {:.3}   (−{:.3} from ideal {:.3})", p.language.name(), p.fitness(), ideal.fitness() - p.fitness(), ideal.fitness());
114    }
115
116    // ── Crossover: how token-obsessed must a weighting be to dethrone MechGen? ─
117    // Score model: wt·token + (1−wt)/3·(determinism+reliability+safety). Solve for
118    // the token weight wt where the terse champion ties MechGen.
119    let mg = profile(Language::MechGen);
120    let mg_other = (mg.determinism + mg.reliability + mg.safety) / 3.0;
121    let crossover = |b: &LanguageProfile| -> f64 {
122        let bo = (b.determinism + b.reliability + b.safety) / 3.0;
123        // wt = (bo − mg_other) / (mg.token − mg_other − b.token + bo)
124        (bo - mg_other) / (mg.token_efficiency - mg_other - b.token_efficiency + bo)
125    };
126    let bash = profile(Language::Bash);
127    let py = profile(Language::Python);
128    println!("\nCROSSOVER (token-weight at which a terse language overtakes MechGen)");
129    println!("  vs bash (token {:.2}):   {:.0}% token weight", bash.token_efficiency, crossover(&bash) * 100.0);
130    println!("  vs python (token {:.2}): {:.0}% token weight", py.token_efficiency, crossover(&py) * 100.0);
131    println!("  → no realistic SWE weighting (token ≤ 40%) flips it; the crossover above (now even");
132    println!("    higher, since the landed inference made MechGen's token axis competitive) needs");
133    println!("    near-pure code-golf that ignores correctness for a terse language to win.");
134
135    // ── Honest reading ────────────────────────────────────────────────────────
136    println!("\nREADING (honest — MechGen is the project's own language):");
137    println!("  • MechGen ranks #1 under ALL FIVE realistic SWE scenarios — including the");
138    println!("    token-leaning rapid-prototyping one — because its reliability/determinism/safety");
139    println!("    lead is large enough that realistic token weighting cannot overcome it.");
140    println!("  • This is robust, not a one-weighting artifact: it never owns the `token` axis");
141    println!("    (Bash does), yet it tops every scenario; only a ~70%-token weighting (above)");
142    println!("    that essentially ignores correctness would favor terse languages — its token FLOOR.");
143    println!("  • Bias guards hold (token ≤ Python, reliability ≤ Rust, no axis ≥ 0.98); scores were");
144    println!("    corrected DOWN twice. The only thing above it is the unreachable `ideal` design target,");
145    println!("    and it loses reliability head-to-head to battle-tested Rust (0.94 vs 0.95).");
146}
More examples
Hide additional examples
examples/swe_languages.rs (line 54)
25fn main() {
26    println!("=== Agentic-SWE language comparison ===\n");
27
28    // ── 1. Canonical agentic fitness (unweighted mean) ────────────────────────
29    println!("CANONICAL agentic fitness (unweighted mean of the four axes)");
30    println!(
31        "{:<12} {:>7}   {:>6} {:>6} {:>6} {:>6}",
32        "language", "fitness", "token", "determ", "reliab", "safety"
33    );
34    for p in rank_languages() {
35        let tag = match p.language {
36            Language::Ideal => "  ← design-target ceiling (not a real language)",
37            Language::MechGen => "  ← this project's language (bias-audited)",
38            _ => "",
39        };
40        println!(
41            "{:<12} {:>7.3}   {:>6.2} {:>6.2} {:>6.2} {:>6.2}{}",
42            p.language.name(),
43            p.fitness(),
44            p.token_efficiency,
45            p.determinism,
46            p.reliability,
47            p.safety,
48            tag,
49        );
50    }
51
52    // ── 2. SWE-weighted ranking ───────────────────────────────────────────────
53    println!("\nSWE-WEIGHTED ranking (reliability .35, determinism .30, safety .20, token .15)");
54    let mut ranked: Vec<LanguageProfile> = Language::all().iter().map(|&l| profile(l)).collect();
55    ranked.sort_by(|a, b| swe_score(b).partial_cmp(&swe_score(a)).unwrap());
56    println!("{:<12} {:>9}   vs canonical", "language", "swe-score");
57    for p in &ranked {
58        let delta = swe_score(p) - p.fitness();
59        let note = if p.language == Language::Ideal {
60            "  (design target)"
61        } else {
62            ""
63        };
64        println!(
65            "{:<12} {:>9.3}   {:+.3}{}",
66            p.language.name(),
67            swe_score(p),
68            delta,
69            note,
70        );
71    }
72    println!(
73        "  (the SWE lens lifts strongly-typed/reproducible languages — Rust, MechGen, Go —\n   \
74         and demotes terse-but-unsafe ones — Python, Bash — vs the unweighted mean.)"
75    );
76
77    // ── 3. Head-to-head: MechGen vs the popular real languages ─────────────────
78    println!("\nHEAD-TO-HEAD (positive = MechGen fits agentic SWE better)");
79    for other in [Language::Rust, Language::Python, Language::Go, Language::TypeScript] {
80        let c = compare_languages(Language::MechGen, other);
81        print!("{c}");
82    }
83
84    // ── 4. Reading + honesty ──────────────────────────────────────────────────
85    let mg = profile(Language::MechGen);
86    let rust = profile(Language::Rust);
87    let py = profile(Language::Python);
88    println!("READING");
89    println!(
90        "  Among IMPLEMENTED languages MechGen ranks #1 ({:.3}); only the `ideal` DESIGN TARGET\n  \
91         ({:.3}, token-floored, unreachable for any text language) sits above it.",
92        mg.fitness(),
93        profile(Language::Ideal).fitness()
94    );
95    println!(
96        "  Under the SWE weighting it stays #1 among real languages: its reliability ({:.2}) and\n  \
97         determinism ({:.2}) — sound effects, exhaustiveness, machine-readable fixes, byte-stable IR —\n  \
98         are exactly what the build→test→debug loop rewards.",
99        mg.reliability, mg.determinism
100    );
101    println!("\nHONESTY (this is the project's own language — bias is the risk):");
102    println!(
103        "  • Scores move on EVIDENCE, both ways: token was corrected DOWN 0.73→0.60 (a C/Go\n    \
104         head-to-head exposed the old surface as MORE verbose), then RAISED 0.60→0.80 after the\n    \
105         ab-initio migration LANDED type inference + `;`-removal (1166 tests green) and measured\n    \
106         #1 of six on the real-BPE swe_token_benchmark. Composite was also corrected 0.95→0.865.");
107    println!(
108        "  • Falsifiable guards still hold: token ({:.2}) ≤ Python ({:.2}) — measured tersest but\n    \
109         scored at parity, not above; reliability ({:.2}) ≤ Rust ({:.2}); no axis ≥ 0.98 (prototype).",
110        mg.token_efficiency, py.token_efficiency, mg.reliability, rust.reliability
111    );
112    println!(
113        "  • The advantage is real on the axes SWE cares about (reliability/determinism/safety),\n    \
114         NOT on tokens — and it is a young prototype vs battle-tested Rust on correctness maturity."
115    );
116}