harn-stdlib 0.8.50

/**
 * `harn eval context` rendering layer ported to .harn — see harn#2306
 * (W6).
 *
 * **Pragmatic partial port.** The Rust handler in
 * `crates/harn-cli/src/commands/eval_context.rs` does manifest loading,
 * `evaluate_context_eval_manifest` invocation (which reaches deep into
 * `harn_vm::orchestration::context_eval` — projection policies, mode
 * runs, scoring), and writes `summary.json` + `per_run.jsonl` to disk.
 * None of that is portable to script-land today without G4 (#2297)
 * exposing the orchestration surface.
 *
 * What this script owns: the **markdown report body** rendered into
 * `summary.md`, plus the one-line human summary the legacy handler
 * prints to stdout, plus the JSON pretty rendering used when the
 * caller passes `--json`. The Rust shim writes the JSON artifacts and
 * the markdown file on its side (the script runs in the standard
 * `harn run` sandbox where `harness.fs.write_text` is restricted to
 * `workspace_roots`, but users invoke `--output /tmp/...` constantly).
 *
 * Inputs (from the dispatch shim in crates/harn-cli/src/commands/eval_context.rs):
 *   HARN_EVAL_CONTEXT_REPORT_JSON — serialised `ContextEvalReport`.
 *   HARN_EVAL_CONTEXT_OUTPUT_MODE — one of:
 *     "markdown" — render the summary.md body to stdout (default).
 *     "summary"  — render the one-line human summary to stdout.
 *     "json"     — render the JSON pretty form to stdout for `--json`.
 *
 * The wider port (replacing the Rust shim) is gated on G4 (#2297).
 * C1 (#2314) will delete the `HARN_CLI_IMPL=rust` escape hatch.
 */
/**
 * Return `value` unchanged when `type_of` reports it as a string;
 * otherwise return `fallback`. Used to read optional string fields out
 * of the parsed report without tripping on missing or mistyped values.
 */
fn __safe_string(value, fallback: string) -> string {
  let chosen = if type_of(value) == "string" {
    value
  } else {
    fallback
  }
  return chosen
}

/**
 * Return `value` unchanged when `type_of` reports it as a list;
 * any other input yields a fresh empty list so callers can iterate
 * without a separate type check.
 */
fn __safe_list(value) -> list {
  if type_of(value) != "list" {
    return []
  }
  return value
}

/**
 * Return `value` unchanged when `type_of` reports it as a dict;
 * any other input yields an empty dict so callers can index into the
 * result without a separate type check.
 */
fn __safe_dict(value) -> dict {
  if type_of(value) != "dict" {
    return {}
  }
  return value
}

/**
 * Shared fixed-point formatter behind the `__format_float_N` wrappers.
 *
 * Renders `value` with exactly `digits` decimal places:
 *   - `value` is coerced with `to_float`; when that yields nothing the
 *     value is treated as 0.0.
 *   - The magnitude is multiplied by 10^digits, rounded with `round`,
 *     and split into whole and fractional parts with integer arithmetic.
 *   - The fractional part is left-padded with "0" to `digits` chars.
 *   - A leading "-" is emitted only when the rounded result is
 *     non-zero, so a tiny negative input renders as "0.00…" rather
 *     than "-0.00…".
 *
 * `digits` is expected to be >= 1; the wrappers below pass 2, 4 and 6.
 *
 * NOTE(review): the wrappers exist to reproduce Rust's `{:.N}` output
 * for the legacy renderer. Rust keeps the sign on negative values that
 * round to zero and rounds on the exact decimal expansion, so parity
 * presumably only holds for the non-negative scores and costs a report
 * carries — confirm before reusing this for arbitrary floats.
 */
fn __format_float_fixed(value, digits: int) -> string {
  // Build 10^digits twice: as a float for the scaling multiply and as
  // an int for the whole/fraction split, so the arithmetic stays
  // float*float and int/int exactly as in the hand-unrolled versions.
  var scale_f = 1.0
  var scale_i = 1
  var step = 0
  while step < digits {
    scale_f = scale_f * 10.0
    scale_i = scale_i * 10
    step = step + 1
  }
  let f = to_float(value) ?? 0.0
  let negative = f < 0.0
  let abs_f = if negative {
    -f
  } else {
    f
  }
  let scaled = to_int(round(abs_f * scale_f)) ?? 0
  let whole = scaled / scale_i
  let frac = scaled - whole * scale_i
  var frac_str = to_string(frac)
  while len(frac_str) < digits {
    frac_str = "0" + frac_str
  }
  let sign = if negative && (whole != 0 || frac != 0) {
    "-"
  } else {
    ""
  }
  return sign + to_string(whole) + "." + frac_str
}

/**
 * Format a float as `"X.YYYY"` (4 decimals, zero-padded). Used for the
 * correctness / tool-quality figures in the markdown body, standing in
 * for the legacy renderer's `format!("{:.4}", x)`.
 */
fn __format_float_4(value) -> string {
  return __format_float_fixed(value, 4)
}

/**
 * Format a float as `"X.YY"` (2 decimals, zero-padded). Used for
 * the one-line summary text `mean_correctness={:.2} mean_tool_quality={:.2}`.
 */
fn __format_float_2(value) -> string {
  return __format_float_fixed(value, 2)
}

/**
 * Format a float as `"X.YYYYYY"` (6 decimals, zero-padded). Used
 * for the `cost USD: {:.6}` line in summary.md.
 */
fn __format_float_6(value) -> string {
  return __format_float_fixed(value, 6)
}

/**
 * Escape every `|` in `value` as `\|` so the text can sit inside a
 * markdown table cell without being read as a column separator.
 * No other character is altered.
 */
fn __escape_md(value: string) -> string {
  let pipe = "|"
  let escaped_pipe = "\\|"
  return value.replace(pipe, escaped_pipe)
}

/**
 * Render the `summary.md` body for a parsed report dict.
 *
 * Layout: a `# Context Eval: <title>` heading, a bullet list of
 * aggregate figures, then one table row per entry in `report["runs"]`.
 * The title is `manifest_name` when that is a string, otherwise
 * `manifest_id` (or "" when neither is a string). Missing numeric
 * fields fall back to 0 / 0.0 and missing flags to false.
 *
 * Only the task and mode cells go through `__escape_md`; the cache key
 * is emitted verbatim inside backticks — presumably to stay
 * byte-identical with the legacy renderer (confirm before changing).
 */
fn __render_markdown(report: dict) -> string {
  let totals = __safe_dict(report["aggregate"])

  // A string manifest_name wins; otherwise fall back to manifest_id.
  let heading = __safe_string(report["manifest_name"], __safe_string(report["manifest_id"], ""))

  let overall_ok = report["pass"] ?? false
  var verdict = "FAIL"
  if overall_ok {
    verdict = "PASS"
  }

  let passed_count = to_string(report["passed_runs"] ?? 0)
  let total_count = to_string(report["total_runs"] ?? 0)
  let mean_correctness = __format_float_4(totals["mean_final_correctness"] ?? 0.0)
  let mean_tools = __format_float_4(totals["mean_tool_call_quality"] ?? 0.0)
  let tokens_in = to_string(totals["total_input_tokens"] ?? 0)
  let tokens_out = to_string(totals["total_output_tokens"] ?? 0)
  let cost = __format_float_6(totals["total_cost_usd"] ?? 0.0)

  var doc = "# Context Eval: " + heading + "\n\n"
  doc = doc + "- status: " + verdict + "\n"
  doc = doc + "- runs: " + passed_count + "/" + total_count + " passed\n"
  doc = doc + "- mean correctness: " + mean_correctness + "\n"
  doc = doc + "- mean tool quality: " + mean_tools + "\n"
  doc = doc + "- input tokens: " + tokens_in + "\n"
  doc = doc + "- output tokens: " + tokens_out + "\n"
  doc = doc + "- cost USD: " + cost + "\n\n"

  let table_header = "| task | mode | pass | correctness | tools | reads before edit | input tokens | compactions | cache key |\n"
  let table_rule = "|---|---|---:|---:|---:|---:|---:|---:|---|\n"
  doc = doc + table_header + table_rule

  for entry in __safe_list(report["runs"]) {
    let row = __safe_dict(entry)
    let correctness_d = __safe_dict(row["final_correctness"])
    let tools_d = __safe_dict(row["tool_call_quality"])
    let cache_d = __safe_dict(row["cache"])

    let task_cell = __escape_md(__safe_string(row["task_id"], ""))
    let mode_cell = __escape_md(__safe_string(row["mode_id"], ""))
    let row_ok = row["passed"] ?? false
    var pass_cell = "no"
    if row_ok {
      pass_cell = "yes"
    }
    let correctness_cell = __format_float_4(correctness_d["score"] ?? 0.0)
    let tools_cell = __format_float_4(tools_d["score"] ?? 0.0)
    let reads_cell = to_string(row["reads_before_first_edit"] ?? 0)
    let input_cell = to_string(row["input_tokens"] ?? 0)
    let compaction_cell = to_string(row["compaction_count"] ?? 0)
    let key_cell = __safe_string(cache_d["key"], "")

    let line = "| " + task_cell
      + " | "
      + mode_cell
      + " | "
      + pass_cell
      + " | "
      + correctness_cell
      + " | "
      + tools_cell
      + " | "
      + reads_cell
      + " | "
      + input_cell
      + " | "
      + compaction_cell
      + " | `"
      + key_cell
      + "` |\n"
    doc = doc + line
  }
  return doc
}

/**
 * Build the one-line human summary:
 * `context eval: P/T passed, mean_correctness=X.XX, mean_tool_quality=Y.YY`.
 * Counts come from the top level of `report`, the means from
 * `report["aggregate"]`; missing values fall back to 0 / 0.0.
 * The returned string has no trailing newline.
 */
fn __render_summary_line(report: dict) -> string {
  let totals = __safe_dict(report["aggregate"])
  let passed_count = to_string(report["passed_runs"] ?? 0)
  let total_count = to_string(report["total_runs"] ?? 0)
  let correctness = __format_float_2(totals["mean_final_correctness"] ?? 0.0)
  let tool_quality = __format_float_2(totals["mean_tool_call_quality"] ?? 0.0)

  var line = "context eval: " + passed_count + "/" + total_count + " passed"
  line = line + ", mean_correctness=" + correctness
  line = line + ", mean_tool_quality=" + tool_quality
  return line
}

/**
 * Entrypoint. Returns an integer exit code rather than calling
 * `exit()` so the dispatch wedge's captured stdout/stderr buffers
 * flush back to the Rust shim — `exit()` in the embedded `harn run`
 * pipeline calls `std::process::exit` which terminates the host
 * binary mid-render and drops the captured streams.
 *
 * Reads:
 *   HARN_EVAL_CONTEXT_REPORT_JSON — the report payload (required).
 *   HARN_EVAL_CONTEXT_OUTPUT_MODE — "summary", "json", or anything else
 *     (including unset) for the markdown body.
 *
 * Returns 0 after printing the rendered payload. Returns 70 with a
 * message on stderr when the payload variable is empty/unset, fails to
 * parse as JSON, or parses to something other than a JSON object. The
 * last check keeps a malformed payload from reaching the renderers,
 * which index the report as a dict.
 */
fn main(harness: Harness) -> int {
  let raw = harness.env.get_or("HARN_EVAL_CONTEXT_REPORT_JSON", "")
  if raw == "" {
    harness.stdio
      .eprintln("internal error: HARN_EVAL_CONTEXT_REPORT_JSON not set by dispatch shim")
    return 70
  }
  let report = try {
    json_parse(raw)
  } catch (e) {
    harness.stdio.eprintln("internal error: failed to parse ContextEvalReport: " + to_string(e))
    return 70
  }
  // Valid JSON is not necessarily a report: `[]`, `"x"` or `3` all parse.
  // Reject anything that is not an object up front so every output mode
  // fails the same way instead of rendering from a non-dict value.
  if type_of(report) != "dict" {
    harness.stdio
      .eprintln("internal error: ContextEvalReport payload is not a JSON object")
    return 70
  }
  let mode = harness.env.get_or("HARN_EVAL_CONTEXT_OUTPUT_MODE", "markdown")
  let payload = if mode == "summary" {
    __render_summary_line(report)
  } else if mode == "json" {
    json_stringify_pretty(report)
  } else {
    __render_markdown(report)
  }
  __io_print(payload)
  return 0
}