harn-stdlib 0.9.8

/**
 * `harn models lora preflight` renderer.
 *
 * The dispatch shim reads corpus/config files and computes deterministic
 * readiness facts. This Harn source owns report presentation and JSON
 * passthrough.
 *
 * Inputs (from the dispatch shim):
 *   HARN_MODELS_LORA_PREFLIGHT_PAYLOAD_JSON   - compact report JSON (required;
 *                                               `main` exits 70 when it is unset).
 *   HARN_MODELS_LORA_PREFLIGHT_PAYLOAD_PRETTY - pretty report JSON (not read by the
 *                                               code visible in this file).
 *   HARN_OUTPUT_JSON                          - "1" for JSON, else human text.
 */
import {
  cli_json_envelope,
  print_list,
  safe_bool,
  safe_dict,
  safe_int_string,
  safe_list,
  safe_string,
} from "std/cli/render"

/**
 * Formats a fractional ratio as a percentage string.
 *
 * The value is converted with `to_float`; when that yields nil the ratio is
 * treated as 0.0. The ratio is multiplied by 100 and suffixed with "%". No
 * rounding is applied, so the text is whatever `to_string` produces for the
 * scaled float.
 */
fn __percent(value) -> string {
  let ratio = to_float(value) ?? 0.0
  return to_string(ratio * 100.0) + "%"
}

/**
 * Writes the human-readable preflight report to `harness.stdio`, one line at a
 * time.
 *
 * Output order: header, corpus, optional config path, tool-format and
 * threshold settings, optional done marker, record/fit/tool-call statistics,
 * breakdown dictionaries (as JSON), longest examples, problem examples,
 * warnings, errors, and a final PASS/FAIL verdict taken from `report["ok"]`.
 *
 * Every field is read through the `safe_*` helpers from `std/cli/render` with
 * an explicit fallback ("" for strings, "0" for counts, false for `ok`);
 * presumably the fallback is used when a field is missing or has the wrong
 * type. Confirm against the helper implementations.
 */
fn __render_human(harness: Harness, report: dict) {
  // Top-level report sections.
  let base_info = safe_dict(report["base"])
  let request_info = safe_dict(report["request"])
  let limits = safe_dict(report["thresholds"])
  let counters = safe_dict(report["stats"])
  let call_counters = safe_dict(counters["tool_calls"])
  let config_info = safe_dict(report["config"])
  let groups = safe_dict(report["breakdown"])
  let longest_examples = safe_list(report["longest"])
  let problem_examples = safe_list(report["problem_examples"])
  let skipped_records = safe_list(report["skipped_records"])
  let warning_items = safe_list(report["warnings"])
  let error_items = safe_list(report["errors"])

  // Header and request summary.
  let model_id = safe_string(base_info["id"], "")
  let provider_name = safe_string(base_info["provider"], "")
  harness.stdio.println("LoRA preflight for " + model_id + " via " + provider_name)
  let corpus_path = safe_string(request_info["corpus"], "")
  harness.stdio.println("  corpus: " + corpus_path)
  let config_file = safe_string(config_info["path"], "")
  if config_file != "" {
    harness.stdio.println("  config: " + config_file)
  }
  let target_format = safe_string(request_info["target_tool_format"], "")
  harness.stdio.println("  target tool format: " + target_format)

  // Thresholds the corpus is checked against.
  let source_format = safe_string(limits["expected_source_tool_format"], "")
  harness.stdio.println("  expected source tool format: " + source_format)
  let max_seq = safe_int_string(limits["max_seq_length"], "0")
  harness.stdio.println("  max_seq_length: " + max_seq)
  let min_fit = __percent(limits["min_fit_ratio"])
  harness.stdio.println("  required fit ratio: " + min_fit)
  let hard_limit = safe_int_string(limits["hard_token_limit"], "0")
  harness.stdio.println("  hard token estimate limit: " + hard_limit)
  let min_records = safe_int_string(limits["min_records"], "0")
  harness.stdio.println("  minimum records: " + min_records)
  let min_share = __percent(limits["min_tool_call_share"])
  harness.stdio.println("  minimum tool-call share: " + min_share)
  let done_marker = safe_string(limits["done_marker"], "")
  if done_marker != "" {
    harness.stdio.println("  required done marker: " + done_marker)
  }

  // Record counts and fit statistics.
  let raw_total = safe_int_string(counters["raw_records"], "0")
  let trainable_total = safe_int_string(counters["trainable_records"], "0")
  let skipped_total = to_string(skipped_records.count)
  harness.stdio.println("  records: raw=" + raw_total + " trainable=" + trainable_total + " skipped=" + skipped_total)
  let fit_total = safe_int_string(counters["fit_records"], "0")
  let fit_share = __percent(counters["fit_ratio"])
  harness.stdio.println("  fit: " + fit_total + "/" + trainable_total + " (" + fit_share + ")")

  // Tool-call tallies.
  let json_calls = safe_int_string(call_counters["json_tool_calls"], "0")
  let text_calls = safe_int_string(call_counters["text_tool_calls"], "0")
  let unknown_blocks = safe_int_string(call_counters["unknown_tool_blocks"], "0")
  let malformed_bodies = safe_int_string(call_counters["malformed_json_bodies"], "0")
  harness.stdio.println("  tool calls: json=" + json_calls + " text=" + text_calls + " unknown=" + unknown_blocks + " malformed_json=" + malformed_bodies)

  // Breakdown dictionaries are printed as compact JSON.
  let declared_formats = json_stringify(safe_dict(groups["declared_tool_formats"]))
  harness.stdio.println("  declared tool formats: " + declared_formats)
  let language_counts = json_stringify(safe_dict(groups["languages"]))
  harness.stdio.println("  languages: " + language_counts)
  let task_type_counts = json_stringify(safe_dict(groups["task_types"]))
  harness.stdio.println("  task types: " + task_type_counts)

  // Optional per-example sections; each heading is printed only when the list is non-empty.
  if longest_examples.count > 0 {
    harness.stdio.println("  longest examples:")
    for entry in longest_examples {
      let fields = safe_dict(entry)
      let entry_id = safe_string(fields["id"], "")
      let entry_line = safe_int_string(fields["line_number"], "0")
      let entry_tokens = safe_int_string(fields["approx_tokens"], "0")
      let entry_language = safe_string(fields["language"], "")
      let entry_task = safe_string(fields["task_type"], "")
      harness.stdio.println("    - " + entry_id + " line=" + entry_line + " approx_tokens=" + entry_tokens + " " + entry_language + "/" + entry_task)
    }
  }
  if problem_examples.count > 0 {
    harness.stdio.println("  problem examples:")
    for issue in problem_examples {
      let fields = safe_dict(issue)
      let issue_id = safe_string(fields["id"], "")
      let issue_line = safe_int_string(fields["line_number"], "0")
      let issue_kind = safe_string(fields["kind"], "")
      let issue_detail = safe_string(fields["detail"], "")
      harness.stdio.println("    - " + issue_id + " line=" + issue_line + " " + issue_kind + ": " + issue_detail)
    }
  }

  print_list(harness, "warnings", warning_items)
  print_list(harness, "errors", error_items)

  // Final verdict mirrors the report's `ok` flag; a missing flag counts as failure.
  let passed = safe_bool(report["ok"], false)
  let verdict = if passed {
    "PASS"
  } else {
    "FAIL"
  }
  harness.stdio.println("  result: " + verdict)
}

/**
 * Builds the pretty-printed JSON envelope for the report.
 *
 * When `report["ok"]` is true, the report is wrapped as `data` in a success
 * envelope. Otherwise it is wrapped as `error.details` in a failure envelope
 * with code "lora_preflight_failed". Both shapes go through
 * `cli_json_envelope` and are serialized with `json_stringify_pretty`.
 */
fn __render_json(report: dict) -> string {
  // Success path: pass the report through untouched as the payload.
  if safe_bool(report["ok"], false) {
    let success = cli_json_envelope({schema_version: 1, ok: true, data: report})
    return json_stringify_pretty(success)
  }
  // Failure path: the full report travels along as error details.
  let failure_info = {
    code: "lora_preflight_failed",
    message: "LoRA corpus preflight failed readiness checks.",
    details: report,
  }
  let failure = cli_json_envelope({schema_version: 1, ok: false, error: failure_info})
  return json_stringify_pretty(failure)
}

/**
 * Entry point: loads the report payload from the environment, renders it, and
 * maps the outcome to an exit code.
 *
 * Exit codes:
 *   70 - HARN_MODELS_LORA_PREFLIGHT_PAYLOAD_JSON is unset/empty or fails to
 *        parse (a message is written to stderr).
 *   1  - the report is not ok and the request asked for check mode
 *        (`request.check` is true).
 *   0  - otherwise.
 *
 * Output is the JSON envelope when HARN_OUTPUT_JSON is exactly "1", and the
 * human-readable report for any other value.
 */
fn main(harness: Harness) -> int {
  let payload = harness.env.get_or("HARN_MODELS_LORA_PREFLIGHT_PAYLOAD_JSON", "")
  if payload == "" {
    harness.stdio.eprintln("internal error: HARN_MODELS_LORA_PREFLIGHT_PAYLOAD_JSON not set")
    return 70
  }
  let report = try {
    json_parse(payload)
  } catch (parse_error) {
    harness.stdio.eprintln("internal error: failed to parse LoRA preflight payload: " + to_string(parse_error))
    return 70
  }
  // Read the exit-code inputs before rendering, in the same order as before.
  let passed = safe_bool(report["ok"], false)
  let request_info = safe_dict(report["request"])
  let enforce = safe_bool(request_info["check"], false)
  let output_flag = harness.env.get_or("HARN_OUTPUT_JSON", "0")
  if output_flag == "1" {
    harness.stdio.println(__render_json(report))
  } else {
    __render_human(harness, report)
  }
  // Only a failed report in check mode produces a non-zero exit.
  if enforce && !passed {
    return 1
  }
  return 0
}