harn-stdlib 0.8.52

Embedded Harn standard library source catalog
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
/**
 * `harn eval prompt` reporting layer ported to .harn — see harn#2305
 * (W5).
 *
 * **Pragmatic partial port.** The full `eval prompt` Rust handler is
 * ~1000 LOC and is tightly entangled with VM internals (it pushes
 * `LlmRenderContext` guards around `render_template_to_string`, builds
 * tempfile harn pipelines to drive `llm_call`/judge fanout, reads the
 * provider catalog through `llm_config`, and routes context-fixture
 * evaluation through `harn_vm::orchestration::assemble_context`). None
 * of that is reachable from script-land today without exposing a wider
 * VM surface than W5 should land.
 *
 * What this script owns: the **rendering / reporting layer** — the
 * three output formats (terminal, JSON, HTML), always emitted to
 * stdout. The Rust shim does the fleet resolution, fleet rendering,
 * run/judge fanout, and context-fixture evaluation, collects the
 * result into a `PromptReport`, serialises it to JSON, and hands it
 * off here; it also writes the captured stdout to `--out-file`.
 *
 * The wider port (replacing `eval_prompt.rs` with a thin shim) is
 * deferred until G4 (#2297) exposes the missing host capabilities
 * (template render with `LlmRenderContext`, provider-catalog
 * resolution, `llm_call` parameterised by provider/model). That work
 * is intentionally scoped out of W5 to keep this PR reviewable.
 *
 * Inputs (from the dispatch shim in crates/harn-cli/src/commands/eval_prompt.rs):
 *   HARN_EVAL_PROMPT_REPORT_JSON — serialised `PromptReport` (see the
 *     struct in eval_prompt.rs for the canonical shape). Top-level
 *     keys: `template_path`, `mode`, `renders`, `runs` (optional),
 *     `judge` (optional), `context_eval` (optional).
 *   HARN_EVAL_PROMPT_OUTPUT      — "terminal" | "json" | "html".
 *
 * The Rust shim serialises access to both env vars with a tokio Mutex
 * so concurrent in-process callers (the existing eval_prompt_cli
 * tests) don't clobber each other mid-dispatch.
 *
 * The script always emits its rendered payload to stdout. `--out-file`
 * is handled on the Rust side after dispatch returns: the script runs
 * under the standard `harn run` sandbox where `harness.fs.write_text`
 * is restricted to `workspace_roots`, but users invoke `--out-file
 * /tmp/...` all the time. Capturing the script's stdout and writing it
 * from the unsandboxed shim preserves that behavior.
 */
/**
 * Return `value` unchanged when `type_of` reports it as a string;
 * otherwise return `fallback`.
 */
fn __safe_string(value, fallback: string) -> string {
  if type_of(value) != "string" {
    return fallback
  }
  return value
}

/**
 * Return `value` unchanged when `type_of` reports it as a list;
 * otherwise return an empty list.
 */
fn __safe_list(value) -> list {
  let is_list = type_of(value) == "list"
  if !is_list {
    return []
  }
  return value
}

/**
 * Return `value` unchanged when `type_of` reports it as a dict;
 * otherwise return an empty dict.
 */
fn __safe_dict(value) -> dict {
  let is_dict = type_of(value) == "dict"
  if !is_dict {
    return {}
  }
  return value
}

/**
 * Report whether the last element of `s` is "\n". An empty string
 * yields false.
 */
fn __ends_with_newline(s: string) -> bool {
  let size = len(s)
  if size > 0 {
    return s[size - 1] == "\n"
  }
  return false
}

/**
 * Render a number as `"X.YYY"`: exactly three fractional digits,
 * zero-padded. Intended to line up with Rust's `format!("{:.3}", x)`
 * output, because Harn's `to_string` for `Float` does not pad.
 *
 * `value` is coerced with `to_float`; when that yields nil the result
 * is formatted from 0.0. A negative input whose rounded magnitude is
 * zero is rendered without a sign (`"0.000"`).
 */
fn __format_float_3(value) -> string {
  let number = to_float(value) ?? 0.0
  let is_negative = number < 0.0
  var magnitude = number
  if is_negative {
    magnitude = -number
  }
  // Work in whole thousandths so the digits can be split with int math.
  let thousandths = to_int(round(magnitude * 1000.0)) ?? 0
  let int_part = thousandths / 1000
  let frac_part = thousandths - int_part * 1000
  // Left-pad the fractional digits to width 3.
  var frac_digits = to_string(frac_part)
  while len(frac_digits) < 3 {
    frac_digits = "0" + frac_digits
  }
  let unsigned = to_string(int_part) + "." + frac_digits
  // Only keep the minus sign when something non-zero survives rounding.
  if is_negative && (int_part != 0 || frac_part != 0) {
    return "-" + unsigned
  }
  return unsigned
}

/**
 * ─── Terminal rendering ───────────────────────────────────────────────────
 *
 * Summarise how the `candidate` lines differ from the `baseline` lines.
 *
 * Both inputs are reduced to sets of unique lines (the comparison is
 * set-based, not positional). Returns:
 *   - "N line(s) only in baseline, M line(s) only here" when the sets
 *     differ;
 *   - "A vs B lines (same content set, ...)" when the sets match but the
 *     raw line counts do not;
 *   - "" when the sets and the raw counts both match.
 */
fn __line_diff_summary(baseline, candidate) -> string {
  // Dicts stand in for sets: each distinct line becomes a key.
  var seen_in_baseline = {}
  for entry in baseline {
    seen_in_baseline = seen_in_baseline + {[entry]: true}
  }
  var seen_in_candidate = {}
  for entry in candidate {
    seen_in_candidate = seen_in_candidate + {[entry]: true}
  }
  var missing_from_candidate = 0
  for line_key in keys(seen_in_baseline) {
    if seen_in_candidate[line_key] == nil {
      missing_from_candidate = missing_from_candidate + 1
    }
  }
  var missing_from_baseline = 0
  for line_key in keys(seen_in_candidate) {
    if seen_in_baseline[line_key] == nil {
      missing_from_baseline = missing_from_baseline + 1
    }
  }
  if missing_from_candidate != 0 || missing_from_baseline != 0 {
    return to_string(missing_from_candidate) + " line(s) only in baseline, "
      + to_string(missing_from_baseline)
      + " line(s) only here"
  }
  // Same unique lines on both sides; only the raw counts can differ now.
  let baseline_count = len(baseline)
  let candidate_count = len(candidate)
  if baseline_count != candidate_count {
    return to_string(baseline_count) + " vs " + to_string(candidate_count)
      + " lines (same content set, different ordering or repeats)"
  }
  return ""
}

/**
 * Return the `rendered` text of the first entry in `renders` whose
 * `rendered` field is a string, or "" when no entry has one.
 */
fn __first_rendered(renders) -> string {
  for entry in renders {
    let entry_dict = __safe_dict(entry)
    let text = entry_dict["rendered"]
    if type_of(text) != "string" {
      continue
    }
    return text
  }
  return ""
}

/**
 * Render one fleet entry for the terminal report.
 *
 * Output starts with a `## [idx] selector (provider/model)  family=...`
 * heading, followed by an "auth: not configured" note when
 * `auth_available` is false. A string `error` short-circuits with a
 * "render error" line; otherwise a string `rendered` is printed between
 * `---` fences. For every entry after the first (and when
 * `baseline_lines` is non-empty) a "diff vs #0" summary is appended if
 * `__line_diff_summary` reports a difference.
 */
fn __terminal_render_section(render, idx, baseline_lines) -> string {
  let selector = __safe_string(render["selector"], "")
  let provider = __safe_string(render["provider"], "")
  let model = __safe_string(render["model"], "")
  let family = __safe_string(render["family"], "")
  var section = "## [" + to_string(idx) + "] "
    + selector
    + " ("
    + provider
    + "/"
    + model
    + ")  family="
    + family
    + "\n"
  if !(render["auth_available"] ?? true) {
    section = section + "    auth: not configured\n"
  }
  let render_error = render["error"]
  if type_of(render_error) == "string" {
    return section + "    render error: " + render_error + "\n\n"
  }
  let body = render["rendered"]
  if type_of(body) != "string" {
    return section
  }
  section = section + "---\n" + body
  if !__ends_with_newline(body) {
    section = section + "\n"
  }
  section = section + "---\n"
  if idx > 0 && len(baseline_lines) > 0 {
    // Drop the empty tail that `split` leaves after a terminal "\n" so
    // the line list matches what Rust's `lines()` would produce.
    var body_lines = split(body, "\n")
    let last = len(body_lines) - 1
    if len(body_lines) > 0 && body_lines[last] == "" {
      body_lines = body_lines[0:last]
    }
    let summary = __line_diff_summary(baseline_lines, body_lines)
    if summary != "" {
      section = section + "    diff vs #0: " + summary + "\n"
    }
  }
  return section + "\n"
}

/**
 * Render the "Context fixture gates" section of the terminal report.
 *
 * Emits an overall `passed / total` header, a `##` heading per fixture,
 * and one `- ` bullet per case showing its id, pass/fail label,
 * 3-decimal overall score, selected artifact ids, and token usage
 * against budget. Every entry in a case's `failures` list becomes an
 * indented `failure:` line. Missing counters render as 0 and missing
 * strings as "".
 */
fn __terminal_render_context_eval(context_eval) -> string {
  let overall_passed = to_string(context_eval["passed"] ?? 0)
  let overall_total = to_string(context_eval["total"] ?? 0)
  var text = "\n# Context fixture gates: " + overall_passed
    + " passed / "
    + overall_total
    + " total\n"
  for fixture_entry in __safe_list(context_eval["fixtures"]) {
    let fixture = __safe_dict(fixture_entry)
    let fixture_path = __safe_string(fixture["path"], "")
    let fixture_passed = to_string(fixture["passed"] ?? 0)
    let fixture_total = to_string(fixture["total"] ?? 0)
    text = text + "\n## " + fixture_path
      + " ("
      + fixture_passed
      + " passed / "
      + fixture_total
      + " total)\n"
    for case_entry in __safe_list(fixture["cases"]) {
      let item = __safe_dict(case_entry)
      let score = __safe_dict(item["score"])
      let budget = __safe_dict(item["budget"])
      let selected = __safe_list(item["selected_artifact_ids"])
      var verdict = "fail"
      if item["pass"] ?? false {
        verdict = "pass"
      }
      let case_id = __safe_string(item["id"], "")
      let score_text = __format_float_3(score["overall"] ?? 0.0)
      let selected_text = join(selected, ", ")
      let used_tokens = to_string(budget["total_tokens"] ?? 0)
      let budget_tokens = to_string(budget["budget_tokens"] ?? 0)
      text = text + "- " + case_id
        + ": "
        + verdict
        + " score="
        + score_text
        + " selected=["
        + selected_text
        + "] tokens="
        + used_tokens
        + "/"
        + budget_tokens
        + "\n"
      for failure in __safe_list(item["failures"]) {
        text = text + "    failure: " + to_string(failure) + "\n"
      }
    }
  }
  return text
}

/**
 * Render the "Model responses" section of the terminal report.
 *
 * Returns "" when `report["runs"]` is missing or empty. Otherwise walks
 * `report["renders"]` in order and, for each selector that has a run
 * entry, prints a `## selector (model)` heading followed by exactly one
 * of: a "skipped" note, an "error" line, or the response text between
 * `---` fences. Selectors without a run entry are omitted.
 */
fn __terminal_render_runs(report) -> string {
  let runs_by_selector = __safe_dict(report["runs"])
  if len(keys(runs_by_selector)) == 0 {
    return ""
  }
  var text = "\n# Model responses\n"
  for entry in __safe_list(report["renders"]) {
    let render = __safe_dict(entry)
    let selector = __safe_string(render["selector"], "")
    let raw_run = runs_by_selector[selector]
    if raw_run != nil {
      let run = __safe_dict(raw_run)
      text = text + "\n## " + selector
        + " ("
        + __safe_string(render["model"], "")
        + ")\n"
      let run_error = run["error"]
      let response = run["response"]
      // Precedence: skipped, then error, then response.
      if run["skipped"] ?? false {
        text = text + "    skipped: unauthenticated provider\n"
      } else if type_of(run_error) == "string" {
        text = text + "    error: " + run_error + "\n"
      } else if type_of(response) == "string" {
        text = text + "---\n" + response
        if !__ends_with_newline(response) {
          text = text + "\n"
        }
        text = text + "---\n"
      }
    }
  }
  return text
}

/**
 * Build the full terminal report for `report`.
 *
 * Layout: a title line with the template path and mode, one section per
 * render (each diffed against the first render that produced text), the
 * context-fixture section when `context_eval` is a dict, the model
 * responses section, and the judge verdict when `judge` is a dict.
 */
fn __render_terminal(report) -> string {
  let template_path = __safe_string(report["template_path"], "")
  let mode = __safe_string(report["mode"], "")
  var text = "# harn eval prompt — " + template_path
    + " (mode: "
    + mode
    + ")\n\n"
  let renders = __safe_list(report["renders"])
  // Baseline for the per-section diff summaries; stays empty when no
  // render produced text.
  var baseline_lines = []
  let baseline_text = __first_rendered(renders)
  if baseline_text != "" {
    baseline_lines = split(baseline_text, "\n")
    let last = len(baseline_lines) - 1
    if len(baseline_lines) > 0 && baseline_lines[last] == "" {
      baseline_lines = baseline_lines[0:last]
    }
  }
  var position = 0
  for entry in renders {
    text = text + __terminal_render_section(__safe_dict(entry), position, baseline_lines)
    position = position + 1
  }
  let context_eval = report["context_eval"]
  if type_of(context_eval) == "dict" {
    text = text + __terminal_render_context_eval(context_eval)
  }
  text = text + __terminal_render_runs(report)
  let judge = report["judge"]
  if type_of(judge) == "dict" {
    let judge_model = __safe_string(judge["judge_model"], "")
    let verdict = __safe_string(judge["verdict"], "")
    text = text + "\n# Judge verdict ("
      + judge_model
      + "): \n"
      + verdict
      + "\n"
  }
  return text
}

/**
 * ─── JSON rendering ───────────────────────────────────────────────────────
 *
 * Serialise `report` with `json_stringify_pretty` and append a single
 * trailing newline.
 */
fn __render_json(report) -> string {
  // Key order comes from `json_stringify_pretty` (alphabetical), not from
  // serde's struct-field order, so the bytes differ from the Rust output
  // even though both parse to the same JSON value. The dispatch parity
  // tests compare structure rather than bytes on this path. The appended
  // newline matches Rust's `format!("{s}\n")`.
  let body = json_stringify_pretty(report)
  return body + "\n"
}

/**
 * ─── HTML rendering ───────────────────────────────────────────────────────
 *
 * Escape `s` for inclusion in HTML text or attribute values by replacing
 * `&`, `<`, `>`, `"` and `'` with their entity forms.
 */
fn __html_escape(s: string) -> string {
  // `&` goes first: the later replacements introduce ampersands of their
  // own, and those must not be escaped a second time.
  var escaped = s.replace("&", "&amp;")
  escaped = escaped.replace("<", "&lt;")
  escaped = escaped.replace(">", "&gt;")
  escaped = escaped.replace("\"", "&quot;")
  escaped = escaped.replace("'", "&#39;")
  return escaped
}

/**
 * Render one fleet entry as an HTML `<div class="card">`.
 *
 * The card carries a heading (selector, provider, model, family), an
 * optional "auth: not configured" note, and either the render error or
 * the rendered text in a `<pre>`. When `runs` has a dict entry under
 * the same selector, the card also shows that run's skipped note, run
 * error, or response. All interpolated text passes through
 * `__html_escape`.
 */
fn __html_render_card(render, runs) -> string {
  let selector = __safe_string(render["selector"], "")
  let provider = __safe_string(render["provider"], "")
  let model = __safe_string(render["model"], "")
  let family = __safe_string(render["family"], "")
  var card = "<div class=\"card\"><h2>"
    + __html_escape(selector)
    + " <span class=\"meta\">("
    + __html_escape(provider)
    + " / "
    + __html_escape(model)
    + " · "
    + __html_escape(family)
    + ")</span></h2>"
  let auth_available = render["auth_available"] ?? true
  if !auth_available {
    card = card + "<p class=\"skip\">auth: not configured</p>"
  }
  let render_error = render["error"]
  let rendered = render["rendered"]
  if type_of(render_error) == "string" {
    card = card + "<p class=\"err\">render error: " + __html_escape(render_error) + "</p>"
  } else if type_of(rendered) == "string" {
    card = card + "<pre>" + __html_escape(rendered) + "</pre>"
  }
  let run = runs[selector]
  if type_of(run) != "dict" {
    return card + "</div>"
  }
  if run["skipped"] ?? false {
    return card + "<p class=\"skip\">run: skipped (no credentials)</p>" + "</div>"
  }
  let run_error = run["error"]
  let response = run["response"]
  if type_of(run_error) == "string" {
    card = card + "<p class=\"err\">run error: " + __html_escape(run_error) + "</p>"
  } else if type_of(response) == "string" {
    card = card + "<h3>response</h3><pre>" + __html_escape(response) + "</pre>"
  }
  return card + "</div>"
}

/**
 * Build the standalone HTML report for `report`.
 *
 * The page consists of an inline stylesheet, a header with the template
 * path and mode, a grid with one card per render (see
 * `__html_render_card`), an optional "Context fixture gates" section
 * when `context_eval` is a dict, and an optional judge section when
 * `judge` is a dict. The returned string ends with a newline.
 *
 * Every string taken from the report is passed through `__html_escape`
 * before interpolation, including `mode`, so report content cannot
 * inject markup into the page.
 */
fn __render_html(report) -> string {
  var out = "<!doctype html><html><head><meta charset=\"utf-8\"><title>"
    + "harn eval prompt report</title>"
  out = out + "<style>body{font-family:system-ui,sans-serif;margin:2rem;color:#222}h1{margin-bottom:0}"
  out = out + ".meta{color:#666;font-size:0.9rem;margin-bottom:1.5rem}"
  out = out + ".grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(28rem,1fr));gap:1rem}"
  out = out + ".card{border:1px solid #ddd;border-radius:6px;padding:1rem;background:#fafafa}"
  out = out + ".card h2{margin-top:0;font-size:1rem}"
  out = out
    + "pre{background:#fff;border:1px solid #eee;padding:0.75rem;overflow:auto;white-space:pre-wrap;font-size:0.85rem}"
  out = out + ".err{color:#b00}.skip{color:#888;font-style:italic}"
  out = out + "</style></head><body>"
  // `mode` is escaped like every other report string; for values
  // without markup characters the output is unchanged.
  out = out + "<h1>harn eval prompt</h1><div class=\"meta\">"
    + __html_escape(__safe_string(report["template_path"], ""))
    + " · mode: "
    + __html_escape(__safe_string(report["mode"], ""))
    + "</div>"
  out = out + "<div class=\"grid\">"
  let runs = __safe_dict(report["runs"])
  for render in __safe_list(report["renders"]) {
    out = out + __html_render_card(__safe_dict(render), runs)
  }
  out = out + "</div>"
  let context_eval = report["context_eval"]
  if type_of(context_eval) == "dict" {
    out = out + "<h2>Context fixture gates</h2><p>"
      + to_string(context_eval["passed"] ?? 0)
      + " passed / "
      + to_string(context_eval["total"] ?? 0)
      + " total</p>"
    // One <h3> + <ul> per fixture, one <li> per case.
    for fixture in __safe_list(context_eval["fixtures"]) {
      let fx = __safe_dict(fixture)
      out = out + "<h3>" + __html_escape(__safe_string(fx["path"], "")) + "</h3><ul>"
      for case in __safe_list(fx["cases"]) {
        let cs = __safe_dict(case)
        let score = __safe_dict(cs["score"])
        let budget = __safe_dict(cs["budget"])
        let pass_label = if cs["pass"] ?? false {
          "pass"
        } else {
          "fail"
        }
        let selected = __safe_list(cs["selected_artifact_ids"])
        out = out + "<li><strong>" + __html_escape(__safe_string(cs["id"], ""))
          + "</strong>: "
          + pass_label
          + " · score "
          + __format_float_3(score["overall"] ?? 0.0)
          + " · selected ["
          + __html_escape(join(selected, ", "))
          + "] · tokens "
          + to_string(budget["total_tokens"] ?? 0)
          + "/"
          + to_string(budget["budget_tokens"] ?? 0)
          + "</li>"
      }
      out = out + "</ul>"
    }
  }
  let judge = report["judge"]
  if type_of(judge) == "dict" {
    out = out + "<h2>Judge (" + __html_escape(__safe_string(judge["judge_model"], "")) + ")</h2>"
      + "<pre>"
      + __html_escape(__safe_string(judge["verdict"], ""))
      + "</pre>"
  }
  out = out + "</body></html>\n"
  return out
}

/**
 * ─── Entrypoint ───────────────────────────────────────────────────────────
 *
 * Read the serialised report from `HARN_EVAL_PROMPT_REPORT_JSON`, render
 * it in the format named by `HARN_EVAL_PROMPT_OUTPUT` ("json", "html",
 * anything else falls back to the terminal format; the default is
 * "terminal"), and print the result to stdout.
 *
 * When the report variable is empty/unset or its JSON fails to parse, an
 * "internal error" message is written to stderr and the script calls
 * `exit(70)`.
 */
fn main(harness: Harness) {
  let report_json = harness.env.get_or("HARN_EVAL_PROMPT_REPORT_JSON", "")
  if report_json == "" {
    harness.stdio.eprintln("internal error: HARN_EVAL_PROMPT_REPORT_JSON not set by dispatch shim")
    exit(70)
  }
  let report = try {
    json_parse(report_json)
  } catch (err) {
    harness.stdio.eprintln("internal error: failed to parse PromptReport: " + to_string(err))
    exit(70)
  }
  let output_format = harness.env.get_or("HARN_EVAL_PROMPT_OUTPUT", "terminal")
  var payload = ""
  if output_format == "html" {
    payload = __render_html(report)
  } else if output_format == "json" {
    payload = __render_json(report)
  } else {
    payload = __render_terminal(report)
  }
  // Printed without an extra newline: each renderer already terminates
  // its own payload. The Rust shim captures this stdout and is the one
  // that writes it to `--out-file` when that flag is set.
  __io_print(payload)
}