harn-stdlib 0.8.82

// std/agent/transcript — canonical transcript normalization helpers.
//
// Agent transcripts are persisted by the Harn runtime, so Harn owns the
// compatibility layer for reading them. Downstream eval/reporting tools should
// consume these normalized rows instead of guessing at provider-specific or
// historical JSONL shapes.
import { read_jsonl } from "std/jsonl"

// Schema identifier written into the `schema` field of every row produced by
// the row builders in this module.
const TRANSCRIPT_ROW_SCHEMA = "harn.agent.transcript.row.v1"

// Regex patterns used by `__tool_result_is_error` to flag a tool result as an
// error: each pattern is tested against the result text with the "i" flag and
// the first hit wins. Entries are wrapped in `\b` word boundaries; `panic:`
// carries only a leading boundary.
// This is a textual heuristic, so benign output such as "no error" also
// matches.
const ERROR_MARKERS = [
  "\\berror\\b",
  "\\bfailed\\b",
  "\\bfailure\\b",
  "\\bexception\\b",
  "\\btraceback\\b",
  "\\bnot found\\b",
  "\\bno such file\\b",
  "\\bcannot\\b",
  "\\bdoes not exist\\b",
  "\\bcompile error\\b",
  "\\bcompilation failed\\b",
  "\\btest failed\\b",
  "\\bpanic:",
]

// Return the first entry of `values` that is a non-empty string, or "" when
// no entry qualifies. Entries of any other type are skipped.
fn __first_text(values: list) -> string {
  for candidate in values {
    if type_of(candidate) != "string" {
      continue
    }
    if candidate == "" {
      continue
    }
    return candidate
  }
  return ""
}

/**
 * Convert message content variants into plain analysis text.
 *
 * Supports provider message strings, OpenAI-style content part lists, and
 * Harn block dicts carrying `text`, `content`, or `body`.
 *
 * - string: returned unchanged.
 * - list: every item is converted recursively; items that yield "" are
 *   dropped and the rest are joined with "\n".
 * - dict: `text`, `content`, and `body` are converted recursively in that
 *   order and the first non-empty result wins. A block therefore yields the
 *   same text whether it appears at the top level or inside a part list,
 *   including when its `content` is itself a list of parts.
 * - anything else (nil, numbers, ...): "".
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: experimental
 * @example: agent_transcript_text(message.content)
 */
pub fn agent_transcript_text(value) -> string {
  let kind = type_of(value)
  if kind == "string" {
    return value
  }
  if kind == "list" {
    var parts = []
    for item in value {
      // Dict items go through the same dict rule as top-level dicts.
      let part = agent_transcript_text(item)
      if part != "" {
        parts = parts + [part]
      }
    }
    return join(parts, "\n")
  }
  if kind == "dict" {
    // Priority order matters: `text` beats `content`, which beats `body`.
    for field in [value?.text, value?.content, value?.body] {
      let resolved = agent_transcript_text(field)
      if resolved != "" {
        return resolved
      }
    }
  }
  return ""
}

// Decode a JSON-encoded string payload.
// - Non-string input is handed back unchanged, except nil which becomes {}.
// - A string that parses is replaced by the parsed value.
// - A string that fails to parse is preserved as `{_raw: <string>}`.
fn __json_or_raw(raw) {
  if type_of(raw) == "string" {
    let attempt = try {
      json_parse(raw)
    }
    if is_ok(attempt) {
      return unwrap(attempt)
    }
    return {_raw: raw}
  }
  return raw ?? {}
}

/**
 * Resolve the tool name of a Harn, OpenAI, or legacy tool-call dict.
 *
 * Looks at `name`, then `function.name`, then `tool_name`, and returns the
 * first non-empty string. Non-dict input and calls without a usable name
 * yield "".
 *
 * @effects: []
 * @allocation: stack-only
 * @errors: []
 * @api_stability: experimental
 * @example: agent_transcript_tool_call_name(call)
 */
pub fn agent_transcript_tool_call_name(call) -> string {
  if type_of(call) == "dict" {
    let candidates = [call?.name, call?.function?.name, call?.tool_name]
    return __first_text(candidates)
  }
  return ""
}

/**
 * Resolve the arguments of a tool-call dict, as a dict when possible.
 *
 * Sources are consulted in order: `args` (returned as-is), `arguments`, then
 * `function.arguments`. The latter two may be an object or a JSON string;
 * strings that fail to parse are kept as `{_raw}` so consumers do not
 * silently lose evidence. Non-dict input and calls without arguments yield {}.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: experimental
 * @example: agent_transcript_tool_call_args(call)
 */
pub fn agent_transcript_tool_call_args(call) {
  if type_of(call) != "dict" {
    return {}
  }
  let native = call?.args
  if native != nil {
    return native
  }
  let top_level = call?.arguments
  if top_level != nil {
    return __json_or_raw(top_level)
  }
  let nested = call?.function?.arguments
  if nested != nil {
    return __json_or_raw(nested)
  }
  return {}
}

/**
 * Normalize one tool-call dict to `{id, name, args, raw}`.
 *
 * Returns nil when no tool name can be resolved. `id` is the first non-empty
 * string among `id`, `call_id`, and `tool_call_id`. The untouched input is
 * kept under `raw` so unknown fields stay available to audit/replay consumers.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: experimental
 * @example: agent_transcript_tool_call(call)
 */
pub fn agent_transcript_tool_call(call) {
  let tool_name = agent_transcript_tool_call_name(call)
  if tool_name != "" {
    let call_id = __first_text([call?.id, call?.call_id, call?.tool_call_id])
    let call_args = agent_transcript_tool_call_args(call)
    return {id: call_id, name: tool_name, args: call_args, raw: call}
  }
  return nil
}

/**
 * Normalize a list of tool-call dicts.
 *
 * Entries are normalized in order; only entries without a resolvable name are
 * dropped. Anything that is not a list yields an empty list.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: experimental
 * @example: agent_transcript_tool_calls(calls)
 */
pub fn agent_transcript_tool_calls(calls) -> list {
  if type_of(calls) != "list" {
    return []
  }
  var normalized_calls = []
  for entry in calls {
    let row = agent_transcript_tool_call(entry)
    if row == nil {
      continue
    }
    normalized_calls = normalized_calls + [row]
  }
  return normalized_calls
}

// Resolve a record's role: the top-level `role` wins, then `message.role`.
// Yields "" when neither is a non-empty string.
fn __role(record) -> string {
  let direct = record?.role
  let nested = record?.message?.role
  return __first_text([direct, nested])
}

// Build the canonical assistant row for `record`.
// Text comes from `text`, `content`, or `message.content` (first non-nil);
// tool calls from `tool_calls` or `message.tool_calls`. Token and latency
// counters default to 0 when no source field is present. The source record is
// kept verbatim under `raw`.
fn __assistant_row(record, index: int, iteration: int) -> dict {
  let content = record?.text ?? record?.content ?? record?.message?.content
  let calls = record?.tool_calls ?? record?.message?.tool_calls ?? []
  let usage = {
    input_tokens: record?.input_tokens ?? record?.usage?.input_tokens ?? record?.llm?.input_tokens ?? 0,
    output_tokens: record?.output_tokens ?? record?.usage?.output_tokens ?? record?.llm?.output_tokens ?? 0,
    cache_read_tokens: record?.cache_read_tokens ?? record?.usage?.cache_read_tokens ?? 0,
    cache_write_tokens: record?.cache_write_tokens ?? record?.usage?.cache_write_tokens ?? 0,
    response_ms: record?.response_ms ?? record?.latency_ms ?? 0,
  }
  return {
    schema: TRANSCRIPT_ROW_SCHEMA,
    kind: "assistant",
    role: "assistant",
    iteration: iteration,
    index: index,
    text: agent_transcript_text(content),
    tool_calls: agent_transcript_tool_calls(calls),
    provider: record?.provider ?? record?.llm?.provider,
    model: record?.model ?? record?.llm?.model ?? record?.message?.model,
    usage: usage,
    raw_type: record?.type,
    raw: record,
  }
}

// Build the canonical user row for `record`.
// Text is taken from `content`, then `message.content`, then `text`. The
// `text` fallback lets an already-normalized row (which stores its text under
// `text`) pass through normalization again without being blanked. The source
// record is kept verbatim under `raw`.
fn __user_row(record, index: int, iteration: int) -> dict {
  let content = record?.content ?? record?.message?.content ?? record?.text
  return {
    schema: TRANSCRIPT_ROW_SCHEMA,
    kind: "user",
    role: "user",
    iteration: iteration,
    index: index,
    text: agent_transcript_text(content),
    raw_type: record?.type,
    raw: record,
  }
}

// Report whether tool-result text looks like a failure: true as soon as any
// ERROR_MARKERS pattern matches (matched with the "i" flag), false otherwise.
// A nil `text` is treated as "".
fn __tool_result_is_error(text: string) -> bool {
  let haystack = text ?? ""
  for marker in ERROR_MARKERS {
    let hit = regex_match(marker, haystack, "i")
    if hit != nil {
      return true
    }
  }
  return false
}

// Assemble the canonical tool_result row from already-extracted fields.
// An empty `name` is reported as "unknown"; `is_error` is derived from `text`
// via __tool_result_is_error. The source record is kept verbatim under `raw`.
fn __tool_result_row(
  name: string,
  text: string,
  call_id: string,
  record,
  index: int,
  iteration: int,
) -> dict {
  let display_name = if name == "" {
    "unknown"
  } else {
    name
  }
  let flagged = __tool_result_is_error(text)
  return {
    schema: TRANSCRIPT_ROW_SCHEMA,
    kind: "tool_result",
    role: "tool",
    iteration: iteration,
    index: index,
    name: display_name,
    tool_call_id: call_id,
    text: text,
    is_error: flagged,
    raw_type: record?.type,
    raw: record,
  }
}

// Build a tool_result row from a tool-role message record.
// - name: first non-empty of `name`, `message.name`, `tool_name`.
// - text: `content`, then `message.content`, then `text`. The `text` fallback
//   lets an already-normalized tool_result row (which stores its body under
//   `text`) be normalized again without losing its text and error flag.
// - call id: first non-empty of `tool_call_id`, `message.tool_call_id`, `id`.
fn __tool_message_row(record, index: int, iteration: int) -> dict {
  let name = __first_text([record?.name, record?.message?.name, record?.tool_name])
  let body = agent_transcript_text(record?.content ?? record?.message?.content ?? record?.text)
  let call_id = __first_text([record?.tool_call_id, record?.message?.tool_call_id, record?.id])
  return __tool_result_row(name, body, call_id, record, index, iteration)
}

// Parse the attribute text of a `<tool_result ...>` opening tag into a dict of
// `key: value` strings. Values may be bare or quoted; a value ends at the
// first whitespace, quote, or `>`, so a quoted value containing spaces is cut
// at the first space. A later duplicate key overrides an earlier one.
fn __parse_tool_result_attrs(attrs: string) -> dict {
  let pattern = "([a-zA-Z_][a-zA-Z0-9_]*)\\s*=\\s*[\"']?([^\"'\\s>]+)"
  let matches = regex_captures(pattern, attrs ?? "") ?? []
  var parsed = {}
  for found in matches {
    let groups = found?.groups ?? []
    // Skip matches that did not capture both a key and a value.
    if len(groups) < 2 {
      continue
    }
    parsed = parsed + {[groups[0]]: groups[1]}
  }
  return parsed
}

/**
 * Extract textual `<tool_result ...>...</tool_result>` blocks from `text`.
 *
 * Each block becomes `{name, tool_call_id, text}`: `name` comes from the
 * `name` attribute (default "unknown"), `tool_call_id` from the `id` or
 * `tool_call_id` attribute (default ""), and `text` is the trimmed block body.
 * This keeps older/cumulative request-envelope transcripts analyzable without
 * requiring downstream consumers to parse rendered transcript text.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: experimental
 * @example: agent_transcript_tool_result_blocks(text)
 */
pub fn agent_transcript_tool_result_blocks(text: string) -> list {
  let pattern = "(?i)<tool_result\\b([^>]*)>([\\s\\S]*?)</tool_result>"
  let matches = regex_captures(pattern, text ?? "") ?? []
  var blocks = []
  for found in matches {
    let groups = found?.groups ?? []
    // Group 0 holds the tag attributes, group 1 the block body.
    var attrs = {}
    if len(groups) > 0 {
      attrs = __parse_tool_result_attrs(groups[0])
    }
    var body = ""
    if len(groups) > 1 {
      body = trim(groups[1])
    }
    let block = {name: attrs?.name ?? "unknown", tool_call_id: attrs?.id ?? attrs?.tool_call_id ?? "", text: body}
    blocks = blocks + [block]
  }
  return blocks
}

// Extract tool_result rows from a legacy `request` record whose user messages
// carry cumulative `<tool_result>` blocks.
//
// `previous_count` is the number of blocks seen in the preceding request.
// Returns `{rows, count}`, where `rows` holds only the blocks not emitted
// before and `count` is the total number of blocks in this request (to be
// passed back as `previous_count` for the next request).
fn __legacy_request_tool_result_rows(record, index: int, previous_count: int) -> dict {
  var all = []
  for message in record?.messages ?? [] {
    if message?.role == "user" {
      all = all + agent_transcript_tool_result_blocks(agent_transcript_text(message?.content))
    }
  }
  let total = len(all)
  // Fewer blocks than before means the history was reset or compacted, so
  // everything present is treated as new.
  var fresh = all
  if total > previous_count {
    // The envelope grew: only the tail is new.
    fresh = all[previous_count:]
  } else if total == previous_count {
    // Same cumulative history as the previous request: nothing new. Without
    // this case every earlier result would be emitted a second time.
    fresh = []
  }
  // The results in a request belong to the assistant turn before it. The
  // parentheses make the decrement apply to the parsed iteration regardless of
  // how `??` and `-` bind relative to each other.
  let iteration = max(0, (to_int(record?.iteration) ?? 0) - 1)
  var rows = []
  for result in fresh {
    rows = rows
      + [
      __tool_result_row(
        result?.name ?? "unknown",
        result?.text ?? "",
        result?.tool_call_id ?? "",
        record,
        index,
        iteration,
      ),
    ]
  }
  return {rows: rows, count: total}
}

/**
 * Normalize JSONL transcript records to canonical analysis rows.
 *
 * Accepted inputs include:
 * - modern Harn message rows: `{type:"message", role, message?, content?}`
 * - older scorer rows: `{type:"response", tool_calls, text, ...}`
 * - older request rows with cumulative `<tool_result>` blocks
 * - plain provider/session messages with only `role` and `content`
 *
 * Records are classified in this order: assistant (`type` "response" or role
 * "assistant"), tool (role "tool" or "tool_result"), legacy request (`type`
 * "request"), then user. Records matching none of these produce no row. Each
 * row's `index` is the position of its source record in `records`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: experimental
 * @example: agent_transcript_normalize(records)
 */
pub fn agent_transcript_normalize(records) -> list {
  var out = []
  var position = 0
  // Iteration assigned to the next assistant record that has none of its own.
  var upcoming_iteration = 0
  // Iteration of the latest assistant row; -1 until one has been seen.
  var latest_assistant = -1
  // Number of <tool_result> blocks found in the previous legacy request.
  var legacy_seen = 0
  for record in records ?? [] {
    let raw_type = record?.type
    let role = __role(record)
    if raw_type == "response" || role == "assistant" {
      var iteration = upcoming_iteration
      if record?.iteration != nil {
        iteration = to_int(record.iteration) ?? upcoming_iteration
      }
      out = out + [__assistant_row(record, position, iteration)]
      upcoming_iteration = max(upcoming_iteration, iteration + 1)
      latest_assistant = iteration
    } else if role == "tool" || role == "tool_result" {
      // Tool output is attributed to the assistant turn that preceded it.
      let tool_iteration = max(0, latest_assistant)
      out = out + [__tool_message_row(record, position, tool_iteration)]
    } else if raw_type == "request" {
      let legacy = __legacy_request_tool_result_rows(record, position, legacy_seen)
      out = out + legacy.rows
      legacy_seen = legacy.count
    } else if role == "user" {
      // A user message opens the turn after the latest assistant row.
      let user_iteration = max(0, latest_assistant + 1)
      out = out + [__user_row(record, position, user_iteration)]
    }
    position = position + 1
  }
  return out
}

/**
 * Load a transcript JSONL file and return its canonical rows.
 *
 * `options` is forwarded to `read_jsonl` (nil is replaced by {}); the records
 * it returns are passed to `agent_transcript_normalize`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: experimental
 * @example: agent_transcript_read(path)
 */
pub fn agent_transcript_read(path: string, options = {}) -> list {
  // NOTE(review): `@effects: []` above looks at odds with the `read_jsonl`
  // call on `path` — confirm against the tag convention.
  let records = read_jsonl(path, options ?? {})
  return agent_transcript_normalize(records)
}

/**
 * List every assistant tool call in a transcript as a flat event list.
 *
 * `records` is first run through `agent_transcript_normalize`. Each tool call
 * on each assistant row becomes one `harn.agent.transcript.tool_event.v1`
 * event carrying the row's `iteration` and `index` (as `row_index`), the
 * call's position within the row (`call_index`), and the normalized call.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: experimental
 * @example: agent_transcript_tool_events(records)
 */
pub fn agent_transcript_tool_events(records) -> list {
  var events = []
  for row in agent_transcript_normalize(records) {
    if row?.kind == "assistant" {
      var position = 0
      for call in row?.tool_calls ?? [] {
        let event = {
          schema: "harn.agent.transcript.tool_event.v1",
          kind: "tool_call",
          iteration: row.iteration,
          row_index: row.index,
          call_index: position,
          id: call?.id ?? "",
          name: call?.name ?? "",
          args: call?.args ?? {},
          call: call,
        }
        events = events + [event]
        position = position + 1
      }
    }
  }
  return events
}

/**
 * List the tool-result rows of a transcript.
 *
 * `records` is first run through `agent_transcript_normalize`; rows whose
 * `kind` is "tool_result" are returned in transcript order.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: experimental
 * @example: agent_transcript_tool_results(records)
 */
pub fn agent_transcript_tool_results(records) -> list {
  var results = []
  for row in agent_transcript_normalize(records) {
    if row?.kind != "tool_result" {
      continue
    }
    results = results + [row]
  }
  return results
}