// std/prompt_library — reusable prompt fragments and hotspot consolidation.
//
// Import with: import "std/prompt_library"
import { filter_nil } from "std/collections"
import { euclidean_distance, kmeans } from "std/math"
// Build a fresh library value carrying the canonical `_type` tag and no fragments.
fn __empty_library() {
return {fragments: [], _type: "prompt_library"}
}
// Assert `library` is a prompt library and return its fragment list (never nil).
// `name` prefixes the error message so callers report their own entry point.
fn __require_library(library, name = "prompt_library") {
let valid = library != nil && library._type == "prompt_library"
require valid, name + ": expected a prompt library"
return library.fragments ?? []
}
// Rough token estimate: roughly four characters per token, rounded up.
fn __estimate_tokens_text(text) {
let chars = len(text) * 1.0
return to_int(ceil(chars / 4.0))
}
// Derive a fragment id from a file path by stripping the extension.
// `.harn.prompt` is treated as one compound extension; otherwise only the
// final extension is removed.
fn __fragment_id_from_path(path) {
let base = basename(path)
let compound = ".harn.prompt"
if ends_with(base, compound) {
return substring(base, 0, len(base) - len(compound))
}
let ext = extname(base)
if ext == "" {
return base
}
return substring(base, 0, len(base) - len(ext))
}
// Lowercase `text`, collapse runs of non-identifier characters to spaces,
// and return the non-empty words.
fn __normalize_words(text) {
let lowered = lowercase(text)
let spaced = regex_replace("[^A-Za-z0-9_./:-]+", " ", lowered)
let tokens = split(trim(spaced), " ")
return tokens.filter({ token -> token != "" })
}
// Normalized word-prefix of `text`, truncated to `max_tokens` words.
// nil or non-positive `max_tokens` means no truncation.
fn __token_prefix(text, max_tokens) {
let words = __normalize_words(text)
let unlimited = max_tokens == nil || max_tokens <= 0
if unlimited || len(words) <= max_tokens {
return join(words, " ")
}
return join(words[:max_tokens], " ")
}
// Build one normalized fragment from a catalog entry. The body comes from
// `body`/`prompt`/`text` inline, or is read from `path` (resolved against
// `base_dir` when relative). The id falls back to the path's basename.
fn __fragment_from_config(config, base_dir = nil) {
var body = config?.body ?? config?.prompt ?? config?.text
var path = config?.path
if body == nil && path != nil {
var full_path = path
if base_dir != nil && !starts_with(path, "/") {
full_path = path_join(base_dir, path)
}
body = read_file(full_path)
path = full_path
}
require body != nil, "prompt_fragment: fragment requires body, prompt, text, or path"
var id = config?.id
if id == nil && path != nil {
id = __fragment_id_from_path(path)
}
require id != nil && id != "", "prompt_fragment: fragment id must be a non-empty string"
return prompt_fragment(id, body, config + {path: path})
}
// Parse a fragment file that may begin with `---` TOML front matter.
// Without front matter (or with an unterminated block) the whole file
// becomes the fragment body.
fn __front_matter_fragment(path, text) {
let open_marker = "---\n"
let close_marker = "\n---\n"
if !starts_with(text, open_marker) {
return prompt_fragment(__fragment_id_from_path(path), text, {path: path, source_path: path})
}
let rest = substring(text, len(open_marker))
let marker = rest.index_of(close_marker)
if marker < 0 {
return prompt_fragment(__fragment_id_from_path(path), text, {path: path, source_path: path})
}
let meta = toml_parse(substring(rest, 0, marker))
let body = substring(rest, marker + len(close_marker))
return __fragment_from_config(meta + {body: body, path: path, source_path: path}, dirname(path))
}
// Load one file: a TOML catalog with a `prompt_fragments` list becomes a
// whole library; anything else is treated as a single fragment file.
fn __load_one(path) {
let text = read_file(path)
let parsed = try {
toml_parse(text)
}
if is_ok(parsed) {
let data = unwrap(parsed)
let configs = data?.prompt_fragments
if configs != nil {
let base_dir = dirname(path)
var library = __empty_library()
for config in configs {
library = prompt_library_define(library, __fragment_from_config(config, base_dir))
}
return library
}
}
return prompt_library([__front_matter_fragment(path, text)])
}
// Render a fragment's body (body/prompt/text, first non-nil) as a template
// against `bindings`.
fn __render_fragment(fragment, bindings = nil) {
let template = fragment?.body ?? fragment?.prompt ?? fragment?.text
require template != nil, "prompt_library_inject: fragment has no body"
let scope = bindings ?? {}
return render_string(template, scope)
}
// True when `fragment` satisfies every provided filter key; a nil filter
// set matches everything. `tag` requires one tag; `tags` requires all.
fn __matches_filters(fragment, filters) {
if filters == nil {
return true
}
let id_ok = filters?.id == nil || fragment.id == filters.id
let tenant_ok = filters?.tenant_id == nil || fragment?.tenant_id == filters.tenant_id
let provenance_ok = filters?.provenance == nil || fragment?.provenance == filters.provenance
let status_ok = filters?.status == nil || fragment?.status == filters.status
if !(id_ok && tenant_ok && provenance_ok && status_ok) {
return false
}
let tags = fragment?.tags ?? []
if filters?.tag != nil && !tags.contains(filters.tag) {
return false
}
for required in filters?.tags ?? [] {
if !tags.contains(required) {
return false
}
}
return true
}
// Relevance score for one fragment: +10 per context tag present on the
// fragment, +1 per query word (from ctx.query, falling back to ctx.text)
// found in the fragment's id, title, or body.
fn __score_fragment(fragment, ctx) {
let tags = fragment?.tags ?? []
var score = 0
for tag in ctx?.tags ?? [] {
if tags.contains(tag) {
score = score + 10
}
}
let query = trim(lowercase(ctx?.query ?? ctx?.text ?? ""))
if query == "" {
return score
}
let haystack = lowercase((fragment?.id ?? "") + " " + (fragment?.title ?? "") + " " + (fragment?.body ?? ""))
for word in __normalize_words(query) {
if contains(haystack, word) {
score = score + 1
}
}
return score
}
// Repeatedly extract the highest-scoring entry (first entry wins ties)
// until `limit` fragments are collected or the pool is empty. A nil
// `limit` drains the whole pool.
fn __top_scored(scored, limit) {
var pool = scored
var picked = []
while len(pool) > 0 && (limit == nil || len(picked) < limit) {
var winner = 0
var i = 1
while i < len(pool) {
if pool[i].score > pool[winner].score {
winner = i
}
i = i + 1
}
picked = picked.push(pool[winner].fragment)
pool = pool[:winner] + pool[winner + 1:]
}
return picked
}
// Extract the text of a conversation: strings pass through; dicts try
// prefix/text/prompt/system/content in order, else are JSON-serialized.
fn __conversation_text(conversation) {
if type_of(conversation) == "string" {
return conversation
}
let text = conversation?.prefix
?? conversation?.text
?? conversation?.prompt
?? conversation?.system
?? conversation?.content
return text ?? json_stringify(conversation)
}
// Use an explicit dict id when present, otherwise synthesize a stable
// positional id from `index`.
fn __conversation_id(conversation, index) {
let explicit = if type_of(conversation) == "dict" {
conversation?.id
} else {
nil
}
return explicit ?? ("conversation-" + to_string(index))
}
// Embed a conversation as a bag-of-words count vector over a fixed
// vocabulary, with the total word count appended as the last component.
// Dict conversations may carry a precomputed `embedding`, which wins.
fn __conversation_embedding(conversation, text) {
if type_of(conversation) == "dict" && conversation?.embedding != nil {
return conversation.embedding
}
let words = __normalize_words(text)
// Vocabulary order is fixed: each term maps to one vector component.
let vocab = [
"system",
"developer",
"tool",
"agent",
"context",
"repo",
"workspace",
"rust",
"test",
"error",
"fix",
"issue",
"github",
"docs",
"api",
"prompt",
]
var vector = []
for term in vocab {
let hits = words.filter({ word -> word == term })
vector = vector.push(len(hits) * 1.0)
}
return vector.push(len(words) * 1.0)
}
// Mean euclidean distance from `point` to each entry of `points`;
// 0.0 for an empty list.
fn __avg_distance(point, points) {
let count = len(points)
if count == 0 {
return 0.0
}
var sum = 0.0
for other in points {
sum = sum + euclidean_distance(point, other)
}
return sum / (count * 1.0)
}
// Mean silhouette coefficient over all points. For each point:
//   a = mean distance to the other members of its own cluster,
//   b = smallest mean distance to any other non-empty cluster,
//   contribution = (b - a) / max(a, b).
// Returns 0.0 when k <= 1 or there are fewer than two points (no
// between-cluster structure to measure). Higher is better; range [-1, 1].
fn __silhouette(points, assignments, k) {
if k <= 1 || len(points) <= 1 {
return 0.0
}
var total = 0.0
var idx = 0
while idx < len(points) {
let cluster = assignments[idx]
// Partition every other point into "same cluster" vs. one bucket per
// foreign cluster (buckets indexed by cluster id, so pre-size to k).
var same = []
var other_clusters = []
var c = 0
while c < k {
other_clusters = other_clusters.push([])
c = c + 1
}
var j = 0
while j < len(points) {
if j != idx {
if assignments[j] == cluster {
same = same.push(points[j])
} else {
let other = assignments[j]
other_clusters[other] = other_clusters[other].push(points[j])
}
}
j = j + 1
}
// a: intra-cluster cohesion (0.0 when this point is alone in its cluster).
let a = __avg_distance(points[idx], same)
// b: separation — distance to the nearest foreign cluster, skipping empties.
var b = nil
for group in other_clusters {
if len(group) > 0 {
let d = __avg_distance(points[idx], group)
if b == nil || d < b {
b = d
}
}
}
// Points with no foreign cluster (b == nil) or a degenerate denominator
// contribute 0 to the total.
if b != nil {
let denom = if a > b {
a
} else {
b
}
if denom > 0 {
total = total + (b - a) / denom
}
}
idx = idx + 1
}
return total / (len(points) * 1.0)
}
// Pick a cluster count for kmeans and run it.
//   - An explicit `options.k` is honored as-is.
//   - Otherwise k = 2..min(max_k, 6) is swept and the best silhouette wins.
//   - Fewer than 4 points, or a sweep that evaluates no candidate
//     (e.g. options.max_k < 2), falls back to a single cluster.
// Always returns {k, result, silhouette} — never nil. The previous version
// returned nil when the sweep range was empty, crashing callers that read
// `.k` and `.result` unconditionally.
fn __choose_kmeans(points, options) {
let n = len(points)
let max_iterations = options?.max_iterations ?? 100
let requested = options?.k
if requested != nil {
let result = kmeans(points, requested, {max_iterations: max_iterations})
return {k: requested, result: result, silhouette: __silhouette(points, result.assignments, requested)}
}
if n >= 4 {
let max_k = if options?.max_k != nil && options.max_k < n {
options.max_k
} else {
n
}
var best = nil
var k = 2
while k <= max_k && k <= 6 {
let result = kmeans(points, k, {max_iterations: max_iterations})
let score = __silhouette(points, result.assignments, k)
if best == nil || score > best.silhouette {
best = {k: k, result: result, silhouette: score}
}
k = k + 1
}
if best != nil {
return best
}
// Empty sweep range: fall through to the single-cluster fallback below.
}
let result = kmeans(points, 1, {max_iterations: max_iterations})
return {k: 1, result: result, silhouette: 0.0}
}
// Number of snippets that start with `prefix`.
fn __prefix_count(snippets, prefix) {
let matching = snippets.filter({ snippet -> starts_with(snippet, prefix) })
return len(matching)
}
// Longest word-prefix shared by at least `min_fraction` of the snippets,
// considering only prefixes of `min_tokens` or more words. Returns
// {text, tokens, support}; `tokens` stays 0 when nothing qualifies.
fn __shared_prefix(snippets, min_fraction, min_tokens) {
let total = len(snippets) * 1.0
var best = {text: "", tokens: 0, support: 0}
for snippet in snippets {
let words = split(snippet, " ").filter({ word -> word != "" })
var length = len(words)
// Walk from the longest prefix of this snippet downward; the first
// qualifying prefix is also the longest one this snippet can offer.
while length >= min_tokens {
let candidate = join(words[:length], " ")
let support = __prefix_count(snippets, candidate)
if support / total >= min_fraction && length > best.tokens {
best = {text: candidate, tokens: length, support: support}
break
}
length = length - 1
}
}
return best
}
/** Create a prompt library from a fragment list (nil means empty). */
pub fn prompt_library(fragments = nil) {
let initial = fragments ?? []
var library = __empty_library()
for fragment in initial {
library = prompt_library_define(library, fragment)
}
return library
}
/**
 * Normalize one prompt fragment: fill defaults for title, tags, budget,
 * cache settings, and status, then drop nil-valued fields.
 */
pub fn prompt_fragment(id, body, config = nil) {
require id != nil && id != "", "prompt_fragment: id must be a non-empty string"
require body != nil, "prompt_fragment: body must not be nil"
let provenance = config?.provenance ?? "manual"
// Machine-proposed (kmeans) fragments default to human review; everything
// else is accepted immediately.
let default_status = if provenance == "kmeans" {
"pending_review"
} else {
"accepted"
}
let fields = {
id: id,
title: config?.title ?? id,
tags: config?.tags ?? [],
token_budget: config?.token_budget ?? __estimate_tokens_text(body),
cache_ttl: config?.cache_ttl ?? "5m",
cache_control: config?.cache_control ?? {type: "ephemeral"},
body: body,
path: config?.path,
source_path: config?.source_path,
provenance: provenance,
status: config?.status ?? default_status,
tenant_id: config?.tenant_id,
score: config?.score,
members: config?.members,
support: config?.support,
tokens_saved: config?.tokens_saved,
monthly_savings_usd: config?.monthly_savings_usd,
}
return filter_nil(fields)
}
/** Add or replace a fragment by id, preserving the original position on replace. */
pub fn prompt_library_define(library, fragment) {
let fragments = __require_library(library, "prompt_library_define")
require fragment?.id != nil && fragment.id != "", "prompt_library_define: fragment requires id"
var rebuilt = []
var found = false
for existing in fragments {
if existing.id == fragment.id {
// Replace in place so list order (which drives rendering) is stable.
rebuilt = rebuilt.push(fragment)
found = true
} else {
rebuilt = rebuilt.push(existing)
}
}
if !found {
rebuilt = rebuilt.push(fragment)
}
return {_type: "prompt_library", fragments: rebuilt}
}
/** Load fragments from a TOML catalog or `.harn.prompt` fragment file.
 * A list of paths is merged left-to-right; later files override by id. */
pub fn prompt_library_load(path_or_paths) {
if type_of(path_or_paths) != "list" {
return __load_one(path_or_paths)
}
var merged = __empty_library()
for path in path_or_paths {
for fragment in prompt_library_load(path).fragments {
merged = prompt_library_define(merged, fragment)
}
}
return merged
}
/** List fragments, optionally filtered by id, tag/tags, tenant, provenance, or status. */
pub fn prompt_library_list(library, filters = nil) {
let all = __require_library(library, "prompt_library_list")
return all.filter({ fragment -> __matches_filters(fragment, filters) })
}
/** Find one fragment by id, returning nil when it is absent. */
pub fn prompt_library_find(library, id) {
let fragments = __require_library(library, "prompt_library_find")
for candidate in fragments {
if candidate.id == id {
return candidate
}
}
return nil
}
/** Render one fragment to text; errors when the id is unknown. */
pub fn prompt_library_inject(library, id, bindings = nil) {
let found = prompt_library_find(library, id)
require found != nil, "prompt_library_inject: unknown fragment '" + id + "'"
return __render_fragment(found, bindings)
}
/** Render one fragment with cache metadata for hosts that consume prompt blocks. */
pub fn prompt_library_payload(library, id, bindings = nil) {
let found = prompt_library_find(library, id)
require found != nil, "prompt_library_payload: unknown fragment '" + id + "'"
let rendered = __render_fragment(found, bindings)
return {
fragment_id: found.id,
text: rendered,
cache_control: found?.cache_control ?? {type: "ephemeral"},
cache_ttl: found?.cache_ttl ?? "5m",
token_budget: found?.token_budget,
provenance: found?.provenance,
}
}
/** Render all matching fragments, skipping any whose token budget would
 * push the running total past `filters.max_tokens` (later, cheaper
 * fragments may still fit). */
pub fn prompt_library_inject_cluster(library, filters = nil, bindings = nil) {
let cap = filters?.max_tokens
var spent = 0
var rendered = []
for fragment in prompt_library_list(library, filters) {
let cost = fragment?.token_budget ?? __estimate_tokens_text(fragment?.body ?? "")
let fits = cap == nil || spent + cost <= cap
if fits {
rendered = rendered.push(__render_fragment(fragment, bindings))
spent = spent + cost
}
}
return join(rendered, "\n\n")
}
/** Score and return likely useful fragments for a context.
 * Returns at most ctx.limit (or ctx.top_n, default 5) fragments, best first. */
pub fn prompt_library_suggest(library, ctx = nil) {
let context = ctx ?? {}
// Zero-score fragments are only kept when the context supplies no
// relevance signal at all. __score_fragment treats `text` as a fallback
// query, so `text` must count as a signal here too — previously a
// text-only context kept every fragment regardless of relevance.
let no_signal = context?.query == nil && context?.text == nil && context?.tags == nil
var scored = []
for fragment in __require_library(library, "prompt_library_suggest") {
let score = __score_fragment(fragment, context)
if score > 0 || no_signal {
scored = scored.push({score: score, fragment: fragment})
}
}
return __top_scored(scored, context?.limit ?? context?.top_n ?? 5)
}
/** Return a closure-backed namespace for `library.inject(...)` style calls.
 * Each entry closes over `library` and delegates to the module function. */
pub fn prompt_library_api(library) {
let api = {
inject: fn(id, bindings = nil) { return prompt_library_inject(library, id, bindings) },
inject_cluster: fn(filters = nil, bindings = nil) { return prompt_library_inject_cluster(library, filters, bindings) },
payload: fn(id, bindings = nil) { return prompt_library_payload(library, id, bindings) },
suggest: fn(ctx = nil) { return prompt_library_suggest(library, ctx) },
list: fn(filters = nil) { return prompt_library_list(library, filters) },
find: fn(id) { return prompt_library_find(library, id) },
}
return api
}
/** Build k-means prompt-hotspot fragment proposals from tenant-scoped conversations.
 * Pipeline: filter by tenant -> embed prompt prefixes -> cluster with kmeans ->
 * find each cluster's shared word-prefix -> keep prefixes whose estimated
 * monthly savings clear the threshold, emitted as pending-review fragments. */
pub fn prompt_library_hotspots(conversations, options = nil) {
let tenant = options?.tenant_id
let max_prefix_tokens = options?.max_prefix_tokens ?? 1200
let min_fraction = options?.min_fraction ?? 0.8
let min_shared_tokens = options?.min_shared_tokens ?? 8
let daily_invocations = options?.daily_invocation_count ?? 1
let dollars_per_token = options?.dollars_per_token ?? 0.0
let min_monthly_savings = options?.min_monthly_savings_usd ?? 0.0
var records = []
var points = []
var index = 0
for conversation in conversations {
// NOTE(review): string conversations always pass the tenant filter (they
// carry no tenant_id) — confirm that is intended.
if tenant == nil || type_of(conversation) != "dict" || conversation?.tenant_id == tenant {
let text = __conversation_text(conversation)
let snippet = __token_prefix(text, max_prefix_tokens)
records = records
.push(
{
id: __conversation_id(conversation, index),
tenant_id: if type_of(conversation) == "dict" {
conversation?.tenant_id
} else {
nil
},
snippet: snippet,
embedding: __conversation_embedding(conversation, snippet),
},
)
// points[i] mirrors records[i]; kmeans assignments index both.
points = points.push(records[-1].embedding)
}
// index advances for every conversation (filtered or not) so synthesized
// ids stay stable relative to the input order.
index = index + 1
}
if len(records) == 0 {
return []
}
let chosen = __choose_kmeans(points, options ?? {})
// Bucket snippets and record ids by assigned cluster (lists indexed 0..k-1).
var snippets_by_cluster = []
var members_by_cluster = []
var k = 0
while k < chosen.k {
snippets_by_cluster = snippets_by_cluster.push([])
members_by_cluster = members_by_cluster.push([])
k = k + 1
}
var idx = 0
while idx < len(records) {
let cluster = chosen.result.assignments[idx]
snippets_by_cluster[cluster] = snippets_by_cluster[cluster].push(records[idx].snippet)
members_by_cluster[cluster] = members_by_cluster[cluster].push(records[idx].id)
idx = idx + 1
}
var proposals = []
var cluster_id = 0
while cluster_id < len(snippets_by_cluster) {
let snippets = snippets_by_cluster[cluster_id]
if len(snippets) > 0 {
let prefix = __shared_prefix(snippets, min_fraction, min_shared_tokens)
if prefix.tokens >= min_shared_tokens {
// One copy of the prefix must still be sent; the rest are savings.
let tokens_saved = prefix.tokens * (prefix.support - 1)
let monthly = tokens_saved * daily_invocations * 30.0 * dollars_per_token
if monthly >= min_monthly_savings {
let scope = tenant ?? "default"
// Content-addressed id: same prefix text -> same proposal id.
let id = "kmeans-" + scope + "-" + substring(sha256(prefix.text), 0, 12)
proposals = proposals
.push(
prompt_fragment(
id,
prefix.text,
{
title: "K-means hotspot " + to_string(cluster_id),
tags: options?.tags ?? ["hotspot"],
token_budget: prefix.tokens,
provenance: "kmeans",
tenant_id: tenant,
status: "pending_review",
score: chosen.silhouette,
members: members_by_cluster[cluster_id],
support: prefix.support,
tokens_saved: tokens_saved,
monthly_savings_usd: monthly,
},
),
)
}
}
}
cluster_id = cluster_id + 1
}
return proposals
}
/** Return pending k-means proposals in the shape expected by review UIs.
 * Provenance is always forced to "kmeans"; status defaults to
 * "pending_review" but can be overridden via filters.status. */
pub fn prompt_library_review_queue(library, filters = nil) {
let wanted_status = filters?.status ?? "pending_review"
let review_filters = (filters ?? {}) + {provenance: "kmeans", status: wanted_status}
var queue = []
for fragment in prompt_library_list(library, review_filters) {
let entry = {
id: fragment.id,
title: fragment.title,
tenant_id: fragment?.tenant_id,
text: fragment.body,
token_budget: fragment?.token_budget,
support: fragment?.support,
members: fragment?.members ?? [],
tokens_saved: fragment?.tokens_saved ?? 0,
monthly_savings_usd: fragment?.monthly_savings_usd ?? 0.0,
status: fragment.status,
}
queue = queue.push(entry)
}
return queue
}