// std/text — Text processing utilities for LLM output and code analysis.
/**
 * Convert an integer-compatible value into its decimal string form.
 *
 * `value` is first passed through `to_int`. When that yields nil the
 * `require` check fails with the message
 * "int_to_string expects an integer-compatible value"; otherwise the
 * converted integer is rendered with `to_string`. Exposed as a named
 * helper so Harn-authored libraries can import it explicitly from the
 * stdlib.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: int_to_string(value)
 */
pub fn int_to_string(value) {
  let as_int = to_int(value)
  require as_int != nil, "int_to_string expects an integer-compatible value"
  return to_string(as_int)
}
/**
 * Convert a float-compatible value into a string.
 *
 * `value` is first passed through `to_float`. When that yields nil the
 * `require` check fails with the message
 * "float_to_string expects a float-compatible value"; otherwise the
 * converted float is rendered with `to_string`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: float_to_string(value)
 */
pub fn float_to_string(value) {
  let as_float = to_float(value)
  require as_float != nil, "float_to_string expects a float-compatible value"
  return to_string(as_float)
}
/**
 * Parse an integer-like value, returning `fallback` when conversion fails.
 *
 * The result of `to_int(value)` is returned when it is non-nil; a nil
 * result is replaced by `fallback` via `??`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: parse_int_or(value, fallback)
 */
pub fn parse_int_or(value, fallback) {
  let converted = to_int(value)
  return converted ?? fallback
}
/**
 * Parse a float-like value, returning `fallback` when conversion fails.
 *
 * The result of `to_float(value)` is returned when it is non-nil; a nil
 * result is replaced by `fallback` via `??`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: parse_float_or(value, fallback)
 */
pub fn parse_float_or(value, fallback) {
  let converted = to_float(value)
  return converted ?? fallback
}
/**
 * Extract file paths from free-form text.
 *
 * Processing steps:
 *   1. Split `text` on newlines and keep only lines that contain "/" and
 *      whose trimmed form does not start with "//" or "#".
 *   2. Split each kept line on single spaces, strip wrapping quotes,
 *      brackets and punctuation from both ends of every word, then drop
 *      any remaining trailing ".", ",", ":" or ";".
 *   3. Keep words that contain "/" or "." and whose `extname` result is
 *      non-empty and at most 11 characters long.
 *   4. Deduplicate (first occurrence wins) and return the result of
 *      `.sort()` on the unique list.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: extract_paths(text)
 */
pub fn extract_paths(text) {
  // Step 1: candidate lines mention a "/" and are not comment lines.
  let candidate_lines = split(text, "\n")
    .filter(
      { raw ->
        let stripped = trim(raw)
        if !contains(stripped, "/") {
          return false
        }
        return !starts_with(stripped, "//") && !starts_with(stripped, "#")
      },
    )
  // Steps 2 and 3: clean every word, then keep the path-like ones.
  let candidates = candidate_lines
    .flat_map(
      { raw ->
        return split(trim(raw), " ")
          .map(
            { token ->
              var word = regex_replace("^[\"'`,;:()\\[\\]{}><]+|[\"'`,;:()\\[\\]{}><]+$", "", trim(token))
              // Sentence punctuation such as a trailing "." is not covered
              // by the character class above, so peel it off here.
              while ends_with(word, ".") || ends_with(word, ",") || ends_with(word, ":") || ends_with(word, ";") {
                word = substring(word, 0, len(word) - 1)
              }
              return word
            },
          )
          .filter(
            { word ->
              if !(contains(word, "/") || contains(word, ".")) {
                return false
              }
              let ext = extname(word)
              return ext != "" && len(ext) <= 11
            },
          )
      },
    )
  // Step 4: order-preserving dedup, then sort.
  var unique = []
  for candidate in candidates {
    if !unique.contains(candidate) {
      unique = unique + [candidate]
    }
  }
  return unique.sort()
}
/**
 * Parse fenced code blocks from LLM response text.
 *
 * Returns a list of `{type: "code"|"call", lang: string|nil, code: string}`
 * dicts, one per fenced block that has both an opening and a closing
 * fence. A fence is any line whose trimmed form starts with three
 * backticks. The text after an opening fence decides the cell:
 *   - "call"        -> type "call", lang nil
 *   - "call <rest>" -> type "call", lang is the trimmed `<rest>`
 *   - ""            -> type "code", lang nil
 *   - anything else -> type "code", lang is that text
 * Lines inside a block are collected untrimmed and joined with "\n".
 * Text outside fences is ignored, and a block that is still open when
 * the input ends is not emitted (cells are only appended on a closing
 * fence). Implemented as a small state machine rather than map/filter.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: parse_cells(response)
 */
pub fn parse_cells(response) {
  var cells = []
  var inside = false
  var cell_type = "code"
  var cell_lang = nil
  var body = []
  for line in split(response, "\n") {
    let stripped = trim(line)
    let is_fence = starts_with(stripped, "```")
    if is_fence && inside {
      // Closing fence: emit the collected block and reset.
      cells = cells + [{type: cell_type, lang: cell_lang, code: join(body, "\n")}]
      inside = false
      body = []
    } else if is_fence {
      // Opening fence: classify the block from its info string.
      let info = trim(substring(stripped, 3))
      if starts_with(info, "call ") {
        cell_type = "call"
        cell_lang = trim(substring(info, 5))
      } else if info == "call" {
        cell_type = "call"
        cell_lang = nil
      } else {
        cell_type = "code"
        if info != "" {
          cell_lang = info
        } else {
          cell_lang = nil
        }
      }
      inside = true
      body = []
    } else if inside {
      // Keep the original line, including its indentation.
      body = body + [line]
    }
  }
  return cells
}
/**
 * Filter parsed cells down to the test-relevant ones.
 *
 * A cell is kept when:
 *   - its `type` is "code", or
 *   - its `type` is "call" and its `code` contains "write_file"; when
 *     `target_file` is non-nil the `code` must also contain
 *     `target_file`.
 * Every other cell is dropped.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: filter_test_cells(cells, target_file)
 */
pub fn filter_test_cells(cells, target_file) {
  return cells
    .filter(
      { cell ->
        let is_write_call = cell.type == "call" && contains(cell.code, "write_file")
        if !is_write_call {
          return cell.type == "code"
        }
        if target_file == nil {
          return true
        }
        return contains(cell.code, target_file)
      },
    )
}
/**
 * Keep the first `n` and last `n` lines of `text`, with an omission
 * marker in between.
 *
 * `text` is split on "\n". When it has at most `2 * n` lines it is
 * returned unchanged. Otherwise the result is the first `n` lines, a
 * line of the form "... (K lines omitted) ..." where K is the number of
 * dropped lines, and the last `n` lines.
 *
 * A nil or non-positive `n` returns `text` unchanged, matching how
 * `truncate_text` and `truncate_middle` treat a non-positive limit.
 * Without this guard `n == 0` would slice `lines[-0:]`, which is the
 * whole list, so the output would claim every line was omitted and then
 * include all of them.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: truncate_head_tail(text, n)
 */
pub fn truncate_head_tail(text, n) {
  // Guard first: the slices below are only meaningful for n >= 1.
  if n == nil || n <= 0 {
    return text
  }
  let lines = split(text, "\n")
  let total = len(lines)
  if total <= n * 2 {
    return text
  }
  let omitted = total - n * 2
  let head = join(lines[:n], "\n")
  let tail = join(lines[-n:], "\n")
  return head + "\n... (" + to_string(omitted) + " lines omitted) ...\n" + tail
}
/**
 * Truncate text to `max_chars`, appending a deterministic marker when clipped.
 *
 * A nil `text` is treated as "" and a nil `max_chars` as 4000. The text
 * is returned unchanged when the limit is non-positive or the text
 * already fits. Otherwise the first `max_chars` characters are kept and
 * `marker` is appended after them; when `marker` is nil the default
 * "\n... truncated K chars ..." is used, where K is the number of
 * characters removed. The marker is added on top of the limit, so a
 * clipped result is longer than `max_chars`.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: truncate_text(text, max_chars, marker)
 */
pub fn truncate_text(text, max_chars = 4000, marker = nil) {
  let source = text ?? ""
  let cap = max_chars ?? 4000
  if cap <= 0 {
    return source
  }
  let total = len(source)
  if total <= cap {
    return source
  }
  let tail_note = marker ?? ("\n... truncated " + to_string(total - cap) + " chars ...")
  return substring(source, 0, cap) + tail_note
}
/**
 * Keep both ends of long text, with a marker in the middle.
 *
 * A nil `text` is treated as "", a nil `max_chars` as 4000 and a nil
 * `marker` as "...". The text is returned unchanged when the limit is
 * non-positive or the text already fits. When the limit is no larger
 * than the marker, the marker itself is clipped to the limit and
 * returned. Otherwise the result is exactly `max_chars` long: a head
 * slice, the marker, and a tail slice.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: truncate_middle(text, max_chars, marker)
 */
pub fn truncate_middle(text, max_chars = 4000, marker = "...") {
  let source = text ?? ""
  let cap = max_chars ?? 4000
  if cap <= 0 || len(source) <= cap {
    return source
  }
  let mid = marker ?? "..."
  let mid_len = len(mid)
  if cap <= mid_len {
    return substring(mid, 0, cap)
  }
  let budget = cap - mid_len
  // Give the head the extra character when `budget` is odd. Integer `/`
  // truncates, so a plain `budget / 2` (even wrapped in `ceil`) would
  // leave the head smaller than the tail; adding 1 first rounds up.
  let head_len = (budget + 1) / 2
  let tail_len = budget - head_len
  let tail_start = len(source) - tail_len
  return substring(source, 0, head_len) + mid + substring(source, tail_start)
}
/**
 * Collapse whitespace for one-line report fields, returning fallback for blank input.
 *
 * A nil `value` is treated as "". The value is trimmed; when nothing
 * remains, `fallback` is returned. Otherwise every run of whitespace
 * (regex `\s+`) inside the trimmed text is replaced by a single space.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: single_line_or(value, fallback)
 */
pub fn single_line_or(value, fallback = "") {
  let stripped = trim(value ?? "")
  if stripped != "" {
    return regex_replace_all("\\s+", " ", stripped)
  }
  return fallback
}
/**
 * Prefix each line in text.
 *
 * `text` is split on "\n", `prefix` is prepended to every resulting
 * line (including empty ones), and the lines are joined back with "\n".
 * A nil `text` is treated as "" and a nil `prefix` as "".
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: prefix_lines(text, prefix)
 */
pub fn prefix_lines(text, prefix = " ") {
  let lead = prefix ?? ""
  let lines = split(text ?? "", "\n")
  let prefixed = lines.map({ line -> lead + line })
  return prefixed.join("\n")
}
/**
 * Indent text by `spaces` spaces.
 *
 * Builds a prefix of `spaces` space characters and applies it to every
 * line via `prefix_lines`. A nil `spaces` is treated as 0 and negative
 * values are clamped to 0, so the prefix is then empty.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: indent(text, spaces)
 */
pub fn indent(text, spaces = 2) {
  let width = max(spaces ?? 0, 0)
  let pad = " ".repeat(width)
  return prefix_lines(text, pad)
}
/**
 * Check whether compiler/build output contains a compile-error indicator.
 *
 * Returns true when `output` contains any of these substrings, matched
 * case-sensitively: the Python exception names "SyntaxError",
 * "IndentationError", "ImportError" and "ModuleNotFoundError", or the
 * generic phrases "compile error" and "cannot find". Returns false
 * otherwise.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: detect_compile_error(output)
 */
pub fn detect_compile_error(output) {
  let markers = [
    "SyntaxError",
    "IndentationError",
    "ImportError",
    "ModuleNotFoundError",
    "compile error",
    "cannot find",
  ]
  for marker in markers {
    if contains(output, marker) {
      return true
    }
  }
  return false
}
/**
 * Check whether test output shows both sides of a got/want comparison.
 *
 * The "got" side is present when `output` contains any of "got:",
 * "Got:", "actual:" or "Actual:". The "want" side is present when it
 * contains any of "want:", "Want:", "expected:" or "Expected:".
 * Returns true only if BOTH sides are present.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: has_got_want(output)
 */
pub fn has_got_want(output) {
  let got_markers = ["got:", "Got:", "actual:", "Actual:"]
  let want_markers = ["want:", "Want:", "expected:", "Expected:"]
  var saw_got = false
  for marker in got_markers {
    if contains(output, marker) {
      saw_got = true
    }
  }
  var saw_want = false
  for marker in want_markers {
    if contains(output, marker) {
      saw_want = true
    }
  }
  return saw_got && saw_want
}
/**
 * Extract error-relevant lines from test output.
 *
 * `output` is split on "\n" and a line is kept when it contains any of
 * "FAIL", "Error", "error", "AssertionError" or "assert"
 * (case-sensitive). At most the first 20 kept lines are returned,
 * joined with "\n"; the result is "" when no line matches.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: format_test_errors(output)
 */
pub fn format_test_errors(output) {
  let flagged = split(output, "\n")
    .filter(
      { line ->
        if contains(line, "FAIL") {
          return true
        }
        if contains(line, "Error") || contains(line, "error") {
          return true
        }
        return contains(line, "AssertionError") || contains(line, "assert")
      },
    )
  if len(flagged) <= 20 {
    return join(flagged, "\n")
  }
  return join(flagged[:20], "\n")
}
/**
 * Right-pad `value` with `fill` until the rendered length reaches `width`.
 * Values already at or beyond `width` are returned unchanged. `fill`
 * defaults to a single space; multi-character fills use the first
 * grapheme. `value` is coerced with `to_string` (nil becomes ""), so
 * callers don't need to wrap numbers or nil themselves. A nil `fill`
 * falls back to a single space.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: pad_right("api", 6)
 */
pub fn pad_right(value, width: int, fill: string = " ") -> string {
  let rendered = to_string(value ?? "")
  let pad = fill ?? " "
  return rendered.pad_right(width, pad)
}
/**
 * Left-pad `value` with `fill` until the rendered length reaches `width`.
 * The common cases are zero-padded numeric sort keys (`pad_left(rev, 18,
 * "0")`) and right-aligned table cells. `value` is coerced with
 * `to_string` (nil becomes ""), and a nil `fill` falls back to a single
 * space.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: pad_left(7, 4, "0")
 */
pub fn pad_left(value, width: int, fill: string = " ") -> string {
  let rendered = to_string(value ?? "")
  let pad = fill ?? " "
  return rendered.pad_left(width, pad)
}
/**
 * Repeat `text` `count` times. Nil, negative or zero counts return the
 * empty string. `text` is coerced with `to_string` (nil becomes "").
 * Useful for table separators (`repeat_string("-", width)`) where the
 * call site is clearer than `" ".repeat(width)` or a `while` loop.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: repeat_string("-", 5)
 */
pub fn repeat_string(text, count: int) -> string {
  if count != nil && count > 0 {
    let unit = to_string(text ?? "")
    return unit.repeat(count)
  }
  return ""
}
/**
 * Return the first capture group of the first match of `pattern` against
 * `text`, or `nil` when the pattern doesn't match or has no groups. This
 * is the most common regex shape in reporting and parsing scripts; it
 * replaces the four-line `regex_captures + len-check + group-index`
 * dance per call site. `text` is coerced with `to_string` (nil becomes
 * "") before matching.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: regex_first_capture("v(\\d+\\.\\d+\\.\\d+)", tag)
 */
pub fn regex_first_capture(pattern: string, text) {
  let subject = to_string(text ?? "")
  let matches = regex_captures(pattern, subject) ?? []
  if len(matches) > 0 {
    let first_groups = matches[0]?.groups ?? []
    if len(first_groups) > 0 {
      return first_groups[0]
    }
  }
  return nil
}
/**
 * Return every capture group of the first match of `pattern` against
 * `text`. Returns the empty list when the pattern doesn't match or has
 * no groups. Useful when several groups need to be destructured at once.
 * `text` is coerced with `to_string` (nil becomes "") before matching.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: regex_capture_groups("(\\d+)\\.(\\d+)\\.(\\d+)", "1.2.3")
 */
pub fn regex_capture_groups(pattern: string, text) -> list {
  let subject = to_string(text ?? "")
  let matches = regex_captures(pattern, subject) ?? []
  if len(matches) > 0 {
    return matches[0]?.groups ?? []
  }
  return []
}
/**
 * Return the first capture group of every match of `pattern` against
 * `text`. Returns the empty list when the pattern doesn't match. Matches
 * that carry no groups are skipped. Common for sweep-and-extract
 * patterns like pulling every `#NNNN` issue reference out of a
 * changelog. `text` is coerced with `to_string` (nil becomes "") before
 * matching.
 *
 * @effects: []
 * @allocation: heap
 * @errors: []
 * @api_stability: stable
 * @example: regex_all_first_captures("#(\\d+)", "fixes #12 and #34")
 */
pub fn regex_all_first_captures(pattern: string, text) -> list {
  let subject = to_string(text ?? "")
  let matches = regex_captures(pattern, subject) ?? []
  return matches
    .map({ found -> found?.groups ?? [] })
    .filter({ groups -> len(groups) > 0 })
    .map({ groups -> groups[0] })
}