dirge-agent 0.7.3

Minimalistic coding agent written in Rust, optimized for memory footprint and performance
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
//! Central soft-wrap helper for any long text the UI prints.
//!
//! Single chokepoint so every render path (question prompts, chamber
//! rows, free-form messages) shares the same wrap policy: word-aware
//! when whitespace is available, character-fallback for unbreakable
//! runs (URLs, paths, code), display-width-aware (CJK / emoji), and
//! continuation-indent support so wrapped option text lines up under
//! its first character.
//!
//! Policy:
//!   - Width is measured with `UnicodeWidthStr` so wide glyphs count
//!     correctly; the result is suitable for fixed-column layouts
//!     like chamber rows or aligned bullet lists.
//!   - Hard newlines in the input are preserved as line breaks.
//!   - Continuation lines (every wrapped line after the first) are
//!     prefixed with `continuation_indent` so a wrapped option's
//!     extra lines visually align under the option's body.
//!   - When a single word is longer than the width budget, break it
//!     at the width boundary (display-width aware) rather than
//!     overflowing.

use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};

/// Number of terminal cells `s` occupies once CSI escape sequences
/// are discounted.
///
/// A sequence that starts with `ESC [` is skipped in full: everything
/// up to and including its final byte (`0x40..=0x7E`) contributes no
/// width. This covers the SGR colour / bold / underline codes
/// (`\x1b[…m`) that chamber rows embed; the terminal consumes them as
/// state changes and paints nothing. An unterminated sequence
/// swallows the rest of the string.
///
/// Every other character is measured with `UnicodeWidthChar::width`;
/// characters for which it reports no width count as zero cells.
pub fn visible_width(s: &str) -> usize {
    let mut cells = 0usize;
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        let starts_csi = ch == '\x1b' && chars.peek() == Some(&'[');
        if starts_csi {
            // Drop the `[` introducer, then everything through the
            // final byte. Only ASCII can be a final byte, so comparing
            // whole chars is equivalent to comparing raw bytes.
            chars.next();
            for c in chars.by_ref() {
                if ('\x40'..='\x7e').contains(&c) {
                    break;
                }
            }
            continue;
        }
        cells += ch.width().unwrap_or(0);
    }
    cells
}

/// Soft-wrap `text` into rows of at most `max_width` display columns,
/// returning one `String` per visual row.
///
/// * Each `\n` in the input starts a new logical line; a `\r`
///   directly before it (CRLF input) is removed so it never reaches
///   the output.
/// * An empty logical line produces one empty row.
/// * `continuation_indent` is prepended to every row after the first
///   of each logical line, and its width is charged against
///   `max_width`. Pass `""` for no indent. An indent whose width is
///   equal to or greater than the effective `max_width` is ignored
///   (degenerate config).
/// * `max_width == 0` disables wrapping: the result is simply
///   `text.lines()` collected into owned strings.
/// * A `max_width` of 1 is raised to 2 so a double-width glyph (CJK,
///   emoji) always fits on a row.
pub fn soft_wrap(text: &str, max_width: usize, continuation_indent: &str) -> Vec<String> {
    if max_width == 0 {
        return text.lines().map(String::from).collect();
    }

    // Wide-glyph floor: with a budget of 1 a width-2 glyph has nowhere
    // to go without overflowing, so the smallest effective budget is 2.
    let max_width = max_width.max(2);

    // Fall back to "no indent" when the indent would leave no room for
    // content on continuation rows.
    let (indent, indent_w) = match UnicodeWidthStr::width(continuation_indent) {
        w if w < max_width => (continuation_indent, w),
        _ => ("", 0),
    };

    let mut rows: Vec<String> = Vec::new();
    for piece in text.split('\n') {
        // CRLF input: a stray `\r` would make the terminal redraw the
        // row from column 0 and would skew char-offset math.
        let logical = piece.strip_suffix('\r').unwrap_or(piece);
        if logical.is_empty() {
            rows.push(String::new());
        } else {
            wrap_logical_line(logical, max_width, indent, indent_w, &mut rows);
        }
    }
    rows
}

/// Wrap a single newline-free logical line and append its rows to `out`.
///
/// * `max_width` is the budget of the first row.
/// * Continuation rows are prefixed with `cont_indent`, and their
///   budget is `max_width - cont_w` (`cont_w` being the indent's
///   display width, supplied by the caller).
/// * Widths are measured with `visible_width`, so CSI/SGR escape
///   sequences embedded in the text do not consume budget.
/// * Leading whitespace is kept on the first row when it fits and is
///   dropped at the start of continuation rows, where `cont_indent`
///   already supplies the visual indent.
/// * A token wider than a whole row is split by `break_long_token`.
///
/// Every call appends at least one row. A line whose only content is
/// whitespace wider than the budget yields a single empty row, so the
/// number of logical lines (hard newlines) seen by the caller is
/// preserved regardless of what `out` already contains.
fn wrap_logical_line(
    line: &str,
    max_width: usize,
    cont_indent: &str,
    cont_w: usize,
    out: &mut Vec<String>,
) {
    let mut current = String::new();
    let mut current_w = 0usize;
    // Stays `true` until the first row of THIS logical line has been
    // emitted; it doubles as the "nothing emitted yet" marker below.
    let mut is_first_row = true;
    // Budget for rows started by a flush or by `break_long_token`.
    // Floored at 1 so a degenerate indent can never produce a
    // zero-width row budget there.
    let cont_budget = max_width.saturating_sub(cont_w).max(1);

    // Emit `current` as a row (indented unless it is the first row of
    // the logical line) and leave `current` empty.
    let push_row = |out: &mut Vec<String>, current: &mut String, is_first: &mut bool| {
        if *is_first {
            out.push(std::mem::take(current));
            *is_first = false;
        } else {
            let mut s = String::with_capacity(cont_indent.len() + current.len());
            s.push_str(cont_indent);
            s.push_str(current);
            out.push(s);
            current.clear();
        }
    };

    // Each token is a word together with the whitespace run in front
    // of it, so the break decision can keep or drop that whitespace.
    for token in tokenize(line) {
        let budget = if is_first_row {
            max_width
        } else {
            max_width.saturating_sub(cont_w)
        };

        // ANSI-aware widths: escape sequences are zero cells wide.
        let tok_w = visible_width(token.text);
        let ws_w = visible_width(token.leading_ws);

        // Start of a row.
        if current.is_empty() {
            if is_first_row && ws_w + tok_w <= budget {
                // First row of the logical line: keep the caller's
                // leading indentation (also covers whitespace-only
                // lines, whose token text is empty).
                current.push_str(token.leading_ws);
                current.push_str(token.text);
                current_w = ws_w + tok_w;
            } else if tok_w <= budget {
                // Either a continuation row (indent comes from
                // `cont_indent`) or the whitespace alone pushed the
                // token over budget: keep the word, drop the ws.
                current.push_str(token.text);
                current_w = tok_w;
            } else {
                break_long_token(
                    token.text,
                    budget,
                    cont_budget,
                    &mut current,
                    &mut current_w,
                    out,
                    cont_indent,
                    &mut is_first_row,
                );
            }
            continue;
        }

        // Mid-row: does the token, with its leading whitespace, fit?
        if current_w + ws_w + tok_w <= budget {
            current.push_str(token.leading_ws);
            current.push_str(token.text);
            current_w += ws_w + tok_w;
        } else {
            // Flush and start a continuation row without the
            // separating whitespace.
            push_row(out, &mut current, &mut is_first_row);
            current_w = 0;
            if tok_w <= cont_budget {
                current.push_str(token.text);
                current_w = tok_w;
            } else {
                break_long_token(
                    token.text,
                    cont_budget,
                    cont_budget,
                    &mut current,
                    &mut current_w,
                    out,
                    cont_indent,
                    &mut is_first_row,
                );
            }
        }
    }

    // Flush the tail. `is_first_row` is still true only when no row
    // was emitted for this logical line; checking it (rather than
    // whether `out` as a whole is empty) guarantees the line is never
    // silently swallowed when earlier lines already produced rows.
    if !current.is_empty() || is_first_row {
        push_row(out, &mut current, &mut is_first_row);
    }
}

/// One wrap unit produced by `tokenize`: a run of non-whitespace text
/// together with the space/tab run that immediately precedes it on
/// the logical line. Both fields borrow from the tokenized line.
struct Token<'a> {
    /// Spaces and tabs directly in front of `text`; empty when the
    /// word starts the line.
    leading_ws: &'a str,
    /// The non-whitespace run. Empty only for the token that
    /// represents trailing whitespace with no word after it.
    text: &'a str,
}

/// Split `line` into [`Token`]s: each token is a run of spaces/tabs
/// (possibly empty) followed by a run of non-blank text.
///
/// Whitespace at the very end of the line, with no word after it,
/// becomes a final token whose `text` is empty so the whitespace is
/// not lost. Only ASCII space and tab act as separators. An empty
/// `line` yields no tokens.
fn tokenize(line: &str) -> Vec<Token<'_>> {
    let is_blank = |b: u8| matches!(b, b' ' | b'\t');
    let raw = line.as_bytes();
    let total = raw.len();

    let mut result = Vec::new();
    let mut pos = 0;
    while pos < total {
        // Leading whitespace run.
        let blank_len = raw[pos..].iter().take_while(|&&b| is_blank(b)).count();
        let word_begin = pos + blank_len;

        // Word run: step one whole UTF-8 scalar at a time so the end
        // index always lands on a char boundary.
        let mut word_end = word_begin;
        while word_end < total && !is_blank(raw[word_end]) {
            word_end = (word_end + utf8_char_len(raw[word_end])).min(total);
        }

        let leading_ws = &line[pos..word_begin];
        if word_begin < total {
            result.push(Token {
                leading_ws,
                text: &line[word_begin..word_end],
            });
        } else if blank_len > 0 {
            // Trailing whitespace with no word behind it.
            result.push(Token {
                leading_ws,
                text: "",
            });
        }
        pos = word_end;
    }
    result
}

/// Byte length of the UTF-8 sequence introduced by `first_byte`.
///
/// ASCII bytes map to 1. A stray continuation byte (`0x80..=0xBF`),
/// which should never start a char in a valid `str`, also maps to 1
/// so a scan still makes progress.
fn utf8_char_len(first_byte: u8) -> usize {
    match first_byte {
        0x00..=0xBF => 1,
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xFF => 4,
    }
}

/// Break a single token wider than the row budget across rows.
///
/// Walks the token one char at a time, summing display widths so a
/// multi-cell glyph is never split. The row being filled on entry
/// uses `first_budget`; every row started afterwards uses
/// `continuation_budget` (which already excludes the continuation
/// indent width). Completed rows are pushed onto `out`, prefixed with
/// `cont_indent` unless `*is_first_row` is still set; the unfinished
/// tail is left in `current` with its width in `current_w` for the
/// caller to keep filling or flush.
///
/// ANSI-aware: an `ESC [` … final-byte (`0x40..=0x7E`) sequence is
/// appended to `current` whole and counted as zero width. Keeping the
/// sequence atomic means a wrap can never land inside it, which would
/// leave the terminal with a broken escape and the remainder rendered
/// as literal text on the next row.
///
/// A glyph wider than an entire row budget (e.g. a width-2 glyph when
/// the indent leaves a budget of 1) cannot fit anywhere. It is placed
/// alone on a row, overflowing by the minimum possible amount, and no
/// empty row is emitted in front of it.
#[allow(clippy::too_many_arguments)]
fn break_long_token(
    token: &str,
    first_budget: usize,
    continuation_budget: usize,
    current: &mut String,
    current_w: &mut usize,
    out: &mut Vec<String>,
    cont_indent: &str,
    is_first_row: &mut bool,
) {
    const CSI: &str = "\x1b[";

    let mut remaining_budget = first_budget;
    let mut rest = token;
    loop {
        // Atomic escape-sequence copy: ESC `[` … final byte. An
        // unterminated sequence takes the rest of the token.
        if let Some(params) = rest.strip_prefix(CSI) {
            let seq_len = params
                .bytes()
                .position(|b| (0x40..=0x7e).contains(&b))
                .map_or(rest.len(), |p| CSI.len() + p + 1);
            // The cut falls right after an ASCII byte (or at the end),
            // so it is always a char boundary.
            let (seq, tail) = rest.split_at(seq_len);
            current.push_str(seq);
            rest = tail;
            continue;
        }

        let ch = match rest.chars().next() {
            Some(c) => c,
            None => break,
        };
        let cw = ch.width().unwrap_or(0);

        // Row full: flush it and continue on a fresh continuation row.
        // Skip the flush while the row holds no visible cells — the
        // glyph would not fit on a new row either, and flushing would
        // only emit a blank (or indent-only) row.
        if cw > remaining_budget && *current_w > 0 {
            if *is_first_row {
                out.push(std::mem::take(current));
                *is_first_row = false;
            } else {
                let mut s = String::with_capacity(cont_indent.len() + current.len());
                s.push_str(cont_indent);
                s.push_str(current);
                out.push(s);
                current.clear();
            }
            *current_w = 0;
            remaining_budget = continuation_budget;
        }

        current.push(ch);
        *current_w += cw;
        remaining_budget = remaining_budget.saturating_sub(cw);
        rest = &rest[ch.len_utf8()..];
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Text that already fits the budget comes back as a single,
    /// untouched row.
    #[test]
    fn short_line_returns_unchanged() {
        let out = soft_wrap("hello world", 80, "");
        assert_eq!(out, vec!["hello world"]);
    }

    /// Breaks happen at whitespace when a word would overflow the row.
    #[test]
    fn wraps_on_word_boundary_not_midword() {
        let out = soft_wrap("the quick brown fox jumps", 12, "");
        // Every word that appears on a row must itself fit the budget.
        for line in &out {
            for word in line.split_whitespace() {
                assert!(word.len() <= 12, "word {word:?} fits its row");
            }
        }
        // First row should be "the quick" (9 chars), not "the quick br".
        assert_eq!(out[0], "the quick");
    }

    /// A `\n` in the input always starts a new output row.
    #[test]
    fn preserves_hard_newlines() {
        let out = soft_wrap("line one\nline two", 80, "");
        assert_eq!(out, vec!["line one", "line two"]);
    }

    /// Rows after the first of a logical line carry the continuation
    /// indent.
    #[test]
    fn applies_continuation_indent() {
        let out = soft_wrap("aaa bbb ccc ddd", 7, "  ");
        // First row no indent; subsequent rows get "  " prefix.
        assert_eq!(out[0], "aaa bbb");
        for line in &out[1..] {
            assert!(line.starts_with("  "));
        }
    }

    /// A token with no whitespace is cut at the width boundary
    /// instead of overflowing.
    #[test]
    fn hard_breaks_unbreakable_long_token() {
        let out = soft_wrap("aaaaaaaaaaaaaaaaaa", 5, "");
        // 18-char token across 5-wide rows = 4 rows.
        assert_eq!(out.len(), 4);
        assert!(out.iter().all(|l| l.len() <= 5));
    }

    /// Empty input (with a non-zero width) yields exactly one empty row.
    #[test]
    fn empty_input_returns_one_empty_row() {
        let out = soft_wrap("", 80, "");
        assert_eq!(out, vec![""]);
    }

    /// `max_width == 0` disables wrapping; lines pass through as-is.
    #[test]
    fn zero_width_returns_unwrapped_lines() {
        let out = soft_wrap("anything goes", 0, "");
        assert_eq!(out, vec!["anything goes"]);
    }

    /// CJK glyphs are width-2. A 6-cell budget fits 3 CJK chars, not 6.
    #[test]
    fn respects_display_width_for_cjk() {
        let out = soft_wrap("中文测试abc", 6, "");
        // The input has no whitespace, so it is hard-broken by display
        // width. The exact split is not pinned here; what we care
        // about is that no row's display width exceeds 6.
        for line in &out {
            assert!(
                UnicodeWidthStr::width(line.as_str()) <= 6,
                "row {line:?} width = {} <= 6",
                UnicodeWidthStr::width(line.as_str()),
            );
        }
    }

    /// Pathological indent wider than budget should fall back to no
    /// indent rather than spinning or panicking.
    #[test]
    fn indent_wider_than_width_degrades_gracefully() {
        let out = soft_wrap("aaa bbb ccc", 4, "            ");
        // Should still produce output and not panic.
        assert!(!out.is_empty());
    }

    /// Review #1: leading whitespace on the FIRST row must be
    /// preserved. Option-line rendering relies on `"  ▶ label"`
    /// keeping its `  ` margin; dropping it silently flattens the
    /// visual layout.
    #[test]
    fn preserves_leading_whitespace_on_first_row() {
        let out = soft_wrap("  ▶ hello world", 80, "");
        assert_eq!(out[0], "  ▶ hello world");
    }

    /// Review #4: a CRLF logical line yields the same wrap result
    /// as the LF variant; no `\r` in the output.
    #[test]
    fn strips_carriage_returns_from_crlf_input() {
        let out_lf = soft_wrap("first\nsecond", 80, "");
        let out_crlf = soft_wrap("first\r\nsecond", 80, "");
        assert_eq!(out_lf, out_crlf);
        for row in &out_crlf {
            assert!(!row.contains('\r'), "row {row:?} should have no CR");
        }
    }

    /// Review #7: documented floor on max_width — a requested width
    /// of 1 is raised to 2, so a wide glyph must not produce a row
    /// exceeding that effective budget.
    #[test]
    fn wide_glyph_respects_max_width_at_floor() {
        let out = soft_wrap("中文", 1, "");
        for line in &out {
            assert!(
                UnicodeWidthStr::width(line.as_str()) <= 2,
                "row {line:?} should not exceed effective floor of 2"
            );
        }
    }

    /// Review #8: an indented blank-padded logical line keeps its
    /// indentation. `"  "` (two spaces, no word) should NOT collapse
    /// to an empty row when it's the FIRST row.
    #[test]
    fn preserves_leading_whitespace_only_line() {
        let out = soft_wrap("  ", 80, "");
        assert_eq!(out[0], "  ");
    }

    /// User bug: chamber_row_with_bg embeds `\x1b[48;5;52m…\x1b[49m`.
    /// `UnicodeWidthStr::width` counts the bracketed payload as
    /// visible cells, so soft_wrap previously thought the row was
    /// 15+ cells wider than max_width and split it — breaking the
    /// escape from its closer and leaving literal `[49m` text on
    /// the next row.
    #[test]
    fn visible_width_skips_ansi_sgr() {
        // Plain text reference.
        let plain = "-    // ng_max = ceil(32 / 8) = 4";
        // Same content wrapped in bg-color SGR + reset.
        let styled = format!("\x1b[48;5;52m{}\x1b[49m", plain);
        let pw = visible_width(plain);
        let sw = visible_width(&styled);
        assert_eq!(pw, sw, "SGR escapes must not contribute to width");
    }

    /// Soft-wrap a chamber-row-shaped string whose VISIBLE content
    /// fits within `max_width` cells but which also carries 15+ chars
    /// of SGR escapes. It must NOT split into two rows.
    #[test]
    fn ansi_padded_row_does_not_overwrap() {
        let inner = 60;
        // Visible content: `-text` (5 cells) followed by `inner - 5`
        // spaces of padding = exactly `inner` cells.
        let visible_content: String = format!("-text{}", " ".repeat(inner - 5));
        assert_eq!(visible_width(&visible_content), inner);
        let row = format!("\x1b[48;5;52m{}\x1b[49m │", visible_content);
        // The wrap budget is `inner + 4` cells; the row's visible
        // width (`inner` + " │") stays within it.
        let out = soft_wrap(&row, inner + 4, "");
        assert_eq!(
            out.len(),
            1,
            "row with embedded SGR must not split: got {} rows",
            out.len()
        );
    }
}