ccd-cli 1.0.0-beta.1

Bootstrap and validate Continuous Context Development repositories
//! Stderr-safe rendering for user-controlled identifiers.
//!
//! `actor_id`, `owner_id`, `supervisor_id`, and `session_id` are accepted as
//! unrestricted `String` upstream. When those values are interpolated verbatim
//! into `bail!` / `format!` / `eprintln!` text, a caller can supply newlines
//! or terminal control bytes (including ANSI escapes) and have them echoed
//! into logs, CI captures, and copy-pastable error text.
//!
//! [`display_safe`] wraps an `&str` so that `Display`-formatting it emits
//! control characters (C0, DEL, and C1), newlines, tabs, the Unicode
//! line/paragraph separators (U+2028, U+2029), and backslash in escaped
//! form — without adding surrounding quotes. Current error sites wrap
//! identifiers in backticks for the operator's eye (`` `{actor}` ``); the
//! no-quotes rule keeps that convention intact.
//!
//! JSON field values are not routed through this helper: `--output json` is
//! already escaped by serde.

use std::fmt;

/// Wrap `s` so that `Display`-formatting it renders control characters escaped.
///
/// The wrapper only borrows `s`; nothing is escaped until the returned value
/// is actually formatted (the escaping lives in the `Display` impl of
/// [`DisplaySafe`]). Discarding the return value therefore sanitises nothing,
/// which is why the function is marked `#[must_use]`.
///
/// See the module-level docs for why this uses escape-without-quoting rather
/// than `{:?}`.
#[must_use = "the wrapper only escapes when it is formatted"]
pub(crate) fn display_safe(s: &str) -> DisplaySafe<'_> {
    DisplaySafe(s)
}

/// Display wrapper returned by [`display_safe`].
///
/// Formatting the wrapper writes the borrowed string with every character
/// that could break, clear, or visually reorder a log line replaced by a
/// visible escape:
///
/// * `\n`, `\r`, `\t`, `\\` for newline, carriage return, tab, and backslash;
/// * `\xNN` for the remaining C0 controls (including ESC) and DEL;
/// * `\u{NNNN}` for C1 controls, the line/paragraph separators U+2028 and
///   U+2029, and the bidirectional embedding/override/isolate controls
///   U+202A..=U+202E and U+2066..=U+2069.
///
/// All other characters are written through unchanged, and no surrounding
/// quotes are added. Formatter width/precision flags are not consulted.
pub(crate) struct DisplaySafe<'a>(&'a str);

impl DisplaySafe<'_> {
    /// Returns `true` when `ch` must be written as an escape sequence.
    fn needs_escape(ch: char) -> bool {
        matches!(
            ch,
            // C0 controls: covers `\n`, `\r`, `\t`, NUL, and ESC (0x1b), the
            // lead byte of every ANSI escape sequence.
            '\0'..='\x1f'
            // Backslash is escaped so escaped output stays unambiguous: a
            // literal `\n` (two chars) must not look like an escaped newline.
            | '\\'
            // DEL plus the C1 controls (0x80..=0x9f). C1 includes NEL
            // (U+0085), treated as a line break by several tools, and CSI
            // (U+009B), a single-character stand-in for `ESC [`.
            | '\x7f'..='\u{9f}'
            // U+2028 / U+2029 (line and paragraph separator) followed by the
            // bidi embeddings and overrides U+202A..=U+202E.
            | '\u{2028}'..='\u{202e}'
            // Bidi isolates. Like the overrides above, these can make the
            // text around an identifier display in a different order than it
            // is stored, so they are surfaced as escapes too.
            | '\u{2066}'..='\u{2069}'
        )
    }
}

impl fmt::Display for DisplaySafe<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.0;
        // Byte offset where the current run of pass-through characters began.
        let mut run_start = 0;
        for (idx, ch) in text.char_indices() {
            if !Self::needs_escape(ch) {
                continue;
            }
            // Flush the pending run in one write. `run_start` and `idx` are
            // always char boundaries (they derive from `char_indices` and
            // `len_utf8`), so this slice cannot panic.
            f.write_str(&text[run_start..idx])?;
            match ch {
                '\n' => f.write_str(r"\n")?,
                '\r' => f.write_str(r"\r")?,
                '\t' => f.write_str(r"\t")?,
                '\\' => f.write_str(r"\\")?,
                // Remaining C0 controls and DEL fit in two hex digits.
                '\0'..='\x1f' | '\x7f' => write!(f, "\\x{:02x}", u32::from(ch))?,
                // Everything else flagged by `needs_escape` is non-ASCII.
                _ => write!(f, "\\u{{{:04x}}}", u32::from(ch))?,
            }
            run_start = idx + ch.len_utf8();
        }
        // Trailing run after the last escape (the whole string if none).
        f.write_str(&text[run_start..])
    }
}

#[cfg(test)]
mod tests {
    use super::display_safe;

    /// Formats `input` through [`display_safe`] and returns the owned result.
    fn render(input: &str) -> String {
        display_safe(input).to_string()
    }

    #[test]
    fn plain_ascii_is_unchanged() {
        assert_eq!(render("alice"), "alice");
    }

    #[test]
    fn newline_is_escaped() {
        let out = render("foo\nbar");
        assert_eq!(out, r"foo\nbar");
        assert!(!out.contains('\n'));
    }

    #[test]
    fn carriage_return_and_tab_are_escaped() {
        assert_eq!(render("a\rb\tc"), r"a\rb\tc");
    }

    #[test]
    fn backslash_is_escaped_so_round_trip_does_not_re_interpret() {
        // If `\\` were passed through, the two literal characters `\` `n`
        // would be indistinguishable from an escaped real newline.
        assert_eq!(render(r"raw\n"), r"raw\\n");
    }

    #[test]
    fn ansi_escape_sequence_cannot_clear_terminal() {
        // "Clear screen" followed by "cursor home", then a plausible id.
        let out = render("\x1b[2J\x1b[Halice");
        assert!(!out.contains('\x1b'));
        assert!(out.contains(r"\x1b"));
        assert!(out.ends_with("alice"));
    }

    #[test]
    fn del_and_nul_are_escaped() {
        assert_eq!(render("\x7f"), r"\x7f");
        assert_eq!(render("\0"), r"\x00");
    }

    #[test]
    fn unicode_identifiers_render_as_is() {
        // Graphical non-ASCII characters are not controls; escaping them
        // would mangle legitimate identifiers.
        assert_eq!(render("用户-α"), "用户-α");
    }

    /// NEL (U+0085, a C1 control) and the Unicode line/paragraph separators
    /// (U+2028, U+2029) can split a log line just like `\n`, yet fall outside
    /// an ASCII-only C0 check. They must come out as `\u{NNNN}` so a UTF-8 id
    /// cannot inject breaks into stderr or CI capture.
    #[test]
    fn c1_controls_and_unicode_line_separators_are_escaped() {
        let out = render("a\u{0085}b\u{2028}c\u{2029}d");
        for raw in ['\u{0085}', '\u{2028}', '\u{2029}'] {
            assert!(!out.contains(raw));
        }
        assert_eq!(out, r"a\u{0085}b\u{2028}c\u{2029}d");
    }

    #[test]
    fn other_c1_controls_are_escaped() {
        // CSI (U+009B) stands in for `ESC [` on C1-aware terminals, so it can
        // drive ANSI control even when bare ESC is filtered. The whole
        // 0x80..=0x9f range has to be escaped.
        let out = render("\u{009b}[2J");
        assert!(!out.contains('\u{009b}'));
        assert_eq!(out, r"\u{009b}[2J");
    }
}