lifeloop-cli 0.1.0

Provider-neutral lifecycle abstraction and normalizer for AI harnesses
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
//! Kernel-purity gate (issue #24).
//!
//! Lifeloop's product boundary is "lifecycle reach is neutral; client
//! semantics live above". This integration test scans core source
//! files (everything under `src/` except an explicit allowlist of
//! compat zones) for tokens that would imply client-owned product
//! semantics have leaked back into the kernel. It complements
//! `tests/spec_alignment.rs` (structural drift between code and spec)
//! and the wire-contract tests (wire shape stability).
//!
//! The spec body's "Conformance Expectations" section names this gate
//! by path:
//! `tests/kernel_purity.rs::lifeloop_static_boundary_proof_keeps_client_vocabulary_out`.
//!
//! # Forbidden tokens
//!
//! Word-boundary case-insensitive matches against the following names
//! anywhere in core code (rustdoc included). Each token is a client
//! product or memory/recall vocabulary that Lifeloop must not name
//! beyond compat zones:
//!
//! * `ccd` — the first client; allowed only in compat zones.
//! * `rlm` — recursive-language-model client.
//! * `reforge` — memory-family client.
//! * `fixity` — repeated-signal-capture client.
//! * `continuity` — a CCD product term.
//! * `memory` — a Reforge/CCD product term. Lifeloop transports
//!   payloads, not "memory". Implementation idioms like
//!   "in-memory" / "in memory" are excluded as documented
//!   carve-outs (see `MEMORY_PHRASE_CARVE_OUTS`).
//! * `recall` — Reforge product term.
//! * `promotion` — Reforge product term. Tombstone/schema/lib.rs
//!   promotion idioms are excluded as carve-outs (see
//!   `PROMOTION_PHRASE_CARVE_OUTS`).
//! * `compaction` — Reforge/CCD product term. Harness-emitted lifecycle
//!   evidence ("compaction signal", "compaction summary",
//!   "compaction/compression") is neutral telemetry, not client product
//!   semantics; documented carve-outs cover those phrasings (see
//!   `COMPACTION_PHRASE_CARVE_OUTS`). The Claude wire token
//!   `PreCompact` passes because it does not contain the full word
//!   `compaction`. The hook arg `on-compaction-notice` does match at a
//!   word boundary (`-` is not a word character) and passes only
//!   through its explicit carve-out phrase.
//!
//! # Allowlist
//!
//! The allowlist is the small set of files/directories that own
//! cross-cutting compat labels. Each entry names the reason in code so
//! the audit trail stays in code:
//!
//! * `src/host_assets.rs` — owns `CCD_COMPAT_*` command-prefix labels
//!   and the host integration assets that target CCD-shipped binaries.
//!   This is the canonical place for CCD-flavored host-asset compat.
//!   Allowed tokens: all forbidden tokens (the whole module is a
//!   declared compat zone per `AGENTS.md` and per the
//!   `crate::host_assets` rustdoc).
//! * `src/source_files/` — adapter source-file rendering names the
//!   harness hook arg `on-compaction-notice`, mirrors that managed
//!   sections may be re-rendered by clients (e.g. CCD's `CLAUDE.md`
//!   mirror), and is in the issue #23 boundary alongside `host_assets`.
//!   Allowed tokens: all forbidden tokens.
//! * `src/telemetry/` — owns the `LIFELOOP_*` ↔ `CCD_*` env-var alias
//!   tombstone documented in `docs/tombstones/lifeloop.v0.md`. Reading
//!   harness telemetry inherently names harness-side compaction
//!   evidence (Claude / Gemini / OpenCode logs surface compaction
//!   markers as neutral lifecycle facts). Allowed tokens: all
//!   forbidden tokens.
//! * `src/protocol/mod.rs` and `src/protocol/claude.rs` — own the
//!   harness-defined wire tokens (`PreCompact`, `SessionStart`, etc.)
//!   that are the harness's own vocabulary, not Lifeloop semantics.
//!   The protocol module's boundary docs also re-state the
//!   non-ownership of client vocabulary. Allowed tokens: all forbidden
//!   tokens.
//! * `src/cli/asset.rs` — thin CLI shim over `host_assets`; its
//!   rustdoc names CCD/RLM as illustrative callers. In the issue #23
//!   boundary. Allowed tokens: all forbidden tokens.
//! * `src/bin/lifeloop-fake-ccd-client.rs` — by name a CCD-shaped
//!   test client used by the subprocess invoker tests. Allowed
//!   tokens: all forbidden tokens.
//!
//! Adding a new compat zone requires adding it here AND naming the
//! reason in the rustdoc comment, so the audit trail stays in code.

use std::fs;
use std::path::{Path, PathBuf};

/// Forbidden tokens. Each is matched case-insensitively at word
/// boundaries (i.e. surrounded by non-`[A-Za-z0-9_]` characters or by
/// the start/end of the line — scanning is done line by line). Carve-out
/// phrases for the noisy implementation-idiom matches are listed below.
///
/// Entries must be lowercase ASCII: `scan_text` lowercases each line
/// with `to_ascii_lowercase` and the matcher compares the token bytes
/// verbatim, so an entry containing uppercase letters would never match.
const FORBIDDEN_TOKENS: &[&str] = &[
    "ccd",
    "rlm",
    "reforge",
    "fixity",
    "continuity",
    "memory",
    "recall",
    "promotion",
    "compaction",
];

/// Allowlisted paths (relative to the crate root). A file path matches
/// if it starts with one of these prefixes. The whole compat zone gets
/// a free pass for every forbidden token; the policy is "this file
/// owns cross-cutting compat naming", not "this file owns one specific
/// token".
///
/// Entries use `/` separators; the gate rewrites `\` to `/` in each
/// scanned path before comparing. Entries that name a single file are
/// still compared as prefixes, not as exact paths. Entries that name a
/// directory end in `/` so that sibling paths sharing the same stem do
/// not match.
const ALLOWLISTED_PATH_PREFIXES: &[&str] = &[
    "src/host_assets.rs",
    "src/source_files/",
    "src/telemetry/",
    "src/protocol/mod.rs",
    "src/protocol/claude.rs",
    "src/cli/asset.rs",
    "src/bin/lifeloop-fake-ccd-client.rs",
];

/// Carve-out substrings for the `memory` token. These are
/// implementation-idiom phrasings (RAM-resident state) that the
/// kernel-purity gate accepts without flagging — they are not client
/// product semantics.
///
/// `has_carve_out_at` lowercases both the phrase and the line before
/// comparing, so the casing of an entry (e.g. `InMemory`) is
/// documentation only.
const MEMORY_PHRASE_CARVE_OUTS: &[&str] = &["in-memory", "in memory", "InMemory"];

/// Carve-out substrings for the `promotion` token. These are
/// schema/tombstone-tooling idioms that match the bare token but
/// describe Lifeloop-internal mechanics (`lifeloop-bump-schema`'s
/// `lib.rs` promotion step), not client product semantics.
///
/// Only a `promotion` occurrence whose span lies inside one of these
/// phrases is exempt; a separate bare `promotion` on the same line is
/// still reported (see `has_carve_out_at`).
const PROMOTION_PHRASE_CARVE_OUTS: &[&str] = &[
    "tombstone promotion",
    "lib.rs promotion",
    "schema promotion",
];

/// Carve-out substrings for the `compaction` token. These are neutral
/// telemetry / harness-fact references: when a harness emits a
/// compaction signal, Lifeloop reports it as a lifecycle fact through
/// the telemetry layer. Naming the harness-side fact is not the same
/// as adopting "compaction" as a Lifeloop product semantic.
///
/// `on-compaction-notice` needs its own entry: `-` is not a word
/// character for the matcher, so the `compaction` inside that hook-arg
/// literal sits at a word boundary and would otherwise be reported.
const COMPACTION_PHRASE_CARVE_OUTS: &[&str] = &[
    "compaction signal",
    "compaction summary",
    "compaction/compression",
    "on-compaction-notice",
];

/// The gate itself: walks every `*.rs` file under `<crate>/src`, skips
/// allowlisted compat zones, scans the remaining files for forbidden
/// tokens, and fails with a full report if anything was found.
#[test]
fn lifeloop_static_boundary_proof_keeps_client_vocabulary_out() {
    let crate_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let src_root = crate_root.join("src");

    let mut files_scanned = 0usize;
    let mut violations: Vec<Violation> = Vec::new();

    walk_rs(&src_root, &mut |source_path| {
        // Counted before the allowlist filter: the counter only guards
        // against the walker finding nothing at all.
        files_scanned += 1;

        // Normalize separators so the allowlist (written with `/`)
        // also matches on Windows.
        let rel_str = source_path
            .strip_prefix(&crate_root)
            .expect("path under crate root")
            .to_string_lossy()
            .replace('\\', "/");
        if is_path_allowlisted(&rel_str) {
            return;
        }

        // Read and decode in one step; either failure becomes a
        // file-level violation (line 0) so an operator notices instead
        // of the file being skipped silently.
        let decoded = fs::read(source_path)
            .map_err(|err| ("<read-error>", format!("failed to read file: {err}")))
            .and_then(|raw| {
                String::from_utf8(raw)
                    .map_err(|_| ("<non-utf8>", "file is not valid UTF-8".to_string()))
            });

        match decoded {
            Ok(contents) => scan_text(&rel_str, &contents, &mut violations),
            Err((token, excerpt)) => violations.push(Violation {
                path: rel_str,
                line: 0,
                token,
                excerpt,
            }),
        }
    });

    assert!(
        files_scanned > 0,
        "kernel-purity gate scanned zero files; expected at least the crate root `src/` tree at {}",
        src_root.display(),
    );

    let violation_count = violations.len();
    assert!(
        violation_count == 0,
        "\nKernel-purity gate (issue #24) found {} violation(s).\n\
         Lifeloop core must not name client-owned product vocabulary.\n\
         See `tests/kernel_purity.rs` rustdoc for the forbidden token\n\
         list, the allowlist of compat zones, and the carve-out phrases.\n\
         Either neutralize the offending name in core code, or — if the\n\
         file is a legitimate compat zone — extend the allowlist *and*\n\
         document the reason in the rustdoc.\n\n\
         Violations:\n{}\n",
        violation_count,
        format_violations(&violations),
    );
}

/// One finding reported by the kernel-purity gate.
#[derive(Debug)]
struct Violation {
    /// Crate-relative path of the offending file, with `/` separators.
    path: String,
    /// 1-based line number of the hit; `0` for file-level failures
    /// (unreadable or non-UTF-8 file).
    line: usize,
    /// The forbidden token that matched, or a `<read-error>` /
    /// `<non-utf8>` marker for file-level failures.
    token: &'static str,
    /// The trimmed offending line, or a description of the file-level
    /// failure.
    excerpt: String,
}

/// Render `violations` as an indented report, one newline-terminated
/// `path:line  [token]  excerpt` row per entry. An empty slice yields an
/// empty string.
fn format_violations(violations: &[Violation]) -> String {
    violations
        .iter()
        .map(|entry| {
            format!(
                "  {}:{}  [{}]  {}\n",
                entry.path, entry.line, entry.token, entry.excerpt,
            )
        })
        .collect()
}

/// Report whether the crate-relative `rel_path` (with `/` separators)
/// falls inside a declared compat zone, i.e. begins with any entry of
/// `ALLOWLISTED_PATH_PREFIXES`. An exact match is simply the case where
/// the prefix covers the whole path.
fn is_path_allowlisted(rel_path: &str) -> bool {
    for prefix in ALLOWLISTED_PATH_PREFIXES {
        if rel_path.starts_with(*prefix) {
            return true;
        }
    }
    false
}

/// Scan `contents` line by line and append a `Violation` (tagged with
/// `rel_path` and a 1-based line number) for every forbidden token that
/// has at least one word-boundary hit not covered by a carve-out phrase.
///
/// At most one violation is recorded per token per line: repeated hits
/// of the same token on one line add no signal for the operator.
fn scan_text(rel_path: &str, contents: &str, violations: &mut Vec<Violation>) {
    for (idx, raw_line) in contents.lines().enumerate() {
        // ASCII-only lowercasing keeps byte offsets aligned with
        // `raw_line`, so spans found here index the original text too.
        let folded = raw_line.to_ascii_lowercase();
        for &token in FORBIDDEN_TOKENS {
            let has_uncarved_hit = word_boundary_matches(&folded, token)
                .into_iter()
                .any(|span| !has_carve_out_at(raw_line, span, token));
            if has_uncarved_hit {
                violations.push(Violation {
                    path: rel_path.to_string(),
                    line: idx + 1,
                    token,
                    excerpt: raw_line.trim().to_string(),
                });
            }
        }
    }
}

/// Collect every `(start, end)` byte span at which `token` occurs in
/// `text` as a whole word, i.e. with no `[A-Za-z0-9_]` byte directly
/// before or after it. Overlapping candidates are all considered.
///
/// Both arguments must already be lowercase; the comparison is a plain
/// byte comparison. A `token` longer than `text` yields no spans.
fn word_boundary_matches(text: &str, token: &str) -> Vec<(usize, usize)> {
    let haystack = text.as_bytes();
    let needle = token.as_bytes();

    // Last offset at which the needle still fits; none if it is longer
    // than the haystack.
    let last_start = match haystack.len().checked_sub(needle.len()) {
        Some(offset) => offset,
        None => return Vec::new(),
    };

    (0..=last_start)
        .map(|begin| (begin, begin + needle.len()))
        .filter(|&(begin, finish)| haystack[begin..finish] == *needle)
        .filter(|&(begin, finish)| {
            let clear_before = begin == 0 || !is_word_byte(haystack[begin - 1]);
            let clear_after = finish == haystack.len() || !is_word_byte(haystack[finish]);
            clear_before && clear_after
        })
        .collect()
}

/// True for bytes that count as part of a word for boundary purposes:
/// ASCII letters, ASCII digits, and `_`. Every other byte (including
/// any byte of a multi-byte UTF-8 sequence) acts as a separator.
fn is_word_byte(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_')
}

/// Decide whether the match at `span` in the original (case-preserving)
/// `line` falls inside a documented carve-out phrase for `token`.
///
/// Span-aware: a carve-out only suppresses the hit when the matched
/// token's span lies entirely within an instance of the carve-out
/// phrase on the same line. Substring-anywhere matching would let a
/// line containing both an allowed idiom (e.g. `in-memory`) and a
/// separate bare forbidden token (e.g. `memory`) be silently exempted —
/// the bare hit must still surface.
fn has_carve_out_at(line: &str, span: (usize, usize), token: &str) -> bool {
    let phrases: &[&str] = match token {
        "memory" => MEMORY_PHRASE_CARVE_OUTS,
        "promotion" => PROMOTION_PHRASE_CARVE_OUTS,
        "compaction" => COMPACTION_PHRASE_CARVE_OUTS,
        _ => return false,
    };
    let (token_start, token_end) = span;
    let line_lower = line.to_ascii_lowercase();
    for phrase in phrases {
        let phrase_lower = phrase.to_ascii_lowercase();
        let phrase_len = phrase_lower.len();
        if phrase_len == 0 {
            continue;
        }
        let mut search_start = 0usize;
        while let Some(rel) = line_lower[search_start..].find(&phrase_lower) {
            let phrase_start = search_start + rel;
            let phrase_end = phrase_start + phrase_len;
            if phrase_start <= token_start && token_end <= phrase_end {
                return true;
            }
            // Advance by one to find overlapping occurrences; finding
            // the same start twice is impossible because we move past it.
            search_start = phrase_start + 1;
        }
    }
    false
}

/// Recursive `*.rs` walker. We avoid pulling in `walkdir` because the
/// dependency surface for a static gate should stay minimal.
///
/// Calls `visit` once for every regular file under `dir` whose
/// extension is `rs` (compared ASCII-case-insensitively), descending
/// into subdirectories. Entries of each directory are visited in sorted
/// path order: `fs::read_dir` makes no ordering promise, and a stable
/// order keeps the violation report reproducible across platforms and
/// runs.
///
/// Best-effort: a directory that cannot be listed, or an entry whose
/// metadata cannot be read, is skipped silently. The caller guards
/// against the degenerate "nothing scanned" outcome separately.
fn walk_rs(dir: &Path, visit: &mut dyn FnMut(&Path)) {
    let entries = match fs::read_dir(dir) {
        Ok(it) => it,
        Err(_) => return,
    };

    // Materialize the listing so it can be ordered before visiting.
    let mut children: Vec<(PathBuf, fs::FileType)> = entries
        .flatten()
        .filter_map(|entry| entry.file_type().ok().map(|kind| (entry.path(), kind)))
        .collect();
    children.sort_unstable_by(|a, b| a.0.cmp(&b.0));

    for (path, kind) in &children {
        if kind.is_dir() {
            walk_rs(path, visit);
        } else if kind.is_file()
            && path
                .extension()
                .and_then(|ext| ext.to_str())
                .map(|ext| ext.eq_ignore_ascii_case("rs"))
                .unwrap_or(false)
        {
            visit(path);
        }
    }
}

// ===========================================================================
// Self-tests for the gate's primitives
// ===========================================================================

#[test]
fn word_boundary_matches_finds_token_at_boundaries() {
    // Space-delimited, quoted, and comma-terminated occurrences all
    // count; the one embedded in `preccdfix` has word bytes on both
    // sides and must be ignored.
    let line = String::from("the ccd compat label, also \"ccd\" and ccd, but not preccdfix")
        .to_lowercase();
    let spans = word_boundary_matches(line.as_str(), "ccd");
    let hit_count = spans.len();
    assert_eq!(hit_count, 3, "spans = {spans:?} for line `{line}`");
}

#[test]
fn word_boundary_matches_skips_substring_only_hits() {
    // None of these words contains the full token, so there is nothing
    // to match even before boundaries are considered.
    let spans = word_boundary_matches(
        &"compactor compactness compactify".to_lowercase(),
        "compaction",
    );
    let found_nothing = spans.is_empty();
    assert!(
        found_nothing,
        "should not match `compaction` inside other words: spans = {spans:?}",
    );
}

/// Test helper: return the earliest word-boundary span of `token` in
/// `line` (matched case-insensitively), so the carve-out tests do not
/// have to hard-code byte positions.
///
/// # Panics
///
/// Panics with a test-setup message when `token` does not occur at a
/// word boundary in `line`.
fn first_span(line: &str, token: &str) -> (usize, usize) {
    let lower = line.to_ascii_lowercase();
    match word_boundary_matches(&lower, token).into_iter().next() {
        Some(span) => span,
        None => panic!("test setup: token `{token}` not found at a word boundary in `{line}`"),
    }
}

#[test]
fn memory_carve_outs_suppress_in_memory_idiom() {
    // Identifier-style names such as `InMemoryIdempotencyStore` are not
    // exercised here: the `memory` inside them has word bytes on both
    // sides, so `word_boundary_matches` yields no span and
    // `has_carve_out_at` is never consulted. The `InMemory` entry in
    // MEMORY_PHRASE_CARVE_OUTS is therefore documentation only; the
    // rows below cover the hyphen/space spellings that DO produce a
    // word-boundary hit.
    let cases: [(&str, bool); 3] = [
        ("/// In-memory reference IdempotencyStore", true),
        ("//! callers that already have the log in memory.", true),
        ("//! Lifeloop transports payloads, not memory.", false),
    ];
    for &(line, expect_carved) in cases.iter() {
        let span = first_span(line, "memory");
        let carved = has_carve_out_at(line, span, "memory");
        assert_eq!(
            carved, expect_carved,
            "carve-out mismatch for `{line}` (span={span:?})",
        );
    }
}

#[test]
fn compaction_carve_outs_suppress_telemetry_phrasings() {
    // Telemetry phrasings and the hook-arg literal are exempt; a bare
    // `compaction` is not.
    let cases: [(&str, bool); 4] = [
        ("//! and a compaction signal. No prompt or message bodies are", true),
        ("//! the adapter signaled a compaction/compression event", true),
        ("    \"on-compaction-notice -> context.pressure\",", true),
        ("//! Lifeloop does not own compaction.", false),
    ];
    for &(line, expect_carved) in cases.iter() {
        let span = first_span(line, "compaction");
        let carved = has_carve_out_at(line, span, "compaction");
        assert_eq!(
            carved, expect_carved,
            "carve-out mismatch for `{line}` (span={span:?})",
        );
    }
}

#[test]
fn promotion_carve_outs_suppress_schema_tooling_idioms() {
    // Schema/tombstone tooling idioms are exempt; a bare `promotion`
    // is not.
    let cases: [(&str, bool); 3] = [
        ("// Snapshot lib.rs so we can restore it if the tombstone promotion", true),
        ("// fails after the lib.rs promotion succeeds.", true),
        ("//! Lifeloop does not own promotion.", false),
    ];
    for &(line, expect_carved) in cases.iter() {
        let span = first_span(line, "promotion");
        let carved = has_carve_out_at(line, span, "promotion");
        assert_eq!(
            carved, expect_carved,
            "carve-out mismatch for `{line}` (span={span:?})",
        );
    }
}

#[test]
fn carve_out_does_not_mask_separate_bare_token_on_same_line() {
    // Regression guard for the AI review finding on !30: when a line
    // holds both an allowed idiom and an unrelated bare forbidden
    // token, only the idiom may be exempted. Carve-outs apply per span,
    // never to the whole line.
    let line = "the in-memory cache and a bare memory leak";
    let spans = word_boundary_matches(&line.to_ascii_lowercase(), "memory");
    assert_eq!(
        spans.len(),
        2,
        "expected two `memory` spans in `{line}`, got {spans:?}",
    );

    // Drop every span that a carve-out covers; exactly the bare
    // occurrence should remain.
    let mut kept = spans.clone();
    kept.retain(|&span| !has_carve_out_at(line, span, "memory"));
    assert_eq!(
        kept.len(),
        1,
        "expected the bare `memory` to survive carve-out filtering, got kept={kept:?}",
    );

    let (start, end) = kept[0];
    let surviving_text = &line[start..end];
    assert_eq!(
        surviving_text, "memory",
        "kept span should be the bare `memory` occurrence",
    );
}

/// Pins the allowlist contract: every declared compat zone (single
/// files and whole directories) is exempt, while neighbouring core
/// files in the same directories are not.
#[test]
fn allowlist_path_match_is_prefix_based() {
    // Compat zones: exact file entries and files under directory
    // prefixes.
    assert!(is_path_allowlisted("src/host_assets.rs"));
    assert!(is_path_allowlisted("src/source_files/mod.rs"));
    assert!(is_path_allowlisted("src/source_files/adapters.rs"));
    assert!(is_path_allowlisted("src/telemetry/claude.rs"));
    assert!(is_path_allowlisted("src/telemetry/mod.rs"));
    assert!(is_path_allowlisted("src/protocol/mod.rs"));
    assert!(is_path_allowlisted("src/protocol/claude.rs"));
    assert!(is_path_allowlisted("src/cli/asset.rs"));
    assert!(is_path_allowlisted("src/bin/lifeloop-fake-ccd-client.rs"));

    // Core files, including siblings of allowlisted files in
    // `src/protocol/`, `src/cli/`, and `src/bin/`, stay in scope.
    assert!(!is_path_allowlisted("src/lib.rs"));
    assert!(!is_path_allowlisted("src/router/mod.rs"));
    assert!(!is_path_allowlisted("src/router/subprocess.rs"));
    assert!(!is_path_allowlisted("src/protocol/codex.rs"));
    assert!(!is_path_allowlisted("src/cli/event.rs"));
    assert!(!is_path_allowlisted("src/bin/lifeloop-bump-schema.rs"));
}