//! Hybrid content storage: small files inline in SQLite, large files in
//! `<DRIP_DATA_DIR>/cache/<sha256>.bin`. Verifies the threshold logic,
//! deduplication on identical content, missing-cache fallback,
//! `drip cache gc` / `drip cache stats`, and that the diff/unchanged
//! contract is unaffected by the storage choice.
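//!
//! The routing rule under test is just a byte-length threshold. A
//! minimal sketch (hypothetical names; the crate's real routing lives
//! in the core library, not in this test module):
//!
//! ```ignore
//! enum Storage { Inline, File }
//!
//! fn route(content: &[u8], inline_max: usize) -> Storage {
//!     if content.len() <= inline_max {
//!         Storage::Inline // bytes stay in the `reads.content` column
//!     } else {
//!         Storage::File // bytes land in cache/<sha256(content)>.bin
//!     }
//! }
//! ```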

use crate::common::Drip;
use serde_json::Value;
use std::fs;
// `PermissionsExt` lives under `std::os::unix` and doesn't exist on
// Windows. The two permission-mode assertions in
// `large_file_creates_a_cache_blob` are `#[cfg(unix)]`-gated below;
// everything else in this module is cross-platform.
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};

fn cache_dir(drip: &Drip) -> PathBuf {
    drip.data_dir.path().join("cache")
}

fn cache_file_count(drip: &Drip) -> usize {
    let dir = cache_dir(drip);
    if !dir.exists() {
        return 0;
    }
    fs::read_dir(&dir)
        .map(|it| {
            it.filter_map(|e| e.ok())
                .filter(|e| e.path().extension().and_then(|s| s.to_str()) == Some("bin"))
                .count()
        })
        .unwrap_or(0)
}

fn list_cache_paths(drip: &Drip) -> Vec<PathBuf> {
    let dir = cache_dir(drip);
    if !dir.exists() {
        return Vec::new();
    }
    fs::read_dir(&dir)
        .map(|it| {
            it.filter_map(|e| e.ok())
                .map(|e| e.path())
                .filter(|p| p.extension().and_then(|s| s.to_str()) == Some("bin"))
                .collect()
        })
        .unwrap_or_default()
}

fn write_file(path: &Path, content: &str) {
    fs::write(path, content).unwrap();
}

// ─── Threshold-based routing ───────────────────────────────────────

#[test]
fn small_file_stays_inline_no_cache_file_created() {
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("small.txt");
    write_file(&f, &"x\n".repeat(100)); // 200 bytes, well under 32 KB

    drip.read_stdout(&f);
    assert_eq!(
        cache_file_count(&drip),
        0,
        "small file must NOT spawn a cache/<hash>.bin",
    );
}

#[test]
fn large_file_creates_a_cache_blob() {
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("big.txt");
    // ~51 KB, comfortably over the default 32 KB threshold
    write_file(&f, &"line of content here\n".repeat(2500));

    let mut cmd = drip.cmd();
    cmd.arg("read").arg(&f);
    let o = cmd.output().unwrap();
    assert!(o.status.success());

    assert_eq!(
        cache_file_count(&drip),
        1,
        "large file must produce exactly one cache/<hash>.bin",
    );

    // The cache file should contain the source bytes verbatim.
    let cached = list_cache_paths(&drip);
    let body = fs::read_to_string(&cached[0]).unwrap();
    assert_eq!(body, fs::read_to_string(&f).unwrap());

    // Permissions: 0600 on Unix (defense in depth — the file lives
    // under a 0700 cache dir and the DB itself is 0600). Windows
    // doesn't have a comparable mode-bit model and the harden_*
    // helpers no-op there, so we skip the assertions on Windows.
    #[cfg(unix)]
    {
        let mode = fs::metadata(&cached[0]).unwrap().permissions().mode() & 0o777;
        assert_eq!(mode, 0o600, "cache file must be chmod 0600, got {mode:o}");
        let dir_mode = fs::metadata(cache_dir(&drip)).unwrap().permissions().mode() & 0o777;
        assert_eq!(
            dir_mode, 0o700,
            "cache dir must be chmod 0700, got {dir_mode:o}"
        );
    }
}

#[test]
fn env_override_zero_pushes_everything_to_cache() {
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("tiny.txt");
    write_file(&f, "a\n"); // 2 bytes — would normally stay inline

    let mut c = drip.cmd();
    c.env("DRIP_INLINE_MAX_BYTES", "0");
    c.arg("read").arg(&f);
    assert!(c.output().unwrap().status.success());

    assert_eq!(
        cache_file_count(&drip),
        1,
        "DRIP_INLINE_MAX_BYTES=0 must force every read into the cache dir",
    );
}

// ─── Dedup ─────────────────────────────────────────────────────────

#[test]
fn identical_content_two_files_share_one_cache_blob() {
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let body = "shared content\n".repeat(3000); // > 32 KB
    let a = dir.path().join("a.txt");
    let b = dir.path().join("b.txt");
    write_file(&a, &body);
    write_file(&b, &body);

    drip.read_stdout(&a);
    drip.read_stdout(&b);

    // Two `reads` rows, but the cache is keyed by content_hash so
    // there's only one .bin on disk — that's the whole win of
    // hash-addressed storage.
    assert_eq!(
        cache_file_count(&drip),
        1,
        "identical-content files must share one cache blob",
    );
}

// ─── Missing-cache fallback ────────────────────────────────────────

#[test]
fn missing_cache_file_falls_back_to_full_read() {
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("rehydrate.txt");
    write_file(&f, &"L\n".repeat(20_000)); // > 32 KB

    let first = drip.read_stdout(&f);
    assert!(first.contains("[DRIP: full read"), "first: {first}");

    // Manually delete every cache blob — simulates user `rm` or a
    // partial restore. The next read MUST NOT crash; it should
    // gracefully treat the file as freshly seen.
    for p in list_cache_paths(&drip) {
        fs::remove_file(p).unwrap();
    }

    let second = drip.read_stdout(&f);
    assert!(
        second.contains("[DRIP: full read"),
        "missing cache must trigger fall-back to full read, got: {second}",
    );

    // Cache blob is rewritten on the rehydrate read.
    assert_eq!(cache_file_count(&drip), 1);
}

// ─── Functional parity with inline ─────────────────────────────────

#[test]
fn diff_works_identically_when_content_is_in_cache_file() {
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("evolves.txt");
    let v1: String = (0..5000).map(|i| format!("line {i}\n")).collect(); // ~48 KB, over the 32 KB threshold
    write_file(&f, &v1);

    let first = drip.read_stdout(&f);
    assert!(first.contains("[DRIP: full read"), "first: {first}");

    // One-line edit deep into the file. The diff is computed by
    // `differ::diff(prev, new)` — `prev` lives in the cache blob,
    // not inline. If the cache load is broken, we'd get 'first read'
    // again instead of a delta.
    let v2 = v1.replace("line 1234\n", "LINE_1234_MUTATED\n");
    write_file(&f, &v2);

    let second = drip.read_stdout(&f);
    assert!(
        second.contains("[DRIP: delta only"),
        "expected delta — cache-backed baseline failed to load? got: {second}",
    );
    assert!(second.contains("-line 1234"));
    assert!(second.contains("+LINE_1234_MUTATED"));
}

#[test]
fn unchanged_works_identically_when_baseline_is_in_cache() {
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("stable.txt");
    write_file(&f, &"x\n".repeat(20_000));

    drip.read_stdout(&f);
    let second = drip.read_stdout(&f);
    assert!(
        second.contains("unchanged"),
        "second read with no edits must report unchanged: {second}",
    );
}

// ─── drip cache gc ─────────────────────────────────────────────────

#[test]
fn cache_gc_removes_orphans_and_keeps_active() {
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();

    // Active blob: a real `reads` row will reference it.
    let active = dir.path().join("active.txt");
    write_file(&active, &"A\n".repeat(20_000));
    drip.read_stdout(&active);

    // Orphan blob: drop a `.bin` that no `reads` row points to.
    let cache = cache_dir(&drip);
    let orphan = cache.join("0000000000000000000000000000000000000000000000000000000000000000.bin");
    fs::write(&orphan, b"orphaned content").unwrap();

    assert!(orphan.exists());
    assert_eq!(cache_file_count(&drip), 2);

    let mut c = drip.cmd();
    c.args(["cache", "gc"]);
    let o = c.output().unwrap();
    assert!(
        o.status.success(),
        "stderr={}",
        String::from_utf8_lossy(&o.stderr)
    );
    let report = String::from_utf8_lossy(&o.stdout);
    assert!(
        report.contains("1 file") || report.contains("removed"),
        "gc should report what it removed, got: {report}",
    );

    assert!(!orphan.exists(), "orphan blob must be deleted");
    assert_eq!(cache_file_count(&drip), 1, "active blob must survive GC");
}

#[test]
fn purge_drops_cache_blobs_of_expired_sessions() {
    // Regression: purge_stale_sessions used to delete the `reads`
    // rows of expired sessions but left the corresponding `.bin`
    // files on disk, so users had to remember `drip cache gc` to
    // reclaim them. Now the purge cascades into the cache directory
    // and removes any blob whose last referencing row just got
    // deleted (dedup-aware: a hash still alive on a surviving
    // session keeps its blob).
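    //
    // A dedup-aware sweep looks roughly like this (illustrative SQL,
    // not necessarily the crate's exact statements):
    //
    //   DELETE FROM reads WHERE session_id IN
    //       (SELECT session_id FROM sessions WHERE last_active < :cutoff);
    //   -- then remove cache/<hash>.bin for every hash that no longer
    //   -- appears in reads.content_hash or in the file_registry.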
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();

    // Populate the cache via a real `drip read` so the file ends up
    // hash-named in `<DRIP_DATA_DIR>/cache/`.
    let big = dir.path().join("big.txt");
    write_file(&big, &"L\n".repeat(20_000));
    drip.read_stdout(&big);
    assert_eq!(
        cache_file_count(&drip),
        1,
        "expected one cache blob after first read"
    );

    // Backdate the session well past SESSION_TTL_SECS so purge will
    // sweep it. The constant lives in core::session and isn't part
    // of the public API; using last_active = 1 (epoch + 1) is a
    // safe lower bound on any sane cutoff.
    let db = drip.data_dir.path().join("sessions.db");
    let conn = rusqlite::Connection::open(&db).unwrap();
    conn.execute(
        "UPDATE sessions SET last_active = 1 WHERE session_id = ?1",
        rusqlite::params![drip.session_id],
    )
    .unwrap();
    drop(conn);

    // Trigger purge by opening a NEW session — Session::open_inner
    // calls purge_stale_sessions on every open. We use a different
    // DRIP_SESSION_ID so the backdated session is the only one
    // eligible for sweep.
    let trigger = dir.path().join("trigger.txt");
    write_file(&trigger, "x\n");
    let o = drip
        .cmd_in_session("purge-trigger")
        .arg("read")
        .arg(&trigger)
        .output()
        .expect("drip read in fresh session");
    assert!(
        o.status.success(),
        "trigger read failed: {}",
        String::from_utf8_lossy(&o.stderr)
    );

    // After session purge, the blob *survives* — the cross-session
    // file_registry (v4) still references it so a future session
    // can offer the "↔ unchanged since last session" hint without
    // re-staging the file. The blob only dies once the registry
    // entry is also gone.
    assert_eq!(
        cache_file_count(&drip),
        1,
        "registry should keep the blob alive after session purge"
    );
    let o = drip.cmd().args(["cache", "stats"]).output().unwrap();
    assert!(o.status.success());
    let s = String::from_utf8_lossy(&o.stdout);
    assert!(
        s.contains("Orphan blobs     : 0"),
        "blob is still referenced by the registry, must NOT be flagged orphan: {s}"
    );

    // Now sweep the registry — `--older-than 0s` matches everything,
    // since the rows were just written and have last_seen_at = now.
    // After this and another session open (which re-runs the purge
    // path that sweeps blobs whose final reference just disappeared),
    // the blob should be gone.
    let o = drip
        .cmd()
        .args(["registry", "gc", "--older-than", "0s"])
        .output()
        .unwrap();
    assert!(o.status.success(), "{}", String::from_utf8_lossy(&o.stderr));
    assert_eq!(
        cache_file_count(&drip),
        0,
        "blob should be reclaimed once both reads + registry references are gone"
    );
}

#[test]
fn upsert_drops_old_blob_when_file_content_changes() {
    // Regression: editing a file that lives in file storage (i.e.
    // > DRIP_INLINE_MAX_BYTES) used to leave the previous blob as an
    // orphan in the cache directory after every re-read with new
    // content. On a heavy dev-workflow (lots of hash churn on >32KB
    // files) the cache grew unboundedly between `drip cache gc` runs
    // — the dominant source of the "75 orphan blobs" warning we saw
    // in the wild.
    //
    // upsert_read_with_compression now snapshots the current row's
    // hash before the upsert and GCs the blob if the new write made
    // it unreferenced. This test verifies one cache file remains
    // after three different content-versions of the same file.
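    //
    // The fix, in outline (hypothetical helper names; the real code
    // sits next to upsert_read_with_compression):
    //
    //   let old_hash = current_hash_for(conn, session, path); // snapshot
    //   upsert_row(conn, session, path, &new_hash /* ... */);
    //   if let Some(h) = old_hash {
    //       if h != new_hash && !hash_still_referenced(conn, &h) {
    //           let _ = fs::remove_file(cache_dir.join(format!("{h}.bin")));
    //       }
    //   }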
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("big.txt");

    write_file(&f, &"v1\n".repeat(20_000));
    drip.read_stdout(&f);
    assert_eq!(cache_file_count(&drip), 1, "first read should write 1 blob");

    write_file(&f, &"v2\n".repeat(20_000));
    drip.read_stdout(&f);
    assert_eq!(
        cache_file_count(&drip),
        1,
        "second read of mutated file should drop the v1 blob"
    );

    write_file(&f, &"v3\n".repeat(20_000));
    drip.read_stdout(&f);
    assert_eq!(
        cache_file_count(&drip),
        1,
        "third read should drop v2 too — only the current version's blob lives"
    );
}

#[test]
fn refresh_drops_orphan_blob() {
    // Regression: `drip refresh <file>` deletes the reads row but
    // used to leave the blob behind. Catches the small leak path
    // for users who refresh frequently after out-of-band edits.
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("big.txt");
    write_file(&f, &"x\n".repeat(20_000));
    drip.read_stdout(&f);
    assert_eq!(cache_file_count(&drip), 1);

    let o = drip.cmd().arg("refresh").arg(&f).output().unwrap();
    assert!(o.status.success(), "{}", String::from_utf8_lossy(&o.stderr));

    // Registry still references the blob (cross-session orientation),
    // so it survives until `registry gc` runs — same contract as the
    // session-purge path.
    assert_eq!(
        cache_file_count(&drip),
        1,
        "registry keeps blob alive after refresh"
    );
    let o = drip
        .cmd()
        .args(["registry", "gc", "--older-than", "0s"])
        .output()
        .unwrap();
    assert!(o.status.success());
    assert_eq!(
        cache_file_count(&drip),
        0,
        "after registry GC the blob is fully reclaimed"
    );
}

#[test]
fn cache_gc_on_empty_dir_is_a_noop() {
    let drip = Drip::new();
    // Don't even open a session; just run GC against a fresh data dir.
    let mut c = drip.cmd();
    c.args(["cache", "gc"]);
    let o = c.output().unwrap();
    assert!(o.status.success());
}

// ─── drip cache stats ──────────────────────────────────────────────

#[test]
fn cache_stats_reports_inline_and_file_breakdown() {
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();

    let small = dir.path().join("small.txt");
    write_file(&small, "tiny\n");
    drip.read_stdout(&small);

    let big = dir.path().join("big.txt");
    write_file(&big, &"L\n".repeat(20_000));
    drip.read_stdout(&big);

    let mut c = drip.cmd();
    c.args(["cache", "stats"]);
    let o = c.output().unwrap();
    assert!(o.status.success());
    let s = String::from_utf8_lossy(&o.stdout);

    // Numbers and labels both visible.
    assert!(
        s.contains("Inline rows") || s.contains("inline"),
        "got: {s}"
    );
    assert!(
        s.contains("Cached files") || s.contains("cache"),
        "got: {s}"
    );
}

// ─── drip meter --json: storage block ───────────────────────────────

#[test]
fn drip_meter_json_exposes_storage_block() {
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("big.txt");
    write_file(&f, &"X\n".repeat(20_000));
    drip.read_stdout(&f);

    let o = drip.cmd().arg("meter").arg("--json").output().unwrap();
    let v: Value = serde_json::from_slice(&o.stdout).unwrap();
    let storage = v
        .get("storage")
        .expect("meter --json must surface a `storage` block when there's data");
    assert!(storage["cache_files"].as_i64().unwrap() >= 1);
    assert!(storage["cache_size_bytes"].as_i64().unwrap() > 0);
}

// ─── drip cache compact ────────────────────────────────────────────

#[test]
fn cache_compact_hoists_oversized_inline_rows_to_file_cache() {
    // Simulate the user's real-world scenario: an existing DB carries
    // big inline payloads (from a v1 binary, or from a prior threshold
    // value, or from rows whose content grew over time). After
    // `cache compact` they should live as cache blobs and the DB
    // should shrink correspondingly.
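    //
    // What compact does, conceptually (illustrative SQL; the real
    // implementation may batch or order differently):
    //
    //   SELECT content_hash, content FROM reads
    //       WHERE content_storage = 'inline'
    //         AND length(content) > :inline_max;
    //   -- for each hit: write cache/<content_hash>.bin, then
    //   -- UPDATE reads SET content = '', content_storage = 'file',
    //   -- and finally VACUUM to hand the freed pages back to the OS.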
    let drip = Drip::new();
    let db_path = drip.data_dir.path().join("sessions.db");

    // Pre-create a v1-style row by hand: huge inline content,
    // content_storage='inline'. We seed via raw SQL so the test
    // doesn't depend on flipping DRIP_INLINE_MAX_BYTES mid-run.
    let conn = rusqlite::Connection::open(&db_path).unwrap();
    conn.execute_batch(
        "
        CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
        CREATE TABLE reads (
            session_id      TEXT NOT NULL,
            file_path       TEXT NOT NULL,
            content_hash    TEXT NOT NULL,
            content         TEXT NOT NULL,
            read_at         INTEGER NOT NULL,
            reads_count     INTEGER NOT NULL DEFAULT 1,
            tokens_full     INTEGER NOT NULL,
            tokens_sent     INTEGER NOT NULL,
            content_storage TEXT NOT NULL DEFAULT 'inline',
            PRIMARY KEY (session_id, file_path)
        );
        CREATE TABLE sessions (
            session_id  TEXT PRIMARY KEY,
            started_at  INTEGER NOT NULL,
            last_active INTEGER NOT NULL,
            cwd         TEXT
        );
        INSERT INTO meta(key, value) VALUES ('schema_version', '2');
        ",
    )
    .unwrap();
    // Use `now` for last_active so purge_stale_sessions (2 h TTL)
    // doesn't wipe the seeded data before compact gets to it.
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_secs() as i64;
    conn.execute(
        "INSERT INTO sessions VALUES ('s1', ?1, ?1, '/tmp')",
        rusqlite::params![now],
    )
    .unwrap();
    // Big enough that VACUUM's reclaim dwarfs the schema overhead
    // even after future v8/v9/… migrations add new tables. Earlier
    // 120 KB was within ~4 SQLite pages of the schema cost, so any
    // additive migration would flip the post>pre assertion.
    let huge: String = "X".repeat(1_200_000); // 1.2 MB, well above 32 KB
    let small: String = "y\n".repeat(10);
    // 64-char hex hashes — `core::cache::is_valid_hash` rejects
    // anything else, so the seed has to look like a real SHA-256.
    // Different prefixes ('a' vs 'b') so the two rows don't collide.
    let huge_hash = "a".repeat(64);
    let small_hash = "b".repeat(64);
    conn.execute(
        "INSERT INTO reads VALUES ('s1', '/tmp/huge.txt', ?1, ?2, ?3, 1, 1, 1, 'inline')",
        rusqlite::params![huge_hash, huge, now],
    )
    .unwrap();
    conn.execute(
        "INSERT INTO reads VALUES ('s1', '/tmp/small.txt', ?1, ?2, ?3, 1, 1, 1, 'inline')",
        rusqlite::params![small_hash, small, now],
    )
    .unwrap();
    drop(conn);

    let pre_size = fs::metadata(&db_path).unwrap().len();

    let mut c = drip.cmd();
    c.args(["cache", "compact"]);
    let o = c.output().unwrap();
    assert!(
        o.status.success(),
        "compact failed: stderr={}",
        String::from_utf8_lossy(&o.stderr),
    );
    let report = String::from_utf8_lossy(&o.stdout);
    assert!(
        report.contains("1 row") || report.contains("Compacted") || report.contains("compact"),
        "compact must report what it moved: {report}",
    );

    // The huge row is now in the cache; the small row stays inline.
    let conn = rusqlite::Connection::open(&db_path).unwrap();
    let huge_storage: String = conn
        .query_row(
            "SELECT content_storage FROM reads WHERE file_path = '/tmp/huge.txt'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    let huge_content: String = conn
        .query_row(
            "SELECT content FROM reads WHERE file_path = '/tmp/huge.txt'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    let small_storage: String = conn
        .query_row(
            "SELECT content_storage FROM reads WHERE file_path = '/tmp/small.txt'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    drop(conn);

    assert_eq!(
        huge_storage, "file",
        "huge row must be hoisted to file cache"
    );
    assert!(
        huge_content.is_empty(),
        "hoisted row's content column must be cleared"
    );
    assert_eq!(small_storage, "inline", "small row must stay inline");

    // Cache blob exists.
    assert_eq!(
        cache_file_count(&drip),
        1,
        "compact must materialise the blob on disk",
    );

    // VACUUM ran → DB file actually shrank. SQLite pages and overhead
    // mean we don't reclaim the hoisted 1.2 MB byte-for-byte (other
    // rows and indexes share pages), so we just assert "meaningfully
    // smaller", not an exact target. The strong guarantees are
    // covered by the storage/content checks above (blob on disk,
    // content column emptied, content_storage flipped).
    let post_size = fs::metadata(&db_path).unwrap().len();
    assert!(
        post_size < pre_size,
        "compact should run VACUUM to reclaim space — pre={pre_size} post={post_size}",
    );
}

#[test]
fn cache_compact_reads_baseline_correctly_after_hoisting() {
    // Functional integrity: after compact, the agent must still see
    // the same baseline content. A subsequent `drip read` of an
    // unchanged file must report `unchanged` (proves the hoisted
    // blob round-trips through get_read).
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("inline_then_compact.txt");
    let body: String = "L\n".repeat(20_000);
    write_file(&f, &body);

    // Force the first read to land inline by raising the threshold
    // above the file size, then read.
    let mut c = drip.cmd();
    c.env("DRIP_INLINE_MAX_BYTES", "10000000"); // 10 MB
    c.args(["read", f.to_str().unwrap()]);
    let o = c.output().unwrap();
    let body1 = String::from_utf8_lossy(&o.stdout);
    assert!(body1.contains("[DRIP: full read"), "first: {body1}");

    // Now compact at the default threshold (32 KB) — the row is over
    // it and should migrate to the cache.
    let o = drip.cmd().args(["cache", "compact"]).output().unwrap();
    assert!(o.status.success());
    assert_eq!(
        cache_file_count(&drip),
        1,
        "compact should produce one blob",
    );

    // Re-read with no edit. Default threshold; baseline is now in cache.
    let body2 = drip.read_stdout(&f);
    assert!(
        body2.contains("unchanged"),
        "post-compact re-read must hit the cached baseline as Unchanged: {body2}",
    );
}

#[test]
fn cache_compact_is_idempotent_when_no_oversize_inline_rows() {
    // A DB whose only rows are small and inline: compact is a no-op
    // and exits cleanly.
    let drip = Drip::new();
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("tiny.txt");
    write_file(&f, "x\n");
    drip.read_stdout(&f);

    let o = drip.cmd().args(["cache", "compact"]).output().unwrap();
    assert!(o.status.success());
    assert_eq!(
        cache_file_count(&drip),
        0,
        "no oversized rows → no blobs created",
    );
}

#[test]
fn cache_stats_hints_when_inline_bloat_detected() {
    // When `reads` contains inline rows individually larger than the
    // current threshold, `drip cache stats` should suggest running
    // `drip cache compact` so the user notices the wasted DB space.
    let drip = Drip::new();
    let db_path = drip.data_dir.path().join("sessions.db");

    let conn = rusqlite::Connection::open(&db_path).unwrap();
    conn.execute_batch(
        "
        CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
        CREATE TABLE reads (
            session_id      TEXT NOT NULL,
            file_path       TEXT NOT NULL,
            content_hash    TEXT NOT NULL,
            content         TEXT NOT NULL,
            read_at         INTEGER NOT NULL,
            reads_count     INTEGER NOT NULL DEFAULT 1,
            tokens_full     INTEGER NOT NULL,
            tokens_sent     INTEGER NOT NULL,
            content_storage TEXT NOT NULL DEFAULT 'inline',
            PRIMARY KEY (session_id, file_path)
        );
        CREATE TABLE sessions (
            session_id  TEXT PRIMARY KEY,
            started_at  INTEGER NOT NULL,
            last_active INTEGER NOT NULL,
            cwd         TEXT
        );
        INSERT INTO meta(key, value) VALUES ('schema_version', '2');
        ",
    )
    .unwrap();
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_secs() as i64;
    conn.execute(
        "INSERT INTO sessions VALUES ('s', ?1, ?1, '/tmp')",
        rusqlite::params![now],
    )
    .unwrap();
    let hash = "c".repeat(64);
    conn.execute(
        "INSERT INTO reads VALUES ('s', '/tmp/big.txt', ?1, ?2, ?3, 1, 1, 1, 'inline')",
        rusqlite::params![hash, &"Z".repeat(100_000), now],
    )
    .unwrap();
    drop(conn);

    let o = drip.cmd().args(["cache", "stats"]).output().unwrap();
    let s = String::from_utf8_lossy(&o.stdout);
    assert!(
        s.contains("compact") || s.contains("Compactable"),
        "stats must hint at compactable inline rows: {s}",
    );
}

// ─── Migration v1 → v2 ─────────────────────────────────────────────

#[test]
fn legacy_v1_db_is_migrated_in_place() {
    // Seed a fake v1 sessions.db (no `content_storage` column) and
    // verify drip migrates it on first open without losing data.
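    //
    // The additive v1 → v2 step, conceptually (illustrative SQL; the
    // real migration lives in the core schema code):
    //
    //   ALTER TABLE reads ADD COLUMN content_storage TEXT
    //       NOT NULL DEFAULT 'inline';
    //   UPDATE meta SET value = '2' WHERE key = 'schema_version';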
    let drip = Drip::new();
    let db_path = drip.data_dir.path().join("sessions.db");

    // Build a minimal v1 schema by hand.
    let conn = rusqlite::Connection::open(&db_path).unwrap();
    conn.execute_batch(
        "
        CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
        CREATE TABLE reads (
            session_id   TEXT NOT NULL,
            file_path    TEXT NOT NULL,
            content_hash TEXT NOT NULL,
            content      TEXT NOT NULL,
            read_at      INTEGER NOT NULL,
            reads_count  INTEGER NOT NULL DEFAULT 1,
            tokens_full  INTEGER NOT NULL,
            tokens_sent  INTEGER NOT NULL,
            PRIMARY KEY (session_id, file_path)
        );
        CREATE TABLE sessions (
            session_id   TEXT PRIMARY KEY,
            started_at   INTEGER NOT NULL,
            last_active  INTEGER NOT NULL,
            cwd          TEXT
        );
        INSERT INTO reads
            (session_id, file_path, content_hash, content, read_at,
             reads_count, tokens_full, tokens_sent)
            VALUES ('legacy', '/tmp/x.txt', 'deadbeef', 'legacy body', 1, 1, 10, 10);
        INSERT INTO meta(key, value) VALUES ('schema_version', '1');
        ",
    )
    .unwrap();
    drop(conn);

    // The first read against the seeded DB must not fail: opening it
    // runs the additive migration, and the existing legacy row keeps
    // working (storage='inline' default).
    let dir = tempfile::tempdir().unwrap();
    let f = dir.path().join("post-migrate.txt");
    write_file(&f, "ok\n");
    drip.read_stdout(&f);

    // Confirm the new column exists and the legacy row has the
    // correct default.
    let conn = rusqlite::Connection::open(&db_path).unwrap();
    let storage: String = conn
        .query_row(
            "SELECT content_storage FROM reads WHERE session_id = 'legacy'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(
        storage, "inline",
        "legacy rows must default to inline storage post-migration",
    );
}