panproto-git 0.39.0

Bidirectional git ↔ panproto-vcs translation bridge
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
//! Tests for the git bridge.

#![cfg(test)]
#![allow(clippy::unwrap_used)]

use std::path::Path;

use panproto_vcs::{MemStore, Store};

use crate::export::export_to_git;
use crate::import::{
    BlobSchemaCache, import_git_repo, import_git_repo_incremental, import_git_repo_persistent,
    import_git_repo_with_cache, load_blob_cache, save_blob_cache,
};

/// Create a temporary git repository whose single commit contains the
/// given `(relative path, bytes)` files.
///
/// Returns the tempdir (kept alive by the caller) and the repository.
fn create_test_git_repo(files: &[(&str, &[u8])]) -> (tempfile::TempDir, git2::Repository) {
    let workdir = tempfile::tempdir().unwrap();
    let repo = git2::Repository::init(workdir.path()).unwrap();

    // Fixed signature/timestamp keeps the commit deterministic.
    let author =
        git2::Signature::new("Test", "test@example.com", &git2::Time::new(1000, 0)).unwrap();

    // Write each file into the worktree and stage it.
    let mut idx = repo.index().unwrap();
    for &(rel, bytes) in files {
        let abs = workdir.path().join(rel);
        if let Some(dir) = abs.parent() {
            std::fs::create_dir_all(dir).unwrap();
        }
        std::fs::write(&abs, bytes).unwrap();
        idx.add_path(Path::new(rel)).unwrap();
    }
    idx.write().unwrap();

    let root_tree_oid = idx.write_tree().unwrap();
    {
        // Scope the tree borrow so `repo` can be moved out afterwards.
        let tree = repo.find_tree(root_tree_oid).unwrap();
        repo.commit(Some("HEAD"), &author, &author, "Initial commit", &tree, &[])
            .unwrap();
    }

    (workdir, repo)
}

#[test]
fn import_single_typescript_file() {
    let (_dir, git_repo) = create_test_git_repo(&[(
        "main.ts",
        b"function greet(name: string): string { return 'Hello, ' + name; }",
    )]);

    let mut store = MemStore::new();
    let outcome = import_git_repo(&git_repo, &mut store, "HEAD").unwrap();

    // Exactly one commit, mapped to a non-zero panproto id.
    assert_eq!(outcome.commit_count, 1);
    assert_ne!(outcome.head_id, panproto_vcs::ObjectId::ZERO);
    assert_eq!(outcome.oid_map.len(), 1);

    // The stored head must be a commit that carried the git metadata over.
    let head = match store.get(&outcome.head_id).unwrap() {
        panproto_vcs::Object::Commit(c) => c,
        other => panic!("expected commit, got {}", other.type_name()),
    };
    assert_eq!(head.message, "Initial commit");
    assert_eq!(head.author, "Test");
}

#[test]
fn import_multi_file_project() {
    let (_dir, git_repo) = create_test_git_repo(&[
        (
            "src/main.ts",
            b"function main(): void { console.log('hello'); }",
        ),
        (
            "src/utils.ts",
            b"export function add(a: number, b: number): number { return a + b; }",
        ),
        ("README.md", b"# Test Project\n\nA test project.\n"),
    ]);

    let mut store = MemStore::new();
    let imported = import_git_repo(&git_repo, &mut store, "HEAD").unwrap();
    assert_eq!(imported.commit_count, 1);

    // Resolve the head commit's schema and check that all three files
    // contributed vertices to it.
    let commit = match store.get(&imported.head_id).unwrap() {
        panproto_vcs::Object::Commit(c) => c,
        other => panic!("expected commit, got {}", other.type_name()),
    };
    let schema = panproto_vcs::tree::resolve_commit_schema(&store, &commit).unwrap();
    assert!(
        schema.vertices.len() > 5,
        "expected rich project schema, got {} vertices",
        schema.vertices.len()
    );
}

#[test]
fn import_multiple_commits() {
    // Build a two-commit linear history by hand (rather than via the
    // helpers below) so each plumbing step is explicit.
    let dir = tempfile::tempdir().unwrap();
    let repo = git2::Repository::init(dir.path()).unwrap();
    let sig = git2::Signature::new("Dev", "dev@test.com", &git2::Time::new(1000, 0)).unwrap();

    // First commit.
    let file_path = dir.path().join("main.py");
    std::fs::write(&file_path, b"x = 1\n").unwrap();
    let mut index = repo.index().unwrap();
    index.add_path(Path::new("main.py")).unwrap();
    index.write().unwrap();
    let tree_oid = index.write_tree().unwrap();
    let tree = repo.find_tree(tree_oid).unwrap();
    let commit1_oid = repo
        .commit(Some("HEAD"), &sig, &sig, "First", &tree, &[])
        .unwrap();

    // Second commit. The index is re-opened (shadowing the first one)
    // to stage the modified file, and the first commit is passed as the
    // sole parent.
    std::fs::write(&file_path, b"x = 1\ny = 2\n").unwrap();
    let mut index = repo.index().unwrap();
    index.add_path(Path::new("main.py")).unwrap();
    index.write().unwrap();
    let tree_oid = index.write_tree().unwrap();
    let tree = repo.find_tree(tree_oid).unwrap();
    let commit1 = repo.find_commit(commit1_oid).unwrap();
    repo.commit(Some("HEAD"), &sig, &sig, "Second", &tree, &[&commit1])
        .unwrap();

    // Import.
    let mut store = MemStore::new();
    let result = import_git_repo(&repo, &mut store, "HEAD").unwrap();

    assert_eq!(result.commit_count, 2);
    assert_eq!(result.oid_map.len(), 2);

    // Verify second commit has first as parent.
    let second_commit_obj = store.get(&result.head_id).unwrap();
    match &second_commit_obj {
        panproto_vcs::Object::Commit(c) => {
            assert_eq!(c.message, "Second");
            assert_eq!(c.parents.len(), 1);
            // Parent should be the first commit's panproto ID.
            // (oid_map[0] is the first-imported commit here.)
            let first_panproto_id = result.oid_map[0].1;
            assert_eq!(c.parents[0], first_panproto_id);
        }
        other => panic!("expected commit, got {}", other.type_name()),
    }
}

/// Build a git repo with `n` sequential commits on a single branch.
///
/// Each commit rewrites `main.py` with a distinct body; the returned
/// vector lists the commit OIDs oldest-first.
fn create_linear_history(n: usize) -> (tempfile::TempDir, git2::Repository, Vec<git2::Oid>) {
    let dir = tempfile::tempdir().unwrap();
    let repo = git2::Repository::init(dir.path()).unwrap();
    let sig = git2::Signature::new("Dev", "dev@test.com", &git2::Time::new(1000, 0)).unwrap();
    let source = dir.path().join("main.py");

    let mut history: Vec<git2::Oid> = Vec::with_capacity(n);

    for i in 0..n {
        std::fs::write(&source, format!("x = {i}\n").as_bytes()).unwrap();
        let mut idx = repo.index().unwrap();
        idx.add_path(Path::new("main.py")).unwrap();
        idx.write().unwrap();
        let tree = repo.find_tree(idx.write_tree().unwrap()).unwrap();

        // The root commit has no parent; every later commit chains to
        // the previous tip.
        let tip = history.last().map(|&oid| repo.find_commit(oid).unwrap());
        let parents: Vec<&git2::Commit<'_>> = tip.iter().collect();
        let oid = repo
            .commit(
                Some("HEAD"),
                &sig,
                &sig,
                &format!("commit {i}"),
                &tree,
                &parents,
            )
            .unwrap();
        history.push(oid);
    }

    (dir, repo, history)
}

#[test]
fn incremental_import_skips_known_ancestors() {
    let (_dir, repo, oids) = create_linear_history(3);

    // Seed the store with commits 0..=1.
    let mut store = MemStore::new();
    let seed = import_git_repo(&repo, &mut store, &oids[1].to_string()).unwrap();
    assert_eq!(seed.commit_count, 2);

    // Turn the seed import's oid_map into the `known` lookup table.
    let known: rustc_hash::FxHashMap<git2::Oid, panproto_vcs::ObjectId> =
        seed.oid_map.iter().copied().collect();

    // Incrementally importing up to commit 2 must add exactly one new
    // commit, with its parent wired to the already-known panproto id
    // for commit 1.
    let delta =
        import_git_repo_incremental(&repo, &mut store, &oids[2].to_string(), &known).unwrap();

    assert_eq!(delta.commit_count, 1, "expected only one new commit");
    assert_eq!(delta.oid_map.len(), 1);
    assert_eq!(delta.oid_map[0].0, oids[2]);

    // The new head must have exactly one panproto parent: commit 1.
    let head = match store.get(&delta.head_id).unwrap() {
        panproto_vcs::Object::Commit(c) => c,
        other => panic!("expected commit, got {}", other.type_name()),
    };
    assert_eq!(head.parents.len(), 1);
    assert_eq!(head.parents[0], known[&oids[1]]);
}

#[test]
fn incremental_import_noop_when_head_is_known() {
    let (_dir, repo, oids) = create_linear_history(2);

    // Import the full history once.
    let mut store = MemStore::new();
    let initial = import_git_repo(&repo, &mut store, "HEAD").unwrap();
    assert_eq!(initial.commit_count, 2);

    // When HEAD is already in the known map, the incremental import
    // must do nothing beyond reporting the existing head id.
    let known: rustc_hash::FxHashMap<git2::Oid, panproto_vcs::ObjectId> =
        initial.oid_map.iter().copied().collect();
    let rerun = import_git_repo_incremental(&repo, &mut store, "HEAD", &known).unwrap();

    assert_eq!(rerun.commit_count, 0);
    assert_eq!(rerun.head_id, known[&oids[1]]);
    assert!(rerun.oid_map.is_empty());
}

#[test]
fn incremental_import_sets_no_local_refs() {
    // The import functions should be pure with respect to refs: only
    // object insertion. Naming the imported tip is the caller's job.
    let (_dir, repo, _oids) = create_linear_history(2);

    let mut store = MemStore::new();
    let result = import_git_repo(&repo, &mut store, "HEAD").unwrap();
    // `assert_ne!` (not `assert!(a != b)`) so a failure reports both
    // values; this also matches the style used elsewhere in this file.
    assert_ne!(result.head_id, panproto_vcs::ObjectId::ZERO);

    // No refs should exist under refs/ after an import (regression test
    // for the removed hardcoded "refs/heads/main" write).
    let refs = store.list_refs("refs/").unwrap();
    assert!(
        refs.is_empty(),
        "expected no refs after import, found: {refs:?}"
    );
}

#[test]
fn incremental_import_tolerates_stale_known_entries() {
    // A `known` map entry whose git OID is not reachable from the head
    // being imported should be silently ignored; revwalk.hide returns an
    // error in that case and we swallow it so stale caches don't break
    // subsequent imports.
    let (_dir, repo, oids) = create_linear_history(2);

    // Import only the root commit so we have its real panproto id.
    let mut store = MemStore::new();
    let seed = import_git_repo(&repo, &mut store, &oids[0].to_string()).unwrap();
    let root_panproto = seed.oid_map[0].1;

    // Mix a fabricated, nonexistent git OID into the known map next to
    // the genuine root entry (which must still be honored as a skip
    // target).
    let bogus = git2::Oid::from_str("0123456789abcdef0123456789abcdef01234567").unwrap();
    let mut known = rustc_hash::FxHashMap::<git2::Oid, panproto_vcs::ObjectId>::default();
    known.insert(bogus, panproto_vcs::ObjectId::ZERO);
    known.insert(oids[0], root_panproto);

    // Importing the tip with this mixed map must succeed, yield exactly
    // one new commit, and wire its parent to root_panproto.
    let result = import_git_repo_incremental(&repo, &mut store, "HEAD", &known).unwrap();
    assert_eq!(result.commit_count, 1);
    match store.get(&result.head_id).unwrap() {
        panproto_vcs::Object::Commit(c) => assert_eq!(c.parents, vec![root_panproto]),
        other => panic!("expected commit, got {}", other.type_name()),
    }
}

/// Build a fresh git repository with no commits (HEAD still unborn),
/// used as an export target.
fn empty_git_repo() -> (tempfile::TempDir, git2::Repository) {
    let workdir = tempfile::tempdir().unwrap();
    let repo = git2::Repository::init(workdir.path()).unwrap();
    (workdir, repo)
}

#[test]
fn export_with_update_ref_none_leaves_head_unborn() {
    // Import a single commit into a panproto store.
    let (_src_dir, src_repo, _oids) = create_linear_history(1);
    let mut store = MemStore::new();
    let imported = import_git_repo(&src_repo, &mut store, "HEAD").unwrap();

    // Exporting with update_ref = None writes the commit object but
    // must not attach any ref to it.
    let (_dst_dir, dst_repo) = empty_git_repo();
    let no_parents: rustc_hash::FxHashMap<panproto_vcs::ObjectId, git2::Oid> =
        rustc_hash::FxHashMap::default();
    let exported =
        export_to_git(&store, &dst_repo, imported.head_id, &no_parents, None).unwrap();

    // The commit object is reachable directly by OID...
    assert!(dst_repo.find_commit(exported.git_oid).is_ok());

    // ...but HEAD still points at the unborn initial branch, so
    // resolving it fails.
    assert!(
        dst_repo.head().is_err(),
        "HEAD should remain unborn when update_ref is None"
    );
}

#[test]
fn export_with_update_ref_some_moves_named_ref() {
    let (_src_dir, src_repo, _oids) = create_linear_history(1);
    let mut store = MemStore::new();
    let imported = import_git_repo(&src_repo, &mut store, "HEAD").unwrap();

    // With update_ref = Some("HEAD"), the export must leave HEAD
    // resolving to the freshly written commit.
    let (_dst_dir, dst_repo) = empty_git_repo();
    let no_parents: rustc_hash::FxHashMap<panproto_vcs::ObjectId, git2::Oid> =
        rustc_hash::FxHashMap::default();
    let exported = export_to_git(
        &store,
        &dst_repo,
        imported.head_id,
        &no_parents,
        Some("HEAD"),
    )
    .unwrap();

    let resolved = dst_repo.head().unwrap().peel_to_commit().unwrap();
    assert_eq!(resolved.id(), exported.git_oid);
}

#[test]
fn export_parent_map_links_exported_parent() {
    // A 2-commit import gives us two panproto commit ids with a real
    // parent relationship between them.
    let (_src_dir, src_repo, _oids) = create_linear_history(2);
    let mut store = MemStore::new();
    let imported = import_git_repo(&src_repo, &mut store, "HEAD").unwrap();
    let root_id = imported.oid_map[0].1;
    let tip_id = imported.oid_map[1].1;

    // Export the root with an empty map, then the tip with the root's
    // mapping populated.
    let (_dst_dir, dst_repo) = empty_git_repo();
    let mut parent_map: rustc_hash::FxHashMap<panproto_vcs::ObjectId, git2::Oid> =
        rustc_hash::FxHashMap::default();

    let root_export = export_to_git(&store, &dst_repo, root_id, &parent_map, None).unwrap();
    parent_map.insert(root_id, root_export.git_oid);

    let tip_export = export_to_git(&store, &dst_repo, tip_id, &parent_map, None).unwrap();

    // The exported tip must name the exported root as its only git
    // parent.
    let tip_commit = dst_repo.find_commit(tip_export.git_oid).unwrap();
    assert_eq!(tip_commit.parent_count(), 1);
    assert_eq!(tip_commit.parent(0).unwrap().id(), root_export.git_oid);
}

#[test]
fn export_parent_map_empty_produces_root_commit() {
    // A panproto commit with parents, exported with an empty parent_map,
    // should produce a git commit with zero git parents (the panproto
    // parents are "invisible" to the export because they aren't mapped).
    let (_src_dir, src_repo, _oids) = create_linear_history(2);
    let mut store = MemStore::new();
    let imported = import_git_repo(&src_repo, &mut store, "HEAD").unwrap();
    let tip_id = imported.oid_map[1].1;

    let (_dst_dir, dst_repo) = empty_git_repo();
    let no_parents: rustc_hash::FxHashMap<panproto_vcs::ObjectId, git2::Oid> =
        rustc_hash::FxHashMap::default();

    let exported = export_to_git(&store, &dst_repo, tip_id, &no_parents, None).unwrap();

    let commit = dst_repo.find_commit(exported.git_oid).unwrap();
    assert_eq!(
        commit.parent_count(),
        0,
        "unmapped panproto parents should not produce git parents"
    );
}

/// Build a git repo with 3 commits that each touch a different file,
/// while 4 other files remain unchanged across all commits.
///
/// Used by the blob-cache tests: the history yields exactly 7 distinct
/// blob OIDs (5 initial + 1 per modification).
fn create_dedup_history() -> (tempfile::TempDir, git2::Repository, Vec<git2::Oid>) {
    let dir = tempfile::tempdir().unwrap();
    let repo = git2::Repository::init(dir.path()).unwrap();
    let sig = git2::Signature::new("Dev", "dev@test.com", &git2::Time::new(1000, 0)).unwrap();

    // Seed the 5 files.
    for (path, content) in [
        ("a.py", "x = 1\n"),
        ("b.py", "y = 2\n"),
        ("c.py", "z = 3\n"),
        ("d.py", "w = 4\n"),
        ("e.py", "v = 5\n"),
    ] {
        std::fs::write(dir.path().join(path), content).unwrap();
    }

    let mut commit_oids = Vec::new();
    let mut parent: Option<git2::Oid> = None;

    // Commit 0: all five files.
    // Commit 1: modify a.py only.
    // Commit 2: modify b.py only.
    let mutations: [&[(&str, &str)]; 3] = [
        &[
            ("a.py", "x = 1\n"),
            ("b.py", "y = 2\n"),
            ("c.py", "z = 3\n"),
            ("d.py", "w = 4\n"),
            ("e.py", "v = 5\n"),
        ],
        &[("a.py", "x = 11\n")],
        &[("b.py", "y = 22\n")],
    ];

    for (i, batch) in mutations.iter().enumerate() {
        // Apply this commit's mutations to the worktree.
        for (path, content) in *batch {
            std::fs::write(dir.path().join(path), content).unwrap();
        }
        // Stage all five paths every time so each commit's tree covers
        // the full worktree, not just the mutated files.
        let mut index = repo.index().unwrap();
        for name in ["a.py", "b.py", "c.py", "d.py", "e.py"] {
            index.add_path(Path::new(name)).unwrap();
        }
        index.write().unwrap();
        let tree_oid = index.write_tree().unwrap();
        let tree = repo.find_tree(tree_oid).unwrap();

        // Chain to the previous commit (if any); the first iteration
        // produces a root commit.
        let parent_commit = parent.map(|p| repo.find_commit(p).unwrap());
        let parents: Vec<&git2::Commit<'_>> = parent_commit.iter().collect();
        let new_oid = repo
            .commit(
                Some("HEAD"),
                &sig,
                &sig,
                &format!("commit {i}"),
                &tree,
                &parents,
            )
            .unwrap();
        commit_oids.push(new_oid);
        parent = Some(new_oid);
    }

    (dir, repo, commit_oids)
}

#[test]
fn import_persistent_roundtrips_cache() {
    let (_dir, repo, _oids) = create_dedup_history();
    let mut store = MemStore::new();
    let cache_dir = tempfile::tempdir().unwrap();
    let known: rustc_hash::FxHashMap<git2::Oid, panproto_vcs::ObjectId> =
        rustc_hash::FxHashMap::default();

    let imported =
        import_git_repo_persistent(&repo, &mut store, "HEAD", &known, cache_dir.path()).unwrap();
    assert_eq!(imported.commit_count, 3);

    // The persisted cache file must exist and reload with all 7
    // entries produced by the dedup history.
    let cache_path = cache_dir.path().join(crate::import::BLOB_CACHE_FILE);
    assert!(cache_path.is_file());
    let reloaded = load_blob_cache(&cache_path).unwrap();
    assert_eq!(reloaded.len(), 7);

    // Saving again must go through the atomic rename path and leave no
    // stray temporary file behind.
    save_blob_cache(&cache_path, &reloaded).unwrap();
    let tmp = cache_path.with_extension("tmp");
    assert!(!tmp.exists(), "save_blob_cache must rename atomically");
}

#[test]
fn blob_cache_rejects_corrupt_file() {
    // Garbage on disk must surface as a "corrupt" load error rather
    // than silently yielding an empty cache.
    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("corrupt");
    std::fs::write(&path, "not a valid cache entry\n").unwrap();
    let err = load_blob_cache(&path).unwrap_err();
    assert!(err.to_string().contains("corrupt"));
}

#[test]
fn blob_cache_missing_protocol_slot_is_corrupt() {
    // A one-token line must trigger the "missing protocol slot"
    // corrupt branch, not the "missing panproto id" branch.
    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("one_token");
    std::fs::write(&path, "0123456789abcdef0123456789abcdef01234567\n").unwrap();
    let msg = load_blob_cache(&path).unwrap_err().to_string();
    assert!(
        msg.contains("missing protocol slot"),
        "expected missing-protocol-slot diagnostic, got: {msg}"
    );
    // The diagnostic must tell the operator how to recover.
    assert!(msg.contains("delete the cache file and reimport"));
}

#[test]
fn save_blob_cache_rejects_empty_protocol() {
    use panproto_vcs::ObjectId;
    // An entry keyed on an empty protocol string must be rejected at
    // save time with InvalidInput instead of being written to disk.
    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("empty_proto");
    let blob_oid = git2::Oid::from_str("0123456789abcdef0123456789abcdef01234567").unwrap();
    let mut cache = BlobSchemaCache::default();
    cache.insert((blob_oid, String::new()), ObjectId::from_bytes([1; 32]));

    let err = save_blob_cache(&path, &cache).unwrap_err();
    assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput);
    let msg = err.to_string();
    assert!(
        msg.contains("empty protocol"),
        "expected empty-protocol diagnostic, got: {msg}"
    );
}

#[test]
fn blob_cache_close_reopen_rebuilds_state() {
    // import_git_repo_persistent must reload prior cache state after
    // a fresh process would have done a close + reopen. Simulate that
    // by dropping the store between the two import calls.
    let (_dir, repo, _oids) = create_dedup_history();
    let cache_dir = tempfile::tempdir().unwrap();
    let known: rustc_hash::FxHashMap<git2::Oid, panproto_vcs::ObjectId> =
        rustc_hash::FxHashMap::default();

    // First "process": import, then drop the store.
    {
        let mut store = MemStore::new();
        let first =
            import_git_repo_persistent(&repo, &mut store, "HEAD", &known, cache_dir.path())
                .unwrap();
        assert_eq!(first.commit_count, 3);
    }

    // Second "process": fresh store, same cache dir. The re-import
    // must succeed (no panic, no corrupt diagnostic) and yield the
    // same commit count, since the history is content-addressed.
    let mut store = MemStore::new();
    let second =
        import_git_repo_persistent(&repo, &mut store, "HEAD", &known, cache_dir.path()).unwrap();
    assert_eq!(second.commit_count, 3);

    // The cache file survived the close/reopen round-trip intact.
    let cache_path = cache_dir.path().join(crate::import::BLOB_CACHE_FILE);
    assert_eq!(load_blob_cache(&cache_path).unwrap().len(), 7);
}

#[test]
fn blob_cache_reuses_file_schema_ids_across_commits() {
    // In a 3-commit 5-file history where commits 2 and 3 touch one
    // file each, commits 2 and 3 must each reuse 4 of 5 FileSchema
    // ObjectIds from commit 1. The union of FileSchema ids across
    // the three commits is therefore 5 + 1 + 1 = 7.
    use std::collections::HashSet;
    let (_dir, repo, _oids) = create_dedup_history();
    let mut store = MemStore::new();
    let mut cache = BlobSchemaCache::default();
    let known: rustc_hash::FxHashMap<git2::Oid, panproto_vcs::ObjectId> =
        rustc_hash::FxHashMap::default();
    let result = import_git_repo_with_cache(&repo, &mut store, "HEAD", &known, &mut cache).unwrap();
    assert_eq!(result.commit_count, 3);

    // Walk each commit's schema tree and collect its set of FileSchema
    // ids.
    let mut schema_sets: Vec<HashSet<panproto_vcs::ObjectId>> = Vec::new();
    for (_, commit_id) in &result.oid_map {
        let commit = match store.get(commit_id).unwrap() {
            panproto_vcs::Object::Commit(c) => c,
            other => panic!("expected commit, got {}", other.type_name()),
        };
        let mut ids = HashSet::new();
        panproto_vcs::walk_tree(&store, &commit.schema_id, |_, f| {
            ids.insert(panproto_vcs::hash::hash_file_schema(f).unwrap());
            Ok(())
        })
        .unwrap();
        schema_sets.push(ids);
    }

    // Every commit carries all 5 files.
    for set in &schema_sets {
        assert_eq!(set.len(), 5);
    }

    // Pairwise overlap: commit 2 differs from commit 1 only in a.py,
    // commit 3 differs from commit 2 only in b.py, and commit 3
    // differs from commit 1 in both.
    let overlap = |a: usize, b: usize| schema_sets[a].intersection(&schema_sets[b]).count();
    assert_eq!(
        overlap(0, 1),
        4,
        "commit 2 must reuse 4 of 5 FileSchemas from commit 1"
    );
    assert_eq!(
        overlap(1, 2),
        4,
        "commit 3 must reuse 4 of 5 FileSchemas from commit 2"
    );
    assert_eq!(
        overlap(0, 2),
        3,
        "commit 3 shares the 3 files untouched since commit 1"
    );

    // Total distinct FileSchemas across the three commits: 5 + 2 = 7.
    let union: HashSet<panproto_vcs::ObjectId> =
        schema_sets.iter().flatten().copied().collect();
    assert_eq!(union.len(), 7);
}

#[test]
fn blob_cache_key_is_protocol_aware() {
    // Two files with byte-identical content but different extensions
    // must produce two distinct FileSchema ObjectIds: the `.py` file
    // parses through the python protocol, the `.txt` file falls back
    // to raw_file, so the per-file schemas differ.
    let dir = tempfile::tempdir().unwrap();
    let repo = git2::Repository::init(dir.path()).unwrap();
    let sig = git2::Signature::new("Dev", "dev@test.com", &git2::Time::new(1000, 0)).unwrap();

    // Same bytes under both paths — git will store a single blob OID
    // for both entries.
    let content = b"x = 1\n";
    std::fs::write(dir.path().join("a.py"), content).unwrap();
    std::fs::write(dir.path().join("a.txt"), content).unwrap();
    let mut index = repo.index().unwrap();
    index.add_path(Path::new("a.py")).unwrap();
    index.add_path(Path::new("a.txt")).unwrap();
    index.write().unwrap();
    let tree_oid = index.write_tree().unwrap();
    let tree = repo.find_tree(tree_oid).unwrap();
    repo.commit(Some("HEAD"), &sig, &sig, "mixed", &tree, &[])
        .unwrap();

    let mut store = MemStore::new();
    let mut cache = BlobSchemaCache::default();
    let known: rustc_hash::FxHashMap<git2::Oid, panproto_vcs::ObjectId> =
        rustc_hash::FxHashMap::default();
    let result = import_git_repo_with_cache(&repo, &mut store, "HEAD", &known, &mut cache).unwrap();
    assert_eq!(result.commit_count, 1);

    // Two distinct (blob, protocol) pairs in the cache even though
    // there is only one blob OID: one for python, one for raw_file.
    let commit = match store.get(&result.head_id).unwrap() {
        panproto_vcs::Object::Commit(c) => c,
        other => panic!("expected commit, got {}", other.type_name()),
    };
    // Collect the FileSchema id of every file reachable from the
    // commit's schema tree.
    let mut file_ids: Vec<panproto_vcs::ObjectId> = Vec::new();
    panproto_vcs::walk_tree(&store, &commit.schema_id, |_, f| {
        file_ids.push(panproto_vcs::hash::hash_file_schema(f).unwrap());
        Ok(())
    })
    .unwrap();
    assert_eq!(file_ids.len(), 2);
    assert_ne!(
        file_ids[0], file_ids[1],
        "identical bytes under different protocols must produce distinct FileSchema ids"
    );

    // Cache holds one entry per (blob, protocol). Same blob OID, two
    // protocol keys: at least two slots (possibly three if detection
    // and parse disagreed and recorded both).
    let python_count = cache.keys().filter(|(_, proto)| proto == "python").count();
    let raw_count = cache
        .keys()
        .filter(|(_, proto)| proto == "raw_file")
        .count();
    assert!(python_count >= 1, "cache must key python protocol");
    assert!(raw_count >= 1, "cache must key raw_file protocol");
}

#[test]
fn blob_cache_missing_is_empty() {
    // Loading a nonexistent cache path yields an empty cache rather
    // than an error.
    let dir = tempfile::tempdir().unwrap();
    let loaded = load_blob_cache(&dir.path().join("missing")).unwrap();
    assert!(loaded.is_empty());
}

#[test]
fn blob_cache_dedupes_unchanged_files_across_commits() {
    let (_dir, repo, _oids) = create_dedup_history();
    let mut store = MemStore::new();
    let mut cache = BlobSchemaCache::default();
    let known: rustc_hash::FxHashMap<git2::Oid, panproto_vcs::ObjectId> =
        rustc_hash::FxHashMap::default();

    let result = import_git_repo_with_cache(&repo, &mut store, "HEAD", &known, &mut cache).unwrap();
    assert_eq!(result.commit_count, 3);

    // Importing all three commits leaves exactly seven unique blob
    // OIDs in the cache: the five initial files plus one new blob each
    // for the modified a.py and b.py. The untouched c.py, d.py, and
    // e.py blobs are fully deduped across commits.
    assert_eq!(
        cache.len(),
        7,
        "expected 7 distinct blob OIDs (5 initial + 2 modifications)"
    );

    // Every imported commit must reference a SchemaTree, not a flat
    // Schema.
    for (_, panproto_id) in &result.oid_map {
        let commit = match store.get(panproto_id).unwrap() {
            panproto_vcs::Object::Commit(c) => c,
            other => panic!("expected commit, got {}", other.type_name()),
        };
        match store.get(&commit.schema_id).unwrap() {
            panproto_vcs::Object::SchemaTree(_) => {}
            other => panic!(
                "expected commit schema_id to point at schema_tree, got {}",
                other.type_name()
            ),
        }
    }
}