use std::path::Path;
use ahash::AHashMap;
use rayon::prelude::*;
use super::{CommitMeta, GitHistoryError, GitHistoryIndex, encoding, keys};
use crate::git::Repo;
use crate::path::RelPath;
/// What [`sync`] did to bring the history index in line with the repository.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RebuildOutcome {
    /// Nothing was written: `HEAD` could not be resolved, the index already
    /// records the current `HEAD`, or the head sha could not be converted to
    /// its raw form.
    Fresh,
    /// The existing index was extended in place; `added` is the number of
    /// commits written during this run.
    Incremental { added: u32 },
    /// The index was cleared and rebuilt. `reason` is `"initial"` or
    /// `"history-rewrite"`, and `commits` is the number of commits written.
    FullRebuild { reason: &'static str, commits: u32 },
}
/// Brings `index` up to date with the commit `HEAD` currently resolves to.
///
/// Decision order:
/// 1. `HEAD` cannot be resolved, or the index already records it: nothing is
///    written and [`RebuildOutcome::Fresh`] is returned.
/// 2. The previously indexed head still exists, is an ancestor of `HEAD`, and
///    the stored root commit passes `fingerprint_ok`: only the new commits
///    are appended.
/// 3. Otherwise the index is cleared and rebuilt from scratch, with reason
///    `"initial"` for an empty index and `"history-rewrite"` for anything else.
///
/// # Errors
///
/// Propagates failures from clearing the index, from the incremental append,
/// and from the full rebuild.
pub fn sync(
    index: &GitHistoryIndex,
    repo: &Repo,
    basemind_dir: &Path,
) -> Result<RebuildOutcome, GitHistoryError> {
    // Any failure to resolve HEAD is reported as `Fresh`; the index is left untouched.
    let Ok(head) = repo.resolve_rev("HEAD") else {
        return Ok(RebuildOutcome::Fresh);
    };

    // Fast path: the recorded head is exactly the current one.
    if index.last_indexed_head_hex().as_deref() == Some(head.as_str()) {
        return Ok(RebuildOutcome::Fresh);
    }

    // Incremental path: only valid while the old head is still part of the
    // history leading to the new one and the recorded root still matches.
    if let Some(last_hex) = index.last_indexed_head_hex() {
        let can_append = repo.has_commit(&last_hex)
            && repo.is_ancestor(&last_hex, &head)
            && fingerprint_ok(index, repo, &head);
        if can_append {
            return append_since(index, repo, &last_hex, &head);
        }
    }

    // Fallback: discard whatever is stored and index the whole history again.
    let reason = if index.is_empty() {
        "initial"
    } else {
        "history-rewrite"
    };
    index.clear(basemind_dir)?;
    rebuild(index, repo, reason)
}
/// Checks that the root commit recorded in `index` still belongs to the
/// history of `head`.
///
/// Returns `false` when no root sha is stored, when the repository no longer
/// has that commit, or when it is not an ancestor of `head`.
fn fingerprint_ok(index: &GitHistoryIndex, repo: &Repo, head: &str) -> bool {
    let Some(root_raw) = index.root_sha() else {
        return false;
    };
    let root_hex = keys::sha_raw_to_hex(&root_raw);
    repo.has_commit(&root_hex) && repo.is_ancestor(&root_hex, head)
}
/// Indexes every commit reported by `repo.all_commit_shas()` from scratch.
///
/// The sha list is treated as newest-first and is walked in reverse, so
/// ordinals grow from the oldest commit to the newest. An ordinal is reserved
/// for every listed commit, even one that ends up skipped, so the next
/// ordinal written to the metadata is the length of the list.
///
/// Returns [`RebuildOutcome::FullRebuild`] carrying `reason` and the number of
/// commits actually written. If the list is empty, or its first or last sha
/// cannot be converted to raw form, nothing is written and `commits` is 0.
///
/// # Errors
///
/// Propagates failures from listing commits and from any index write.
fn rebuild(
    index: &GitHistoryIndex,
    repo: &Repo,
    reason: &'static str,
) -> Result<RebuildOutcome, GitHistoryError> {
    let newest_first = repo.all_commit_shas()?;
    let chrono: Vec<&String> = newest_first.iter().rev().collect();

    // Oldest commit is the root, newest is the head; no endpoints means no history.
    let (Some(root_hex), Some(head_hex)) = (chrono.first().copied(), chrono.last().copied())
    else {
        return Ok(RebuildOutcome::FullRebuild { reason, commits: 0 });
    };
    let (Some(head20), Some(root20)) = (
        keys::sha_hex_to_raw(head_hex),
        keys::sha_hex_to_raw(root_hex),
    ) else {
        return Ok(RebuildOutcome::FullRebuild { reason, commits: 0 });
    };

    let total = chrono.len() as u32;
    let mut interner = PathInterner::new(index, 0);
    let mut postings: AHashMap<u32, Vec<u32>> = AHashMap::new();
    let mut writer = index.writer();

    let written = fold_chunked(
        index,
        repo,
        &chrono,
        0,
        false,
        &mut interner,
        &mut postings,
        &mut writer,
    )?;

    // Posting lists are accumulated in memory and flushed once per path.
    for (path_id, ordinals) in postings {
        let encoded = encoding::encode_ords(&ordinals);
        writer.put_posting(path_id, &encoded)?;
    }

    writer.finish_meta(&head20, &root20, total, interner.next_path_id, written)?;
    Ok(RebuildOutcome::FullRebuild {
        reason,
        commits: written,
    })
}
/// Extends the existing index with the commits reported by
/// `repo.new_commit_shas(last_hex)`, then records `head` as the indexed head.
///
/// New commits receive ordinals starting at `index.next_ord()`. Commits whose
/// sha is already present in the index are skipped. Posting lists of touched
/// paths are read back, extended with the new ordinals and rewritten.
///
/// Returns [`RebuildOutcome::Incremental`] with the number of commits written,
/// or [`RebuildOutcome::Fresh`] if `head` cannot be converted to a raw sha.
/// When there are no new commits, only the metadata is rewritten.
///
/// # Errors
///
/// Propagates failures from listing the new commits and from any index write.
fn append_since(
    index: &GitHistoryIndex,
    repo: &Repo,
    last_hex: &str,
    head: &str,
) -> Result<RebuildOutcome, GitHistoryError> {
    let pending_newest_first = repo.new_commit_shas(last_hex)?;
    let head20 = match keys::sha_hex_to_raw(head) {
        Some(raw) => raw,
        None => return Ok(RebuildOutcome::Fresh),
    };
    // Keep the stored root; fall back to the head only if none is recorded.
    let root20 = index.root_sha().unwrap_or(head20);
    let start_ord = index.next_ord();

    if pending_newest_first.is_empty() {
        // Nothing to add: refresh the metadata so the new head is remembered.
        let writer = index.writer();
        writer.finish_meta(
            &head20,
            &root20,
            start_ord,
            index.next_path_id(),
            index.commit_count(),
        )?;
        return Ok(RebuildOutcome::Incremental { added: 0 });
    }

    // Process oldest first so ordinals keep increasing with history.
    let chrono: Vec<&String> = pending_newest_first.iter().rev().collect();
    let mut interner = PathInterner::new(index, index.next_path_id());
    let mut postings: AHashMap<u32, Vec<u32>> = AHashMap::new();
    let mut writer = index.writer();

    let added = fold_chunked(
        index,
        repo,
        &chrono,
        start_ord,
        true,
        &mut interner,
        &mut postings,
        &mut writer,
    )?;

    // Merge each path's freshly collected ordinals onto its stored posting list.
    for (path_id, fresh_ords) in postings {
        let mut merged = index
            .posting_bytes(path_id)
            .map(|bytes| encoding::decode_ords(&bytes))
            .unwrap_or_default();
        merged.extend(fresh_ords);
        let encoded = encoding::encode_ords(&merged);
        writer.put_posting(path_id, &encoded)?;
    }

    // Every listed commit consumed an ordinal, whether or not it was written.
    let next_ord = start_ord + chrono.len() as u32;
    writer.finish_meta(
        &head20,
        &root20,
        next_ord,
        interner.next_path_id,
        index.commit_count() + added,
    )?;
    Ok(RebuildOutcome::Incremental { added })
}
/// Number of commits whose records are computed together before they are
/// written out by `fold_chunked`.
const RECORD_CHUNK: usize = 8_192;
/// Writes the commits in `chrono` to `writer`, computing their records one
/// batch of `RECORD_CHUNK` shas at a time.
///
/// The commit at position `i` of `chrono` is assigned ordinal `start_ord + i`.
/// A position keeps its ordinal even when the commit is skipped, which happens
/// when:
/// - no record could be computed for it,
/// - its sha cannot be converted to raw form, or
/// - `dedup` is set and the index already maps that sha to an ordinal.
///
/// For each written commit the changed paths are interned (extending
/// `postings` with the commit's ordinal), and both the commit metadata and the
/// sha-to-ordinal mapping are stored.
///
/// Returns the number of commits written.
///
/// # Errors
///
/// Propagates the first failure reported by `writer`.
// The fold threads the shared state it needs explicitly instead of bundling it.
#[allow(clippy::too_many_arguments)]
fn fold_chunked(
    index: &GitHistoryIndex,
    repo: &Repo,
    chrono: &[&String],
    start_ord: u32,
    dedup: bool,
    interner: &mut PathInterner,
    postings: &mut AHashMap<u32, Vec<u32>>,
    writer: &mut super::GitHistoryWriter,
) -> Result<u32, GitHistoryError> {
    // Batches are computed lazily: the next one is only requested once the
    // previous one has been fully consumed. Each batch yields one entry per
    // sha, so the running position equals the sha's position in `chrono`.
    let records = chrono
        .chunks(RECORD_CHUNK)
        .flat_map(|batch| compute_records(repo, batch));

    let mut written = 0u32;
    for (position, maybe_record) in records.enumerate() {
        let ord = start_ord + position as u32;

        let Some(record) = maybe_record else { continue };
        let Some(sha20) = keys::sha_hex_to_raw(&record.sha) else {
            continue;
        };
        let already_indexed = dedup && index.ord_for_sha(&sha20).is_some();
        if already_indexed {
            continue;
        }

        let files = intern_files(interner, postings, ord, &record.files, writer)?;
        let meta = CommitMeta {
            sha: record.sha,
            summary: record.summary,
            author: record.author,
            author_time_unix: record.author_time_unix,
            files,
        };
        writer.put_commit_meta(ord, &meta)?;
        writer.put_ord_for_sha(&sha20, ord)?;
        written += 1;
    }
    Ok(written)
}
/// Looks up the commit record for every sha in `chrono` using a rayon
/// parallel iterator.
///
/// The result has one entry per input sha, in input order; an entry is `None`
/// when `repo.commit_record` produced nothing for that sha.
fn compute_records(repo: &Repo, chrono: &[&String]) -> Vec<Option<crate::git::CommitInfo>> {
    chrono
        .par_iter()
        .map(|&sha| repo.commit_record(sha))
        .collect::<Vec<_>>()
}
/// Converts a commit's changed files into `(path id, change-kind byte)` pairs
/// and records the commit's ordinal in each path's posting list.
///
/// Path ids come from `interner`, which may register new paths through
/// `writer`. The returned pairs keep the order of `files`.
///
/// # Errors
///
/// Stops at, and returns, the first error raised while interning a path.
fn intern_files(
    interner: &mut PathInterner,
    postings: &mut AHashMap<u32, Vec<u32>>,
    ord: u32,
    files: &[(RelPath, crate::git::ChangeKind)],
    writer: &mut super::GitHistoryWriter,
) -> Result<Vec<(u32, u8)>, GitHistoryError> {
    files
        .iter()
        .map(|(rel, kind)| -> Result<(u32, u8), GitHistoryError> {
            let path_id = interner.intern(rel, writer)?;
            let change = (path_id, keys::change_kind_byte(*kind));
            postings.entry(path_id).or_default().push(ord);
            Ok(change)
        })
        .collect()
}
/// Maps repository-relative paths to numeric path ids, reusing ids already
/// known to the index and allocating new ones sequentially.
struct PathInterner<'a> {
/// Index consulted for paths that already have an id.
index: &'a GitHistoryIndex,
/// Ids resolved so far through this interner, keyed by path.
cache: AHashMap<RelPath, u32>,
/// Id that will be handed to the next path found in neither `cache` nor `index`.
next_path_id: u32,
}
impl<'a> PathInterner<'a> {
    /// Creates an interner with an empty cache that will allocate new ids
    /// starting at `next_path_id`.
    fn new(index: &'a GitHistoryIndex, next_path_id: u32) -> Self {
        let cache = AHashMap::new();
        Self {
            index,
            cache,
            next_path_id,
        }
    }

    /// Returns the id for `rel`, allocating one if the path is unknown.
    ///
    /// Lookup order: the local cache, then the index, then a freshly allocated
    /// id. A new id is cached and registered through `writer.put_path`; an id
    /// found in the index is cached for subsequent calls.
    ///
    /// # Errors
    ///
    /// Returns the error from `writer.put_path` when registering a new path
    /// fails.
    fn intern(
        &mut self,
        rel: &RelPath,
        writer: &mut super::GitHistoryWriter,
    ) -> Result<u32, GitHistoryError> {
        if let Some(cached) = self.cache.get(rel).copied() {
            return Ok(cached);
        }

        match self.index.path_id(rel) {
            Some(existing) => {
                self.cache.insert(rel.clone(), existing);
                Ok(existing)
            }
            None => {
                let fresh = self.next_path_id;
                self.next_path_id += 1;
                self.cache.insert(rel.clone(), fresh);
                writer.put_path(rel, fresh)?;
                Ok(fresh)
            }
        }
    }
}