pub mod builder;
pub mod encoding;
pub mod keys;
pub mod reader;
use std::path::{Path, PathBuf};
use fjall::{Database, Keyspace, KeyspaceCreateOptions, OwnedWriteBatch};
use thiserror::Error;
use crate::path::RelPath;
/// Schema version of the on-disk git-history index: the crate's release minor
/// version plus a fixed offset of 4. `GitHistoryIndex::open` deletes an existing
/// store whose recorded version differs from this value.
pub const GIT_HISTORY_SCHEMA: u32 = crate::version::RELEASE_MINOR as u32 + 4;
/// Name of the directory, joined onto the basemind directory, that holds the fjall database.
const GIT_HISTORY_DIR: &str = "git-history.fjall";
/// Reports whether the git-history index is enabled.
///
/// The index is on by default. It is switched off only when the
/// `BASEMIND_GH_INDEX` environment variable is set to exactly `"0"`; an unset
/// or unreadable (non-Unicode) variable counts as enabled.
pub fn index_enabled() -> bool {
    match std::env::var("BASEMIND_GH_INDEX") {
        Ok(value) => value != "0",
        // Missing or non-Unicode value: fall back to the default (enabled).
        Err(_) => true,
    }
}
/// Errors returned by the git-history index and its writer.
#[derive(Debug, Error)]
pub enum GitHistoryError {
/// An error reported by the fjall store (converted via `From`).
#[error("fjall error: {0}")]
Fjall(#[from] fjall::Error),
/// A filesystem operation on `path` failed.
#[error("io error on {path}: {source}")]
Io {
/// Path the failing filesystem operation was applied to.
path: PathBuf,
/// The underlying I/O error.
#[source]
source: std::io::Error,
},
/// An `rmp_serde` encoding error (converted via `From`).
#[error("msgpack encode error: {0}")]
Encode(#[from] rmp_serde::encode::Error),
/// An `rmp_serde` decoding error (converted via `From`).
#[error("msgpack decode error: {0}")]
Decode(#[from] rmp_serde::decode::Error),
/// An error from the crate's git layer (converted via `From`).
#[error("git error: {0}")]
Git(#[from] crate::git::GitError),
}
/// Metadata for one indexed commit, as stored in and read back from the index.
#[derive(Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct CommitMeta {
/// Commit id as a hex string (converted to and from 20 raw bytes when stored).
pub sha: String,
/// Commit summary text; decoded lossily from the stored bytes on read.
pub summary: String,
/// Author text; decoded lossily from the stored bytes on read.
pub author: String,
/// Author timestamp. NOTE(review): presumably seconds since the Unix epoch — confirm.
pub author_time_unix: i64,
/// Per-file entries for the commit; left empty when the commit is decoded
/// without files. NOTE(review): the tuple looks like (path id, change kind) —
/// confirm against the `encoding` module.
pub files: Vec<(u32, u8)>,
}
/// Handle to the fjall-backed git-history index: the database plus one handle
/// per keyspace. Cloned by `writer()` so the writer owns its own handle.
#[derive(Clone)]
pub struct GitHistoryIndex {
/// The fjall database that owns all keyspaces below; also used to create write batches.
db: Database,
/// `gh_meta`: schema version, last indexed head, root sha, and counters.
meta: Keyspace,
/// `gh_commit_by_ord`: commit ordinal key -> encoded commit metadata.
commit_by_ord: Keyspace,
/// `gh_ord_by_sha`: 20-byte commit sha -> commit ordinal.
ord_by_sha: Keyspace,
/// `gh_path_id_by_path`: key derived from a relative path -> path id.
path_id_by_path: Keyspace,
/// `gh_path_by_id`: path id key -> relative path bytes.
path_by_id: Keyspace,
/// `gh_path_to_ords`: path id key -> encoded posting bytes for that path.
path_to_ords: Keyspace,
}
impl GitHistoryIndex {
pub fn open(basemind_dir: &Path) -> Result<Self, GitHistoryError> {
let dir = basemind_dir.join(GIT_HISTORY_DIR);
let needs_wipe = matches!(peek_schema(&dir), Some(ver) if ver != GIT_HISTORY_SCHEMA);
if needs_wipe && dir.exists() {
std::fs::remove_dir_all(&dir).map_err(|source| GitHistoryError::Io {
path: dir.clone(),
source,
})?;
}
Self::open_at(&dir)
}
fn open_at(dir: &Path) -> Result<Self, GitHistoryError> {
std::fs::create_dir_all(dir).map_err(|source| GitHistoryError::Io {
path: dir.to_path_buf(),
source,
})?;
let db = Database::builder(dir).open()?;
let meta = db.keyspace("gh_meta", KeyspaceCreateOptions::default)?;
let commit_by_ord = db.keyspace("gh_commit_by_ord", KeyspaceCreateOptions::default)?;
let ord_by_sha = db.keyspace("gh_ord_by_sha", KeyspaceCreateOptions::default)?;
let path_id_by_path = db.keyspace("gh_path_id_by_path", KeyspaceCreateOptions::default)?;
let path_by_id = db.keyspace("gh_path_by_id", KeyspaceCreateOptions::default)?;
let path_to_ords = db.keyspace("gh_path_to_ords", KeyspaceCreateOptions::default)?;
meta.insert(keys::META_SCHEMA_VER, GIT_HISTORY_SCHEMA.to_be_bytes())?;
Ok(Self {
db,
meta,
commit_by_ord,
ord_by_sha,
path_id_by_path,
path_by_id,
path_to_ords,
})
}
pub fn clear(&self, basemind_dir: &Path) -> Result<(), GitHistoryError> {
let dir = basemind_dir.join(GIT_HISTORY_DIR);
for ks in [
&self.commit_by_ord,
&self.ord_by_sha,
&self.path_id_by_path,
&self.path_by_id,
&self.path_to_ords,
&self.meta,
] {
let keys: Vec<_> = ks
.iter()
.filter_map(|g| g.into_inner().ok().map(|(k, _)| k))
.collect();
for k in keys {
ks.remove(k)?;
}
}
let _ = dir; self.meta
.insert(keys::META_SCHEMA_VER, GIT_HISTORY_SCHEMA.to_be_bytes())?;
Ok(())
}
fn compact(&self) -> Result<(), GitHistoryError> {
for keyspace in [
&self.meta,
&self.commit_by_ord,
&self.ord_by_sha,
&self.path_id_by_path,
&self.path_by_id,
&self.path_to_ords,
] {
keyspace.rotate_memtable_and_wait()?;
keyspace.major_compact()?;
}
Ok(())
}
pub fn writer(&self) -> GitHistoryWriter {
GitHistoryWriter {
index: self.clone(),
batch: self.db.batch(),
staged: 0,
}
}
fn meta_u32(&self, key: &[u8]) -> u32 {
self.meta
.get(key)
.ok()
.flatten()
.and_then(|b| keys::parse_u32(&b))
.unwrap_or(0)
}
fn meta_sha(&self, key: &[u8]) -> Option<[u8; 20]> {
let bytes = self.meta.get(key).ok().flatten()?;
<[u8; 20]>::try_from(bytes.as_ref()).ok()
}
pub fn last_indexed_head(&self) -> Option<[u8; 20]> {
self.meta_sha(keys::META_LAST_HEAD)
}
pub fn last_indexed_head_hex(&self) -> Option<String> {
self.last_indexed_head().map(|s| keys::sha_raw_to_hex(&s))
}
pub fn next_ord(&self) -> u32 {
self.meta_u32(keys::META_NEXT_ORD)
}
pub fn next_path_id(&self) -> u32 {
self.meta_u32(keys::META_NEXT_PATH_ID)
}
pub fn root_sha(&self) -> Option<[u8; 20]> {
self.meta_sha(keys::META_ROOT_SHA)
}
pub fn commit_count(&self) -> u32 {
self.meta_u32(keys::META_COMMIT_COUNT)
}
pub fn is_empty(&self) -> bool {
self.last_indexed_head().is_none()
}
pub(crate) fn commit_meta(&self, ord: u32, want_files: bool) -> Option<CommitMeta> {
let bytes = self.commit_by_ord.get(keys::u32_key(ord)).ok().flatten()?;
decode_commit_value(&bytes, want_files)
}
pub(crate) fn ord_for_sha(&self, sha20: &[u8; 20]) -> Option<u32> {
let bytes = self.ord_by_sha.get(sha20).ok().flatten()?;
keys::parse_u32(&bytes)
}
pub(crate) fn path_id(&self, rel: &RelPath) -> Option<u32> {
let key = keys::path_id_by_path_key(rel)?;
let bytes = self.path_id_by_path.get(&key).ok().flatten()?;
keys::parse_u32(&bytes)
}
pub(crate) fn path_for_id(&self, path_id: u32) -> Option<RelPath> {
let bytes = self.path_by_id.get(keys::u32_key(path_id)).ok().flatten()?;
Some(RelPath::from(bytes.as_ref()))
}
pub(crate) fn posting_bytes(&self, path_id: u32) -> Option<fjall::Slice> {
self.path_to_ords.get(keys::u32_key(path_id)).ok().flatten()
}
pub(crate) fn commits_desc(
&self,
want_files: bool,
) -> impl Iterator<Item = (u32, CommitMeta)> + '_ {
self.commit_by_ord.iter().rev().filter_map(move |g| {
let (k, v) = g.into_inner().ok()?;
let ord = keys::parse_u32(&k)?;
let meta = decode_commit_value(&v, want_files)?;
Some((ord, meta))
})
}
}
/// Decodes a stored commit value into a [`CommitMeta`].
///
/// With `want_files == true` the full record is decoded, including its file
/// list. Otherwise only the head is decoded and `files` is left empty. The raw
/// sha is rendered as hex; summary and author bytes are converted lossily to
/// text. Returns `None` when the chosen decoder rejects `bytes`.
fn decode_commit_value(bytes: &[u8], want_files: bool) -> Option<CommitMeta> {
    /// Converts stored bytes to an owned string, replacing invalid UTF-8.
    fn lossy(raw: &[u8]) -> String {
        String::from_utf8_lossy(raw).into_owned()
    }

    if !want_files {
        // Head-only path: skip decoding the per-file entries.
        let head = encoding::decode_commit_meta_head(bytes)?;
        return Some(CommitMeta {
            sha: keys::sha_raw_to_hex(&head.sha20),
            summary: lossy(head.summary),
            author: lossy(head.author),
            author_time_unix: head.author_time_unix,
            files: Vec::new(),
        });
    }

    let full = encoding::decode_commit_meta(bytes)?;
    Some(CommitMeta {
        sha: keys::sha_raw_to_hex(&full.sha20),
        summary: lossy(full.summary),
        author: lossy(full.author),
        author_time_unix: full.author_time_unix,
        files: full.files,
    })
}
/// Batching writer for the git-history index, created by `GitHistoryIndex::writer`.
pub struct GitHistoryWriter {
/// Handle to the index whose keyspaces are written to.
index: GitHistoryIndex,
/// Write batch holding the operations staged since the last flush.
batch: OwnedWriteBatch,
/// Number of `put_*` calls made since the last flush.
staged: usize,
}
/// Number of staged `put_*` calls after which the writer commits its batch.
const COMMIT_BATCH: usize = 4096;
impl GitHistoryWriter {
pub fn put_commit_meta(&mut self, ord: u32, meta: &CommitMeta) -> Result<(), GitHistoryError> {
let sha20 = keys::sha_hex_to_raw(&meta.sha).unwrap_or([0u8; 20]);
let value = encoding::encode_commit_meta(
&sha20,
meta.author_time_unix,
meta.author.as_bytes(),
meta.summary.as_bytes(),
&meta.files,
);
self.batch
.insert(&self.index.commit_by_ord, keys::u32_key(ord), value);
self.maybe_flush()
}
pub fn put_ord_for_sha(&mut self, sha20: &[u8; 20], ord: u32) -> Result<(), GitHistoryError> {
self.batch
.insert(&self.index.ord_by_sha, *sha20, keys::u32_key(ord));
self.maybe_flush()
}
pub fn put_path(&mut self, rel: &RelPath, path_id: u32) -> Result<(), GitHistoryError> {
if let Some(key) = keys::path_id_by_path_key(rel) {
self.batch
.insert(&self.index.path_id_by_path, key, keys::u32_key(path_id));
}
self.batch.insert(
&self.index.path_by_id,
keys::u32_key(path_id),
rel.as_bytes().to_vec(),
);
self.maybe_flush()
}
pub fn put_posting(&mut self, path_id: u32, encoded: &[u8]) -> Result<(), GitHistoryError> {
self.batch.insert(
&self.index.path_to_ords,
keys::u32_key(path_id),
encoded.to_vec(),
);
self.maybe_flush()
}
fn maybe_flush(&mut self) -> Result<(), GitHistoryError> {
self.staged += 1;
if self.staged >= COMMIT_BATCH {
self.flush()?;
}
Ok(())
}
fn flush(&mut self) -> Result<(), GitHistoryError> {
let batch = std::mem::replace(&mut self.batch, self.index.db.batch());
batch.commit()?;
self.staged = 0;
Ok(())
}
pub fn finish_meta(
mut self,
head: &[u8; 20],
root: &[u8; 20],
next_ord: u32,
next_path_id: u32,
commit_count: u32,
) -> Result<(), GitHistoryError> {
self.flush()?;
let mut meta = self.index.db.batch();
meta.insert(
&self.index.meta,
keys::META_NEXT_ORD,
next_ord.to_be_bytes(),
);
meta.insert(
&self.index.meta,
keys::META_NEXT_PATH_ID,
next_path_id.to_be_bytes(),
);
meta.insert(
&self.index.meta,
keys::META_COMMIT_COUNT,
commit_count.to_be_bytes(),
);
meta.insert(&self.index.meta, keys::META_ROOT_SHA, root.to_vec());
meta.insert(&self.index.meta, keys::META_LAST_HEAD, head.to_vec());
meta.commit()?;
self.index.compact()?;
Ok(())
}
}
/// Reads the schema version recorded in the store at `dir`, if there is one.
///
/// Returns `None` when the directory does not exist, the database or its
/// `gh_meta` keyspace cannot be opened, the version key is absent or
/// unreadable, or the stored bytes do not parse as a `u32`.
fn peek_schema(dir: &Path) -> Option<u32> {
    if !dir.exists() {
        return None;
    }
    let database = Database::builder(dir).open().ok()?;
    let meta_ks = database
        .keyspace("gh_meta", KeyspaceCreateOptions::default)
        .ok()?;
    meta_ks
        .get(keys::META_SCHEMA_VER)
        .ok()
        .flatten()
        .and_then(|raw| keys::parse_u32(&raw))
}