mod commit;
mod remote;
use commit::{build_commit_info, commit_files, commit_touches_path, compute_hunks, decode_path};
pub use remote::{normalize_remote_url, scope_key};
use std::path::{Path, PathBuf};
use thiserror::Error;
/// Errors returned by the git access layer in this module.
#[derive(Debug, Error)]
pub enum GitError {
/// Repository discovery failed; carries the path discovery started from.
#[error("not inside a git repository (starting from {0})")]
NotARepo(PathBuf),
/// A discovery-related failure described by a free-form message.
#[error("git discover error: {0}")]
Discover(String),
/// A revision expression could not be resolved; `msg` is the underlying error text.
#[error("rev parse failed for {rev}: {msg}")]
RevParse { rev: String, msg: String },
/// Reading `what` (index, tree, blob, status, ...) failed; `msg` is the underlying error text.
#[error("git error reading {what}: {msg}")]
Read { what: String, msg: String },
/// Blame was refused because the blob exceeds the configured byte or line limit.
#[error("blame skipped: {path} is too large ({bytes} bytes, {lines} lines)")]
BlameTooLarge {
path: crate::path::RelPath,
bytes: u64,
lines: u64,
},
/// Any other failure, described by a free-form message.
#[error("git error: {0}")]
Other(String),
}
/// Handle to a git repository that has a working directory.
pub struct Repo {
/// Shareable gix repository; per-call handles are created via `to_thread_local()`.
inner: gix::ThreadSafeRepository,
/// Working-directory path captured when the repository was discovered.
workdir: PathBuf,
/// Whether a `shallow` file existed in the git directory at discovery time.
is_shallow: bool,
}
/// Paths grouped by the kind of change reported by the status iteration.
#[derive(Debug, Default, Clone)]
pub struct WorkingTreeStatus {
/// Paths reported as additions by the tree-vs-index comparison.
pub staged_added: Vec<crate::path::RelPath>,
/// Paths reported as modifications or rewrites by the tree-vs-index comparison.
pub staged_modified: Vec<crate::path::RelPath>,
/// Paths reported as deletions by the tree-vs-index comparison.
pub staged_deleted: Vec<crate::path::RelPath>,
/// Paths reported as modifications or rewrites by the index-vs-worktree comparison.
pub modified: Vec<crate::path::RelPath>,
/// Paths reported as directory contents by the index-vs-worktree comparison.
pub untracked: Vec<crate::path::RelPath>,
}
/// How a file changed in a commit. Serialized with lowercase variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ChangeKind {
Added,
Modified,
Deleted,
Renamed,
}
impl ChangeKind {
pub fn as_str(self) -> &'static str {
match self {
ChangeKind::Added => "added",
ChangeKind::Modified => "modified",
ChangeKind::Deleted => "deleted",
ChangeKind::Renamed => "renamed",
}
}
}
/// Summary of a single commit as returned by the log queries.
///
/// Values are produced by `build_commit_info` (defined in the `commit` submodule).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CommitInfo {
/// Commit id as a string.
pub sha: String,
/// Abbreviated form of `sha`.
pub short_sha: String,
/// Commit message summary.
pub summary: String,
/// Author name.
pub author: String,
/// Author timestamp; presumably seconds since the Unix epoch — confirm against `build_commit_info`.
pub author_time_unix: i64,
/// Files changed by the commit, each with its change kind.
// NOTE(review): `log_for_path` asks `build_commit_info` not to include files; presumably this is empty then — confirm.
pub files: Vec<(crate::path::RelPath, ChangeKind)>,
}
/// One diff hunk between two versions of a file.
///
/// Values are produced by `compute_hunks` (defined in the `commit` submodule).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Hunk {
/// Whether the hunk adds, removes or modifies lines.
pub kind: HunkKind,
// NOTE(review): line starts are presumably 1-based — confirm against `compute_hunks`.
/// First affected line in the old version.
pub old_line_start: u32,
/// Number of affected lines in the old version.
pub old_line_count: u32,
/// First affected line in the new version.
pub new_line_start: u32,
/// Number of affected lines in the new version.
pub new_line_count: u32,
/// Textual content associated with the hunk.
pub text: String,
}
/// Classification of a diff hunk. Serialized with lowercase variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HunkKind {
Added,
Removed,
Modified,
}
impl HunkKind {
pub fn as_str(self) -> &'static str {
match self {
HunkKind::Added => "added",
HunkKind::Removed => "removed",
HunkKind::Modified => "modified",
}
}
}
/// A run of consecutive lines in the blamed file attributed to one commit.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BlameHunk {
/// Full id of the commit the lines are attributed to.
pub commit_sha: String,
/// At most the first seven characters of `commit_sha`.
pub short_sha: String,
/// First line of the run in the blamed file (the blame entry's start offset plus one).
pub start_line: u32,
/// Number of lines in the run.
pub len: u32,
/// First line of the run in the source file (the blame entry's source offset plus one).
pub source_start_line: u32,
/// Author name of the commit, or `"?"` when it cannot be read.
pub author: String,
/// Author time in seconds, or `0` when it cannot be read.
pub author_time_unix: i64,
/// Commit message summary, or empty when it cannot be read.
pub summary: String,
/// Source file name reported by the blame entry, when present.
pub source_path: Option<crate::path::RelPath>,
}
/// Result of blaming one file at one revision.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct BlameResult {
/// Path that was blamed.
pub path: crate::path::RelPath,
/// Revision string the blame started from, as given by the caller.
pub suspect_sha: String,
/// Blame hunks sorted by `start_line`; empty when the blame was truncated.
pub hunks: Vec<BlameHunk>,
/// Why no hunks were produced (e.g. `"shallow_clone"`); omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub truncated_reason: Option<String>,
}
/// Snapshot of basic repository facts, as returned by `Repo::info`.
#[derive(Debug, Clone)]
pub struct RepoInfo {
/// Working-directory path of the repository.
pub workdir: PathBuf,
/// Full id of `HEAD`, or `None` when it cannot be resolved.
pub head_sha: Option<String>,
/// At most the first seven characters of `head_sha`.
pub head_short_sha: Option<String>,
/// Shortened name of the reference `HEAD` points at, when available and valid UTF-8.
pub branch: Option<String>,
}
impl Repo {
/// Discovers the repository containing `start` and records its working directory.
///
/// # Errors
/// Returns [`GitError::NotARepo`] when discovery fails and [`GitError::Other`]
/// when the discovered repository has no working directory (bare).
pub fn discover(start: &Path) -> Result<Self, GitError> {
    let inner = match gix::ThreadSafeRepository::discover(start) {
        Ok(repo) => repo,
        Err(_) => return Err(GitError::NotARepo(start.to_path_buf())),
    };
    let Some(root) = inner.work_dir() else {
        return Err(GitError::Other(
            "bare repositories are not supported".to_string(),
        ));
    };
    let workdir = root.to_path_buf();
    // A `shallow` file inside the git directory marks the clone as shallow.
    let shallow_marker = inner.path().join("shallow");
    let is_shallow = shallow_marker.exists();
    Ok(Self {
        inner,
        workdir,
        is_shallow,
    })
}
/// Returns the working-directory path captured at discovery time.
pub fn workdir(&self) -> &Path {
    self.workdir.as_path()
}
/// Returns whether a `shallow` file was present in the git directory at discovery time.
pub fn is_shallow(&self) -> bool {
self.is_shallow
}
/// Creates a `gix::Repository` handle from the shared repository via `to_thread_local()`.
pub(super) fn local(&self) -> gix::Repository {
self.inner.to_thread_local()
}
/// Resolves the revision expression `rev` and returns the resulting object id as a string.
///
/// # Errors
/// Returns [`GitError::RevParse`] when `rev` cannot be resolved.
pub fn resolve_rev(&self, rev: &str) -> Result<String, GitError> {
    let repo = self.local();
    let sha = match repo.rev_parse_single(rev) {
        Ok(id) => id.to_string(),
        Err(e) => {
            return Err(GitError::RevParse {
                rev: rev.to_string(),
                msg: e.to_string(),
            })
        }
    };
    Ok(sha)
}
/// Lists the paths of all index entries, in index order.
///
/// Entries whose path is not valid UTF-8 are left out.
///
/// # Errors
/// Returns [`GitError::Read`] when the index cannot be opened.
pub fn list_paths_staged(&self) -> Result<Vec<String>, GitError> {
    let repo = self.local();
    let index = match repo.index() {
        Ok(index) => index,
        Err(e) => {
            return Err(GitError::Read {
                what: "git index".to_string(),
                msg: e.to_string(),
            })
        }
    };
    let entries = index.entries();
    let mut paths = Vec::with_capacity(entries.len());
    paths.extend(
        entries
            .iter()
            .filter_map(|entry| std::str::from_utf8(entry.path(&index)).ok())
            .map(str::to_owned),
    );
    Ok(paths)
}
/// Lists the paths of all blob and symlink entries in the tree that `rev_sha` resolves to.
///
/// Entries whose path is not valid UTF-8 are left out.
///
/// # Errors
/// Returns the error from resolving the tree, or [`GitError::Read`] when the
/// tree traversal fails.
pub fn list_paths_rev(&self, rev_sha: &str) -> Result<Vec<String>, GitError> {
    let repo = self.local();
    let tree = resolve_tree(&repo, rev_sha)?;
    let entries = match tree.traverse().breadthfirst.files() {
        Ok(entries) => entries,
        Err(e) => {
            return Err(GitError::Read {
                what: format!("tree {rev_sha}"),
                msg: e.to_string(),
            })
        }
    };
    let mut paths = Vec::with_capacity(entries.len());
    paths.extend(
        entries
            .into_iter()
            .filter(|entry| entry.mode.is_blob_or_symlink())
            .filter_map(|entry| {
                std::str::from_utf8(&entry.filepath)
                    .ok()
                    .map(str::to_owned)
            }),
    );
    Ok(paths)
}
/// Returns the sorted, de-duplicated, non-empty paths of the repository's submodules.
///
/// Best-effort: yields an empty list when submodules cannot be enumerated (or
/// there are none), and skips any submodule whose path cannot be read.
pub fn submodule_paths(&self) -> Vec<crate::path::RelPath> {
    let repo = self.local();
    let Ok(Some(submodules)) = repo.submodules() else {
        return Vec::new();
    };
    let mut paths = Vec::new();
    for submodule in submodules {
        let Ok(raw) = submodule.path() else {
            continue;
        };
        let as_bstr: &gix::bstr::BStr = &raw;
        let rel = crate::path::RelPath::from(<gix::bstr::BStr as AsRef<[u8]>>::as_ref(as_bstr));
        if rel.is_empty() {
            continue;
        }
        paths.push(rel);
    }
    paths.sort();
    paths.dedup();
    paths
}
/// Reads the content of the first index entry whose path equals `rel`.
///
/// Returns `Ok(None)` when no index entry has that path.
///
/// # Errors
/// Returns [`GitError::Read`] when the index cannot be opened or the entry's
/// object cannot be loaded.
pub fn read_blob_staged(&self, rel: &str) -> Result<Option<Vec<u8>>, GitError> {
    let local = self.local();
    let index = local.index().map_err(|e| GitError::Read {
        what: "git index".to_string(),
        msg: e.to_string(),
    })?;
    let needle = rel.as_bytes();
    let Some(entry) = index
        .entries()
        .iter()
        .find(|entry| entry.path(&index) == needle)
    else {
        return Ok(None);
    };
    let mut object = local.find_object(entry.id).map_err(|e| GitError::Read {
        what: format!("blob {rel}"),
        msg: e.to_string(),
    })?;
    // The object is dropped right after this call, so hand its buffer to the
    // caller instead of copying the whole blob.
    Ok(Some(std::mem::take(&mut object.data)))
}
/// Reads the blob stored at `rel` in the tree that `rev_sha` resolves to.
///
/// Same as [`Self::read_blob_at_rev_with_oid`], with the object id discarded.
pub fn read_blob_at_rev(
    &self,
    rev_sha: &str,
    rel: impl AsRef<Path>,
) -> Result<Option<Vec<u8>>, GitError> {
    let found = self.read_blob_at_rev_with_oid(rev_sha, rel)?;
    Ok(found.map(|(bytes, _oid)| bytes))
}
/// Reads the blob stored at `rel` in the tree that `rev_sha` resolves to,
/// together with the blob's object id.
///
/// Returns `Ok(None)` when the tree has no entry at `rel`.
///
/// # Errors
/// Returns the error from resolving the tree, or [`GitError::Read`] when the
/// entry lookup fails, the object cannot be loaded, or the object is not a blob.
pub fn read_blob_at_rev_with_oid(
    &self,
    rev_sha: &str,
    rel: impl AsRef<Path>,
) -> Result<Option<(Vec<u8>, gix::ObjectId)>, GitError> {
    let local = self.local();
    let tree = resolve_tree(&local, rev_sha)?;
    let rel_ref = rel.as_ref();
    let rel_display = rel_ref.display();
    // Every failure below is reported against the same `<rev>:<path>` label.
    let read_err = |msg: String| GitError::Read {
        what: format!("{rev_sha}:{rel_display}"),
        msg,
    };
    let Some(entry) = tree
        .lookup_entry_by_path(rel_ref)
        .map_err(|e| read_err(e.to_string()))?
    else {
        return Ok(None);
    };
    let oid = entry.object_id();
    let object = entry.object().map_err(|e| read_err(e.to_string()))?;
    let mut blob = object
        .try_into_blob()
        .map_err(|e| read_err(format!("not a blob: {e}")))?;
    // The blob is dropped right after this call, so hand its buffer to the
    // caller instead of copying the whole content.
    Ok(Some((std::mem::take(&mut blob.data), oid)))
}
/// Collects the status of the repository into a [`WorkingTreeStatus`].
///
/// Individual status items that fail to load are skipped.
///
/// # Errors
/// Returns [`GitError::Read`] when the status platform or its iterator cannot
/// be created.
pub fn status_porcelain(&self) -> Result<WorkingTreeStatus, GitError> {
    let repo = self.local();
    let mut summary = WorkingTreeStatus::default();
    let platform = match repo.status(gix::progress::Discard) {
        Ok(platform) => platform,
        Err(e) => {
            return Err(GitError::Read {
                what: "status".to_string(),
                msg: e.to_string(),
            })
        }
    };
    let items = match platform.into_iter(None) {
        Ok(items) => items,
        Err(e) => {
            return Err(GitError::Read {
                what: "status".to_string(),
                msg: e.to_string(),
            })
        }
    };
    for item in items {
        let Ok(item) = item else {
            continue;
        };
        classify_status_item(&item, &mut summary);
    }
    Ok(summary)
}
/// Walks history from `HEAD`, newest commit time first, and returns commit
/// summaries built from at most `max_commits` walk entries.
///
/// `include_files` is forwarded to `build_commit_info`. Walk entries that fail
/// to load, and commits for which no summary can be built, are skipped but
/// still count towards `max_commits`.
///
/// # Errors
/// Returns [`GitError::Read`] when `HEAD` cannot be resolved or the walk cannot
/// be started.
pub fn log_paths(
    &self,
    max_commits: usize,
    include_files: bool,
) -> Result<Vec<CommitInfo>, GitError> {
    let repo = self.local();
    let head = match repo.head_id() {
        Ok(head) => head,
        Err(e) => {
            return Err(GitError::Read {
                what: "HEAD".to_string(),
                msg: e.to_string(),
            })
        }
    };
    let order = gix::revision::walk::Sorting::ByCommitTime(
        gix::traverse::commit::simple::CommitTimeOrder::NewestFirst,
    );
    let walk = match repo.rev_walk([head]).sorting(order).all() {
        Ok(walk) => walk,
        Err(e) => {
            return Err(GitError::Read {
                what: "rev walk".to_string(),
                msg: e.to_string(),
            })
        }
    };
    let mut commits = Vec::with_capacity(max_commits.min(1024));
    commits.extend(
        walk.take(max_commits)
            .filter_map(Result::ok)
            .filter_map(|info| build_commit_info(&repo, info.id, include_files)),
    );
    Ok(commits)
}
/// Walks history from `HEAD`, newest commit time first, and returns up to
/// `max_commits` summaries of commits that touch `rel`.
///
/// Walk entries that fail to load are skipped. File lists are not requested
/// for the returned summaries.
///
/// # Errors
/// Returns [`GitError::Read`] when `HEAD` cannot be resolved or the walk cannot
/// be started.
pub fn log_for_path(
    &self,
    rel: impl AsRef<Path>,
    max_commits: usize,
) -> Result<Vec<CommitInfo>, GitError> {
    let path_bytes: bstr::BString =
        bstr::BString::from(rel.as_ref().as_os_str().as_encoded_bytes());
    let needle: &[u8] = path_bytes.as_slice();
    let repo = self.local();
    let head = match repo.head_id() {
        Ok(head) => head,
        Err(e) => {
            return Err(GitError::Read {
                what: "HEAD".to_string(),
                msg: e.to_string(),
            })
        }
    };
    let order = gix::revision::walk::Sorting::ByCommitTime(
        gix::traverse::commit::simple::CommitTimeOrder::NewestFirst,
    );
    let walk = match repo.rev_walk([head]).sorting(order).all() {
        Ok(walk) => walk,
        Err(e) => {
            return Err(GitError::Read {
                what: "rev walk".to_string(),
                msg: e.to_string(),
            })
        }
    };
    let commits: Vec<CommitInfo> = walk
        .filter_map(Result::ok)
        .filter(|info| commit_touches_path(&repo, info.id, needle))
        .filter_map(|info| build_commit_info(&repo, info.id, false))
        .take(max_commits)
        .collect();
    Ok(commits)
}
pub fn blame_file(
&self,
suspect_sha: &str,
path: &crate::path::RelPath,
line_range: Option<(u32, u32)>,
) -> Result<BlameResult, GitError> {
use gix::bstr::BStr;
let local = self.local();
let (size_bytes, line_count) =
blob_size_and_line_count(&local, suspect_sha, path).unwrap_or((0, 0));
let max_bytes = blame_max_bytes_from_env();
let max_lines = blame_max_lines_from_env();
if size_bytes > max_bytes || line_count > max_lines {
return Err(GitError::BlameTooLarge {
path: path.clone(),
bytes: size_bytes,
lines: line_count,
});
}
let suspect = local
.rev_parse_single(suspect_sha)
.map_err(|e| GitError::RevParse {
rev: suspect_sha.to_string(),
msg: e.to_string(),
})?
.detach();
let mut options = gix::repository::blame_file::Options {
diff_algorithm: Some(gix::diff::blob::Algorithm::Histogram),
ranges: gix::blame::BlameRanges::default(),
since: None,
rewrites: Some(gix::diff::Rewrites::default()),
};
if let Some((lo, hi)) = line_range {
options.ranges = gix::blame::BlameRanges::from_one_based_inclusive_range(lo..=hi)
.map_err(|e| GitError::Other(format!("invalid blame range {lo}..={hi}: {e}")))?;
}
let outcome = match local.blame_file(BStr::new(path.as_bytes()), suspect, options) {
Ok(o) => o,
Err(e) => {
if self.is_shallow && looks_like_shallow_blame_error(&e) {
return Ok(BlameResult {
path: path.clone(),
suspect_sha: suspect_sha.to_string(),
hunks: Vec::new(),
truncated_reason: Some("shallow_clone".to_string()),
});
}
return Err(GitError::Read {
what: format!("blame {suspect_sha}:{path}"),
msg: e.to_string(),
});
}
};
let mut author_cache: ahash::AHashMap<gix::ObjectId, (String, i64, String)> =
ahash::AHashMap::new();
let mut hunks = Vec::with_capacity(outcome.entries.len());
for entry in &outcome.entries {
let (author, time, summary) = author_cache
.entry(entry.commit_id)
.or_insert_with(|| {
let commit = match local.find_commit(entry.commit_id) {
Ok(c) => c,
Err(_) => return ("?".to_string(), 0, String::new()),
};
let author = commit
.author()
.ok()
.and_then(|a| std::str::from_utf8(a.name).ok().map(|s| s.to_string()))
.unwrap_or_else(|| "?".to_string());
let time = commit
.author()
.ok()
.and_then(|a| a.time().ok())
.map(|t| t.seconds)
.unwrap_or(0);
let summary = commit
.message()
.ok()
.map(|m| m.summary().to_string())
.unwrap_or_default();
(author, time, summary)
})
.clone();
let sha = entry.commit_id.to_string();
let short_sha = sha[..7.min(sha.len())].to_string();
let source_path = entry
.source_file_name
.as_ref()
.map(|b| crate::path::RelPath::from(b.as_slice()));
hunks.push(BlameHunk {
commit_sha: sha,
short_sha,
start_line: entry.start_in_blamed_file + 1,
len: entry.len.get(),
source_start_line: entry.start_in_source_file + 1,
author,
author_time_unix: time,
summary,
source_path,
});
}
hunks.sort_by_key(|h| h.start_line);
Ok(BlameResult {
path: path.clone(),
suspect_sha: suspect_sha.to_string(),
hunks,
truncated_reason: None,
})
}
/// Diffs the blob at `path` between `rev_old` and `rev_new`.
///
/// Returns `Ok(None)` when the path exists in neither revision. Otherwise
/// returns the hunks plus two flags saying whether the path exists in the old
/// and in the new revision; a missing side is diffed as empty content.
///
/// # Errors
/// Propagates any error from reading either blob.
pub fn diff_file(
    &self,
    rev_old: &str,
    rev_new: &str,
    path: impl AsRef<Path>,
) -> Result<Option<(Vec<Hunk>, bool, bool)>, GitError> {
    let path_ref = path.as_ref();
    let old_bytes = self.read_blob_at_rev(rev_old, path_ref)?;
    let new_bytes = self.read_blob_at_rev(rev_new, path_ref)?;
    // Record presence first so the buffers can be moved rather than cloned.
    let old_present = old_bytes.is_some();
    let new_present = new_bytes.is_some();
    if !old_present && !new_present {
        return Ok(None);
    }
    let old_buf = old_bytes.unwrap_or_default();
    let new_buf = new_bytes.unwrap_or_default();
    let hunks = compute_hunks(&old_buf, &new_buf);
    Ok(Some((hunks, old_present, new_present)))
}
/// Resolves `commit_sha` and returns the files changed by that commit with
/// their change kinds, as computed by `commit_files`.
///
/// When `commit_files` yields nothing, an empty list is returned.
///
/// # Errors
/// Returns [`GitError::RevParse`] when `commit_sha` cannot be resolved.
pub fn commit_files_uncached(
    &self,
    commit_sha: &str,
) -> Result<Vec<(crate::path::RelPath, ChangeKind)>, GitError> {
    let repo = self.local();
    let commit_id = match repo.rev_parse_single(commit_sha) {
        Ok(id) => id.detach(),
        Err(e) => {
            return Err(GitError::RevParse {
                rev: commit_sha.to_string(),
                msg: e.to_string(),
            })
        }
    };
    let files = commit_files(&repo, commit_id).unwrap_or_default();
    Ok(files)
}
/// Returns the working directory, the `HEAD` id (full and abbreviated to at
/// most seven characters) and the shortened name of the reference `HEAD`
/// points at.
///
/// Every `HEAD`-derived field is `None` when it cannot be determined; this
/// function currently never returns an error.
pub fn info(&self) -> Result<RepoInfo, GitError> {
    let repo = self.local();
    let head_sha = repo.head_id().ok().map(|id| id.to_string());
    let head_short_sha = head_sha
        .as_deref()
        .map(|sha| sha[..sha.len().min(7)].to_string());
    let branch = match repo.head_name() {
        Ok(Some(name)) => std::str::from_utf8(name.shorten())
            .ok()
            .map(str::to_owned),
        _ => None,
    };
    Ok(RepoInfo {
        workdir: self.workdir.clone(),
        head_sha,
        head_short_sha,
        branch,
    })
}
}
/// Blob size limit for blame (1 MiB) used when `BASEMIND_BLAME_MAX_BYTES` is not usable.
const BLAME_DEFAULT_MAX_BYTES: u64 = 1 << 20;
/// Line-count limit for blame used when `BASEMIND_BLAME_MAX_LINES` is not usable.
const BLAME_DEFAULT_MAX_LINES: u64 = 5_000;

/// Reads the environment variable `name` as a `u64`.
///
/// Surrounding whitespace is ignored, so a value with a stray space or trailing
/// newline still counts. Falls back to `default` when the variable is unset,
/// not valid Unicode, or not a non-negative integer that fits in `u64`.
fn blame_limit_from_env(name: &str, default: u64) -> u64 {
    std::env::var(name)
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .unwrap_or(default)
}

/// Returns the blame byte limit from `BASEMIND_BLAME_MAX_BYTES`, or the default.
fn blame_max_bytes_from_env() -> u64 {
    blame_limit_from_env("BASEMIND_BLAME_MAX_BYTES", BLAME_DEFAULT_MAX_BYTES)
}

/// Returns the blame line limit from `BASEMIND_BLAME_MAX_LINES`, or the default.
fn blame_max_lines_from_env() -> u64 {
    blame_limit_from_env("BASEMIND_BLAME_MAX_LINES", BLAME_DEFAULT_MAX_LINES)
}
/// Measures the blob at `path` in the commit that `rev` resolves to.
///
/// Returns `(byte_length, line_count)`. A final line without a trailing
/// newline counts as a line, and an empty blob has zero lines. Returns `None`
/// when the revision, commit, tree or entry cannot be loaded, when there is no
/// entry at `path`, or when the entry is not a blob.
fn blob_size_and_line_count(
    local: &gix::Repository,
    rev: &str,
    path: &crate::path::RelPath,
) -> Option<(u64, u64)> {
    let commit_id = local.rev_parse_single(rev).ok()?.detach();
    let tree = local.find_commit(commit_id).ok()?.tree().ok()?;
    let entry = tree.lookup_entry_by_path(path).ok()??;
    let object = entry.object().ok()?;
    if !matches!(object.kind, gix::object::Kind::Blob) {
        return None;
    }
    let content = &object.data;
    let byte_len = content.len() as u64;
    let newline_total = memchr::memchr_iter(b'\n', content).count() as u64;
    // A non-empty blob that does not end in a newline has one more line than
    // it has newline characters.
    let unterminated_tail = matches!(content.last(), Some(&last) if last != b'\n');
    let line_total = newline_total + u64::from(unterminated_tail);
    Some((byte_len, line_total))
}
/// Reports whether the rendered text of `err` contains one of the substrings
/// this module treats as a sign that a blame failed because of a shallow clone.
///
/// Matching is case-sensitive.
fn looks_like_shallow_blame_error<E: std::fmt::Display>(err: &E) -> bool {
    const SHALLOW_MARKERS: [&str; 3] = [
        "Could not find existing iterator over a tree",
        "Could not find commit",
        "shallow",
    ];
    let rendered = err.to_string();
    SHALLOW_MARKERS
        .iter()
        .any(|marker| rendered.contains(*marker))
}
/// Resolves `rev_sha` to a tree: the tree itself when it names a tree, or the
/// commit's tree when it names a commit.
///
/// # Errors
/// Returns [`GitError::RevParse`] when `rev_sha` cannot be resolved, and
/// [`GitError::Read`] when the object (or the commit's tree) cannot be loaded
/// or when the object is neither a commit nor a tree. Load failures carry the
/// underlying error text instead of a generic message.
fn resolve_tree<'r>(local: &'r gix::Repository, rev_sha: &str) -> Result<gix::Tree<'r>, GitError> {
    let read_err = |msg: String| GitError::Read {
        what: rev_sha.to_string(),
        msg,
    };
    let object = local
        .rev_parse_single(rev_sha)
        .map_err(|e| GitError::RevParse {
            rev: rev_sha.to_string(),
            msg: e.to_string(),
        })?
        .object()
        .map_err(|e| read_err(e.to_string()))?;
    let kind = object.kind;
    match kind {
        gix::object::Kind::Commit => {
            let commit = object
                .try_into_commit()
                .map_err(|e| read_err(e.to_string()))?;
            // Report why the tree could not be loaded rather than claiming
            // the revision is the wrong kind of object.
            commit.tree().map_err(|e| read_err(e.to_string()))
        }
        gix::object::Kind::Tree => object
            .try_into_tree()
            .map_err(|e| read_err(e.to_string())),
        _ => Err(read_err("not a commit or tree".to_string())),
    }
}
/// Routes one status item to the classifier for its comparison kind
/// (tree-vs-index or index-vs-worktree).
fn classify_status_item(item: &gix::status::Item, out: &mut WorkingTreeStatus) {
    match item {
        gix::status::Item::TreeIndex(change) => classify_tree_index(change, out),
        gix::status::Item::IndexWorktree(entry) => classify_index_worktree(entry, out),
    }
}
/// Records one index-vs-worktree status item in `out`.
///
/// Modifications and rewrites go to `modified` (a rewrite is recorded under
/// its directory-walk entry path); directory contents go to `untracked`.
fn classify_index_worktree(item: &gix::status::index_worktree::Item, out: &mut WorkingTreeStatus) {
    use gix::status::index_worktree::Item as I;
    let (bucket, raw_path) = match item {
        I::Modification { rela_path, .. } => (&mut out.modified, rela_path.as_slice()),
        I::DirectoryContents { entry, .. } => (&mut out.untracked, entry.rela_path.as_slice()),
        I::Rewrite { dirwalk_entry, .. } => {
            (&mut out.modified, dirwalk_entry.rela_path.as_slice())
        }
    };
    bucket.push(crate::path::RelPath::from(raw_path));
}
/// Records one tree-vs-index change in `out`.
///
/// Additions go to `staged_added`, deletions to `staged_deleted`, and both
/// modifications and rewrites to `staged_modified`. A change whose location
/// `decode_path` cannot decode is dropped.
fn classify_tree_index(change: &gix::diff::index::Change, out: &mut WorkingTreeStatus) {
    use gix::diff::index::Change as C;
    let (bucket, decoded) = match change {
        C::Addition { location, .. } => (&mut out.staged_added, decode_path(location)),
        C::Deletion { location, .. } => (&mut out.staged_deleted, decode_path(location)),
        C::Modification { location, .. } => (&mut out.staged_modified, decode_path(location)),
        C::Rewrite { location, .. } => (&mut out.staged_modified, decode_path(location)),
    };
    if let Some(path) = decoded {
        bucket.push(path);
    }
}