use chrono::{DateTime, Duration, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
/// Identifier for an [`Author`]; stored in `Author::id`, `Commit::author` and
/// `BlameLine::author_id`, and used as the key of `RepoSnapshot::commits_by_author`.
// NOTE(review): presumably an index into `RepoSnapshot::authors` — confirm against the code that assigns ids.
pub type AuthorId = usize;
/// Compact handle for a commit SHA string stored in a [`CommitInterner`].
///
/// The wrapped `u32` is the position of the string inside the interner that
/// issued the id; pass it to [`CommitInterner::resolve`] to get the string back.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CommitId(pub u32);
/// Table that maps commit SHA strings to small [`CommitId`] handles.
///
/// The derived `Default` yields an empty table.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CommitInterner {
/// Interned strings; a string's index in this vector is the value of its `CommitId`.
strings: Vec<String>,
}
impl CommitInterner {
    /// Returns the [`CommitId`] for `sha`, adding the string to the table the
    /// first time it is seen.
    ///
    /// Interning the same string again yields the same id. The lookup is a
    /// linear scan over every string interned so far.
    ///
    /// # Panics
    ///
    /// Panics if the string's index does not fit in the `u32` carried by
    /// `CommitId`. A plain `as u32` cast would wrap silently in that case and
    /// hand the same id to two different SHAs.
    pub fn intern(&mut self, sha: &str) -> CommitId {
        if let Some(existing) = self.strings.iter().position(|s| s == sha) {
            return Self::id_for_index(existing);
        }
        // Compute (and range-check) the id before pushing so a failed check
        // leaves the table untouched.
        let id = Self::id_for_index(self.strings.len());
        self.strings.push(sha.to_string());
        id
    }

    /// Returns the string that `id` was issued for.
    ///
    /// # Panics
    ///
    /// Panics if `id` is out of range for this interner, e.g. because it was
    /// issued by a different interner.
    pub fn resolve(&self, id: CommitId) -> &str {
        &self.strings[id.0 as usize]
    }

    /// Number of distinct strings interned so far.
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Returns `true` when nothing has been interned yet.
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }

    /// Converts a vector index into a `CommitId`, panicking instead of truncating.
    fn id_for_index(index: usize) -> CommitId {
        // Fully qualified so the call does not depend on `TryFrom` being in
        // the prelude of the crate's edition.
        let raw = <u32 as std::convert::TryFrom<usize>>::try_from(index)
            .expect("commit interner index does not fit in u32");
        CommitId(raw)
    }
}
/// Kind of change recorded for a file in a commit (see `FileChange::change_type`).
// NOTE(review): variant meanings are taken from their names; the code that assigns
// them is not in this file — confirm how renames/copies are classified there.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ChangeType {
Added,
Modified,
Deleted,
Renamed,
}
/// One entry of `Commit::files_changed`: a single file touched by a commit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileChange {
/// Path of the changed file. Used as the key of `RepoSnapshot::commits_by_file`
/// and compared against `FileEntry::path` when co-change pairs are counted.
pub path: PathBuf,
/// Presumably the number of lines added by the commit — confirm against the producer.
pub additions: u32,
/// Presumably the number of lines removed by the commit — confirm against the producer.
pub deletions: u32,
/// How the file was changed.
pub change_type: ChangeType,
}
/// A single commit as stored in `RepoSnapshot::commits`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Commit {
/// Interned handle for the commit's SHA; turn it back into a string with
/// `RepoSnapshot::resolve_commit`.
pub id: CommitId,
/// Author of the commit; the key under which `id` is filed in
/// `RepoSnapshot::commits_by_author`.
pub author: AuthorId,
/// Time associated with the commit, in UTC.
// NOTE(review): author time vs. committer time is not decided in this file — confirm.
pub timestamp: DateTime<Utc>,
/// Commit message text.
pub message: String,
/// Files touched by the commit; drives the per-file index and the co-change counts.
pub files_changed: Vec<FileChange>,
/// Merge-commit flag.
// NOTE(review): presumably equivalent to `parent_count > 1`; nothing here enforces that.
pub is_merge: bool,
/// Number of parents of the commit (as reported by whatever fills this struct in).
pub parent_count: usize,
}
/// One file listed in `RepoSnapshot::files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
/// Path of the file. Only changes to paths that appear in `RepoSnapshot::files`
/// are considered when co-change pairs are counted.
pub path: PathBuf,
/// Size of the file in bytes.
pub size_bytes: u64,
/// Whether the file is treated as binary (the detection rule lives outside this file).
pub is_binary: bool,
/// Depth of the file in the directory tree.
// NOTE(review): whether top-level files are depth 0 or 1 is not visible here — confirm.
pub depth: usize,
/// Object id of the file's blob, as a string.
// NOTE(review): presumably the hex git blob OID — confirm against the producer.
pub blob_oid: String,
}
/// A commit author as stored in `RepoSnapshot::authors`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Author {
/// Identifier that commits and blame entries use to refer to this author.
pub id: AuthorId,
/// Author name.
pub name: String,
/// Author e-mail address.
pub email: String,
}
/// A blame entry: one or more lines attributed to a single author.
///
/// [`BlameLine::new`] creates an entry for one line; [`compress_blame`] merges
/// adjacent entries by the same author into one entry with a larger `line_count`.
#[non_exhaustive]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlameLine {
/// Author the lines are attributed to.
pub author_id: AuthorId,
/// Timestamp attached to the entry, in UTC. After compression this is the
/// timestamp of the first entry of the merged run.
pub timestamp: DateTime<Utc>,
/// Number of lines this entry stands for (1 for a freshly created entry).
pub line_count: usize,
}
impl BlameLine {
pub fn new(author_id: AuthorId, timestamp: DateTime<Utc>) -> Self {
Self {
author_id,
timestamp,
line_count: 1,
}
}
}
/// Merges runs of adjacent blame entries that share an author.
///
/// Each run of consecutive entries with the same `author_id` collapses into a
/// single entry whose `line_count` is the sum of the run and whose other
/// fields (including `timestamp`) are those of the run's first entry. Entries
/// by the same author that are separated by another author stay separate.
/// An empty input yields an empty vector.
pub fn compress_blame(lines: Vec<BlameLine>) -> Vec<BlameLine> {
    // Sizing heuristic only (a quarter of the input); the vector grows as needed.
    let mut compressed: Vec<BlameLine> = Vec::with_capacity(lines.len() / 4);
    for line in lines {
        match compressed.last_mut() {
            // Same author as the run being built: extend it in place.
            Some(run) if run.author_id == line.author_id => {
                run.line_count += line.line_count;
            }
            // First entry, or the author changed: start a new run by moving
            // the entry in (no clone needed).
            _ => compressed.push(line),
        }
    }
    compressed
}
/// Metrics for a single function; stored in `FileComplexity::functions`.
// NOTE(review): how each metric is computed is decided by the analyzer that fills
// this struct in, which is not part of this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionMetrics {
/// Name of the function.
pub name: String,
/// Lines of code in the function.
pub loc: usize,
/// Cyclomatic complexity of the function.
pub cyclomatic_complexity: u32,
/// Deepest nesting level reached inside the function.
pub max_nesting_depth: u32,
}
/// Per-file complexity metrics; values of `RepoSnapshot::file_metrics`.
///
/// The derived `Default` sets every number to zero and `functions` to an empty list.
// NOTE(review): the exact definition of each metric (e.g. what `loc` excludes compared
// with `total_lines`) is decided by the analyzer that fills this in — confirm there.
#[non_exhaustive]
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct FileComplexity {
/// Total number of lines in the file.
pub total_lines: usize,
/// Lines of code in the file.
pub loc: usize,
/// Cyclomatic complexity for the file.
pub cyclomatic_complexity: u32,
/// Number of public methods found in the file.
pub public_methods: u32,
/// Number of properties found in the file.
pub properties: u32,
/// Metrics for the individual functions of the file.
pub functions: Vec<FunctionMetrics>,
/// Deepest nesting level found in the file.
pub max_nesting_depth: u32,
/// Variance of nesting depth.
// NOTE(review): the population this variance is taken over is not visible here.
pub nesting_variance: f64,
}
/// Time range used to decide which timestamps belong to an analysis
/// (see [`TimeWindow::contains`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimeWindow {
/// Inclusive lower bound; `None` means no lower bound.
pub since: Option<DateTime<Utc>>,
/// Inclusive upper bound; `None` means no upper bound.
pub until: Option<DateTime<Utc>>,
/// Set to 6 by `Default` and to 0 by `full_history`; not read anywhere in this file.
// NOTE(review): presumably the window length in months used by callers — confirm.
pub default_months: u32,
}
impl Default for TimeWindow {
    /// Builds a window that ends at the current UTC time and starts 180 days
    /// earlier, with `default_months` set to 6.
    fn default() -> Self {
        // Read the clock once so both bounds are derived from the same instant.
        let until = Utc::now();
        let since = until - Duration::days(180);
        Self {
            since: Some(since),
            until: Some(until),
            default_months: 6,
        }
    }
}
impl TimeWindow {
pub fn full_history() -> Self {
TimeWindow {
since: None,
until: None,
default_months: 0,
}
}
pub fn contains(&self, timestamp: &DateTime<Utc>) -> bool {
if let Some(since) = &self.since {
if timestamp < since {
return false;
}
}
if let Some(until) = &self.until {
if timestamp > until {
return false;
}
}
true
}
}
/// Collected data about one repository, plus indexes derived from it.
///
/// `RepoSnapshot::new` creates an empty snapshot. After `commits` and `files`
/// have been filled in, `build_indexes` recomputes `commits_by_author`,
/// `commits_by_file` and `file_change_pairs` from them.
#[non_exhaustive]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoSnapshot {
/// Path of the repository.
pub path: PathBuf,
/// Name of the repository.
pub name: String,
/// Name of the repository's default branch.
pub default_branch: String,
/// Time window this snapshot was created with.
pub time_window: TimeWindow,
/// Head commit as a string; empty until something outside this file sets it.
pub head_commit: String,
/// UTC time at which `RepoSnapshot::new` was called.
pub created_at: DateTime<Utc>,
/// Commits in the snapshot; the source for all three derived indexes.
pub commits: Vec<Commit>,
/// Files in the snapshot; only these paths take part in co-change counting.
pub files: Vec<FileEntry>,
/// Authors referenced by the snapshot.
pub authors: Vec<Author>,
/// Blame entries per file path.
pub blame_map: HashMap<PathBuf, Vec<BlameLine>>,
/// Derived index: commit ids per author, in the order the commits appear in `commits`.
pub commits_by_author: HashMap<AuthorId, Vec<CommitId>>,
/// Derived index: commit ids per changed file path, in the order the commits
/// appear in `commits`.
pub commits_by_file: HashMap<PathBuf, Vec<CommitId>>,
/// Derived index: `(path_a, path_b, count)` for pairs of known files changed in
/// the same commit at least 3 times, with `path_a <= path_b`, sorted by
/// descending `count`.
pub file_change_pairs: Vec<(PathBuf, PathBuf, usize)>,
/// Complexity metrics per file path.
pub file_metrics: HashMap<PathBuf, FileComplexity>,
/// Import relationships between files.
// NOTE(review): presumably maps a file to the files it imports — direction not visible here.
pub import_graph: HashMap<PathBuf, Vec<PathBuf>>,
/// Interner that issued the `CommitId`s used throughout the snapshot.
pub commit_interner: CommitInterner,
}
impl RepoSnapshot {
    /// Creates an empty snapshot for the repository at `path`.
    ///
    /// `created_at` is set to the current UTC time, `head_commit` to an empty
    /// string, and every collection (including the derived indexes and the
    /// commit interner) starts out empty.
    pub fn new(path: PathBuf, name: String, branch: String, window: TimeWindow) -> Self {
        RepoSnapshot {
            path,
            name,
            default_branch: branch,
            time_window: window,
            head_commit: String::new(),
            created_at: Utc::now(),
            commits: Vec::new(),
            files: Vec::new(),
            authors: Vec::new(),
            blame_map: HashMap::new(),
            commits_by_author: HashMap::new(),
            commits_by_file: HashMap::new(),
            file_change_pairs: Vec::new(),
            file_metrics: HashMap::new(),
            import_graph: HashMap::new(),
            commit_interner: CommitInterner::default(),
        }
    }

    /// Returns the SHA string behind `id`, looked up in this snapshot's interner.
    ///
    /// # Panics
    ///
    /// Panics if `id` is out of range for the interner (for example because
    /// it was issued by a different snapshot).
    pub fn resolve_commit(&self, id: CommitId) -> &str {
        self.commit_interner.resolve(id)
    }

    /// Rebuilds every derived index (`commits_by_author`, `commits_by_file`,
    /// `file_change_pairs`) from the current `commits` and `files`.
    ///
    /// Previous index contents are discarded, so the call can be repeated
    /// after the underlying data changes.
    pub fn build_indexes(&mut self) {
        self.build_commits_by_author();
        self.build_commits_by_file();
        self.build_file_change_pairs();
    }

    /// Groups commit ids by author, preserving the order of `commits`.
    fn build_commits_by_author(&mut self) {
        self.commits_by_author.clear();
        for commit in &self.commits {
            self.commits_by_author
                .entry(commit.author)
                .or_default()
                .push(commit.id);
        }
    }

    /// Groups commit ids by the path of every file each commit changed,
    /// preserving the order of `commits`.
    fn build_commits_by_file(&mut self) {
        self.commits_by_file.clear();
        for commit in &self.commits {
            for fc in &commit.files_changed {
                self.commits_by_file
                    .entry(fc.path.clone())
                    .or_default()
                    .push(commit.id);
            }
        }
    }

    /// Recomputes `file_change_pairs`: pairs of files from `files` that were
    /// changed together, most frequent first.
    fn build_file_change_pairs(&mut self) {
        use std::collections::HashSet;
        let known_files: HashSet<&PathBuf> = self.files.iter().map(|f| &f.path).collect();
        let mut pairs = count_co_changed_pairs(&self.commits, &known_files);
        // The counts come out of a hash map, so their incoming order differs
        // from run to run. Sorting by count alone would leave ties in that
        // arbitrary order; breaking ties on the paths makes the stored list
        // identical for identical input. Every (path, path) key is unique, so
        // an unstable sort is still fully deterministic.
        pairs.sort_unstable_by(|x, y| {
            y.2.cmp(&x.2)
                .then_with(|| x.0.cmp(&y.0))
                .then_with(|| x.1.cmp(&y.1))
        });
        self.file_change_pairs = pairs;
    }
}
/// Counts how often two files were changed by the same commit.
///
/// For every commit, only changed paths contained in `known_files` are
/// considered. Each unordered pair of those paths is counted once per
/// occurrence, and pairs that reach a count of at least 3 are returned as
/// `(path_a, path_b, count)` with `path_a <= path_b`.
///
/// The order of the returned vector is unspecified (it follows hash-map
/// iteration order); callers that need a stable order must sort it.
fn count_co_changed_pairs(
    commits: &[Commit],
    known_files: &std::collections::HashSet<&PathBuf>,
) -> Vec<(PathBuf, PathBuf, usize)> {
    use std::collections::HashMap as Map;

    // Pairs seen together fewer times than this are left out of the result.
    const MIN_CO_CHANGES: usize = 3;

    // Keys borrow the paths from `commits`, so the quadratic pair loop does
    // not allocate; paths are cloned only for pairs that survive the threshold.
    let mut pair_counts: Map<(&PathBuf, &PathBuf), usize> = Map::new();
    for commit in commits {
        let mut paths: Vec<&PathBuf> = commit
            .files_changed
            .iter()
            .map(|fc| &fc.path)
            .filter(|p| known_files.contains(p))
            .collect();
        // Sorting once per commit puts every (earlier, later) pair in
        // (smaller, larger) order, so (a, b) and (b, a) share one key.
        paths.sort_unstable();
        for (i, &first) in paths.iter().enumerate() {
            for &second in &paths[i + 1..] {
                *pair_counts.entry((first, second)).or_insert(0) += 1;
            }
        }
    }

    pair_counts
        .into_iter()
        .filter(|&(_, count)| count >= MIN_CO_CHANGES)
        .map(|((a, b), count)| (a.clone(), b.clone(), count))
        .collect()
}
// Unit tests are declared as a separate `tests` module, compiled only for test builds.
#[cfg(test)]
mod tests;