use anyhow::Result;
use clap::{Parser, Subcommand};
use organizational_intelligence_plugin::{
analyzer::OrgAnalyzer,
features::{CommitFeatures, FeatureExtractor},
query::{QueryParser, QueryType},
storage::FeatureStore,
};
use std::collections::HashMap;
// Top-level CLI definition parsed via clap's derive API.
// NOTE(review): plain `//` comments are used throughout the clap types —
// `///` doc comments on clap-derive items become user-visible help text
// and would change the CLI's output.
#[derive(Parser)]
#[command(name = "oip-gpu")]
#[command(about = "GPU-Accelerated Correlation & Pattern Prediction System")]
#[command(version)]
struct Cli {
// Selected subcommand (see `Commands`).
#[command(subcommand)]
command: Commands,
// Optional compute-backend override; `global` so it works on any subcommand.
// Only logged in `main` and forwarded to the handlers in this file.
#[arg(long, global = true, value_enum)]
backend: Option<Backend>,
// Raises the tracing filter from "info" to "debug" (see `main`).
#[arg(short, long, global = true)]
verbose: bool,
// Optional config file path; parsed but not read anywhere in this file.
#[arg(long, global = true)]
config: Option<std::path::PathBuf>,
}
// Compute backend the user may force with `--backend`. In this file the
// choice is only logged and passed through to the subcommand handlers.
// (`//` instead of `///`: doc comments on ValueEnum variants become help text.)
#[derive(Clone, Copy, Debug, clap::ValueEnum)]
enum Backend {
// GPU-accelerated path.
Gpu,
// CPU SIMD path.
Simd,
// Plain scalar CPU fallback.
Cpu,
}
// All `oip-gpu` subcommands. Field names define the CLI flags, so they must
// not change. (`//` instead of `///`: doc comments on clap items become
// user-visible help text.)
#[derive(Subcommand)]
enum Commands {
// Analyze a repository and persist extracted commit features.
// The four `target` fields are mutually exclusive (clap arg group);
// `cmd_analyze` bails if none is given.
Analyze {
// GitHub organization (not yet implemented — bails in `cmd_analyze`).
#[arg(long, group = "target")]
org: Option<String>,
// Multiple repositories (not yet implemented — bails in `cmd_analyze`).
#[arg(long, group = "target")]
repos: Option<String>,
// Single repository in `owner/repo` form.
#[arg(long, group = "target")]
repo: Option<String>,
// Path to a local git checkout (fully offline path).
#[arg(long, group = "target")]
local: Option<std::path::PathBuf>,
// Destination feature-store file.
#[arg(short, long, default_value = "oip-gpu.db")]
output: std::path::PathBuf,
// Parsed but currently unused by `cmd_analyze` (`_since`).
#[arg(long)]
since: Option<String>,
// Parsed but currently unused by `cmd_analyze` (`_workers`).
#[arg(long)]
workers: Option<usize>,
// Upper bound on commits walked per repository.
#[arg(long, default_value = "1000")]
max_commits: usize,
},
// Correlation analysis (stub — see `cmd_correlate`).
Correlate {
#[arg(short, long)]
input: std::path::PathBuf,
#[arg(short, long)]
output: std::path::PathBuf,
#[arg(long)]
categories: Option<String>,
// Time-lag parameter; semantics TBD in the Phase 1 implementation.
#[arg(long)]
lag: Option<u32>,
#[arg(long, value_enum, default_value = "json")]
format: OutputFormat,
#[arg(long)]
threshold: Option<f32>,
},
// Defect prediction (stub — see `cmd_predict`). The three `predict_target`
// fields are mutually exclusive.
Predict {
#[arg(long, group = "predict_target")]
pr: Option<String>,
#[arg(long, group = "predict_target")]
files: Option<String>,
#[arg(long, group = "predict_target")]
org: Option<String>,
#[arg(long)]
model: Option<std::path::PathBuf>,
#[arg(long)]
explain: bool,
},
// Natural-language query against a saved feature store (see `cmd_query`).
Query {
// Free-form query text, e.g. "show all defects".
query: String,
// Feature-store file produced by `analyze`.
#[arg(short, long, default_value = "oip-gpu.db")]
input: std::path::PathBuf,
#[arg(long, value_enum, default_value = "table")]
format: OutputFormat,
// Truncates ranked output only (see `execute_query`).
#[arg(long)]
limit: Option<usize>,
// Optional JSON export path (always JSON, regardless of `format`).
#[arg(long)]
export: Option<std::path::PathBuf>,
},
// K-means-style clustering (stub — see `cmd_cluster`).
Cluster {
#[arg(short, long)]
input: std::path::PathBuf,
// Number of clusters (`-k`).
#[arg(short = 'k', long, default_value = "10")]
clusters: usize,
#[arg(short, long)]
output: std::path::PathBuf,
},
// Graph algorithms over the feature data (stub — see `cmd_graph`).
Graph {
#[arg(short, long)]
input: std::path::PathBuf,
#[arg(long, value_enum)]
algorithm: GraphAlgorithm,
#[arg(short, long)]
output: std::path::PathBuf,
},
// Export the store to another format (stub — see `cmd_export`).
Export {
#[arg(short, long)]
input: std::path::PathBuf,
#[arg(long, value_enum)]
format: ExportFormat,
#[arg(short, long)]
output: std::path::PathBuf,
},
// Run the criterion benchmark suite via `cargo bench` (see `cmd_benchmark`).
Benchmark {
#[arg(long, value_enum)]
suite: BenchmarkSuite,
// Informational only; criterion writes to target/criterion/ regardless.
#[arg(short, long)]
output: Option<std::path::PathBuf>,
},
}
// Stdout rendering format for query/correlate results.
// (`//` instead of `///`: doc comments on ValueEnum variants become help text.)
#[derive(Clone, Copy, Debug, clap::ValueEnum)]
enum OutputFormat {
Json,
Yaml,
Csv,
// Human-readable box-drawing table (default for `query`).
Table,
}
// File formats for the (not yet implemented) `export` subcommand.
// (`//` instead of `///`: doc comments on ValueEnum variants become help text.)
#[derive(Clone, Copy, Debug, clap::ValueEnum)]
enum ExportFormat {
Json,
Yaml,
Csv,
// Columnar format; only declared here, no writer exists yet in this file.
Parquet,
}
// Graph algorithms selectable for the (not yet implemented) `graph` command.
// (`//` instead of `///`: doc comments on ValueEnum variants become help text.)
#[derive(Clone, Copy, Debug, clap::ValueEnum)]
enum GraphAlgorithm {
Pagerank,
Betweenness,
Bfs,
}
// Benchmark suites; mapped to criterion name filters in `cmd_benchmark`
// ("All" maps to an empty filter, i.e. run everything).
// (`//` instead of `///`: doc comments on ValueEnum variants become help text.)
#[derive(Clone, Copy, Debug, clap::ValueEnum)]
enum BenchmarkSuite {
Correlation,
Clustering,
Graph,
All,
}
/// Entry point: parse arguments, initialize tracing, then dispatch to the
/// handler for the selected subcommand. Each handler receives the optional
/// globally forced backend.
#[tokio::main]
async fn main() -> Result<()> {
    // Parse first so `--verbose` can steer log initialization.
    let cli = Cli::parse();

    // `--verbose` raises the filter to debug; otherwise stay at info.
    tracing_subscriber::fmt()
        .with_env_filter(if cli.verbose { "debug" } else { "info" })
        .init();

    if let Some(backend) = cli.backend {
        tracing::info!("Forcing backend: {:?}", backend);
    }

    // `Backend` is `Copy`, so grab it before `cli.command` is moved below.
    let backend = cli.backend;
    match cli.command {
        Commands::Analyze { org, repos, repo, local, output, since, workers, max_commits } => {
            cmd_analyze(org, repos, repo, local, output, since, workers, max_commits, backend)
                .await
        }
        Commands::Correlate { input, output, categories, lag, format, threshold } => {
            cmd_correlate(input, output, categories, lag, format, threshold, backend).await
        }
        Commands::Predict { pr, files, org, model, explain } => {
            cmd_predict(pr, files, org, model, explain, backend).await
        }
        Commands::Query { query, input, format, limit, export } => {
            cmd_query(query, input, format, limit, export, backend).await
        }
        Commands::Cluster { input, clusters, output } => {
            cmd_cluster(input, clusters, output, backend).await
        }
        Commands::Graph { input, algorithm, output } => {
            cmd_graph(input, algorithm, output, backend).await
        }
        Commands::Export { input, format, output } => cmd_export(input, format, output).await,
        Commands::Benchmark { suite, output } => cmd_benchmark(suite, output, backend).await,
    }
}
/// Remote `analyze` run: validates the target selector flags, analyzes a
/// single GitHub repository, extracts per-commit features, and persists them
/// to `output`.
///
/// `--org` and `--repos` are accepted by the CLI but bail with a pointer at
/// the supported modes; `_since` / `_workers` are parsed but currently
/// unused. Local paths are delegated to `cmd_analyze_local`.
#[allow(clippy::too_many_arguments)]
async fn cmd_analyze(
    org: Option<String>,
    repos: Option<String>,
    repo: Option<String>,
    local: Option<std::path::PathBuf>,
    output: std::path::PathBuf,
    _since: Option<String>,
    _workers: Option<usize>,
    max_commits: usize,
    backend: Option<Backend>,
) -> Result<()> {
    tracing::info!("Starting GPU-accelerated analysis");
    if let Some(b) = backend {
        println!("âī¸ Backend: {:?}", b);
    }
    // Local checkouts take the fully offline path.
    if let Some(local_path) = local {
        return cmd_analyze_local(local_path, output, max_commits).await;
    }
    // Resolve the mutually exclusive target flags down to one repo spec.
    let target = if let Some(_org_name) = org {
        println!("đĻ Organization analysis not yet implemented");
        println!("đ Phase 1: Single repository only");
        anyhow::bail!("Organization analysis pending (use --repo or --local instead)");
    } else if let Some(_repos_list) = repos {
        println!("đĻ Multi-repository analysis not yet implemented");
        anyhow::bail!("Multi-repo analysis pending (use --repo or --local instead)");
    } else if let Some(repo_spec) = repo {
        repo_spec
    } else {
        anyhow::bail!("Must specify --org, --repos, --repo, or --local");
    };
    let parts: Vec<&str> = target.split('/').collect();
    // FIX: previously only the part count was checked, so inputs such as
    // "/repo" or "owner/" passed validation and produced a broken URL.
    if parts.len() != 2 || parts.iter().any(|p| p.is_empty()) {
        anyhow::bail!("Repository must be in owner/repo format (e.g., rust-lang/rust)");
    }
    let (owner, repo_name) = (parts[0], parts[1]);
    let repo_url = format!("https://github.com/{}/{}", owner, repo_name);
    println!("đ Analyzing repository: {}", target);
    // The analyzer caches clones under a temp directory.
    let cache_dir = std::env::temp_dir().join("oip-gpu-cache");
    std::fs::create_dir_all(&cache_dir)?;
    let analyzer = OrgAnalyzer::new(&cache_dir);
    println!("đ Analyzing commits (max {})...", max_commits);
    let patterns = analyzer
        .analyze_repository(&repo_url, repo_name, max_commits)
        .await?;
    println!("â
Found {} defect categories", patterns.len());
    println!("đ§ Extracting features for GPU processing...");
    let extractor = FeatureExtractor::new();
    let mut store = FeatureStore::new()?;
    let mut total_features = 0;
    // One feature vector per defect-pattern example.
    for pattern in &patterns {
        let category_num = pattern.category as u8;
        for instance in &pattern.examples {
            let features = extractor.extract(
                category_num,
                instance.files_affected,
                instance.lines_added,
                instance.lines_removed,
                instance.timestamp,
            )?;
            store.insert(features)?;
            total_features += 1;
        }
    }
    println!("â
Extracted {} feature vectors", total_features);
    println!("đž Saving to {}...", output.display());
    store.save(&output).await?;
    println!("⨠Analysis complete!");
    println!(
        "đ Features: {} vectors à {} dimensions",
        total_features,
        CommitFeatures::DIMENSION
    );
    println!("đ¯ Next: oip-gpu correlate --input {}", output.display());
    Ok(())
}
async fn cmd_analyze_local(
local_path: std::path::PathBuf,
output: std::path::PathBuf,
max_commits: usize,
) -> Result<()> {
use organizational_intelligence_plugin::classifier::RuleBasedClassifier;
println!("đ Analyzing local repository: {}", local_path.display());
if !local_path.join(".git").exists() {
anyhow::bail!("Not a git repository: {}", local_path.display());
}
println!("đ Analyzing commits (max {})...", max_commits);
let repo = git2::Repository::open(&local_path)?;
let mut revwalk = repo.revwalk()?;
revwalk.push_head()?;
revwalk.set_sorting(git2::Sort::TIME)?;
let classifier = RuleBasedClassifier::new();
let extractor = FeatureExtractor::new();
let mut store = FeatureStore::new()?;
let mut commit_count = 0;
let mut feature_count = 0;
let mut category_counts = std::collections::HashMap::new();
for oid in revwalk.take(max_commits) {
let oid = oid?;
let commit = repo.find_commit(oid)?;
let (files_changed, lines_added, lines_deleted) = if commit.parent_count() > 0 {
let parent = commit.parent(0)?;
let diff =
repo.diff_tree_to_tree(Some(&parent.tree()?), Some(&commit.tree()?), None)?;
let stats = diff.stats()?;
(stats.files_changed(), stats.insertions(), stats.deletions())
} else {
(0, 0, 0)
};
let message = commit.message().unwrap_or("");
let category_num = if let Some(classification) = classifier.classify_from_message(message) {
classification.category as u8
} else {
0 };
*category_counts.entry(category_num).or_insert(0usize) += 1;
if let Ok(features) = extractor.extract(
category_num,
files_changed,
lines_added,
lines_deleted,
commit.time().seconds(),
) {
store.insert(features)?;
feature_count += 1;
}
commit_count += 1;
if commit_count % 100 == 0 {
print!("\rđ Processed {} commits...", commit_count);
}
}
println!();
println!("â
Analyzed {} commits", commit_count);
println!("â
Extracted {} feature vectors", feature_count);
println!();
println!("đ Defect category distribution:");
let mut sorted: Vec<_> = category_counts.iter().collect();
sorted.sort_by(|a, b| b.1.cmp(a.1));
for (cat, count) in sorted.iter().take(5) {
let pct = (**count as f32 / commit_count as f32) * 100.0;
println!(" Category {}: {} ({:.1}%)", cat, count, pct);
}
println!();
println!("đž Saving to {}...", output.display());
store.save(&output).await?;
println!("⨠Analysis complete!");
println!(
"đ Features: {} vectors à {} dimensions",
feature_count,
CommitFeatures::DIMENSION
);
println!(
"đ¯ Next: oip-gpu query --input {} \"show all defects\"",
output.display()
);
Ok(())
}
/// Placeholder for the `correlate` subcommand (Phase 1): prints a notice and
/// succeeds. Parameters are accepted and ignored so the CLI surface is stable.
async fn cmd_correlate(
    _input: std::path::PathBuf,
    _output: std::path::PathBuf,
    _categories: Option<String>,
    _lag: Option<u32>,
    _format: OutputFormat,
    _threshold: Option<f32>,
    _backend: Option<Backend>,
) -> Result<()> {
    for notice in [
        "Correlate command - not yet implemented",
        "Phase 1 implementation pending",
    ] {
        println!("{}", notice);
    }
    Ok(())
}
/// Placeholder for the `predict` subcommand (Phase 3): prints a notice and
/// succeeds. Parameters are accepted and ignored so the CLI surface is stable.
async fn cmd_predict(
    _pr: Option<String>,
    _files: Option<String>,
    _org: Option<String>,
    _model: Option<std::path::PathBuf>,
    _explain: bool,
    _backend: Option<Backend>,
) -> Result<()> {
    for notice in [
        "Predict command - not yet implemented",
        "Phase 3 implementation pending",
    ] {
        println!("{}", notice);
    }
    Ok(())
}
/// Run a natural-language query against a saved feature store and render the
/// result in the requested format, optionally also exporting it as JSON.
async fn cmd_query(
    query: String,
    input: std::path::PathBuf,
    format: OutputFormat,
    limit: Option<usize>,
    export: Option<std::path::PathBuf>,
    _backend: Option<Backend>,
) -> Result<()> {
    println!("đ Executing query: \"{}\"", query);
    // Parse the free-form query text into a structured query plan.
    let plan = QueryParser::new().parse(&query)?;
    println!("đ Query type: {:?}", plan.query_type);
    println!();
    println!("đ Loading features from {}...", input.display());
    let store = FeatureStore::load(&input).await?;
    // An empty store usually means `analyze` has not been run yet.
    if store.is_empty() {
        println!("â ī¸ No features found in store");
        println!(
            "đĄ Run: oip-gpu analyze --repo owner/repo --output {}",
            input.display()
        );
        return Ok(());
    }
    println!("â
Loaded {} feature vectors", store.len());
    println!();
    let result = execute_query(&store, &plan, limit)?;
    // Render the result to stdout in the chosen format.
    match format {
        OutputFormat::Table => print_table(&result),
        OutputFormat::Json => println!("{}", serde_json::to_string_pretty(&result)?),
        OutputFormat::Yaml => println!("{}", serde_yaml::to_string(&result)?),
        OutputFormat::Csv => print_csv(&result)?,
    }
    // The export file is always JSON, independent of the stdout `format`.
    if let Some(path) = export {
        std::fs::write(&path, serde_json::to_string_pretty(&result)?)?;
        println!();
        println!("đž Results exported to: {}", path.display());
    }
    Ok(())
}
/// Translate a parsed query into a `QueryResult` over the feature store.
/// `limit` truncates only the ranked "most common defect" output; the other
/// query types return everything.
fn execute_query(
    store: &FeatureStore,
    query: &organizational_intelligence_plugin::query::Query,
    limit: Option<usize>,
) -> Result<QueryResult> {
    match &query.query_type {
        QueryType::MostCommonDefect => {
            // Rank categories by descending count, then apply any row limit.
            let mut ranked: Vec<_> = count_by_category(store).into_iter().collect();
            ranked.sort_by(|a, b| b.1.cmp(&a.1));
            if let Some(n) = limit {
                ranked.truncate(n);
            }
            Ok(QueryResult::CategoryCounts(ranked))
        }
        QueryType::CountByCategory => {
            // Stable presentation: order by category id.
            let mut by_cat: Vec<_> = count_by_category(store).into_iter().collect();
            by_cat.sort_by_key(|entry| entry.0);
            Ok(QueryResult::CategoryCounts(by_cat))
        }
        QueryType::ListAll => Ok(QueryResult::Summary {
            total_features: store.len(),
            category_counts: count_by_category(store),
        }),
        QueryType::Unknown(q) => {
            anyhow::bail!("Unknown query: '{}'\n\nSupported queries:\n - show me most common defect\n - count defects by category\n - show all defects", q)
        }
    }
}
/// Count stored feature vectors per defect category, probing category ids
/// 0..10. Categories whose lookup errors are omitted from the map rather
/// than reported as zero (matching the store's best-effort query API).
fn count_by_category(store: &FeatureStore) -> HashMap<u8, usize> {
    (0u8..10)
        .filter_map(|cat| {
            store
                .query_by_category(cat)
                .ok()
                .map(|rows| (cat, rows.len()))
        })
        .collect()
}
// Serializable result of `execute_query`; rendered by `print_table` /
// `print_csv` or serialized directly as JSON/YAML in `cmd_query`.
#[derive(Debug, serde::Serialize)]
enum QueryResult {
// Ordered (category id, count) pairs; ordering is chosen by the query type.
CategoryCounts(Vec<(u8, usize)>),
// Overview returned for the "show all defects" (ListAll) query.
Summary {
// Total number of feature vectors in the store.
total_features: usize,
// Feature-vector count per category id (unordered map).
category_counts: HashMap<u8, usize>,
},
}
/// Render a query result as a human-readable table on stdout.
/// Rows/categories with a zero count are suppressed in both variants.
fn print_table(result: &QueryResult) {
    match result {
        QueryResult::CategoryCounts(counts) => {
            println!("ââââââââââââŦââââââââ");
            println!("â Category â Count â");
            println!("ââââââââââââŧââââââââ¤");
            for (cat, count) in counts.iter().filter(|(_, c)| *c > 0) {
                println!("â {:8} â {:5} â", cat, count);
            }
            println!("ââââââââââââ´ââââââââ");
        }
        QueryResult::Summary {
            total_features,
            category_counts,
        } => {
            println!("đ Total features: {}", total_features);
            println!();
            println!("By category:");
            // Present categories by descending count.
            let mut ordered: Vec<_> = category_counts.iter().collect();
            ordered.sort_by(|a, b| b.1.cmp(a.1));
            for (cat, count) in ordered.into_iter().filter(|(_, c)| **c > 0) {
                let pct = (*count as f32 / *total_features as f32) * 100.0;
                println!(" Category {}: {} ({:.1}%)", cat, count, pct);
            }
        }
    }
}
/// Emit a query result to stdout as minimal CSV (zero counts suppressed).
/// Always returns `Ok(())`; the `Result` return mirrors the other printers'
/// call sites in `cmd_query`.
fn print_csv(result: &QueryResult) -> Result<()> {
    match result {
        QueryResult::CategoryCounts(counts) => {
            println!("category,count");
            counts
                .iter()
                .filter(|(_, n)| *n > 0)
                .for_each(|(cat, n)| println!("{},{}", cat, n));
        }
        QueryResult::Summary {
            total_features,
            category_counts,
        } => {
            println!("metric,value");
            println!("total_features,{}", total_features);
            // NOTE: HashMap iteration order is unspecified, so category rows
            // appear in arbitrary order (same as the original behavior).
            for (cat, n) in category_counts {
                if *n > 0 {
                    println!("category_{},{}", cat, n);
                }
            }
        }
    }
    Ok(())
}
/// Placeholder for the `cluster` subcommand (Phase 3): prints a notice and
/// succeeds. Parameters are accepted and ignored so the CLI surface is stable.
async fn cmd_cluster(
    _input: std::path::PathBuf,
    _clusters: usize,
    _output: std::path::PathBuf,
    _backend: Option<Backend>,
) -> Result<()> {
    for notice in [
        "Cluster command - not yet implemented",
        "Phase 3 implementation pending",
    ] {
        println!("{}", notice);
    }
    Ok(())
}
/// Placeholder for the `graph` subcommand (Phase 2): prints a notice and
/// succeeds. Parameters are accepted and ignored so the CLI surface is stable.
async fn cmd_graph(
    _input: std::path::PathBuf,
    _algorithm: GraphAlgorithm,
    _output: std::path::PathBuf,
    _backend: Option<Backend>,
) -> Result<()> {
    for notice in [
        "Graph command - not yet implemented",
        "Phase 2 implementation pending",
    ] {
        println!("{}", notice);
    }
    Ok(())
}
/// Placeholder for the `export` subcommand: prints a notice and succeeds.
/// Parameters are accepted and ignored so the CLI surface is stable.
async fn cmd_export(
    _input: std::path::PathBuf,
    _format: ExportFormat,
    _output: std::path::PathBuf,
) -> Result<()> {
    let notice = "Export command - not yet implemented";
    println!("{}", notice);
    Ok(())
}
/// Run the criterion benchmark suite by shelling out to
/// `cargo bench --bench gpu_benchmarks`, optionally narrowed to one suite.
/// The `output` path is informational only — criterion writes its own
/// reports under target/criterion/.
async fn cmd_benchmark(
    suite: BenchmarkSuite,
    output: Option<std::path::PathBuf>,
    backend: Option<Backend>,
) -> Result<()> {
    println!("đ Running GPU benchmark suite");
    if let Some(b) = backend {
        println!("âī¸ Backend: {:?}", b);
    }
    // Map the suite selection to a criterion name filter ("" = run all).
    let filter = match suite {
        BenchmarkSuite::Correlation => "correlation",
        BenchmarkSuite::Clustering => "clustering",
        BenchmarkSuite::Graph => "graph",
        BenchmarkSuite::All => "",
    };
    println!("đ Suite: {:?}", suite);
    println!("đŦ Running criterion benchmarks...");
    println!();
    let mut bench = std::process::Command::new("cargo");
    bench.args(["bench", "--bench", "gpu_benchmarks"]);
    if !filter.is_empty() {
        bench.args(["--", filter]);
    }
    // Inherit stdio so criterion's progress is visible; fail on nonzero exit.
    if !bench.status()?.success() {
        anyhow::bail!("Benchmark execution failed");
    }
    if let Some(output_path) = output {
        println!("đž Results saved to: {}", output_path.display());
        println!("âšī¸ Note: Criterion results are in target/criterion/");
    }
    println!();
    println!("⨠Benchmarks complete!");
    println!("đ See target/criterion/ for detailed results");
    Ok(())
}