use std::path::{Path, PathBuf};
use chrono::{DateTime, Utc};
use clap::{Parser, Subcommand};
use ingest::{
GitSource, ImportOptions, ImportScope, ReasoningPipeline, ReasoningPipelineParams, Result,
ShaMap, TranscriptRoots, import_git_into_scoped_with_options, load_transcripts,
pipeline_default_commits,
};
use tracing::info;
// Top-level command-line interface of the `heddle-ingest` binary.
//
// NOTE: plain `//` comments are used on this type deliberately. clap's derive
// macros turn `///` doc comments into help text, so adding them would change
// the program's `--help` output.
#[derive(Debug, Parser)]
#[command(
name = "heddle-ingest",
about = "Import git history into a Heddle repository",
version
)]
struct Cli {
// The subcommand to run; `main` dispatches on this value.
#[command(subcommand)]
command: Command,
// Repeatable `-v` / `--verbose` flag, accepted on every subcommand
// (`global = true`). The number of occurrences is handed to `init_tracing`,
// which uses it to pick the fallback log level.
#[arg(short, long, action = clap::ArgAction::Count, global = true)]
verbose: u8,
}
// Subcommands of `heddle-ingest`. Each variant is handled by one function:
// `Map` -> `run_map`, `Import` -> `run_import`, `Reason` -> `run_reason`.
//
// NOTE: plain `//` comments are used deliberately; clap would turn `///` doc
// comments into help text and thereby change the `--help` output.
#[derive(Debug, Subcommand)]
enum Command {
// Inspect an existing sha map.
Map {
// Positional path of the sha map file; opened with `ShaMap::open`.
path: PathBuf,
// What to do with the opened map.
#[command(subcommand)]
action: MapAction,
},
// Import a git repository into a Heddle repository.
Import {
// `--git`: path of the source git repository.
#[arg(long)]
git: PathBuf,
// `--heddle`: path of the destination Heddle repository.
#[arg(long)]
heddle: PathBuf,
// `--lossy`: forwarded as `ImportOptions { lossy }`.
#[arg(long)]
lossy: bool,
// `--ref REF` (repeatable): forwarded to `ImportScope::refs`.
#[arg(long = "ref", value_name = "REF")]
refs: Vec<String>,
},
// Run the reasoning pipeline over already-imported commits.
Reason {
// `--git`: path of the git repository; also used to locate transcripts.
#[arg(long)]
git: PathBuf,
// `--heddle`: path of the Heddle repository that holds the sha map.
#[arg(long)]
heddle: PathBuf,
// `--commit SHA` (repeatable): explicit commits to process. When none are
// given, `pipeline_default_commits` supplies the list.
#[arg(long = "commit", value_name = "SHA")]
commits: Vec<String>,
// `--limit`: truncate the commit list to this many entries. Applied after
// the `--since` filter.
#[arg(long)]
limit: Option<usize>,
// `--since`: keep only commits whose `authored_at` is at or after this
// instant; commits that cannot be read are dropped with a warning.
#[arg(long)]
since: Option<DateTime<Utc>>,
// `--claude-home PATH`: overrides the default Claude transcript root.
// An empty string disables the root; omitting the flag keeps the default.
#[arg(long = "claude-home", value_name = "PATH")]
claude_home: Option<String>,
// `--codex-home PATH`: same override rules as `--claude-home`.
#[arg(long = "codex-home", value_name = "PATH")]
codex_home: Option<String>,
// `--opencode-home PATH`: same override rules as `--claude-home`.
#[arg(long = "opencode-home", value_name = "PATH")]
opencode_home: Option<String>,
// `--codex-since`: stored unchanged in `TranscriptRoots::codex_since`.
#[arg(long)]
codex_since: Option<DateTime<Utc>>,
// `--max-sessions-per-commit` (default 5): forwarded to
// `ReasoningPipelineParams::max_sessions_per_commit`.
#[arg(long, default_value_t = 5)]
max_sessions_per_commit: usize,
// `--min-match-confidence` (default 0.20): forwarded to
// `ReasoningPipelineParams::min_match_confidence`.
#[arg(long, default_value_t = 0.20)]
min_match_confidence: f32,
// `--dry-run`: turns off `emit_annotations` in the pipeline parameters and
// prints a preview of the candidates instead.
#[arg(long)]
dry_run: bool,
},
}
// Actions available under the `map` subcommand; executed by `run_map`.
//
// NOTE: plain `//` comments are used deliberately; clap would turn `///` doc
// comments into help text and thereby change the `--help` output.
#[derive(Debug, Subcommand)]
enum MapAction {
// Print the record count, commit count and path of the map.
Stats,
// Look up a git sha; `run_map` tries commit, then tree, then blob mappings.
LookupGit { sha: String },
// Reverse lookup: find the git sha mapped to a Heddle identifier.
LookupHeddle { heddle: String },
}
fn main() -> Result<()> {
let _ = rustls::crypto::ring::default_provider().install_default();
let cli = Cli::parse();
init_tracing(cli.verbose);
match cli.command {
Command::Map { path, action } => run_map(&path, action),
Command::Import {
git,
heddle,
lossy,
refs,
} => run_import(&git, &heddle, lossy, &refs),
Command::Reason {
git,
heddle,
commits,
limit,
since,
claude_home,
codex_home,
opencode_home,
codex_since,
max_sessions_per_commit,
min_match_confidence,
dry_run,
} => run_reason(ReasonArgs {
git: &git,
heddle: &heddle,
commits,
limit,
since,
claude_home: claude_home.as_deref(),
codex_home: codex_home.as_deref(),
opencode_home: opencode_home.as_deref(),
codex_since,
max_sessions_per_commit,
min_match_confidence,
dry_run,
}),
}
}
/// Imports the git repository at `git_path` into the Heddle repository at
/// `heddle_path`, then prints a summary of the import.
///
/// * `lossy` is forwarded as `ImportOptions { lossy }`.
/// * `refs` is copied into `ImportScope::refs`.
///
/// The summary goes to stdout; warnings about skipped refs and reflog entries
/// go to stderr.
///
/// # Errors
///
/// Propagates any error returned by `import_git_into_scoped_with_options`.
fn run_import(
    git_path: &std::path::Path,
    heddle_path: &std::path::Path,
    lossy: bool,
    refs: &[String],
) -> Result<()> {
    let options = ImportOptions { lossy };
    let scope = ImportScope::refs(refs.to_vec());
    // `_map` is a real binding (not `_`), so the returned map stays alive
    // until this function returns.
    let (stats, _map) =
        import_git_into_scoped_with_options(git_path, heddle_path, options, scope)?;

    // Ref accounting: every walked ref is either kept or ignored.
    let seen = &stats.refs_seen;
    let kept = seen.local_branches + seen.tags + seen.remote_branches;
    let ignored = seen.symbolic_skipped + seen.peel_failed + seen.non_commit_skipped;
    let walked = kept + ignored;

    println!("imported from {}", git_path.display());
    println!("refs:");
    println!(" walked: {walked} kept: {kept} ignored: {ignored}",);
    println!(" local branches: {}", seen.local_branches);
    println!(" tags: {}", seen.tags);
    println!(" remote branches: {}", seen.remote_branches);
    // The "ignored" categories are listed only when they actually occurred.
    if seen.symbolic_skipped > 0 {
        println!(
            " symbolic refs: {} ignored (e.g. origin/HEAD)",
            seen.symbolic_skipped,
        );
    }
    if seen.peel_failed > 0 {
        println!(
            " peel-failed: {} ignored (dangling targets)",
            seen.peel_failed,
        );
    }
    if seen.non_commit_skipped > 0 {
        println!(
            " non-commit refs: {} ignored (e.g. annotated tag → blob/tree, like junio-gpg-pub)",
            seen.non_commit_skipped,
        );
    }

    println!("commits:");
    println!(" imported: {}", stats.commits_imported);
    println!(" reflog-only: {}", stats.reflog_only_commits);
    println!("trees: {}", stats.trees_imported);
    println!("blobs: {}", stats.blobs_imported);
    println!("threads written: {}", stats.refs.threads_written);
    println!("markers written: {}", stats.refs.markers_written);

    let lossy_entries = &stats.lossy_entries;
    if !lossy_entries.is_empty() {
        println!(
            "lossy import accepted for {} tree entries:",
            lossy_entries.len()
        );
        for entry in lossy_entries {
            println!(" {}", entry.summary_line());
        }
    }

    // The oplog breakdown is printed only when at least one op was recorded.
    let oplog = &stats.oplog;
    let oplog_total = oplog.gotos
        + oplog.thread_creates
        + oplog.thread_updates
        + oplog.thread_deletes
        + oplog.marker_creates
        + oplog.marker_deletes;
    if oplog_total > 0 {
        println!(" oplog ops: {}", oplog_total);
        println!(" thread create: {}", oplog.thread_creates);
        println!(" thread update: {}", oplog.thread_updates);
        println!(" thread delete: {}", oplog.thread_deletes);
        println!(" marker create: {}", oplog.marker_creates);
        println!(" marker delete: {}", oplog.marker_deletes);
        println!(" goto: {}", oplog.gotos);
    }

    // Entries that had no target in the sha map are reported on stderr.
    let unmapped_refs = stats.refs.skipped_unmapped;
    if unmapped_refs > 0 {
        eprintln!(
            "warning: {} refs skipped (target commit not in sha map)",
            unmapped_refs
        );
    }
    let unmapped_reflog = stats.oplog.skipped_unmapped;
    if unmapped_reflog > 0 {
        eprintln!(
            "warning: {} reflog entries skipped (target commit not in sha map)",
            unmapped_reflog
        );
    }

    info!("import complete");
    Ok(())
}
/// Arguments of the `reason` subcommand, bundled so that `run_reason` takes a
/// single parameter. `main` builds it from `Command::Reason`, borrowing the
/// paths and home overrides.
struct ReasonArgs<'a> {
/// Path of the git repository; opened with `GitSource::open` and also passed
/// to `load_transcripts`.
git: &'a Path,
/// Path of the Heddle repository; opened with `repo::Repository::open`.
heddle: &'a Path,
/// Explicit commits to process. When empty, `pipeline_default_commits` is
/// used instead.
commits: Vec<String>,
/// Upper bound on the number of commits, applied after the `since` filter.
limit: Option<usize>,
/// Keep only commits whose `authored_at` is at or after this instant.
since: Option<DateTime<Utc>>,
/// Override for the Claude transcript root: `Some("")` disables the root,
/// `None` keeps the default from `TranscriptRoots::default()`.
claude_home: Option<&'a str>,
/// Override for the Codex transcript root; same rules as `claude_home`.
codex_home: Option<&'a str>,
/// Override for the OpenCode transcript root; same rules as `claude_home`.
opencode_home: Option<&'a str>,
/// Stored unchanged in `TranscriptRoots::codex_since`.
codex_since: Option<DateTime<Utc>>,
/// Forwarded to `ReasoningPipelineParams::max_sessions_per_commit`.
max_sessions_per_commit: usize,
/// Forwarded to `ReasoningPipelineParams::min_match_confidence`.
min_match_confidence: f32,
/// When set, `emit_annotations` is turned off and a candidate preview is
/// printed instead.
dry_run: bool,
}
/// Runs the reasoning pipeline over imported commits and prints a report.
///
/// Steps, in order:
/// 1. Open the Heddle repository and its sha map. If the map file does not
///    exist, print a hint to run `import` first and exit with status 2.
/// 2. Open the git repository and load transcripts from the configured roots.
/// 3. Choose the commits: the explicit `--commit` list, or
///    `pipeline_default_commits` when none were given; then apply `--since`
///    and finally `--limit`.
/// 4. Run the pipeline (with `emit_annotations` off under `--dry-run`).
/// 5. Print statistics, the dry-run preview and a closing hint.
///
/// # Errors
///
/// Propagates errors from opening the repository, the sha map or the git
/// source, and from `ReasoningPipeline::run`.
fn run_reason(args: ReasonArgs<'_>) -> Result<()> {
    let repo = repo::Repository::open(args.heddle)?;
    let map_path = repo.heddle_dir().join("ingest").join("sha_map.sqlite");
    if !map_path.exists() {
        eprintln!(
            "error: no sha map at {}.\n\
             run `heddle-ingest import --git {} --heddle {}` first.",
            map_path.display(),
            args.git.display(),
            args.heddle.display(),
        );
        std::process::exit(2);
    }
    let map = ShaMap::open(&map_path)?;
    let git = GitSource::open(args.git)?;

    let roots = build_transcript_roots(
        args.claude_home,
        args.codex_home,
        args.opencode_home,
        args.codex_since,
    );
    let transcripts = load_transcripts(args.git, &roots);
    // A root that resolved to `None` is shown as "<disabled>".
    let display_root = |p: Option<&PathBuf>| -> String {
        p.map(|p| p.display().to_string())
            .unwrap_or_else(|| "<disabled>".into())
    };
    println!(
        "loaded {} transcripts (claude_home={}, codex_home={}, opencode_home={})",
        transcripts.len(),
        display_root(roots.claude.as_ref()),
        display_root(roots.codex.as_ref()),
        display_root(roots.opencode_home.as_ref()),
    );
    if transcripts.is_empty() {
        eprintln!(
            "warning: no transcripts found for {}. \
             check that the transcript stores exist and contain \
             sessions whose cwd is (or is under) this repo.",
            args.git.display()
        );
    }

    // `args` is owned and `commits` is not used again, so the explicit list
    // is moved out rather than cloned.
    let mut commits = if args.commits.is_empty() {
        pipeline_default_commits(&map)
    } else {
        args.commits
    };
    if let Some(since) = args.since {
        // Commits that cannot be read are dropped from the run with a warning.
        commits.retain(|sha| match git.read_commit(sha) {
            Ok(c) => c.authored_at >= since,
            Err(_) => {
                eprintln!("warning: can't read commit {sha} for --since filter");
                false
            }
        });
    }
    // `--limit` is applied last, i.e. to the already filtered list.
    if let Some(limit) = args.limit {
        commits.truncate(limit);
    }
    println!("processing {} commits", commits.len());

    let mut params = ReasoningPipelineParams {
        max_sessions_per_commit: args.max_sessions_per_commit,
        min_match_confidence: args.min_match_confidence,
        ..ReasoningPipelineParams::default()
    };
    if args.dry_run {
        params.emit_annotations = false;
    }
    let mut pipeline =
        ReasoningPipeline::new(&repo, &git, &map, args.git, transcripts).with_params(params);
    let stats = pipeline.run(&commits)?;

    if args.dry_run {
        println!("dry-run: not writing annotations");
    }
    println!("reasoning pass complete");
    println!(" commits scanned: {}", stats.commits_scanned);
    println!(" commits with matches: {}", stats.commits_with_matches);
    println!(" sessions mined: {}", stats.sessions_mined);
    println!(" points extracted: {}", stats.points_extracted);
    println!(
        " points rejected: {}",
        stats.points_rejected_quality
    );
    println!(" points deduped (cross): {}", stats.points_deduped);
    println!(" states updated: {}", stats.emit.states_updated);
    println!(
        " annotations written: {}",
        stats.emit.annotations_written
    );

    if args.dry_run && !pipeline.preview().is_empty() {
        println!();
        println!("candidate preview:");
        for item in pipeline.preview() {
            let decision = match item.decision {
                ingest::reasoning_pipeline::PreviewDecision::Kept => "kept",
                ingest::reasoning_pipeline::PreviewDecision::Rejected => "rejected",
            };
            // Abbreviate the sha to its first 8 bytes. `str::get` returns
            // `None` instead of panicking when the string is shorter than
            // that or byte 8 is not a char boundary; the whole string is
            // printed in that case.
            let short_sha = item
                .commit_sha
                .get(..8)
                .unwrap_or(&item.commit_sha[..]);
            println!(
                "- {decision} ({}) {} {}",
                item.reason, short_sha, item.target_file
            );
            println!(" {}", item.text);
        }
    }

    // The remaining counters are printed only when non-zero.
    if stats.emit.deduped > 0 {
        println!(" annotations deduped: {}", stats.emit.deduped);
    }
    if stats.skipped_untranslated_tree > 0 {
        println!(
            " skipped (no tree map): {}",
            stats.skipped_untranslated_tree
        );
    }
    if stats.skipped_git_errors > 0 {
        println!(" skipped (git errors): {}", stats.skipped_git_errors);
    }
    if stats.emit.skipped_missing_state + stats.emit.skipped_malformed > 0 {
        println!(
            " skipped (emit): missing_state={}, malformed={}",
            stats.emit.skipped_missing_state, stats.emit.skipped_malformed
        );
    }

    // Closing hint, chosen from the outcome of the run.
    // NOTE(review): under --dry-run `emit_annotations` is off, so
    // `states_updated` is presumably 0 and the "extracted no notecards" hint
    // may be shown even though points were extracted — confirm against the
    // pipeline's dry-run statistics.
    if stats.emit.states_updated > 0 {
        println!();
        println!(
            "annotations attached to {} states. To browse them:",
            stats.emit.states_updated
        );
        println!(
            " heddle --repo {} log # find a state id",
            args.heddle.display()
        );
        println!(
            " heddle --repo {} context list --ref <state-id>",
            args.heddle.display(),
        );
        println!("or open the web app at /app/repo/<repo>/-/files/<path> to see them inline.");
    } else if stats.commits_with_matches > 0 {
        println!();
        println!(
            "matched {} commits but extracted no notecards. Try lowering --min-match-confidence \
             below {:.2} or raising --max-sessions-per-commit above {}.",
            stats.commits_with_matches, args.min_match_confidence, args.max_sessions_per_commit,
        );
    } else {
        println!();
        println!(
            "no commits matched any session. Either no transcripts touched this repo, \
             or --min-match-confidence ({:.2}) is too strict for this corpus.",
            args.min_match_confidence,
        );
    }

    info!("reason complete");
    Ok(())
}
/// Builds the `TranscriptRoots` used to load transcripts.
///
/// Each of the three roots starts from `TranscriptRoots::default()` and is
/// then adjusted by its override:
///
/// * `None` keeps the default root,
/// * `Some("")` disables the root,
/// * any other string replaces the root with that path.
///
/// `codex_since` is stored unchanged.
fn build_transcript_roots(
    claude_override: Option<&str>,
    codex_override: Option<&str>,
    opencode_override: Option<&str>,
    codex_since: Option<DateTime<Utc>>,
) -> TranscriptRoots {
    /// Applies one override to one default root.
    fn pick(requested: Option<&str>, fallback: Option<PathBuf>) -> Option<PathBuf> {
        match requested {
            None => fallback,
            Some(text) if text.is_empty() => None,
            Some(text) => Some(PathBuf::from(text)),
        }
    }

    let defaults = TranscriptRoots::default();
    let claude = pick(claude_override, defaults.claude);
    let codex = pick(codex_override, defaults.codex);
    let opencode_home = pick(opencode_override, defaults.opencode_home);

    TranscriptRoots {
        claude,
        codex,
        opencode_home,
        codex_since,
    }
}
/// Opens the sha map at `path` and performs `action` on it.
///
/// * `Stats` prints the record count, the commit count and the path.
/// * `LookupGit` resolves a git sha, trying commit, tree and blob mappings in
///   that order, and prints the first hit.
/// * `LookupHeddle` prints the git sha mapped to a Heddle identifier.
///
/// A lookup without a result prints a message to stderr and exits the process
/// with status 1.
///
/// # Errors
///
/// Propagates the error from `ShaMap::open`.
fn run_map(path: &std::path::Path, action: MapAction) -> Result<()> {
    let map = ShaMap::open(path)?;

    match action {
        MapAction::Stats => {
            println!("records: {}", map.len());
            println!("commits: {}", map.commit_count());
            println!("path: {}", path.display());
        }
        MapAction::LookupGit { sha } => {
            // `or_else` keeps the lookups lazy: the tree and blob tables are
            // only consulted when the earlier lookups found nothing.
            let hit = map
                .get_commit(&sha)
                .map(|cid| format!("commit {sha} → {}", cid.to_string_full()))
                .or_else(|| {
                    map.get_tree(&sha)
                        .map(|h| format!("tree {sha} → {}", h.to_hex()))
                })
                .or_else(|| {
                    map.get_blob(&sha)
                        .map(|h| format!("blob {sha} → {}", h.to_hex()))
                });
            match hit {
                Some(line) => println!("{line}"),
                None => {
                    eprintln!("no mapping for git sha {sha}");
                    std::process::exit(1);
                }
            }
        }
        MapAction::LookupHeddle { heddle } => {
            if let Some(sha) = map.get_git_for_heddle(&heddle) {
                println!("{heddle} → git {sha}");
            } else {
                eprintln!("no git sha mapped to {heddle}");
                std::process::exit(1);
            }
        }
    }

    info!("map op complete");
    Ok(())
}
fn init_tracing(verbosity: u8) {
let filter = match verbosity {
0 => "warn",
1 => "info",
2 => "debug",
_ => "trace",
};
let _ = tracing_subscriber::fmt()
.with_env_filter(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(filter)),
)
.without_time()
.try_init();
}