use anyhow::{Context, Result, anyhow};
use chrono::{DateTime, TimeZone, Utc};
use globset::{Glob, GlobMatcher};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fs;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use crate::chunker::{self, ChunkerConfig};
use crate::sanitize;
use crate::segmentation::semantic_segments;
use crate::sources::{self, ExtractionConfig};
pub use crate::timeline::Kind;
use crate::timeline::{RepoIdentity, SemanticSegment, TimelineEntry};
use crate::validation::is_valid_repo_project_slug;
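/// Lowercase markers that suggest an assistant message contains an implementation plan.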
const PLAN_KEYWORDS: &[&str] = &[
"implementation plan",
"plan:",
"## plan",
"step 1:",
"step 2:",
"step 3:",
"action items",
"milestones",
"roadmap",
"todo list",
"acceptance criteria",
"## steps",
"## phases",
];
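/// Lowercase markers that suggest an assistant message contains a report or review.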
const REPORT_KEYWORDS: &[&str] = &[
"## findings",
"## summary",
"## report",
"audit report",
"coverage report",
"test results",
"## metrics",
"## recommendations",
"## conclusion",
"status report",
"incident report",
"pr review",
"code review",
];
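/// Classifies a session by counting plan and report keywords in lowercased assistant messages.
///
/// A kind wins when its score reaches 3 and strictly exceeds the other score; otherwise the
/// session falls back to `Conversations` (if any user or assistant entries exist) or `Other`.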
pub fn classify_kind(entries: &[TimelineEntry]) -> Kind {
if entries.is_empty() {
return Kind::Other;
}
let mut plan_score: u32 = 0;
let mut report_score: u32 = 0;
let mut has_conversation = false;
for entry in entries {
let lower = entry.message.to_lowercase();
if entry.role == "assistant" {
for kw in PLAN_KEYWORDS {
if lower.contains(kw) {
plan_score += 1;
}
}
for kw in REPORT_KEYWORDS {
if lower.contains(kw) {
report_score += 1;
}
}
}
if entry.role == "user" || entry.role == "assistant" {
has_conversation = true;
}
}
let threshold = 3;
if plan_score >= threshold && plan_score > report_score {
Kind::Plans
} else if report_score >= threshold && report_score > plan_score {
Kind::Reports
} else if has_conversation {
Kind::Conversations
} else {
Kind::Other
}
}
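/// Builds the canonical chunk filename `<YYYY_MMDD>_<agent>_<session-id>_<NNN>.md`, where the
/// session id is truncated to at most 12 alphanumeric/dash characters and the chunk number is
/// zero-padded to three digits.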
pub fn session_basename(date: &str, agent: &str, session_id: &str, chunk: u32) -> String {
let date_compact = compact_date(date);
let sid = truncate_session_id(session_id);
format!("{}_{}_{}_{:03}.md", date_compact, agent, sid, chunk)
}
pub(crate) fn compact_date(date: &str) -> String {
let digits: String = date.chars().filter(|c| c.is_ascii_digit()).collect();
if digits.len() >= 8 {
format!("{}_{}", &digits[..4], &digits[4..8])
} else {
date.replace('-', "_")
}
}
fn truncate_session_id(session_id: &str) -> String {
let cleaned: String = session_id
.chars()
.filter(|c| c.is_ascii_alphanumeric() || *c == '-')
.collect();
let limit = 12.min(cleaned.len());
cleaned[..limit].to_string()
}
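/// Bucket name for sessions that cannot be attributed to a repository.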
pub const NON_REPOSITORY_CONTEXTS: &str = "non-repository-contexts";
pub const CANONICAL_STORE_DIRNAME: &str = "store";
pub const LEGACY_SALVAGE_DIRNAME: &str = "legacy-store";
pub const AICX_IGNORE_FILENAME: &str = ".aicxignore";
const MIGRATION_DIRNAME: &str = "migration";
const MIGRATION_MANIFEST_FILENAME: &str = "manifest.json";
const MIGRATION_REPORT_FILENAME: &str = "report.md";
#[derive(Debug, Clone)]
struct IgnoreRule {
negate: bool,
matcher: GlobMatcher,
}
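/// Gitignore-style matcher built from the `.aicxignore` file at the store base directory.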
#[derive(Debug, Clone, Default)]
pub struct StoreIgnoreMatcher {
base: PathBuf,
rules: Vec<IgnoreRule>,
}
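/// Returns (and creates if missing) the store base directory, `~/.aicx`.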
pub fn store_base_dir() -> Result<PathBuf> {
let dir = dirs::home_dir().context("No home directory")?.join(".aicx");
fs::create_dir_all(&dir)
.with_context(|| format!("Failed to create store dir: {}", dir.display()))?;
Ok(dir)
}
pub fn canonical_store_dir() -> Result<PathBuf> {
let dir = store_base_dir()?.join(CANONICAL_STORE_DIRNAME);
fs::create_dir_all(&dir)?;
Ok(dir)
}
pub fn non_repository_contexts_dir() -> Result<PathBuf> {
let dir = store_base_dir()?.join(NON_REPOSITORY_CONTEXTS);
fs::create_dir_all(&dir)?;
Ok(dir)
}
pub fn legacy_store_base_dir() -> Result<PathBuf> {
Ok(dirs::home_dir()
.context("No home directory")?
.join(".ai-contexters"))
}
fn legacy_salvage_dir(base: &Path) -> PathBuf {
base.join(LEGACY_SALVAGE_DIRNAME)
}
fn migration_dir(base: &Path) -> PathBuf {
base.join(MIGRATION_DIRNAME)
}
fn migration_manifest_path(base: &Path) -> PathBuf {
migration_dir(base).join(MIGRATION_MANIFEST_FILENAME)
}
fn migration_report_path(base: &Path) -> PathBuf {
migration_dir(base).join(MIGRATION_REPORT_FILENAME)
}
impl StoreIgnoreMatcher {
fn load(base: &Path) -> Result<Self> {
let path = base.join(AICX_IGNORE_FILENAME);
if !path.exists() {
return Ok(Self {
base: base.to_path_buf(),
rules: Vec::new(),
});
}
let raw = sanitize::read_to_string_validated(&path)
.with_context(|| format!("Failed to read {}", path.display()))?;
let mut rules = Vec::new();
for (line_no, line) in raw.lines().enumerate() {
let trimmed = line.trim();
if trimmed.is_empty() || trimmed.starts_with('#') {
continue;
}
let negate = trimmed.starts_with('!');
let pattern = trimmed.trim_start_matches('!').trim();
if pattern.is_empty() {
continue;
}
let normalized = normalize_aicx_ignore_pattern(pattern);
let matcher = Glob::new(&normalized)
.with_context(|| {
format!(
"Invalid {} pattern at line {}: {}",
path.display(),
line_no + 1,
trimmed
)
})?
.compile_matcher();
rules.push(IgnoreRule { negate, matcher });
}
Ok(Self {
base: base.to_path_buf(),
rules,
})
}
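    /// Returns true when `path` (taken relative to the matcher base) is ignored.
    ///
    /// Rules are evaluated in file order and the last matching rule wins; a `!pattern`
    /// rule re-includes a previously ignored path.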
pub fn is_ignored(&self, path: &Path) -> bool {
if self.rules.is_empty() {
return false;
}
let Ok(relative) = path.strip_prefix(&self.base) else {
return false;
};
let relative = normalize_relative_store_path(relative);
if relative.is_empty() {
return false;
}
let mut ignored = false;
for rule in &self.rules {
if rule.matcher.is_match(&relative) {
ignored = !rule.negate;
}
}
ignored
}
}
fn normalize_relative_store_path(path: &Path) -> String {
path.components()
.map(|component| component.as_os_str().to_string_lossy())
.collect::<Vec<_>>()
.join("/")
}
fn normalize_aicx_ignore_pattern(pattern: &str) -> String {
let mut normalized = pattern
.trim()
.trim_start_matches("./")
.trim_start_matches('/')
.replace('\\', "/");
while normalized.contains("//") {
normalized = normalized.replace("//", "/");
}
if normalized.ends_with('/') {
normalized.push_str("**");
}
normalized
}
pub fn load_ignore_matcher_at(base: &Path) -> Result<StoreIgnoreMatcher> {
StoreIgnoreMatcher::load(base)
}
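/// Splits `paths` into the paths kept and a count of paths dropped by the `.aicxignore`
/// rules under `base`.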
pub fn filter_ignored_paths_at<P>(base: &Path, paths: &[P]) -> Result<(Vec<PathBuf>, usize)>
where
P: AsRef<Path>,
{
let matcher = load_ignore_matcher_at(base)?;
if matcher.rules.is_empty() {
return Ok((
paths
.iter()
.map(|path| path.as_ref().to_path_buf())
.collect(),
0,
));
}
let mut kept = Vec::with_capacity(paths.len());
let mut ignored = 0usize;
for path in paths {
let path = path.as_ref();
if matcher.is_ignored(path) {
ignored += 1;
} else {
kept.push(path.to_path_buf());
}
}
Ok((kept, ignored))
}
pub fn project_dir(project: &str) -> Result<PathBuf> {
let dir = validated_store_project_dir(&canonical_store_dir()?, project)?;
fs::create_dir_all(&dir)?;
Ok(dir)
}
pub fn chunks_dir() -> Result<PathBuf> {
let dir = store_base_dir()?.join("chunks");
fs::create_dir_all(&dir)?;
Ok(dir)
}
pub fn get_context_path(project: &str, agent: &str, date: &str, time: &str) -> Result<PathBuf> {
let dir = validated_store_project_dir(&canonical_store_dir()?, project)?.join(date);
fs::create_dir_all(&dir)?;
Ok(dir.join(format!("{}_{}-context.md", time, agent)))
}
pub fn get_context_json_path(
project: &str,
agent: &str,
date: &str,
time: &str,
) -> Result<PathBuf> {
let dir = validated_store_project_dir(&canonical_store_dir()?, project)?.join(date);
fs::create_dir_all(&dir)?;
Ok(dir.join(format!("{}_{}-context.json", time, agent)))
}
fn validated_store_project_dir(root: &Path, project: &str) -> Result<PathBuf> {
if !is_valid_repo_project_slug(project) {
anyhow::bail!(
"invalid canonical store project bucket {:?}; expected <bucket> or <org>/<repo> with [A-Za-z0-9][A-Za-z0-9._-]{{0,99}} segments",
project
);
}
Ok(root.join(project))
}
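/// Top-level store index persisted as `index.json` at the store base directory.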
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct StoreIndex {
pub projects: HashMap<String, ProjectIndex>,
pub last_updated: DateTime<Utc>,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ProjectIndex {
pub agents: HashMap<String, AgentIndex>,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AgentIndex {
pub dates: Vec<String>,
pub total_entries: usize,
pub last_updated: DateTime<Utc>,
}
pub fn load_index() -> StoreIndex {
let base = match store_base_dir() {
Ok(dir) => dir,
Err(_) => return StoreIndex::default(),
};
load_index_at(&base)
}
fn load_index_at(base: &Path) -> StoreIndex {
let path = base.join("index.json");
if !path.exists() {
return StoreIndex::default();
}
let contents = match fs::read_to_string(&path) {
Ok(c) => c,
Err(_) => return StoreIndex::default(),
};
serde_json::from_str(&contents).unwrap_or_default()
}
pub fn save_index(index: &StoreIndex) -> Result<()> {
save_index_at(&store_base_dir()?, index)
}
fn save_index_at(base: &Path, index: &StoreIndex) -> Result<()> {
let path = base.join("index.json");
let json = serde_json::to_string_pretty(index).context("Failed to serialize index")?;
fs::write(&path, json).with_context(|| format!("Failed to write index: {}", path.display()))?;
Ok(())
}
pub fn update_index(
index: &mut StoreIndex,
project: &str,
agent: &str,
date: &str,
entry_count: usize,
) {
let now = Utc::now();
index.last_updated = now;
let project_idx = index.projects.entry(project.to_string()).or_default();
let agent_idx = project_idx.agents.entry(agent.to_string()).or_default();
if !agent_idx.dates.contains(&date.to_string()) {
agent_idx.dates.push(date.to_string());
agent_idx.dates.sort();
}
agent_idx.total_entries += entry_count;
agent_idx.last_updated = now;
}
pub fn list_stored_projects(index: &StoreIndex) -> Vec<String> {
let mut projects: Vec<String> = index.projects.keys().cloned().collect();
projects.sort();
projects
}
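/// A single context chunk discovered on disk by the store scanners.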
#[derive(Debug, Clone)]
pub struct StoredContextFile {
pub path: PathBuf,
pub project: String,
pub repo: Option<RepoIdentity>,
pub date_compact: String,
pub date_iso: String,
pub kind: Kind,
pub agent: String,
pub session_id: String,
pub chunk: u32,
}
#[derive(Debug, Clone, Serialize)]
pub struct ReadContextChunk {
pub path: PathBuf,
pub relative_path: String,
pub project: String,
pub date: String,
pub kind: String,
pub agent: String,
pub session_id: String,
pub chunk: u32,
pub bytes: u64,
pub content: String,
pub truncated: bool,
}
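/// Aggregate results of writing semantic segments into the store.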
#[derive(Debug, Clone, Default)]
pub struct StoreWriteSummary {
pub total_entries: usize,
pub written_paths: Vec<PathBuf>,
pub skipped_empty_body: usize,
pub project_summary: BTreeMap<String, BTreeMap<String, usize>>,
}
struct SessionWriteSpec<'a> {
project: Option<&'a str>,
agent: &'a str,
date: &'a str,
session_id: &'a str,
kind: Option<Kind>,
}
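/// Writes a markdown transcript and a pretty-printed JSON dump of `entries` under
/// `<canonical store>/<project>/<date>/`, returning both paths.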
pub fn write_context(
project: &str,
agent: &str,
date: &str,
time: &str,
entries: &[TimelineEntry],
) -> Result<Vec<PathBuf>> {
let mut written = Vec::new();
let md_path = get_context_path(project, agent, date, time)?;
let mut md_content = String::new();
md_content.push_str(&format!("# {} | {} | {}\n\n", project, agent, date));
for entry in entries {
let ts = entry.timestamp.format("%Y-%m-%d %H:%M:%S UTC");
md_content.push_str(&format!("### {} | {}\n", ts, entry.role));
for line in entry.message.lines() {
md_content.push_str(&format!("> {}\n", line));
}
md_content.push('\n');
}
let write_path = sanitize::validate_write_path(&md_path)?;
fs::write(&write_path, &md_content)?;
written.push(md_path);
let json_path = get_context_json_path(project, agent, date, time)?;
let json_content = serde_json::to_string_pretty(entries)?;
let write_path = sanitize::validate_write_path(&json_path)?;
fs::write(&write_path, &json_content)?;
written.push(json_path);
Ok(written)
}
pub fn write_context_chunked(
project: &str,
agent: &str,
date: &str,
time: &str,
entries: &[TimelineEntry],
chunker_config: &ChunkerConfig,
) -> Result<Vec<PathBuf>> {
if entries.is_empty() {
return Ok(vec![]);
}
let chunks = chunker::chunk_entries(entries, project, agent, chunker_config);
let dir = validated_store_project_dir(&canonical_store_dir()?, project)?.join(date);
fs::create_dir_all(&dir)?;
let mut written = Vec::new();
for chunk in &chunks {
let seq = chunk.id.rsplit('_').next().unwrap_or("001");
let filename = format!("{}_{}-{}.md", time, agent, seq);
let path = dir.join(&filename);
let write_path = sanitize::validate_write_path(&path)?;
fs::write(&write_path, &chunk.text)?;
written.push(path);
}
Ok(written)
}
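/// Chunks `entries` and writes each non-empty chunk under the session-first layout
/// `<canonical store>/<org>/<repo>/<YYYY_MMDD>/<kind>/<agent>/`, together with a
/// `*.meta.json` sidecar per chunk. The kind is classified from the entries when not supplied.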
pub fn write_context_session_first(
project: &str,
agent: &str,
date: &str,
session_id: &str,
entries: &[TimelineEntry],
chunker_config: &ChunkerConfig,
kind: Option<Kind>,
) -> Result<Vec<PathBuf>> {
write_context_session_first_at(
&canonical_store_dir()?,
SessionWriteSpec {
project: Some(project),
agent,
date,
session_id,
kind,
},
entries,
chunker_config,
)
}
fn write_context_session_first_at(
root: &Path,
spec: SessionWriteSpec<'_>,
entries: &[TimelineEntry],
chunker_config: &ChunkerConfig,
) -> Result<Vec<PathBuf>> {
if entries.is_empty() {
return Ok(vec![]);
}
let kind = spec.kind.unwrap_or_else(|| classify_kind(entries));
let project_label = spec.project.unwrap_or(NON_REPOSITORY_CONTEXTS);
let chunks = chunker::chunk_entries(entries, project_label, spec.agent, chunker_config);
let date_dir = compact_date(spec.date);
let mut written = Vec::new();
for (idx, chunk) in chunks.iter().enumerate() {
if chunk_body_is_empty(&chunk.text) {
continue;
}
let chunk_num = (idx as u32) + 1;
        let dir = match spec.project {
            Some(project) => validated_store_project_dir(root, project)?
                .join(&date_dir)
                .join(kind.dir_name())
                .join(spec.agent),
            None => root.join(&date_dir).join(kind.dir_name()).join(spec.agent),
        };
fs::create_dir_all(&dir)?;
let filename = session_basename(spec.date, spec.agent, spec.session_id, chunk_num);
let path = dir.join(&filename);
let write_path = sanitize::validate_write_path(&path)?;
fs::write(&write_path, &chunk.text)?;
write_chunk_sidecar(&path, chunk)?;
written.push(path);
}
Ok(written)
}
fn chunk_body_after_header(content: &str) -> &str {
let Some(rest) = content.strip_prefix("[project:") else {
return content;
};
let Some((_, body)) = rest.split_once('\n') else {
return "";
};
body.trim_start_matches(['\r', '\n'])
}
fn chunk_body_is_empty(content: &str) -> bool {
!chunk_body_after_header(content)
.lines()
.any(chunk_line_has_signal)
}
fn chunk_line_has_signal(line: &str) -> bool {
let line = line.trim();
if line.is_empty() {
return false;
}
if let Some((_, rest)) = line.split_once("] ")
&& let Some((_, message)) = rest.split_once(':')
{
return !message.trim().is_empty();
}
true
}
fn write_chunk_sidecar(path: &Path, chunk: &chunker::Chunk) -> Result<()> {
let sidecar_path = path.with_extension("meta.json");
let write_path = sanitize::validate_write_path(&sidecar_path)?;
let sidecar = chunker::ChunkMetadataSidecar::from(chunk);
fs::write(&write_path, serde_json::to_vec_pretty(&sidecar)?)?;
Ok(())
}
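/// Splits `entries` into semantic segments and writes each into the canonical store (or the
/// non-repository bucket when no repository identity can be asserted), updating `index.json`.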
pub fn store_semantic_segments(
entries: &[TimelineEntry],
chunker_config: &ChunkerConfig,
) -> Result<StoreWriteSummary> {
store_semantic_segments_with_progress(entries, chunker_config, |_, _| {})
}
pub fn store_semantic_segments_with_progress<F>(
entries: &[TimelineEntry],
chunker_config: &ChunkerConfig,
progress: F,
) -> Result<StoreWriteSummary>
where
F: FnMut(usize, usize),
{
store_semantic_segments_at(&store_base_dir()?, entries, chunker_config, progress)
}
pub fn store_semantic_segments_at<F>(
base: &Path,
entries: &[TimelineEntry],
chunker_config: &ChunkerConfig,
mut progress: F,
) -> Result<StoreWriteSummary>
where
F: FnMut(usize, usize),
{
let mut summary = StoreWriteSummary::default();
if entries.is_empty() {
return Ok(summary);
}
let segments = semantic_segments(entries);
let total_segments = segments.len();
let mut index = load_index_at(base);
for (segment_idx, segment) in segments.into_iter().enumerate() {
let date = segment
.entries
.first()
.map(|entry| entry.timestamp.format("%Y-%m-%d").to_string())
.unwrap_or_else(|| Utc::now().format("%Y-%m-%d").to_string());
let project = segment.project_label();
let before_count = chunker::chunk_entries(
&segment.entries,
segment.project_label().as_str(),
&segment.agent,
chunker_config,
)
.len();
let paths = write_semantic_segment_at(base, &segment, &date, chunker_config)?;
summary.skipped_empty_body += before_count.saturating_sub(paths.len());
update_index(
&mut index,
&project,
&segment.agent,
&compact_date(&date),
segment.entries.len(),
);
*summary
.project_summary
.entry(project)
.or_default()
.entry(segment.agent.clone())
.or_insert(0) += segment.entries.len();
summary.total_entries += segment.entries.len();
summary.written_paths.extend(paths);
progress(segment_idx + 1, total_segments);
}
save_index_at(base, &index)?;
Ok(summary)
}
fn write_semantic_segment_at(
base: &Path,
segment: &SemanticSegment,
date: &str,
chunker_config: &ChunkerConfig,
) -> Result<Vec<PathBuf>> {
let project = if segment.has_assertable_identity() {
segment.repo.as_ref().map(RepoIdentity::slug)
} else {
None
};
let root = if project.is_some() {
base.join(CANONICAL_STORE_DIRNAME)
} else {
base.join(NON_REPOSITORY_CONTEXTS)
};
write_context_session_first_at(
&root,
SessionWriteSpec {
project: project.as_deref(),
agent: &segment.agent,
date,
session_id: &segment.session_id,
kind: Some(segment.kind),
},
&segment.entries,
chunker_config,
)
}
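/// Scans both the canonical repository store and the non-repository bucket, honoring
/// `.aicxignore`, and returns the discovered chunks sorted by date, project, agent,
/// session and chunk number.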
pub fn scan_context_files() -> Result<Vec<StoredContextFile>> {
let base = store_base_dir()?;
scan_context_files_at(&base)
}
pub fn scan_context_files_raw() -> Result<Vec<StoredContextFile>> {
let base = store_base_dir()?;
scan_context_files_raw_at(&base)
}
pub fn scan_context_files_at(base: &Path) -> Result<Vec<StoredContextFile>> {
let base = sanitize::validate_dir_path(base)?;
let ignore = load_ignore_matcher_at(&base)?;
scan_context_files_with_ignore(&base, &ignore)
}
pub fn scan_context_files_project_at(
base: &Path,
project_filter: Option<&str>,
) -> Result<Vec<StoredContextFile>> {
let base = sanitize::validate_dir_path(base)?;
let Some(filter) = project_filter
.map(str::trim)
.filter(|filter| !filter.is_empty())
else {
return scan_context_files_at(&base);
};
let filter = filter.to_lowercase();
let ignore = load_ignore_matcher_at(&base)?;
let mut files = Vec::new();
let canonical_root = base.join(CANONICAL_STORE_DIRNAME);
if canonical_root.is_dir() {
scan_repo_store_filtered(&canonical_root, &ignore, &filter, &mut files)?;
}
let non_repo_root = base.join(NON_REPOSITORY_CONTEXTS);
if non_repo_root.is_dir() && NON_REPOSITORY_CONTEXTS.contains(&filter) {
scan_non_repository_store(&non_repo_root, &ignore, &mut files)?;
}
sort_context_files(&mut files);
Ok(files)
}
pub fn scan_context_files_raw_at(base: &Path) -> Result<Vec<StoredContextFile>> {
let base = sanitize::validate_dir_path(base)?;
let ignore = StoreIgnoreMatcher {
base: base.clone(),
rules: Vec::new(),
};
scan_context_files_with_ignore(&base, &ignore)
}
fn scan_context_files_with_ignore(
base: &Path,
ignore: &StoreIgnoreMatcher,
) -> Result<Vec<StoredContextFile>> {
let mut files = Vec::new();
let canonical_root = base.join(CANONICAL_STORE_DIRNAME);
if canonical_root.is_dir() {
scan_repo_store(&canonical_root, ignore, &mut files)?;
}
let non_repo_root = base.join(NON_REPOSITORY_CONTEXTS);
if non_repo_root.is_dir() {
scan_non_repository_store(&non_repo_root, ignore, &mut files)?;
}
sort_context_files(&mut files);
Ok(files)
}
fn sort_context_files(files: &mut [StoredContextFile]) {
files.sort_by(|left, right| {
left.date_compact
.cmp(&right.date_compact)
.then_with(|| left.project.cmp(&right.project))
.then_with(|| left.agent.cmp(&right.agent))
.then_with(|| left.session_id.cmp(&right.session_id))
.then_with(|| left.chunk.cmp(&right.chunk))
});
}
pub fn context_files_since(
cutoff: SystemTime,
project_filter: Option<&str>,
) -> Result<Vec<StoredContextFile>> {
context_files_since_at(&store_base_dir()?, cutoff, project_filter)
}
fn read_store_dir(path: &Path) -> Result<fs::ReadDir> {
let validated = sanitize::validate_dir_path(path)?;
fs::read_dir(&validated)
.with_context(|| format!("Failed to read store dir {}", validated.display()))
}
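/// Loads a stored chunk by reference, optionally truncating the content to `max_chars`
/// characters. The reference may be an absolute path, a path relative to the store base,
/// a bare filename, or the compact form `project|date|kind|agent|session|chunk`.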
pub fn read_context_chunk(reference: &str, max_chars: Option<usize>) -> Result<ReadContextChunk> {
read_context_chunk_at(&store_base_dir()?, reference, max_chars)
}
pub fn read_context_chunk_at(
base: &Path,
reference: &str,
max_chars: Option<usize>,
) -> Result<ReadContextChunk> {
let base = sanitize::validate_dir_path(base)?;
let reference = reference.trim();
if reference.is_empty() {
return Err(anyhow!("chunk reference is required"));
}
let files = scan_context_files_at(&base)?;
let Some(file) = files
.into_iter()
.find(|file| stored_file_matches_reference(&base, file, reference))
else {
return Err(anyhow!("chunk not found: {reference}"));
};
let relative_path = file
.path
.strip_prefix(&base)
.unwrap_or(&file.path)
.to_string_lossy()
.to_string();
let path = sanitize::validate_read_path(&file.path)?;
let bytes = path.metadata().map(|meta| meta.len()).unwrap_or(0);
let content = sanitize::read_to_string_validated(&path)?;
let (content, truncated) = truncate_chars(content, max_chars);
Ok(ReadContextChunk {
path,
relative_path,
project: file.project,
date: file.date_iso,
kind: file.kind.dir_name().to_string(),
agent: file.agent,
session_id: file.session_id,
chunk: file.chunk,
bytes,
content,
truncated,
})
}
fn stored_file_matches_reference(base: &Path, file: &StoredContextFile, reference: &str) -> bool {
let path = file.path.to_string_lossy();
if path == reference {
return true;
}
let reference_path = Path::new(reference);
if reference_path.is_absolute() && reference_path == file.path {
return true;
}
if file
.path
.file_name()
.and_then(|name| name.to_str())
.is_some_and(|name| name == reference)
{
return true;
}
if file
.path
.strip_prefix(base)
.ok()
.is_some_and(|relative| relative.to_string_lossy() == reference)
{
return true;
}
let compact_ref = format!(
"{}|{}|{}|{}|{}|{:03}",
file.project,
file.date_iso,
file.kind.dir_name(),
file.agent,
file.session_id,
file.chunk
);
compact_ref == reference
}
fn truncate_chars(content: String, max_chars: Option<usize>) -> (String, bool) {
let Some(max_chars) = max_chars else {
return (content, false);
};
let mut iter = content.chars();
let truncated: String = iter.by_ref().take(max_chars).collect();
let was_truncated = iter.next().is_some();
(truncated, was_truncated)
}
fn context_files_since_at(
base: &Path,
cutoff: SystemTime,
project_filter: Option<&str>,
) -> Result<Vec<StoredContextFile>> {
let filter = project_filter.map(|value| value.to_ascii_lowercase());
let cutoff_date = DateTime::<Utc>::from(cutoff).format("%Y-%m-%d").to_string();
let mut files = scan_context_files_at(base)?;
files.retain(|file| {
let matches_project = filter
.as_ref()
.is_none_or(|needle| file.project.to_ascii_lowercase().contains(needle));
let matches_cutoff = file.date_iso >= cutoff_date;
matches_project && matches_cutoff
});
Ok(files)
}
pub fn load_sidecar(chunk_path: &Path) -> Option<chunker::ChunkMetadataSidecar> {
let sidecar_path = chunk_path.with_extension("meta.json");
let sidecar_path = sanitize::validate_read_path(&sidecar_path).ok()?;
let content = fs::read_to_string(&sidecar_path).ok()?;
serde_json::from_str(&content).ok()
}
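/// Finds stored chunks whose `*.meta.json` sidecar carries the given `run_id`, optionally
/// filtered by project substring and limited to the last seven days.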
pub fn chunks_by_run_id(run_id: &str, project: Option<&str>) -> Result<Vec<StoredContextFile>> {
let cutoff = SystemTime::now() - std::time::Duration::from_secs(7 * 24 * 3600);
chunks_by_run_id_at(&store_base_dir()?, run_id, project, cutoff)
}
fn chunks_by_run_id_at(
base: &Path,
run_id: &str,
project: Option<&str>,
cutoff: SystemTime,
) -> Result<Vec<StoredContextFile>> {
let filter = project.map(|value| value.to_ascii_lowercase());
let cutoff_date = DateTime::<Utc>::from(cutoff).format("%Y-%m-%d").to_string();
let mut matched = Vec::new();
for file in scan_context_files_at(base)? {
let matches_project = filter
.as_ref()
.is_none_or(|needle| file.project.to_ascii_lowercase().contains(needle));
let matches_cutoff = file.date_iso >= cutoff_date;
if !matches_project || !matches_cutoff {
continue;
}
if load_sidecar(&file.path)
.and_then(|sidecar| sidecar.run_id)
.as_deref()
== Some(run_id)
{
matched.push(file);
}
}
Ok(matched)
}
fn scan_repo_store(
    root: &Path,
    ignore: &StoreIgnoreMatcher,
    files: &mut Vec<StoredContextFile>,
) -> Result<()> {
    for organization_entry in read_store_dir(root)?.filter_map(|entry| entry.ok()) {
        let organization_path = organization_entry.path();
        if !organization_path.is_dir() {
            continue;
        }
        let organization = organization_entry.file_name().to_string_lossy().to_string();
        for repository_entry in read_store_dir(&organization_path)?.filter_map(|entry| entry.ok()) {
            let repository_path = repository_entry.path();
            if !repository_path.is_dir() {
                continue;
            }
            let repository = repository_entry.file_name().to_string_lossy().to_string();
            let repo = RepoIdentity {
                organization: organization.clone(),
                repository,
            };
            let repo_slug = repo.slug();
            // Delegate the date/kind/agent walk to scan_single_repo_store to avoid
            // duplicating the traversal logic used by the filtered scan.
            scan_single_repo_store(&repository_path, ignore, &repo, &repo_slug, files)?;
        }
    }
    Ok(())
}
fn scan_repo_store_filtered(
root: &Path,
ignore: &StoreIgnoreMatcher,
project_filter: &str,
files: &mut Vec<StoredContextFile>,
) -> Result<()> {
for organization_entry in read_store_dir(root)?.filter_map(|entry| entry.ok()) {
let organization_path = organization_entry.path();
if !organization_path.is_dir() {
continue;
}
let organization = organization_entry.file_name().to_string_lossy().to_string();
for repository_entry in read_store_dir(&organization_path)?.filter_map(|entry| entry.ok()) {
let repository_path = repository_entry.path();
if !repository_path.is_dir() {
continue;
}
let repository = repository_entry.file_name().to_string_lossy().to_string();
let repo = RepoIdentity {
organization: organization.clone(),
repository: repository.clone(),
};
let repo_slug = repo.slug();
if !repo_slug.to_lowercase().contains(project_filter)
&& !repository.to_lowercase().contains(project_filter)
&& !organization.to_lowercase().contains(project_filter)
{
continue;
}
scan_single_repo_store(&repository_path, ignore, &repo, &repo_slug, files)?;
}
}
Ok(())
}
fn scan_single_repo_store(
repository_path: &Path,
ignore: &StoreIgnoreMatcher,
repo: &RepoIdentity,
repo_slug: &str,
files: &mut Vec<StoredContextFile>,
) -> Result<()> {
for date_entry in read_store_dir(repository_path)?.filter_map(|entry| entry.ok()) {
let date_path = date_entry.path();
if !date_path.is_dir() {
continue;
}
let date_compact = date_entry.file_name().to_string_lossy().to_string();
for kind_entry in read_store_dir(&date_path)?.filter_map(|entry| entry.ok()) {
let kind_path = kind_entry.path();
if !kind_path.is_dir() {
continue;
}
let Some(kind) = Kind::parse(&kind_entry.file_name().to_string_lossy()) else {
continue;
};
for agent_entry in read_store_dir(&kind_path)?.filter_map(|entry| entry.ok()) {
let agent_path = agent_entry.path();
if !agent_path.is_dir() {
continue;
}
let agent = agent_entry.file_name().to_string_lossy().to_string();
let ctx = LeafScanContext {
repo: Some(repo.clone()),
project: repo_slug,
date_compact: &date_compact,
kind,
agent: &agent,
};
collect_leaf_files(&agent_path, &ctx, ignore, files)?;
}
}
}
Ok(())
}
fn scan_non_repository_store(
root: &Path,
ignore: &StoreIgnoreMatcher,
files: &mut Vec<StoredContextFile>,
) -> Result<()> {
for date_entry in read_store_dir(root)?.filter_map(|entry| entry.ok()) {
let date_path = date_entry.path();
if !date_path.is_dir() {
continue;
}
let date_compact = date_entry.file_name().to_string_lossy().to_string();
for kind_entry in read_store_dir(&date_path)?.filter_map(|entry| entry.ok()) {
let kind_path = kind_entry.path();
if !kind_path.is_dir() {
continue;
}
let Some(kind) = Kind::parse(&kind_entry.file_name().to_string_lossy()) else {
continue;
};
for agent_entry in read_store_dir(&kind_path)?.filter_map(|entry| entry.ok()) {
let agent_path = agent_entry.path();
if !agent_path.is_dir() {
continue;
}
let agent = agent_entry.file_name().to_string_lossy().to_string();
let ctx = LeafScanContext {
repo: None,
project: NON_REPOSITORY_CONTEXTS,
date_compact: &date_compact,
kind,
agent: &agent,
};
collect_leaf_files(&agent_path, &ctx, ignore, files)?;
}
}
}
Ok(())
}
#[derive(Clone)]
struct LeafScanContext<'a> {
repo: Option<RepoIdentity>,
project: &'a str,
date_compact: &'a str,
kind: Kind,
agent: &'a str,
}
fn collect_leaf_files(
dir: &Path,
ctx: &LeafScanContext<'_>,
ignore: &StoreIgnoreMatcher,
files: &mut Vec<StoredContextFile>,
) -> Result<()> {
for file_entry in read_store_dir(dir)?.filter_map(|entry| entry.ok()) {
let path = file_entry.path();
let file_type = match file_entry.file_type() {
Ok(file_type) => file_type,
Err(_) => continue,
};
if file_type.is_symlink() || !file_type.is_file() {
continue;
}
if path
.extension()
.and_then(|ext| ext.to_str())
.is_none_or(|ext| ext != "md" && ext != "json")
{
continue;
}
if ignore.is_ignored(&path) {
continue;
}
let Some((session_id, chunk)) = parse_session_basename(
&file_entry.file_name().to_string_lossy(),
ctx.agent,
ctx.date_compact,
) else {
continue;
};
files.push(StoredContextFile {
path,
project: ctx.project.to_string(),
repo: ctx.repo.clone(),
date_compact: ctx.date_compact.to_string(),
date_iso: expand_compact_date(ctx.date_compact),
kind: ctx.kind,
agent: ctx.agent.to_string(),
session_id,
chunk,
});
}
Ok(())
}
fn parse_session_basename(name: &str, agent: &str, date_compact: &str) -> Option<(String, u32)> {
let ext = if name.ends_with(".md") {
".md"
} else if name.ends_with(".json") {
".json"
} else {
return None;
};
let stem = name.strip_suffix(ext)?;
let prefix = format!("{date_compact}_{agent}_");
let remainder = stem.strip_prefix(&prefix)?;
let (session_id, chunk_str) = remainder.rsplit_once('_')?;
if session_id.is_empty()
|| !session_id
.chars()
.all(|ch| ch.is_ascii_alphanumeric() || ch == '-')
{
return None;
}
if chunk_str.len() != 3 || !chunk_str.chars().all(|ch| ch.is_ascii_digit()) {
return None;
}
let chunk = chunk_str.parse().ok()?;
Some((session_id.to_string(), chunk))
}
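/// Expands a compact `YYYY_MMDD` directory name back into an ISO `YYYY-MM-DD` date,
/// returning the input unchanged when it holds fewer than eight digits.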
pub fn expand_compact_date(compact: &str) -> String {
let digits: String = compact.chars().filter(|ch| ch.is_ascii_digit()).collect();
if digits.len() >= 8 {
format!("{}-{}-{}", &digits[..4], &digits[4..6], &digits[6..8])
} else {
compact.to_string()
}
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum LegacyItemKind {
ContextBundle,
LooseFile,
}
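/// How a legacy item is handled: rebuilt from recovered source files, rebuilt where possible
/// with the remainder salvaged, or salvaged verbatim into the legacy-store directory.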
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum MigrationAction {
Rebuild,
RebuildAndSalvage,
Salvage,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum MigrationExecution {
Planned,
Executed,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MigrationTotals {
pub total_items: usize,
pub total_legacy_files: usize,
pub rebuild_items: usize,
pub rebuild_and_salvage_items: usize,
pub salvage_items: usize,
pub unclassified_items: usize,
pub resolved_sources: usize,
pub missing_source_hints: usize,
pub ambiguous_source_hints: usize,
pub rebuilt_items: usize,
pub salvaged_items: usize,
pub rebuilt_paths: usize,
pub salvaged_paths: usize,
pub failed_items: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MigrationManifest {
pub generated_at: DateTime<Utc>,
pub legacy_root: String,
pub store_root: String,
pub manifest_path: String,
pub report_path: String,
pub dry_run: bool,
pub totals: MigrationTotals,
pub items: Vec<MigrationItem>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MigrationItem {
pub item_id: String,
pub legacy_kind: LegacyItemKind,
pub legacy_group: String,
pub legacy_files: Vec<String>,
pub agent_hint: Option<String>,
pub date_hint: Option<String>,
pub source_hints: Vec<String>,
pub existing_sources: Vec<String>,
pub missing_sources: Vec<String>,
pub ambiguous_sources: Vec<String>,
pub action: MigrationAction,
pub action_reason: String,
pub execution: MigrationExecution,
pub canonical_paths: Vec<String>,
pub salvage_paths: Vec<String>,
pub errors: Vec<String>,
}
impl MigrationItem {
fn from_plan(plan: &LegacyItemPlan) -> Self {
let mut legacy_files: Vec<String> = plan
.legacy_files
.iter()
.map(|path| path.display().to_string())
.collect();
legacy_files.sort();
let mut existing_sources: Vec<String> = plan
.resolved_sources
.iter()
.map(|source| source.path.display().to_string())
.collect();
existing_sources.sort();
let mut canonical_paths: Vec<String> = plan
.canonical_paths
.iter()
.map(|path| path.display().to_string())
.collect();
canonical_paths.sort();
let mut salvage_paths: Vec<String> = plan
.salvage_paths
.iter()
.map(|path| path.display().to_string())
.collect();
salvage_paths.sort();
Self {
item_id: plan.item_id.clone(),
legacy_kind: plan.legacy_kind,
legacy_group: plan.legacy_group.clone(),
legacy_files,
agent_hint: plan.agent_hint.clone(),
date_hint: plan.date_hint.clone(),
source_hints: plan.source_hints.clone(),
existing_sources,
missing_sources: plan.missing_sources.clone(),
ambiguous_sources: plan.ambiguous_sources.clone(),
action: plan.action,
action_reason: plan.action_reason.clone(),
execution: plan.execution,
canonical_paths,
salvage_paths,
errors: plan.errors.clone(),
}
}
}
impl MigrationTotals {
fn from_items(items: &[MigrationItem]) -> Self {
Self {
total_items: items.len(),
total_legacy_files: items.iter().map(|item| item.legacy_files.len()).sum(),
rebuild_items: items
.iter()
.filter(|item| item.action == MigrationAction::Rebuild)
.count(),
rebuild_and_salvage_items: items
.iter()
.filter(|item| item.action == MigrationAction::RebuildAndSalvage)
.count(),
salvage_items: items
.iter()
.filter(|item| item.action == MigrationAction::Salvage)
.count(),
unclassified_items: items
.iter()
.filter(|item| is_unclassified_item(item))
.count(),
resolved_sources: items.iter().map(|item| item.existing_sources.len()).sum(),
missing_source_hints: items.iter().map(|item| item.missing_sources.len()).sum(),
ambiguous_source_hints: items.iter().map(|item| item.ambiguous_sources.len()).sum(),
rebuilt_items: items
.iter()
.filter(|item| !item.canonical_paths.is_empty())
.count(),
salvaged_items: items
.iter()
.filter(|item| !item.salvage_paths.is_empty())
.count(),
rebuilt_paths: items.iter().map(|item| item.canonical_paths.len()).sum(),
salvaged_paths: items.iter().map(|item| item.salvage_paths.len()).sum(),
failed_items: items.iter().filter(|item| !item.errors.is_empty()).count(),
}
}
}
#[derive(Debug, Clone)]
struct LegacyItemPlan {
item_id: String,
legacy_kind: LegacyItemKind,
legacy_group: String,
legacy_files: Vec<PathBuf>,
agent_hint: Option<String>,
date_hint: Option<String>,
source_hints: Vec<String>,
resolved_sources: Vec<ResolvedSource>,
missing_sources: Vec<String>,
ambiguous_sources: Vec<String>,
action: MigrationAction,
action_reason: String,
execution: MigrationExecution,
canonical_paths: Vec<PathBuf>,
salvage_paths: Vec<PathBuf>,
errors: Vec<String>,
}
#[derive(Debug, Clone)]
struct LegacyBundleDescriptor {
bundle_key: PathBuf,
agent_hint: Option<String>,
date_hint: Option<String>,
}
#[derive(Debug, Clone, Copy)]
enum SourceFormat {
Claude,
Codex,
Gemini,
GeminiAntigravity,
Junie,
}
#[derive(Debug, Clone)]
struct ResolvedSource {
path: PathBuf,
format: SourceFormat,
}
#[derive(Debug, Clone, Default)]
struct SourceResolution {
source_hints: Vec<String>,
resolved_sources: Vec<ResolvedSource>,
missing_sources: Vec<String>,
ambiguous_sources: Vec<String>,
}
#[derive(Debug, Clone, Default)]
struct SourceProcessingOutcome {
canonical_paths: Vec<PathBuf>,
error: Option<String>,
}
#[derive(Debug, Clone, Default)]
struct SourceLocator {
index: HashMap<String, Vec<PathBuf>>,
}
#[derive(Debug, Clone)]
enum SourceLookupOutcome {
Missing,
Unique(PathBuf),
Ambiguous(Vec<PathBuf>),
}
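/// Runs the legacy-store migration against the default `~/.ai-contexters` and `~/.aicx`
/// roots, printing a summary and writing the manifest and report unless `dry_run` is set.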
pub fn run_migration(dry_run: bool) -> Result<MigrationManifest> {
run_migration_with_paths(dry_run, None, None)
}
pub fn run_migration_with_paths(
dry_run: bool,
legacy_root: Option<PathBuf>,
store_root: Option<PathBuf>,
) -> Result<MigrationManifest> {
let legacy_root = legacy_root.unwrap_or(legacy_store_base_dir()?);
let store_root = store_root.unwrap_or(store_base_dir()?);
let locator = SourceLocator::from_home();
let manifest = run_migration_at(&legacy_root, &store_root, dry_run, &locator)?;
print_migration_summary(&manifest);
if dry_run {
println!(
"[DRY RUN] Would write migration manifest to {}",
manifest.manifest_path
);
println!(
"[DRY RUN] Would write migration report to {}",
manifest.report_path
);
} else {
println!("Wrote migration manifest to {}", manifest.manifest_path);
println!("Wrote migration report to {}", manifest.report_path);
}
Ok(manifest)
}
fn run_migration_at(
legacy_root: &Path,
store_root: &Path,
dry_run: bool,
locator: &SourceLocator,
) -> Result<MigrationManifest> {
let normalized_legacy_root = if legacy_root.exists() {
sanitize::validate_dir_path(legacy_root)?
} else {
legacy_root.to_path_buf()
};
let mut items = collect_legacy_items(&normalized_legacy_root, locator)?;
if !dry_run {
execute_migration_items(&normalized_legacy_root, store_root, &mut items)?;
}
let manifest =
build_migration_manifest_at(&normalized_legacy_root, store_root, dry_run, &items);
if !dry_run {
write_migration_artifacts(&manifest)?;
}
Ok(manifest)
}
fn build_migration_manifest_at(
legacy_root: &Path,
store_root: &Path,
dry_run: bool,
items: &[LegacyItemPlan],
) -> MigrationManifest {
let items: Vec<MigrationItem> = items.iter().map(MigrationItem::from_plan).collect();
let totals = MigrationTotals::from_items(&items);
MigrationManifest {
generated_at: Utc::now(),
legacy_root: legacy_root.display().to_string(),
store_root: store_root.display().to_string(),
manifest_path: migration_manifest_path(store_root).display().to_string(),
report_path: migration_report_path(store_root).display().to_string(),
dry_run,
totals,
items,
}
}
fn write_migration_artifacts(manifest: &MigrationManifest) -> Result<()> {
let manifest_path = PathBuf::from(&manifest.manifest_path);
if let Some(parent) = manifest_path.parent() {
fs::create_dir_all(parent)?;
}
let manifest_json = serde_json::to_string_pretty(manifest)?;
let manifest_path = sanitize::validate_write_path(&manifest_path)?;
fs::write(&manifest_path, manifest_json)?;
let report_path = PathBuf::from(&manifest.report_path);
if let Some(parent) = report_path.parent() {
fs::create_dir_all(parent)?;
}
let report = render_migration_report(manifest);
let report_path = sanitize::validate_write_path(&report_path)?;
fs::write(&report_path, report)?;
Ok(())
}
fn render_migration_report(manifest: &MigrationManifest) -> String {
let mut report = String::new();
report.push_str("# AICX Migration Report\n\n");
report.push_str(&format!(
"- Generated at: `{}`\n",
manifest.generated_at.to_rfc3339()
));
report.push_str(&format!("- Dry run: `{}`\n", manifest.dry_run));
report.push_str(&format!("- Legacy root: `{}`\n", manifest.legacy_root));
report.push_str(&format!("- Store root: `{}`\n", manifest.store_root));
report.push_str(&format!("- Manifest: `{}`\n", manifest.manifest_path));
report.push_str(&format!("- Report: `{}`\n\n", manifest.report_path));
report.push_str("## Summary\n\n");
report.push_str(&format!(
"- Swept `{}` migration item(s) from `{}` legacy file(s)\n",
manifest.totals.total_items, manifest.totals.total_legacy_files
));
report.push_str(&format!(
"- Planned rebuild-only items: `{}`\n",
manifest.totals.rebuild_items
));
report.push_str(&format!(
"- Planned rebuild+salvage items: `{}`\n",
manifest.totals.rebuild_and_salvage_items
));
report.push_str(&format!(
"- Planned salvage-only items: `{}`\n",
manifest.totals.salvage_items
));
report.push_str(&format!(
"- Unclassified legacy items: `{}`\n",
manifest.totals.unclassified_items
));
report.push_str(&format!(
"- Resolved source matches: `{}`\n",
manifest.totals.resolved_sources
));
report.push_str(&format!(
"- Missing source hints: `{}`\n",
manifest.totals.missing_source_hints
));
report.push_str(&format!(
"- Ambiguous source hints: `{}`\n",
manifest.totals.ambiguous_source_hints
));
report.push_str(&format!(
"- Rebuilt items: `{}` (`{}` canonical path(s))\n",
manifest.totals.rebuilt_items, manifest.totals.rebuilt_paths
));
report.push_str(&format!(
"- Salvaged items: `{}` (`{}` preserved path(s))\n",
manifest.totals.salvaged_items, manifest.totals.salvaged_paths
));
report.push_str(&format!(
"- Items with execution errors: `{}`\n\n",
manifest.totals.failed_items
));
push_report_section(
&mut report,
if manifest.dry_run {
"Planned Rebuild"
} else {
"Rebuilt"
},
manifest.items.iter().filter(|item| {
matches!(
item.action,
MigrationAction::Rebuild | MigrationAction::RebuildAndSalvage
)
}),
);
push_report_section(
&mut report,
if manifest.dry_run {
"Planned Salvage"
} else {
"Salvaged"
},
manifest.items.iter().filter(|item| {
item.action == MigrationAction::Salvage
|| item.action == MigrationAction::RebuildAndSalvage
|| !item.salvage_paths.is_empty()
}),
);
push_report_section(
&mut report,
"Unclassified Legacy Items",
manifest
.items
.iter()
.filter(|item| is_unclassified_item(item)),
);
report
}
fn push_report_section<'a, I>(report: &mut String, title: &str, items: I)
where
I: Iterator<Item = &'a MigrationItem>,
{
report.push_str(&format!("## {}\n\n", title));
let mut wrote = false;
for item in items {
wrote = true;
report.push_str(&format!(
"- `{}` [{}]\n",
item.legacy_group, item.action_reason
));
if !item.existing_sources.is_empty() {
report.push_str(&format!(
" sources: `{}`\n",
item.existing_sources.join("`, `")
));
}
if !item.canonical_paths.is_empty() {
report.push_str(&format!(
" canonical: `{}`\n",
item.canonical_paths.join("`, `")
));
}
if !item.salvage_paths.is_empty() {
report.push_str(&format!(
" legacy: `{}`\n",
item.salvage_paths.join("`, `")
));
}
if !item.missing_sources.is_empty() {
report.push_str(&format!(
" missing: `{}`\n",
item.missing_sources.join("`, `")
));
}
if !item.ambiguous_sources.is_empty() {
report.push_str(&format!(
" ambiguous: `{}`\n",
item.ambiguous_sources.join("`, `")
));
}
if !item.errors.is_empty() {
report.push_str(&format!(" errors: `{}`\n", item.errors.join("`, `")));
}
}
if !wrote {
report.push_str("- none\n");
}
report.push('\n');
}
fn print_migration_summary(manifest: &MigrationManifest) {
println!(
"Legacy sweep: {} item(s) from {} file(s).",
manifest.totals.total_items, manifest.totals.total_legacy_files
);
println!("Legacy root: {}", manifest.legacy_root);
println!("Store root: {}", manifest.store_root);
println!(
"Plan: {} rebuild-only, {} rebuild+salvage, {} salvage-only, {} unclassified.",
manifest.totals.rebuild_items,
manifest.totals.rebuild_and_salvage_items,
manifest.totals.salvage_items,
manifest.totals.unclassified_items
);
println!(
"Source hints: {} resolved, {} missing, {} ambiguous.",
manifest.totals.resolved_sources,
manifest.totals.missing_source_hints,
manifest.totals.ambiguous_source_hints
);
if !manifest.dry_run {
println!(
"Executed: {} rebuilt item(s) -> {} canonical path(s); {} salvaged item(s) -> {} preserved path(s); {} item(s) with errors.",
manifest.totals.rebuilt_items,
manifest.totals.rebuilt_paths,
manifest.totals.salvaged_items,
manifest.totals.salvaged_paths,
manifest.totals.failed_items
);
}
}
fn collect_legacy_items(
legacy_root: &Path,
locator: &SourceLocator,
) -> Result<Vec<LegacyItemPlan>> {
let mut files = Vec::new();
collect_legacy_files(legacy_root, &mut files)?;
let mut bundles: BTreeMap<String, (LegacyBundleDescriptor, Vec<PathBuf>)> = BTreeMap::new();
let mut loose_files = Vec::new();
for file in files {
let relative = match file.strip_prefix(legacy_root) {
Ok(relative) => relative.to_path_buf(),
Err(_) => continue,
};
if let Some(descriptor) = legacy_bundle_descriptor(&relative) {
bundles
.entry(descriptor.bundle_key.display().to_string())
.or_insert_with(|| (descriptor.clone(), Vec::new()))
.1
.push(file);
} else {
loose_files.push(file);
}
}
let mut items = Vec::new();
for (_, (descriptor, mut bundle_files)) in bundles {
bundle_files.sort();
items.push(build_context_bundle_plan(
&bundle_files,
&descriptor,
locator,
)?);
}
loose_files.sort();
for file in loose_files {
let relative = file
.strip_prefix(legacy_root)
.unwrap_or(file.as_path())
.display()
.to_string();
items.push(LegacyItemPlan {
item_id: relative.clone(),
legacy_kind: LegacyItemKind::LooseFile,
legacy_group: relative,
legacy_files: vec![file],
agent_hint: None,
date_hint: None,
source_hints: Vec::new(),
resolved_sources: Vec::new(),
missing_sources: Vec::new(),
ambiguous_sources: Vec::new(),
action: MigrationAction::Salvage,
action_reason: "non_context_legacy_file".to_string(),
execution: MigrationExecution::Planned,
canonical_paths: Vec::new(),
salvage_paths: Vec::new(),
errors: Vec::new(),
});
}
items.sort_by(|left, right| left.legacy_group.cmp(&right.legacy_group));
Ok(items)
}
fn collect_legacy_files(root: &Path, files: &mut Vec<PathBuf>) -> Result<()> {
if !root.exists() || !root.is_dir() {
return Ok(());
}
for entry in read_store_dir(root)?.filter_map(|entry| entry.ok()) {
let path = entry.path();
let name = entry.file_name().to_string_lossy().to_string();
if name.starts_with('.') {
continue;
}
let file_type = match entry.file_type() {
Ok(file_type) => file_type,
Err(_) => continue,
};
if file_type.is_dir() {
collect_legacy_files(&path, files)?;
} else if file_type.is_file() {
files.push(path);
}
}
Ok(())
}
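/// Recognizes legacy context bundles: files whose stem matches `HHMMSS_<agent>`, optionally
/// suffixed with `-NNN` or `-context`, inside a directory named after an ISO date.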
fn legacy_bundle_descriptor(relative: &Path) -> Option<LegacyBundleDescriptor> {
let file_name = relative.file_name()?.to_str()?;
let stem = Path::new(file_name).file_stem()?.to_str()?;
let parent = relative.parent()?;
let date_hint = parent.file_name()?.to_str()?.to_string();
if !looks_like_iso_date(&date_hint) {
return None;
}
let bundle_re =
Regex::new(r"^(?P<time>\d{6})_(?P<agent>[A-Za-z0-9_]+)(?:-(?P<tail>\d{3}|context))?$")
.expect("legacy bundle regex should compile");
let captures = bundle_re.captures(stem)?;
let bundle_name = format!("{}_{}", &captures["time"], &captures["agent"]);
Some(LegacyBundleDescriptor {
bundle_key: parent.join(bundle_name),
agent_hint: Some(captures["agent"].to_string()),
date_hint: Some(date_hint),
})
}
fn build_context_bundle_plan(
bundle_files: &[PathBuf],
descriptor: &LegacyBundleDescriptor,
locator: &SourceLocator,
) -> Result<LegacyItemPlan> {
let resolution =
resolve_sources_for_legacy_files(bundle_files, descriptor.agent_hint.as_deref(), locator);
let has_resolved = !resolution.resolved_sources.is_empty();
let has_unresolved =
!resolution.missing_sources.is_empty() || !resolution.ambiguous_sources.is_empty();
let (action, action_reason) = if has_resolved && has_unresolved {
(
MigrationAction::RebuildAndSalvage,
"partial_source_recovery".to_string(),
)
} else if has_resolved {
(MigrationAction::Rebuild, "rebuild_from_source".to_string())
} else if !resolution.ambiguous_sources.is_empty() {
(
MigrationAction::Salvage,
"ambiguous_source_hints".to_string(),
)
} else if !resolution.source_hints.is_empty() {
(MigrationAction::Salvage, "missing_source".to_string())
} else {
(MigrationAction::Salvage, "no_source_hints".to_string())
};
Ok(LegacyItemPlan {
item_id: descriptor.bundle_key.display().to_string(),
legacy_kind: LegacyItemKind::ContextBundle,
legacy_group: descriptor.bundle_key.display().to_string(),
legacy_files: bundle_files.to_vec(),
agent_hint: descriptor.agent_hint.clone(),
date_hint: descriptor.date_hint.clone(),
source_hints: resolution.source_hints,
resolved_sources: resolution.resolved_sources,
missing_sources: resolution.missing_sources,
ambiguous_sources: resolution.ambiguous_sources,
action,
action_reason,
execution: MigrationExecution::Planned,
canonical_paths: Vec::new(),
salvage_paths: Vec::new(),
errors: Vec::new(),
})
}
fn resolve_sources_for_legacy_files(
bundle_files: &[PathBuf],
agent_hint: Option<&str>,
locator: &SourceLocator,
) -> SourceResolution {
let mut direct_candidates = BTreeSet::new();
let mut lookup_hints = BTreeSet::new();
let mut source_hints = BTreeSet::new();
for file in bundle_files {
let content = sanitize::read_to_string_validated(file).unwrap_or_default();
collect_source_hints_from_text(
&content,
agent_hint,
&mut direct_candidates,
&mut lookup_hints,
&mut source_hints,
);
}
let mut resolved_sources = BTreeMap::new();
let mut missing_sources = BTreeSet::new();
let mut ambiguous_sources = BTreeSet::new();
let mut handled_lookup_hints = BTreeSet::new();
for direct in direct_candidates {
source_hints.insert(direct.display().to_string());
if direct.exists() {
if let Some(format) = source_format_hint(&direct, agent_hint) {
register_lookup_keys(&direct, &mut handled_lookup_hints);
resolved_sources.insert(
direct.clone(),
ResolvedSource {
path: direct,
format,
},
);
} else {
missing_sources.insert(format!("unsupported: {}", direct.display()));
}
continue;
}
let lookup_key = direct
.file_name()
.and_then(|name| name.to_str())
.map(|name| name.to_ascii_lowercase());
match lookup_key
.as_deref()
.map(|key| locator.lookup(key))
.unwrap_or(SourceLookupOutcome::Missing)
{
SourceLookupOutcome::Unique(path) => {
if let Some(format) = source_format_hint(&path, agent_hint) {
register_lookup_keys(&direct, &mut handled_lookup_hints);
resolved_sources.insert(path.clone(), ResolvedSource { path, format });
} else {
missing_sources.insert(format!("unsupported: {}", direct.display()));
}
}
SourceLookupOutcome::Ambiguous(paths) => {
ambiguous_sources.insert(format!(
"{} -> {}",
direct.display(),
display_paths(&paths)
));
}
SourceLookupOutcome::Missing => {
missing_sources.insert(direct.display().to_string());
}
}
}
for hint in lookup_hints {
if handled_lookup_hints.contains(&hint) {
continue;
}
source_hints.insert(hint.clone());
match locator.lookup(&hint) {
SourceLookupOutcome::Unique(path) => {
if let Some(format) = source_format_hint(&path, agent_hint) {
resolved_sources.insert(path.clone(), ResolvedSource { path, format });
} else {
missing_sources.insert(format!("unsupported: {}", hint));
}
}
SourceLookupOutcome::Ambiguous(paths) => {
ambiguous_sources.insert(format!("{} -> {}", hint, display_paths(&paths)));
}
SourceLookupOutcome::Missing => {
missing_sources.insert(hint);
}
}
}
SourceResolution {
source_hints: source_hints.into_iter().collect(),
resolved_sources: resolved_sources.into_values().collect(),
missing_sources: missing_sources.into_iter().collect(),
ambiguous_sources: ambiguous_sources.into_iter().collect(),
}
}
fn collect_source_hints_from_text(
text: &str,
agent_hint: Option<&str>,
direct_candidates: &mut BTreeSet<PathBuf>,
lookup_hints: &mut BTreeSet<String>,
source_hints: &mut BTreeSet<String>,
) {
let absolute_path_re = Regex::new(
r"(?:file://)?(/(?:[A-Za-z0-9._~\-]+/)*[A-Za-z0-9._~\-]+(?:\.[A-Za-z0-9._~-]+)?)",
)
.expect("absolute legacy source hint regex should compile");
let tilde_path_re =
Regex::new(r"(~/(?:[A-Za-z0-9._~\-]+/)*[A-Za-z0-9._~\-]+(?:\.[A-Za-z0-9._~-]+)?)")
.expect("tilde legacy source hint regex should compile");
for capture in absolute_path_re.captures_iter(text) {
if let Some(raw) = capture.get(1) {
let candidate = PathBuf::from(raw.as_str());
if source_format_hint(&candidate, agent_hint).is_some() {
source_hints.insert(candidate.display().to_string());
direct_candidates.insert(candidate);
}
}
}
for capture in tilde_path_re.captures_iter(text) {
if let Some(raw) = capture.get(1) {
let expanded = expand_tilde(raw.as_str());
if source_format_hint(&expanded, agent_hint).is_some() {
source_hints.insert(raw.as_str().to_string());
direct_candidates.insert(expanded);
}
}
}
for hint_re in [
Regex::new(r"\brollout-[A-Za-z0-9T._:-]+\.jsonl\b")
.expect("codex rollout hint regex should compile"),
Regex::new(r"\bsession-[A-Za-z0-9._-]+\.json\b")
.expect("gemini session hint regex should compile"),
Regex::new(r"\b[0-9a-fA-F-]{16,}\.pb\b").expect("antigravity pb hint regex should compile"),
Regex::new(r"\b[0-9a-fA-F-]{16,}\.jsonl\b")
.expect("claude jsonl hint regex should compile"),
] {
for capture in hint_re.captures_iter(text) {
if let Some(raw) = capture.get(0) {
let hint = raw.as_str().to_ascii_lowercase();
source_hints.insert(raw.as_str().to_string());
lookup_hints.insert(hint);
}
}
}
}
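/// Guesses the source format for a hinted path from its extension, filename and location,
/// constrained by the agent hint when one is present.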
fn source_format_hint(path: &Path, agent_hint: Option<&str>) -> Option<SourceFormat> {
let path_str = path.to_string_lossy().to_ascii_lowercase();
let file_name = path
.file_name()
.and_then(|name| name.to_str())
.map(|name| name.to_ascii_lowercase())
.unwrap_or_default();
let extension = path
.extension()
.and_then(|ext| ext.to_str())
.map(|ext| ext.to_ascii_lowercase());
match agent_hint {
Some("claude") => {
if extension.as_deref() == Some("jsonl")
|| extension.as_deref() == Some("output")
|| path_str.contains("/.claude/")
{
return Some(SourceFormat::Claude);
}
return None;
}
Some("codex") => {
if extension.as_deref() == Some("jsonl")
|| file_name == "history.jsonl"
|| file_name.starts_with("rollout-")
|| path_str.contains("/.codex/")
{
return Some(SourceFormat::Codex);
}
return None;
}
Some("gemini") => {
if extension.as_deref() == Some("pb")
|| path_str.contains("/antigravity/brain/")
|| path_str.contains("/antigravity/conversations/")
{
return Some(SourceFormat::GeminiAntigravity);
}
if extension.as_deref() == Some("json")
&& (file_name.starts_with("session-") || path_str.contains("/.gemini/tmp/"))
{
return Some(SourceFormat::Gemini);
}
return None;
}
Some("junie") => {
if extension.as_deref() == Some("jsonl")
&& file_name == "events.jsonl"
&& (path_str.contains("/.junie/sessions/")
|| path
.parent()
.and_then(|parent| parent.file_name())
.and_then(|name| name.to_str())
.is_some_and(|name| name.starts_with("session-")))
{
return Some(SourceFormat::Junie);
}
return None;
}
Some(_) => return None,
None => {}
}
if extension.as_deref() == Some("pb")
|| path_str.contains("/antigravity/brain/")
|| path_str.contains("/antigravity/conversations/")
{
return Some(SourceFormat::GeminiAntigravity);
}
if extension.as_deref() == Some("json")
&& (file_name.starts_with("session-") || path_str.contains("/.gemini/tmp/"))
{
return Some(SourceFormat::Gemini);
}
if extension.as_deref() == Some("jsonl")
&& file_name == "events.jsonl"
&& (path_str.contains("/.junie/sessions/")
|| path
.parent()
.and_then(|parent| parent.file_name())
.and_then(|name| name.to_str())
.is_some_and(|name| name.starts_with("session-")))
{
return Some(SourceFormat::Junie);
}
if extension.as_deref() == Some("jsonl")
&& (file_name.starts_with("rollout-")
|| file_name == "history.jsonl"
|| path_str.contains("/.codex/"))
{
return Some(SourceFormat::Codex);
}
if (extension.as_deref() == Some("jsonl") || extension.as_deref() == Some("output"))
&& path_str.contains("/.claude/")
{
return Some(SourceFormat::Claude);
}
None
}
fn execute_migration_items(
legacy_root: &Path,
store_root: &Path,
items: &mut [LegacyItemPlan],
) -> Result<()> {
let chunker_config = ChunkerConfig::default();
let mut source_cache: HashMap<PathBuf, SourceProcessingOutcome> = HashMap::new();
for item in items {
item.execution = MigrationExecution::Executed;
if matches!(
item.action,
MigrationAction::Rebuild | MigrationAction::RebuildAndSalvage
) {
for source in item.resolved_sources.clone() {
let outcome = source_cache.entry(source.path.clone()).or_insert_with(|| {
rebuild_source_into_store(store_root, &source, &chunker_config)
});
for path in &outcome.canonical_paths {
push_unique_path(&mut item.canonical_paths, path.clone());
}
if let Some(error) = &outcome.error {
item.errors
.push(format!("{}: {}", source.path.display(), error));
}
}
}
if item.action == MigrationAction::Rebuild && item.canonical_paths.is_empty() {
item.errors
.push("No canonical paths were written from resolved sources.".to_string());
}
let should_salvage = matches!(
item.action,
MigrationAction::Salvage | MigrationAction::RebuildAndSalvage
) || !item.errors.is_empty()
|| (item.action == MigrationAction::Rebuild && item.canonical_paths.is_empty());
if should_salvage {
let salvaged = preserve_legacy_item(legacy_root, store_root, item)?;
item.salvage_paths = salvaged;
}
}
Ok(())
}
fn rebuild_source_into_store(
store_root: &Path,
source: &ResolvedSource,
chunker_config: &ChunkerConfig,
) -> SourceProcessingOutcome {
match rebuild_source_into_store_impl(store_root, source, chunker_config) {
Ok(canonical_paths) => SourceProcessingOutcome {
canonical_paths,
error: None,
},
Err(error) => SourceProcessingOutcome {
canonical_paths: Vec::new(),
error: Some(error.to_string()),
},
}
}
fn rebuild_source_into_store_impl(
store_root: &Path,
source: &ResolvedSource,
chunker_config: &ChunkerConfig,
) -> Result<Vec<PathBuf>> {
let entries = extract_entries_from_source(source)?;
if entries.is_empty() {
anyhow::bail!("source produced no timeline entries");
}
let summary = store_semantic_segments_at(store_root, &entries, chunker_config, |_, _| {})?;
Ok(summary.written_paths)
}
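/// Extracts timeline entries with the format-specific extractor using a
/// permissive configuration: no project filter, a Unix-epoch cutoff, assistant
/// entries included, and no watermark.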
fn extract_entries_from_source(source: &ResolvedSource) -> Result<Vec<TimelineEntry>> {
let config = ExtractionConfig {
project_filter: Vec::new(),
cutoff: Utc
.timestamp_opt(0, 0)
.single()
.expect("unix epoch should be representable"),
include_assistant: true,
watermark: None,
};
match source.format {
SourceFormat::Claude => sources::extract_claude_file(&source.path, &config),
SourceFormat::Codex => sources::extract_codex_file(&source.path, &config),
SourceFormat::Gemini => sources::extract_gemini_file(&source.path, &config),
SourceFormat::GeminiAntigravity => {
sources::extract_gemini_antigravity_file(&source.path, &config)
}
SourceFormat::Junie => sources::extract_junie_file(&source.path, &config),
}
}
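/// Copies an item's legacy files into the salvage directory, preserving their
/// layout relative to `legacy_root`, and writes a provenance JSON file that
/// records why and from which sources the item was salvaged.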
fn preserve_legacy_item(
legacy_root: &Path,
store_root: &Path,
item: &LegacyItemPlan,
) -> Result<Vec<PathBuf>> {
let salvage_root = legacy_salvage_dir(store_root);
fs::create_dir_all(&salvage_root)?;
let mut preserved = Vec::new();
for legacy_file in &item.legacy_files {
let legacy_file = sanitize::validate_read_path(legacy_file)?;
let relative = legacy_file
.strip_prefix(legacy_root)
.unwrap_or(legacy_file.as_path());
let destination = salvage_root.join(relative);
if let Some(parent) = destination.parent() {
fs::create_dir_all(parent)?;
}
let destination = sanitize::validate_write_path(&destination)?;
fs::copy(legacy_file, &destination)?;
preserved.push(destination);
}
let provenance_path = provenance_path_for_item(&salvage_root, legacy_root, item);
if let Some(parent) = provenance_path.parent() {
fs::create_dir_all(parent)?;
}
let provenance_path = sanitize::validate_write_path(&provenance_path)?;
let provenance = serde_json::json!({
"generated_at": Utc::now().to_rfc3339(),
"item_id": item.item_id.clone(),
"legacy_group": item.legacy_group.clone(),
"action": item.action,
"action_reason": item.action_reason.clone(),
"legacy_files": item.legacy_files.iter().map(|path| path.display().to_string()).collect::<Vec<_>>(),
"source_hints": item.source_hints.clone(),
"existing_sources": item.resolved_sources.iter().map(|source| source.path.display().to_string()).collect::<Vec<_>>(),
"missing_sources": item.missing_sources.clone(),
"ambiguous_sources": item.ambiguous_sources.clone(),
"errors": item.errors.clone(),
});
fs::write(&provenance_path, serde_json::to_string_pretty(&provenance)?)?;
preserved.push(provenance_path);
Ok(preserved)
}
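/// Derives the provenance file path for a salvaged item. Context bundles are
/// anchored on the legacy group name; other items use their first legacy file
/// relative to the legacy root.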
fn provenance_path_for_item(
salvage_root: &Path,
legacy_root: &Path,
item: &LegacyItemPlan,
) -> PathBuf {
let anchor = if item.legacy_kind == LegacyItemKind::ContextBundle {
PathBuf::from(&item.legacy_group)
} else {
item.legacy_files
.first()
.and_then(|path| path.strip_prefix(legacy_root).ok())
.map(Path::to_path_buf)
.unwrap_or_else(|| PathBuf::from(&item.legacy_group))
};
let parent = anchor.parent().map(Path::to_path_buf).unwrap_or_default();
let file_name = anchor
.file_name()
.map(|name| name.to_string_lossy().to_string())
.unwrap_or_else(|| "legacy-item".to_string());
salvage_root
.join(parent)
.join(format!("{}.migration-provenance.json", file_name))
}
fn push_unique_path(paths: &mut Vec<PathBuf>, candidate: PathBuf) {
if !paths.iter().any(|path| path == &candidate) {
paths.push(candidate);
}
}
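/// Returns true for items the migration report treats as unclassified: loose
/// files, items without source hints, and non-context legacy files.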
fn is_unclassified_item(item: &MigrationItem) -> bool {
item.legacy_kind == LegacyItemKind::LooseFile
|| item.action_reason == "no_source_hints"
|| item.action_reason == "non_context_legacy_file"
}
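/// Returns true for strings shaped like an ISO date (`YYYY-MM-DD`): ten
/// characters with dashes at positions 4 and 7 and digits elsewhere.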
fn looks_like_iso_date(value: &str) -> bool {
value.len() == 10
&& value.chars().enumerate().all(|(idx, ch)| match idx {
4 | 7 => ch == '-',
_ => ch.is_ascii_digit(),
})
}
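/// Expands a leading `~/` to the user's home directory when one is available;
/// any other input becomes a `PathBuf` unchanged.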
fn expand_tilde(raw: &str) -> PathBuf {
if let Some(rest) = raw.strip_prefix("~/")
&& let Some(home) = dirs::home_dir()
{
return home.join(rest);
}
PathBuf::from(raw)
}
fn display_paths(paths: &[PathBuf]) -> String {
paths
.iter()
.map(|path| path.display().to_string())
.collect::<Vec<_>>()
.join(", ")
}
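/// Records the lowercased file name and file stem of `path` as handled
/// lookup hints.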
fn register_lookup_keys(path: &Path, handled_lookup_hints: &mut BTreeSet<String>) {
if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
handled_lookup_hints.insert(file_name.to_ascii_lowercase());
}
if let Some(stem) = path.file_stem().and_then(|stem| stem.to_str()) {
handled_lookup_hints.insert(stem.to_ascii_lowercase());
}
}
impl SourceLocator {
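/// Builds a locator by indexing the well-known agent history locations under
/// the home directory: Claude projects, Codex sessions and `history.jsonl`,
/// Gemini tmp session files, and Antigravity conversation and brain
/// directories.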
fn from_home() -> Self {
let Some(home) = dirs::home_dir() else {
return Self::default();
};
let mut locator = Self::default();
locator.index_recursive(home.join(".claude").join("projects"), |path| {
matches!(
path.extension().and_then(|ext| ext.to_str()),
Some("jsonl" | "output")
)
});
locator.index_recursive(home.join(".codex").join("sessions"), |path| {
path.extension().and_then(|ext| ext.to_str()) == Some("jsonl")
});
locator.index_file(home.join(".codex").join("history.jsonl"));
locator.index_recursive(home.join(".gemini").join("tmp"), |path| {
path.extension().and_then(|ext| ext.to_str()) == Some("json")
&& path
.file_name()
.and_then(|name| name.to_str())
.is_some_and(|name| name.starts_with("session-"))
});
locator.index_recursive(
home.join(".gemini")
.join("antigravity")
.join("conversations"),
|path| path.extension().and_then(|ext| ext.to_str()) == Some("pb"),
);
locator.index_directories(home.join(".gemini").join("antigravity").join("brain"));
locator
}
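/// Resolves a lookup hint by its lowercased key, deduplicating the indexed
/// paths and reporting whether the match is missing, unique, or ambiguous.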
fn lookup(&self, hint: &str) -> SourceLookupOutcome {
let key = hint.to_ascii_lowercase();
let Some(paths) = self.index.get(&key) else {
return SourceLookupOutcome::Missing;
};
let unique: Vec<PathBuf> = paths
.iter()
.cloned()
.collect::<BTreeSet<_>>()
.into_iter()
.collect();
match unique.as_slice() {
[] => SourceLookupOutcome::Missing,
[only] => SourceLookupOutcome::Unique(only.clone()),
many => SourceLookupOutcome::Ambiguous(many.to_vec()),
}
}
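/// Recursively walks `root` and indexes every file accepted by `include`;
/// unreadable directories are skipped silently.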
fn index_recursive<F>(&mut self, root: PathBuf, include: F)
where
F: Fn(&Path) -> bool + Copy,
{
if !root.exists() {
return;
}
let Ok(read_dir) = fs::read_dir(&root) else {
return;
};
for entry in read_dir.flatten() {
let path = entry.path();
if path.is_dir() {
self.index_recursive(path, include);
continue;
}
if include(&path) {
self.add_path(&path);
}
}
}
fn index_directories(&mut self, root: PathBuf) {
if !root.exists() {
return;
}
let Ok(read_dir) = fs::read_dir(&root) else {
return;
};
for entry in read_dir.flatten() {
let path = entry.path();
if path.is_dir() {
self.add_path(&path);
}
}
}
fn index_file(&mut self, path: PathBuf) {
if path.exists() {
self.add_path(&path);
}
}
fn add_path(&mut self, path: &Path) {
let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
return;
};
let lower_name = name.to_ascii_lowercase();
self.index
.entry(lower_name.clone())
.or_default()
.push(path.to_path_buf());
if let Some(stem) = path.file_stem().and_then(|stem| stem.to_str()) {
let lower_stem = stem.to_ascii_lowercase();
if lower_stem != lower_name {
self.index
.entry(lower_stem)
.or_default()
.push(path.to_path_buf());
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::TimeZone;
use filetime::{FileTime, set_file_mtime};
use std::env;
#[test]
fn test_store_base_dir() {
if let Ok(path) = store_base_dir() {
assert!(path.to_string_lossy().contains(".aicx"));
}
}
#[test]
fn test_chunks_dir() {
if let Ok(path) = chunks_dir() {
assert!(path.to_string_lossy().contains(".aicx"));
assert!(path.to_string_lossy().contains("chunks"));
}
}
#[test]
fn test_get_context_path_new_layout() {
if let Ok(path) = get_context_path("CodeScribe", "claude", "2026-01-22", "143005") {
let s = path.to_string_lossy();
assert!(s.contains("CodeScribe"));
assert!(s.contains("2026-01-22"));
assert!(s.ends_with("143005_claude-context.md"));
}
}
#[test]
fn test_get_context_json_path_new_layout() {
if let Ok(path) = get_context_json_path("CodeScribe", "claude", "2026-01-22", "143005") {
let s = path.to_string_lossy();
assert!(s.contains("CodeScribe"));
assert!(s.contains("2026-01-22"));
assert!(s.ends_with("143005_claude-context.json"));
}
}
#[test]
fn validated_store_project_dir_rejects_junk_bucket_segments() {
let root = retrieval_test_root("invalid-bucket-segments");
let _ = fs::remove_dir_all(&root);
fs::create_dir_all(&root).unwrap();
let bad = "VetCoders/vibecrafted.git`";
let err = validated_store_project_dir(&root, bad).expect_err("invalid repo bucket");
assert!(
err.to_string()
.contains("invalid canonical store project bucket")
);
assert!(!root.join("VetCoders").join("vibecrafted.git`").exists());
let bad = "VetCoders/loctree\n\n**AICX";
assert!(validated_store_project_dir(&root, bad).is_err());
assert!(!root.join("VetCoders").join("loctree\n\n**AICX").exists());
let _ = fs::remove_dir_all(&root);
}
#[test]
fn session_first_write_blocks_invalid_project_before_mkdir() {
let root = retrieval_test_root("invalid-session-write");
let _ = fs::remove_dir_all(&root);
fs::create_dir_all(&root).unwrap();
let entries = vec![semantic_entry(
(2026, 5, 6, 11, 0, 0),
"sess-invalid-bucket",
"user",
"This must not create a junk corpus bucket.",
None,
)];
let err = write_context_session_first_at(
&root,
SessionWriteSpec {
project: Some("VetCoders/vc-skills.git\"><span"),
agent: "codex",
date: "2026-05-06",
session_id: "sess-invalid-bucket",
kind: Some(Kind::Conversations),
},
&entries,
&ChunkerConfig::default(),
)
.expect_err("invalid repo segment should fail before mkdir");
assert!(
err.to_string()
.contains("invalid canonical store project bucket")
);
assert!(
!root
.join("VetCoders")
.join("vc-skills.git\"><span")
.exists()
);
let _ = fs::remove_dir_all(&root);
}
#[test]
fn test_write_context_creates_both_files() {
let tmp = env::temp_dir().join("ai-ctx-test-store-new");
let _ = fs::remove_dir_all(&tmp);
let date_dir = tmp.join("TestProj").join("2026-01-22");
fs::create_dir_all(&date_dir).unwrap();
let entries = vec![
TimelineEntry {
timestamp: Utc.with_ymd_and_hms(2026, 1, 22, 14, 30, 5).unwrap(),
agent: "claude".to_string(),
session_id: "sess-1".to_string(),
role: "user".to_string(),
message: "hello world".to_string(),
branch: None,
cwd: None,
frame_kind: None,
},
TimelineEntry {
timestamp: Utc.with_ymd_and_hms(2026, 1, 22, 14, 30, 12).unwrap(),
agent: "claude".to_string(),
session_id: "sess-1".to_string(),
role: "assistant".to_string(),
message: "hi there\nsecond line".to_string(),
branch: None,
cwd: None,
frame_kind: None,
},
];
let md_path = date_dir.join("143005_claude-context.md");
let mut content = String::new();
content.push_str("# TestProj | claude | 2026-01-22\n\n");
for entry in &entries {
let ts = entry.timestamp.format("%Y-%m-%d %H:%M:%S UTC");
content.push_str(&format!("### {} | {}\n", ts, entry.role));
for line in entry.message.lines() {
content.push_str(&format!("> {}\n", line));
}
content.push('\n');
}
fs::write(&md_path, &content).unwrap();
let written = fs::read_to_string(&md_path).unwrap();
assert!(written.contains("# TestProj | claude | 2026-01-22"));
assert!(written.contains("### 2026-01-22 14:30:05 UTC | user"));
assert!(written.contains("> hello world"));
assert!(written.contains("> hi there"));
assert!(written.contains("> second line"));
let json_path = date_dir.join("143005_claude-context.json");
let json_content = serde_json::to_string_pretty(&entries).unwrap();
fs::write(&json_path, &json_content).unwrap();
assert!(json_path.exists());
let _ = fs::remove_dir_all(&tmp);
}
#[test]
fn test_index_serialization_roundtrip() {
let mut index = StoreIndex::default();
update_index(&mut index, "CodeScribe", "claude", "2026-01-22", 42);
update_index(&mut index, "CodeScribe", "gemini", "2026-01-20", 10);
update_index(&mut index, "vista", "claude", "2026-01-21", 5);
let json = serde_json::to_string_pretty(&index).unwrap();
let restored: StoreIndex = serde_json::from_str(&json).unwrap();
assert_eq!(restored.projects.len(), 2);
assert!(restored.projects.contains_key("CodeScribe"));
assert!(restored.projects.contains_key("vista"));
let cs = &restored.projects["CodeScribe"];
assert_eq!(cs.agents["claude"].total_entries, 42);
assert_eq!(cs.agents["claude"].dates, vec!["2026-01-22"]);
assert_eq!(cs.agents["gemini"].total_entries, 10);
}
#[test]
fn test_update_index() {
let mut index = StoreIndex::default();
update_index(&mut index, "proj", "claude", "2026-01-20", 10);
update_index(&mut index, "proj", "claude", "2026-01-21", 5);
update_index(&mut index, "proj", "claude", "2026-01-20", 3);
let agent_idx = &index.projects["proj"].agents["claude"];
assert_eq!(agent_idx.total_entries, 18);
assert_eq!(agent_idx.dates, vec!["2026-01-20", "2026-01-21"]);
}
#[test]
fn test_list_stored_projects() {
let mut index = StoreIndex::default();
update_index(&mut index, "zebra", "claude", "2026-01-01", 1);
update_index(&mut index, "alpha", "codex", "2026-01-01", 1);
update_index(&mut index, "middle", "gemini", "2026-01-01", 1);
let projects = list_stored_projects(&index);
assert_eq!(projects, vec!["alpha", "middle", "zebra"]);
}
#[test]
fn test_update_index_deduplicates_dates() {
let mut index = StoreIndex::default();
update_index(&mut index, "proj", "claude", "2026-01-22", 5);
update_index(&mut index, "proj", "claude", "2026-01-22", 3);
update_index(&mut index, "proj", "claude", "2026-01-22", 7);
let dates = &index.projects["proj"].agents["claude"].dates;
assert_eq!(dates.len(), 1);
assert_eq!(dates[0], "2026-01-22");
}
fn make_entry(role: &str, message: &str) -> TimelineEntry {
TimelineEntry {
timestamp: Utc.with_ymd_and_hms(2026, 3, 21, 10, 0, 0).unwrap(),
agent: "claude".to_string(),
session_id: "test-session-abc123".to_string(),
role: role.to_string(),
message: message.to_string(),
branch: None,
cwd: None,
frame_kind: None,
}
}
#[test]
fn test_kind_dir_names() {
assert_eq!(Kind::Conversations.dir_name(), "conversations");
assert_eq!(Kind::Plans.dir_name(), "plans");
assert_eq!(Kind::Reports.dir_name(), "reports");
assert_eq!(Kind::Other.dir_name(), "other");
}
#[test]
fn test_kind_parse_roundtrip() {
for kind in [Kind::Conversations, Kind::Plans, Kind::Reports, Kind::Other] {
let parsed = Kind::parse(kind.dir_name()).unwrap();
assert_eq!(parsed, kind);
}
assert_eq!(Kind::parse("conversation"), Some(Kind::Conversations));
assert_eq!(Kind::parse("plan"), Some(Kind::Plans));
assert_eq!(Kind::parse("report"), Some(Kind::Reports));
assert_eq!(Kind::parse("PLANS"), Some(Kind::Plans));
assert_eq!(Kind::parse("Reports"), Some(Kind::Reports));
assert_eq!(Kind::parse("bogus"), None);
}
#[test]
fn test_kind_serde_roundtrip() {
let kind = Kind::Conversations;
let json = serde_json::to_string(&kind).unwrap();
assert_eq!(json, "\"conversations\"");
let restored: Kind = serde_json::from_str(&json).unwrap();
assert_eq!(restored, Kind::Conversations);
}
#[test]
fn test_kind_default_is_other() {
assert_eq!(Kind::default(), Kind::Other);
}
#[test]
fn test_classify_kind_empty_is_other() {
assert_eq!(classify_kind(&[]), Kind::Other);
}
#[test]
fn test_classify_kind_conversation_first() {
let entries = vec![
make_entry("user", "Can you help me fix this bug?"),
make_entry("assistant", "Sure, let me look at the code."),
make_entry("user", "It crashes on startup."),
make_entry("assistant", "I see the issue in the initialization."),
];
assert_eq!(classify_kind(&entries), Kind::Conversations);
}
#[test]
fn test_classify_kind_plan() {
let entries = vec![
make_entry("user", "Plan the migration"),
make_entry(
"assistant",
"## Plan\n\nStep 1: Audit current schema\nStep 2: Create migration scripts\nStep 3: Test on staging\nAction items for the team.",
),
make_entry("user", "Looks good, what are the milestones?"),
make_entry(
"assistant",
"Here are the milestones and acceptance criteria for each phase.",
),
];
assert_eq!(classify_kind(&entries), Kind::Plans);
}
#[test]
fn test_classify_kind_report() {
let entries = vec![
make_entry("user", "Review the PR"),
make_entry(
"assistant",
"## Findings\n\nThe code review reveals several issues.\n## Summary\nOverall quality is good.\n## Recommendations\nAdd more tests.",
),
make_entry("user", "Any metrics?"),
make_entry(
"assistant",
"## Metrics\nCoverage: 85%. Test results show 3 failures.\n## Conclusion\nReady after fixes.",
),
];
assert_eq!(classify_kind(&entries), Kind::Reports);
}
#[test]
fn test_classify_kind_conservative_fallback() {
let entries = vec![
make_entry("user", "What do you think about this approach?"),
make_entry("assistant", "It could work. Let me think about the plan."),
];
assert_eq!(classify_kind(&entries), Kind::Conversations);
}
#[test]
fn test_classify_kind_user_keywords_ignored() {
let entries = vec![
make_entry(
"user",
"## Plan\nStep 1: do this\nStep 2: do that\nStep 3: done\nAction items here",
),
make_entry("assistant", "Understood, I'll help with that."),
];
assert_eq!(classify_kind(&entries), Kind::Conversations);
}
#[test]
fn test_session_basename_format() {
let name = session_basename("2026-03-21", "claude", "abc123def456", 1);
assert_eq!(name, "2026_0321_claude_abc123def456_001.md");
}
#[test]
fn test_session_basename_truncates_long_session_id() {
let long_id = "a1b2c3d4-e5f6-7890-abcd-ef1234567890";
let name = session_basename("2026-03-21", "claude", long_id, 3);
assert!(name.contains("a1b2c3d4-e5f"));
assert!(name.ends_with("_003.md"));
assert!(!name.contains("ef1234567890"));
}
#[test]
fn test_session_basename_chunk_ordering() {
let a = session_basename("2026-03-21", "claude", "sess1", 1);
let b = session_basename("2026-03-21", "claude", "sess1", 2);
let c = session_basename("2026-03-21", "claude", "sess1", 10);
assert!(a < b);
assert!(b < c);
}
#[test]
fn test_session_basename_date_ordering() {
let a = session_basename("2026-03-20", "claude", "sess1", 1);
let b = session_basename("2026-03-21", "claude", "sess1", 1);
assert!(a < b, "Earlier date should sort first: {} vs {}", a, b);
}
#[test]
fn test_session_basename_self_describing() {
let name = session_basename("2026-03-21", "codex", "task-abc-123", 2);
assert!(name.contains("2026_0321"), "Must contain date");
assert!(name.contains("codex"), "Must contain agent");
assert!(
name.contains("task-abc-12"),
"Must contain session fragment"
);
assert!(name.contains("002"), "Must contain chunk number");
assert!(name.ends_with(".md"), "Must have .md extension");
}
#[test]
fn test_compact_date() {
assert_eq!(compact_date("2026-03-21"), "2026_0321");
assert_eq!(compact_date("2026-01-01"), "2026_0101");
assert_eq!(compact_date("2026_0321"), "2026_0321");
}
#[test]
fn test_truncate_session_id_short() {
assert_eq!(truncate_session_id("abc"), "abc");
assert_eq!(truncate_session_id(""), "");
}
#[test]
fn test_truncate_session_id_strips_non_alnum() {
assert_eq!(truncate_session_id("a/b:c!d@e#f"), "abcdef");
}
#[test]
fn test_chunk_uniqueness_same_session_day() {
let mut names = std::collections::HashSet::new();
for chunk in 1..=20 {
let name = session_basename("2026-03-21", "claude", "session-xyz", chunk);
assert!(names.insert(name.clone()), "Duplicate basename: {}", name);
}
}
#[test]
fn test_chunk_uniqueness_different_sessions_same_day() {
let a = session_basename("2026-03-21", "claude", "session-aaa", 1);
let b = session_basename("2026-03-21", "claude", "session-bbb", 1);
assert_ne!(a, b, "Different sessions must produce different basenames");
}
#[test]
fn test_chunk_uniqueness_different_agents_same_session() {
let a = session_basename("2026-03-21", "claude", "session-xyz", 1);
let b = session_basename("2026-03-21", "codex", "session-xyz", 1);
assert_ne!(a, b, "Different agents must produce different basenames");
}
#[test]
fn output_session_first_path_structure() {
let date = "2026-03-21";
let kind = Kind::Conversations;
let agent = "claude";
let project = "ai-contexters";
let expected_subpath = format!("{}/{}/{}/{}", project, date, kind.dir_name(), agent);
let basename = session_basename(date, agent, "sess-abc123", 1);
let full_subpath = format!("{}/{}", expected_subpath, basename);
assert!(full_subpath.contains("conversations/claude"));
assert!(full_subpath.ends_with("2026_0321_claude_sess-abc123_001.md"));
}
#[test]
fn canonical_store_writes_sidecar_with_frontmatter_telemetry() {
let root = retrieval_test_root("telemetry-sidecar");
let _ = fs::remove_dir_all(&root);
let entries = vec![TimelineEntry {
timestamp: Utc.with_ymd_and_hms(2026, 3, 27, 10, 0, 0).unwrap(),
agent: "codex".to_string(),
session_id: "sess-telemetry".to_string(),
role: "assistant".to_string(),
message: "---\nrun_id: mrbl-001\nprompt_id: api-redesign_20260327\nmodel: gpt-5.4\nstarted_at: 2026-03-27T10:00:00Z\ncompleted_at: 2026-03-27T10:01:00Z\ntoken_usage: 1234\nfindings_count: 4\nframe_kind: agent_reply\nphase: implement\nmode: session-first\nskill_code: vc-workflow\nframework_version: 2026-03\n---\n## Findings\nTelemetry wiring landed.\n".to_string(),
branch: None,
cwd: None,
frame_kind: None,
}];
let written = write_context_session_first_at(
&root.join("store"),
SessionWriteSpec {
project: Some("VetCoders/ai-contexters"),
agent: "codex",
date: "2026-03-27",
session_id: "sess-telemetry",
kind: Some(Kind::Reports),
},
&entries,
&ChunkerConfig::default(),
)
.expect("write canonical context");
assert_eq!(written.len(), 1);
let chunk_path = &written[0];
assert!(chunk_path.exists());
let content = fs::read_to_string(chunk_path).expect("read stored chunk");
assert!(content.contains("## Findings"));
assert!(!content.contains("run_id: mrbl-001"));
assert!(!content.contains("mode: session-first"));
let sidecar_path = chunk_path.with_extension("meta.json");
assert!(sidecar_path.exists());
let sidecar = load_sidecar(chunk_path).expect("load sidecar");
assert_eq!(sidecar.run_id.as_deref(), Some("mrbl-001"));
assert_eq!(sidecar.prompt_id.as_deref(), Some("api-redesign_20260327"));
assert_eq!(sidecar.agent_model.as_deref(), Some("gpt-5.4"));
assert_eq!(sidecar.started_at.as_deref(), Some("2026-03-27T10:00:00Z"));
assert_eq!(
sidecar.completed_at.as_deref(),
Some("2026-03-27T10:01:00Z")
);
assert_eq!(sidecar.token_usage, Some(1234));
assert_eq!(sidecar.findings_count, Some(4));
assert_eq!(
sidecar.frame_kind,
Some(crate::timeline::FrameKind::AgentReply)
);
assert_eq!(sidecar.workflow_phase.as_deref(), Some("implement"));
assert_eq!(sidecar.mode.as_deref(), Some("session-first"));
assert_eq!(sidecar.skill_code.as_deref(), Some("vc-workflow"));
assert_eq!(sidecar.framework_version.as_deref(), Some("2026-03"));
let scanned = scan_context_files_at(&root).expect("scan canonical store");
assert_eq!(scanned.len(), 1, "sidecar files must not scan as chunks");
let matched = chunks_by_run_id_at(
&root,
"mrbl-001",
Some("ai-contexters"),
SystemTime::UNIX_EPOCH,
)
.expect("query by run id");
assert_eq!(matched.len(), 1);
assert_eq!(matched[0].path.file_name(), chunk_path.file_name());
let _ = fs::remove_dir_all(&root);
}
#[test]
fn session_first_write_skips_empty_body_chunks() {
let root = retrieval_test_root("empty-body-guard");
let _ = fs::remove_dir_all(&root);
let entries = vec![TimelineEntry {
timestamp: Utc.with_ymd_and_hms(2026, 5, 6, 12, 0, 0).unwrap(),
agent: "claude".to_string(),
session_id: "sess-empty-body".to_string(),
role: "assistant".to_string(),
message: " \n\t".to_string(),
branch: None,
cwd: None,
frame_kind: Some(crate::timeline::FrameKind::InternalThought),
}];
let written = write_context_session_first_at(
&root.join("store"),
SessionWriteSpec {
project: Some("VetCoders/aicx"),
agent: "claude",
date: "2026-05-06",
session_id: "sess-empty-body",
kind: Some(Kind::Conversations),
},
&entries,
&ChunkerConfig::default(),
)
.expect("write should succeed");
assert!(written.is_empty());
assert!(!root.join("store").join("VetCoders").join("aicx").exists());
let _ = fs::remove_dir_all(&root);
}
fn semantic_entry(
ts: (i32, u32, u32, u32, u32, u32),
session_id: &str,
role: &str,
message: &str,
cwd: Option<&str>,
) -> TimelineEntry {
TimelineEntry {
timestamp: Utc
.with_ymd_and_hms(ts.0, ts.1, ts.2, ts.3, ts.4, ts.5)
.unwrap(),
agent: "codex".to_string(),
session_id: session_id.to_string(),
role: role.to_string(),
message: message.to_string(),
branch: None,
cwd: cwd.map(ToOwned::to_owned),
frame_kind: None,
}
}
#[test]
fn test_store_semantic_segments_emit_repo_and_non_repo_roots() {
let root = env::temp_dir().join("aicx-store-segmentation-proof");
let _ = fs::remove_dir_all(&root);
fs::create_dir_all(&root).unwrap();
let entries = vec![
semantic_entry(
(2026, 3, 21, 9, 0, 0),
"sess-a",
"user",
"No repo yet, just planning the migration.",
None,
),
semantic_entry(
(2026, 3, 21, 9, 1, 0),
"sess-a",
"assistant",
"Goal:\n- make segmentation real\nAcceptance:\n- stop fake buckets",
None,
),
semantic_entry(
(2026, 3, 21, 9, 2, 0),
"sess-a",
"user",
"Switch to https://github.com/VetCoders/ai-contexters now.",
Some("https://github.com/VetCoders/ai-contexters"),
),
semantic_entry(
(2026, 3, 21, 9, 3, 0),
"sess-a",
"user",
"Then inspect https://github.com/VetCoders/loctree as well.",
Some("https://github.com/VetCoders/loctree"),
),
];
let summary =
store_semantic_segments_at(&root, &entries, &ChunkerConfig::default(), |_, _| {})
.expect("store semantic segments");
assert_eq!(summary.total_entries, 4);
assert!(
summary
.written_paths
.iter()
.any(|path| { path.starts_with(root.join("non-repository-contexts")) })
);
assert!(summary.written_paths.iter().any(|path| {
path.starts_with(root.join("store").join("VetCoders").join("ai-contexters"))
}));
assert!(summary.written_paths.iter().any(|path| {
path.starts_with(root.join("store").join("VetCoders").join("loctree"))
}));
let scanned = scan_context_files_at(&root).expect("scan stored files");
assert!(
scanned
.iter()
.any(|file| file.project == NON_REPOSITORY_CONTEXTS)
);
assert!(
scanned
.iter()
.any(|file| file.project == "VetCoders/ai-contexters")
);
assert!(
scanned
.iter()
.any(|file| file.project == "VetCoders/loctree")
);
let _ = fs::remove_dir_all(&root);
}
#[test]
fn test_store_semantic_segments_reports_progress_per_segment() {
let root = retrieval_test_root("segmentation-progress");
let _ = fs::remove_dir_all(&root);
fs::create_dir_all(&root).unwrap();
let entries = vec![
semantic_entry(
(2026, 3, 21, 9, 0, 0),
"sess-a",
"user",
"No repo yet, just planning the migration.",
None,
),
semantic_entry(
(2026, 3, 21, 9, 1, 0),
"sess-a",
"assistant",
"Goal:\n- make segmentation real\nAcceptance:\n- stop fake buckets",
None,
),
semantic_entry(
(2026, 3, 21, 9, 2, 0),
"sess-a",
"user",
"Switch to https://github.com/VetCoders/ai-contexters now.",
None,
),
semantic_entry(
(2026, 3, 21, 9, 3, 0),
"sess-a",
"user",
"Then inspect https://github.com/VetCoders/loctree as well.",
None,
),
];
let mut progress_updates = Vec::new();
let summary = store_semantic_segments_at(
&root,
&entries,
&ChunkerConfig::default(),
|done, total| progress_updates.push((done, total)),
)
.expect("store semantic segments");
assert_eq!(summary.total_entries, 4);
assert_eq!(progress_updates, vec![(1, 3), (2, 3), (3, 3)]);
let _ = fs::remove_dir_all(&root);
}
fn retrieval_test_root(name: &str) -> PathBuf {
env::temp_dir().join(format!(
"aicx-retrieval-{name}-{}-{}",
std::process::id(),
Utc::now().timestamp_nanos_opt().unwrap_or_default()
))
}
fn write_chunk_file(path: &Path, content: &str) {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).unwrap();
}
fs::write(path, content).unwrap();
}
fn set_mtime(path: &Path, unix_seconds: i64) {
set_file_mtime(path, FileTime::from_unix_time(unix_seconds, 0)).unwrap();
}
#[test]
fn scan_retrieves_repo_centric_files_with_correct_metadata() {
let root = retrieval_test_root("repo-scan");
let _ = fs::remove_dir_all(&root);
let chunk_dir = root
.join("store")
.join("VetCoders")
.join("ai-contexters")
.join("2026_0321")
.join("conversations")
.join("claude");
write_chunk_file(
&chunk_dir.join("2026_0321_claude_sess-abc123_001.md"),
"Decision: use repo-centric store layout",
);
let scanned = scan_context_files_at(&root).expect("scan should succeed");
assert_eq!(scanned.len(), 1);
let file = &scanned[0];
assert_eq!(file.project, "VetCoders/ai-contexters");
assert_eq!(file.agent, "claude");
assert_eq!(file.kind, Kind::Conversations);
assert_eq!(file.date_compact, "2026_0321");
assert_eq!(file.date_iso, "2026-03-21");
assert_eq!(file.session_id, "sess-abc123");
assert_eq!(file.chunk, 1);
assert!(file.repo.is_some());
assert_eq!(
file.repo.as_ref().unwrap().slug(),
"VetCoders/ai-contexters"
);
let _ = fs::remove_dir_all(&root);
}
#[test]
fn read_context_chunk_accepts_relative_path_file_name_and_compact_ref() {
let root = retrieval_test_root("read-chunk");
let _ = fs::remove_dir_all(&root);
let chunk_path = root
.join("store")
.join("VetCoders")
.join("ai-contexters")
.join("2026_0321")
.join("conversations")
.join("claude")
.join("2026_0321_claude_sess-abc123_001.md");
write_chunk_file(&chunk_path, "Decision: read is the re-entry primitive");
let by_relative = read_context_chunk_at(
&root,
"store/VetCoders/ai-contexters/2026_0321/conversations/claude/2026_0321_claude_sess-abc123_001.md",
Some(14),
)
.expect("read by relative path");
assert_eq!(by_relative.project, "VetCoders/ai-contexters");
assert_eq!(by_relative.kind, "conversations");
assert_eq!(by_relative.session_id, "sess-abc123");
assert_eq!(by_relative.chunk, 1);
assert_eq!(by_relative.content, "Decision: read");
assert!(by_relative.truncated);
let by_file = read_context_chunk_at(&root, "2026_0321_claude_sess-abc123_001.md", None)
.expect("read by file name");
assert!(by_file.content.contains("re-entry primitive"));
let by_compact = read_context_chunk_at(
&root,
"VetCoders/ai-contexters|2026-03-21|conversations|claude|sess-abc123|001",
None,
)
.expect("read by compact ref");
assert_eq!(by_compact.relative_path, by_relative.relative_path);
let _ = fs::remove_dir_all(&root);
}
#[test]
fn scan_retrieves_non_repository_files_with_explicit_project_label() {
let root = retrieval_test_root("non-repo-scan");
let _ = fs::remove_dir_all(&root);
let chunk_dir = root
.join("non-repository-contexts")
.join("2026_0321")
.join("plans")
.join("codex");
write_chunk_file(
&chunk_dir.join("2026_0321_codex_sess-xyz789_001.md"),
"Migration plan before repo identity is known",
);
let scanned = scan_context_files_at(&root).expect("scan should succeed");
assert_eq!(scanned.len(), 1);
let file = &scanned[0];
assert_eq!(file.project, NON_REPOSITORY_CONTEXTS);
assert_eq!(file.agent, "codex");
assert_eq!(file.kind, Kind::Plans);
assert!(file.repo.is_none());
let _ = fs::remove_dir_all(&root);
}
#[test]
fn scan_retrieves_both_repo_and_non_repo_files_together() {
let root = retrieval_test_root("combined-scan");
let _ = fs::remove_dir_all(&root);
let repo_dir = root
.join("store")
.join("VetCoders")
.join("loctree")
.join("2026_0320")
.join("reports")
.join("gemini");
write_chunk_file(
&repo_dir.join("2026_0320_gemini_sess-rpt001_001.md"),
"## Report\nCoverage report for loctree scanner",
);
let non_repo_dir = root
.join("non-repository-contexts")
.join("2026_0321")
.join("other")
.join("claude");
write_chunk_file(
&non_repo_dir.join("2026_0321_claude_sess-misc01_001.md"),
"Unscoped brainstorm notes",
);
let scanned = scan_context_files_at(&root).expect("scan should succeed");
assert_eq!(scanned.len(), 2);
let repo_file = scanned.iter().find(|f| f.project == "VetCoders/loctree");
let non_repo_file = scanned
.iter()
.find(|f| f.project == NON_REPOSITORY_CONTEXTS);
assert!(repo_file.is_some(), "repo-centric file must be found");
assert!(non_repo_file.is_some(), "non-repository file must be found");
let repo_file = repo_file.unwrap();
assert_eq!(repo_file.kind, Kind::Reports);
assert_eq!(repo_file.agent, "gemini");
assert!(repo_file.repo.is_some());
let non_repo_file = non_repo_file.unwrap();
assert_eq!(non_repo_file.kind, Kind::Other);
assert_eq!(non_repo_file.agent, "claude");
assert!(non_repo_file.repo.is_none());
let _ = fs::remove_dir_all(&root);
}
#[test]
fn context_files_since_uses_canonical_chunk_date_not_mtime() {
let root = retrieval_test_root("context-files-since-date");
let _ = fs::remove_dir_all(&root);
let recent = root
.join("store")
.join("VetCoders")
.join("ai-contexters")
.join("2026_0331")
.join("reports")
.join("claude")
.join("2026_0331_claude_sess-new_001.md");
let old = root
.join("store")
.join("VetCoders")
.join("ai-contexters")
.join("2026_0328")
.join("reports")
.join("claude")
.join("2026_0328_claude_sess-old_001.md");
write_chunk_file(&recent, "Fresh canonical chunk");
write_chunk_file(&old, "Stale canonical chunk");
set_mtime(&recent, 1);
set_mtime(&old, 2_000_000_000);
let cutoff: SystemTime = Utc.with_ymd_and_hms(2026, 3, 30, 0, 0, 0).unwrap().into();
let files = context_files_since_at(&root, cutoff, Some("ai-contexters"))
.expect("context file filtering should succeed");
assert_eq!(files.len(), 1);
assert_eq!(files[0].date_iso, "2026-03-31");
assert_eq!(
files[0]
.path
.file_name()
.and_then(|name| name.to_str())
.unwrap(),
"2026_0331_claude_sess-new_001.md"
);
let _ = fs::remove_dir_all(&root);
}
#[test]
fn scan_context_files_respects_aicxignore_and_negation() {
let root = retrieval_test_root("context-files-ignore");
let _ = fs::remove_dir_all(&root);
let ignored = root
.join("store")
.join("VetCoders")
.join("ai-contexters")
.join("2026_0331")
.join("reports")
.join("codex")
.join("2026_0331_codex_sess-rpt_001.md");
let kept = root
.join("store")
.join("VetCoders")
.join("ai-contexters")
.join("2026_0331")
.join("conversations")
.join("codex")
.join("2026_0331_codex_sess-conv_001.md");
write_chunk_file(&ignored, "## Report\nIgnore this chunk");
write_chunk_file(&kept, "Conversation that should remain visible");
fs::write(
root.join(AICX_IGNORE_FILENAME),
"store/VetCoders/ai-contexters/**\n!store/VetCoders/ai-contexters/**/conversations/**\n",
)
.unwrap();
let scanned = scan_context_files_at(&root).expect("ignore-aware scan should succeed");
assert_eq!(scanned.len(), 1);
assert_eq!(
scanned[0]
.path
.file_name()
.and_then(|name| name.to_str())
.unwrap(),
"2026_0331_codex_sess-conv_001.md"
);
let raw = scan_context_files_raw_at(&root).expect("raw scan should succeed");
assert_eq!(raw.len(), 2);
let (filtered, ignored_count) =
filter_ignored_paths_at(&root, &[ignored.clone(), kept.clone()])
.expect("ignore filter should succeed");
assert_eq!(ignored_count, 1);
assert_eq!(filtered, vec![kept]);
let _ = fs::remove_dir_all(&root);
}
#[test]
fn load_ignore_matcher_rejects_traversal_base() {
let root = retrieval_test_root("context-files-ignore-traversal");
let _ = fs::remove_dir_all(&root);
fs::create_dir_all(&root).unwrap();
fs::create_dir_all(root.join("nested")).unwrap();
fs::write(root.join(AICX_IGNORE_FILENAME), "store/**\n").unwrap();
let traversal_base = root.join("nested").join("..");
let err = load_ignore_matcher_at(&traversal_base)
.expect_err("traversal base should be rejected by validated read");
let message = err.to_string().to_lowercase();
assert!(message.contains("traversal"), "unexpected error: {err:#}");
let _ = fs::remove_dir_all(&root);
}
#[test]
fn migration_rebuilds_existing_sources_into_canonical_store() {
let root = migration_test_root("rebuild-canonical");
let legacy_root = root.join("legacy");
let store_root = root.join("aicx");
let repo_root = root.join("hosted").join("VetCoders").join("ai-contexters");
let source = root
.join("sources")
.join("rollout-rebuild-canonical-019be5e4.jsonl");
let _ = fs::remove_dir_all(&root);
fs::create_dir_all(&repo_root).unwrap();
fs::create_dir_all(repo_root.join(".git")).unwrap();
fs::create_dir_all(legacy_root.join("demo").join("2026-03-21")).unwrap();
write_codex_history(
&source,
"sess-rebuild",
Some(repo_root.to_string_lossy().as_ref()),
&[
("user", 1_742_560_000, "Please inspect the migration seam."),
(
"assistant",
1_742_560_060,
"Reviewing the repo-centric store now.",
),
],
);
write_text(
&legacy_root
.join("demo")
.join("2026-03-21")
.join("101045_codex-001.md"),
&format!("input: {}\n", source.display()),
);
let manifest =
run_migration_at(&legacy_root, &store_root, false, &SourceLocator::default())
.expect("run migration");
assert_eq!(manifest.totals.rebuild_items, 1);
assert_eq!(manifest.totals.rebuilt_items, 1);
assert_eq!(manifest.totals.salvaged_items, 0);
assert!(manifest.items.iter().any(|item| {
item.canonical_paths.iter().any(|path| {
path.contains("/store/VetCoders/ai-contexters/2025_0321/conversations/codex/")
})
}));
assert!(
store_root
.join("store")
.join("VetCoders")
.join("ai-contexters")
.join("2025_0321")
.join("conversations")
.join("codex")
.is_dir()
);
let _ = fs::remove_dir_all(&root);
}
#[test]
fn migration_salvages_legacy_bundle_when_source_is_missing() {
let root = migration_test_root("salvage-missing");
let legacy_root = root.join("legacy");
let store_root = root.join("aicx");
let missing_source = root
.join("sources")
.join("rollout-missing-source-019be5e4.jsonl");
let _ = fs::remove_dir_all(&root);
write_text(
&legacy_root
.join("demo")
.join("2026-03-21")
.join("101045_codex-001.md"),
&format!("input: {}\n", missing_source.display()),
);
let manifest =
run_migration_at(&legacy_root, &store_root, false, &SourceLocator::default())
.expect("run migration");
let item = manifest.items.first().expect("migration item");
assert_eq!(item.action, MigrationAction::Salvage);
assert_eq!(item.action_reason, "missing_source");
assert!(item.canonical_paths.is_empty());
assert!(
item.salvage_paths
.iter()
.any(|path| { path.contains("/legacy-store/demo/2026-03-21/101045_codex-001.md") })
);
assert!(item.salvage_paths.iter().any(|path| {
path.contains("/legacy-store/demo/2026-03-21/101045_codex.migration-provenance.json")
}));
let _ = fs::remove_dir_all(&root);
}
#[test]
fn migration_writes_manifest_report_and_non_repo_rebuilds() {
let root = migration_test_root("manifest-report");
let legacy_root = root.join("legacy");
let store_root = root.join("aicx");
let source = root.join("sources").join("rollout-non-repo-019be5e4.jsonl");
let _ = fs::remove_dir_all(&root);
write_codex_history(
&source,
"sess-non-repo",
None,
&[
(
"user",
1_742_560_000,
"Draft a migration plan before we know the repo.",
),
(
"assistant",
1_742_560_060,
"Working in non-repository mode for now.",
),
],
);
write_text(
&legacy_root
.join("demo")
.join("2026-03-21")
.join("101045_codex-001.md"),
&format!("input: {}\n", source.display()),
);
write_text(&legacy_root.join("state.json"), "{\"seen_hashes\":[]}");
let manifest =
run_migration_at(&legacy_root, &store_root, false, &SourceLocator::default())
.expect("run migration");
let report = fs::read_to_string(&manifest.report_path).expect("read report");
let manifest_json =
fs::read_to_string(&manifest.manifest_path).expect("read manifest json");
assert!(manifest.items.iter().any(|item| {
item.canonical_paths.iter().any(|path| {
path.contains("/non-repository-contexts/2025_0321/conversations/codex/")
})
}));
assert!(manifest.items.iter().any(|item| {
item.action_reason == "non_context_legacy_file"
&& item
.salvage_paths
.iter()
.any(|path| path.contains("/legacy-store/state.json"))
}));
assert!(report.contains("## Rebuilt"));
assert!(report.contains("## Unclassified Legacy Items"));
assert!(report.contains("non_context_legacy_file"));
assert!(manifest_json.contains("\"report_path\""));
assert!(PathBuf::from(&manifest.report_path).exists());
assert!(PathBuf::from(&manifest.manifest_path).exists());
let _ = fs::remove_dir_all(&root);
}
fn migration_test_root(name: &str) -> PathBuf {
env::temp_dir().join(format!(
"aicx-migration-{name}-{}-{}",
std::process::id(),
Utc::now().timestamp_nanos_opt().unwrap_or_default()
))
}
fn write_text(path: &Path, content: &str) {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).unwrap();
}
fs::write(path, content).unwrap();
}
fn write_codex_history(
path: &Path,
session_id: &str,
cwd: Option<&str>,
records: &[(&str, i64, &str)],
) {
let mut lines = Vec::new();
for (role, ts, text) in records {
lines.push(
serde_json::json!({
"session_id": session_id,
"text": text,
"ts": ts,
"role": role,
"cwd": cwd,
})
.to_string(),
);
}
write_text(path, &lines.join("\n"));
}
}