mod cli;
mod commands;
mod output;
use anyhow::{Context, Result, bail};
use clap::Parser;
use cli::{Cli, Commands, DispatchTraceFormat, GraphDbQuery, SemanticRelatedKind};
#[cfg(test)]
use cli::{GraphDbBackend, TraverseFormat};
use commands::digests::{
cmd_context_pack, cmd_diff_digest, cmd_log_digest, cmd_metric_digest, cmd_session_cost,
cmd_session_digest, cmd_session_review_with_budget, cmd_test_digest,
};
#[cfg(test)]
use commands::graph::cmd_explain;
use commands::graph::{
cmd_analyze, cmd_communities, cmd_explain_with_budget, cmd_graph, cmd_path, cmd_traverse,
};
#[cfg(test)]
use commands::index_search::cmd_search;
use commands::index_search::{cmd_index, cmd_search_with_budget, cmd_search_worker};
use commands::infra::{
cmd_convex_sync, cmd_edit, cmd_graph_db, cmd_init, cmd_locks, cmd_rewrite, cmd_route, cmd_sql,
cmd_status,
};
use commands::quality::{cmd_audit, cmd_audit_tagpath, cmd_lint};
use commands::summarize::cmd_summarize;
use flate2::{Compression, read::GzDecoder, write::GzEncoder};
use output::tagpath::{
CommunityMemberAmbiguityDiagnostic, TagpathAnnotationDiagnostic, TagpathSearchOpts,
annotate_communities_with_tagpath, annotate_hits_with_tagpath,
annotate_path_nodes_with_tagpath, annotate_stored_edges_with_tagpath,
annotate_stored_symbols_with_tagpath,
};
use output::{
OutputFormat, ResponseBudget, ResponseBudgetPreset, ToolEnvelope, ToolEnvelopeMetric,
ToolEnvelopeSummary, TranscriptArtifactRef,
};
use rusqlite::{Connection, OptionalExtension};
use serde::{Deserialize, Serialize};
use sift::{SearchInput, SearchOptions, Sift};
#[cfg(test)]
use std::cell::RefCell;
use std::cmp::Ordering;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
use std::env;
use std::fs;
use std::io::{BufRead as _, BufReader, Read as _, Write as _};
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::sync::{Mutex, OnceLock};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use substrate::{
ConvexEdgeRow, ConvexNodeRow, ConvexProjectionRows, GraphEdge as SubstrateGraphEdge,
GraphFreshness, GraphNode as SubstrateGraphNode, GraphProjection, GraphPropertyFilter,
GraphProvenance, GraphQueryOptions, GraphQueryPage, GraphStore, SQLITE_GRAPH_SCHEMA_VERSION,
SqliteGraphStore, SqliteProjectionRefresh,
};
use tagpath::{family as tagpath_family, ontology as tagpath_ontology};
use tempfile::NamedTempFile;
#[cfg(test)]
use tsift_agent_doc::session_cost;
use tsift_agent_doc::{session_digest, session_review};
use tsift_digest::{diff_digest, log_digest, metric_digest, test_digest};
use tsift_graph as graph;
use tsift_index::{config, index, init, walk};
use tsift_quality::{dci_benchmark, lint, perf_gate};
use tsift_resolution as resolution;
use tsift_search::{impact, sift, tagpath_adapter};
use tsift_sqlite as substrate;
use tsift_status::status;
use tsift_summarize::summarize;
use tsift_tokensave::TokensaveDb;
/// Experimental graph-database backends that can be named as a
/// backend-eval candidate (see `GraphDbExperimentalBackend::parse`).
///
/// The derived `Ord`/`PartialOrd` follow declaration order, so reordering
/// the variants changes comparison results.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize)]
pub(crate) enum GraphDbExperimentalBackend {
// DuckDB with the DuckPGQ extension; parsed from "duckdb-duckpgq", "duckdb", or "duckpgq".
DuckdbDuckpgq,
// FalkorDB; parsed from "falkordb" or "falkor".
Falkordb,
// Ladybug; parsed from "ladybug".
Ladybug,
// Kuzu (Vela-Engineering/kuzu); parsed from "kuzu" or "vela-kuzu".
Kuzu,
}
/// Serializable description of what an experimental backend must prove
/// before promotion; built by `GraphDbExperimentalBackend::promotion_gate`.
#[derive(Serialize)]
struct GraphDbBackendPromotionGate {
// Machine-readable gate status (every backend in this file reports "hold_native_adapter_required").
status: String,
// Whether a native adapter is still required before promotion.
native_adapter_required: bool,
// Identifiers of the checks that must pass.
required_checks: Vec<String>,
}
impl GraphDbExperimentalBackend {
    /// Canonical lowercase name of the backend.
    fn name(self) -> &'static str {
        match self {
            Self::DuckdbDuckpgq => "duckdb-duckpgq",
            Self::Falkordb => "falkordb",
            Self::Ladybug => "ladybug",
            Self::Kuzu => "kuzu",
        }
    }

    /// Human-readable label for the read-only prototype adapter.
    fn adapter_label(self) -> &'static str {
        match self {
            Self::DuckdbDuckpgq => "DuckDB/DuckPGQ read-only prototype",
            Self::Falkordb => "FalkorDB read-only prototype",
            Self::Ladybug => "Ladybug read-only prototype",
            Self::Kuzu => "Kuzu (Vela-Engineering/kuzu) read-only prototype",
        }
    }

    /// Describes how projection rows are loaded for this backend's prototype.
    ///
    /// DuckDB/DuckPGQ and Ladybug share the generic description.
    fn projection_load(self) -> &'static str {
        match self {
            Self::Falkordb => {
                "provider-neutral rows loaded into a FalkorDB-shaped read snapshot for parity and timing only; production FalkorDB storage remains behind backend-eval until a real adapter passes the full-projection gate"
            }
            Self::Kuzu => {
                "provider-neutral rows loaded into a Kuzu-compatible in-process read snapshot for parity and performance gates; production Vela-Engineering/kuzu storage remains behind a future optional adapter"
            }
            Self::DuckdbDuckpgq | Self::Ladybug => {
                "provider-neutral rows loaded into a dependency-free in-process read snapshot for parity and performance gates"
            }
        }
    }

    /// Describes the locking behavior of this backend's prototype.
    ///
    /// DuckDB/DuckPGQ and Ladybug share the generic description.
    fn lock_behavior(self) -> &'static str {
        match self {
            Self::Falkordb => {
                "read-only FalkorDB prototype snapshot; production promotion must prove multi-process writer behavior and local fallback semantics before replacing SQLite"
            }
            Self::Kuzu => {
                "read-only Kuzu prototype snapshot; no SQLite writer lock is taken during benchmarks, and production Vela-Engineering/kuzu promotion must prove concurrent writer semantics before replacing SQLite"
            }
            Self::DuckdbDuckpgq | Self::Ladybug => {
                "read-only snapshot/row adapter; no writer lock is taken during query benchmarks"
            }
        }
    }

    /// Describes the install/portability constraints for this backend.
    ///
    /// DuckDB/DuckPGQ and Ladybug share the generic description.
    fn install_portability(self) -> &'static str {
        match self {
            Self::Falkordb => {
                "prototype is dependency-free in this binary; production FalkorDB promotion must keep install optional and preserve cargo build/install without a service"
            }
            Self::Kuzu => {
                "prototype is dependency-free in this binary; production Vela-Engineering/kuzu integration must stay optional so cargo build/install works without a native Kuzu toolchain"
            }
            Self::DuckdbDuckpgq | Self::Ladybug => {
                "prototype is dependency-free in this binary; a production engine adapter must remain optional before promotion"
            }
        }
    }

    /// Reason the backend is still held behind backend-eval.
    ///
    /// Every current variant has a reason, so this always returns `Some`.
    fn prototype_hold_reason(self) -> Option<&'static str> {
        let reason = match self {
            Self::DuckdbDuckpgq => {
                "DuckDB/DuckPGQ remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior"
            }
            Self::Falkordb => {
                "FalkorDB remains behind backend-eval until a production adapter beats SQLite on full_projection conflict-matrix, evidence, dispatch-trace, path tiers, install portability, and lock behavior"
            }
            Self::Ladybug => {
                "Ladybug remains behind backend-eval until a native production adapter proves projection writes, freshness/parity, full_projection wins, install portability, and lock behavior"
            }
            Self::Kuzu => {
                "Kuzu remains behind backend-eval until a native optional adapter proves projection writes/load, SQLite parity, full_projection wins, install portability, and lock behavior"
            }
        };
        Some(reason)
    }

    /// Builds the promotion gate for this backend.
    ///
    /// All backends share the same status, the `native_adapter_required`
    /// flag, and the freshness/parity check; only the projection-load, lock,
    /// and install checks differ. The check order is: projection load,
    /// freshness/parity, lock behavior, install cost.
    fn promotion_gate(self) -> GraphDbBackendPromotionGate {
        let (projection_check, lock_check, install_check) = match self {
            Self::DuckdbDuckpgq => (
                "native_duckdb_duckpgq_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay",
                "embedded_or_service_lock_behavior_match_or_beat_sqlite",
                "operator_install_cost_keeps_cargo_build_install_duckdb_extension_free_by_default",
            ),
            Self::Falkordb => (
                "native_falkordb_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay",
                "multi_process_writer_and_read_only_lock_behavior_match_or_beat_sqlite",
                "operator_install_cost_keeps_cargo_build_install_service_free_by_default",
            ),
            Self::Ladybug => (
                "native_ladybug_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay",
                "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite",
                "operator_install_cost_keeps_cargo_build_install_ladybug_free_by_default",
            ),
            Self::Kuzu => (
                "native_kuzu_projection_load_writes_provider_neutral_rows_without_sqlite_row_replay",
                "concurrent_writer_and_read_only_lock_behavior_match_or_beat_sqlite",
                "operator_install_cost_keeps_cargo_build_install_native_kuzu_free_by_default",
            ),
        };
        let required_checks = [
            projection_check,
            "freshness_and_parity_match_sqlite_on_real_and_full_projection_datasets",
            lock_check,
            install_check,
        ]
        .into_iter()
        .map(String::from)
        .collect();
        GraphDbBackendPromotionGate {
            status: String::from("hold_native_adapter_required"),
            native_adapter_required: true,
            required_checks,
        }
    }

    /// Parses a backend-eval candidate name, accepting a few aliases.
    ///
    /// # Errors
    /// Fails when `raw` is not one of the recognized names.
    fn parse(raw: &str) -> Result<Self> {
        let backend = match raw {
            "duckdb-duckpgq" | "duckdb" | "duckpgq" => Self::DuckdbDuckpgq,
            "falkordb" | "falkor" => Self::Falkordb,
            "ladybug" => Self::Ladybug,
            "kuzu" | "vela-kuzu" => Self::Kuzu,
            _ => bail!(
                "unknown backend-eval candidate {raw:?}; expected duckdb-duckpgq, falkordb, ladybug, or kuzu"
            ),
        };
        Ok(backend)
    }
}
/// Deserialized batch of edit operations.
#[derive(Deserialize)]
pub(crate) struct EditBatch {
// The individual operations contained in the batch.
edits: Vec<EditOp>,
}
/// One find-and-replace operation inside an `EditBatch`.
#[derive(Deserialize)]
struct EditOp {
/// File path to edit
file: PathBuf,
/// Text to find and replace
old: String,
/// Replacement text
new: String,
/// Replace all occurrences (default: false — fails if not unique)
// `#[serde(default)]` makes the field optional in the input; it is `false` when absent.
#[serde(default)]
replace_all: bool,
}
/// Borrowed options bundle handed to `cmd_metric_digest`; `run` fills it
/// from the `MetricDigest` subcommand arguments.
pub(crate) struct MetricDigestOptions<'a> {
// Optional input path (`input` on the CLI).
input_path: Option<&'a Path>,
// Optional baseline path (`baseline` on the CLI).
baseline_path: Option<&'a Path>,
// Metric names from the CLI — presumably the metrics to report; confirm in cmd_metric_digest.
metrics: &'a [String],
// Names from the CLI `lower_is_better` argument.
lower_is_better: &'a [String],
// Names from the CLI `higher_is_better` argument.
higher_is_better: &'a [String],
// NOTE(review): looks like a history window size — confirm against the consumer.
history: usize,
// NOTE(review): looks like a cap on reported entries — confirm against the consumer.
top: usize,
}
/// Serialized outcome for a single edited file.
#[derive(Serialize)]
pub(crate) struct EditResult {
// File the result refers to.
file: PathBuf,
// Outcome of the edit.
status: EditStatus,
// Error message; omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<String>,
// Replacement count; omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
replacements: Option<usize>,
}
/// Outcome of one edit; serialized in lowercase ("ok" / "skipped").
#[derive(Serialize)]
#[serde(rename_all = "lowercase")]
pub(crate) enum EditStatus {
Ok,
Skipped,
}
/// An edit whose replacement content has been computed.
// NOTE(review): presumably the first stage of a plan -> stage -> apply pipeline
// (see `StagedEdit`, `AppliedEdit`); confirm against the edit command implementation.
struct PlannedEdit {
// Presumably the position of the originating operation in the batch — verify.
index: usize,
// Target file.
file: PathBuf,
// Content to store for the file.
new_content: String,
// Number of replacements.
replacements: usize,
}
/// An edit that carries a named temporary file alongside its target path.
// NOTE(review): presumably the temp file holds the new content before it
// replaces `file`; confirm against the edit command implementation.
struct StagedEdit {
// Presumably the position of the originating operation in the batch — verify.
index: usize,
// Target file.
file: PathBuf,
// Number of replacements.
replacements: usize,
// Temporary file associated with this edit.
staged_file: NamedTempFile,
}
/// An edit that carries the path of a backup alongside its target path.
// NOTE(review): presumably the backup allows rollback of an applied edit;
// confirm against the edit command implementation.
struct AppliedEdit {
// Presumably the position of the originating operation in the batch — verify.
index: usize,
// Target file.
file: PathBuf,
// Number of replacements.
replacements: usize,
// Location of the backup.
backup_path: PathBuf,
}
pub fn run() -> Result<()> {
let cli = Cli::parse();
let compact = cli.compact;
let pretty = cli.pretty;
let terse = cli.terse;
let absolute = cli.absolute;
let tabular = cli.tabular;
let schema = cli.schema;
let envelope = cli.envelope;
match cli.command {
Some(Commands::Search {
query,
path,
limit,
strategy,
exact,
scope,
federated,
json,
autoindex,
no_autoindex,
timeout,
max_items,
max_bytes,
budget,
no_tagpath,
tagpath_strict,
}) => cmd_search_with_budget(
query,
path,
limit,
if exact {
Some("exact".to_string())
} else {
strategy
},
scope,
federated,
json || terse || schema || envelope,
autoindex || !no_autoindex,
timeout,
compact,
pretty,
terse,
absolute,
tabular,
schema,
envelope,
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
TagpathSearchOpts {
no_tagpath,
strict: tagpath_strict,
},
),
Some(Commands::SearchWorker {
path,
cache_dir,
query,
limit,
strategy,
output,
}) => cmd_search_worker(&path, &cache_dir, &query, limit, &strategy, &output),
Some(Commands::DigestRunner {
kind,
path,
runner,
shell_command,
json,
}) => cmd_digest_runner(
&kind,
&path,
runner.as_deref(),
&shell_command,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::Edit { dry_run, file }) => {
cmd_edit(dry_run, file, compact, pretty, terse, schema)
}
Some(Commands::Index {
path,
rebuild,
check,
exit_code,
prune,
quiet,
workspace,
submodule,
json,
}) => cmd_index(
&path,
rebuild,
check,
exit_code,
prune,
quiet,
workspace,
submodule.as_deref(),
json || terse || schema || envelope,
compact,
pretty,
terse,
absolute,
schema,
),
Some(Commands::Rewrite { command, run }) => cmd_rewrite(
&command,
run,
OutputFormat {
json_output: terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::Route { task, id }) => cmd_route(&task, id),
Some(Commands::Graph {
symbol,
path,
callers,
callees,
scope,
limit,
json,
no_tagpath,
tagpath_strict,
}) => cmd_graph(
&symbol,
&path,
callers,
callees,
scope.as_deref(),
limit,
json || terse || schema || envelope,
compact,
pretty,
terse,
absolute,
tabular,
schema,
TagpathSearchOpts {
no_tagpath,
strict: tagpath_strict,
},
),
Some(Commands::Sql {
db,
query,
table,
json,
}) => cmd_sql(
&db,
query,
table,
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
Some(Commands::Communities {
path,
scope,
min_size,
limit,
json,
no_tagpath,
tagpath_strict,
}) => cmd_communities(
&path,
scope.as_deref(),
min_size,
limit,
json || terse || schema || envelope,
compact,
pretty,
terse,
tabular,
schema,
TagpathSearchOpts {
no_tagpath,
strict: tagpath_strict,
},
),
Some(Commands::Analyze {
path,
scope,
entry_points,
limit,
json,
}) => cmd_analyze(
&path,
scope.as_deref(),
&entry_points,
limit,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::Path {
from,
to,
path,
scope,
json,
no_tagpath,
tagpath_strict,
}) => cmd_path(
&from,
&to,
&path,
scope.as_deref(),
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
TagpathSearchOpts {
no_tagpath,
strict: tagpath_strict,
},
),
Some(Commands::Explain {
symbol,
path,
scope,
limit,
json,
max_items,
max_bytes,
budget,
no_tagpath,
tagpath_strict,
}) => cmd_explain_with_budget(
&symbol,
&path,
scope.as_deref(),
limit,
json || terse || schema || envelope,
compact,
pretty,
terse,
absolute,
tabular,
schema,
envelope,
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
TagpathSearchOpts {
no_tagpath,
strict: tagpath_strict,
},
),
Some(Commands::Traverse {
node,
to,
path,
scope,
depth,
limit,
format,
convex_snapshot,
}) => cmd_traverse(
node.as_deref(),
to.as_deref(),
&path,
scope.as_deref(),
depth,
limit,
format,
pretty,
terse,
schema,
convex_snapshot.as_deref(),
),
Some(Commands::ConvexSync {
path,
scope,
snapshot,
chunk_size,
remote_snapshot,
apply,
endpoint,
auth_token_env,
json,
}) => cmd_convex_sync(
ConvexSyncOptions {
path: &path,
scope: scope.as_deref(),
snapshot: snapshot.as_deref(),
chunk_size,
remote_snapshot,
apply,
endpoint: endpoint.as_deref(),
auth_token_env: &auth_token_env,
},
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::GraphDb {
path,
scope,
backend,
convex_snapshot,
json,
query,
}) => cmd_graph_db(
&path,
scope.as_deref(),
backend,
convex_snapshot.as_deref(),
query,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::SourceRead {
file,
path,
start,
lines,
end,
scope,
json,
max_items,
max_bytes,
budget,
}) => cmd_source_read(
&file,
&path,
start,
lines,
end,
scope.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
absolute,
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
),
Some(Commands::Audit {
skills_dir,
manifest,
usage,
cleanup,
report,
json,
}) => cmd_audit(
&skills_dir,
manifest,
usage,
cleanup,
report,
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
Some(Commands::AuditTagpath { path, scope, json }) => cmd_audit_tagpath(
&path,
scope.as_deref(),
json || terse || schema || envelope,
pretty,
terse,
schema,
),
Some(Commands::Init {
path,
codex,
opencode,
workspace,
}) => cmd_init(&path, codex, opencode, workspace),
Some(Commands::Lint {
file,
index,
entities_from,
json,
}) => cmd_lint(
&file,
index,
entities_from,
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
Some(Commands::Summarize {
symbol,
file,
extract,
diff,
stats,
path,
json,
}) => cmd_summarize(
symbol,
file,
extract,
diff,
stats,
&path,
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
Some(Commands::Semantic {
query,
path,
scope,
limit,
kind,
json,
}) => cmd_semantic_related(
&query,
&path,
scope.as_deref(),
limit,
kind,
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
Some(Commands::DiffDigest {
path,
cached,
revision,
json,
}) => cmd_diff_digest(
&path,
cached,
revision.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::Impact {
path,
cached,
revision,
scope,
limit,
json,
}) => cmd_impact(
&path,
cached,
revision.as_deref(),
scope.as_deref(),
limit,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::TestDigest {
path,
input,
runner,
json,
}) => cmd_test_digest(
&path,
input.as_deref(),
runner.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::LogDigest { path, input, json }) => cmd_log_digest(
&path,
input.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::ContextPack {
path,
test_input,
runner,
log_input,
json,
max_items,
max_bytes,
budget,
convex_snapshot,
}) => cmd_context_pack(
&path,
test_input.as_deref(),
runner.as_deref(),
log_input.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
convex_snapshot.as_deref(),
),
Some(Commands::ConflictMatrix {
targets,
path,
scope,
depth,
limit,
impact_limit,
json,
}) => cmd_conflict_matrix(
&path,
scope.as_deref(),
&targets,
depth,
limit,
impact_limit,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::DispatchTrace {
targets,
path,
scope,
depth,
limit,
impact_limit,
format,
json,
}) => cmd_dispatch_trace(
DispatchTraceOptions {
path: &path,
scope: scope.as_deref(),
raw_targets: &targets,
depth,
limit,
impact_limit,
trace_format: if json {
DispatchTraceFormat::Json
} else {
format
},
},
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::DependencyDag {
targets,
path,
scope,
depth,
limit,
json,
}) => cmd_dependency_dag(
&path,
scope.as_deref(),
&targets,
depth,
limit,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::TokenSavings {
fixture,
fail_under,
json,
}) => cmd_token_savings(
&fixture,
fail_under,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::MetricDigest {
input,
baseline,
metrics,
lower_is_better,
higher_is_better,
history,
top,
json,
}) => cmd_metric_digest(
MetricDigestOptions {
input_path: input.as_deref(),
baseline_path: baseline.as_deref(),
metrics: &metrics,
lower_is_better: &lower_is_better,
higher_is_better: &higher_is_better,
history,
top,
},
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::DciBenchmark { fixture, json }) => cmd_dci_benchmark(
&fixture,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::Workflow { topic, json }) => cmd_workflow(
&topic,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::SessionDigest {
path,
input,
source,
json,
}) => cmd_session_digest(
&path,
input.as_deref(),
source.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::SessionCost {
input,
source,
json,
}) => cmd_session_cost(
input.as_deref(),
source.as_deref(),
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
),
Some(Commands::SessionReview {
path,
next_context,
json,
max_items,
max_bytes,
budget,
}) => cmd_session_review_with_budget(
&path,
next_context,
OutputFormat {
json_output: json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
envelope,
},
ResponseBudget::from_cli(max_items, max_bytes, budget, envelope),
),
Some(Commands::Status { path, fix, json }) => cmd_status(
&path,
fix,
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
Some(Commands::Locks { path, scope, json }) => cmd_locks(
&path,
scope.as_deref(),
json || terse || schema || envelope,
compact,
pretty,
terse,
schema,
),
None => {
println!("tsift v{}", env!("CARGO_PKG_VERSION"));
println!("Run `tsift --help` for usage.");
Ok(())
}
}
}
/// Maps a free-form task description to a model tier.
///
/// Matching is a case-insensitive substring search. Architecture/design
/// signals are checked first and win over edit/write signals; anything that
/// matches neither falls through to the cheapest tier.
///
/// Returns `(tier_name, model_id)`.
pub fn classify_task(task: &str) -> (&'static str, &'static str) {
    // Architecture/design signals → opus
    const OPUS_SIGNALS: &[&str] = &[
        "architect",
        "architecture",
        "design",
        "plan",
        "strateg",
        "analy",
        "review",
        "evaluate",
        "assess",
    ];
    // Edit/write signals → sonnet ("add " keeps its trailing space on purpose)
    const SONNET_SIGNALS: &[&str] = &[
        "edit",
        "write",
        "fix",
        "change",
        "update",
        "create",
        "add ",
        "remove",
        "delete",
        "modify",
        "refactor",
        "implement",
        "build",
    ];

    let normalized = task.to_lowercase();
    let mentions_any =
        |signals: &[&str]| signals.iter().any(|&signal| normalized.contains(signal));

    if mentions_any(OPUS_SIGNALS) {
        ("opus", "claude-opus-4-6")
    } else if mentions_any(SONNET_SIGNALS) {
        ("sonnet", "claude-sonnet-4-6")
    } else {
        // Default: search/lookup → haiku
        ("haiku", "claude-haiku-4-5-20251001")
    }
}
/// Test-only shorthand for [`to_json_schema`] with the schema transform off.
#[cfg(test)]
fn to_json<T: serde::Serialize>(val: &T, pretty: bool, terse: bool) -> anyhow::Result<String> {
    let schema = false;
    to_json_schema(val, pretty, terse, schema)
}
/// Marks a JSON response as built from a stale tagpath index.
///
/// When `stale` is true and `value` is a JSON object, sets the top-level
/// field `tagpath_index_stale: true` and, if a reason is supplied,
/// `tagpath_stale_reason: <reason>`. This lets JSON consumers
/// (`tsift --envelope` / `--json` callers) react to the same condition the
/// stderr `tagpath_index_stale: …` log reports, without parsing logs.
///
/// Does nothing when `stale` is false or `value` is not a JSON object.
pub(crate) fn inject_tagpath_stale_into_json(
    value: &mut serde_json::Value,
    stale: bool,
    reason: Option<&str>,
) {
    if !stale {
        return;
    }
    let Some(fields) = value.as_object_mut() else {
        return;
    };
    fields.insert(
        String::from("tagpath_index_stale"),
        serde_json::Value::Bool(true),
    );
    if let Some(text) = reason {
        fields.insert(
            String::from("tagpath_stale_reason"),
            serde_json::Value::String(text.to_owned()),
        );
    }
}
/// Serializes `val` to a JSON string, optionally applying the terse and
/// schema transforms.
///
/// * Neither `terse` nor `schema`: `val` is serialized directly.
/// * Otherwise `val` is first converted to a `serde_json::Value`, passed
///   through `terse_transform` (if `terse`) and then `schema_transform`
///   (if `schema`).
/// * With `terse`, the result is wrapped as `{"_s": <terse schema>, "d": <data>}`.
///
/// `pretty` selects pretty-printed over single-line output in every case.
///
/// # Errors
/// Returns any serialization error reported by `serde_json`.
pub(crate) fn to_json_schema<T: serde::Serialize>(
    val: &T,
    pretty: bool,
    terse: bool,
    schema: bool,
) -> anyhow::Result<String> {
    /// Renders any serializable value as pretty or single-line JSON.
    fn render<S: serde::Serialize>(payload: &S, pretty: bool) -> anyhow::Result<String> {
        let text = if pretty {
            serde_json::to_string_pretty(payload)?
        } else {
            serde_json::to_string(payload)?
        };
        Ok(text)
    }

    if !(terse || schema) {
        return render(val, pretty);
    }

    let mut data = serde_json::to_value(val)?;
    if terse {
        data = terse_transform(data);
    }
    if schema {
        data = schema_transform(data);
    }
    if !terse {
        return render(&data, pretty);
    }

    // The terse schema is derived from the fully transformed data.
    let terse_schema = terse_schema_for(&data);
    let wrapped = serde_json::json!({"_s": terse_schema, "d": data});
    render(&wrapped, pretty)
}
/// Builds a `ToolEnvelopeMetric` from a label and any value that can be
/// rendered with `ToString`.
pub(crate) fn envelope_metric(label: &str, value: impl ToString) -> ToolEnvelopeMetric {
    let label = label.to_owned();
    let value = value.to_string();
    ToolEnvelopeMetric { label, value }
}
/// Removes duplicate strings, keeping the first occurrence of each and the
/// relative order of the survivors.
pub(crate) fn dedupe_preserve_order(values: Vec<String>) -> Vec<String> {
    let mut already_seen: HashSet<String> = HashSet::new();
    values
        .into_iter()
        // `insert` returns true only the first time a value is observed.
        .filter(|candidate| already_seen.insert(candidate.clone()))
        .collect()
}
/// Prints `report` to stdout as JSON, wrapped in a `ToolEnvelope` when
/// `format.envelope` is set.
///
/// In envelope mode the follow-up commands are de-duplicated (first
/// occurrence wins) before serialization; otherwise `tool`, `view`,
/// `summary`, `truncated`, and `follow_up` are unused. Pretty/terse/schema
/// handling is delegated to `to_json_schema`.
///
/// # Errors
/// Returns the serialization error, if any; nothing is printed in that case.
pub(crate) fn print_json_or_envelope<T: Serialize>(
    report: &T,
    format: &OutputFormat,
    tool: &str,
    view: &str,
    summary: ToolEnvelopeSummary,
    truncated: bool,
    follow_up: Vec<String>,
) -> Result<()> {
    let rendered = if format.envelope {
        let wrapped = ToolEnvelope {
            tool,
            view,
            summary,
            truncated,
            follow_up: dedupe_preserve_order(follow_up),
            report,
        };
        to_json_schema(&wrapped, format.pretty, format.terse, format.schema)?
    } else {
        to_json_schema(report, format.pretty, format.terse, format.schema)?
    };
    println!("{}", rendered);
    Ok(())
}
/// One step of a workflow recipe; all text is static.
#[derive(Serialize)]
struct WorkflowStep {
// Short step identifier (e.g. "exact-anchor").
name: &'static str,
// What the step is meant to achieve.
goal: &'static str,
// Command template to run for this step.
command: &'static str,
// Handles/fields the caller should carry forward from this step's output.
preserves: Vec<&'static str>,
// Guidance on what to do after this step.
next: Vec<&'static str>,
}
/// A named, ordered list of workflow steps plus the rules for carrying
/// handles between them.
#[derive(Serialize)]
struct WorkflowRecipe {
// Recipe topic (e.g. "search").
topic: &'static str,
// One-paragraph description of the recipe.
summary: &'static str,
// Rules for keeping handles intact across steps.
handle_contract: Vec<&'static str>,
// Steps in the order they are printed.
steps: Vec<WorkflowStep>,
}
fn search_workflow_recipe() -> WorkflowRecipe {
WorkflowRecipe {
topic: "search",
summary: "Chain exact search, semantic search, explain, summarize, and digest commands without dropping the stable handles emitted by each envelope.",
handle_contract: vec![
"Keep every handle with its originating command, query, path, and strategy.",
"Use each step's expand command for deeper context, but cite the parent handle in notes and follow-up prompts.",
"Prefer --envelope plus --budget normal when handing results to an agent so handles, follow_up commands, and truncation state stay machine-readable.",
],
steps: vec![
WorkflowStep {
name: "exact-anchor",
goal: "Start from a literal identifier, file path, error text, or prior handle label.",
command: "tsift --envelope search \"<literal>\" --exact --path . --budget normal",
preserves: vec![
"summary.handle",
"report.symbols[].handle",
"report.hits[].handle",
],
next: vec![
"Run the matching report.symbols[].expand or report.hits[].expand command before broadening the query.",
],
},
WorkflowStep {
name: "semantic-search",
goal: "Broaden from the exact anchor to lexical, vector, or hybrid retrieval while keeping search-family handles.",
command: "tsift --envelope search \"<concept>\" --path . --strategy hybrid --budget normal",
preserves: vec![
"sfam-* symbol-family handles",
"shit-* content-hit handles",
"follow_up[]",
],
next: vec![
"Use a symbol-family expand command for more search results, or pass the selected symbol name to explain.",
],
},
WorkflowStep {
name: "explain-symbol",
goal: "Expand a selected symbol into definitions, callers, callees, and community context.",
command: "tsift --envelope explain \"<symbol>\" --path . --budget normal",
preserves: vec![
"edef-* definition handles",
"ecall-* caller handles",
"eces-* callee handles",
],
next: vec![
"Run edge expand commands for neighboring symbols, or summarize the selected symbol/file when the cache is available.",
],
},
WorkflowStep {
name: "summarize-selection",
goal: "Read cached summaries for the selected symbol or file without mutating the summary cache.",
command: "tsift summarize \"<symbol>\" --path . --json",
preserves: vec![
"summary refs emitted by search, explain, test-digest, log-digest, diff-digest, and context-pack",
],
next: vec![
"If summaries are missing, run the status-recommended summarize --extract command outside the read-only query path.",
],
},
WorkflowStep {
name: "digest-expansion",
goal: "Expand from code navigation into changed files, tests, logs, or session context while retaining digest artifact handles.",
command: "tsift --envelope context-pack <path> --test-input test.log --log-input build.log --budget normal",
preserves: vec![
"artifact handles",
"touched symbol handles",
"digest summary handles",
"resume_commands[]",
],
next: vec![
"Use resume_commands[] or each digest entry's expand command, and carry forward the original search/explain handle that motivated the digest.",
],
},
],
}
}
/// Looks up a workflow recipe by topic name.
///
/// "search", "search-handles", and "search-workflow" all resolve to the
/// search recipe.
///
/// # Errors
/// Fails for any other topic, naming the rejected topic and listing the
/// available workflows.
fn workflow_recipe(topic: &str) -> Result<WorkflowRecipe> {
    const SEARCH_ALIASES: [&str; 3] = ["search", "search-handles", "search-workflow"];
    if SEARCH_ALIASES.contains(&topic) {
        return Ok(search_workflow_recipe());
    }
    bail!("unknown workflow `{topic}`; available workflows: search")
}
/// Prints a workflow recipe to stdout in human-readable form.
///
/// With `compact`, prints a one-line header followed by one `name cmd:`
/// line per step. Otherwise prints the topic, summary, handle contract, and
/// a numbered (1-based) listing of each step's goal, command, preserved
/// handles, and next actions.
fn print_workflow_human(recipe: &WorkflowRecipe, compact: bool) {
    if compact {
        println!("workflow:{} steps:{}", recipe.topic, recipe.steps.len());
        recipe
            .steps
            .iter()
            .for_each(|entry| println!(" {} cmd:{}", entry.name, entry.command));
    } else {
        println!("Workflow: {}", recipe.topic);
        println!("{}", recipe.summary);
        println!();
        println!("Handle contract:");
        for rule in &recipe.handle_contract {
            println!(" - {rule}");
        }
        println!();
        println!("Steps:");
        for (position, entry) in recipe.steps.iter().enumerate() {
            let ordinal = position + 1;
            let preserved = entry.preserves.join(", ");
            let next_actions = entry.next.join(" ");
            println!(" {}. {} - {}", ordinal, entry.name, entry.goal);
            println!(" cmd: {}", entry.command);
            println!(" preserves: {}", preserved);
            println!(" next: {}", next_actions);
        }
    }
}
/// Handler for the `workflow` subcommand: resolves the recipe for `topic`
/// and prints it as JSON/envelope or as human-readable text.
///
/// In JSON mode the envelope summary carries the recipe summary plus a
/// `steps` metric, and every step command is offered as a follow-up.
///
/// # Errors
/// Fails for an unknown topic or when JSON serialization fails.
fn cmd_workflow(topic: &str, format: OutputFormat) -> Result<()> {
    let recipe = workflow_recipe(topic)?;

    if !format.json_output {
        print_workflow_human(&recipe, format.compact);
        return Ok(());
    }

    let summary = ToolEnvelopeSummary {
        text: recipe.summary.to_string(),
        metrics: vec![envelope_metric("steps", recipe.steps.len())],
    };
    let follow_up: Vec<String> = recipe
        .steps
        .iter()
        .map(|entry| entry.command.to_string())
        .collect();
    print_json_or_envelope(
        &recipe,
        &format,
        "workflow",
        recipe.topic,
        summary,
        false,
        follow_up,
    )
}
/// Top-level token-savings fixture document (round-trips through serde).
#[derive(Deserialize, Serialize)]
struct TokenSavingsFixture {
// Version number of the fixture schema.
schema_version: u64,
// Free-form description; defaults to an empty string when absent from the input.
#[serde(default)]
description: String,
// Label for the token estimation method — presumably descriptive only; verify against the consumer.
token_estimate: String,
// Cases contained in the fixture.
cases: Vec<TokenSavingsFixtureCase>,
}
/// One case of a token-savings fixture.
#[derive(Deserialize, Serialize)]
struct TokenSavingsFixtureCase {
// Case name; also folded into the stable handle key for envelope families.
name: String,
// Command surface (e.g. "explain", "session-review", "context-pack"); selects the expand command.
surface: String,
// Minimum savings percentage expected for this case.
minimum_savings_percent: f64,
// Raw symbol rows for this case.
raw_symbols: Vec<TokenSavingsRawSymbol>,
// Tagpath families for this case; each one becomes an envelope family row.
tagpath_families: Vec<TokenSavingsFamily>,
// Optional session-review inputs; `None` when absent from the input.
#[serde(default)]
session_review_inputs: Option<TokenSavingsSessionReviewInputs>,
// Optional context-pack inputs; `None` when absent from the input.
#[serde(default)]
context_pack_inputs: Option<TokenSavingsContextPackInputs>,
// Optional source-read inputs; `None` when absent from the input.
#[serde(default)]
source_read_inputs: Option<TokenSavingsSourceReadInputs>,
}
/// A raw symbol row in a token-savings fixture case.
#[derive(Deserialize, Serialize)]
struct TokenSavingsRawSymbol {
// Symbol identifier.
identifier: String,
// File the symbol was found in.
file: String,
// Line number within `file` — presumably 1-based; verify against the fixture.
line: u64,
// Surrounding context text.
context: String,
}
/// A tagpath family entry in a token-savings fixture case.
#[derive(Deserialize, Serialize)]
struct TokenSavingsFamily {
// Canonical family name; underscores are rewritten when building aliases and queries.
canonical: String,
// Count reported for the family.
count: usize,
// Alias map; defaults to empty when absent from the input.
#[serde(default)]
aliases: BTreeMap<String, String>,
}
/// Raw session-review sections for a fixture case, kept as untyped JSON.
/// The serialized size of this struct is used as the raw byte count.
#[derive(Deserialize, Serialize)]
struct TokenSavingsSessionReviewInputs {
prompt_targets: Vec<serde_json::Value>,
sessions: Vec<serde_json::Value>,
commands: Vec<serde_json::Value>,
touched_files: Vec<serde_json::Value>,
touched_symbols: Vec<serde_json::Value>,
failures: Vec<serde_json::Value>,
guardrails: Vec<serde_json::Value>,
largest_turns: Vec<serde_json::Value>,
}
/// Raw context-pack sections for a fixture case, kept as untyped JSON.
/// The serialized size of this struct is used as the raw byte count.
#[derive(Deserialize, Serialize)]
struct TokenSavingsContextPackInputs {
next_context: Vec<serde_json::Value>,
diff: Vec<serde_json::Value>,
test: Vec<serde_json::Value>,
log: Vec<serde_json::Value>,
}
/// Source-read inputs for a fixture case.
#[derive(Deserialize, Serialize)]
struct TokenSavingsSourceReadInputs {
// Individual reads; only this list is serialized when measuring raw bytes.
reads: Vec<TokenSavingsSourceReadInput>,
}
/// One source read recorded in a fixture.
// NOTE(review): the raw_* fields presumably describe an unbounded read and the
// envelope_* fields the bounded envelope read — confirm against the consumer.
#[derive(Deserialize, Serialize)]
struct TokenSavingsSourceReadInput {
// Command associated with the read.
command: String,
// File that was read.
file: String,
// First line of the raw read.
raw_start: u64,
// Number of lines in the raw read.
raw_lines: u64,
// Text of the raw read.
raw_excerpt: String,
// First line of the envelope read.
envelope_start: u64,
// Number of lines in the envelope read.
envelope_lines: u64,
// Line numbers listed as required anchors.
required_line_anchors: Vec<u64>,
}
/// Envelope row produced for one tagpath family
/// (built by `token_savings_envelope_families`).
#[derive(Serialize)]
struct TokenSavingsEnvelopeFamily {
// Stable handle derived from "<surface>:<case name>:<canonical>".
handle: String,
// Canonical family name with '_' replaced by '/'.
tag_alias: String,
// Count copied from the fixture family.
count: usize,
// Command that expands this family.
expand: String,
}
/// Envelope row summarizing one session-review section.
#[derive(Serialize)]
struct TokenSavingsSessionReviewEnvelope<'a> {
// Section name (e.g. "prompt_targets", "sessions").
section: &'a str,
// Stable handle for the section.
handle: String,
// Number of entries in the section.
count: usize,
// Command that expands the section.
expand: String,
}
/// Envelope row summarizing one context-pack section.
#[derive(Serialize)]
struct TokenSavingsContextPackEnvelope<'a> {
// Section name.
section: &'a str,
// Handle for the section.
handle: String,
// Number of entries in the section.
count: usize,
// Command that expands the section.
expand: String,
}
/// Envelope row describing one source read.
#[derive(Serialize)]
struct TokenSavingsSourceReadEnvelope {
// Handle for the read.
handle: String,
// File that was read.
file: String,
// First line of the read.
start: u64,
// Number of lines in the read.
lines: u64,
// Line numbers listed as required anchors.
required_line_anchors: Vec<u64>,
// Command that expands the read.
expand: String,
}
/// Per-case result row of the token-savings report.
#[derive(Serialize)]
struct TokenSavingsCaseReport {
// Case name and command surface.
name: String,
surface: String,
// Input sizes for the case.
raw_symbol_count: usize,
family_count: usize,
// Byte measurements: raw payload, envelope payload, and their difference.
raw_bytes: usize,
envelope_bytes: usize,
byte_delta: usize,
// Token estimates for the raw and envelope payloads, and their difference.
raw_estimated_tokens: usize,
envelope_estimated_tokens: usize,
estimated_token_delta: usize,
// Measured savings and the threshold from the fixture case.
savings_percent: f64,
minimum_savings_percent: f64,
// Outcome label for the case — presumably pass/fail; verify against the producer.
status: String,
}
/// Totals section of the token-savings report.
// NOTE(review): presumably sums over all cases — confirm against the producer.
#[derive(Serialize)]
struct TokenSavingsTotals {
// Number of cases.
cases: usize,
// Byte measurements: raw, envelope, and their difference.
raw_bytes: usize,
envelope_bytes: usize,
byte_delta: usize,
// Token estimates: raw, envelope, and their difference.
raw_estimated_tokens: usize,
envelope_estimated_tokens: usize,
estimated_token_delta: usize,
// Overall savings percentage.
savings_percent: f64,
}
/// Serialized token-savings report: totals plus one row per case.
#[derive(Serialize)]
struct TokenSavingsReport {
// Schema version of the report.
schema_version: u64,
// Label for the token estimation method.
token_estimate: String,
// Overall pass/fail flag.
pass: bool,
// Totals section.
totals: TokenSavingsTotals,
// Per-case rows.
cases: Vec<TokenSavingsCaseReport>,
}
/// Estimates a token count as one token per four bytes, rounded up.
fn estimated_tokens_from_bytes(bytes: usize) -> usize {
    let full_groups = bytes / 4;
    if bytes % 4 == 0 {
        full_groups
    } else {
        // A partial trailing group still counts as one token.
        full_groups + 1
    }
}
/// Percentage by which the envelope is smaller than the raw payload.
///
/// Returns `0.0` when there is no raw payload or when the envelope is not
/// strictly smaller than it, so the result is never negative.
fn savings_percent(raw_bytes: usize, envelope_bytes: usize) -> f64 {
    let shrank = raw_bytes != 0 && envelope_bytes < raw_bytes;
    if !shrank {
        return 0.0;
    }
    let saved = (raw_bytes - envelope_bytes) as f64;
    saved / raw_bytes as f64 * 100.0
}
/// Builds the follow-up command suggested for expanding an envelope family.
///
/// * `"explain"` expands through `tsift --envelope explain` on the quoted
///   canonical name.
/// * `"session-review"` expands through `tsift summarize`.
/// * `"context-pack"` returns a fixed template with placeholder arguments.
/// * Any other surface falls back to an envelope search whose query is the
///   canonical name with underscores turned into spaces.
fn token_savings_expand_command(surface: &str, canonical: &str) -> String {
    match surface {
        "explain" => format!(
            "tsift --envelope explain {} --budget normal",
            shell_quote(canonical)
        ),
        "session-review" => format!("tsift summarize {}", shell_quote(canonical)),
        "context-pack" => {
            "tsift --envelope context-pack <target> --test-input <test.log> --log-input <build.log> --budget normal"
                .to_string()
        }
        _ => {
            // Only the search fallback needs the space-separated query, so it
            // is built here instead of being allocated on every call.
            let query = canonical.replace('_', " ");
            format!(
                "tsift --envelope search {} --budget normal",
                shell_quote(&query)
            )
        }
    }
}
/// Converts the tagpath families of a fixture case into envelope rows.
///
/// Each row gets a stable handle keyed by `surface:name:canonical`, an alias
/// that swaps `_` for `/`, the family count, and the expand command for the
/// case's surface.
fn token_savings_envelope_families(
    case: &TokenSavingsFixtureCase,
) -> Vec<TokenSavingsEnvelopeFamily> {
    let mut rows = Vec::with_capacity(case.tagpath_families.len());
    for family in case.tagpath_families.iter() {
        let handle_key = format!("{}:{}:{}", case.surface, case.name, family.canonical);
        rows.push(TokenSavingsEnvelopeFamily {
            handle: stable_handle("tfam", &handle_key),
            tag_alias: family.canonical.replace('_', "/"),
            count: family.count,
            expand: token_savings_expand_command(&case.surface, &family.canonical),
        });
    }
    rows
}
/// Returns the size in bytes of the context-pack inputs serialized as JSON.
///
/// # Errors
/// Propagates any serialization failure.
fn token_savings_context_pack_raw_bytes(inputs: &TokenSavingsContextPackInputs) -> Result<usize> {
    let encoded = serde_json::to_vec(inputs)?;
    Ok(encoded.len())
}
/// Returns the size in bytes of the session-review inputs serialized as JSON.
///
/// # Errors
/// Propagates any serialization failure.
fn token_savings_session_review_raw_bytes(
    inputs: &TokenSavingsSessionReviewInputs,
) -> Result<usize> {
    let encoded = serde_json::to_vec(inputs)?;
    Ok(encoded.len())
}
/// Returns the size in bytes of the source-read list serialized as JSON.
///
/// Only `inputs.reads` is serialized, not the wrapping object.
///
/// # Errors
/// Propagates any serialization failure.
fn token_savings_source_read_raw_bytes(inputs: &TokenSavingsSourceReadInputs) -> Result<usize> {
    let encoded = serde_json::to_vec(&inputs.reads)?;
    Ok(encoded.len())
}
/// Summarizes session-review inputs as one envelope row per non-empty section.
///
/// Sections keep a fixed order; each row carries a stable handle keyed by
/// `<case name>:<section>`, the entry count, and the command to expand it.
/// Sections with zero entries are dropped.
fn token_savings_session_review_envelope(
    case: &TokenSavingsFixtureCase,
    inputs: &TokenSavingsSessionReviewInputs,
) -> Vec<TokenSavingsSessionReviewEnvelope<'static>> {
    const REVIEW: &str = "tsift session-review <target> --json";
    const DIGEST: &str = "tsift session-digest --source auto --input <transcript> --json";
    const SEARCH: &str = "tsift --envelope search <symbol> --budget normal";
    const COST: &str = "tsift session-cost --input <transcript> --json";

    // (section name, entry count, expand command), in output order.
    let sections: [(&'static str, usize, &'static str); 8] = [
        ("prompt_targets", inputs.prompt_targets.len(), REVIEW),
        ("sessions", inputs.sessions.len(), REVIEW),
        ("commands", inputs.commands.len(), DIGEST),
        ("files", inputs.touched_files.len(), REVIEW),
        ("symbols", inputs.touched_symbols.len(), SEARCH),
        ("failures", inputs.failures.len(), REVIEW),
        ("guardrails", inputs.guardrails.len(), COST),
        ("largest_turns", inputs.largest_turns.len(), COST),
    ];

    sections
        .into_iter()
        .map(|(section, count, expand)| TokenSavingsSessionReviewEnvelope {
            section,
            handle: stable_handle("tsr", &format!("{}:{}", case.name, section)),
            count,
            expand: expand.to_string(),
        })
        .filter(|row| row.count > 0)
        .collect()
}
/// Summarizes context-pack inputs as one envelope row per non-empty section.
///
/// Sections keep a fixed order; each row carries a stable handle keyed by
/// `<case name>:<section>`, the entry count, and the command to expand it.
/// Sections with zero entries are dropped.
fn token_savings_context_pack_envelope(
    case: &TokenSavingsFixtureCase,
    inputs: &TokenSavingsContextPackInputs,
) -> Vec<TokenSavingsContextPackEnvelope<'static>> {
    // (section name, entry count, expand command), in output order.
    let sections: [(&'static str, usize, &'static str); 4] = [
        (
            "next_context",
            inputs.next_context.len(),
            "tsift session-review --next-context <target> --json",
        ),
        ("diff", inputs.diff.len(), "tsift diff-digest . --json"),
        (
            "test",
            inputs.test.len(),
            "tsift test-digest --path . < test.log",
        ),
        (
            "log",
            inputs.log.len(),
            "tsift log-digest --path . < build.log",
        ),
    ];

    sections
        .into_iter()
        .map(|(section, count, expand)| TokenSavingsContextPackEnvelope {
            section,
            handle: stable_handle("tcp", &format!("{}:{}", case.name, section)),
            count,
            expand: expand.to_string(),
        })
        .filter(|row| row.count > 0)
        .collect()
}
/// Builds one envelope row per source read, validating each window first.
///
/// # Errors
/// Fails on the first read whose envelope window is empty, or whose window
/// does not contain every required line anchor.
fn token_savings_source_read_envelope(
    case: &TokenSavingsFixtureCase,
    inputs: &TokenSavingsSourceReadInputs,
) -> Result<Vec<TokenSavingsSourceReadEnvelope>> {
    let mut rows = Vec::with_capacity(inputs.reads.len());
    for read in &inputs.reads {
        if read.envelope_lines == 0 {
            bail!(
                "source-read fixture {} has an empty envelope window for {}",
                case.name,
                read.command
            );
        }

        // Inclusive last line of the window; saturating arithmetic keeps
        // extreme fixture values from overflowing.
        let first_line = read.envelope_start;
        let last_line = first_line
            .saturating_add(read.envelope_lines)
            .saturating_sub(1);
        let window = first_line..=last_line;

        let hidden_anchor = read
            .required_line_anchors
            .iter()
            .find(|anchor| !window.contains(*anchor));
        if let Some(anchor) = hidden_anchor {
            bail!(
                "source-read fixture {} hides required line anchor {} for {} outside {}-{}",
                case.name,
                anchor,
                read.command,
                first_line,
                last_line
            );
        }

        let expand = format!(
            "tsift --envelope source-read {} --start {} --lines {} --budget normal",
            shell_quote(&read.file),
            read.envelope_start,
            read.envelope_lines
        );
        rows.push(TokenSavingsSourceReadEnvelope {
            handle: stable_handle("tsrc", &format!("{}:{}", case.name, read.command)),
            file: read.file.clone(),
            start: read.envelope_start,
            lines: read.envelope_lines,
            required_line_anchors: read.required_line_anchors.clone(),
            expand,
        });
    }
    Ok(rows)
}
fn build_token_savings_report(fixture: &TokenSavingsFixture) -> Result<TokenSavingsReport> {
let mut cases = Vec::new();
let mut total_raw_bytes = 0;
let mut total_envelope_bytes = 0;
for case in &fixture.cases {
let mut raw_bytes = serde_json::to_vec(&case.raw_symbols)?.len();
let envelope = token_savings_envelope_families(case);
let mut envelope_bytes = serde_json::to_vec(&envelope)?.len();
if let Some(inputs) = &case.session_review_inputs {
raw_bytes += token_savings_session_review_raw_bytes(inputs)?;
envelope_bytes +=
serde_json::to_vec(&token_savings_session_review_envelope(case, inputs))?.len();
}
if let Some(inputs) = &case.context_pack_inputs {
raw_bytes += token_savings_context_pack_raw_bytes(inputs)?;
envelope_bytes +=
serde_json::to_vec(&token_savings_context_pack_envelope(case, inputs))?.len();
}
if let Some(inputs) = &case.source_read_inputs {
raw_bytes += token_savings_source_read_raw_bytes(inputs)?;
envelope_bytes +=
serde_json::to_vec(&token_savings_source_read_envelope(case, inputs)?)?.len();
}
let byte_delta = raw_bytes.saturating_sub(envelope_bytes);
let raw_estimated_tokens = estimated_tokens_from_bytes(raw_bytes);
let envelope_estimated_tokens = estimated_tokens_from_bytes(envelope_bytes);
let estimated_token_delta = raw_estimated_tokens.saturating_sub(envelope_estimated_tokens);
let savings_percent = savings_percent(raw_bytes, envelope_bytes);
let pass = savings_percent >= case.minimum_savings_percent;
total_raw_bytes += raw_bytes;
total_envelope_bytes += envelope_bytes;
cases.push(TokenSavingsCaseReport {
name: case.name.clone(),
surface: case.surface.clone(),
raw_symbol_count: case.raw_symbols.len(),
family_count: case.tagpath_families.len(),
raw_bytes,
envelope_bytes,
byte_delta,
raw_estimated_tokens,
envelope_estimated_tokens,
estimated_token_delta,
savings_percent,
minimum_savings_percent: case.minimum_savings_percent,
status: if pass { "pass" } else { "fail" }.to_string(),
});
}
let total_byte_delta = total_raw_bytes.saturating_sub(total_envelope_bytes);
let total_raw_estimated_tokens = estimated_tokens_from_bytes(total_raw_bytes);
let total_envelope_estimated_tokens = estimated_tokens_from_bytes(total_envelope_bytes);
let total_estimated_token_delta =
total_raw_estimated_tokens.saturating_sub(total_envelope_estimated_tokens);
let pass = cases.iter().all(|case| case.status == "pass");
Ok(TokenSavingsReport {
schema_version: fixture.schema_version,
token_estimate: fixture.token_estimate.clone(),
pass,
totals: TokenSavingsTotals {
cases: cases.len(),
raw_bytes: total_raw_bytes,
envelope_bytes: total_envelope_bytes,
byte_delta: total_byte_delta,
raw_estimated_tokens: total_raw_estimated_tokens,
envelope_estimated_tokens: total_envelope_estimated_tokens,
estimated_token_delta: total_estimated_token_delta,
savings_percent: savings_percent(total_raw_bytes, total_envelope_bytes),
},
cases,
})
}
/// Prints the report to stdout as a tab-separated table: a header row, one
/// row per case, and a closing `total` row.
fn print_token_savings_human(report: &TokenSavingsReport) {
    let header = [
        "surface",
        "case",
        "raw_bytes",
        "envelope_bytes",
        "byte_delta",
        "raw_tokens",
        "envelope_tokens",
        "token_delta",
        "savings_percent",
        "minimum_percent",
        "status",
    ]
    .join("\t");
    println!("{header}");

    for row in &report.cases {
        println!(
            "{surface}\t{name}\t{raw}\t{envelope}\t{delta}\t{raw_tokens}\t{envelope_tokens}\t{token_delta}\t{savings:.1}\t{minimum:.1}\t{status}",
            surface = row.surface,
            name = row.name,
            raw = row.raw_bytes,
            envelope = row.envelope_bytes,
            delta = row.byte_delta,
            raw_tokens = row.raw_estimated_tokens,
            envelope_tokens = row.envelope_estimated_tokens,
            token_delta = row.estimated_token_delta,
            savings = row.savings_percent,
            minimum = row.minimum_savings_percent,
            status = row.status
        );
    }

    let totals = &report.totals;
    let overall = if report.pass { "pass" } else { "fail" };
    // The totals row has no minimum threshold, hence the `-` placeholder.
    println!(
        "total\tall\t{}\t{}\t{}\t{}\t{}\t{}\t{:.1}\t-\t{}",
        totals.raw_bytes,
        totals.envelope_bytes,
        totals.byte_delta,
        totals.raw_estimated_tokens,
        totals.envelope_estimated_tokens,
        totals.estimated_token_delta,
        totals.savings_percent,
        overall
    );
}
/// Runs the `token-savings` command: loads a JSON fixture, builds the report
/// and prints it as JSON/envelope or as a human-readable table.
///
/// # Errors
/// Fails when the fixture cannot be read or parsed, when report building or
/// printing fails, or, with `fail_under` set, when the report did not pass.
fn cmd_token_savings(fixture_path: &Path, fail_under: bool, format: OutputFormat) -> Result<()> {
    let raw_fixture = fs::read_to_string(fixture_path)
        .with_context(|| format!("reading token-savings fixture: {}", fixture_path.display()))?;
    let fixture: TokenSavingsFixture = serde_json::from_str(&raw_fixture)
        .with_context(|| format!("parsing token-savings fixture: {}", fixture_path.display()))?;
    let report = build_token_savings_report(&fixture)?;

    if !format.json_output {
        print_token_savings_human(&report);
    } else {
        let totals = &report.totals;
        let summary = ToolEnvelopeSummary {
            text: "token-savings report".to_string(),
            metrics: vec![
                envelope_metric("cases", totals.cases),
                envelope_metric("raw_tokens", totals.raw_estimated_tokens),
                envelope_metric("envelope_tokens", totals.envelope_estimated_tokens),
                envelope_metric("token_delta", totals.estimated_token_delta),
                envelope_metric("savings_percent", format!("{:.1}", totals.savings_percent)),
            ],
        };
        print_json_or_envelope(
            &report,
            &format,
            "token-savings",
            "report",
            summary,
            false,
            vec![],
        )?;
    }

    // The report is always printed before the threshold failure is raised.
    if fail_under && !report.pass {
        bail!("token-savings threshold failed");
    }
    Ok(())
}
/// Writes `body` to `<root>/.tsift/artifacts/<handle>.<suffix>` and returns a
/// reference describing the stored artifact.
///
/// The handle is derived from `prefix` and `key`; the returned path is made
/// relative to `root` when possible.
///
/// # Errors
/// Fails when the artifacts directory cannot be created or the file cannot
/// be written.
fn persist_transcript_artifact(
    root: &Path,
    prefix: &str,
    suffix: &str,
    key: &str,
    body: &str,
    expand: String,
) -> Result<TranscriptArtifactRef> {
    let handle = stable_handle(prefix, key);

    let artifacts_dir = root.join(".tsift/artifacts");
    fs::create_dir_all(&artifacts_dir).with_context(|| {
        format!(
            "creating transcript artifacts dir: {}",
            artifacts_dir.display()
        )
    })?;

    let artifact_path = artifacts_dir.join(format!("{handle}.{suffix}"));
    fs::write(&artifact_path, body)
        .with_context(|| format!("writing transcript artifact: {}", artifact_path.display()))?;

    let relative = relativize_pathbuf(&artifact_path, root);
    Ok(TranscriptArtifactRef {
        handle,
        path: relative.display().to_string(),
        bytes: body.len(),
        lines: body.lines().count(),
        expand,
    })
}
/// Maps a long JSON field name to its terse alias.
///
/// Keys without an alias are returned unchanged.
///
/// NOTE(review): the same (long, short) pairs are listed again in
/// `TERSE_PAIRS`; the two tables have to be edited together.
/// NOTE(review): `"success"` and `"healthy"` both map to `"ok"`, so an object
/// holding both keys ends up with a single `"ok"` entry after
/// `terse_transform` — confirm this is intended.
fn terse_key(key: &str) -> &str {
match key {
"name" => "n",
"kind" => "k",
"file" => "f",
"line" => "l",
"path" => "p",
"from" => "fr",
"type" => "ty",
"text" => "tx",
"new" => "nw",
"run" => "r",
"use" => "u",
"score" => "sc",
"language" => "la",
"status" => "st",
"state" => "stt",
"error" => "err",
"errors" => "ers",
"hops" => "hp",
"tags" => "tg",
"model" => "ml",
"skill" => "sk",
"count" => "ct",
"total" => "tot",
"column" => "col",
"description" => "dsc",
"end_line" => "el",
"signature" => "sig",
"parent_module" => "pm",
"visibility" => "vis",
"match_type" => "mt",
"caller_file" => "cf",
"caller_name" => "cn",
"caller_line" => "cl",
"callee_name" => "en",
"call_site_line" => "csl",
"members" => "m",
"refs" => "refs",
"role" => "rl",
"peer" => "pr",
"modularity" => "q",
"modularity_contribution" => "mc",
"iterations" => "it",
"node_count" => "nc",
"edge_count" => "ec",
"community_count" => "cc",
"communities" => "cms",
"community" => "cm",
"community_diagnostics" => "cd",
"cache_hit" => "cah",
"tagpath_state" => "tps",
"tagpath_stale_reason" => "tsr",
"annotated_community_count" => "acc",
"annotated_member_count" => "amc",
"ambiguous_member_count" => "ambc",
"ambiguous_members" => "amb",
"candidate_count" => "cand",
"tagpath_candidate_count" => "tcand",
"evidence" => "ev",
"chosen_file" => "chf",
"symbol" => "s",
"symbols" => "sy",
"definitions" => "df",
"callers" => "crs",
"callees" => "ces",
"total_tracked" => "tt",
"modified" => "md",
"deleted" => "dl",
"unchanged" => "uc",
"changes" => "ch",
"prune_stats" => "ps",
"hits" => "h",
"rank" => "rk",
"snippet" => "sn",
"confidence" => "co",
"index" => "ix",
"summaries" => "sms",
"recommendations" => "rec",
"total_files" => "tf",
"stale_files" => "sf",
"last_indexed_secs_ago" => "age",
"cached_files" => "caf",
"total_indexed_files" => "tif",
"coverage_pct" => "cov",
"symbol_name" => "syn",
"file_path" => "fp",
"content_hash" => "hsh",
"summary" => "sum",
"tool" => "tl",
"view" => "vw",
"truncated" => "tr",
"follow_up" => "fu",
"report" => "rp",
"metrics" => "ms",
"label" => "lb",
"value" => "v",
"command" => "cmd",
"exit_code" => "xc",
"success" => "ok",
"artifact" => "art",
"digest" => "dg",
"bytes" => "bt",
"lines" => "lns",
"expand" => "xp",
"entities" => "ent",
"relationships" => "rel",
"concept_labels" => "cls",
"extracted_at" => "at",
"tokens_input" => "ti",
"tokens_output" => "tout",
"total_summaries" => "ts",
"stale_count" => "stc",
"total_tokens_input" => "tti",
"total_tokens_output" => "tto",
"estimated_tokens_saved" => "ets",
"files_processed" => "fps",
"symbols_extracted" => "se",
"skills_dir" => "sd",
"healthy" => "ok",
"broken" => "brk",
"skills" => "sks",
"manifest_diffs" => "mdf",
"similar_pairs" => "sim",
"usage" => "usg",
"cleanup" => "cln",
"has_skill_md" => "hsm",
"is_symlink" => "isl",
"issues" => "iss",
"invocation_count" => "inv",
"reasons" => "rsn",
"token_estimate" => "te",
"skill_a" => "sa",
"skill_b" => "sb",
"desc_a" => "da",
"desc_b" => "db",
"annotations" => "ann",
"entity" => "ety",
"suggestion" => "sug",
"columns" => "cols",
"row_count" => "rc",
"notnull" => "nn",
"default_value" => "dv",
"replace_all" => "ra",
// Unknown keys pass through untouched.
other => other,
}
}
fn terse_transform(val: serde_json::Value) -> serde_json::Value {
match val {
serde_json::Value::Object(map) => {
let mut new_map = serde_json::Map::new();
for (k, v) in map {
new_map.insert(terse_key(&k).to_string(), terse_transform(v));
}
serde_json::Value::Object(new_map)
}
serde_json::Value::Array(arr) => {
serde_json::Value::Array(arr.into_iter().map(terse_transform).collect())
}
other => other,
}
}
/// Builds a `short -> long` legend for the terse aliases that occur as keys
/// anywhere inside `val`.
///
/// Entries follow `TERSE_PAIRS` order; when two long names share an alias,
/// the later pair overwrites the earlier one.
fn terse_schema_for(val: &serde_json::Value) -> serde_json::Value {
    let mut present = HashSet::new();
    collect_terse_keys(val, &mut present);

    let mut legend = serde_json::Map::new();
    for &(long, short) in TERSE_PAIRS
        .iter()
        .filter(|(_, short)| present.contains(*short))
    {
        legend.insert(short.to_owned(), serde_json::Value::from(long));
    }
    serde_json::Value::Object(legend)
}
/// Adds every object key found anywhere inside `val` to `keys`.
///
/// Walks nested objects and arrays with an explicit work stack; scalar
/// values contribute nothing.
fn collect_terse_keys(val: &serde_json::Value, keys: &mut HashSet<String>) {
    let mut pending = vec![val];
    while let Some(current) = pending.pop() {
        match current {
            serde_json::Value::Object(fields) => {
                for (name, child) in fields {
                    keys.insert(name.clone());
                    pending.push(child);
                }
            }
            serde_json::Value::Array(items) => pending.extend(items),
            _ => {}
        }
    }
}
/// Packs arrays of same-shaped objects into a column/row form.
///
/// An array of two or more objects that all carry exactly the same key set
/// becomes `{"_c": [columns...], "_r": [[values...], ...]}`, with the columns
/// taken from the first element. Cell values are emitted as they are; nested
/// arrays inside a packed row are not packed themselves. Every other array
/// and every object is walked recursively, and scalars pass through.
fn schema_transform(val: serde_json::Value) -> serde_json::Value {
    match val {
        serde_json::Value::Array(arr) => {
            // Arrays shorter than two elements are never packed.
            let columns = if arr.len() >= 2 {
                homogeneous_keys(&arr)
            } else {
                None
            };
            let Some(cols) = columns else {
                return serde_json::Value::Array(arr.into_iter().map(schema_transform).collect());
            };
            let rows: Vec<serde_json::Value> = arr
                .into_iter()
                .map(|item| match item {
                    serde_json::Value::Object(mut map) => {
                        // The row map is owned, so each cell is moved out
                        // instead of being deep-cloned.
                        let cells: Vec<serde_json::Value> = cols
                            .iter()
                            .map(|c| map.remove(c).unwrap_or(serde_json::Value::Null))
                            .collect();
                        serde_json::Value::Array(cells)
                    }
                    other => other,
                })
                .collect();
            let col_vals: Vec<serde_json::Value> =
                cols.into_iter().map(serde_json::Value::String).collect();
            serde_json::json!({"_c": col_vals, "_r": rows})
        }
        serde_json::Value::Object(map) => {
            let new_map: serde_json::Map<String, serde_json::Value> = map
                .into_iter()
                .map(|(k, v)| (k, schema_transform(v)))
                .collect();
            serde_json::Value::Object(new_map)
        }
        other => other,
    }
}
/// Returns the key list shared by every element of `arr`, if there is one.
///
/// Yields the first element's keys when every element is an object with
/// exactly that key set. Returns `None` for an empty slice, for any
/// non-object element, or when any key set differs.
fn homogeneous_keys(arr: &[serde_json::Value]) -> Option<Vec<String>> {
    let (head, tail) = arr.split_first()?;
    let keys: Vec<String> = head.as_object()?.keys().cloned().collect();

    let uniform = tail.iter().all(|item| {
        item.as_object().is_some_and(|obj| {
            // Equal size plus containment of every key means an equal key set.
            obj.len() == keys.len() && keys.iter().all(|k| obj.contains_key(k))
        })
    });
    uniform.then_some(keys)
}
/// `(long name, terse alias)` pairs, read by `terse_schema_for` to build the
/// alias legend.
///
/// NOTE(review): this repeats the arms of `terse_key`; the two tables have to
/// be edited together.
/// NOTE(review): `"success"` and `"healthy"` share the alias `"ok"`; because
/// `terse_schema_for` inserts in table order, the legend reports `"ok"` as
/// `"healthy"` — confirm this is intended.
const TERSE_PAIRS: &[(&str, &str)] = &[
("name", "n"),
("kind", "k"),
("file", "f"),
("line", "l"),
("path", "p"),
("from", "fr"),
("type", "ty"),
("text", "tx"),
("new", "nw"),
("run", "r"),
("use", "u"),
("score", "sc"),
("language", "la"),
("status", "st"),
("state", "stt"),
("error", "err"),
("errors", "ers"),
("hops", "hp"),
("tags", "tg"),
("model", "ml"),
("skill", "sk"),
("count", "ct"),
("total", "tot"),
("column", "col"),
("description", "dsc"),
("end_line", "el"),
("signature", "sig"),
("parent_module", "pm"),
("visibility", "vis"),
("match_type", "mt"),
("caller_file", "cf"),
("caller_name", "cn"),
("caller_line", "cl"),
("callee_name", "en"),
("call_site_line", "csl"),
("members", "m"),
("refs", "refs"),
("role", "rl"),
("peer", "pr"),
("modularity", "q"),
("modularity_contribution", "mc"),
("iterations", "it"),
("node_count", "nc"),
("edge_count", "ec"),
("community_count", "cc"),
("communities", "cms"),
("community", "cm"),
("community_diagnostics", "cd"),
("cache_hit", "cah"),
("tagpath_state", "tps"),
("tagpath_stale_reason", "tsr"),
("annotated_community_count", "acc"),
("annotated_member_count", "amc"),
("ambiguous_member_count", "ambc"),
("ambiguous_members", "amb"),
("candidate_count", "cand"),
("tagpath_candidate_count", "tcand"),
("evidence", "ev"),
("chosen_file", "chf"),
("symbol", "s"),
("symbols", "sy"),
("definitions", "df"),
("callers", "crs"),
("callees", "ces"),
("total_tracked", "tt"),
("modified", "md"),
("deleted", "dl"),
("unchanged", "uc"),
("changes", "ch"),
("prune_stats", "ps"),
("hits", "h"),
("rank", "rk"),
("snippet", "sn"),
("confidence", "co"),
("index", "ix"),
("summaries", "sms"),
("recommendations", "rec"),
("total_files", "tf"),
("stale_files", "sf"),
("last_indexed_secs_ago", "age"),
("cached_files", "caf"),
("total_indexed_files", "tif"),
("coverage_pct", "cov"),
("symbol_name", "syn"),
("file_path", "fp"),
("content_hash", "hsh"),
("summary", "sum"),
("tool", "tl"),
("view", "vw"),
("truncated", "tr"),
("follow_up", "fu"),
("report", "rp"),
("metrics", "ms"),
("label", "lb"),
("value", "v"),
("command", "cmd"),
("exit_code", "xc"),
("success", "ok"),
("artifact", "art"),
("digest", "dg"),
("bytes", "bt"),
("lines", "lns"),
("expand", "xp"),
("entities", "ent"),
("relationships", "rel"),
("concept_labels", "cls"),
("extracted_at", "at"),
("tokens_input", "ti"),
("tokens_output", "tout"),
("total_summaries", "ts"),
("stale_count", "stc"),
("total_tokens_input", "tti"),
("total_tokens_output", "tto"),
("estimated_tokens_saved", "ets"),
("files_processed", "fps"),
("symbols_extracted", "se"),
("skills_dir", "sd"),
("healthy", "ok"),
("broken", "brk"),
("skills", "sks"),
("manifest_diffs", "mdf"),
("similar_pairs", "sim"),
("usage", "usg"),
("cleanup", "cln"),
("has_skill_md", "hsm"),
("is_symlink", "isl"),
("issues", "iss"),
("invocation_count", "inv"),
("reasons", "rsn"),
("token_estimate", "te"),
("skill_a", "sa"),
("skill_b", "sb"),
("desc_a", "da"),
("desc_b", "db"),
("annotations", "ann"),
("entity", "ety"),
("suggestion", "sug"),
("columns", "cols"),
("row_count", "rc"),
("notnull", "nn"),
("default_value", "dv"),
("replace_all", "ra"),
];
/// Strips the `root` directory prefix from `path`, textually.
///
/// The prefix is `root` with trailing `/` removed plus a single `/`, so
/// only paths strictly below `root` are shortened; anything else is
/// returned unchanged.
pub(crate) fn relativize(path: &str, root: &std::path::Path) -> String {
    let root_text = root.to_string_lossy();
    let mut prefix = String::from(root_text.trim_end_matches('/'));
    prefix.push('/');

    match path.strip_prefix(prefix.as_str()) {
        Some(rest) => rest.to_string(),
        None => path.to_string(),
    }
}
/// Finds the directory under which transcript artifacts for `path` belong.
///
/// Canonicalizes `path`, starts from it (or from its parent when it is not a
/// directory) and returns the nearest ancestor that contains a `.git` entry
/// or a `.gitmodules` file. Falls back to the starting directory when no
/// ancestor matches.
///
/// # Errors
/// Fails when `path` cannot be canonicalized.
fn transcript_artifact_root(path: &Path) -> Result<PathBuf> {
    let canonical = path
        .canonicalize()
        .with_context(|| format!("canonicalizing {}", path.display()))?;

    let start = if canonical.is_dir() {
        canonical
    } else {
        // A path without a parent is used as its own starting point.
        canonical
            .parent()
            .map(Path::to_path_buf)
            .unwrap_or(canonical)
    };

    let repo_root = start
        .ancestors()
        .find(|dir| dir.join(".git").exists() || dir.join(".gitmodules").is_file())
        .map(Path::to_path_buf);
    Ok(repo_root.unwrap_or(start))
}
/// Returns `path` relative to `root`, or an owned copy of `path` itself when
/// it does not start with `root`.
fn relativize_pathbuf(path: &std::path::Path, root: &std::path::Path) -> PathBuf {
    match path.strip_prefix(root) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => path.to_path_buf(),
    }
}
/// Rewrites the `caller_file` of every edge to be relative to `root`.
pub(crate) fn relativize_edges(edges: &mut [index::StoredEdge], root: &std::path::Path) {
    edges
        .iter_mut()
        .for_each(|edge| edge.caller_file = relativize(&edge.caller_file, root));
}
/// Rewrites the `file` of every stored symbol to be relative to `root`.
pub(crate) fn relativize_symbols(symbols: &mut [index::StoredSymbol], root: &std::path::Path) {
    symbols
        .iter_mut()
        .for_each(|symbol| symbol.file = relativize(&symbol.file, root));
}
/// Rewrites the `file` of every symbol hit to be relative to `root`.
pub(crate) fn relativize_symbol_hits(hits: &mut [index::SymbolHit], root: &std::path::Path) {
    hits.iter_mut()
        .for_each(|hit| hit.file = relativize(&hit.file, root));
}
/// Version tag mixed into community-detection cache keys and stored in every
/// on-disk cache entry; entries carrying a different tag are ignored on read.
const COMMUNITY_DETECTION_CACHE_VERSION: &str = "community-detection-cache-v1";
/// In-memory community-detection results keyed by cache key, created lazily
/// through `community_detection_cache`.
static COMMUNITY_DETECTION_CACHE: OnceLock<Mutex<BTreeMap<String, graph::CommunityResult>>> =
OnceLock::new();
/// Diagnostics reported alongside a community-detection result.
#[derive(Debug, Clone, Serialize)]
struct CommunityDetectionDiagnostics {
/// True when the result came from the in-memory or on-disk cache.
cache_hit: bool,
/// Copied from the detection result.
edge_count: usize,
/// Copied from the detection result.
iterations: usize,
/// Tagpath state label (`"fresh"`, `"disabled"`, `"stale"`, `"missing"`).
tagpath_state: String,
/// Readiness verdict derived from the tagpath state.
tagpath_readiness: GraphEffectivenessReadiness,
/// Reason attached to the tagpath state, when there is one; omitted from output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
tagpath_stale_reason: Option<String>,
/// Communities with at least one member carrying a tagpath handle.
annotated_community_count: usize,
/// Members carrying a tagpath handle, across all communities.
annotated_member_count: usize,
/// Length of `ambiguous_members`.
ambiguous_member_count: usize,
/// Ambiguous members reported by tagpath annotation; omitted from output when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
ambiguous_members: Vec<CommunityMemberAmbiguityDiagnostic>,
}
/// Community-detection result paired with the diagnostics describing how it
/// was obtained.
#[derive(Debug, Clone)]
pub(crate) struct CommunityDetectionReport {
/// Detected communities, either freshly computed or taken from a cache.
result: graph::CommunityResult,
/// Cache and tagpath diagnostics for `result`.
diagnostics: CommunityDetectionDiagnostics,
}
/// Tagpath contribution to a community-detection cache key, plus the state
/// label surfaced in diagnostics.
#[derive(Debug, Clone)]
struct CommunityTagpathCachePart {
/// State label: `"fresh"`, `"disabled"`, `"stale"` or `"missing"`.
state: String,
/// Staleness reason; only set for the `"stale"` state.
reason: Option<String>,
/// String mixed into the cache key, e.g. `fresh:<index hash>` or `stale:<reason>`.
key: String,
}
/// On-disk form of a cached community-detection result.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CommunityDetectionCacheEntry {
/// Cache format tag; must equal `COMMUNITY_DETECTION_CACHE_VERSION` to be accepted on read.
version: String,
/// Cache key the entry was written for; must equal the requested key on read.
key: String,
/// The cached result.
result: graph::CommunityResult,
}
/// Returns the in-memory community-detection cache, creating an empty one on
/// first use.
fn community_detection_cache() -> &'static Mutex<BTreeMap<String, graph::CommunityResult>> {
    COMMUNITY_DETECTION_CACHE.get_or_init(Mutex::default)
}
/// Builds the `"fresh"` cache part for an already loaded tagpath adapter.
///
/// The key embeds a hash of the tagpath index file. When the file cannot be
/// read, a marker string containing the error is hashed instead.
pub(crate) fn community_tagpath_cache_part_for_loaded(
    adapter: &tagpath_adapter::TagpathAdapter,
) -> CommunityTagpathCachePart {
    let index_path = tagpath::index::index_path(&adapter.project_root);
    let index_hash = match fs::read(&index_path) {
        Ok(bytes) => hash_bytes_hex(&bytes),
        Err(err) => {
            let marker = format!("fresh-index-unreadable:{err:#}");
            hash_bytes_hex(marker.as_bytes())
        }
    };

    CommunityTagpathCachePart {
        state: "fresh".to_string(),
        reason: None,
        key: format!("fresh:{index_hash}"),
    }
}
/// Determines the tagpath cache part for `root` under the given options.
///
/// * `opts.no_tagpath` yields the `"disabled"` part without touching the index.
/// * A loaded index yields the `"fresh"` part.
/// * A stale index yields the `"stale"` part carrying the reason.
/// * A missing index yields the `"missing"` part.
///
/// # Errors
/// Fails when the index is stale and `opts.strict` is set.
pub(crate) fn community_tagpath_cache_part(
    root: &std::path::Path,
    opts: &TagpathSearchOpts,
) -> Result<CommunityTagpathCachePart> {
    // States without extra detail use the state label as their cache key too.
    let plain = |label: &str| CommunityTagpathCachePart {
        state: label.to_string(),
        reason: None,
        key: label.to_string(),
    };

    if opts.no_tagpath {
        return Ok(plain("disabled"));
    }

    match tagpath_adapter::try_load(root) {
        tagpath_adapter::LoadResult::Loaded(adapter) => {
            Ok(community_tagpath_cache_part_for_loaded(&adapter))
        }
        tagpath_adapter::LoadResult::Stale { reason, .. } if opts.strict => {
            anyhow::bail!(
                "tagpath index is stale (reason={reason}); rerun `tagpath index --update` or drop --tagpath-strict"
            )
        }
        tagpath_adapter::LoadResult::Stale { reason, .. } => Ok(CommunityTagpathCachePart {
            state: "stale".to_string(),
            key: format!("stale:{reason}"),
            reason: Some(reason),
        }),
        tagpath_adapter::LoadResult::Missing => Ok(plain("missing")),
    }
}
/// Builds a `"ready"` readiness verdict with the given reason and no
/// diagnostics or follow-up commands.
fn graph_effectiveness_ready(reason: impl Into<String>) -> GraphEffectivenessReadiness {
    let reason = reason.into();
    GraphEffectivenessReadiness {
        status: String::from("ready"),
        fail_closed: false,
        reason,
        diagnostics: vec![],
        next_commands: vec![],
    }
}
/// Builds a `"blocked"`, fail-closed readiness verdict carrying the given
/// reason, diagnostics and suggested follow-up commands.
fn graph_effectiveness_blocked(
    reason: impl Into<String>,
    diagnostics: Vec<String>,
    next_commands: Vec<String>,
) -> GraphEffectivenessReadiness {
    let reason = reason.into();
    GraphEffectivenessReadiness {
        status: String::from("blocked"),
        fail_closed: true,
        reason,
        diagnostics,
        next_commands,
    }
}
/// Returns the shell command that rebuilds the tagpath index from `root`.
fn tagpath_index_update_command(root: &std::path::Path) -> String {
    let root_text = root.to_string_lossy();
    let quoted_root = shell_quote(root_text.as_ref());
    format!("cd {} && tagpath index --update", quoted_root)
}
/// Turns a tagpath cache part into a readiness verdict.
///
/// `"fresh"` is ready and `"disabled"` is reported as disabled without
/// failing closed. `"stale"`, `"missing"` and any unrecognized state are
/// blocked, each with a diagnostic line and the index-update command for
/// `root` as the suggested next step.
fn graph_tagpath_readiness(
    root: &std::path::Path,
    tagpath: &CommunityTagpathCachePart,
) -> GraphEffectivenessReadiness {
    let rebuild_commands = || vec![tagpath_index_update_command(root)];

    match tagpath.state.as_str() {
        "fresh" => graph_effectiveness_ready("tagpath_handles_available"),
        "disabled" => GraphEffectivenessReadiness {
            status: "disabled".to_string(),
            fail_closed: false,
            reason: "tagpath_lookup_disabled".to_string(),
            diagnostics: Vec::new(),
            next_commands: Vec::new(),
        },
        "stale" => {
            let reason_suffix = match &tagpath.reason {
                Some(reason) => format!(" (reason={reason})"),
                None => String::new(),
            };
            let detail = format!(
                "tagpath_state=stale{}: community members may miss stable tagpath_handle citations; rebuild the tagpath index before relying on handle coverage",
                reason_suffix
            );
            graph_effectiveness_blocked("tagpath_state_stale", vec![detail], rebuild_commands())
        }
        "missing" => {
            let detail = format!(
                "tagpath_state=missing: community members cannot emit stable tagpath_handle citations; create .naming.toml if needed, then run tagpath indexing from {}",
                root.display()
            );
            graph_effectiveness_blocked("tagpath_state_missing", vec![detail], rebuild_commands())
        }
        state => {
            let detail =
                format!("tagpath_state={state}: community tagpath_handle readiness is unknown");
            graph_effectiveness_blocked(
                format!("tagpath_state_{state}"),
                vec![detail],
                rebuild_commands(),
            )
        }
    }
}
/// Computes a content hash over the index state that community detection
/// depends on: the source snapshot parts and the edge and symbol row counts.
///
/// # Errors
/// Propagates failures from the index queries and from hashing.
fn community_graph_watermark(db: &index::IndexDb) -> Result<String> {
    let snapshot = db.source_snapshot_parts()?;
    let edges = db.edge_count()?;
    let symbols = db.symbol_count()?;

    let payload = serde_json::json!({
        "source_snapshot": snapshot,
        "edge_rows": edges,
        "symbol_rows": symbols,
    });
    content_hash(&payload)
}
/// Derives the cache key for a community-detection run.
///
/// The key is a content hash over the cache version, the root path, the
/// scope (`"root"` when absent), the graph watermark and the tagpath key.
///
/// # Errors
/// Propagates hashing failures.
fn community_detection_cache_key(
    root: &std::path::Path,
    scope: Option<&str>,
    graph_watermark: &str,
    tagpath: &CommunityTagpathCachePart,
) -> Result<String> {
    let root_label = root.display().to_string();
    let scope_label = scope.unwrap_or("root");

    let payload = serde_json::json!({
        "version": COMMUNITY_DETECTION_CACHE_VERSION,
        "root": root_label,
        "scope": scope_label,
        "graph_watermark": graph_watermark,
        "tagpath": tagpath.key,
    });
    content_hash(&payload)
}
/// Returns the on-disk location of a cache entry:
/// `<root>/.tsift/community-cache/<scope or "root">/<key>.json`.
fn community_detection_cache_path(
    root: &std::path::Path,
    scope: Option<&str>,
    key: &str,
) -> PathBuf {
    let mut location = root.join(".tsift/community-cache");
    location.push(scope.unwrap_or("root"));
    location.push(format!("{key}.json"));
    location
}
/// Loads a cached result from disk, if a usable entry exists.
///
/// Returns `None` when the file is absent or unreadable, when it does not
/// parse, or when its version or key does not match what was requested.
fn read_community_detection_cache(
    root: &std::path::Path,
    scope: Option<&str>,
    key: &str,
) -> Option<graph::CommunityResult> {
    let bytes = fs::read(community_detection_cache_path(root, scope, key)).ok()?;
    let entry = serde_json::from_slice::<CommunityDetectionCacheEntry>(&bytes).ok()?;

    let trusted = entry.version == COMMUNITY_DETECTION_CACHE_VERSION && entry.key == key;
    trusted.then_some(entry.result)
}
/// Stores `result` in the on-disk cache on a best-effort basis.
///
/// Any failure (creating the directory, serializing, writing) is silently
/// ignored; the cache is purely an optimization.
fn write_community_detection_cache(
    root: &std::path::Path,
    scope: Option<&str>,
    key: &str,
    result: &graph::CommunityResult,
) {
    let path = community_detection_cache_path(root, scope, key);
    let dir_ready = path
        .parent()
        .is_some_and(|dir| fs::create_dir_all(dir).is_ok());
    if !dir_ready {
        return;
    }

    let entry = CommunityDetectionCacheEntry {
        version: COMMUNITY_DETECTION_CACHE_VERSION.to_string(),
        key: key.to_string(),
        result: result.clone(),
    };
    let Ok(bytes) = serde_json::to_vec(&entry) else {
        return;
    };
    let _ = fs::write(path, bytes);
}
/// Builds the initial diagnostics for a detection result.
///
/// Cache and tagpath information is filled in; the annotation counters start
/// at zero and the ambiguous-member list starts empty.
fn community_detection_diagnostics(
    cache_hit: bool,
    result: &graph::CommunityResult,
    tagpath: &CommunityTagpathCachePart,
    tagpath_root: &std::path::Path,
) -> CommunityDetectionDiagnostics {
    let tagpath_readiness = graph_tagpath_readiness(tagpath_root, tagpath);
    CommunityDetectionDiagnostics {
        cache_hit,
        edge_count: result.edge_count,
        iterations: result.iterations,
        tagpath_state: tagpath.state.clone(),
        tagpath_readiness,
        tagpath_stale_reason: tagpath.reason.clone(),
        annotated_community_count: 0,
        annotated_member_count: 0,
        ambiguous_member_count: 0,
        ambiguous_members: vec![],
    }
}
/// Refreshes the annotation counters in `diagnostics`.
///
/// Counts the members that carry a tagpath handle and the communities with
/// at least one such member, then copies the ambiguous members from
/// `annotation` (or clears them when no annotation is supplied).
pub(crate) fn update_community_annotation_diagnostics(
    diagnostics: &mut CommunityDetectionDiagnostics,
    communities: &[graph::Community],
    annotation: Option<&TagpathAnnotationDiagnostic>,
) {
    let mut annotated_communities = 0;
    let mut annotated_members = 0;
    for community in communities {
        let with_handle = community
            .members
            .iter()
            .filter(|member| member.tagpath_handle.is_some())
            .count();
        annotated_members += with_handle;
        if with_handle > 0 {
            annotated_communities += 1;
        }
    }
    diagnostics.annotated_community_count = annotated_communities;
    diagnostics.annotated_member_count = annotated_members;

    match annotation {
        Some(annotation) => {
            diagnostics.ambiguous_member_count = annotation.ambiguous_members.len();
            diagnostics.ambiguous_members = annotation.ambiguous_members.clone();
        }
        None => {
            diagnostics.ambiguous_member_count = 0;
            diagnostics.ambiguous_members.clear();
        }
    }
}
/// Returns the community-detection result for the current index state,
/// reusing a cached result when one exists.
///
/// Lookup order is the in-memory cache, then the on-disk cache (a hit there
/// is copied into memory), then a fresh computation that is written to both.
/// A poisoned cache mutex is treated as a miss and never causes a failure.
///
/// # Errors
/// Propagates failures from computing the cache key and from loading edges.
pub(crate) fn detect_communities_cached(
    db: &index::IndexDb,
    root: &std::path::Path,
    scope: Option<&str>,
    tagpath: &CommunityTagpathCachePart,
    tagpath_root: &std::path::Path,
) -> Result<CommunityDetectionReport> {
    let watermark = community_graph_watermark(db)?;
    let key = community_detection_cache_key(root, scope, &watermark, tagpath)?;

    let build_report = |cache_hit: bool, result: graph::CommunityResult| {
        CommunityDetectionReport {
            diagnostics: community_detection_diagnostics(cache_hit, &result, tagpath, tagpath_root),
            result,
        }
    };

    // The guard lives only for this statement, so the lock is released
    // before any disk access or computation.
    let in_memory = match community_detection_cache().lock() {
        Ok(cache) => cache.get(&key).cloned(),
        Err(_) => None,
    };
    if let Some(result) = in_memory {
        return Ok(build_report(true, result));
    }

    if let Some(result) = read_community_detection_cache(root, scope, &key) {
        if let Ok(mut cache) = community_detection_cache().lock() {
            cache.insert(key, result.clone());
        }
        return Ok(build_report(true, result));
    }

    let edges = db.all_edges()?;
    let fresh = graph::detect_communities(&edges);
    write_community_detection_cache(root, scope, &key, &fresh);
    if let Ok(mut cache) = community_detection_cache().lock() {
        cache.insert(key, fresh.clone());
    }
    Ok(build_report(false, fresh))
}
/// Resolves an index file entry to a path: absolute entries are returned
/// as they are, relative ones are joined onto `root`.
fn index_file_abs(file: &str, root: &std::path::Path) -> std::path::PathBuf {
    let entry = std::path::Path::new(file);
    match entry.is_absolute() {
        true => entry.to_path_buf(),
        false => root.join(entry),
    }
}
/// Normalizes an index file entry into a lookup key.
///
/// Absolute entries below `root` lose the `root` prefix; all other entries
/// are kept as given. Backslashes are then replaced with forward slashes.
fn index_file_key(file: &str, root: &std::path::Path) -> String {
    let entry = std::path::Path::new(file);
    let relative = entry
        .is_absolute()
        .then(|| entry.strip_prefix(root).ok())
        .flatten()
        .unwrap_or(entry);
    relative.to_string_lossy().replace('\\', "/")
}
/// Looks up the tagpath handle for symbol `name` in index file `file`,
/// resolving the file against `root` first.
fn tagpath_handle_for_index_file(
    file: &str,
    name: &str,
    root: &std::path::Path,
    adapter: &tagpath_adapter::TagpathAdapter,
) -> Option<String> {
    let member_file = index_file_abs(file, root);
    adapter.handle_for_member(&member_file, name)
}
/// A symbol definition for which a tagpath handle could be resolved.
#[derive(Debug, Clone)]
struct TagpathHandleCandidate {
/// Normalized file key of the definition (see `index_file_key`).
file: String,
/// Line copied from the stored symbol row.
line: i64,
/// Tagpath handle resolved for the symbol in that file.
handle: String,
}
/// Collects a handle candidate for every symbol row that resolves to a
/// tagpath handle for `name`; rows without a handle are skipped. Row order
/// is preserved.
fn tagpath_handle_candidates_for_symbol_rows(
    name: &str,
    syms: &[index::StoredSymbol],
    root: &std::path::Path,
    adapter: &tagpath_adapter::TagpathAdapter,
) -> Vec<TagpathHandleCandidate> {
    let mut candidates = Vec::new();
    for row in syms {
        let Some(handle) = tagpath_handle_for_index_file(&row.file, name, root, adapter) else {
            continue;
        };
        candidates.push(TagpathHandleCandidate {
            file: index_file_key(&row.file, root),
            line: row.line,
            handle,
        });
    }
    candidates
}
pub(crate) fn file_communities_from_callers(
db: &index::IndexDb,
root: &std::path::Path,
scope: Option<&str>,
tagpath: &CommunityTagpathCachePart,
) -> Result<std::collections::HashMap<String, std::collections::HashSet<usize>>> {
let community_report = detect_communities_cached(db, root, scope, tagpath, root)?;
if community_report.result.communities.is_empty() {
return Ok(std::collections::HashMap::new());
}
let mut community_by_symbol = std::collections::HashMap::new();
for community in community_report.result.communities {
for member in community.members {
community_by_symbol.insert(member.name, community.id);
}
}
let mut communities_by_file: std::collections::HashMap<
String,
std::collections::HashSet<usize>,
> = std::collections::HashMap::new();
for sym in db.all_symbols()? {
if let Some(community_id) = community_by_symbol.get(&sym.name) {
communities_by_file
.entry(index_file_key(&sym.file, root))
.or_default()
.insert(*community_id);
}
}
for edge in db.all_stored_edges()? {
if let Some(community_id) = community_by_symbol.get(&edge.caller_name) {
communities_by_file
.entry(index_file_key(&edge.caller_file, root))
.or_default()
.insert(*community_id);
}
}
Ok(communities_by_file)
}
/// Picks the tagpath handle that most plausibly belongs to the callee of `edge`.
///
/// Preference order among the callee's handle candidates:
/// 1. a candidate declared in the caller's own file,
/// 2. the first candidate whose file shares at least one community with the caller's file,
/// 3. the first candidate overall.
///
/// Returns `None` when the symbol lookup fails or no candidate exists.
pub(crate) fn resolve_tagpath_handle_for_callee_edge(
    edge: &index::StoredEdge,
    db: &index::IndexDb,
    root: &std::path::Path,
    adapter: &tagpath_adapter::TagpathAdapter,
    communities_by_file: &std::collections::HashMap<String, std::collections::HashSet<usize>>,
) -> Option<String> {
    let syms = db.symbol_info(&edge.callee_name).ok()?;
    let candidates =
        tagpath_handle_candidates_for_symbol_rows(&edge.callee_name, &syms, root, adapter);
    let caller_file = index_file_key(&edge.caller_file, root);

    let in_caller_file = candidates
        .iter()
        .find(|candidate| candidate.file == caller_file);
    // Evaluated lazily, only when no candidate lives in the caller's file.
    let in_shared_community = || {
        let caller_communities = communities_by_file.get(&caller_file)?;
        candidates.iter().find(|candidate| {
            communities_by_file
                .get(&candidate.file)
                .is_some_and(|theirs| !caller_communities.is_disjoint(theirs))
        })
    };

    in_caller_file
        .or_else(in_shared_community)
        .or_else(|| candidates.first())
        .map(|candidate| candidate.handle.clone())
}
/// Records `reference` for the member `(community_id, name)` unless it is a duplicate
/// or the member already holds the maximum number of references.
///
/// Two references are duplicates when file, line, role and peer all match.
fn push_bounded_community_member_ref(
    refs_by_member: &mut HashMap<(usize, String), Vec<graph::CommunityMemberRef>>,
    community_id: usize,
    name: &str,
    reference: graph::CommunityMemberRef,
) {
    /// Upper bound on stored references per member.
    const MAX_REFS_PER_MEMBER: usize = 6;

    let refs = refs_by_member
        .entry((community_id, name.to_string()))
        .or_default();
    let already_recorded = refs.iter().any(|existing| {
        existing.file == reference.file
            && existing.line == reference.line
            && existing.role == reference.role
            && existing.peer == reference.peer
    });
    if !already_recorded && refs.len() < MAX_REFS_PER_MEMBER {
        refs.push(reference);
    }
}
/// Returns the single symbol row whose normalized file key is contained in `files`.
///
/// Yields `None` when zero rows or more than one row match. The evidence label on
/// success is always `"edge_file"`.
fn choose_symbol_row_by_files<'a>(
    syms: &'a [index::StoredSymbol],
    files: &BTreeSet<String>,
    root: &std::path::Path,
) -> Option<(&'a index::StoredSymbol, &'static str)> {
    let mut matching = syms
        .iter()
        .filter(|sym| files.contains(&index_file_key(&sym.file, root)));
    // Exactly one match: the first pull succeeds and the second comes back empty.
    match (matching.next(), matching.next()) {
        (Some(only), None) => Some((only, "edge_file")),
        _ => None,
    }
}
/// Returns the single handle candidate whose file is contained in `files`, paired with
/// the caller-supplied `evidence` label.
///
/// Yields `None` when zero candidates or more than one candidate match.
fn choose_tagpath_candidate_by_files<'a>(
    candidates: &'a [TagpathHandleCandidate],
    files: &BTreeSet<String>,
    evidence: &'static str,
) -> Option<(&'a TagpathHandleCandidate, &'static str)> {
    let mut matching = candidates
        .iter()
        .filter(|candidate| files.contains(&candidate.file));
    // Exactly one match: the first pull succeeds and the second comes back empty.
    match (matching.next(), matching.next()) {
        (Some(only), None) => Some((only, evidence)),
        _ => None,
    }
}
/// Fills in `file`, `line`, `tagpath_handle` and `refs` on every community member and
/// reports members whose location was ambiguous.
///
/// Location selection runs as a cascade. With a tagpath adapter, handle candidates are
/// tried first: a sole candidate, then the only candidate in a file where the member
/// appears on an intra-community edge, then the only candidate in any file carrying an
/// intra-community edge of the community. If that yields no file, the same three steps
/// are repeated on the plain symbol rows (without producing a handle).
///
/// A diagnostic is emitted for each member that has more than one symbol row or more
/// than one handle candidate, whether or not a location was chosen.
///
/// # Errors
/// Propagates failures from `db.all_symbols()` and `db.all_stored_edges()`.
pub(crate) fn annotate_community_members_with_context(
communities: &mut [graph::Community],
db: &index::IndexDb,
root: &std::path::Path,
adapter: Option<&tagpath_adapter::TagpathAdapter>,
) -> Result<Vec<CommunityMemberAmbiguityDiagnostic>> {
// Member name -> community id. A name present in several communities maps to the
// last community visited.
let mut community_by_name = HashMap::<String, usize>::new();
for community in communities.iter() {
for member in &community.members {
community_by_name.insert(member.name.clone(), community.id);
}
}
// All indexed symbol rows grouped by symbol name.
let mut symbols_by_name = HashMap::<String, Vec<index::StoredSymbol>>::new();
for sym in db.all_symbols()? {
symbols_by_name
.entry(sym.name.clone())
.or_default()
.push(sym);
}
let mut refs_by_member = HashMap::<(usize, String), Vec<graph::CommunityMemberRef>>::new();
let mut evidence_files_by_member = HashMap::<(usize, String), BTreeSet<String>>::new();
let mut context_files_by_community = HashMap::<usize, BTreeSet<String>>::new();
// Only edges whose caller and callee both map to the same community contribute
// evidence; everything else is skipped.
for edge in db.all_stored_edges()? {
let Some(&caller_community) = community_by_name.get(&edge.caller_name) else {
continue;
};
let Some(&callee_community) = community_by_name.get(&edge.callee_name) else {
continue;
};
if caller_community != callee_community {
continue;
}
// The caller's file is the evidence file for both endpoints of the edge.
let file = index_file_key(&edge.caller_file, root);
context_files_by_community
.entry(caller_community)
.or_default()
.insert(file.clone());
evidence_files_by_member
.entry((caller_community, edge.caller_name.clone()))
.or_default()
.insert(file.clone());
push_bounded_community_member_ref(
&mut refs_by_member,
caller_community,
&edge.caller_name,
graph::CommunityMemberRef {
file: file.clone(),
line: edge.caller_line,
role: "caller".to_string(),
peer: edge.callee_name.clone(),
},
);
evidence_files_by_member
.entry((callee_community, edge.callee_name.clone()))
.or_default()
.insert(file.clone());
push_bounded_community_member_ref(
&mut refs_by_member,
callee_community,
&edge.callee_name,
graph::CommunityMemberRef {
file,
line: edge.call_site_line,
role: "callee".to_string(),
peer: edge.caller_name.clone(),
},
);
}
let mut diagnostics = Vec::new();
for community in communities.iter_mut() {
let community_files = context_files_by_community
.get(&community.id)
.cloned()
.unwrap_or_default();
for member in community.members.iter_mut() {
// Clear any previous annotation before recomputing it.
member.file = None;
member.line = None;
member.tagpath_handle = None;
let key = (community.id, member.name.clone());
member.refs = refs_by_member.remove(&key).unwrap_or_default();
let syms = symbols_by_name
.get(&member.name)
.map(Vec::as_slice)
.unwrap_or(&[]);
let evidence_files = evidence_files_by_member
.get(&key)
.cloned()
.unwrap_or_default();
// Without an adapter there are no handle candidates at all.
let candidates = adapter
.map(|adapter| {
tagpath_handle_candidates_for_symbol_rows(&member.name, syms, root, adapter)
})
.unwrap_or_default();
let mut selected_file: Option<String> = None;
let mut selected_line: Option<i64> = None;
let mut selected_handle: Option<String> = None;
let mut selected_evidence: Option<&'static str> = None;
// Tier 1: tagpath handle candidates (sole candidate, then edge files, then
// community files).
if let Some(candidate) = candidates.first().filter(|_| candidates.len() == 1) {
selected_file = Some(candidate.file.clone());
selected_line = Some(candidate.line);
selected_handle = Some(candidate.handle.clone());
selected_evidence = Some("unique_tagpath_handle");
} else if let Some((candidate, evidence)) =
choose_tagpath_candidate_by_files(&candidates, &evidence_files, "edge_file")
{
selected_file = Some(candidate.file.clone());
selected_line = Some(candidate.line);
selected_handle = Some(candidate.handle.clone());
selected_evidence = Some(evidence);
} else if let Some((candidate, evidence)) =
choose_tagpath_candidate_by_files(&candidates, &community_files, "community_file")
{
selected_file = Some(candidate.file.clone());
selected_line = Some(candidate.line);
selected_handle = Some(candidate.handle.clone());
selected_evidence = Some(evidence);
}
// Tier 2: plain symbol rows, used only when tier 1 selected nothing. No handle
// is produced on this path.
if selected_file.is_none() {
if let Some(sym) = syms.first().filter(|_| syms.len() == 1) {
selected_file = Some(index_file_key(&sym.file, root));
selected_line = Some(sym.line);
selected_evidence = Some("unique_symbol_row");
} else if let Some((sym, evidence)) =
choose_symbol_row_by_files(syms, &evidence_files, root)
{
selected_file = Some(index_file_key(&sym.file, root));
selected_line = Some(sym.line);
selected_evidence = Some(evidence);
} else if let Some((sym, _)) =
choose_symbol_row_by_files(syms, &community_files, root)
{
// The helper always labels its result "edge_file"; for the community-wide
// lookup the label is replaced here.
selected_file = Some(index_file_key(&sym.file, root));
selected_line = Some(sym.line);
selected_evidence = Some("community_file");
}
}
member.file = selected_file.clone();
member.line = selected_line;
member.tagpath_handle = selected_handle;
// Report every member with competing rows or candidates, resolved or not.
if syms.len() > 1 || candidates.len() > 1 {
diagnostics.push(CommunityMemberAmbiguityDiagnostic {
community_id: community.id,
name: member.name.clone(),
candidate_count: syms.len(),
tagpath_candidate_count: candidates.len(),
evidence: selected_evidence
.unwrap_or("ambiguous_no_evidence")
.to_string(),
chosen_file: selected_file,
});
}
}
}
Ok(diagnostics)
}
/// Which endpoint of a `StoredEdge` is the row's primary symbol — caller
/// (caller list) or callee (callee list).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EdgeSide {
/// The edge's caller is the primary symbol (rows of a caller list).
Caller,
/// The edge's callee is the primary symbol (rows of a callee list).
Callee,
}
/// Object keys whose string values are treated as filesystem paths and made
/// root-relative by `relativize_json_inner`.
const JSON_PATH_KEYS: &[&str] = &["file", "path", "caller_file", "file_path"];
/// Rewrites path-valued fields anywhere inside `val` so that paths under `root`
/// become root-relative.
///
/// The prefix that gets stripped is `root` with trailing `/` characters removed and
/// exactly one `/` appended.
pub(crate) fn relativize_json_paths(val: &mut serde_json::Value, root: &std::path::Path) {
    let root_text = root.to_string_lossy();
    let mut prefix = String::from(root_text.trim_end_matches('/'));
    prefix.push('/');
    relativize_json_inner(val, &prefix);
}
fn relativize_json_inner(val: &mut serde_json::Value, prefix: &str) {
match val {
serde_json::Value::Array(arr) => {
for v in arr {
relativize_json_inner(v, prefix);
}
}
serde_json::Value::Object(map) => {
for (k, v) in map.iter_mut() {
if JSON_PATH_KEYS.contains(&k.as_str())
&& let serde_json::Value::String(s) = v
&& let Some(rest) = s.strip_prefix(prefix)
{
*s = rest.to_string();
}
relativize_json_inner(v, prefix);
}
}
_ => {}
}
}
/// Renders a score with two decimals in compact mode and four decimals otherwise.
pub(crate) fn format_score(score: f64, compact: bool) -> String {
    let precision: usize = if compact { 2 } else { 4 };
    format!("{score:.precision$}")
}
/// Trims `input` and shortens it to at most `max_chars` characters.
///
/// Text that already fits is returned as is (after trimming). Longer text is cut and
/// terminated with `...`, the ellipsis counting towards the limit. When the limit is
/// too small to hold the ellipsis (`max_chars <= 3`), the result consists of
/// `max_chars` dots so that it never exceeds the requested length.
pub(crate) fn truncate_for_compact(input: &str, max_chars: usize) -> String {
    let trimmed = input.trim();
    // `nth(max_chars)` is `None` exactly when the text has at most `max_chars`
    // characters; unlike `count()` it stops scanning once the limit is crossed.
    if trimmed.chars().nth(max_chars).is_none() {
        return trimmed.to_string();
    }
    if max_chars <= 3 {
        return ".".repeat(max_chars);
    }
    let prefix: String = trimmed.chars().take(max_chars - 3).collect();
    format!("{prefix}...")
}
/// Returns the first non-blank line of `snippet`, shortened to 100 characters via
/// `truncate_for_compact`, or `None` when every line is blank.
pub(crate) fn compact_snippet(snippet: &str) -> Option<String> {
    let first_content_line = snippet.lines().find(|line| !line.trim().is_empty())?;
    Some(truncate_for_compact(first_content_line, 100))
}
/// Joins member names with `", "`, listing at most `limit` of them.
///
/// When names are left out, a ` (+N more)` suffix states how many.
pub(crate) fn compact_members(members: &[graph::CommunityMember], limit: usize) -> String {
    let total = members.len();
    let shown = members
        .iter()
        .take(limit)
        .map(|member| member.name.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    if total <= limit {
        shown
    } else {
        format!("{} (+{} more)", shown, total - limit)
    }
}
/// Derives a deterministic handle of the form `<prefix>-<10 hex digits>`.
///
/// The digits are the leading hex characters of a BLAKE3 hash over `prefix`, a zero
/// byte separator, and `key`.
fn stable_handle(prefix: &str, key: &str) -> String {
    let digest = blake3::Hasher::new()
        .update(prefix.as_bytes())
        .update(&[0])
        .update(key.as_bytes())
        .finalize();
    let hex = digest.to_hex();
    format!("{prefix}-{}", &hex[..10])
}
/// Canonical family name of a symbol paired with its derived tag alias.
#[derive(Clone, Debug, PartialEq, Eq)]
struct CanonicalTagFamily {
// Canonical form taken from `tagpath_family::TagFamily::canonical`.
canonical: String,
// Alias built by `canonical_family_from_tagpath_family`: tags joined with `/`, or
// per-dimension tags joined with `.` and dimensions joined with `/`.
tag_alias: String,
}
/// Converts a generated tag family into its canonical name plus tag alias.
///
/// Without dimensions the alias is the family's tags joined by `/`. With dimensions,
/// each non-empty dimension contributes its tags joined by `.`, and the dimensions are
/// joined by `/`. Returns `None` when the resulting alias is empty.
fn canonical_family_from_tagpath_family(
    family: tagpath_family::TagFamily,
) -> Option<CanonicalTagFamily> {
    let tag_alias = if family.dimensions.is_empty() {
        family.tags.join("/")
    } else {
        let mut segments = Vec::new();
        for dimension in family.dimensions.iter() {
            if !dimension.tags.is_empty() {
                segments.push(dimension.tags.join("."));
            }
        }
        segments.join("/")
    };
    if tag_alias.is_empty() {
        return None;
    }
    Some(CanonicalTagFamily {
        canonical: family.canonical,
        tag_alias,
    })
}
fn canonical_tag_family_from_name(name: &str) -> Option<CanonicalTagFamily> {
let trimmed = name.trim();
if trimmed.is_empty() {
return None;
}
canonical_family_from_tagpath_family(tagpath_family::generate_family(trimmed))
}
/// Derives the canonical tag family from a comma-separated tag list.
///
/// Tags are trimmed, blank entries dropped, and the remainder joined with `_` before
/// being passed to the family generator. Returns `None` when no tag remains or the
/// generated family has no alias.
fn canonical_tag_family_from_tags(tags: &str) -> Option<CanonicalTagFamily> {
    let parts: Vec<&str> = tags
        .split(',')
        .map(str::trim)
        .filter(|tag| !tag.is_empty())
        .collect();
    if parts.is_empty() {
        return None;
    }
    let canonical = parts.join("_");
    canonical_family_from_tagpath_family(tagpath_family::generate_family(&canonical))
}
/// Derives the canonical tag family for a symbol, preferring its stored tags and
/// falling back to its name when the tags are absent or yield no family.
fn canonical_tag_family_from_symbol(name: &str, tags: Option<&str>) -> Option<CanonicalTagFamily> {
    if let Some(family) = tags.and_then(canonical_tag_family_from_tags) {
        return Some(family);
    }
    canonical_tag_family_from_name(name)
}
/// Returns the tag alias derived from a symbol name alone, if one exists.
fn tag_alias_from_name(name: &str) -> Option<String> {
    let family = canonical_tag_family_from_name(name)?;
    Some(family.tag_alias)
}
/// Returns the tag alias for a symbol, derived from its tags when available and from
/// its name otherwise.
fn tag_alias_from_tags(name: &str, tags: Option<&str>) -> Option<String> {
    let family = canonical_tag_family_from_symbol(name, tags)?;
    Some(family.tag_alias)
}
/// Turns a tag alias into a space-separated query string.
///
/// The alias is split on `/` and `.`; parts are trimmed and blank parts dropped.
/// Returns `None` when nothing remains.
fn family_query_from_tag_alias(tag_alias: &str) -> Option<String> {
    let mut query = String::new();
    let parts = tag_alias
        .split(['/', '.'])
        .map(str::trim)
        .filter(|part| !part.is_empty());
    for part in parts {
        if !query.is_empty() {
            query.push(' ');
        }
        query.push_str(part);
    }
    (!query.is_empty()).then_some(query)
}
/// Serializable reference to an ontology tag entry, as built by
/// `ontology_refs_for_alias`.
#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
struct CompactOntologyRefPreview {
// Stable handle derived from the tag and its relative path.
handle: String,
// Tag name as stored in the ontology.
tag: String,
// Path of the ontology entry, relative to the ontology project root when possible.
path: String,
// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
title: Option<String>,
// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
domain: Option<String>,
}
/// Loaded ontology data used to attach ontology references to symbol previews.
#[derive(Clone, Debug)]
struct TagOntologyPreviewContext {
// Project path reported by the ontology loader; used to relativize entry paths.
project_root: PathBuf,
// Ontology tags keyed by their tag name.
tags: BTreeMap<String, tagpath_ontology::OntologyTag>,
}
/// Serializable compact reference to a symbol: a stable handle, the (possibly
/// truncated) name, and optional tag/ontology context.
#[derive(Serialize, Clone, Debug, PartialEq, Eq)]
struct CompactSymbolRefPreview {
handle: String,
name: String,
// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
tag_alias: Option<String>,
// Omitted from serialized output when empty; defaults to empty on deserialization.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
ontology_refs: Vec<CompactOntologyRefPreview>,
}
fn build_compact_symbol_ref(
prefix: &str,
key: &str,
name: &str,
tags: Option<&str>,
max_bytes: usize,
) -> CompactSymbolRefPreview {
build_compact_symbol_ref_with_ontology(prefix, key, name, tags, max_bytes, None)
}
/// Builds a compact symbol reference, optionally enriched with ontology references.
///
/// The handle is derived from `prefix` and `key`. The name and the tag alias are each
/// shortened to `max_bytes`; ontology references are looked up with the untruncated
/// alias.
fn build_compact_symbol_ref_with_ontology(
    prefix: &str,
    key: &str,
    name: &str,
    tags: Option<&str>,
    max_bytes: usize,
    ontology: Option<&TagOntologyPreviewContext>,
) -> CompactSymbolRefPreview {
    let alias = tag_alias_from_tags(name, tags);
    let ontology_refs = match alias.as_deref() {
        Some(full_alias) => ontology_refs_for_alias(ontology, full_alias),
        None => Vec::new(),
    };
    let tag_alias = alias
        .as_deref()
        .map(|full_alias| truncate_for_budget(full_alias, max_bytes));
    CompactSymbolRefPreview {
        handle: stable_handle(prefix, key),
        name: truncate_for_budget(name, max_bytes),
        tag_alias,
        ontology_refs,
    }
}
fn load_tag_ontology_preview_context(root: &Path) -> Option<TagOntologyPreviewContext> {
let report = tagpath_ontology::load_project(root).ok()?;
if report.tags.is_empty() {
return None;
}
Some(TagOntologyPreviewContext {
project_root: report.project_path,
tags: report
.tags
.into_iter()
.map(|tag| (tag.tag.clone(), tag))
.collect(),
})
}
/// Resolves the individual tags of `alias` against the ontology.
///
/// The alias is split on `/` and `.`; each distinct part (compared in ASCII lowercase)
/// is looked up once, in order of first appearance. Parts without an ontology entry
/// are skipped. Without an ontology the result is empty.
fn ontology_refs_for_alias(
    ontology: Option<&TagOntologyPreviewContext>,
    alias: &str,
) -> Vec<CompactOntologyRefPreview> {
    let Some(ontology) = ontology else {
        return Vec::new();
    };
    let mut seen = BTreeSet::new();
    let mut refs = Vec::new();
    let parts = alias
        .split(['/', '.'])
        .map(str::trim)
        .filter(|tag| !tag.is_empty());
    for tag in parts {
        let key = tag.to_ascii_lowercase();
        if !seen.insert(key.clone()) {
            continue;
        }
        let Some(ontology_tag) = ontology.tags.get(&key) else {
            continue;
        };
        let path = relativize_ontology_path(&ontology_tag.path, &ontology.project_root);
        refs.push(CompactOntologyRefPreview {
            handle: stable_handle("tont", &format!("{}:{path}", ontology_tag.tag)),
            tag: ontology_tag.tag.clone(),
            path,
            title: ontology_tag.title.clone(),
            domain: ontology_tag.domain.clone(),
        });
    }
    refs
}
/// Renders `path` relative to `root` when it lies beneath it, otherwise unchanged,
/// using forward slashes as separators.
fn relativize_ontology_path(path: &Path, root: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(rest) => rest,
        Err(_) => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}
/// Formats a one-line symbol preview: `<handle> <name>`, followed by ` tag:<alias>`
/// when an alias is present.
fn format_symbol_preview_line(handle: &str, name: &str, tag_alias: Option<&str>) -> String {
    let mut line = format!("{handle} {name}");
    if let Some(alias) = tag_alias {
        line.push_str(" tag:");
        line.push_str(alias);
    }
    line
}
/// Formats a summary reference as `<handle> <symbol>[ tag:<alias>] expand:<expand>`.
fn format_summary_ref_line(summary: &ContextPackSummaryRefPreview) -> String {
    let tag_segment = summary
        .tag_alias
        .as_deref()
        .map(|alias| format!(" tag:{alias}"))
        .unwrap_or_default();
    format!(
        "{} {}{} expand:{}",
        summary.handle, summary.symbol, tag_segment, summary.expand
    )
}
/// Formats a symbol reference as `<handle>@<label>`, where the label is the tag alias
/// when present and the symbol name otherwise.
fn compact_symbol_ref_token(symbol: &CompactSymbolRefPreview) -> String {
    let label = symbol
        .tag_alias
        .as_deref()
        .unwrap_or(symbol.name.as_str());
    format!("{}@{}", symbol.handle, label)
}
/// Trims `input` and shortens it to at most `max_bytes` bytes of UTF-8.
///
/// Text that fits is returned as is (after trimming). Otherwise the longest prefix
/// that ends on a character boundary and leaves room for `...` is kept and the
/// ellipsis appended. For `max_bytes <= 3` the result is `max_bytes` dots.
fn truncate_for_budget(input: &str, max_bytes: usize) -> String {
    let trimmed = input.trim();
    if trimmed.len() <= max_bytes {
        return trimmed.to_string();
    }
    if max_bytes <= 3 {
        return ".".repeat(max_bytes);
    }
    let budget = max_bytes - 3;
    // Walk back from the budget to the nearest character boundary; index 0 always
    // qualifies, so a prefix (possibly empty) is always found.
    let end = (0..=budget)
        .rev()
        .find(|&idx| trimmed.is_char_boundary(idx))
        .unwrap_or(0);
    format!("{}...", &trimmed[..end])
}
/// Maps a symbol kind to its short display form.
///
/// Kinds without a dedicated abbreviation (including `struct`, `trait`, `impl`, `mod`
/// and `alias`) are returned unchanged.
pub(crate) fn abbreviate_kind(kind: &str) -> &str {
    match kind {
        "function" => "fn",
        "method" => "meth",
        "module" => "mod",
        "class" => "cls",
        "interface" => "iface",
        "type_alias" => "type",
        "data_class" => "data_cls",
        "sealed_class" => "sealed_cls",
        "enum_class" => "enum_cls",
        "companion_object" => "comp_obj",
        "object" => "obj",
        "heading" => "h",
        "code_block" => "code",
        unchanged => unchanged,
    }
}
/// Maps a match-type identifier to its short display form; identifiers without an
/// abbreviation (including `all_tags`) are returned unchanged.
pub(crate) fn abbreviate_match_type(mt: &str) -> &str {
    match mt {
        "exact_name" => "exact",
        "partial_tags" => "partial",
        unchanged => unchanged,
    }
}
/// Renders a path of nodes as their names separated by ` -> `.
pub(crate) fn symbol_path_summary(path: &[graph::PathNode]) -> String {
    let mut summary = String::new();
    for (position, node) in path.iter().enumerate() {
        if position > 0 {
            summary.push_str(" -> ");
        }
        summary.push_str(node.name.as_str());
    }
    summary
}
/// Maximum number of sample snippets kept per file group by `group_search_hits`.
const SEARCH_GROUP_SAMPLE_LIMIT: usize = 2;
/// Search hits that share the same file path, aggregated by `group_search_hits`.
struct SearchHitGroup {
// Display path of the file (absolute or root-relative, as requested by the caller).
path: String,
// Smallest rank among the grouped hits.
first_rank: usize,
// Largest score among the grouped hits.
top_score: f64,
// Debug rendering of the confidence of the first hit seen for this path.
confidence: String,
// Number of hits in the group.
hits: usize,
// Up to `SEARCH_GROUP_SAMPLE_LIMIT` distinct sample lines.
samples: Vec<String>,
}
/// Builds a one-line sample for a hit: its compacted snippet, prefixed with
/// `<location>: ` when the hit carries a location.
///
/// Returns `None` when the snippet has no non-blank line.
fn format_search_sample(hit: &sift::SearchHit) -> Option<String> {
    let snippet = compact_snippet(&hit.snippet)?;
    let sample = if let Some(location) = hit.location.as_deref() {
        format!("{location}: {snippet}")
    } else {
        snippet
    };
    Some(sample)
}
/// Groups search hits by file path and orders the groups by their best rank.
///
/// Paths are shown absolute or relative to `root` depending on `absolute`. Each group
/// tracks its hit count, lowest rank, highest score, the confidence of its first hit,
/// and up to `SEARCH_GROUP_SAMPLE_LIMIT` distinct samples.
pub(crate) fn group_search_hits(
    hits: &[sift::SearchHit],
    root: &Path,
    absolute: bool,
) -> Vec<SearchHitGroup> {
    // Path -> index into `groups`; valid only until the final sort.
    let mut slot_by_path: BTreeMap<String, usize> = BTreeMap::new();
    let mut groups: Vec<SearchHitGroup> = Vec::new();

    for hit in hits {
        let path = if absolute {
            hit.path.clone()
        } else {
            relativize(&hit.path, root)
        };
        let slot = match slot_by_path.get(&path) {
            Some(&existing) => existing,
            None => {
                groups.push(SearchHitGroup {
                    path: path.clone(),
                    first_rank: hit.rank,
                    top_score: hit.score,
                    confidence: format!("{:?}", hit.confidence),
                    hits: 0,
                    samples: Vec::new(),
                });
                let created = groups.len() - 1;
                slot_by_path.insert(path, created);
                created
            }
        };

        let group = &mut groups[slot];
        group.hits += 1;
        group.first_rank = group.first_rank.min(hit.rank);
        if hit.score > group.top_score {
            group.top_score = hit.score;
        }
        // The sample is only formatted while the group still has room for one.
        if group.samples.len() < SEARCH_GROUP_SAMPLE_LIMIT
            && let Some(sample) = format_search_sample(hit)
            && !group.samples.contains(&sample)
        {
            group.samples.push(sample);
        }
    }

    groups.sort_by_key(|group| group.first_rank);
    groups
}
/// Decides whether search hits should be shown grouped by file.
///
/// Grouping is chosen when any file has three or more hits, or when there are at
/// least six hits and at least two of them share a file.
pub(crate) fn should_collapse_search_hits(
    hits: &[sift::SearchHit],
    root: &Path,
    absolute: bool,
) -> bool {
    let groups = group_search_hits(hits, root, absolute);
    let some_file_is_dense = groups.iter().any(|group| group.hits >= 3);
    let many_hits_with_overlap = hits.len() >= 6 && groups.len() < hits.len();
    some_file_is_dense || many_hits_with_overlap
}
/// Formats edges as one line per caller file, listing the distinct symbol names seen
/// in that file.
///
/// `use_callers` selects whether caller names or callee names are listed; grouping is
/// by the edge's caller file in both cases. Files appear in sorted order and names in
/// order of first appearance.
pub(crate) fn format_edge_groups(edges: &[index::StoredEdge], use_callers: bool) -> Vec<String> {
    let mut names_by_file: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
    for edge in edges {
        let endpoint = if use_callers {
            edge.caller_name.as_str()
        } else {
            edge.callee_name.as_str()
        };
        let bucket = names_by_file
            .entry(edge.caller_file.as_str())
            .or_default();
        if bucket.iter().all(|seen| *seen != endpoint) {
            bucket.push(endpoint);
        }
    }

    let mut lines = Vec::with_capacity(names_by_file.len());
    for (file, names) in names_by_file {
        lines.push(format!(" {} ({}): {}", file, names.len(), names.join(", ")));
    }
    lines
}
/// Decides whether edges should be shown grouped by caller file.
///
/// Grouping is chosen when any caller file has three or more edges, or when there are
/// at least six edges and at least two of them share a caller file.
pub(crate) fn should_collapse_edge_groups(edges: &[index::StoredEdge]) -> bool {
    let mut edges_per_file: BTreeMap<&str, usize> = BTreeMap::new();
    for edge in edges {
        *edges_per_file.entry(edge.caller_file.as_str()).or_default() += 1;
    }
    let some_file_is_dense = edges_per_file.values().any(|&count| count >= 3);
    let many_edges_with_overlap = edges.len() >= 6 && edges_per_file.len() < edges.len();
    some_file_is_dense || many_edges_with_overlap
}
/// Apply a single edit operation to file contents.
///
/// Returns the new content together with the number of occurrences of `op.old` that
/// were found (and, with `replace_all`, replaced).
///
/// # Errors
/// Fails when `old` and `new` are identical, when `old` is empty while the content is
/// not, when `old` does not occur, or when it occurs more than once and `replace_all`
/// is not set.
pub(crate) fn apply_edit_op(content: &str, op: &EditOp) -> Result<(String, usize)> {
    if op.old == op.new {
        bail!("old and new strings are identical");
    }
    // An empty pattern matches at every character boundary: `replace_all` would splice
    // `new` between all characters, and the ambiguity error below would recommend
    // exactly that. Reject it up front with a message that names the real problem.
    if op.old.is_empty() && !content.is_empty() {
        bail!("old_string is empty");
    }
    let count = content.matches(op.old.as_str()).count();
    if count == 0 {
        bail!("old_string not found");
    }
    if count > 1 && !op.replace_all {
        bail!(
            "old_string matches {} times (use replace_all or provide more context)",
            count
        );
    }
    let replaced = if op.replace_all {
        content.replace(op.old.as_str(), &op.new)
    } else {
        content.replacen(op.old.as_str(), &op.new, 1)
    };
    Ok((replaced, count))
}
pub(crate) fn build_edit_plan(batch: &EditBatch) -> Result<Vec<PlannedEdit>> {
let mut plan = Vec::with_capacity(batch.edits.len());
for (i, op) in batch.edits.iter().enumerate() {
let content = fs::read_to_string(&op.file)
.with_context(|| format!("edit #{}: reading {}", i + 1, op.file.display()))?;
let (replaced, count) = apply_edit_op(&content, op)
.with_context(|| format!("edit #{}: {}", i + 1, op.file.display()))?;
plan.push(PlannedEdit {
index: i,
file: op.file.clone(),
new_content: replaced,
replacements: count,
});
}
Ok(plan)
}
fn stage_edit_plan(plan: Vec<PlannedEdit>) -> Result<Vec<StagedEdit>> {
let mut staged = Vec::with_capacity(plan.len());
for planned in plan {
let parent = planned.file.parent().unwrap_or_else(|| Path::new("."));
let mut staged_file = NamedTempFile::new_in(parent)
.with_context(|| format!("staging {}", planned.file.display()))?;
staged_file
.write_all(planned.new_content.as_bytes())
.with_context(|| format!("staging {}", planned.file.display()))?;
staged_file
.as_file_mut()
.sync_all()
.with_context(|| format!("flushing staged edit for {}", planned.file.display()))?;
staged.push(StagedEdit {
index: planned.index,
file: planned.file,
replacements: planned.replacements,
staged_file,
});
}
Ok(staged)
}
/// Builds the path of the backup file used while swapping an edit into place.
///
/// The backup sits next to `file` and is named
/// `.<file name>.tsift-edit-<unix nanos>-<pid>-<index>.bak`. A path without a file
/// name uses `edit-target`; a clock before the Unix epoch yields a stamp of `0`.
fn edit_backup_path(file: &Path, index: usize) -> PathBuf {
    let dir = match file.parent() {
        Some(parent) => parent,
        None => Path::new("."),
    };
    let name = match file.file_name() {
        Some(value) => value.to_string_lossy().into_owned(),
        None => "edit-target".to_string(),
    };
    let stamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_nanos())
        .unwrap_or_default();
    let pid = std::process::id();
    dir.join(format!(".{name}.tsift-edit-{stamp}-{pid}-{index}.bak"))
}
/// Undoes already-committed edits by restoring each backup, newest first.
///
/// For every entry the edited file is removed (a missing file is fine) and the backup
/// is renamed back into place. If the removal fails, the restore for that entry is
/// skipped. Processing continues past failures.
///
/// # Errors
/// Returns a single error joining all failure messages with `"; "`.
fn rollback_applied_edits(applied: &[AppliedEdit]) -> Result<()> {
    let mut failures: Vec<String> = Vec::new();
    for entry in applied.iter().rev() {
        match fs::remove_file(&entry.file) {
            Err(err) if err.kind() != std::io::ErrorKind::NotFound => {
                failures.push(format!(
                    "removing {} during rollback: {}",
                    entry.file.display(),
                    err
                ));
                continue;
            }
            _ => {}
        }
        if let Err(err) = fs::rename(&entry.backup_path, &entry.file) {
            failures.push(format!(
                "restoring {} during rollback: {}",
                entry.file.display(),
                err
            ));
        }
    }
    if !failures.is_empty() {
        bail!(failures.join("; "));
    }
    Ok(())
}
/// Deletes the backup file of every applied edit, ignoring any removal errors.
fn cleanup_edit_backups(applied: &[AppliedEdit]) {
    applied.iter().for_each(|entry| {
        // Best effort: a backup that is already gone or cannot be removed is ignored.
        let _ = fs::remove_file(&entry.backup_path);
    });
}
/// Builds one successful `EditResult` per applied edit, in the order given.
fn ok_results_from_applied(applied: &[AppliedEdit]) -> Vec<EditResult> {
    let mut results = Vec::with_capacity(applied.len());
    for entry in applied {
        results.push(EditResult {
            file: entry.file.clone(),
            status: EditStatus::Ok,
            error: None,
            replacements: Some(entry.replacements),
        });
    }
    results
}
pub(crate) fn apply_edit_plan_atomically(plan: Vec<PlannedEdit>) -> Result<Vec<EditResult>> {
apply_edit_plan_atomically_inner(plan, |_, _| Ok(()))
}
/// Stages every planned edit, then swaps the staged files into place one by one,
/// rolling back all previously swapped files if any step fails.
///
/// `before_swap` is called with the commit position and target path before each swap;
/// returning an error from it aborts the batch and triggers the same rollback.
///
/// On success the results are ordered by each edit's original index and all backups
/// are removed. On failure, backups are removed only when the rollback itself
/// succeeded; otherwise they are left on disk and the error message mentions the
/// rollback failure.
///
/// # Errors
/// Returns the staging error, the hook's error, or a message describing the failed
/// backup/commit step (plus restore and rollback failures, if any).
fn apply_edit_plan_atomically_inner<F>(
plan: Vec<PlannedEdit>,
mut before_swap: F,
) -> Result<Vec<EditResult>>
where
F: FnMut(usize, &Path) -> Result<()>,
{
// All temp files are written before the first target is touched.
let staged = stage_edit_plan(plan)?;
let mut applied = Vec::with_capacity(staged.len());
for (commit_index, staged_edit) in staged.into_iter().enumerate() {
if let Err(err) = before_swap(commit_index, &staged_edit.file) {
match rollback_applied_edits(&applied) {
Ok(()) => cleanup_edit_backups(&applied),
Err(rollback_error) => {
return Err(err.context(format!("rollback also failed: {rollback_error}")));
}
}
return Err(err);
}
// Step 1: move the current target aside so it can be restored later.
let backup_path = edit_backup_path(&staged_edit.file, staged_edit.index);
if let Err(err) = fs::rename(&staged_edit.file, &backup_path) {
match rollback_applied_edits(&applied) {
Ok(()) => cleanup_edit_backups(&applied),
Err(rollback_error) => {
bail!(
"moving {} into backup slot failed: {}; rollback also failed: {}",
staged_edit.file.display(),
err,
rollback_error
);
}
}
bail!(
"moving {} into backup slot failed: {}",
staged_edit.file.display(),
err
);
}
// Step 2: move the staged temp file into the target's place.
match staged_edit.staged_file.persist(&staged_edit.file) {
Ok(_) => applied.push(AppliedEdit {
index: staged_edit.index,
file: staged_edit.file,
replacements: staged_edit.replacements,
backup_path,
}),
Err(err) => {
let persist_error = err.error;
// Release the temp file handed back by the failed persist before restoring.
drop(err.file);
// Put this target's original back first, then undo the earlier swaps.
let restore_error = fs::rename(&backup_path, &staged_edit.file).err();
let rollback_error = rollback_applied_edits(&applied).err();
if rollback_error.is_none() {
cleanup_edit_backups(&applied);
}
let mut message = format!(
"committing {} failed: {}",
staged_edit.file.display(),
persist_error
);
if let Some(restore_error) = restore_error {
message.push_str(&format!(
"; restoring original {} failed: {}",
staged_edit.file.display(),
restore_error
));
}
if let Some(rollback_error) = rollback_error {
message.push_str(&format!("; rollback also failed: {rollback_error}"));
}
bail!(message);
}
}
}
// Report results in the order of the original edit indices.
applied.sort_by_key(|entry| entry.index);
let results = ok_results_from_applied(&applied);
cleanup_edit_backups(&applied);
Ok(results)
}
fn resolve_query_index_target(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
) -> Result<SearchIndexTarget> {
let cfg = config::Config::load(root)?;
if let Some(scope_name) = scope {
let scope = config::Config::resolve_submodule(root, scope_name)?;
return Ok(SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
});
}
if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
return Ok(SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
});
}
if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
return Ok(SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
});
}
let db_path = root.join(".tsift/index.db");
if db_path.exists() {
return Ok(SearchIndexTarget {
label: "index".to_string(),
db_path,
source_root: root.to_path_buf(),
scope_name: None,
reindex_cmd: format!("tsift index {}", root.display()),
});
}
let scopes = config::Config::submodule_dirs(root)?;
if scopes.is_empty() {
return Ok(SearchIndexTarget {
label: "index".to_string(),
db_path,
source_root: root.to_path_buf(),
scope_name: None,
reindex_cmd: format!("tsift index {}", root.display()),
});
}
let available_scopes = scopes
.iter()
.map(|scope| scope.id.as_str())
.collect::<Vec<_>>()
.join(", ");
let indexed_scopes = scopes
.iter()
.filter(|scope| cfg.db_path_for(root, &scope.id).exists())
.map(|scope| scope.id.as_str())
.collect::<Vec<_>>();
let indexed_label = if indexed_scopes.is_empty() {
"none".to_string()
} else {
indexed_scopes.join(", ")
};
bail!(
"workspace root {} has no shared root index at {}. Read-only graph queries require `--scope <scope>` when the workspace is indexed into `.tsift/indexes/*/index.db`. Available scopes: {}. Indexed scopes: {}.",
root.display(),
db_path.display(),
available_scopes,
indexed_label
);
}
/// Resolves the query index target and returns only its database path.
fn resolve_query_db_path(root: &Path, path_hint: &Path, scope: Option<&str>) -> Result<PathBuf> {
    resolve_query_index_target(root, path_hint, scope).map(|target| target.db_path)
}
/// Brings the target index up to date before a query, when an update is needed.
///
/// If the index state gives no reason to update, nothing happens. After a successful
/// update the inspect-scope cache is invalidated. If the update fails because another
/// writer holds the lock and an index database already exists, a note is printed to
/// stderr and the query proceeds on the existing snapshot.
///
/// # Errors
/// Propagates inspection failures and any update failure not covered by the
/// active-writer case above.
fn ensure_query_index_current(root: &Path, target: &SearchIndexTarget) -> Result<()> {
    let state = inspect_search_index(target)?;
    let reason = match index_reason_for_state(state) {
        Some(reason) => reason,
        None => return Ok(()),
    };
    let err = match apply_search_index_update(root, target) {
        Ok(_) => {
            index::inspect_scope_invalidate_all();
            return Ok(());
        }
        Err(err) => err,
    };
    let can_use_snapshot = is_active_writer_lock_error(&err) && target.db_path.exists();
    if !can_use_snapshot {
        return Err(err);
    }
    eprintln!(
        "note: active tsift writer detected; skipping graph-query autoindex because {}. \
         Continuing with the current read-only index snapshot; graph results may lag. \
         Retry `{}` after the active writer finishes for fresh graph results.",
        index_reason_detail(target, reason),
        target.reindex_cmd
    );
    Ok(())
}
/// Opens the index database that serves queries for `path`, read-only.
///
/// The project root and index target are resolved first, and the index is refreshed
/// if needed.
///
/// # Errors
/// Propagates resolution and refresh failures, and fails when no index database
/// exists at the resolved location.
pub(crate) fn open_index_db(path: &std::path::Path, scope: Option<&str>) -> Result<index::IndexDb> {
    let root = lint::resolve_project_root_or_canonical_path(path)?;
    let target = resolve_query_index_target(&root, path, scope)?;
    ensure_query_index_current(&root, &target)?;
    if target.db_path.exists() {
        return index::IndexDb::open_read_only_resilient(&target.db_path);
    }
    bail!(
        "no index found at {}. Run `tsift index` first.",
        target.db_path.display()
    )
}
/// Chooses the source root that tagpath lookups should use for a query.
///
/// An explicit `scope` resolves to that submodule's source root; otherwise a submodule
/// inferred from `path_hint` is used; otherwise `root` itself.
///
/// # Errors
/// Propagates failures from submodule resolution and inference.
pub(crate) fn query_tagpath_root(
    root: &std::path::Path,
    path_hint: &std::path::Path,
    scope: Option<&str>,
) -> Result<PathBuf> {
    let source_root = match scope {
        Some(scope_name) => config::Config::resolve_submodule(root, scope_name)?.source_root,
        None => match config::Config::infer_submodule_from_path(root, path_hint)? {
            Some(inferred) => inferred.source_root,
            None => root.to_path_buf(),
        },
    };
    Ok(source_root)
}
/// Serializable node of the traversal graph.
///
/// Optional fields are left out of the serialized form when `None`, and `properties`
/// is left out when empty.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct TraversalNode {
handle: String,
kind: String,
label: String,
#[serde(skip_serializing_if = "Option::is_none")]
ref_id: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
line: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
detail: Option<String>,
// Free-form key/value attributes, serialized in key order.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
properties: BTreeMap<String, String>,
// NOTE(review): presumably a follow-up command or hint for expanding this node —
// confirm against where the field is populated.
expand: String,
}
/// Serializable edge of the traversal graph, connecting two nodes by a named
/// relation.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct TraversalEdge {
// NOTE(review): `from`/`to` presumably hold node handles — confirm with the builder.
from: String,
to: String,
relation: String,
// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
label: Option<String>,
weight: usize,
}
/// Accumulator for a traversal graph under construction: nodes, edges, and warnings
/// gathered along the way.
#[derive(Clone, Debug, Default)]
struct TraversalGraphBuild {
// Nodes keyed by a string identifier (presumably the node handle — confirm).
nodes: BTreeMap<String, TraversalNode>,
edges: Vec<TraversalEdge>,
// Triples tracked alongside `edges`; presumably (from, to, relation) used to avoid
// duplicate edges — confirm against the insertion code.
edge_keys: BTreeSet<(String, String, String)>,
warnings: Vec<String>,
}
// Identifier strings and numeric caps for the graph projection and related report
// contracts.
// NOTE(review): the consumers of these constants are outside this section; the
// grouping below follows the names only — confirm before relying on it.

// Version tag of the traversal graph projection.
pub(crate) const GRAPH_PROJECTION_VERSION: &str = "tsift-traversal-v1";
// Contract version identifiers for the individual report/packet formats.
const GRAPH_DB_EVIDENCE_CONTRACT_VERSION: &str = "graph-db-evidence-v1";
const WORKER_PROMPT_PACKET_CONTRACT_VERSION: &str = "worker-prompt-packet-v1";
const CONFLICT_MATRIX_CONTRACT_VERSION: &str = "conflict-matrix-v1";
const CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION: &str =
"context-pack-graph-orchestration-v1";
const SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION: &str = "session-review-follow-up-v1";
const DISPATCH_TRACE_CONTRACT_VERSION: &str = "dispatch-trace-v1";
const DEPENDENCY_DAG_CONTRACT_VERSION: &str = "dependency-dag-v1";
// Node kind string used for projection metadata entries.
const GRAPH_PROJECTION_META_KIND: &str = "projection_meta";
// Numeric limits for graph-db neighbor ranking and semantic edge scanning.
const GRAPH_DB_RANKED_NEIGHBOR_CAP: usize = 12;
const GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP: usize = 16;
const GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP: usize = 64;
/// Serializable node and edge counts reported with a traversal.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalTotals {
nodes: usize,
edges: usize,
}
/// Serializable description of a path between two traversal nodes: its endpoints,
/// hop count, and the nodes and edges along it.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalPathReport {
from: TraversalNode,
to: TraversalNode,
hops: usize,
nodes: Vec<TraversalNode>,
edges: Vec<TraversalEdge>,
}
/// Serializable suggestion of a node worth visiting next, with a textual reason and
/// an integer score.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalRecommendation {
handle: String,
kind: String,
label: String,
reason: String,
score: usize,
expand: String,
}
/// Top-level serializable result of a traversal query.
///
/// `scope`, `query`, `target` and `shortest_path` are omitted from the serialized form
/// when `None`; `warnings` is omitted when empty.
#[derive(Debug, Serialize, PartialEq)]
struct TraversalReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
mode: String,
totals: TraversalTotals,
#[serde(skip_serializing_if = "Option::is_none")]
query: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
target: Option<String>,
nodes: Vec<TraversalNode>,
edges: Vec<TraversalEdge>,
#[serde(skip_serializing_if = "Option::is_none")]
shortest_path: Option<TraversalPathReport>,
recommendations: Vec<TraversalRecommendation>,
exploration: ExplorationPacket,
// NOTE(review): presumably set when nodes/edges were cut to fit a limit — confirm.
truncated: bool,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Serializable result of a semantic "related items" query.
///
/// `scope` is omitted from the serialized form when `None`; `warnings` is omitted
/// when empty.
#[derive(Debug, Serialize, PartialEq)]
struct SemanticRelatedReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
query: String,
embedding_model: String,
count: usize,
items: Vec<SemanticRelatedItem>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// One scored entry of a `SemanticRelatedReport`.
///
/// `file_path`, `source_symbol` and `detail` are omitted from the serialized form
/// when `None`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct SemanticRelatedItem {
handle: String,
kind: String,
label: String,
score: f64,
#[serde(skip_serializing_if = "Option::is_none")]
file_path: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
source_symbol: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
detail: Option<String>,
expand: String,
}
/// Symbol entry referenced by [`TraversalCodeLookup::symbols`].
#[derive(Clone)]
struct TraversalSymbolIndexEntry {
/// Handle of the entry.
handle: String,
/// Traversal node for the entry.
node: TraversalNode,
/// Token set for the entry (presumably used for matching — confirm at the use sites).
tokens: BTreeSet<String>,
}
/// File entry referenced by [`TraversalCodeLookup::files`].
#[derive(Clone)]
struct TraversalFileIndexEntry {
/// Handle of the entry.
handle: String,
/// Traversal node for the entry.
node: TraversalNode,
/// Token set for the entry (presumably used for matching — confirm at the use sites).
tokens: BTreeSet<String>,
}
/// Route entry referenced by [`TraversalCodeLookup::routes`].
#[derive(Clone)]
struct TraversalRouteIndexEntry {
/// Handle of the entry.
handle: String,
/// Traversal node for the entry.
node: TraversalNode,
/// Token set for the entry (presumably used for matching — confirm at the use sites).
tokens: BTreeSet<String>,
}
/// Borrowed view over symbol, file and route index entries plus lookup maps.
///
/// NOTE(review): how the maps are populated is not visible in this part of the
/// file; the field notes below are assumptions to confirm at the constructor.
struct TraversalCodeLookup<'a> {
/// Borrowed symbol entries.
symbols: &'a [TraversalSymbolIndexEntry],
/// Borrowed file entries.
files: &'a [TraversalFileIndexEntry],
/// Borrowed route entries.
routes: &'a [TraversalRouteIndexEntry],
/// Presumably maps a key to positions in `symbols` — confirm.
symbol_index: HashMap<String, Vec<usize>>,
/// Presumably maps a key to positions in `files` — confirm.
file_index: HashMap<String, Vec<usize>>,
/// Presumably maps a key to positions in `routes` — confirm.
route_index: HashMap<String, Vec<usize>>,
/// String-to-string map related to file paths; key/value meaning not visible here.
file_path_index: HashMap<String, String>,
}
/// Limits applied when building exploration context; serialized as
/// [`ExplorationPacket::budget`].
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationBudget {
/// Project size label; the set of values is defined by the producer.
project_size: String,
/// Upper bound on source windows collected per context (used as the cap on
/// target handles and worker source handles in the context projection).
max_source_windows: usize,
/// Line count per source window (consumed by the window builder, not shown here).
lines_per_window: usize,
/// Relationship limit; the context projection also caps its code-context
/// count at `min(relationship_limit, 8)`.
relationship_limit: usize,
}
/// One entry of [`ExplorationPacket::relationship_map`]: a labelled relation
/// between two endpoints.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationRelation {
/// Source endpoint.
from: String,
/// Relation name.
relation: String,
/// Target endpoint.
to: String,
/// Optional label; omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
label: Option<String>,
}
/// A bounded range of a source file; also projected into the graph as a
/// `source_handle` node labelled `file:start-end`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationSourceWindow {
/// Handle identifying the window.
handle: String,
/// File the window belongs to.
file: String,
/// Start of the range (presumably a line number — confirm at the window builder).
start: usize,
/// End of the range (inclusivity not visible here — confirm at the window builder).
end: usize,
/// Why the window was selected.
reason: String,
/// NOTE(review): presumably a command that expands the window — confirm.
expand: String,
}
/// One entry of [`ExplorationPacket::worker_context`].
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationWorkerContext {
/// Handle identifying the worker context.
handle: String,
/// Target the context is about.
target: String,
/// Short summary text.
summary: String,
/// NOTE(review): presumably a follow-up command string — confirm at the producer.
expand: String,
}
/// Exploration payload serialized as the `exploration` field of [`TraversalReport`].
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ExplorationPacket {
/// Limits that were applied while building the packet.
budget: ExplorationBudget,
/// Relations between endpoints.
relationship_map: Vec<ExplorationRelation>,
/// Source windows selected for reading.
source_windows: Vec<ExplorationSourceWindow>,
/// Worker contexts; omitted from the serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
worker_context: Vec<ExplorationWorkerContext>,
/// Guidance text about not re-reading sources; wording is set by the producer.
no_reread_guidance: String,
}
impl TraversalGraphBuild {
    /// Registers `node` under its handle.
    ///
    /// A handle that is already registered keeps its existing node; the new
    /// one is dropped.
    fn add_node(&mut self, node: TraversalNode) {
        let handle = node.handle.clone();
        self.nodes.entry(handle).or_insert(node);
    }

    /// Records a directed `relation` edge from `from` to `to`.
    ///
    /// The edge is ignored when it is a self-loop, when either endpoint has
    /// not been added as a node, or when an edge with the same
    /// `(from, to, relation)` triple was recorded before.
    fn add_edge(
        &mut self,
        from: &str,
        to: &str,
        relation: &str,
        label: Option<String>,
        weight: usize,
    ) {
        if from == to {
            return;
        }
        let endpoints_known = self.nodes.contains_key(from) && self.nodes.contains_key(to);
        if !endpoints_known {
            return;
        }
        let dedupe_key = (from.to_owned(), to.to_owned(), relation.to_owned());
        if !self.edge_keys.insert(dedupe_key) {
            // Same triple already recorded; keep the first edge.
            return;
        }
        self.edges.push(TraversalEdge {
            from: from.to_owned(),
            to: to.to_owned(),
            relation: relation.to_owned(),
            label,
            weight,
        });
    }
}
/// Returns the graph database path under `root`.
///
/// Without a scope this is `<root>/.tsift/graph.db`; with a scope it is
/// `<root>/.tsift/indexes/<scope>/graph.db`.
pub(crate) fn graph_substrate_db_path(root: &Path, scope: Option<&str>) -> PathBuf {
    let Some(scope_name) = scope else {
        return root.join(".tsift/graph.db");
    };
    let mut db_path = root.join(".tsift/indexes");
    db_path.push(scope_name);
    db_path.push("graph.db");
    db_path
}
/// Builds the id of the projection metadata node for `scope`.
///
/// The id is `projection:tsift-traversal:<scope>`, with `root` standing in
/// when no scope is given.
fn graph_projection_meta_id(scope: Option<&str>) -> String {
    let scope_name = match scope {
        Some(name) => name,
        None => "root",
    };
    let mut id = String::from("projection:tsift-traversal:");
    id.push_str(scope_name);
    id
}
/// Hashes the JSON serialization of `value` with BLAKE3 and returns the hex digest.
///
/// # Errors
/// Returns the serialization error when `value` cannot be encoded as JSON.
fn content_hash<T: Serialize>(value: &T) -> Result<String> {
    let encoded = serde_json::to_vec(value)?;
    let digest = blake3::hash(&encoded);
    let hex = digest.to_hex();
    Ok(hex.to_string())
}
/// Stamps `node` with a content-hash freshness marker and returns it.
///
/// The hash is computed over a copy whose `freshness` is cleared, so any
/// freshness value already on the node does not influence the digest.
///
/// # Errors
/// Propagates the error from [`content_hash`].
fn node_with_content_freshness(mut node: SubstrateGraphNode) -> Result<SubstrateGraphNode> {
    let digest = {
        let mut unstamped = node.clone();
        unstamped.freshness = None;
        content_hash(&unstamped)?
    };
    node.freshness = Some(GraphFreshness::content_hash(digest));
    Ok(node)
}
/// Stamps `edge` with a content-hash freshness marker and returns it.
///
/// The hash is computed over a copy whose `freshness` is cleared, so any
/// freshness value already on the edge does not influence the digest.
///
/// # Errors
/// Propagates the error from [`content_hash`].
fn edge_with_content_freshness(mut edge: SubstrateGraphEdge) -> Result<SubstrateGraphEdge> {
    let digest = {
        let mut unstamped = edge.clone();
        unstamped.freshness = None;
        content_hash(&unstamped)?
    };
    edge.freshness = Some(GraphFreshness::content_hash(digest));
    Ok(edge)
}
/// Number of components in a semantic embedding vector; parsed embeddings
/// with any other length are rejected.
const SEMANTIC_EMBEDDING_DIM: usize = 32;
/// Model name stored in the `embedding_model` property of semantic nodes.
const SEMANTIC_EMBEDDING_MODEL: &str = "tsift-local-hash-v1";
fn semantic_related_kind_name(kind: SemanticRelatedKind) -> &'static str {
match kind {
SemanticRelatedKind::Concept => "concept",
SemanticRelatedKind::Entity => "entity",
SemanticRelatedKind::All => "all",
}
}
/// Builds the `tsift semantic` command line that looks up `query` under
/// `root`, restricted to `kind`, with a fixed limit of 10.
///
/// The query and the (lossily converted) root path are passed through
/// `shell_quote`.
fn semantic_related_command(root: &Path, query: &str, kind: SemanticRelatedKind) -> String {
    let quoted_query = shell_quote(query);
    let root_text = root.to_string_lossy();
    let quoted_root = shell_quote(root_text.as_ref());
    let kind_name = semantic_related_kind_name(kind);
    format!(
        "tsift semantic {} --path {} --kind {} --limit 10",
        quoted_query, quoted_root, kind_name
    )
}
/// Computes the local hashed embedding of `input`.
///
/// Each token from `traversal_tokens` is hashed with BLAKE3: the first digest
/// byte selects a bucket (modulo [`SEMANTIC_EMBEDDING_DIM`]) and the low bit
/// of the second byte selects `+1` or `-1`. When tokenization yields nothing,
/// the trimmed, ASCII-lowercased input is used as the single token, if it is
/// non-empty. The vector is scaled to unit length unless it is all zeros.
fn semantic_embedding(input: &str) -> Vec<f64> {
    let mut tokens = traversal_tokens(input);
    if tokens.is_empty() {
        let fallback = input.trim().to_ascii_lowercase();
        if !fallback.is_empty() {
            tokens.insert(fallback);
        }
    }

    let mut vector = vec![0.0_f64; SEMANTIC_EMBEDDING_DIM];
    for token in tokens {
        let digest = blake3::hash(token.as_bytes());
        let bytes = digest.as_bytes();
        let bucket = usize::from(bytes[0]) % SEMANTIC_EMBEDDING_DIM;
        let delta = if bytes[1] & 1 == 0 { 1.0 } else { -1.0 };
        vector[bucket] += delta;
    }

    let squared_norm = vector
        .iter()
        .map(|component| component * component)
        .sum::<f64>();
    let norm = squared_norm.sqrt();
    if norm > 0.0 {
        vector.iter_mut().for_each(|component| *component /= norm);
    }
    vector
}
/// Renders the embedding of `input` as a comma-separated list of components,
/// each formatted with six decimal places.
fn semantic_embedding_property(input: &str) -> String {
    let mut rendered = String::new();
    for (position, component) in semantic_embedding(input).iter().enumerate() {
        if position > 0 {
            rendered.push(',');
        }
        rendered.push_str(&format!("{component:.6}"));
    }
    rendered
}
/// Parses a comma-separated embedding property back into its components.
///
/// Returns `None` when any piece (after trimming) fails to parse as `f64`, or
/// when the number of components differs from [`SEMANTIC_EMBEDDING_DIM`].
fn parse_semantic_embedding_property(value: &str) -> Option<Vec<f64>> {
    let mut components = Vec::new();
    for piece in value.split(',') {
        let component = piece.trim().parse::<f64>().ok()?;
        components.push(component);
    }
    if components.len() == SEMANTIC_EMBEDDING_DIM {
        Some(components)
    } else {
        None
    }
}
/// Returns the dot product of `left` and `right`, or `0.0` when their lengths
/// differ.
///
/// No normalization is applied here, so the result equals the cosine
/// similarity only when both inputs are already unit-length.
fn semantic_cosine(left: &[f64], right: &[f64]) -> f64 {
    if left.len() == right.len() {
        left.iter()
            .zip(right)
            .map(|(a, b)| a * b)
            .sum::<f64>()
    } else {
        0.0
    }
}
/// Derives the stable handle for a semantic entity.
///
/// `name` and `kind` are trimmed and ASCII-lowercased before hashing, so
/// inputs that differ only in case or surrounding whitespace share a handle.
fn semantic_entity_handle(name: &str, kind: &str) -> String {
    let kind_key = kind.trim().to_ascii_lowercase();
    let name_key = name.trim().to_ascii_lowercase();
    let seed = format!("entity:{}:{}", kind_key, name_key);
    stable_handle("gent", &seed)
}
/// Derives the stable handle for a semantic concept.
///
/// `label` is trimmed and ASCII-lowercased before hashing, so labels that
/// differ only in case or surrounding whitespace share a handle.
fn semantic_concept_handle(label: &str) -> String {
    let label_key = label.trim().to_ascii_lowercase();
    let seed = format!("concept:{}", label_key);
    stable_handle("gcon", &seed)
}
fn summary_source_handles(
summary: &summarize::Summary,
file_node_by_path: &BTreeMap<String, String>,
symbol_node_by_file_label: &BTreeMap<(String, String), String>,
) -> Vec<String> {
let mut handles = Vec::new();
if let Some(handle) = file_node_by_path.get(&summary.file_path) {
handles.push(handle.clone());
}
if let Some(handle) =
symbol_node_by_file_label.get(&(summary.file_path.clone(), summary.symbol_name.clone()))
&& !handles.iter().any(|existing| existing == handle)
{
handles.push(handle.clone());
}
handles
}
/// Builds the `semantic_entity` graph node for an entity found in `summary`.
///
/// The node id is the entity's stable handle. Its properties record the
/// entity kind and description, the originating file and symbol, the
/// embedding (computed over `"{name} {kind} {description}"`) with its model
/// name, and an `expand` command that looks up related entities.
fn semantic_entity_node(
    root: &Path,
    summary: &summarize::Summary,
    name: &str,
    kind: &str,
    description: &str,
    provenance: &GraphProvenance,
) -> SubstrateGraphNode {
    let handle = semantic_entity_handle(name, kind);
    let has_description = !description.trim().is_empty();
    let detail = if has_description {
        format!("{kind}: {description}")
    } else {
        format!("{kind} entity from cached summaries")
    };
    let embedding = semantic_embedding_property(&format!("{name} {kind} {description}"));
    let expand = semantic_related_command(root, name, SemanticRelatedKind::Entity);

    let node = SubstrateGraphNode::new(handle.clone(), "semantic_entity", name.to_string());
    node.with_property("handle", handle)
        .with_property("ref_id", name.to_string())
        .with_property("detail", detail)
        .with_property("entity_kind", kind.to_string())
        .with_property("description", description.to_string())
        .with_property("source_file", summary.file_path.clone())
        .with_property("source_symbol", summary.symbol_name.clone())
        .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
        .with_property("embedding", embedding)
        .with_property("expand", expand)
        .with_provenance(provenance.clone())
}
/// Builds the `semantic_concept` graph node for a concept label found in
/// `summary`.
///
/// The node id is the label's stable handle. Its properties record the
/// originating file and symbol, the embedding of the label with its model
/// name, and an `expand` command that looks up related concepts.
fn semantic_concept_node(
    root: &Path,
    summary: &summarize::Summary,
    label: &str,
    provenance: &GraphProvenance,
) -> SubstrateGraphNode {
    let handle = semantic_concept_handle(label);
    let embedding = semantic_embedding_property(label);
    let expand = semantic_related_command(root, label, SemanticRelatedKind::Concept);

    let node = SubstrateGraphNode::new(handle.clone(), "semantic_concept", label.to_string());
    node.with_property("handle", handle)
        .with_property("ref_id", label.to_string())
        .with_property("detail", "concept label from cached summaries".to_string())
        .with_property("source_file", summary.file_path.clone())
        .with_property("source_symbol", summary.symbol_name.clone())
        .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
        .with_property("embedding", embedding)
        .with_property("expand", expand)
        .with_provenance(provenance.clone())
}
/// Inserts `edge` into `edge_map` keyed by `(from_id, to_id, kind)`.
///
/// The first edge recorded for a key wins; later edges with the same key are
/// dropped.
fn insert_semantic_edge(
    edge_map: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
    edge: SubstrateGraphEdge,
) {
    let key = (edge.from_id.clone(), edge.to_id.clone(), edge.kind.clone());
    if !edge_map.contains_key(&key) {
        edge_map.insert(key, edge);
    }
}
/// Appends semantic entity/concept nodes and their edges, derived from cached
/// summaries, to the projection rows in `nodes` and `edges`.
///
/// Summaries are read from `<root>/.tsift/summaries.db`. When that file does
/// not exist or holds no summaries, nothing is appended. For each summary:
///
/// * every entity becomes a `semantic_entity` node, linked from the summary's
///   source handles with `mentions_entity`;
/// * every non-blank concept label becomes a `semantic_concept` node, linked
///   from the source handles with `mentions_concept`;
/// * each entity is linked to each concept with `tagged_concept`;
/// * each pair of distinct concepts is linked with `related_concept`;
/// * each relationship becomes a `semantic_relation` edge; an endpoint that
///   does not name one of the summary's entities gets an `unknown`-kind
///   entity node.
///
/// Nodes and edges are de-duplicated (first occurrence wins) across all
/// summaries and appended in key order, each stamped with content freshness.
///
/// # Errors
/// Fails when the summaries database cannot be opened or read, or when a row
/// cannot be hashed for freshness.
fn append_summary_semantic_projection_rows(
    root: &Path,
    graph: &TraversalGraphBuild,
    provenance: &GraphProvenance,
    nodes: &mut Vec<SubstrateGraphNode>,
    edges: &mut Vec<SubstrateGraphEdge>,
) -> Result<()> {
    let summaries_db = root.join(".tsift/summaries.db");
    if !summaries_db.exists() {
        return Ok(());
    }
    let summary_db = summarize::SummaryDb::open_read_only_resilient(&summaries_db)?;
    let summaries = summary_db.all()?;
    if summaries.is_empty() {
        return Ok(());
    }
    let file_node_by_path = graph
        .nodes
        .values()
        .filter(|node| node.kind == "file")
        .filter_map(|node| {
            node.path
                .as_ref()
                .map(|path| (path.clone(), node.handle.clone()))
        })
        .collect::<BTreeMap<_, _>>();
    let symbol_node_by_file_label = graph
        .nodes
        .values()
        .filter(|node| node.kind == "symbol")
        .filter_map(|node| {
            Some((
                (node.path.clone()?, node.label.clone()),
                node.handle.clone(),
            ))
        })
        .collect::<BTreeMap<_, _>>();
    let mut semantic_nodes = BTreeMap::<String, SubstrateGraphNode>::new();
    let mut semantic_edges = BTreeMap::<(String, String, String), SubstrateGraphEdge>::new();
    for summary in &summaries {
        let source_handles =
            summary_source_handles(summary, &file_node_by_path, &symbol_node_by_file_label);

        // Entities, keyed by normalized name for relationship lookups below.
        // The key uses the same trim + lowercase normalization as the entity
        // handle, so a relationship endpoint that differs only by surrounding
        // whitespace still resolves to the declared entity.
        let mut entity_ids_by_name = BTreeMap::<String, String>::new();
        if let Some(entities) = &summary.entities {
            for entity in entities {
                let node = semantic_entity_node(
                    root,
                    summary,
                    &entity.name,
                    &entity.kind,
                    &entity.description,
                    provenance,
                );
                let entity_id = node.id.clone();
                entity_ids_by_name
                    .insert(entity.name.trim().to_ascii_lowercase(), entity_id.clone());
                semantic_nodes.entry(entity_id.clone()).or_insert(node);
                for source_handle in &source_handles {
                    insert_semantic_edge(
                        &mut semantic_edges,
                        SubstrateGraphEdge::new(
                            source_handle.clone(),
                            entity_id.clone(),
                            "mentions_entity",
                        )
                        .with_property("label", format!("summary entity: {}", entity.name))
                        .with_property("source_file", summary.file_path.clone())
                        .with_provenance(provenance.clone()),
                    );
                }
            }
        }

        // Concepts. `concept_ids` holds each concept id once: labels that
        // repeat or differ only by case map to the same handle, and keeping
        // duplicates would produce `related_concept` self-loops below.
        let mut concept_ids: Vec<String> = Vec::new();
        if let Some(labels) = &summary.concept_labels {
            for label in labels
                .iter()
                .map(|label| label.trim())
                .filter(|label| !label.is_empty())
            {
                let node = semantic_concept_node(root, summary, label, provenance);
                let concept_id = node.id.clone();
                semantic_nodes.entry(concept_id.clone()).or_insert(node);
                if !concept_ids.contains(&concept_id) {
                    concept_ids.push(concept_id.clone());
                }
                for source_handle in &source_handles {
                    insert_semantic_edge(
                        &mut semantic_edges,
                        SubstrateGraphEdge::new(
                            source_handle.clone(),
                            concept_id.clone(),
                            "mentions_concept",
                        )
                        .with_property("label", format!("summary concept: {label}"))
                        .with_property("source_file", summary.file_path.clone())
                        .with_provenance(provenance.clone()),
                    );
                }
            }
        }

        // Entity -> concept tagging.
        for entity_id in entity_ids_by_name.values() {
            for concept_id in &concept_ids {
                insert_semantic_edge(
                    &mut semantic_edges,
                    SubstrateGraphEdge::new(
                        entity_id.clone(),
                        concept_id.clone(),
                        "tagged_concept",
                    )
                    .with_property("label", "entity concept label".to_string())
                    .with_property("source_file", summary.file_path.clone())
                    .with_provenance(provenance.clone()),
                );
            }
        }

        // Concept co-occurrence: one edge per unordered pair, in label order.
        for (idx, earlier) in concept_ids.iter().enumerate() {
            for later in &concept_ids[idx + 1..] {
                insert_semantic_edge(
                    &mut semantic_edges,
                    SubstrateGraphEdge::new(earlier.clone(), later.clone(), "related_concept")
                        .with_property("label", format!("co-occurs in {}", summary.symbol_name))
                        .with_property("source_file", summary.file_path.clone())
                        .with_provenance(provenance.clone()),
                );
            }
        }

        // Relationships between entities.
        if let Some(relationships) = &summary.relationships {
            // Resolves an endpoint name to an entity id, registering an
            // `unknown`-kind entity node when the summary did not declare it.
            let mut resolve_entity = |name: &str| -> String {
                if let Some(id) = entity_ids_by_name.get(&name.trim().to_ascii_lowercase()) {
                    return id.clone();
                }
                let node = semantic_entity_node(root, summary, name, "unknown", "", provenance);
                let id = node.id.clone();
                semantic_nodes.entry(id.clone()).or_insert(node);
                id
            };
            for relationship in relationships {
                let from_id = resolve_entity(relationship.from.as_str());
                let to_id = resolve_entity(relationship.to.as_str());
                insert_semantic_edge(
                    &mut semantic_edges,
                    SubstrateGraphEdge::new(from_id, to_id, "semantic_relation")
                        .with_property("relationship_kind", relationship.kind.clone())
                        .with_property("label", relationship.kind.clone())
                        .with_property("source_file", summary.file_path.clone())
                        .with_property("source_symbol", summary.symbol_name.clone())
                        .with_provenance(provenance.clone()),
                );
            }
        }
    }
    for node in semantic_nodes.into_values() {
        nodes.push(node_with_content_freshness(node)?);
    }
    for edge in semantic_edges.into_values() {
        edges.push(edge_with_content_freshness(edge)?);
    }
    Ok(())
}
/// Hashes the projection rows together with the projection version.
///
/// The hashed payload serializes `version`, `nodes` and `edges` in that
/// order, so the digest changes whenever the version or any row changes.
///
/// # Errors
/// Propagates the error from [`content_hash`].
fn projection_content_hash(
    nodes: &[SubstrateGraphNode],
    edges: &[SubstrateGraphEdge],
) -> Result<String> {
    #[derive(Serialize)]
    struct Payload<'a> {
        version: &'static str,
        nodes: &'a [SubstrateGraphNode],
        edges: &'a [SubstrateGraphEdge],
    }

    let payload = Payload {
        version: GRAPH_PROJECTION_VERSION,
        nodes,
        edges,
    };
    content_hash(&payload)
}
/// Returns the `content_hash` property of the projection's metadata node.
///
/// Only the first node of kind [`GRAPH_PROJECTION_META_KIND`] is consulted;
/// the result is `None` when there is no such node or it lacks the property.
pub(crate) fn graph_projection_content_hash(projection: &GraphProjection) -> Option<String> {
    for node in &projection.nodes {
        if node.kind == GRAPH_PROJECTION_META_KIND {
            return node.properties.get("content_hash").cloned();
        }
    }
    None
}
/// Converts a traversal graph into a [`GraphProjection`].
///
/// Every traversal node and edge is projected with provenance and content
/// freshness. Context rows and summary-derived semantic rows are appended
/// next. Last comes a metadata node carrying the projection version and the
/// content hash of all preceding rows. The metadata node's `node_count` and
/// `edge_count` report the sizes of the traversal graph itself.
///
/// # Errors
/// Fails when a row cannot be hashed or when appending context or semantic
/// rows fails.
fn traversal_projection_from_graph(
    root: &Path,
    scope: Option<&str>,
    graph: &TraversalGraphBuild,
) -> Result<GraphProjection> {
    let scope_name = scope.unwrap_or("root");
    let provenance = GraphProvenance::new(
        "tsift.traverse",
        format!("{}:{}", root.display(), scope_name),
    );

    // +1 leaves room for the metadata node pushed at the end.
    let mut nodes = Vec::with_capacity(graph.nodes.len() + 1);
    for node in graph.nodes.values() {
        let mut projected =
            SubstrateGraphNode::new(node.handle.clone(), node.kind.clone(), node.label.clone())
                .with_property("handle", node.handle.clone())
                .with_property("expand", node.expand.clone())
                .with_provenance(provenance.clone());
        // Typed optional fields are written first; the node's own property
        // map is applied afterwards, in the same order as before.
        let optional_properties = [
            ("ref_id", node.ref_id.clone()),
            ("path", node.path.clone()),
            ("line", node.line.map(|line| line.to_string())),
            ("detail", node.detail.clone()),
        ];
        for (key, value) in optional_properties {
            if let Some(value) = value {
                projected = projected.with_property(key, value);
            }
        }
        projected = node
            .properties
            .iter()
            .fold(projected, |acc, (key, value)| {
                acc.with_property(key.clone(), value.clone())
            });
        nodes.push(node_with_content_freshness(projected)?);
    }

    let mut edges = Vec::with_capacity(graph.edges.len());
    for edge in &graph.edges {
        let base =
            SubstrateGraphEdge::new(edge.from.clone(), edge.to.clone(), edge.relation.clone())
                .with_property("weight", edge.weight.to_string())
                .with_provenance(provenance.clone());
        let projected = match &edge.label {
            Some(label) => base.with_property("label", label.clone()),
            None => base,
        };
        edges.push(edge_with_content_freshness(projected)?);
    }

    append_traversal_context_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;
    append_summary_semantic_projection_rows(root, graph, &provenance, &mut nodes, &mut edges)?;

    // The hash covers every row except the metadata node that stores it.
    let projection_hash = projection_content_hash(&nodes, &edges)?;
    let meta = SubstrateGraphNode::new(
        graph_projection_meta_id(scope),
        GRAPH_PROJECTION_META_KIND,
        "tsift traversal projection",
    )
    .with_property("projection_version", GRAPH_PROJECTION_VERSION)
    .with_property("content_hash", projection_hash.clone())
    .with_property("root", root.to_string_lossy().to_string())
    .with_property("scope", scope_name)
    .with_property("node_count", graph.nodes.len().to_string())
    .with_property("edge_count", graph.edges.len().to_string())
    .with_provenance(provenance)
    .with_freshness(GraphFreshness::content_hash(projection_hash));
    nodes.push(meta);

    Ok(GraphProjection { nodes, edges })
}
/// Returns the `source_handle` node id covering `node`, creating it on demand.
///
/// Results are memoized per node handle in `source_handle_by_node`, and
/// windows are shared through `seen_windows`: nodes whose window has the same
/// `(file, start, end)` reuse the first handle created for it. When a new
/// window is created, its `source_handle` node is appended to `nodes`,
/// together with an `expands_source` edge to the file node (when the file is
/// known) and an `anchors_source` edge to `node` (unless `node` is itself a
/// file).
///
/// Returns `Ok(None)` when no source window can be derived for `node`.
///
/// # Errors
/// Fails when a new row cannot be hashed for freshness.
// The shared projection state is threaded through explicitly, hence the
// argument count.
#[allow(clippy::too_many_arguments)]
fn ensure_traversal_source_handle(
    root: &Path,
    provenance: &GraphProvenance,
    file_node_by_path: &BTreeMap<String, String>,
    node: &TraversalNode,
    budget: &ExplorationBudget,
    source_handle_by_node: &mut BTreeMap<String, String>,
    seen_windows: &mut BTreeMap<(String, usize, usize), String>,
    nodes: &mut Vec<SubstrateGraphNode>,
    edges: &mut Vec<SubstrateGraphEdge>,
) -> Result<Option<String>> {
    if let Some(known) = source_handle_by_node.get(&node.handle) {
        return Ok(Some(known.clone()));
    }
    let window = match exploration_source_window_for_node(root, node, budget) {
        Some(window) => window,
        None => return Ok(None),
    };

    let window_key = (window.file.clone(), window.start, window.end);
    let reused = seen_windows.get(&window_key).cloned();
    let handle = match reused {
        Some(existing) => existing,
        None => {
            let label = format!("{}:{}-{}", window.file, window.start, window.end);
            let source_node =
                SubstrateGraphNode::new(window.handle.clone(), "source_handle", label)
                    .with_property("handle", window.handle.clone())
                    .with_property("file", window.file.clone())
                    .with_property("start", window.start.to_string())
                    .with_property("end", window.end.to_string())
                    .with_property("reason", window.reason.clone())
                    .with_property("expand", window.expand.clone())
                    .with_provenance(provenance.clone());
            nodes.push(node_with_content_freshness(source_node)?);

            if let Some(file_handle) = file_node_by_path.get(&window.file) {
                let to_file = SubstrateGraphEdge::new(
                    window.handle.clone(),
                    file_handle.clone(),
                    "expands_source",
                )
                .with_property("label", window.reason.clone())
                .with_provenance(provenance.clone());
                edges.push(edge_with_content_freshness(to_file)?);
            }

            let anchors_non_file = node.kind != "file";
            if anchors_non_file {
                let to_node = SubstrateGraphEdge::new(
                    window.handle.clone(),
                    node.handle.clone(),
                    "anchors_source",
                )
                .with_property("label", window.reason.clone())
                .with_provenance(provenance.clone());
                edges.push(edge_with_content_freshness(to_node)?);
            }

            seen_windows.insert(window_key, window.handle.clone());
            window.handle
        }
    };

    source_handle_by_node.insert(node.handle.clone(), handle.clone());
    Ok(Some(handle))
}
/// Collects the code nodes that `backlog` mentions.
///
/// Walks the outgoing `mentions` edges of `backlog` in their stored order and
/// appends each not-yet-seen target of kind `file`, `symbol` or `route` to
/// `target_node_handles`. A target that lives in the same `.md` file as the
/// backlog item is skipped. The walk stops once `target_node_handles` holds
/// at least `max_handles` entries; that check runs after each accepted edge.
fn push_traversal_backlog_target_handles<'a>(
    backlog: &TraversalNode,
    edges_by_from: &BTreeMap<&'a str, Vec<&'a TraversalEdge>>,
    node_by_handle: &BTreeMap<&'a str, &'a TraversalNode>,
    max_handles: usize,
    seen_target_nodes: &mut BTreeSet<String>,
    target_node_handles: &mut Vec<String>,
) {
    let Some(outgoing) = edges_by_from.get(backlog.handle.as_str()) else {
        return;
    };
    for edge in outgoing {
        if edge.relation != "mentions" {
            continue;
        }
        let Some(target_node) = node_by_handle.get(edge.to.as_str()) else {
            continue;
        };
        let is_code_target = matches!(target_node.kind.as_str(), "file" | "symbol" | "route");
        if !is_code_target {
            continue;
        }
        let same_markdown_file = match (target_node.path.as_deref(), backlog.path.as_deref()) {
            (Some(target_path), Some(backlog_path)) => {
                target_path == backlog_path && target_path.ends_with(".md")
            }
            _ => false,
        };
        if same_markdown_file {
            continue;
        }
        let newly_seen = seen_target_nodes.insert(target_node.handle.clone());
        if newly_seen {
            target_node_handles.push(target_node.handle.clone());
        }
        if target_node_handles.len() >= max_handles {
            break;
        }
    }
}
/// Appends `worker_context` and `source_handle` rows for the work-item nodes
/// of `graph` (kinds `backlog`, `job_packet` and `worker_result`).
///
/// For each such node the function gathers target nodes, resolves them to
/// source-window handles, and — when at least one handle was resolved —
/// appends a `worker_context` node, a `requests_context` edge from the work
/// item to it, and one `scopes_source` edge per source handle.
///
/// # Errors
/// Fails when a source handle cannot be created or a row cannot be hashed for
/// freshness.
fn append_traversal_context_projection_rows(
root: &Path,
graph: &TraversalGraphBuild,
provenance: &GraphProvenance,
nodes: &mut Vec<SubstrateGraphNode>,
edges: &mut Vec<SubstrateGraphEdge>,
) -> Result<()> {
// Limits are derived from the size of the traversal graph.
let budget = exploration_budget_for_counts(graph.nodes.len(), graph.edges.len());
// File path -> handle of the corresponding `file` node.
let file_node_by_path = graph
.nodes
.values()
.filter(|node| node.kind == "file")
.filter_map(|node| {
node.path
.as_ref()
.map(|path| (path.clone(), node.handle.clone()))
})
.collect::<BTreeMap<_, _>>();
let node_by_handle = graph
.nodes
.values()
.map(|node| (node.handle.as_str(), node))
.collect::<BTreeMap<_, _>>();
// Outgoing edges grouped by source handle.
let mut edges_by_from = BTreeMap::<&str, Vec<&TraversalEdge>>::new();
for edge in &graph.edges {
edges_by_from
.entry(edge.from.as_str())
.or_default()
.push(edge);
}
// Order each group by weight (descending), then relation and target
// (ascending), so the per-node walks below are deterministic.
for rows in edges_by_from.values_mut() {
rows.sort_by(|left, right| {
right
.weight
.cmp(&left.weight)
.then(left.relation.cmp(&right.relation))
.then(left.to.cmp(&right.to))
});
}
// State shared across all work items so windows and handles are reused.
let mut seen_windows = BTreeMap::<(String, usize, usize), String>::new();
let mut source_handle_by_node = BTreeMap::<String, String>::new();
// Number of worker contexts emitted so far that were backed by code targets,
// capped at min(relationship_limit, 8).
let mut code_context_count = 0usize;
let code_context_limit = budget.relationship_limit.min(8);
for node in graph.nodes.values() {
if !matches!(
node.kind.as_str(),
"backlog" | "job_packet" | "worker_result"
) {
continue;
}
let mut target_node_handles = Vec::new();
let mut fallback_target_handles = Vec::new();
let mut seen_target_nodes = BTreeSet::new();
if node.kind == "backlog" || node.kind == "worker_result" {
// Backlog items and worker results: targets are the code nodes they
// mention; the fallback target is the item itself.
push_traversal_backlog_target_handles(
node,
&edges_by_from,
&node_by_handle,
budget.max_source_windows,
&mut seen_target_nodes,
&mut target_node_handles,
);
fallback_target_handles.push(node.handle.clone());
} else {
// Job packets: follow `targets` edges and collect what each targeted
// node mentions; the targeted nodes themselves are the fallback.
for edge in edges_by_from
.get(node.handle.as_str())
.into_iter()
.flatten()
.filter(|edge| edge.relation == "targets")
{
let Some(backlog) = node_by_handle.get(edge.to.as_str()) else {
continue;
};
fallback_target_handles.push(backlog.handle.clone());
push_traversal_backlog_target_handles(
backlog,
&edges_by_from,
&node_by_handle,
budget.max_source_windows,
&mut seen_target_nodes,
&mut target_node_handles,
);
if target_node_handles.len() >= budget.max_source_windows {
break;
}
}
// A job packet without any resolvable target gets no context.
if fallback_target_handles.is_empty() {
continue;
}
}
let code_context = !target_node_handles.is_empty();
if target_node_handles.is_empty() {
// No code targets: fall back to the work item / targeted nodes.
target_node_handles = dedupe_preserve_order(fallback_target_handles);
} else if code_context_count >= code_context_limit {
// Code-backed contexts are capped; fallback contexts are not.
continue;
}
let mut worker_source_handles = Vec::new();
let mut seen_worker_handles = BTreeSet::new();
for target_handle in target_node_handles {
if worker_source_handles.len() >= budget.max_source_windows {
break;
}
let Some(target_node) = node_by_handle.get(target_handle.as_str()) else {
continue;
};
let Some(handle) = ensure_traversal_source_handle(
root,
provenance,
&file_node_by_path,
target_node,
&budget,
&mut source_handle_by_node,
&mut seen_windows,
nodes,
edges,
)?
else {
continue;
};
// Several targets can share one window; list each handle once.
if seen_worker_handles.insert(handle.clone()) {
worker_source_handles.push(handle);
}
}
// Without any source window there is nothing to scope the context to.
if worker_source_handles.is_empty() {
continue;
}
// The context targets the work item's path, or the root when it has none.
let target = node
.path
.clone()
.unwrap_or_else(|| root.to_string_lossy().to_string());
let summary = node.detail.clone().unwrap_or_else(|| node.label.clone());
let handle = stable_handle("xwrk", &format!("{}:{}:{}", target, node.handle, summary));
let projected = SubstrateGraphNode::new(handle.clone(), "worker_context", summary.clone())
.with_property("handle", handle.clone())
.with_property("target", target.clone())
.with_property("summary", summary)
.with_property(
"source_handle_count",
worker_source_handles.len().to_string(),
)
.with_property(
"expand",
format!(
"tsift --envelope context-pack {} --budget normal",
shell_quote(&target)
),
)
.with_provenance(provenance.clone());
nodes.push(node_with_content_freshness(projected)?);
let request_edge =
SubstrateGraphEdge::new(node.handle.clone(), handle.clone(), "requests_context")
.with_property("label", "bounded worker context".to_string())
.with_provenance(provenance.clone());
edges.push(edge_with_content_freshness(request_edge)?);
for source_handle in &worker_source_handles {
let scope_edge =
SubstrateGraphEdge::new(handle.clone(), source_handle.clone(), "scopes_source")
.with_property("label", "bounded worker source window".to_string())
.with_provenance(provenance.clone());
edges.push(edge_with_content_freshness(scope_edge)?);
}
// Only contexts that were actually emitted count against the cap.
if code_context {
code_context_count += 1;
}
}
Ok(())
}
/// Rebuilds a [`TraversalNode`] from a stored graph node.
///
/// The handle comes from the `handle` property, falling back to the node id;
/// `expand` comes from the `expand` property, falling back to the traversal
/// expand command for that handle. `ref_id`, `path`, `line` and `detail` are
/// read from the properties of the same name (`line` only when it parses as
/// an integer). The full property map is carried over unchanged.
fn traversal_node_from_graph_node(root: &Path, node: SubstrateGraphNode) -> TraversalNode {
    let handle = match node.properties.get("handle") {
        Some(stored) => stored.clone(),
        None => node.id.clone(),
    };
    let expand = match node.properties.get("expand") {
        Some(stored) => stored.clone(),
        None => traversal_expand_command(root, &handle),
    };
    let ref_id = node.properties.get("ref_id").cloned();
    let path = node.properties.get("path").cloned();
    let line = match node.properties.get("line") {
        Some(raw) => raw.parse::<i64>().ok(),
        None => None,
    };
    let detail = node.properties.get("detail").cloned();

    TraversalNode {
        expand,
        handle,
        kind: node.kind,
        label: node.label,
        ref_id,
        path,
        line,
        detail,
        properties: node.properties,
    }
}
/// Loads a traversal graph from a graph store.
///
/// The projection metadata node is skipped. Edges carry their `label`
/// property (when present) and their `weight` property, defaulting to `1`
/// when the weight is missing or does not parse. Edges are subject to the
/// filtering done by `add_edge`.
///
/// # Errors
/// Fails when the store cannot list its nodes or edges.
fn traversal_graph_from_store(root: &Path, store: &impl GraphStore) -> Result<TraversalGraphBuild> {
    let mut graph = TraversalGraphBuild::default();

    let stored_nodes = store.all_nodes()?;
    for node in stored_nodes {
        let is_meta = node.kind == GRAPH_PROJECTION_META_KIND;
        if !is_meta {
            graph.add_node(traversal_node_from_graph_node(root, node));
        }
    }

    let stored_edges = store.all_edges()?;
    for edge in stored_edges {
        let label = edge.properties.get("label").cloned();
        let weight = match edge.properties.get("weight") {
            Some(raw) => raw.parse::<usize>().unwrap_or(1),
            None => 1,
        };
        graph.add_edge(&edge.from_id, &edge.to_id, &edge.kind, label, weight);
    }

    Ok(graph)
}
/// Reads every node and edge from `store` and converts them to Convex
/// projection rows.
///
/// # Errors
/// Fails when the store cannot list its nodes or edges.
pub(crate) fn convex_rows_from_graph_store(
    store: &impl GraphStore,
) -> Result<ConvexProjectionRows> {
    let nodes = store.all_nodes()?;
    let edges = store.all_edges()?;
    let projection = GraphProjection { nodes, edges };
    Ok(projection.to_convex_rows())
}
/// Describes one index the Convex backend is expected to provide
/// (see `convex_required_indexes` for the required set).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
struct ConvexRequiredIndex {
/// Table the index belongs to (`nodes` or `edges` in the required set).
table: String,
/// Index name.
name: String,
/// Indexed field names, in index order.
fields: Vec<String>,
}
/// One planned chunk of a Convex sync; listed in [`ConvexSyncReport::chunks`].
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexSyncChunk {
/// Operation name for the chunk; valid names are defined by the producer.
operation: String,
/// Chunk number within the operation (numbering base not visible here).
chunk: usize,
/// Row count (presumably `keys.len()` — confirm at the producer).
count: usize,
/// Keys of the rows in the chunk.
keys: Vec<String>,
/// Maximum number of attempts allowed for the chunk.
max_attempts: usize,
/// Retry policy description.
retry_policy: String,
}
/// Summary of the HTTP transport configuration and usage, produced by
/// `ConvexHttpTransport::summary`. It names the environment variables
/// involved but never contains the endpoint URL or the token itself.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexTransportSummary {
/// Name of the environment variable that can supply the endpoint.
endpoint_env: String,
/// Whether an endpoint is configured.
endpoint_configured: bool,
/// Name of the environment variable the auth token is read from.
auth_token_env: String,
/// Whether an auth token is present.
auth_configured: bool,
/// Remote-snapshot flag, passed through from the caller.
remote_snapshot: bool,
/// Applied-chunk count, passed through from the caller.
applied_chunks: usize,
}
/// Outcome of one transport attempt; listed in [`ConvexSyncReport::receipts`].
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexTransportReceipt {
/// Operation the attempt belonged to.
operation: String,
/// Chunk number the attempt belonged to.
chunk: usize,
/// Attempt number (numbering base not visible here).
attempt: usize,
/// Status text of the attempt.
status: String,
/// Optional message accompanying the status; serialized as `null` when absent.
message: Option<String>,
}
/// JSON body posted to the Convex graph transport endpoint.
///
/// Field names are serialized in camelCase. `projection_meta_id`, `cursor`
/// and `limit` are omitted when `None`; `projection_hash` is always present
/// (as `null` when `None`).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct ConvexTransportRequest<'a> {
/// Operation name to invoke on the backend.
operation: &'a str,
/// Chunk number of the request.
chunk: usize,
/// Projection version of the sender.
projection_version: &'a str,
/// Content hash of the local projection, when known.
projection_hash: Option<&'a str>,
/// Id of the projection metadata node, when relevant to the operation.
#[serde(skip_serializing_if = "Option::is_none")]
projection_meta_id: Option<&'a str>,
/// Node rows carried by the request.
node_rows: Vec<ConvexNodeRow>,
/// Edge rows carried by the request.
edge_rows: Vec<ConvexEdgeRow>,
/// Keys carried by the request (use depends on the operation).
keys: Vec<String>,
/// Pagination cursor (presumably for the paginated snapshot operations — confirm).
#[serde(skip_serializing_if = "Option::is_none")]
cursor: Option<String>,
/// Page size limit (presumably for the paginated snapshot operations — confirm).
#[serde(skip_serializing_if = "Option::is_none")]
limit: Option<usize>,
}
/// JSON body returned by the Convex graph transport endpoint.
///
/// Field names are read in camelCase; every field is optional.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct ConvexTransportResponse {
/// Status text, when the backend reports one.
status: Option<String>,
/// Message text, when the backend reports one.
message: Option<String>,
/// Full projection rows, when the response carries them.
rows: Option<ConvexProjectionRows>,
/// Snapshot metadata, when the response carries it.
#[serde(default)]
meta: Option<ConvexSnapshotMeta>,
/// One page of snapshot rows, when the response carries it.
#[serde(default)]
page: Option<ConvexSnapshotPage>,
}
/// Snapshot metadata returned by the backend (the `meta` field of
/// [`ConvexTransportResponse`]). Field names are read in camelCase and every
/// field falls back to its default when missing.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
struct ConvexSnapshotMeta {
// Captured for completeness/debugging; not currently consumed by the
// freshness diff (indexes are already validated against the required set
// via `convex_required_indexes`, and `page_size` is informational only).
#[serde(default)]
#[allow(dead_code)]
indexes: Vec<ConvexRequiredIndex>,
/// Node count reported by the backend; currently unread.
#[serde(default)]
#[allow(dead_code)]
node_count: Option<usize>,
/// Edge count reported by the backend; currently unread.
#[serde(default)]
#[allow(dead_code)]
edge_count: Option<usize>,
/// Projection hash reported by the backend; the only field not marked as dead code.
#[serde(default)]
projection_hash: Option<String>,
/// Page size reported by the backend; currently unread.
#[serde(default)]
#[allow(dead_code)]
page_size: Option<usize>,
}
/// Paginated snapshot page response. `rows` is either node rows or edge rows
/// depending on which operation was called; we deserialize as raw values to
/// keep the transport struct shared between both shapes, then narrow per call
/// site.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
struct ConvexSnapshotPage {
/// Raw rows of the page, narrowed to node or edge rows by the caller.
rows: Vec<serde_json::Value>,
/// Cursor for the following page (read from `nextCursor`); defaults to
/// `None` when the field is missing.
#[serde(default)]
next_cursor: Option<String>,
}
/// Result of comparing the local projection with a Convex snapshot;
/// serialized as [`ConvexSyncReport::freshness`].
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexProjectionFreshness {
/// Freshness status; `"current"` is the value set by [`Self::current`].
status: String,
/// Fail-closed flag (`false` for a current projection); the conditions that
/// set it live with the code that computes the diff.
fail_closed: bool,
/// Content hash of the local projection, when known.
local_hash: Option<String>,
/// Content hash reported for the snapshot, when known.
snapshot_hash: Option<String>,
/// Node keys reported as missing by the diff.
missing_nodes: Vec<String>,
/// Node keys reported as stale by the diff.
stale_nodes: Vec<String>,
/// Edge keys reported as missing by the diff.
missing_edges: Vec<String>,
/// Edge keys reported as stale by the diff.
stale_edges: Vec<String>,
/// Diagnostics collected while comparing.
diagnostics: Vec<String>,
}
/// Environment variable consulted for the Convex graph endpoint when no
/// explicit endpoint is passed to the transport.
const DEFAULT_CONVEX_GRAPH_URL_ENV: &str = "TSIFT_CONVEX_GRAPH_URL";
impl ConvexProjectionFreshness {
    /// Builds the freshness value for a projection that is up to date:
    /// status `"current"`, not fail-closed, and no missing or stale rows or
    /// diagnostics. The two hashes are stored as given.
    fn current(local_hash: Option<String>, snapshot_hash: Option<String>) -> Self {
        Self {
            status: String::from("current"),
            fail_closed: false,
            local_hash,
            snapshot_hash,
            missing_nodes: vec![],
            stale_nodes: vec![],
            missing_edges: vec![],
            stale_edges: vec![],
            diagnostics: vec![],
        }
    }
}
/// Serializable report of a Convex sync run (or dry run).
///
/// `scope` is omitted from the serialized output when `None`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct ConvexSyncReport {
/// Root the sync was run against, as a string.
root: String,
/// Optional scope name.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
/// Graph database the rows were read from, as a string.
graph_db: String,
/// Dry-run flag for the run.
dry_run: bool,
/// Projection version of the local rows.
projection_version: String,
/// Content hash of the local projection, when known.
projection_hash: Option<String>,
/// Indexes the backend is expected to provide.
required_indexes: Vec<ConvexRequiredIndex>,
/// Node rows to upsert.
node_upserts: Vec<ConvexNodeRow>,
/// Edge rows to upsert.
edge_upserts: Vec<ConvexEdgeRow>,
/// Node keys to tombstone.
node_tombstones: Vec<String>,
/// Edge keys to tombstone.
edge_tombstones: Vec<String>,
/// Planned chunks.
chunks: Vec<ConvexSyncChunk>,
/// Freshness comparison between local rows and the snapshot.
freshness: ConvexProjectionFreshness,
/// Transport summary, when a transport was used.
transport: Option<ConvexTransportSummary>,
/// Per-attempt transport receipts.
receipts: Vec<ConvexTransportReceipt>,
/// Diagnostics collected during the run.
diagnostics: Vec<String>,
/// Non-fatal warnings.
warnings: Vec<String>,
}
/// Lists the indexes the Convex backend must provide, in a fixed order:
/// two on `nodes` (by external id, by kind) and three on `edges` (by edge
/// key, by source + kind, by target + kind).
fn convex_required_indexes() -> Vec<ConvexRequiredIndex> {
    const REQUIRED: [(&str, &str, &[&str]); 5] = [
        ("nodes", "by_external_id", &["externalId"]),
        ("nodes", "by_kind", &["kind"]),
        ("edges", "by_edge_key", &["edgeKey"]),
        ("edges", "by_from_kind", &["fromExternalId", "kind"]),
        ("edges", "by_to_kind", &["toExternalId", "kind"]),
    ];
    REQUIRED
        .iter()
        .map(|&(table, name, fields)| ConvexRequiredIndex {
            table: table.to_owned(),
            name: name.to_owned(),
            fields: fields.iter().map(|&field| field.to_owned()).collect(),
        })
        .collect()
}
/// Loads Convex projection rows from a JSON snapshot file.
///
/// # Errors
/// Fails, with the path in the error context, when the file cannot be read or
/// its content is not valid JSON for [`ConvexProjectionRows`].
pub(crate) fn load_convex_projection_rows(path: &Path) -> Result<ConvexProjectionRows> {
    let content = fs::read_to_string(path)
        .with_context(|| format!("reading Convex projection snapshot {}", path.display()))?;
    let rows: ConvexProjectionRows = serde_json::from_str(&content)
        .with_context(|| format!("parsing Convex projection snapshot {}", path.display()))?;
    Ok(rows)
}
fn convex_projection_row_diagnostics(rows: &ConvexProjectionRows) -> Vec<String> {
let mut diagnostics = Vec::new();
let mut node_counts = BTreeMap::<&str, usize>::new();
for row in &rows.nodes {
*node_counts.entry(row.external_id.as_str()).or_default() += 1;
}
for (external_id, count) in node_counts.iter().filter(|(_, count)| **count > 1) {
diagnostics.push(format!(
"Convex snapshot contains duplicate node externalId {external_id} ({count} rows)"
));
}
let node_ids = node_counts.keys().copied().collect::<BTreeSet<_>>();
let mut edge_counts = BTreeMap::<&str, usize>::new();
for edge in &rows.edges {
*edge_counts.entry(edge.edge_key.as_str()).or_default() += 1;
if !node_ids.contains(edge.from_external_id.as_str()) {
diagnostics.push(format!(
"Convex snapshot edge {} references missing from node {}",
edge.edge_key, edge.from_external_id
));
}
if !node_ids.contains(edge.to_external_id.as_str()) {
diagnostics.push(format!(
"Convex snapshot edge {} references missing to node {}",
edge.edge_key, edge.to_external_id
));
}
let expected_key =
ConvexEdgeRow::stable_key(&edge.from_external_id, &edge.to_external_id, &edge.kind);
if edge.edge_key != expected_key {
diagnostics.push(format!(
"Convex snapshot edge {} has non-canonical key; expected {} for ({}, {}, {})",
edge.edge_key, expected_key, edge.from_external_id, edge.kind, edge.to_external_id
));
}
}
for (edge_key, count) in edge_counts.iter().filter(|(_, count)| **count > 1) {
diagnostics.push(format!(
"Convex snapshot contains duplicate edgeKey {edge_key} ({count} rows)"
));
}
diagnostics
}
/// Validates Convex projection rows.
///
/// # Errors
/// Fails with all diagnostics from [`convex_projection_row_diagnostics`]
/// joined by `"; "` when at least one problem is found.
pub(crate) fn validate_convex_projection_rows(rows: &ConvexProjectionRows) -> Result<()> {
    let diagnostics = convex_projection_row_diagnostics(rows);
    if !diagnostics.is_empty() {
        bail!("{}", diagnostics.join("; "));
    }
    Ok(())
}
/// HTTP client configuration for talking to the Convex graph backend.
pub(crate) struct ConvexHttpTransport {
/// URL requests are posted to.
endpoint: String,
/// Name of the environment variable the auth token was read from.
auth_token_env: String,
/// Bearer token sent in the `Authorization` header, when one is configured.
auth_token: Option<String>,
}
impl ConvexHttpTransport {
/// Builds a transport from an explicit endpoint or the environment.
///
/// The endpoint is taken from `endpoint` when it is non-blank, otherwise from
/// the `TSIFT_CONVEX_GRAPH_URL` environment variable when that is non-blank.
/// The auth token is read from the environment variable named by
/// `auth_token_env`; a missing or blank value means "no token".
///
/// # Errors
/// Fails when neither source provides a non-blank endpoint. Blank values are
/// rejected here so a misconfiguration is reported up front rather than as an
/// opaque request failure later.
fn from_options(endpoint: Option<&str>, auth_token_env: &str) -> Result<Self> {
    let endpoint = endpoint
        .map(str::to_string)
        .filter(|value| !value.trim().is_empty())
        .or_else(|| {
            env::var(DEFAULT_CONVEX_GRAPH_URL_ENV)
                .ok()
                .filter(|value| !value.trim().is_empty())
        })
        .context("Convex transport requires --endpoint or TSIFT_CONVEX_GRAPH_URL")?;
    let auth_token = env::var(auth_token_env)
        .ok()
        .filter(|value| !value.trim().is_empty());
    Ok(Self {
        endpoint,
        auth_token_env: auth_token_env.to_string(),
        auth_token,
    })
}
fn summary(&self, remote_snapshot: bool, applied_chunks: usize) -> ConvexTransportSummary {
ConvexTransportSummary {
endpoint_env: DEFAULT_CONVEX_GRAPH_URL_ENV.to_string(),
endpoint_configured: true,
auth_token_env: self.auth_token_env.clone(),
auth_configured: self.auth_token.is_some(),
remote_snapshot,
applied_chunks,
}
}
fn post(&self, request: &ConvexTransportRequest<'_>) -> Result<ConvexTransportResponse> {
let mut builder = ureq::post(&self.endpoint);
if let Some(token) = &self.auth_token {
builder = builder.header("Authorization", &format!("Bearer {token}"));
}
builder
.send_json(request)
.with_context(|| format!("calling Convex graph transport {}", self.endpoint))?
.body_mut()
.read_json::<ConvexTransportResponse>()
.with_context(|| format!("parsing Convex graph transport response {}", self.endpoint))
}
/// Fetch a full snapshot of the Convex graph backend.
///
/// Uses the paginated `snapshot_meta` + `snapshot_nodes_page` +
/// `snapshot_edges_page` triplet so the call works on tables larger than
/// ~5k rows (the single-shot `snapshot` query hits Convex's 15s per-request
/// syscall budget at that scale; see `#convexsnapshotscale`).
///
/// Falls back to the legacy single-shot `snapshot` operation if the
/// backend doesn't recognize `snapshot_meta` (older deployments that
/// haven't redeployed the new schema).
fn fetch_snapshot(
&self,
projection_version: &str,
scope: Option<&str>,
local_hash: Option<&str>,
local_rows: Option<&ConvexProjectionRows>,
) -> Result<(ConvexProjectionRows, Vec<String>)> {
match self.fetch_snapshot_paginated(projection_version, scope, local_hash, local_rows) {
Ok(rows) => Ok(rows),
Err(err) => {
// Only fall through to the legacy path if the failure looks
// like "operation unknown" (older backend). Any other failure
// (HTTP timeout, deserialization mismatch) should surface so
// the operator sees the real cause.
let msg = format!("{err:#}");
let is_unknown_op = msg.contains("unknown operation")
|| msg.contains("snapshot_meta")
|| msg.contains("404");
if !is_unknown_op {
return Err(err);
}
self.fetch_snapshot_legacy(projection_version)
.map(|rows| (rows, Vec::new()))
}
}
}
fn fetch_snapshot_legacy(&self, projection_version: &str) -> Result<ConvexProjectionRows> {
let response = self.post(&ConvexTransportRequest {
operation: "snapshot",
chunk: 0,
projection_version,
projection_hash: None,
projection_meta_id: None,
node_rows: Vec::new(),
edge_rows: Vec::new(),
keys: Vec::new(),
cursor: None,
limit: None,
})?;
response
.rows
.context("Convex snapshot response did not include rows")
}
fn fetch_snapshot_paginated(
&self,
projection_version: &str,
scope: Option<&str>,
local_hash: Option<&str>,
local_rows: Option<&ConvexProjectionRows>,
) -> Result<(ConvexProjectionRows, Vec<String>)> {
let projection_meta_id = graph_projection_meta_id(scope);
let meta_response = self.post(&ConvexTransportRequest {
operation: "snapshot_meta",
chunk: 0,
projection_version,
projection_hash: None,
projection_meta_id: Some(&projection_meta_id),
node_rows: Vec::new(),
edge_rows: Vec::new(),
keys: Vec::new(),
cursor: None,
limit: None,
})?;
if matches!(meta_response.status.as_deref(), Some("error")) {
anyhow::bail!(
"Convex snapshot_meta returned error: {}",
meta_response.message.unwrap_or_default()
);
}
let meta = meta_response
.meta
.context("Convex snapshot_meta response did not include meta")?;
if let (Some(remote_hash), Some(local_hash), Some(local_rows)) =
(meta.projection_hash.as_deref(), local_hash, local_rows)
&& remote_hash == local_hash
{
return Ok((
local_rows.clone(),
vec![
"remote projection hash matched local graph; skipped full row-page snapshot diff"
.to_string(),
],
));
}
let mut nodes: Vec<ConvexNodeRow> = Vec::with_capacity(meta.node_count.unwrap_or_default());
let mut node_cursor: Option<String> = None;
loop {
let response = self.post(&ConvexTransportRequest {
operation: "snapshot_nodes_page",
chunk: 0,
projection_version,
projection_hash: None,
projection_meta_id: None,
node_rows: Vec::new(),
edge_rows: Vec::new(),
keys: Vec::new(),
cursor: node_cursor.clone(),
limit: None,
})?;
let page = response
.page
.context("Convex snapshot_nodes_page response did not include page")?;
for raw in page.rows {
let row: ConvexNodeRow =
serde_json::from_value(raw).context("decoding Convex snapshot node row")?;
nodes.push(row);
}
match page.next_cursor {
Some(next) => node_cursor = Some(next),
None => break,
}
}
let mut edges: Vec<ConvexEdgeRow> = Vec::with_capacity(meta.edge_count.unwrap_or_default());
let mut edge_cursor: Option<String> = None;
loop {
let response = self.post(&ConvexTransportRequest {
operation: "snapshot_edges_page",
chunk: 0,
projection_version,
projection_hash: None,
projection_meta_id: None,
node_rows: Vec::new(),
edge_rows: Vec::new(),
keys: Vec::new(),
cursor: edge_cursor.clone(),
limit: None,
})?;
let page = response
.page
.context("Convex snapshot_edges_page response did not include page")?;
for raw in page.rows {
let row: ConvexEdgeRow =
serde_json::from_value(raw).context("decoding Convex snapshot edge row")?;
edges.push(row);
}
match page.next_cursor {
Some(next) => edge_cursor = Some(next),
None => break,
}
}
Ok((ConvexProjectionRows { nodes, edges }, Vec::new()))
}
fn apply_chunk(
&self,
report: &ConvexSyncReport,
chunk: &ConvexSyncChunk,
) -> Result<ConvexTransportReceipt> {
let node_rows = if chunk.operation == "upsert_nodes" {
report
.node_upserts
.iter()
.filter(|row| chunk.keys.contains(&row.external_id))
.cloned()
.collect()
} else {
Vec::new()
};
let edge_rows = if chunk.operation == "upsert_edges" {
report
.edge_upserts
.iter()
.filter(|row| chunk.keys.contains(&row.edge_key))
.cloned()
.collect()
} else {
Vec::new()
};
let request = ConvexTransportRequest {
operation: &chunk.operation,
chunk: chunk.chunk,
projection_version: &report.projection_version,
projection_hash: report.projection_hash.as_deref(),
projection_meta_id: None,
node_rows,
edge_rows,
keys: chunk.keys.clone(),
cursor: None,
limit: None,
};
let mut last_error = None;
for attempt in 1..=chunk.max_attempts {
match self.post(&request) {
Ok(response) => {
return Ok(ConvexTransportReceipt {
operation: chunk.operation.clone(),
chunk: chunk.chunk,
attempt,
status: response.status.unwrap_or_else(|| "ok".to_string()),
message: response.message,
});
}
Err(err) => {
last_error = Some(err);
if attempt < chunk.max_attempts {
std::thread::sleep(Duration::from_millis(100 * attempt as u64));
}
}
}
}
Err(last_error.unwrap_or_else(|| anyhow::anyhow!("Convex transport chunk failed")))
.with_context(|| format!("applying Convex {} chunk {}", chunk.operation, chunk.chunk))
}
}
/// Look up the projection content hash recorded in `rows`.
///
/// Finds the first node whose id is the projection-meta id for `scope` and
/// whose kind is `GRAPH_PROJECTION_META_KIND`, then returns its
/// `content_hash` property. Returns `None` when there is no such node or the
/// property is absent on it.
fn convex_projection_hash(rows: &ConvexProjectionRows, scope: Option<&str>) -> Option<String> {
    let meta_id = graph_projection_meta_id(scope);
    for row in &rows.nodes {
        if row.external_id == meta_id && row.kind == GRAPH_PROJECTION_META_KIND {
            // Only the first matching row is consulted, even if it has no hash.
            return row.properties.get("content_hash").cloned();
        }
    }
    None
}
/// Compare the local projection against a Convex snapshot and classify it.
///
/// * No snapshot: status `"unchecked"`, not fail-closed (local dry-run plan).
/// * Snapshot whose hash equals a present local hash and which contains every
///   local row unchanged: `ConvexProjectionFreshness::current`.
/// * Anything else: status `"stale"`, fail-closed, with the missing/stale keys
///   and at least one diagnostic explaining why.
///
/// Only local rows are checked against the snapshot here; rows that exist
/// solely in the snapshot are not listed by this function.
fn convex_projection_freshness(
    local: &ConvexProjectionRows,
    snapshot: Option<&ConvexProjectionRows>,
    scope: Option<&str>,
) -> ConvexProjectionFreshness {
    let local_hash = convex_projection_hash(local, scope);
    let Some(snapshot) = snapshot else {
        return ConvexProjectionFreshness {
            status: "unchecked".to_string(),
            fail_closed: false,
            local_hash,
            snapshot_hash: None,
            missing_nodes: Vec::new(),
            stale_nodes: Vec::new(),
            missing_edges: Vec::new(),
            stale_edges: Vec::new(),
            diagnostics: vec![
                "no Convex snapshot supplied; sync output is a local dry-run plan".to_string(),
            ],
        };
    };
    let snapshot_hash = convex_projection_hash(snapshot, scope);
    let snapshot_nodes = snapshot
        .nodes
        .iter()
        .map(|row| (row.external_id.as_str(), row))
        .collect::<BTreeMap<_, _>>();
    let snapshot_edges = snapshot
        .edges
        .iter()
        .map(|row| (row.edge_key.as_str(), row))
        .collect::<BTreeMap<_, _>>();

    // Classify every local row as present-and-equal, stale, or missing.
    let mut missing_nodes = Vec::new();
    let mut stale_nodes = Vec::new();
    for row in &local.nodes {
        match snapshot_nodes.get(row.external_id.as_str()) {
            Some(snapshot_row) if *snapshot_row == row => {}
            Some(_) => stale_nodes.push(row.external_id.clone()),
            None => missing_nodes.push(row.external_id.clone()),
        }
    }
    let mut missing_edges = Vec::new();
    let mut stale_edges = Vec::new();
    for row in &local.edges {
        match snapshot_edges.get(row.edge_key.as_str()) {
            Some(snapshot_row) if *snapshot_row == row => {}
            Some(_) => stale_edges.push(row.edge_key.clone()),
            None => missing_edges.push(row.edge_key.clone()),
        }
    }

    // A missing local hash can never count as current, even if both sides
    // agree on "missing".
    let hash_current = local_hash.is_some() && local_hash == snapshot_hash;
    let rows_current = missing_nodes.is_empty()
        && stale_nodes.is_empty()
        && missing_edges.is_empty()
        && stale_edges.is_empty();
    if hash_current && rows_current {
        return ConvexProjectionFreshness::current(local_hash, snapshot_hash);
    }

    let mut diagnostics = Vec::new();
    if local_hash != snapshot_hash {
        diagnostics.push(format!(
            "projection hash mismatch: local={} snapshot={}",
            local_hash.as_deref().unwrap_or("missing"),
            snapshot_hash.as_deref().unwrap_or("missing")
        ));
    } else if local_hash.is_none() {
        // Both hashes are absent: without this, a row-identical snapshot would
        // be reported stale with no explanation at all.
        diagnostics.push(
            "projection hash missing from both local graph and Convex snapshot; freshness cannot be confirmed"
                .to_string(),
        );
    }
    if !missing_nodes.is_empty() || !missing_edges.is_empty() {
        diagnostics.push(format!(
            "Convex snapshot is missing {} node(s) and {} edge(s)",
            missing_nodes.len(),
            missing_edges.len()
        ));
    }
    if !stale_nodes.is_empty() || !stale_edges.is_empty() {
        diagnostics.push(format!(
            "Convex snapshot has {} stale node row(s) and {} stale edge row(s)",
            stale_nodes.len(),
            stale_edges.len()
        ));
    }
    ConvexProjectionFreshness {
        status: "stale".to_string(),
        fail_closed: true,
        local_hash,
        snapshot_hash,
        missing_nodes,
        stale_nodes,
        missing_edges,
        stale_edges,
        diagnostics,
    }
}
/// Check that the snapshot file at `snapshot_path` is current with respect to
/// the local graph store for `root`/`scope`.
///
/// # Errors
/// Fails when the local store cannot be opened or read, the snapshot cannot be
/// loaded or fails row validation, or the freshness comparison is fail-closed
/// (in which case the freshness diagnostics are included in the message).
pub(crate) fn verify_convex_projection_snapshot(
    root: &Path,
    scope: Option<&str>,
    snapshot_path: &Path,
) -> Result<()> {
    let db_path = graph_substrate_db_path(root, scope);
    let local_store = SqliteGraphStore::open_read_only_resilient(&db_path)?;
    let local_rows = convex_rows_from_graph_store(&local_store)?;

    let snapshot_rows = load_convex_projection_rows(snapshot_path)?;
    validate_convex_projection_rows(&snapshot_rows)?;

    let report = convex_projection_freshness(&local_rows, Some(&snapshot_rows), scope);
    if !report.fail_closed {
        return Ok(());
    }
    bail!(
        "Convex graph projection is not current for {}: {}",
        root.display(),
        report.diagnostics.join("; ")
    )
}
fn convex_rows_diff(
local: &ConvexProjectionRows,
snapshot: Option<&ConvexProjectionRows>,
) -> (
Vec<ConvexNodeRow>,
Vec<ConvexEdgeRow>,
Vec<String>,
Vec<String>,
) {
let Some(snapshot) = snapshot else {
return (
local.nodes.clone(),
local.edges.clone(),
Vec::new(),
Vec::new(),
);
};
let local_nodes = local
.nodes
.iter()
.map(|row| (row.external_id.as_str(), row))
.collect::<BTreeMap<_, _>>();
let local_edges = local
.edges
.iter()
.map(|row| (row.edge_key.as_str(), row))
.collect::<BTreeMap<_, _>>();
let snapshot_nodes = snapshot
.nodes
.iter()
.map(|row| (row.external_id.as_str(), row))
.collect::<BTreeMap<_, _>>();
let snapshot_edges = snapshot
.edges
.iter()
.map(|row| (row.edge_key.as_str(), row))
.collect::<BTreeMap<_, _>>();
let node_upserts = local
.nodes
.iter()
.filter(|row| {
snapshot_nodes
.get(row.external_id.as_str())
.is_none_or(|snapshot_row| *snapshot_row != *row)
})
.cloned()
.collect::<Vec<_>>();
let edge_upserts = local
.edges
.iter()
.filter(|row| {
snapshot_edges
.get(row.edge_key.as_str())
.is_none_or(|snapshot_row| *snapshot_row != *row)
})
.cloned()
.collect::<Vec<_>>();
let node_tombstones = snapshot
.nodes
.iter()
.filter(|row| !local_nodes.contains_key(row.external_id.as_str()))
.map(|row| row.external_id.clone())
.collect::<Vec<_>>();
let edge_tombstones = snapshot
.edges
.iter()
.filter(|row| !local_edges.contains_key(row.edge_key.as_str()))
.map(|row| row.edge_key.clone())
.collect::<Vec<_>>();
(node_upserts, edge_upserts, node_tombstones, edge_tombstones)
}
/// Append one `ConvexSyncChunk` per `size`-sized slice of `keys`.
///
/// Chunks are numbered from 1 within this operation and each allows three
/// attempts. Nothing is appended for an empty key list. `size` must be
/// non-zero whenever `keys` is non-empty (slice chunking panics on zero).
fn push_sync_chunks(
    chunks: &mut Vec<ConvexSyncChunk>,
    operation: &str,
    keys: Vec<String>,
    size: usize,
) {
    if keys.is_empty() {
        return;
    }
    let planned = keys
        .chunks(size)
        .enumerate()
        .map(|(index, batch)| ConvexSyncChunk {
            operation: operation.to_string(),
            chunk: index + 1,
            count: batch.len(),
            keys: batch.to_vec(),
            max_attempts: 3,
            retry_policy:
                "retry the whole chunk; rows are idempotent by externalId/edgeKey, stop on a repeated partial failure"
                    .to_string(),
        });
    chunks.extend(planned);
}
/// Build the Convex sync plan for the project containing `path`.
///
/// Refreshes the traversal graph store, reads the local projection rows back
/// from it, compares them with `snapshot` (if any) and lays the differences
/// out as ordered chunks: edge deletes, node upserts, edge upserts, node
/// deletes. No network call is made here; `transport` and `receipts` are left
/// empty in the returned report.
///
/// # Errors
/// Fails when `chunk_size` is zero, the project root cannot be resolved, or
/// the graph store cannot be written or read.
pub(crate) fn build_convex_sync_report_with_snapshot(
    path: &Path,
    scope: Option<&str>,
    snapshot: Option<ConvexProjectionRows>,
    chunk_size: usize,
    dry_run: bool,
) -> Result<ConvexSyncReport> {
    if chunk_size == 0 {
        bail!("--chunk-size must be greater than zero");
    }
    let root = lint::resolve_project_root_or_canonical_path(path)?;
    // The store is (re)written first so the rows read below are up to date.
    let (graph, _refresh) = write_traversal_graph_store(&root, path, scope)?;
    let graph_db = graph_substrate_db_path(&root, scope);
    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
    let local = convex_rows_from_graph_store(&store)?;

    let remote = snapshot.as_ref();
    let freshness = convex_projection_freshness(&local, remote, scope);
    let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
        convex_rows_diff(&local, remote);

    let node_upsert_keys: Vec<String> = node_upserts
        .iter()
        .map(|row| row.external_id.clone())
        .collect();
    let edge_upsert_keys: Vec<String> = edge_upserts
        .iter()
        .map(|row| row.edge_key.clone())
        .collect();

    // Order matters: edges are removed before their nodes, and nodes are
    // written before the edges that reference them.
    let mut chunks = Vec::new();
    let plan = [
        ("delete_edges", edge_tombstones.clone()),
        ("upsert_nodes", node_upsert_keys),
        ("upsert_edges", edge_upsert_keys),
        ("delete_nodes", node_tombstones.clone()),
    ];
    for (operation, keys) in plan {
        push_sync_chunks(&mut chunks, operation, keys, chunk_size);
    }

    let mut diagnostics = Vec::new();
    diagnostics.push(
        "apply node upserts before edge upserts; apply edge tombstones before node tombstones"
            .to_string(),
    );
    if dry_run {
        diagnostics.push("dry-run only: no Convex network mutation was attempted".to_string());
    }
    if freshness.fail_closed {
        diagnostics.push(
            "Convex-backed traverse/context-pack reads must fail closed until this plan is applied"
                .to_string(),
        );
    }

    let projection_hash = convex_projection_hash(&local, scope);
    Ok(ConvexSyncReport {
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        graph_db: graph_db.to_string_lossy().to_string(),
        dry_run,
        projection_version: GRAPH_PROJECTION_VERSION.to_string(),
        projection_hash,
        required_indexes: convex_required_indexes(),
        node_upserts,
        edge_upserts,
        node_tombstones,
        edge_tombstones,
        chunks,
        freshness,
        transport: None,
        receipts: Vec::new(),
        diagnostics,
        warnings: graph.warnings,
    })
}
/// Test helper: build a dry-run sync report, optionally loading the snapshot
/// from a file first.
#[cfg(test)]
fn build_convex_sync_report(
    path: &Path,
    scope: Option<&str>,
    snapshot_path: Option<&Path>,
    chunk_size: usize,
) -> Result<ConvexSyncReport> {
    let snapshot = match snapshot_path {
        Some(snapshot_file) => Some(load_convex_projection_rows(snapshot_file)?),
        None => None,
    };
    build_convex_sync_report_with_snapshot(path, scope, snapshot, chunk_size, true)
}
/// Print a sync report to stdout for humans.
///
/// `compact` emits a single summary line; otherwise a multi-line listing with
/// counts, transport details (when present), per-chunk receipts and finally
/// the report diagnostics followed by the freshness diagnostics.
pub(crate) fn print_convex_sync_human(report: &ConvexSyncReport, compact: bool) {
    let node_upsert_count = report.node_upserts.len();
    let node_tombstone_count = report.node_tombstones.len();
    let edge_upsert_count = report.edge_upserts.len();
    let edge_tombstone_count = report.edge_tombstones.len();
    let chunk_count = report.chunks.len();
    let freshness_status = &report.freshness.status;

    if compact {
        println!(
            "convex-sync nodes:+{} -{} edges:+{} -{} chunks:{} freshness:{}",
            node_upsert_count,
            node_tombstone_count,
            edge_upsert_count,
            edge_tombstone_count,
            chunk_count,
            freshness_status
        );
        return;
    }

    let mode = if report.dry_run { "dry-run" } else { "apply" };
    println!("Convex graph sync {}", mode);
    println!("root: {}", report.root);
    println!("graph_db: {}", report.graph_db);
    println!(
        "upserts: {} node(s), {} edge(s)",
        node_upsert_count, edge_upsert_count
    );
    println!(
        "tombstones: {} node(s), {} edge(s)",
        node_tombstone_count, edge_tombstone_count
    );
    println!("chunks: {}", chunk_count);
    println!("freshness: {}", freshness_status);

    match &report.transport {
        Some(transport) => println!(
            "transport: endpoint_env={} auth_env={} applied_chunks={}",
            transport.endpoint_env, transport.auth_token_env, transport.applied_chunks
        ),
        None => {}
    }
    for receipt in &report.receipts {
        println!(
            "receipt: {} chunk {} attempt {} {}",
            receipt.operation, receipt.chunk, receipt.attempt, receipt.status
        );
    }
    // Report-level diagnostics first, then the freshness ones.
    for diagnostic in &report.diagnostics {
        println!("- {}", diagnostic);
    }
    for diagnostic in &report.freshness.diagnostics {
        println!("- {}", diagnostic);
    }
}
/// Borrowed bundle of options for a Convex sync invocation.
///
/// NOTE(review): field meanings below are inferred from names and from the
/// sibling transport/report code; confirm against the consuming command.
pub(crate) struct ConvexSyncOptions<'a> {
/// Project path to sync.
path: &'a Path,
/// Optional graph scope.
scope: Option<&'a str>,
/// Optional path to a snapshot file.
snapshot: Option<&'a Path>,
/// Number of keys per sync chunk.
chunk_size: usize,
/// Presumably: fetch the snapshot from the remote backend.
remote_snapshot: bool,
/// Presumably: apply the plan instead of a dry run.
apply: bool,
/// Optional explicit transport endpoint.
endpoint: Option<&'a str>,
/// Name of the environment variable holding the auth token.
auth_token_env: &'a str,
}
/// Serializable description of one schema field: static name, value type and
/// description strings.
#[derive(Serialize)]
struct GraphDbSchemaField {
name: &'static str,
value_type: &'static str,
description: &'static str,
}
/// Serializable description of one schema operation: a static command string
/// and its description.
#[derive(Serialize)]
struct GraphDbSchemaOperation {
command: &'static str,
description: &'static str,
}
/// Serializable name/version/description triple for one contract entry listed
/// in `GraphDbSchema::contract_versions`.
#[derive(Serialize)]
struct GraphDbSchemaContract {
name: &'static str,
version: &'static str,
description: &'static str,
}
/// Serializable schema document: contract versions, node and edge field
/// descriptions, and the available operations.
#[derive(Serialize)]
struct GraphDbSchema {
contract_versions: Vec<GraphDbSchemaContract>,
node_fields: Vec<GraphDbSchemaField>,
edge_fields: Vec<GraphDbSchemaField>,
operations: Vec<GraphDbSchemaOperation>,
}
/// Freshness section embedded in `GraphDbReport`; round-trips through serde.
///
/// Carries a status string, a fail-closed flag, optional projection version,
/// content hash and source watermark, plus free-form diagnostics.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbFreshnessReport {
status: String,
fail_closed: bool,
projection_version: Option<String>,
content_hash: Option<String>,
source_watermark: Option<String>,
diagnostics: Vec<String>,
}
/// Serializable readiness verdict: status, fail-closed flag, a reason string,
/// diagnostics and suggested follow-up commands.
#[derive(Clone, Debug, Serialize)]
struct GraphEffectivenessReadiness {
status: String,
fail_closed: bool,
reason: String,
diagnostics: Vec<String>,
next_commands: Vec<String>,
}
/// A single property `key`/`value` pair used as a query filter and echoed in
/// `GraphDbPageReport::property_filters`.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbPropertyFilter {
key: String,
value: String,
}
/// Optional paging and filtering inputs for a graph-db query.
///
/// `Default` yields no cursor, no limit and no property filters.
#[derive(Clone, Debug, Default)]
struct GraphDbQueryOptions {
cursor: Option<String>,
limit: Option<usize>,
property_filters: Vec<GraphDbPropertyFilter>,
}
/// Paging section of a graph-db report.
///
/// `cursor`, `limit` and `next_cursor` are omitted from serialized output when
/// `None`; `diagnostics` is omitted when empty.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbPageReport {
#[serde(skip_serializing_if = "Option::is_none")]
cursor: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
limit: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
next_cursor: Option<String>,
returned_nodes: usize,
returned_edges: usize,
truncated: bool,
property_filters: Vec<GraphDbPropertyFilter>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
diagnostics: Vec<String>,
}
/// Local alias for `resolution::RankedNeighbor`, used in `GraphDbReport`.
type GraphDbRankedNeighbor = resolution::RankedNeighbor;
/// Serializable summary attached to `GraphDbReport::knowledge_retrieval`.
///
/// Records the retrieval mode and query, seed parameters and counts, the
/// depth/limit used, resulting node/edge counts, a truncation flag, and
/// descriptive strings for traversal, freshness boundary and privacy boundary.
#[derive(Clone, Debug, Serialize, PartialEq)]
struct GraphDbKnowledgeRetrieval {
mode: String,
query: String,
seed_kind: String,
seed_limit: usize,
seed_count: usize,
depth: usize,
limit: usize,
node_count: usize,
edge_count: usize,
truncated: bool,
traversal: String,
freshness_boundary: String,
privacy_boundary: String,
diagnostics: Vec<String>,
}
/// In-memory subgraph result: nodes, edges, a truncation flag and
/// diagnostics. Not serializable (no serde derives).
struct GraphDbSemanticSeededSubgraph {
nodes: Vec<SubstrateGraphNode>,
edges: Vec<SubstrateGraphEdge>,
truncated: bool,
diagnostics: Vec<String>,
}
/// Local alias for `resolution::NeighborhoodRankingGate`, used in
/// `GraphDbReport`.
type GraphDbNeighborhoodRankingGate = resolution::NeighborhoodRankingGate;
/// Top-level serializable report for a graph-db query.
///
/// `root`, `backend`, `query` and `freshness` are always emitted. Every other
/// section is optional: `Option` fields are skipped when `None` and `Vec`
/// fields are skipped when empty, so the serialized shape depends on which
/// sections were populated.
#[derive(Serialize)]
struct GraphDbReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
backend: String,
query: String,
freshness: GraphDbFreshnessReport,
#[serde(skip_serializing_if = "Option::is_none")]
schema: Option<GraphDbSchema>,
#[serde(skip_serializing_if = "Option::is_none")]
node: Option<SubstrateGraphNode>,
#[serde(skip_serializing_if = "Option::is_none")]
edge: Option<SubstrateGraphEdge>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
nodes: Vec<SubstrateGraphNode>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
edges: Vec<SubstrateGraphEdge>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
ranked_neighbors: Vec<GraphDbRankedNeighbor>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
semantic_related: Vec<SemanticRelatedItem>,
#[serde(skip_serializing_if = "Option::is_none")]
neighborhood_ranking_gate: Option<GraphDbNeighborhoodRankingGate>,
#[serde(skip_serializing_if = "Option::is_none")]
knowledge_retrieval: Option<GraphDbKnowledgeRetrieval>,
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<substrate::GraphPath>,
#[serde(skip_serializing_if = "Option::is_none")]
page: Option<GraphDbPageReport>,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// In-memory, read-only `GraphStore` built from Convex projection rows.
///
/// All mutating `GraphStore` methods on this type fail; see `from_rows` for
/// how the indexes are populated.
struct ExperimentalReadOnlyGraphStore {
/// Backend this adapter stands in for; its name appears in error messages.
backend: GraphDbExperimentalBackend,
/// Nodes keyed by node id.
nodes: BTreeMap<String, SubstrateGraphNode>,
/// Edges keyed by `graph_db_edge_key`.
edges: BTreeMap<String, SubstrateGraphEdge>,
/// Sorted node ids grouped by node kind.
node_ids_by_kind: BTreeMap<String, Vec<String>>,
/// Edge keys grouped by source node id, ordered by (to_id, kind, key).
outgoing_edge_keys_by_from: BTreeMap<String, Vec<String>>,
}
impl ExperimentalReadOnlyGraphStore {
fn from_rows(backend: GraphDbExperimentalBackend, rows: &ConvexProjectionRows) -> Result<Self> {
validate_convex_projection_rows(rows)?;
let nodes = rows
.nodes
.iter()
.map(|row| {
let node = SubstrateGraphNode {
id: row.external_id.clone(),
kind: row.kind.clone(),
label: row.label.clone(),
properties: row.properties.clone(),
provenance: row.provenance.clone(),
freshness: row.freshness.clone(),
};
(node.id.clone(), node)
})
.collect::<BTreeMap<_, _>>();
let edges = rows
.edges
.iter()
.map(|row| {
let edge = SubstrateGraphEdge {
id: row.edge_key.clone(),
from_id: row.from_external_id.clone(),
to_id: row.to_external_id.clone(),
kind: row.kind.clone(),
properties: row.properties.clone(),
provenance: row.provenance.clone(),
freshness: row.freshness.clone(),
};
(graph_db_edge_key(&edge), edge)
})
.collect::<BTreeMap<_, _>>();
let mut node_ids_by_kind = BTreeMap::<String, Vec<String>>::new();
for node in nodes.values() {
node_ids_by_kind
.entry(node.kind.clone())
.or_default()
.push(node.id.clone());
}
for ids in node_ids_by_kind.values_mut() {
ids.sort();
}
let mut outgoing_edge_keys_by_from = BTreeMap::<String, Vec<String>>::new();
for edge in edges.values() {
outgoing_edge_keys_by_from
.entry(edge.from_id.clone())
.or_default()
.push(graph_db_edge_key(edge));
}
for edge_keys in outgoing_edge_keys_by_from.values_mut() {
edge_keys.sort_by(|left_key, right_key| {
let left = &edges[left_key];
let right = &edges[right_key];
left.to_id
.cmp(&right.to_id)
.then(left.kind.cmp(&right.kind))
.then(left_key.cmp(right_key))
});
}
Ok(Self {
backend,
nodes,
edges,
node_ids_by_kind,
outgoing_edge_keys_by_from,
})
}
}
/// Read-only `GraphStore` implementation backed by the in-memory indexes.
///
/// Every mutating method fails with a "read-only" error naming the backend.
impl GraphStore for ExperimentalReadOnlyGraphStore {
    /// Always fails: the adapter is read-only.
    fn upsert_node(&self, _node: &SubstrateGraphNode) -> Result<()> {
        bail!("{} backend-eval adapter is read-only", self.backend.name())
    }

    /// Always fails: the adapter is read-only.
    fn upsert_edge(&self, _edge: &SubstrateGraphEdge) -> Result<()> {
        bail!("{} backend-eval adapter is read-only", self.backend.name())
    }

    /// Always fails: the adapter is read-only.
    fn delete_node(&self, _id: &str) -> Result<usize> {
        bail!("{} backend-eval adapter is read-only", self.backend.name())
    }

    /// Always fails: the adapter is read-only.
    fn delete_edge(&self, _from_id: &str, _to_id: &str, _kind: &str) -> Result<usize> {
        bail!("{} backend-eval adapter is read-only", self.backend.name())
    }

    /// Look up one node by id.
    fn node(&self, id: &str) -> Result<Option<SubstrateGraphNode>> {
        Ok(self.nodes.get(id).cloned())
    }

    /// All nodes, in node-id order.
    fn all_nodes(&self) -> Result<Vec<SubstrateGraphNode>> {
        Ok(self.nodes.values().cloned().collect())
    }

    /// All edges, ordered by (from_id, kind, to_id).
    fn all_edges(&self) -> Result<Vec<SubstrateGraphEdge>> {
        let mut edges = self.edges.values().cloned().collect::<Vec<_>>();
        edges.sort_by(|left, right| {
            left.from_id
                .cmp(&right.from_id)
                .then_with(|| left.kind.cmp(&right.kind))
                .then_with(|| left.to_id.cmp(&right.to_id))
        });
        Ok(edges)
    }

    /// `(node count, edge count)`.
    fn graph_counts(&self) -> Result<(usize, usize)> {
        Ok((self.nodes.len(), self.edges.len()))
    }

    /// Deterministically pick one non-self-loop edge, optionally of `kind`.
    ///
    /// The pick is the smallest edge by (from_id, kind, to_id); among equal
    /// candidates the one with the smallest edge key wins, because `min_by`
    /// keeps the first minimum and the map iterates in key order.
    fn sample_edge(&self, kind: Option<&str>) -> Result<Option<SubstrateGraphEdge>> {
        Ok(self
            .edges
            .values()
            .filter(|edge| edge.from_id != edge.to_id)
            .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
            .min_by(|left, right| {
                left.from_id
                    .cmp(&right.from_id)
                    .then_with(|| left.kind.cmp(&right.kind))
                    .then_with(|| left.to_id.cmp(&right.to_id))
            })
            .cloned())
    }

    /// Deterministically pick one non-self-loop edge that has at least one
    /// property, together with a filter built from that edge's first property.
    ///
    /// Candidates are ordered by filter key, then filter value, then edge key.
    fn sample_edge_with_property(
        &self,
    ) -> Result<Option<(SubstrateGraphEdge, GraphPropertyFilter)>> {
        Ok(self
            .edges
            .values()
            .filter(|edge| edge.from_id != edge.to_id)
            .filter_map(|edge| {
                edge.properties.iter().next().map(|(key, value)| {
                    (
                        edge,
                        GraphPropertyFilter {
                            key: key.clone(),
                            value: value.clone(),
                        },
                    )
                })
            })
            .min_by(|(left_edge, left_filter), (right_edge, right_filter)| {
                left_filter
                    .key
                    .cmp(&right_filter.key)
                    .then_with(|| left_filter.value.cmp(&right_filter.value))
                    .then_with(|| graph_db_edge_key(left_edge).cmp(&graph_db_edge_key(right_edge)))
            })
            .map(|(edge, filter)| (edge.clone(), filter)))
    }

    /// Nodes of the given kind, in sorted id order; empty for unknown kinds.
    fn nodes_by_kind(&self, kind: &str) -> Result<Vec<SubstrateGraphNode>> {
        let Some(ids) = self.node_ids_by_kind.get(kind) else {
            return Ok(Vec::new());
        };
        Ok(ids
            .iter()
            .filter_map(|id| self.nodes.get(id).cloned())
            .collect())
    }

    /// Edges leaving `from_id`, optionally restricted to `kind`, in the
    /// precomputed (to_id, kind, key) order.
    fn outgoing_edges(&self, from_id: &str, kind: Option<&str>) -> Result<Vec<SubstrateGraphEdge>> {
        let Some(keys) = self.outgoing_edge_keys_by_from.get(from_id) else {
            return Ok(Vec::new());
        };
        Ok(keys
            .iter()
            .filter_map(|key| self.edges.get(key))
            .filter(|edge| kind.is_none_or(|kind| edge.kind == kind))
            .cloned()
            .collect())
    }

    /// Edges whose both endpoints are in `node_ids`, in edge-key order.
    fn edges_between_nodes(&self, node_ids: &BTreeSet<String>) -> Result<Vec<SubstrateGraphEdge>> {
        Ok(self
            .edges
            .values()
            .filter(|edge| node_ids.contains(&edge.from_id) && node_ids.contains(&edge.to_id))
            .cloned()
            .collect())
    }

    /// Breadth-first shortest path from `from_id` to `to_id` along outgoing
    /// edges, optionally restricted to edges of `kind`.
    ///
    /// Returns a zero-hop path when both ids are equal and `None` when the
    /// target is unreachable.
    fn shortest_path(
        &self,
        from_id: &str,
        to_id: &str,
        kind: Option<&str>,
    ) -> Result<Option<substrate::GraphPath>> {
        if from_id == to_id {
            return Ok(Some(substrate::GraphPath {
                nodes: vec![from_id.to_string()],
                hops: 0,
            }));
        }
        // Maps each discovered node to its predecessor; the start node has
        // none. `Option` is used rather than a sentinel id so that no real
        // node id can be mistaken for "no predecessor".
        let mut parents = BTreeMap::<String, Option<String>>::new();
        parents.insert(from_id.to_string(), None);
        let mut queue = VecDeque::from([from_id.to_string()]);
        while let Some(current) = queue.pop_front() {
            for edge in self.outgoing_edges(&current, kind)? {
                if parents.contains_key(&edge.to_id) {
                    continue;
                }
                parents.insert(edge.to_id.clone(), Some(current.clone()));
                if edge.to_id == to_id {
                    // Walk predecessors back to the start, then flip the order.
                    let mut nodes = vec![to_id.to_string()];
                    let mut cursor = to_id;
                    while let Some(Some(previous)) = parents.get(cursor) {
                        nodes.push(previous.clone());
                        cursor = previous.as_str();
                    }
                    nodes.reverse();
                    return Ok(Some(substrate::GraphPath {
                        hops: nodes.len().saturating_sub(1),
                        nodes,
                    }));
                }
                queue.push_back(edge.to_id);
            }
        }
        Ok(None)
    }

    /// Breadth-first search from `from_id` up to `depth` hops, grouping the
    /// reachable nodes whose kind is in `kinds` by kind.
    ///
    /// Every requested kind appears in the result (possibly with an empty
    /// list); an empty `kinds` yields an empty map. Each list is ordered by
    /// hop count, then label, then id, and cut to `limit` entries when
    /// `limit` is non-zero (`0` means no limit). Traversal follows edges of
    /// any kind and continues through nodes of non-requested kinds; targets
    /// with no node record are skipped and not expanded.
    fn reachable_nodes_by_kinds(
        &self,
        from_id: &str,
        kinds: &[&str],
        depth: usize,
        limit: usize,
    ) -> Result<BTreeMap<String, Vec<(SubstrateGraphNode, substrate::GraphPath)>>> {
        let requested = kinds.iter().copied().collect::<BTreeSet<_>>();
        if requested.is_empty() {
            return Ok(BTreeMap::new());
        }
        let mut rows = requested
            .iter()
            .map(|kind| {
                (
                    (*kind).to_string(),
                    BTreeMap::<String, (SubstrateGraphNode, substrate::GraphPath)>::new(),
                )
            })
            .collect::<BTreeMap<_, _>>();
        let mut seen = BTreeSet::from([from_id.to_string()]);
        let mut queue = VecDeque::from([(from_id.to_string(), vec![from_id.to_string()])]);
        while let Some((current, path)) = queue.pop_front() {
            let current_depth = path.len().saturating_sub(1);
            if current_depth >= depth {
                continue;
            }
            for edge in self.outgoing_edges(&current, None)? {
                if !seen.insert(edge.to_id.clone()) {
                    continue;
                }
                let Some(node) = self.nodes.get(&edge.to_id).cloned() else {
                    continue;
                };
                let mut next_path = path.clone();
                next_path.push(edge.to_id.clone());
                if requested.contains(node.kind.as_str()) {
                    // Only nodes of a requested kind need a recorded path.
                    let graph_path = substrate::GraphPath {
                        hops: next_path.len().saturating_sub(1),
                        nodes: next_path.clone(),
                    };
                    rows.entry(node.kind.clone())
                        .or_default()
                        .entry(node.id.clone())
                        .or_insert((node, graph_path));
                }
                queue.push_back((edge.to_id, next_path));
            }
        }
        Ok(rows
            .into_iter()
            .map(|(kind, values)| {
                let mut values = values.into_values().collect::<Vec<_>>();
                values.sort_by(|(left_node, left_path), (right_node, right_path)| {
                    left_path
                        .hops
                        .cmp(&right_path.hops)
                        .then_with(|| left_node.label.cmp(&right_node.label))
                        .then_with(|| left_node.id.cmp(&right_node.id))
                });
                if limit > 0 {
                    values.truncate(limit);
                }
                (kind, values)
            })
            .collect())
    }
}
// Backend-eval tuning constants and cache version tags.
// NOTE(review): the per-constant notes below are inferred from the names;
// confirm against the use sites before relying on them.
/// Presumably the default hop cap for backend-eval path probes.
pub(crate) const GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS: usize = 64;
/// Presumably the larger hop budgets tried beyond the default cap.
pub(crate) const GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS: [usize; 3] = [128, 256, 512];
/// Presumably the hop budget for direct (single-edge) path probes.
pub(crate) const GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS: usize = 1;
/// Presumably the tolerated regression, in percent, for the performance gate.
const GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT: f64 = 10.0;
/// Presumably the row count that normalized metrics are expressed per.
pub(crate) const GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT: f64 = 1000.0;
/// Presumably the minimum number of sample runs the gate requires.
const GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS: usize = 3;
/// Version tag for the conflict-matrix preparation cache.
const CONFLICT_MATRIX_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-prep-v1";
/// Version tag for the conflict-matrix graph preparation cache.
const CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION: &str = "conflict-matrix-graph-prep-v1";
/// Version tag for the backend-eval full-projection cache.
const GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION: &str = "backend-eval-full-projection-v5";
/// One named phase timing entry in `GraphDbBackendEvalReport::phase_timings`:
/// a name, a duration in microseconds and a detail string.
#[derive(Clone, Serialize, Deserialize)]
pub(crate) struct GraphDbBackendEvalPhaseTiming {
name: String,
duration_micros: u128,
detail: String,
}
/// Serde-round-trippable cache record holding a full `GraphProjection`
/// together with its version tag, key, source watermark and warnings.
#[derive(Serialize, Deserialize)]
struct GraphDbBackendEvalFullProjectionCache {
version: String,
key: String,
source_watermark: String,
projection: GraphProjection,
warnings: Vec<String>,
}
/// Counters describing one full-projection cache access.
///
/// `Default` is a miss with all byte and file counts at zero.
#[derive(Clone, Default)]
struct GraphDbBackendEvalFullProjectionCacheStats {
hit: bool,
disk_bytes: u64,
json_bytes: u64,
pruned_files: usize,
pruned_bytes: u64,
}
/// Serializable per-file watermark row: path, size in bytes and content hash.
#[derive(Serialize)]
struct GraphDbBackendEvalRawSourceWatermarkRow {
path: String,
bytes: u64,
content_hash: String,
}
/// A source watermark value paired with a human-readable detail string.
#[derive(Clone)]
struct GraphDbBackendEvalFullProjectionSourceWatermark {
value: String,
detail: String,
}
/// Serializable record of the parameters a backend-eval run used.
///
/// Covers synthetic dataset sizes (high-degree and deep-chain), traversal
/// depth and limits, the path hop budgets and policy strings, and the
/// full-projection settings.
#[derive(Serialize)]
pub(crate) struct GraphDbBackendEvalConfig {
high_degree_nodes: usize,
high_degree_fanout: usize,
deep_chain_nodes: usize,
deep_chain_fanout: usize,
depth: usize,
limit: usize,
impact_limit: usize,
path_max_hops: usize,
path_direct_hop_budget: usize,
path_deep_chain_hop_budget: usize,
path_extended_hop_budgets: Vec<usize>,
path_hop_policy: String,
path_probe_strategy: String,
path_query_plan_checks: Vec<String>,
full_projection_enabled: bool,
full_projection_profile: String,
normalization_row_unit: usize,
}
/// An operation name paired with a JSON value.
/// NOTE(review): presumably the canonical result used for backend parity
/// comparison — confirm against the use sites.
#[derive(Clone)]
struct GraphDbBackendEvalSignature {
operation: String,
value: serde_json::Value,
}
/// Serializable outcome of one evaluated operation on one backend.
///
/// `rows` and `error` are omitted from serialized output when `None`.
#[derive(Serialize)]
struct GraphDbBackendEvalOperation {
name: String,
supported: bool,
status: String,
duration_micros: u128,
#[serde(skip_serializing_if = "Option::is_none")]
rows: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<String>,
}
/// Serializable parity verdict against the SQLite baseline, with diagnostics.
#[derive(Serialize)]
struct GraphDbBackendEvalParity {
matches_sqlite: bool,
diagnostics: Vec<String>,
}
/// Serializable per-backend section of a backend-eval dataset: adapter
/// identity, per-operation results, total time, parity verdict and descriptive
/// notes on projection load, locking and install portability.
#[derive(Serialize)]
struct GraphDbBackendEvalBackendReport {
backend: String,
adapter: String,
read_only: bool,
projection_load: String,
operations: Vec<GraphDbBackendEvalOperation>,
total_micros: u128,
parity: GraphDbBackendEvalParity,
lock_behavior: String,
install_portability: String,
}
/// Serializable results for one evaluated dataset: its name, target count,
/// node/edge counts and one report per backend.
#[derive(Serialize)]
struct GraphDbBackendEvalDataset {
name: String,
target_count: usize,
nodes: usize,
edges: usize,
backends: Vec<GraphDbBackendEvalBackendReport>,
}
/// Serializable promotion decision for one backend: the decision string, the
/// reasons behind it and the gate it was evaluated against.
#[derive(Serialize)]
struct GraphDbBackendPromotionDecision {
backend: String,
decision: String,
reasons: Vec<String>,
gate: GraphDbBackendPromotionGate,
}
/// Serializable description of the performance gate reported by backend-eval.
///
/// Holds profile and fixture labels, the regression threshold and sample-run
/// minimum, required metrics, helper command strings, and the nested hop-cap
/// and adapter-spike gates.
#[derive(Serialize)]
struct GraphDbBackendEvalPerformanceGate {
baseline_fixture: String,
ci_profile: String,
opt_in_real_profile: String,
full_projection_cache_hit_gate: String,
allowed_regression_percent: f64,
minimum_sample_runs: usize,
normalized_metric_unit: String,
required_metrics: Vec<String>,
digest_command: String,
repeated_sample_command: String,
hop_cap_promotion: GraphDbHopCapPromotionGate,
backend_adapter_spike: GraphDbBackendAdapterSpikeGate,
}
/// Serializable gate describing when the default hop cap may be raised:
/// current default, candidate tiers, required backend/workloads/metrics,
/// thresholds and the decision rule text.
#[derive(Serialize)]
struct GraphDbHopCapPromotionGate {
status: String,
current_default_hops: usize,
candidate_hop_tiers: Vec<usize>,
required_backend: String,
required_workloads: Vec<String>,
required_metrics: Vec<String>,
allowed_regression_percent: f64,
minimum_sample_runs: usize,
decision_rule: String,
}
/// Serializable gate for backend adapter spikes: status, candidate backends,
/// required workloads and checks, the decision rule and an evidence plan.
#[derive(Serialize)]
struct GraphDbBackendAdapterSpikeGate {
status: String,
candidate_backends: Vec<GraphDbBackendAdapterSpikeCandidate>,
required_workloads: Vec<String>,
required_checks: Vec<String>,
decision_rule: String,
evidence_plan: String,
}
/// Serializable description of one candidate backend in the adapter-spike
/// gate.
#[derive(Serialize)]
struct GraphDbBackendAdapterSpikeCandidate {
backend: String,
adapter_label: String,
projection_load: String,
lock_behavior: String,
install_portability: String,
}
/// Top-level serializable backend-eval report.
///
/// Combines the run identity (root, scope, label, backends, targets), the
/// config used, phase timings, per-dataset results, promotion decisions, the
/// performance gate, a flat metric map and warnings. `scope` is omitted from
/// serialized output when `None`.
#[derive(Serialize)]
pub(crate) struct GraphDbBackendEvalReport {
root: String,
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
label: String,
baseline_backend: String,
candidates: Vec<String>,
targets: Vec<String>,
config: GraphDbBackendEvalConfig,
phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
datasets: Vec<GraphDbBackendEvalDataset>,
promotion: Vec<GraphDbBackendPromotionDecision>,
performance_gate: GraphDbBackendEvalPerformanceGate,
metrics: BTreeMap<String, f64>,
metric_digest_command: String,
warnings: Vec<String>,
}
/// Serializable result of one doctor check: name, status, fail-closed flag,
/// diagnostics and suggested repair commands.
#[derive(Clone, Debug, Serialize)]
struct GraphDbDoctorCheck {
name: String,
status: String,
fail_closed: bool,
diagnostics: Vec<String>,
repair_commands: Vec<String>,
}
/// Serializable doctor report for a graph database.
///
/// Created by `GraphDbDoctorReport::new` with status `"ok"` and no checks;
/// `finalize` later derives `fail_closed`, `status` and the de-duplicated
/// `repair_commands` from the accumulated `checks`.
#[derive(Serialize)]
pub(crate) struct GraphDbDoctorReport {
root: String,
// Omitted from the serialized output when no scope is set.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
backend: String,
graph_db: String,
// Omitted from the serialized output when no snapshot path was supplied.
#[serde(skip_serializing_if = "Option::is_none")]
convex_snapshot: Option<String>,
// "ok" or "fail_closed" after `finalize`.
status: String,
fail_closed: bool,
checks: Vec<GraphDbDoctorCheck>,
repair_commands: Vec<String>,
// Omitted from the serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
required_indexes: Vec<ConvexRequiredIndex>,
}
/// Serializable counters embedded in `GraphDbDriftReport::summary`.
// NOTE(review): the first six counters presumably mirror the lengths of the
// identically named lists on `GraphDbDriftReport` — confirm in the builder.
#[derive(Serialize)]
struct GraphDbDriftSummary {
node_upserts: usize,
edge_upserts: usize,
node_tombstones: usize,
edge_tombstones: usize,
stale_nodes: usize,
stale_edges: usize,
stale_projection_metadata: usize,
duplicate_failures: usize,
orphan_failures: usize,
missing_required_indexes: usize,
}
/// Serializable drift report between the local graph database and a Convex
/// snapshot.
///
/// Holds both content hashes, the summary counters, the per-category lists of
/// affected rows, diagnostics, suggested next commands and the required
/// Convex indexes.
#[derive(Serialize)]
struct GraphDbDriftReport {
root: String,
// Omitted from the serialized output when no scope is set.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
graph_db: String,
convex_snapshot: String,
status: String,
graph_reads_allowed: bool,
projection_version: String,
// Serialized as `null` when absent (no skip attribute on these two).
local_hash: Option<String>,
snapshot_hash: Option<String>,
summary: GraphDbDriftSummary,
node_upserts: Vec<String>,
edge_upserts: Vec<String>,
node_tombstones: Vec<String>,
edge_tombstones: Vec<String>,
stale_nodes: Vec<String>,
stale_edges: Vec<String>,
diagnostics: Vec<String>,
next_commands: Vec<String>,
required_indexes: Vec<ConvexRequiredIndex>,
// Omitted from the serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Tombstone row counts split by row kind.
///
/// The SQLite helpers in this file fill `nodes` and `edges` from the
/// `"node"` / `"edge"` row kinds (or the cached stats columns) and set
/// `total` to their sum.
#[derive(Clone, Serialize)]
struct GraphDbTombstoneCounts {
nodes: usize,
edges: usize,
total: usize,
}
/// Row and size counters for a graph database, as produced by
/// `sqlite_graph_counts` / `sqlite_graph_counts_from_cache`.
#[derive(Clone, Serialize)]
struct GraphDbOperatorCounts {
nodes: usize,
edges: usize,
tombstones: GraphDbTombstoneCounts,
// Database size in bytes; omitted from the output when unknown.
#[serde(skip_serializing_if = "Option::is_none")]
file_size_bytes: Option<u64>,
// Bytes held by freelist pages; omitted from the output when unknown.
#[serde(skip_serializing_if = "Option::is_none")]
freelist_bytes: Option<u64>,
}
/// Compaction recommendation derived from `GraphDbOperatorCounts`.
///
/// `graph_db_compaction_policy` sets `status` to `"recommended"` or
/// `"not_needed"`, copies the size figures from the counts, and fills
/// `recommendations` with follow-up commands and `proof` with the numbers the
/// decision was based on.
#[derive(Clone, Serialize)]
struct GraphDbCompactionPolicy {
status: String,
tombstone_scan_rows: usize,
live_rows: usize,
// Unlike `GraphDbOperatorCounts`, these serialize as `null` when absent.
file_size_bytes: Option<u64>,
freelist_bytes: Option<u64>,
safe_to_prune_tombstones: bool,
requires_convex_reconciliation: bool,
recommendations: Vec<String>,
proof: Vec<String>,
}
/// Serializable summary of a graph-db refresh: per-category row counters,
/// optional file sizes before/after, and optional phase timings.
// NOTE(review): the exact meaning of each counter (e.g. "deleted" versus
// "tombstoned") is defined by the refresh code, which is not visible here.
#[derive(Serialize)]
pub(crate) struct GraphDbRefreshSummary {
scope: String,
projection_version: String,
mode: String,
// Omitted from the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
source_watermark: Option<String>,
tombstoned_nodes: usize,
tombstoned_edges: usize,
upserted_nodes: usize,
upserted_edges: usize,
unchanged_nodes: usize,
unchanged_edges: usize,
upserted_properties: usize,
unchanged_properties: usize,
deleted_properties: usize,
deleted_nodes: usize,
deleted_edges: usize,
pruned_tombstones: usize,
// Omitted from the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
file_size_bytes_before: Option<u64>,
// Omitted from the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
file_size_bytes_after: Option<u64>,
// Omitted from the serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
phase_timings: Vec<GraphDbBackendEvalPhaseTiming>,
}
/// Serializable operator-facing report for a graph-db operation.
///
/// Combines freshness, readiness, row counts and the compaction policy with
/// an optional refresh summary and optional read-recovery information.
#[derive(Serialize)]
struct GraphDbOperatorReport {
root: String,
// Omitted from the serialized output when no scope is set.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
graph_db: String,
operation: String,
status: String,
materialized: bool,
freshness: GraphDbFreshnessReport,
readiness: GraphEffectivenessReadiness,
counts: GraphDbOperatorCounts,
// Omitted from the serialized output when no refresh summary is attached.
#[serde(skip_serializing_if = "Option::is_none")]
refresh: Option<GraphDbRefreshSummary>,
compaction: GraphDbCompactionPolicy,
// Omitted from the serialized output when no recovery is recorded.
#[serde(skip_serializing_if = "Option::is_none")]
recovery: Option<index::ReadOnlyRecovery>,
next_commands: Vec<String>,
// Omitted from the serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Serializable report of a graph-db compaction run, holding the counts and
/// compaction policy both before and after the operation.
#[derive(Serialize)]
pub(crate) struct GraphDbCompactionReport {
root: String,
// Omitted from the serialized output when no scope is set.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
graph_db: String,
applied: bool,
pruned_tombstones: usize,
counts_before: GraphDbOperatorCounts,
counts_after: GraphDbOperatorCounts,
compaction_before: GraphDbCompactionPolicy,
compaction_after: GraphDbCompactionPolicy,
// Signed, so the type can represent a negative value.
// NOTE(review): presumably a negative value means the file grew — confirm.
reclaimed_bytes: i64,
next_commands: Vec<String>,
// Omitted from the serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// One entry of `GraphDbEvidenceReport::shortest_paths`: a destination
/// (`to`, `kind`, `label`) with an optional graph path and an optional
/// `expand` string.
// NOTE(review): `expand` looks like a follow-up command or handle — confirm
// against the code that fills it.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidencePath {
to: String,
kind: String,
label: String,
// Omitted from the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<substrate::GraphPath>,
// Omitted from the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
expand: Option<String>,
}
/// Test/fixture reference embedded in `GraphDbEvidenceReport`: a test name,
/// a fixture name and the assertions associated with them.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbFixtureCoverage {
test: String,
fixture: String,
assertions: Vec<String>,
}
/// Evidence packet for one graph target; both serializable and
/// deserializable.
///
/// Bundles the target node with related node groups (worker context, source
/// handles, worker results, semantic neighbours), shortest paths, command
/// suggestions, freshness information and fixture coverage.
#[derive(Clone, Serialize, Deserialize)]
struct GraphDbEvidenceReport {
root: String,
// Omitted from the serialized output when no scope is set.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
backend: String,
contract_version: String,
target: String,
packet_id: String,
// Omitted from the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
projection_hash: Option<String>,
freshness: GraphDbFreshnessReport,
target_node: SubstrateGraphNode,
worker_context: Vec<SubstrateGraphNode>,
source_handles: Vec<SubstrateGraphNode>,
worker_results: Vec<SubstrateGraphNode>,
semantic_related: Vec<SubstrateGraphNode>,
shortest_paths: Vec<GraphDbEvidencePath>,
next_commands: Vec<String>,
replay_commands: Vec<String>,
repair_commands: Vec<String>,
fixture_coverage: GraphDbFixtureCoverage,
// Skipped when empty on output; defaults to empty when missing on input.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Borrowed inputs for building an evidence report against a `GraphStore`.
///
/// Everything except `freshness` and `warnings` is borrowed for `'a`, so the
/// struct is cheap to build and cannot outlive the store or the strings it
/// points to.
// NOTE(review): `depth` and `limit` presumably bound the traversal depth and
// the number of returned rows — confirm in the consumer of this struct.
pub(crate) struct GraphDbEvidenceInput<'a, S: GraphStore> {
root: &'a Path,
scope: Option<&'a str>,
backend: &'a str,
target: &'a str,
depth: usize,
limit: usize,
store: &'a S,
freshness: GraphDbFreshnessReport,
warnings: Vec<String>,
}
impl GraphDbDoctorReport {
    /// Creates an empty report in the `"ok"` state.
    ///
    /// Paths are stored as lossy UTF-8 strings; the check list, repair
    /// commands and required indexes start out empty.
    fn new(
        root: &Path,
        scope: Option<&str>,
        backend: &str,
        graph_db: &Path,
        convex_snapshot: Option<&Path>,
    ) -> Self {
        let lossy = |path: &Path| path.to_string_lossy().into_owned();
        Self {
            root: lossy(root),
            scope: scope.map(String::from),
            backend: backend.to_owned(),
            graph_db: lossy(graph_db),
            convex_snapshot: convex_snapshot.map(lossy),
            status: String::from("ok"),
            fail_closed: false,
            checks: Vec::new(),
            repair_commands: Vec::new(),
            required_indexes: Vec::new(),
        }
    }

    /// Appends one check result to the report.
    fn push_check(&mut self, check: GraphDbDoctorCheck) {
        self.checks.push(check);
    }

    /// Recomputes the aggregate fields from the recorded checks.
    ///
    /// The report fails closed as soon as any check does; repair commands are
    /// the sorted, de-duplicated union of every check's repair commands.
    fn finalize(&mut self) {
        let any_failed = self.checks.iter().any(|check| check.fail_closed);
        self.fail_closed = any_failed;
        self.status = String::from(if any_failed { "fail_closed" } else { "ok" });

        let unique_commands: BTreeSet<String> = self
            .checks
            .iter()
            .flat_map(|check| check.repair_commands.iter().cloned())
            .collect();
        self.repair_commands = unique_commands.into_iter().collect();
    }

    /// Joins the first three diagnostics of failing checks with `"; "`.
    fn summary(&self) -> String {
        let failing = self.checks.iter().filter(|check| check.fail_closed);
        let mut leading: Vec<&str> = Vec::with_capacity(3);
        for diagnostic in failing.flat_map(|check| &check.diagnostics).take(3) {
            leading.push(diagnostic.as_str());
        }
        leading.join("; ")
    }
}
/// Builds a doctor check whose outcome is derived from its diagnostics.
///
/// No diagnostics means the check passed: status `"ok"`, not fail-closed, and
/// the supplied repair commands are discarded. Any diagnostic makes the check
/// `"fail_closed"` and keeps the repair commands.
fn graph_db_doctor_check(
    name: impl Into<String>,
    diagnostics: Vec<String>,
    repair_commands: Vec<String>,
) -> GraphDbDoctorCheck {
    if diagnostics.is_empty() {
        return GraphDbDoctorCheck {
            name: name.into(),
            status: "ok".to_owned(),
            fail_closed: false,
            diagnostics,
            repair_commands: Vec::new(),
        };
    }
    GraphDbDoctorCheck {
        name: name.into(),
        status: "fail_closed".to_owned(),
        fail_closed: true,
        diagnostics,
        repair_commands,
    }
}
/// Renders the optional scope as a ` --scope <quoted>` command-line fragment.
///
/// Returns an empty string when no scope is given, so the result can be
/// spliced directly after the path argument of a suggested command.
pub(crate) fn graph_db_scope_arg(scope: Option<&str>) -> String {
    match scope {
        Some(value) => format!(" --scope {}", shell_quote(value)),
        None => String::new(),
    }
}
/// Formats the `tsift graph-db ... refresh --json` command for `root` and the
/// optional `scope`, shell-quoting the root path.
fn graph_db_refresh_command(root: &Path, scope: Option<&str>) -> String {
    let quoted_root = shell_quote(root.to_string_lossy().as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    format!(
        "tsift graph-db --path {}{} refresh --json",
        quoted_root, scope_arg
    )
}
/// Returns the command suggested for rebuilding the graph database.
///
/// Currently identical to `graph_db_refresh_command`; kept as a separate name
/// so call sites can say "rebuild" where that is the intent.
fn graph_db_rebuild_command(root: &Path, scope: Option<&str>) -> String {
graph_db_refresh_command(root, scope)
}
/// Formats a shell command that moves `graph_db` to `<graph_db>.bak` and then
/// runs the rebuild command for `root` / `scope`.
///
/// Both paths are shell-quoted; the two steps are chained with `&&` so the
/// rebuild only runs when the move succeeds.
fn graph_db_backup_rebuild_command(root: &Path, scope: Option<&str>, graph_db: &Path) -> String {
    let db_path = graph_db.to_string_lossy();
    let mut backup = String::with_capacity(db_path.len() + ".bak".len());
    backup.push_str(&db_path);
    backup.push_str(".bak");

    let quoted_db = shell_quote(db_path.as_ref());
    let quoted_backup = shell_quote(&backup);
    let rebuild = graph_db_rebuild_command(root, scope);
    format!("mv {} {} && {}", quoted_db, quoted_backup, rebuild)
}
/// Formats the `tsift convex-sync ... --remote-snapshot --apply --json`
/// command for `root` and the optional `scope`, shell-quoting the root path.
fn convex_refresh_command(root: &Path, scope: Option<&str>) -> String {
    let quoted_root = shell_quote(root.to_string_lossy().as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    format!(
        "tsift convex-sync {}{} --remote-snapshot --apply --json",
        quoted_root, scope_arg
    )
}
/// Opens `graph_db` through
/// `substrate::open_graph_read_only_connection_resilient` and returns the
/// resulting read-only connection wrapper.
// NOTE(review): the "resilient" opener presumably implements the snapshot
// fallbacks described by `graph_db_read_recovery_diagnostic` — confirm in
// the `substrate` crate.
fn open_sqlite_graph_db_readonly(graph_db: &Path) -> Result<substrate::SqliteReadOnlyConnection> {
substrate::open_graph_read_only_connection_resilient(graph_db)
}
/// Reports whether `sqlite_master` lists a table named `table`.
///
/// The name is bound as a query parameter, so it is never interpolated into
/// the SQL text.
fn sqlite_table_exists(conn: &Connection, table: &str) -> Result<bool> {
    let exists: bool = conn.query_row(
        "SELECT EXISTS(SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1)",
        [table],
        |row| row.get(0),
    )?;
    Ok(exists)
}
/// Counts the rows of one of the known graph tables.
///
/// Only `graph_nodes`, `graph_edges` and `graph_tombstones` are accepted; any
/// other name is rejected with an error instead of being spliced into SQL.
fn sqlite_known_table_count(conn: &Connection, table: &str) -> Result<usize> {
    // Allow-list of table name -> fixed COUNT query.
    const COUNT_QUERIES: [(&str, &str); 3] = [
        ("graph_nodes", "SELECT COUNT(*) FROM graph_nodes"),
        ("graph_edges", "SELECT COUNT(*) FROM graph_edges"),
        ("graph_tombstones", "SELECT COUNT(*) FROM graph_tombstones"),
    ];
    let sql = match COUNT_QUERIES.iter().find(|entry| entry.0 == table) {
        Some(entry) => entry.1,
        None => {
            let other = table;
            bail!("unsupported graph count table {other}")
        }
    };
    let count: usize = conn.query_row(sql, [], |row| row.get(0))?;
    Ok(count)
}
/// Counts tombstones per row kind.
///
/// Returns all zeros when the `graph_tombstones` table does not exist. Only
/// the `"node"` and `"edge"` kinds are counted; rows of any other kind are
/// ignored and therefore not part of `total`.
fn sqlite_tombstone_counts(conn: &Connection) -> Result<GraphDbTombstoneCounts> {
    let table_present = sqlite_table_exists(conn, "graph_tombstones")?;
    if !table_present {
        return Ok(GraphDbTombstoneCounts {
            nodes: 0,
            edges: 0,
            total: 0,
        });
    }

    let mut stmt =
        conn.prepare("SELECT row_kind, COUNT(*) FROM graph_tombstones GROUP BY row_kind")?;
    let per_kind = stmt.query_map([], |row| {
        Ok((row.get::<_, String>(0)?, row.get::<_, usize>(1)?))
    })?;

    let (mut nodes, mut edges) = (0usize, 0usize);
    for entry in per_kind {
        let (row_kind, count) = entry?;
        if row_kind == "node" {
            nodes = count;
        } else if row_kind == "edge" {
            edges = count;
        }
    }
    Ok(GraphDbTombstoneCounts {
        nodes,
        edges,
        total: nodes + edges,
    })
}
/// Reads the cached counters for `scope` from `graph_operator_stats`.
///
/// Returns `Ok(None)` when the stats table is missing or holds no row for the
/// scope. Stored sizes that are NULL or negative are replaced by a live
/// PRAGMA-based measurement; if that measurement fails too, the field stays
/// `None`.
fn sqlite_graph_counts_from_cache(
    conn: &Connection,
    scope: &str,
) -> Result<Option<GraphDbOperatorCounts>> {
    if !sqlite_table_exists(conn, "graph_operator_stats")? {
        return Ok(None);
    }
    let cached = conn
        .query_row(
            r#"
SELECT nodes, edges, tombstone_nodes, tombstone_edges, file_size_bytes, freelist_bytes
FROM graph_operator_stats
WHERE scope = ?1
"#,
            [scope],
            |row| {
                let nodes: usize = row.get(0)?;
                let edges: usize = row.get(1)?;
                let tombstone_nodes: usize = row.get(2)?;
                let tombstone_edges: usize = row.get(3)?;
                let stored_file_size: Option<i64> = row.get(4)?;
                let stored_freelist: Option<i64> = row.get(5)?;
                Ok((
                    nodes,
                    edges,
                    tombstone_nodes,
                    tombstone_edges,
                    stored_file_size,
                    stored_freelist,
                ))
            },
        )
        .optional()?;
    let Some((nodes, edges, tombstone_nodes, tombstone_edges, stored_file_size, stored_freelist)) =
        cached
    else {
        return Ok(None);
    };

    // Prefer the stored value; fall back to measuring the open database.
    let file_size_bytes = match stored_file_size.and_then(|value| u64::try_from(value).ok()) {
        Some(bytes) => Some(bytes),
        None => sqlite_database_size_bytes(conn).ok(),
    };
    let freelist_bytes = match stored_freelist.and_then(|value| u64::try_from(value).ok()) {
        Some(bytes) => Some(bytes),
        None => sqlite_database_freelist_bytes(conn).ok(),
    };

    Ok(Some(GraphDbOperatorCounts {
        nodes,
        edges,
        tombstones: GraphDbTombstoneCounts {
            nodes: tombstone_nodes,
            edges: tombstone_edges,
            total: tombstone_nodes + tombstone_edges,
        },
        file_size_bytes,
        freelist_bytes,
    }))
}
/// Returns row and size counters for the graph database.
///
/// Cached stats for `scope` win when present. Otherwise the node and edge
/// tables are counted directly (a missing table counts as zero), tombstones
/// are grouped by kind, and sizes come from PRAGMA queries (left as `None`
/// when those fail).
fn sqlite_graph_counts(conn: &Connection, scope: &str) -> Result<GraphDbOperatorCounts> {
    match sqlite_graph_counts_from_cache(conn, scope)? {
        Some(cached) => return Ok(cached),
        None => {}
    }

    let count_if_present = |table: &str| -> Result<usize> {
        if sqlite_table_exists(conn, table)? {
            sqlite_known_table_count(conn, table)
        } else {
            Ok(0)
        }
    };
    let nodes = count_if_present("graph_nodes")?;
    let edges = count_if_present("graph_edges")?;
    let tombstones = sqlite_tombstone_counts(conn)?;
    let file_size_bytes = sqlite_database_size_bytes(conn).ok();
    let freelist_bytes = sqlite_database_freelist_bytes(conn).ok();

    Ok(GraphDbOperatorCounts {
        nodes,
        edges,
        tombstones,
        file_size_bytes,
        freelist_bytes,
    })
}
/// Derives the compaction recommendation from the current counters.
///
/// Compaction is `"recommended"` when tombstones outnumber live rows (with a
/// floor of one live row) or when the freelist is non-empty and at least
/// 1/20th of the file size; otherwise it is `"not_needed"`.
///
/// `prune_confirmed` marks tombstone pruning as safe and adds the pruning
/// command to the recommendations; without it, any retained tombstone sets
/// `requires_convex_reconciliation`.
pub(crate) fn graph_db_compaction_policy(
    root: &Path,
    scope: Option<&str>,
    counts: &GraphDbOperatorCounts,
    prune_confirmed: bool,
) -> GraphDbCompactionPolicy {
    let live_rows = counts.nodes + counts.edges;
    let tombstone_scan_rows = counts.tombstones.total;

    let tombstone_heavy = tombstone_scan_rows > live_rows.max(1);
    let freelist_heavy = match (counts.file_size_bytes, counts.freelist_bytes) {
        (Some(file_size), Some(freelist)) => freelist > 0 && freelist >= file_size / 20,
        _ => false,
    };
    let status = String::from(if tombstone_heavy || freelist_heavy {
        "recommended"
    } else {
        "not_needed"
    });

    let mut recommendations = Vec::with_capacity(4);
    recommendations.push(convex_refresh_command(root, scope));
    recommendations.push(graph_db_refresh_command(root, scope));
    let quoted_root = shell_quote(root.to_string_lossy().as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    recommendations.push(format!(
        "tsift graph-db --path {}{} compact --apply --json",
        quoted_root, scope_arg
    ));
    if prune_confirmed {
        recommendations.push(format!(
            "tsift graph-db --path {}{} compact --apply --prune-tombstones --confirmed-convex-reconciled --json",
            quoted_root, scope_arg
        ));
    }

    // Human-readable numbers backing the decision above.
    let mut proof = Vec::with_capacity(3);
    proof.push(format!("{live_rows} live graph row(s)"));
    proof.push(format!(
        "{tombstone_scan_rows} retained tombstone row(s) scanned by status/doctor"
    ));
    proof.push(format!(
        "graph.db file_size={} byte(s), freelist={} byte(s)",
        counts.file_size_bytes.unwrap_or(0),
        counts.freelist_bytes.unwrap_or(0)
    ));

    GraphDbCompactionPolicy {
        status,
        tombstone_scan_rows,
        live_rows,
        file_size_bytes: counts.file_size_bytes,
        freelist_bytes: counts.freelist_bytes,
        safe_to_prune_tombstones: prune_confirmed,
        requires_convex_reconciliation: tombstone_scan_rows > 0 && !prune_confirmed,
        recommendations,
        proof,
    }
}
/// Computes the database size as `page_count * page_size`, saturating
/// instead of overflowing.
fn sqlite_database_size_bytes(conn: &Connection) -> Result<u64> {
    let read_pragma = |sql: &str| -> Result<u64> {
        let value: u64 = conn.query_row(sql, [], |row| row.get(0))?;
        Ok(value)
    };
    let pages = read_pragma("PRAGMA page_count")?;
    let bytes_per_page = read_pragma("PRAGMA page_size")?;
    Ok(pages.saturating_mul(bytes_per_page))
}
/// Computes the bytes held by freelist pages as
/// `freelist_count * page_size`, saturating instead of overflowing.
fn sqlite_database_freelist_bytes(conn: &Connection) -> Result<u64> {
    let read_pragma = |sql: &str| -> Result<u64> {
        let value: u64 = conn.query_row(sql, [], |row| row.get(0))?;
        Ok(value)
    };
    let free_pages = read_pragma("PRAGMA freelist_count")?;
    let bytes_per_page = read_pragma("PRAGMA page_size")?;
    Ok(free_pages.saturating_mul(bytes_per_page))
}
/// Produces diagnostics about retained tombstones.
///
/// Returns nothing when the tombstone table is missing. With cached stats the
/// expensive live-row scan is skipped; without them every live node/edge key
/// is loaded and tombstones whose key is live again are counted. A second
/// diagnostic is emitted when tombstones outnumber live rows (with a floor of
/// one live row).
fn sqlite_graph_tombstone_retention_diagnostics(
    conn: &Connection,
    scope: &str,
) -> Result<Vec<String>> {
    if !sqlite_table_exists(conn, "graph_tombstones")? {
        return Ok(Vec::new());
    }

    let cached = sqlite_graph_counts_from_cache(conn, scope)?;
    let from_cache = cached.is_some();
    let counts = match cached {
        Some(counts) => counts,
        None => sqlite_graph_counts(conn, scope)?,
    };
    let live_rows = counts.nodes + counts.edges;

    let mut stale_live_tombstones = 0usize;
    if !from_cache {
        // Keys use the same "node:<id>" / "edge:<key>" shape as row_key.
        let mut live_keys: BTreeSet<String> = BTreeSet::new();
        if sqlite_table_exists(conn, "graph_nodes")? {
            let mut node_stmt = conn.prepare("SELECT id FROM graph_nodes")?;
            let mut node_rows = node_stmt.query([])?;
            while let Some(row) = node_rows.next()? {
                let node_id: String = row.get(0)?;
                live_keys.insert(format!("node:{}", node_id));
            }
        }
        if sqlite_table_exists(conn, "graph_edges")? {
            let mut edge_stmt = conn.prepare("SELECT edge_key FROM graph_edges")?;
            let mut edge_rows = edge_stmt.query([])?;
            while let Some(row) = edge_rows.next()? {
                let edge_key: String = row.get(0)?;
                live_keys.insert(format!("edge:{}", edge_key));
            }
        }

        let mut tombstone_stmt =
            conn.prepare("SELECT row_key FROM graph_tombstones ORDER BY row_key")?;
        let mut tombstone_rows = tombstone_stmt.query([])?;
        while let Some(row) = tombstone_rows.next()? {
            let row_key: String = row.get(0)?;
            if live_keys.contains(&row_key) {
                stale_live_tombstones += 1;
            }
        }
    }

    let mut diagnostics = Vec::new();
    if stale_live_tombstones > 0 {
        diagnostics.push(format!(
            "{stale_live_tombstones} tombstone(s) reference rows that are live again; the next graph-db refresh prunes those stale tombstones before inserting new deletion markers"
        ));
    }
    if counts.tombstones.total > live_rows.max(1) {
        let source = if from_cache {
            "cached refresh stats"
        } else {
            "live row scan"
        };
        let file_size = counts.file_size_bytes.unwrap_or(0);
        let freelist = counts.freelist_bytes.unwrap_or(0);
        diagnostics.push(format!(
            "tombstone retention exceeds live graph rows: {} tombstone(s) vs {} live row(s) from {}; graph.db file_size={} byte(s), freelist={} byte(s), status/doctor tombstone scans inspect {} extra row(s). Run convex-sync against the remote snapshot before rebuild/compaction if a remote consumer may still need deletion reconciliation.",
            counts.tombstones.total,
            live_rows,
            source,
            file_size,
            freelist,
            counts.tombstones.total
        ));
    }
    Ok(diagnostics)
}
/// Builds the freshness report for `scope` from `graph_projection_versions`.
///
/// A missing metadata table or a missing row for the scope yields a
/// fail-closed `"missing"` report. Otherwise the report is `"stale"` (and
/// fail-closed) when the stored projection version differs from
/// `GRAPH_PROJECTION_VERSION` or the content hash is absent, and `"current"`
/// when neither problem is found.
fn sqlite_graph_freshness_from_conn(
    conn: &Connection,
    scope: &str,
) -> Result<GraphDbFreshnessReport> {
    // Shared shape of both "missing" outcomes; only the message differs.
    let missing_report = |message: &str| GraphDbFreshnessReport {
        status: "missing".to_string(),
        fail_closed: true,
        projection_version: None,
        content_hash: None,
        source_watermark: None,
        diagnostics: vec![message.to_string()],
    };

    if !sqlite_table_exists(conn, "graph_projection_versions")? {
        return Ok(missing_report(
            "graph projection metadata table is missing; refresh graph.db before trusting reads",
        ));
    }

    let stored = conn
        .query_row(
            r#"
SELECT projection_version, content_hash, source_watermark
FROM graph_projection_versions
WHERE scope = ?1
"#,
            [scope],
            |row| {
                let projection_version: String = row.get(0)?;
                let content_hash: Option<String> = row.get(1)?;
                let source_watermark: Option<String> = row.get(2)?;
                Ok((projection_version, content_hash, source_watermark))
            },
        )
        .optional()?;
    let (projection_version, content_hash, source_watermark) = match stored {
        Some(row) => row,
        None => {
            return Ok(missing_report(
                "graph projection metadata is missing; refresh graph.db before trusting reads",
            ));
        }
    };

    let version_problem = (projection_version != GRAPH_PROJECTION_VERSION).then(|| {
        format!(
            "projection version mismatch: expected {} got {}",
            GRAPH_PROJECTION_VERSION, projection_version
        )
    });
    let hash_problem = content_hash
        .is_none()
        .then(|| "projection content hash is missing".to_string());
    let diagnostics: Vec<String> = [version_problem, hash_problem]
        .into_iter()
        .flatten()
        .collect();

    let fail_closed = !diagnostics.is_empty();
    let status = if fail_closed { "stale" } else { "current" };
    Ok(GraphDbFreshnessReport {
        status: status.to_string(),
        fail_closed,
        projection_version: Some(projection_version),
        content_hash,
        source_watermark,
        diagnostics,
    })
}
fn graph_db_operator_next_commands(
root: &Path,
scope: Option<&str>,
include_refresh: bool,
) -> Vec<String> {
let mut commands = Vec::new();
if include_refresh {
commands.push(graph_db_refresh_command(root, scope));
}
commands.push(format!(
"tsift graph-db --path {}{} doctor --json",
shell_quote(root.to_string_lossy().as_ref()),
graph_db_scope_arg(scope)
));
commands.push(format!(
"tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot <rows.json> drift --json",
shell_quote(root.to_string_lossy().as_ref()),
graph_db_scope_arg(scope)
));
commands.push(format!(
"tsift convex-sync {}{} --remote-snapshot --apply --json",
shell_quote(root.to_string_lossy().as_ref()),
graph_db_scope_arg(scope)
));
commands
}
/// Maps a read-only recovery mode to the human-readable diagnostic shown in
/// reports.
pub(crate) fn graph_db_read_recovery_diagnostic(recovery: index::ReadOnlyRecovery) -> String {
    let message = match recovery {
        index::ReadOnlyRecovery::SnapshotFallback => {
            "graph.db read recovered through snapshot fallback after a rollback-journal lock on the live database"
        }
        index::ReadOnlyRecovery::SnapshotFallbackWal => {
            "graph.db read recovered through WAL-aware snapshot fallback after copying live -wal/-shm sidecars"
        }
    };
    message.to_string()
}
/// Runs `sql` without parameters and collects the first column of every row
/// into a sorted set of strings. The first failing row aborts the collection.
fn sqlite_string_set(conn: &Connection, sql: &str) -> Result<BTreeSet<String>> {
    let mut stmt = conn.prepare(sql)?;
    let values = stmt
        .query_map([], |row| row.get::<_, String>(0))?
        .collect::<std::result::Result<BTreeSet<String>, _>>()?;
    Ok(values)
}
/// Returns the column names of `table` as reported by `PRAGMA table_info`.
///
/// `table` is interpolated into the PRAGMA text verbatim, so callers must
/// pass a trusted identifier.
fn sqlite_column_names(conn: &Connection, table: &str) -> Result<BTreeSet<String>> {
    let pragma = format!("PRAGMA table_info({table})");
    let mut stmt = conn.prepare(&pragma)?;
    // Column 1 of table_info is the column name.
    let columns = stmt
        .query_map([], |row| row.get::<_, String>(1))?
        .collect::<std::result::Result<BTreeSet<String>, _>>()?;
    Ok(columns)
}
/// Compares the on-disk schema with what this build expects.
///
/// Checks, in order: `user_version` against `SQLITE_GRAPH_SCHEMA_VERSION`,
/// required tables and their columns, required indexes, and the
/// `from_id` / `to_id` foreign keys declared on `graph_edges`. Each mismatch
/// becomes one diagnostic string; an empty result means no drift was found.
fn sqlite_graph_schema_diagnostics(conn: &Connection) -> Result<Vec<String>> {
    // Table name -> columns that must exist on it.
    const REQUIRED_TABLES: [(&str, &[&str]); 6] = [
        (
            "graph_nodes",
            &[
                "id",
                "kind",
                "label",
                "properties_json",
                "provenance_json",
                "freshness_json",
                "row_hash",
                "source_watermark",
            ],
        ),
        (
            "graph_edges",
            &[
                "edge_key",
                "from_id",
                "to_id",
                "kind",
                "properties_json",
                "provenance_json",
                "freshness_json",
                "row_hash",
                "source_watermark",
            ],
        ),
        (
            "graph_projection_versions",
            &[
                "scope",
                "projection_version",
                "content_hash",
                "source_watermark",
                "observed_at_unix",
            ],
        ),
        (
            "graph_tombstones",
            &["row_key", "row_kind", "deleted_at_unix"],
        ),
        ("graph_node_properties", &["node_id", "key", "value"]),
        ("graph_edge_properties", &["edge_key", "key", "value"]),
    ];
    const REQUIRED_INDEXES: [&str; 6] = [
        "idx_graph_nodes_kind",
        "idx_graph_edges_from_kind",
        "idx_graph_edges_to_kind",
        "idx_graph_edges_edge_key",
        "idx_graph_node_properties_key_value_node",
        "idx_graph_edge_properties_key_value_edge",
    ];
    // (child column, parent column) pairs expected on graph_edges.
    const REQUIRED_EDGE_FOREIGN_KEYS: [(&str, &str); 2] = [("from_id", "id"), ("to_id", "id")];

    let mut diagnostics = Vec::new();

    let user_version: i64 =
        conn.pragma_query_value(None, "user_version", |row| row.get::<_, i64>(0))?;
    if user_version > SQLITE_GRAPH_SCHEMA_VERSION {
        diagnostics.push(format!(
            "graph.db schema version {user_version} is newer than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
        ));
    } else if user_version < SQLITE_GRAPH_SCHEMA_VERSION {
        diagnostics.push(format!(
            "graph.db schema version {user_version} is older than supported version {SQLITE_GRAPH_SCHEMA_VERSION}"
        ));
    }

    let tables = sqlite_string_set(
        conn,
        "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name",
    )?;
    for (table, required_columns) in REQUIRED_TABLES {
        if !tables.contains(table) {
            diagnostics.push(format!("graph.db schema drift: missing table {table}"));
            continue;
        }
        let columns = sqlite_column_names(conn, table)?;
        diagnostics.extend(
            required_columns
                .iter()
                .filter(|column| !columns.contains(**column))
                .map(|column| format!("graph.db schema drift: missing column {table}.{column}")),
        );
    }

    let indexes = sqlite_string_set(
        conn,
        "SELECT name FROM sqlite_master WHERE type = 'index' ORDER BY name",
    )?;
    diagnostics.extend(
        REQUIRED_INDEXES
            .into_iter()
            .filter(|index| !indexes.contains(*index))
            .map(|index| format!("graph.db schema drift: missing index {index}")),
    );

    if tables.contains("graph_edges") {
        let mut stmt = conn.prepare("PRAGMA foreign_key_list(graph_edges)")?;
        // Columns 3 and 4 of foreign_key_list are the child and parent columns.
        let declared = stmt
            .query_map([], |row| {
                Ok((row.get::<_, String>(3)?, row.get::<_, String>(4)?))
            })?
            .collect::<std::result::Result<BTreeSet<(String, String)>, _>>()?;
        for (child_column, parent_column) in REQUIRED_EDGE_FOREIGN_KEYS {
            let expected = (child_column.to_string(), parent_column.to_string());
            if !declared.contains(&expected) {
                diagnostics.push(format!(
                    "graph.db schema drift: missing graph_edges foreign key {} -> graph_nodes.{}",
                    expected.0, expected.1
                ));
            }
        }
    }
    Ok(diagnostics)
}
/// Runs `sql` without parameters and returns the first column of every row,
/// in query order, as diagnostic strings.
fn sqlite_query_diagnostics(conn: &Connection, sql: &str) -> Result<Vec<String>> {
    let mut stmt = conn.prepare(sql)?;
    let diagnostics = stmt
        .query_map([], |row| row.get::<_, String>(0))?
        .collect::<std::result::Result<Vec<String>, _>>()?;
    Ok(diagnostics)
}
/// Reports duplicated identities in the graph tables.
///
/// Three queries run in order: duplicate node ids, duplicate
/// `(from_id, to_id, kind)` edge triples, and duplicate edge keys. Their
/// diagnostics are concatenated in that order.
fn sqlite_graph_duplicate_diagnostics(conn: &Connection) -> Result<Vec<String>> {
    const DUPLICATE_QUERIES: [&str; 3] = [
        r#"
SELECT 'duplicate graph_nodes.id ' || id || ' (' || COUNT(*) || ' rows)'
FROM graph_nodes
GROUP BY id
HAVING COUNT(*) > 1
ORDER BY id
"#,
        r#"
SELECT 'duplicate graph_edges key ' || from_id || ' -' || kind || '-> ' || to_id || ' (' || COUNT(*) || ' rows)'
FROM graph_edges
GROUP BY from_id, to_id, kind
HAVING COUNT(*) > 1
ORDER BY from_id, kind, to_id
"#,
        r#"
SELECT 'duplicate graph_edges.edge_key ' || edge_key || ' (' || COUNT(*) || ' rows)'
FROM graph_edges
GROUP BY edge_key
HAVING COUNT(*) > 1
ORDER BY edge_key
"#,
    ];

    let mut diagnostics = Vec::new();
    for sql in DUPLICATE_QUERIES {
        diagnostics.extend(sqlite_query_diagnostics(conn, sql)?);
    }
    Ok(diagnostics)
}
/// Reports edges whose `from_id` or `to_id` has no matching node row.
///
/// An edge missing both endpoints is reported twice, once per side; the
/// combined list is sorted by message text.
fn sqlite_graph_orphan_diagnostics(conn: &Connection) -> Result<Vec<String>> {
    const ORPHAN_EDGES_SQL: &str = r#"
SELECT 'orphan edge missing from node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
FROM graph_edges e
LEFT JOIN graph_nodes n ON n.id = e.from_id
WHERE n.id IS NULL
UNION ALL
SELECT 'orphan edge missing to node: ' || e.from_id || ' -' || e.kind || '-> ' || e.to_id
FROM graph_edges e
LEFT JOIN graph_nodes n ON n.id = e.to_id
WHERE n.id IS NULL
ORDER BY 1
"#;
    sqlite_query_diagnostics(conn, ORPHAN_EDGES_SQL)
}
/// Validates the JSON columns of every node and edge row.
///
/// `properties_json` must parse as a string-to-string map, `provenance_json`
/// as a list of `GraphProvenance`, and `freshness_json` (when not NULL) as a
/// `GraphFreshness`. Each parse failure becomes one diagnostic; database
/// errors abort the scan.
fn sqlite_graph_json_diagnostics(conn: &Connection) -> Result<Vec<String>> {
    let mut diagnostics = Vec::new();

    let mut node_stmt = conn.prepare(
        "SELECT id, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
    )?;
    let mut node_rows = node_stmt.query([])?;
    while let Some(row) = node_rows.next()? {
        let id: String = row.get(0)?;
        let properties_json: String = row.get(1)?;
        let provenance_json: String = row.get(2)?;
        let freshness_json: Option<String> = row.get(3)?;

        if let Some(err) =
            serde_json::from_str::<BTreeMap<String, String>>(&properties_json).err()
        {
            diagnostics.push(format!(
                "graph_nodes {id} properties_json is invalid: {err}"
            ));
        }
        if let Some(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json).err() {
            diagnostics.push(format!(
                "graph_nodes {id} provenance_json is invalid: {err}"
            ));
        }
        let freshness_error = freshness_json
            .as_deref()
            .and_then(|raw| serde_json::from_str::<GraphFreshness>(raw).err());
        if let Some(err) = freshness_error {
            diagnostics.push(format!("graph_nodes {id} freshness_json is invalid: {err}"));
        }
    }

    let mut edge_stmt = conn.prepare(
        "SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
    )?;
    let mut edge_rows = edge_stmt.query([])?;
    while let Some(row) = edge_rows.next()? {
        let edge_key: String = row.get(0)?;
        let from_id: String = row.get(1)?;
        let to_id: String = row.get(2)?;
        let kind: String = row.get(3)?;
        let properties_json: String = row.get(4)?;
        let provenance_json: String = row.get(5)?;
        let freshness_json: Option<String> = row.get(6)?;

        // Label used to identify the edge in every message below.
        let edge = format!("{edge_key} {from_id} -{kind}-> {to_id}");
        if let Some(err) =
            serde_json::from_str::<BTreeMap<String, String>>(&properties_json).err()
        {
            diagnostics.push(format!(
                "graph_edges {edge} properties_json is invalid: {err}"
            ));
        }
        if let Some(err) = serde_json::from_str::<Vec<GraphProvenance>>(&provenance_json).err() {
            diagnostics.push(format!(
                "graph_edges {edge} provenance_json is invalid: {err}"
            ));
        }
        let freshness_error = freshness_json
            .as_deref()
            .and_then(|raw| serde_json::from_str::<GraphFreshness>(raw).err());
        if let Some(err) = freshness_error {
            diagnostics.push(format!(
                "graph_edges {edge} freshness_json is invalid: {err}"
            ));
        }
    }
    Ok(diagnostics)
}
/// Cross-checks the projection metadata row against the `projection_meta`
/// node for the scope.
///
/// A missing scope falls back to the key `"root"`. The function stops early,
/// returning what it has collected, when the metadata row or the meta node is
/// absent. Unparseable meta-node properties are returned as an error rather
/// than a diagnostic.
fn sqlite_graph_projection_metadata_diagnostics(
    conn: &Connection,
    scope: Option<&str>,
) -> Result<Vec<String>> {
    let scope_key = scope.unwrap_or("root");
    let stored = conn
        .query_row(
            r#"
SELECT projection_version, content_hash, source_watermark
FROM graph_projection_versions
WHERE scope = ?1
"#,
            [scope_key],
            |row| {
                let projection_version: String = row.get(0)?;
                let content_hash: Option<String> = row.get(1)?;
                let source_watermark: Option<String> = row.get(2)?;
                Ok((projection_version, content_hash, source_watermark))
            },
        )
        .optional()?;
    let (projection_version, content_hash) = match stored {
        Some((projection_version, content_hash, _source_watermark)) => {
            (projection_version, content_hash)
        }
        None => {
            return Ok(vec![format!(
                "graph projection metadata is missing for scope {scope_key}"
            )]);
        }
    };

    let mut diagnostics = Vec::new();
    if projection_version != GRAPH_PROJECTION_VERSION {
        diagnostics.push(format!(
            "projection version mismatch: expected {GRAPH_PROJECTION_VERSION} got {projection_version}"
        ));
    }
    if content_hash.is_none() {
        diagnostics.push("projection content hash is missing".to_string());
    }

    let meta_id = graph_projection_meta_id(scope);
    let meta_properties = match conn
        .query_row(
            "SELECT properties_json FROM graph_nodes WHERE id = ?1 AND kind = ?2",
            (&meta_id, GRAPH_PROJECTION_META_KIND),
            |row| row.get::<_, String>(0),
        )
        .optional()?
    {
        Some(raw) => raw,
        None => {
            diagnostics.push(format!("projection_meta node {meta_id} is missing"));
            return Ok(diagnostics);
        }
    };
    let properties = serde_json::from_str::<BTreeMap<String, String>>(&meta_properties)
        .with_context(|| format!("parsing projection_meta properties for {meta_id}"))?;

    let node_version = properties.get("projection_version").map(String::as_str);
    if node_version != Some(GRAPH_PROJECTION_VERSION) {
        diagnostics.push(format!(
            "projection_meta node {meta_id} has stale projection_version"
        ));
    }
    if properties.get("content_hash") != content_hash.as_ref() {
        diagnostics.push(format!(
            "projection_meta node {meta_id} content_hash does not match graph_projection_versions"
        ));
    }
    Ok(diagnostics)
}
/// Loads every node and edge row and converts them to Convex projection rows.
///
/// Nodes are ordered by id and edges by `(from_id, kind, to_id)`. The JSON
/// columns are parsed into the typed fields of the row structs; a NULL
/// `freshness_json` becomes `None`. The first database or JSON error aborts
/// the load.
pub(crate) fn sqlite_convex_rows_from_conn(conn: &Connection) -> Result<ConvexProjectionRows> {
    let mut node_stmt = conn.prepare(
        "SELECT id, kind, label, properties_json, provenance_json, freshness_json FROM graph_nodes ORDER BY id",
    )?;
    let mut node_rows = node_stmt.query([])?;
    let mut nodes = Vec::new();
    while let Some(row) = node_rows.next()? {
        // JSON columns are read first, then the plain identity columns.
        let properties_json: String = row.get(3)?;
        let provenance_json: String = row.get(4)?;
        let freshness_json: Option<String> = row.get(5)?;
        let external_id: String = row.get(0)?;
        let kind: String = row.get(1)?;
        let label: String = row.get(2)?;
        nodes.push(ConvexNodeRow {
            external_id,
            kind,
            label,
            properties: serde_json::from_str(&properties_json)?,
            provenance: serde_json::from_str(&provenance_json)?,
            freshness: match freshness_json {
                Some(raw) => Some(serde_json::from_str(&raw)?),
                None => None,
            },
        });
    }

    let mut edge_stmt = conn.prepare(
        "SELECT edge_key, from_id, to_id, kind, properties_json, provenance_json, freshness_json FROM graph_edges ORDER BY from_id, kind, to_id",
    )?;
    let mut edge_rows = edge_stmt.query([])?;
    let mut edges = Vec::new();
    while let Some(row) = edge_rows.next()? {
        let properties_json: String = row.get(4)?;
        let provenance_json: String = row.get(5)?;
        let freshness_json: Option<String> = row.get(6)?;
        let edge_key: String = row.get(0)?;
        let from_external_id: String = row.get(1)?;
        let to_external_id: String = row.get(2)?;
        let kind: String = row.get(3)?;
        edges.push(ConvexEdgeRow {
            edge_key,
            from_external_id,
            to_external_id,
            kind,
            properties: serde_json::from_str(&properties_json)?,
            provenance: serde_json::from_str(&provenance_json)?,
            freshness: match freshness_json {
                Some(raw) => Some(serde_json::from_str(&raw)?),
                None => None,
            },
        });
    }

    Ok(ConvexProjectionRows { nodes, edges })
}
/// Formats an index as `table.name(field1,field2,...)` for use in
/// diagnostics.
fn convex_required_index_label(index: &ConvexRequiredIndex) -> String {
    let fields = index.fields.join(",");
    format!("{}.{}({})", index.table, index.name, fields)
}
/// Locates the index metadata inside a snapshot document.
///
/// Lookup order: top-level `indexes`, then top-level `requiredIndexes`, then
/// `metadata.indexes`. Returns `None` when none of them is present.
fn convex_snapshot_index_value(value: &serde_json::Value) -> Option<&serde_json::Value> {
    if let Some(indexes) = value.get("indexes") {
        return Some(indexes);
    }
    if let Some(indexes) = value.get("requiredIndexes") {
        return Some(indexes);
    }
    value.get("metadata")?.get("indexes")
}
/// Parses the index metadata declared by a snapshot document.
///
/// Returns `Ok(None)` when the snapshot carries no index metadata at all, and
/// an error (with context) when the metadata is present but malformed.
///
/// The metadata is deserialized straight from the borrowed JSON value, so no
/// intermediate deep copy of the subtree is made.
fn convex_snapshot_declared_indexes(
    value: &serde_json::Value,
) -> Result<Option<Vec<ConvexRequiredIndex>>> {
    let Some(indexes) = convex_snapshot_index_value(value) else {
        return Ok(None);
    };
    // `&serde_json::Value` is itself a `Deserializer`, so no clone is needed.
    let declared = Vec::<ConvexRequiredIndex>::deserialize(indexes)
        .context("parsing Convex snapshot index metadata")?;
    Ok(Some(declared))
}
fn convex_snapshot_index_diagnostics(value: &serde_json::Value) -> Result<Vec<String>> {
let required = convex_required_indexes();
let Some(declared) = convex_snapshot_declared_indexes(value)? else {
return Ok(vec![format!(
"Convex snapshot index metadata is missing; required indexes not confirmed: {}",
required
.iter()
.map(convex_required_index_label)
.collect::<Vec<_>>()
.join(", ")
)]);
};
let declared = declared.into_iter().collect::<BTreeSet<_>>();
let missing = required
.iter()
.filter(|index| !declared.contains(*index))
.map(convex_required_index_label)
.collect::<Vec<_>>();
if missing.is_empty() {
Ok(Vec::new())
} else {
Ok(vec![format!(
"Convex snapshot is missing required index metadata: {}",
missing.join(", ")
)])
}
}
pub(crate) fn load_convex_projection_snapshot_value(
snapshot_path: &Path,
) -> Result<(ConvexProjectionRows, serde_json::Value)> {
let content = fs::read_to_string(snapshot_path).with_context(|| {
format!(
"reading Convex projection snapshot {}",
snapshot_path.display()
)
})?;
let value = serde_json::from_str::<serde_json::Value>(&content).with_context(|| {
format!(
"parsing Convex projection snapshot {}",
snapshot_path.display()
)
})?;
let rows = serde_json::from_value::<ConvexProjectionRows>(value.clone())
.with_context(|| format!("parsing Convex projection rows {}", snapshot_path.display()))?;
Ok((rows, value))
}
/// Appends the SQLite `graph.db` health checks to `report`.
///
/// Checks are pushed in a fixed order: existence, open, optional read
/// recovery, schema, projection metadata, duplicate ids, orphan edges, row
/// JSON, tombstone retention, and compaction policy. A failing inspection is
/// converted into a diagnostic string rather than propagated.
///
/// Returns the read-only connection when the database exists and could be
/// opened, and `None` when it is missing or fails to open (in which case only
/// the checks up to that point are recorded).
pub(crate) fn append_sqlite_graph_doctor_checks(
report: &mut GraphDbDoctorReport,
root: &Path,
scope: Option<&str>,
graph_db: &Path,
) -> Option<substrate::SqliteReadOnlyConnection> {
// Two repair suggestions are reused below; which one a check offers is chosen per check.
let rebuild = graph_db_rebuild_command(root, scope);
let backup_rebuild = graph_db_backup_rebuild_command(root, scope, graph_db);
// Without a database file nothing else can be inspected, so stop after this check.
if !graph_db.exists() {
report.push_check(graph_db_doctor_check(
"sqlite_graph_db_exists",
vec![format!("graph.db is missing at {}", graph_db.display())],
vec![rebuild],
));
return None;
}
report.push_check(graph_db_doctor_check(
"sqlite_graph_db_exists",
Vec::new(),
vec![rebuild.clone()],
));
// An open failure also ends the run: every later check needs the connection.
let conn = match open_sqlite_graph_db_readonly(graph_db) {
Ok(conn) => conn,
Err(err) => {
report.push_check(graph_db_doctor_check(
"sqlite_graph_db_open",
vec![err.to_string()],
vec![backup_rebuild],
));
return None;
}
};
report.push_check(graph_db_doctor_check(
"sqlite_graph_db_open",
Vec::new(),
vec![rebuild.clone()],
));
// Recorded only when the connection reports a recovery; built directly with
// `fail_closed: false` and no repair commands.
if let Some(recovery) = conn.recovery() {
report.push_check(GraphDbDoctorCheck {
name: "sqlite_graph_db_read_recovery".to_string(),
status: "recovered".to_string(),
fail_closed: false,
diagnostics: vec![graph_db_read_recovery_diagnostic(recovery)],
repair_commands: Vec::new(),
});
}
// Each inspection below turns its own error into a single diagnostic line so the
// remaining checks still run.
let schema_diagnostics = sqlite_graph_schema_diagnostics(conn.conn())
.unwrap_or_else(|err| vec![format!("graph.db schema inspection failed: {err}")]);
report.push_check(graph_db_doctor_check(
"sqlite_schema",
schema_diagnostics,
vec![backup_rebuild.clone()],
));
let metadata_diagnostics = sqlite_graph_projection_metadata_diagnostics(conn.conn(), scope)
.unwrap_or_else(|err| {
vec![format!(
"graph projection metadata inspection failed: {err}"
)]
});
report.push_check(graph_db_doctor_check(
"sqlite_projection_metadata",
metadata_diagnostics,
vec![rebuild.clone()],
));
let duplicate_diagnostics = sqlite_graph_duplicate_diagnostics(conn.conn())
.unwrap_or_else(|err| vec![format!("duplicate id inspection failed: {err}")]);
report.push_check(graph_db_doctor_check(
"sqlite_duplicate_ids",
duplicate_diagnostics,
vec![backup_rebuild.clone()],
));
let orphan_diagnostics = sqlite_graph_orphan_diagnostics(conn.conn())
.unwrap_or_else(|err| vec![format!("orphan edge inspection failed: {err}")]);
report.push_check(graph_db_doctor_check(
"sqlite_orphan_edges",
orphan_diagnostics,
vec![rebuild.clone()],
));
let json_diagnostics = sqlite_graph_json_diagnostics(conn.conn())
.unwrap_or_else(|err| vec![format!("graph row JSON inspection failed: {err}")]);
report.push_check(graph_db_doctor_check(
"sqlite_row_json",
json_diagnostics,
vec![backup_rebuild],
));
// Tombstone retention uses the "root" scope key when no scope was given and is
// reported as a warning (never fail-closed).
let tombstone_diagnostics =
sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope.unwrap_or("root"))
.unwrap_or_else(|err| {
vec![format!(
"graph tombstone retention inspection failed: {err}"
)]
});
report.push_check(GraphDbDoctorCheck {
name: "sqlite_tombstone_retention".to_string(),
status: if tombstone_diagnostics.is_empty() {
"ok".to_string()
} else {
"warning".to_string()
},
fail_closed: false,
diagnostics: tombstone_diagnostics,
repair_commands: Vec::new(),
});
// The compaction check mirrors the policy status; its recommendations become
// repair commands only when the policy status is "recommended".
let compaction_check = match sqlite_graph_counts(conn.conn(), scope.unwrap_or("root")) {
Ok(counts) => {
let policy = graph_db_compaction_policy(root, scope, &counts, false);
GraphDbDoctorCheck {
name: "sqlite_compaction_policy".to_string(),
status: policy.status.clone(),
fail_closed: false,
diagnostics: policy.proof,
repair_commands: if policy.status == "recommended" {
policy.recommendations
} else {
Vec::new()
},
}
}
Err(err) => GraphDbDoctorCheck {
name: "sqlite_compaction_policy".to_string(),
status: "warning".to_string(),
fail_closed: false,
diagnostics: vec![format!("graph compaction policy inspection failed: {err}")],
repair_commands: Vec::new(),
},
};
report.push_check(compaction_check);
Some(conn)
}
/// Appends the Convex snapshot health checks to `report`.
///
/// Checks are pushed in order: snapshot presence, snapshot parse, row
/// diagnostics, required indexes, and projection freshness. The function
/// returns early (after recording the failing check) when no snapshot path was
/// supplied or the snapshot cannot be loaded. It also sets
/// `report.required_indexes`.
///
/// `local_rows` is the local projection to compare against; when it is `None`
/// the freshness check is recorded as unverifiable.
pub(crate) fn append_convex_snapshot_doctor_checks(
    report: &mut GraphDbDoctorReport,
    root: &Path,
    scope: Option<&str>,
    local_rows: Option<&ConvexProjectionRows>,
    snapshot_path: Option<&Path>,
) {
    let repair = convex_refresh_command(root, scope);
    let Some(snapshot_path) = snapshot_path else {
        report.push_check(graph_db_doctor_check(
            "convex_snapshot_present",
            vec!["--backend convex-snapshot requires --convex-snapshot <rows.json>".to_string()],
            vec![format!(
                "tsift convex-sync {}{} --json > convex-rows.json",
                shell_quote(root.to_string_lossy().as_ref()),
                graph_db_scope_arg(scope)
            )],
        ));
        return;
    };
    report.push_check(graph_db_doctor_check(
        "convex_snapshot_present",
        Vec::new(),
        vec![repair.clone()],
    ));
    let (snapshot, snapshot_value) = match load_convex_projection_snapshot_value(snapshot_path) {
        Ok(snapshot) => snapshot,
        Err(err) => {
            report.push_check(graph_db_doctor_check(
                "convex_snapshot_parse",
                // `{:#}` renders the whole context chain; plain `to_string()` would keep
                // only the outermost context and drop the underlying I/O or JSON cause.
                vec![format!("{err:#}")],
                vec![repair],
            ));
            return;
        }
    };
    report.push_check(graph_db_doctor_check(
        "convex_snapshot_parse",
        Vec::new(),
        vec![repair.clone()],
    ));
    let row_diagnostics = convex_projection_row_diagnostics(&snapshot);
    report.push_check(graph_db_doctor_check(
        "convex_snapshot_rows",
        row_diagnostics,
        vec![repair.clone()],
    ));
    // A metadata parse failure becomes a diagnostic (with its cause) instead of aborting.
    let index_diagnostics = convex_snapshot_index_diagnostics(&snapshot_value)
        .unwrap_or_else(|err| vec![format!("{err:#}")]);
    report.required_indexes = convex_required_indexes();
    report.push_check(graph_db_doctor_check(
        "convex_required_indexes",
        index_diagnostics,
        vec![
            "Add the indexes from examples/convex-graph/schema.ts, then redeploy the Convex app"
                .to_string(),
        ],
    ));
    if let Some(local_rows) = local_rows {
        let freshness = convex_projection_freshness(local_rows, Some(&snapshot), scope);
        report.push_check(graph_db_doctor_check(
            "convex_projection_freshness",
            freshness.diagnostics,
            vec![repair],
        ));
    } else {
        report.push_check(graph_db_doctor_check(
            "convex_projection_freshness",
            vec![
                "local SQLite graph.db could not be read, so Convex freshness cannot be verified"
                    .to_string(),
            ],
            vec![graph_db_rebuild_command(root, scope)],
        ));
    }
}
/// Builds the `tsift graph-db ... doctor --json` command line for a Convex
/// snapshot, shell-quoting the root and snapshot paths.
fn graph_db_convex_snapshot_doctor_command(
    root: &Path,
    scope: Option<&str>,
    snapshot_path: &Path,
) -> String {
    let quoted_root = shell_quote(root.to_string_lossy().as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    let quoted_snapshot = shell_quote(snapshot_path.to_string_lossy().as_ref());
    format!(
        "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} doctor --json",
        quoted_root, scope_arg, quoted_snapshot
    )
}
/// Builds the `tsift graph-db ... schema --json` command line that reads through
/// a Convex snapshot, shell-quoting the root and snapshot paths.
fn graph_db_convex_snapshot_read_command(
    root: &Path,
    scope: Option<&str>,
    snapshot_path: &Path,
) -> String {
    let quoted_root = shell_quote(root.to_string_lossy().as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    let quoted_snapshot = shell_quote(snapshot_path.to_string_lossy().as_ref());
    format!(
        "tsift graph-db --path {}{} --backend convex-snapshot --convex-snapshot {} schema --json",
        quoted_root, scope_arg, quoted_snapshot
    )
}
/// Builds the `tsift convex-sync ... --snapshot ... --json` command line,
/// shell-quoting the root and snapshot paths.
fn convex_sync_snapshot_diff_command(
    root: &Path,
    scope: Option<&str>,
    snapshot_path: &Path,
) -> String {
    let quoted_root = shell_quote(root.to_string_lossy().as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    let quoted_snapshot = shell_quote(snapshot_path.to_string_lossy().as_ref());
    format!(
        "tsift convex-sync {}{} --snapshot {} --json",
        quoted_root, scope_arg, quoted_snapshot
    )
}
/// Borrowed inputs consumed by `graph_db_drift_report`.
pub(crate) struct GraphDbDriftInput<'a> {
/// Project root; echoed in the report and used to build follow-up commands.
root: &'a Path,
/// Optional projection scope passed through to freshness checks and commands.
scope: Option<&'a str>,
/// Path of the local graph database; only echoed in the report.
graph_db: &'a Path,
/// Path of the Convex snapshot file; echoed and used in follow-up commands.
snapshot_path: &'a Path,
/// Projection rows from the local side of the comparison.
local: &'a ConvexProjectionRows,
/// Projection rows parsed from the Convex snapshot.
snapshot: &'a ConvexProjectionRows,
/// Raw snapshot JSON, inspected for index metadata.
snapshot_value: &'a serde_json::Value,
/// Caller-supplied warnings copied into the report unchanged.
warnings: Vec<String>,
}
/// Compares the local projection with a Convex snapshot and builds the drift
/// report.
///
/// The report status is:
/// - `"fail_closed"` when the snapshot has duplicate rows, orphan references,
///   or unconfirmed required indexes;
/// - `"drift"` when freshness fails closed or any upsert/tombstone is pending;
/// - `"current"` otherwise, which is the only state that allows graph reads.
///
/// Diagnostics are listed in the order: row diagnostics, index diagnostics,
/// freshness diagnostics, then a projection diff summary when drift exists.
pub(crate) fn graph_db_drift_report(input: GraphDbDriftInput<'_>) -> GraphDbDriftReport {
    let GraphDbDriftInput {
        root,
        scope,
        graph_db,
        snapshot_path,
        local,
        snapshot,
        snapshot_value,
        warnings,
    } = input;
    let freshness = convex_projection_freshness(local, Some(snapshot), scope);
    let (node_upserts, edge_upserts, node_tombstones, edge_tombstones) =
        convex_rows_diff(local, Some(snapshot));
    let row_diagnostics = convex_projection_row_diagnostics(snapshot);
    // `{:#}` includes the underlying parse error; `{}` would print only the context
    // text and leave the operator without the actual cause.
    let index_diagnostics = convex_snapshot_index_diagnostics(snapshot_value)
        .unwrap_or_else(|err| vec![format!("Convex snapshot index metadata failed: {err:#}")]);
    let local_hash = freshness.local_hash.clone();
    let snapshot_hash = freshness.snapshot_hash.clone();
    let stale_nodes = freshness.stale_nodes.clone();
    let stale_edges = freshness.stale_edges.clone();
    // Row failures are classified by the wording of the row diagnostics.
    let duplicate_failures = row_diagnostics
        .iter()
        .filter(|diagnostic| diagnostic.contains("duplicate"))
        .count();
    let orphan_failures = row_diagnostics
        .iter()
        .filter(|diagnostic| diagnostic.contains("references missing"))
        .count();
    // Counts index diagnostic lines, not individual indexes: the index check emits
    // at most one line, so this is effectively a 0/1 flag.
    let missing_required_indexes = index_diagnostics.len();
    let stale_projection_metadata =
        usize::from(local_hash != snapshot_hash || snapshot_hash.is_none());
    let hard_failures = duplicate_failures + orphan_failures + missing_required_indexes;
    let has_drift = freshness.fail_closed
        || !node_upserts.is_empty()
        || !edge_upserts.is_empty()
        || !node_tombstones.is_empty()
        || !edge_tombstones.is_empty();
    // Hard failures take precedence over plain drift.
    let status = if hard_failures > 0 {
        "fail_closed"
    } else if has_drift {
        "drift"
    } else {
        "current"
    }
    .to_string();
    let mut diagnostics = Vec::new();
    diagnostics.extend(row_diagnostics);
    diagnostics.extend(index_diagnostics);
    diagnostics.extend(freshness.diagnostics.clone());
    if has_drift {
        diagnostics.push(format!(
            "projection diff: {} node upsert(s), {} edge upsert(s), {} node tombstone(s), {} edge tombstone(s)",
            node_upserts.len(),
            edge_upserts.len(),
            node_tombstones.len(),
            edge_tombstones.len()
        ));
    }
    // The doctor command is always suggested; a current projection adds a read
    // command, anything else adds the diff and refresh commands.
    let mut next_commands = vec![graph_db_convex_snapshot_doctor_command(
        root,
        scope,
        snapshot_path,
    )];
    if status == "current" {
        next_commands.push(graph_db_convex_snapshot_read_command(
            root,
            scope,
            snapshot_path,
        ));
    } else {
        next_commands.push(convex_sync_snapshot_diff_command(
            root,
            scope,
            snapshot_path,
        ));
        next_commands.push(convex_refresh_command(root, scope));
    }
    GraphDbDriftReport {
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        graph_db: graph_db.to_string_lossy().to_string(),
        convex_snapshot: snapshot_path.to_string_lossy().to_string(),
        status: status.clone(),
        graph_reads_allowed: status == "current",
        projection_version: GRAPH_PROJECTION_VERSION.to_string(),
        local_hash,
        snapshot_hash,
        summary: GraphDbDriftSummary {
            node_upserts: node_upserts.len(),
            edge_upserts: edge_upserts.len(),
            node_tombstones: node_tombstones.len(),
            edge_tombstones: edge_tombstones.len(),
            stale_nodes: stale_nodes.len(),
            stale_edges: stale_edges.len(),
            stale_projection_metadata,
            duplicate_failures,
            orphan_failures,
            missing_required_indexes,
        },
        node_upserts: node_upserts
            .into_iter()
            .map(|row| row.external_id)
            .collect(),
        edge_upserts: edge_upserts.into_iter().map(|row| row.edge_key).collect(),
        node_tombstones,
        edge_tombstones,
        stale_nodes,
        stale_edges,
        diagnostics,
        next_commands,
        required_indexes: convex_required_indexes(),
        warnings,
    }
}
/// Prints a drift report to stdout in the human-readable line format:
/// status, paths, upsert and tombstone counts, then one line per diagnostic
/// and per suggested next command.
pub(crate) fn print_graph_db_drift_human(report: &GraphDbDriftReport) {
    let summary = &report.summary;
    println!(
        "graph-db drift status: {} reads_allowed: {}",
        report.status, report.graph_reads_allowed
    );
    println!("graph_db: {}", report.graph_db);
    println!("convex_snapshot: {}", report.convex_snapshot);
    println!(
        "upserts: {} node(s), {} edge(s)",
        summary.node_upserts, summary.edge_upserts
    );
    println!(
        "tombstones: {} node(s), {} edge(s)",
        summary.node_tombstones, summary.edge_tombstones
    );
    report
        .diagnostics
        .iter()
        .for_each(|diagnostic| println!("diagnostic: {diagnostic}"));
    report
        .next_commands
        .iter()
        .for_each(|command| println!("next: {command}"));
}
/// Prints a doctor report to stdout in the human-readable line format:
/// backend and status, paths, each check with its diagnostics, then the
/// report-level repair commands.
pub(crate) fn print_graph_db_doctor_human(report: &GraphDbDoctorReport) {
    println!(
        "graph-db doctor backend: {} status: {}",
        report.backend, report.status
    );
    println!("graph_db: {}", report.graph_db);
    if let Some(snapshot) = &report.convex_snapshot {
        println!("convex_snapshot: {snapshot}");
    }
    for check in report.checks.iter() {
        println!("check: {} {}", check.name, check.status);
        check
            .diagnostics
            .iter()
            .for_each(|diagnostic| println!(" diagnostic: {diagnostic}"));
    }
    report
        .repair_commands
        .iter()
        .for_each(|command| println!("repair: {command}"));
}
/// Builds the operator report for `operation` from the on-disk graph database.
///
/// When `graph_db` does not exist, the report is marked `"missing"` with zeroed
/// counts, fail-closed freshness, and blocked readiness. Otherwise the
/// database is opened read-only and the report reflects its freshness, schema
/// diagnostics, row counts, tombstone-retention warnings, and semantic
/// readiness.
///
/// # Errors
///
/// Fails when an existing database cannot be opened, or when freshness or row
/// counts cannot be read. Schema and tombstone inspection failures are folded
/// into diagnostics/warnings instead.
pub(crate) fn graph_db_operator_report_from_disk(
    root: &Path,
    scope: Option<&str>,
    graph_db: &Path,
    operation: &str,
    refresh: Option<GraphDbRefreshSummary>,
    warnings: Vec<String>,
) -> Result<GraphDbOperatorReport> {
    if !graph_db.exists() {
        let next_commands = graph_db_operator_next_commands(root, scope, true);
        let empty_counts = GraphDbOperatorCounts {
            nodes: 0,
            edges: 0,
            tombstones: GraphDbTombstoneCounts {
                nodes: 0,
                edges: 0,
                total: 0,
            },
            file_size_bytes: None,
            freelist_bytes: None,
        };
        let missing_freshness = GraphDbFreshnessReport {
            status: "missing".to_string(),
            fail_closed: true,
            projection_version: None,
            content_hash: None,
            source_watermark: None,
            diagnostics: vec![
                "graph.db is missing; run graph-db refresh before trusting graph reads"
                    .to_string(),
            ],
        };
        return Ok(GraphDbOperatorReport {
            root: root.to_string_lossy().to_string(),
            scope: scope.map(str::to_string),
            graph_db: graph_db.to_string_lossy().to_string(),
            operation: operation.to_string(),
            status: "missing".to_string(),
            materialized: false,
            freshness: missing_freshness,
            readiness: graph_effectiveness_blocked(
                "graph_db_missing",
                vec![
                    "graph.db is missing; materialize the projection before relying on graph effectiveness".to_string(),
                ],
                next_commands.clone(),
            ),
            // The policy only borrows the counts, so it is evaluated before they are moved in.
            compaction: graph_db_compaction_policy(root, scope, &empty_counts, false),
            counts: empty_counts,
            refresh,
            recovery: None,
            next_commands,
            warnings,
        });
    }

    // Queries against the store use "root" as the scope key when no scope is given.
    let scope_key = scope.unwrap_or("root");
    let conn = open_sqlite_graph_db_readonly(graph_db)?;
    let recovery = conn.recovery();
    let mut warnings = warnings;
    if let Some(recovery) = recovery {
        warnings.push(graph_db_read_recovery_diagnostic(recovery));
    }
    let mut freshness = sqlite_graph_freshness_from_conn(conn.conn(), scope_key)?;
    let schema_diagnostics = sqlite_graph_schema_diagnostics(conn.conn())
        .unwrap_or_else(|err| vec![format!("graph.db schema inspection failed: {err}")]);
    // Any schema problem forces the projection to be treated as stale.
    if !schema_diagnostics.is_empty() {
        freshness.diagnostics.extend(schema_diagnostics);
        freshness.fail_closed = true;
        freshness.status = "stale".to_string();
    }
    let counts = sqlite_graph_counts(conn.conn(), scope_key)?;
    let retention_warnings = sqlite_graph_tombstone_retention_diagnostics(conn.conn(), scope_key)
        .unwrap_or_else(|err| {
            vec![format!(
                "graph tombstone retention inspection failed: {err}"
            )]
        });
    warnings.extend(retention_warnings);
    let status = String::from(if freshness.fail_closed {
        "stale"
    } else {
        "current"
    });
    Ok(GraphDbOperatorReport {
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        graph_db: graph_db.to_string_lossy().to_string(),
        operation: operation.to_string(),
        status,
        materialized: true,
        freshness,
        readiness: graph_db_semantic_readiness(root, scope),
        compaction: graph_db_compaction_policy(root, scope, &counts, false),
        counts,
        refresh,
        recovery,
        next_commands: graph_db_operator_next_commands(root, scope, false),
        warnings,
    })
}
/// Prints an operator report to stdout in the human-readable line format.
///
/// Output order: operation/status, database path, projection metadata, row
/// counts, readiness, optional storage and refresh details, compaction status
/// and proofs, optional recovery note, then diagnostics, warnings, and next
/// commands. Missing projection metadata is shown as `<missing>`.
fn print_graph_db_operator_human(report: &GraphDbOperatorReport) {
    let freshness = &report.freshness;
    let counts = &report.counts;
    let readiness = &report.readiness;
    let compaction = &report.compaction;

    println!(
        "graph-db {} status: {} materialized: {}",
        report.operation, report.status, report.materialized
    );
    println!("graph_db: {}", report.graph_db);

    let projection_version = freshness.projection_version.as_deref().unwrap_or("<missing>");
    let content_hash = freshness.content_hash.as_deref().unwrap_or("<missing>");
    let source_watermark = freshness.source_watermark.as_deref().unwrap_or("<missing>");
    println!(
        "projection: version={} hash={} watermark={}",
        projection_version, content_hash, source_watermark
    );
    println!(
        "rows: {} node(s), {} edge(s), {} tombstone(s)",
        counts.nodes, counts.edges, counts.tombstones.total
    );
    println!(
        "readiness: {} reason: {} fail_closed: {}",
        readiness.status, readiness.reason, readiness.fail_closed
    );
    // Storage is only reported when the file size is known; a missing freelist size prints as 0.
    if let Some(file_size) = counts.file_size_bytes {
        println!(
            "storage: {} byte(s), {} free byte(s)",
            file_size,
            counts.freelist_bytes.unwrap_or(0)
        );
    }
    if let Some(delta) = &report.refresh {
        println!(
            "refresh: {} tombstoned node(s), {} tombstoned edge(s)",
            delta.tombstoned_nodes, delta.tombstoned_edges
        );
        println!(
            "delta: {} node upsert(s), {} edge upsert(s), {} property row upsert(s), {} unchanged node(s), {} unchanged edge(s), {} unchanged property row(s), {} deleted property row(s), {} pruned tombstone(s)",
            delta.upserted_nodes,
            delta.upserted_edges,
            delta.upserted_properties,
            delta.unchanged_nodes,
            delta.unchanged_edges,
            delta.unchanged_properties,
            delta.deleted_properties,
            delta.pruned_tombstones
        );
    }
    println!(
        "compaction: {} tombstone_scan_rows={} live_rows={}",
        compaction.status, compaction.tombstone_scan_rows, compaction.live_rows
    );
    compaction
        .proof
        .iter()
        .for_each(|proof| println!("compaction proof: {proof}"));
    if let Some(recovery) = report.recovery {
        println!("recovery: {}", graph_db_read_recovery_diagnostic(recovery));
    }
    freshness
        .diagnostics
        .iter()
        .for_each(|diagnostic| println!("diagnostic: {diagnostic}"));
    readiness
        .diagnostics
        .iter()
        .for_each(|diagnostic| println!("readiness diagnostic: {diagnostic}"));
    report
        .warnings
        .iter()
        .for_each(|warning| println!("warning: {warning}"));
    readiness
        .next_commands
        .iter()
        .for_each(|command| println!("readiness next: {command}"));
    report
        .next_commands
        .iter()
        .for_each(|command| println!("next: {command}"));
}
/// Emits an operator report in the requested output format.
///
/// With JSON output enabled the report goes through `print_json_or_envelope`
/// together with a one-line summary and headline metrics; otherwise the
/// human-readable printer is used.
pub(crate) fn print_graph_db_operator_report(
    report: &GraphDbOperatorReport,
    format: OutputFormat,
) -> Result<()> {
    if !format.json_output {
        print_graph_db_operator_human(report);
        return Ok(());
    }
    let summary = ToolEnvelopeSummary {
        text: format!(
            "Graph DB {} status {} with {} node(s), {} edge(s), {} tombstone(s)",
            report.operation,
            report.status,
            report.counts.nodes,
            report.counts.edges,
            report.counts.tombstones.total
        ),
        metrics: vec![
            envelope_metric("operation", &report.operation),
            envelope_metric("status", &report.status),
            envelope_metric("nodes", report.counts.nodes),
            envelope_metric("edges", report.counts.edges),
            envelope_metric("tombstones", report.counts.tombstones.total),
            envelope_metric("compaction", &report.compaction.status),
            envelope_metric("readiness", &report.readiness.status),
        ],
    };
    print_json_or_envelope(
        report,
        &format,
        "graph-db",
        &report.operation,
        summary,
        false,
        report.next_commands.clone(),
    )
}
/// Strips a trailing parenthesised note from a recommended run command.
///
/// Everything from the first `" ("` onward is dropped; a command without that
/// marker is returned unchanged.
fn status_run_command_without_notes(run: &str) -> &str {
    match run.find(" (") {
        Some(notes_start) => &run[..notes_start],
        None => run,
    }
}
fn graph_db_status_summarize_command(report: &status::StatusReport) -> String {
report
.recommendations
.run
.as_deref()
.filter(|command| command.contains("summarize --extract"))
.map(status_run_command_without_notes)
.unwrap_or("tsift summarize --extract .")
.to_string()
}
/// Derives graph semantic readiness from the project status check.
///
/// Readiness is "ready" only when the summary cache is available. It is
/// blocked when the status check itself fails, when the summary cache is
/// empty, or when summaries are unavailable, and each blocked result carries
/// the commands to run next.
fn graph_db_semantic_readiness(root: &Path, scope: Option<&str>) -> GraphEffectivenessReadiness {
    let report = match status::check_status(root) {
        Ok(report) => report,
        Err(err) => {
            let diagnostic = format!(
                "semantic readiness could not inspect summary cache after graph-db refresh: {err:#}"
            );
            return graph_effectiveness_blocked(
                "status_check_unavailable",
                vec![diagnostic],
                vec![graph_db_refresh_command(root, scope)],
            );
        }
    };
    match &report.summaries {
        status::SummaryStatus::Unavailable => {
            let diagnostics = vec![
                "summary cache unavailable because the source index is missing; build the index before relying on semantic graph evidence".to_string(),
            ];
            // Recommended run command first (when present), then the refresh command.
            let next_commands = report
                .recommendations
                .run
                .iter()
                .cloned()
                .chain(std::iter::once(graph_db_refresh_command(root, scope)))
                .collect();
            graph_effectiveness_blocked("summary_cache_unavailable", diagnostics, next_commands)
        }
        status::SummaryStatus::None { .. } => {
            let summarize = graph_db_status_summarize_command(&report);
            let refresh = graph_db_refresh_command(root, scope);
            let diagnostic = format!(
                "summary cache empty: graph-db materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
                summarize,
                root.display(),
                refresh
            );
            graph_effectiveness_blocked(
                "summary_cache_empty",
                vec![diagnostic],
                vec![summarize, refresh],
            )
        }
        status::SummaryStatus::Available {
            cached_files,
            total_indexed_files,
            coverage_pct,
            ..
        } => {
            let mut readiness = graph_effectiveness_ready("semantic_rows_available");
            let coverage_note = format!(
                "summary cache has {cached_files}/{total_indexed_files} indexed file(s) cached ({coverage_pct}% coverage); graph semantic rows are available"
            );
            readiness.diagnostics.push(coverage_note);
            readiness
        }
    }
}
/// Collects operator warnings from the project status check.
///
/// Starts from the status reminders and, when the summary cache is empty,
/// appends a warning naming the summarize command to run. A failed status
/// check yields a single warning describing the failure. Duplicates are
/// removed while preserving order.
pub(crate) fn graph_db_operator_status_warnings(root: &Path, scope: Option<&str>) -> Vec<String> {
    let report = match status::check_status(root) {
        Ok(report) => report,
        Err(err) => {
            return vec![format!(
                "status check unavailable after graph-db refresh: {err:#}"
            )];
        }
    };
    // Resolve the summarize command while the whole report can still be borrowed;
    // the reminders are moved out of it immediately afterwards.
    let summaries_empty = matches!(report.summaries, status::SummaryStatus::None { .. });
    let summarize_run = summaries_empty.then(|| graph_db_status_summarize_command(&report));
    let mut warnings = report.reminders;
    if let Some(run) = summarize_run {
        warnings.push(format!(
            "summary cache empty: graph-db refresh materialized code/session rows but semantic rows are unavailable; run `{}` from {} and rerun `{}` before relying on semantic evidence",
            run,
            root.display(),
            graph_db_refresh_command(root, scope)
        ));
    }
    dedupe_preserve_order(warnings)
}
/// Prints a compaction report to stdout in the human-readable line format:
/// headline, database path, before/after counts, then proofs, warnings, and
/// next commands. Unknown file and freelist sizes print as 0.
pub(crate) fn print_graph_db_compaction_human(report: &GraphDbCompactionReport) {
    let before = &report.counts_before;
    let after = &report.counts_after;
    println!(
        "graph-db compact applied:{} pruned_tombstones:{} reclaimed:{} byte(s)",
        report.applied, report.pruned_tombstones, report.reclaimed_bytes
    );
    println!("graph_db: {}", report.graph_db);
    println!(
        "before: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
        before.nodes,
        before.edges,
        before.tombstones.total,
        before.file_size_bytes.unwrap_or(0),
        before.freelist_bytes.unwrap_or(0)
    );
    println!(
        "after: {} node(s), {} edge(s), {} tombstone(s), file={} free={}",
        after.nodes,
        after.edges,
        after.tombstones.total,
        after.file_size_bytes.unwrap_or(0),
        after.freelist_bytes.unwrap_or(0)
    );
    report
        .compaction_after
        .proof
        .iter()
        .for_each(|proof| println!("proof: {proof}"));
    report
        .warnings
        .iter()
        .for_each(|warning| println!("warning: {warning}"));
    report
        .next_commands
        .iter()
        .for_each(|command| println!("next: {command}"));
}
/// Parses raw `KEY=VALUE` strings into property filters.
///
/// Each entry is split at its first `=`, and both sides are trimmed.
///
/// # Errors
///
/// Fails on the first entry that has no `=` or whose trimmed key or value is
/// empty.
fn parse_graph_db_property_filters(raw: &[String]) -> Result<Vec<GraphDbPropertyFilter>> {
    let mut filters = Vec::with_capacity(raw.len());
    for value in raw {
        let (raw_key, raw_value) = value
            .split_once('=')
            .with_context(|| format!("graph-db --property expects KEY=VALUE, got {value:?}"))?;
        let (key, filter_value) = (raw_key.trim(), raw_value.trim());
        if key.is_empty() || filter_value.is_empty() {
            bail!("graph-db --property expects non-empty KEY=VALUE, got {value:?}");
        }
        filters.push(GraphDbPropertyFilter {
            key: key.to_string(),
            value: filter_value.to_string(),
        });
    }
    Ok(filters)
}
/// Assembles query options from raw CLI values.
///
/// A limit of zero is treated the same as no limit.
///
/// # Errors
///
/// Fails when any property filter is not a valid `KEY=VALUE` pair.
fn graph_db_query_options(
    cursor: Option<String>,
    limit: Option<usize>,
    property_filters: &[String],
) -> Result<GraphDbQueryOptions> {
    let property_filters = parse_graph_db_property_filters(property_filters)?;
    let limit = match limit {
        Some(0) | None => None,
        positive => positive,
    };
    Ok(GraphDbQueryOptions {
        cursor,
        limit,
        property_filters,
    })
}
fn graph_db_query_options_for_store(options: &GraphDbQueryOptions) -> GraphQueryOptions {
GraphQueryOptions {
cursor: options.cursor.clone(),
limit: options.limit,
property_filters: options
.property_filters
.iter()
.map(|filter| GraphPropertyFilter {
key: filter.key.clone(),
value: filter.value.clone(),
})
.collect(),
}
}
fn graph_db_page_report_from_store(
page: GraphQueryPage,
property_filters: Vec<GraphDbPropertyFilter>,
) -> GraphDbPageReport {
GraphDbPageReport {
cursor: page.cursor,
limit: page.limit,
next_cursor: page.next_cursor,
returned_nodes: page.returned_nodes,
returned_edges: page.returned_edges,
truncated: page.truncated,
property_filters,
diagnostics: page.diagnostics,
}
}
/// Describes the gate that keeps neighborhood output in stable node-id order.
///
/// The returned gate lists the community-search workloads, metrics, and
/// thresholds taken from `metric_digest`, and its diagnostics mention the
/// `ranked_neighbor_cap` applied to the additive ranked list.
fn graph_db_neighborhood_ranking_gate(
    ranked_neighbor_cap: usize,
) -> GraphDbNeighborhoodRankingGate {
    let required_workloads = metric_digest::COMMUNITY_SEARCH_WORKLOADS
        .iter()
        .map(|workload| (*workload).to_string())
        .collect();
    let required_metrics = metric_digest::COMMUNITY_SEARCH_REQUIRED_METRICS
        .iter()
        .map(|metric| (*metric).to_string())
        .collect();
    let diagnostics = vec![
        "ranked_neighbors is additive; neighborhood nodes remain ordered by stable node id for cursor pagination".to_string(),
        format!(
            "ranked_neighbors is score-capped at {ranked_neighbor_cap} entries so previews stay bounded while cursor pagination remains exhaustive"
        ),
        "changing the default neighborhood order requires the community-search gate to pass for every required workload".to_string(),
    ];
    GraphDbNeighborhoodRankingGate {
        status: "held_default_order_unchanged".to_string(),
        ranked_output_default: false,
        default_order: "stable_node_id".to_string(),
        default_change_gate: "community_search_quality_metrics".to_string(),
        required_workloads,
        required_metrics,
        max_duration_regression_percent: metric_digest::COMMUNITY_MAX_DURATION_REGRESSION_PERCENT,
        min_handle_coverage_pct: metric_digest::COMMUNITY_MIN_HANDLE_COVERAGE_PCT,
        min_duplicate_name_precision: metric_digest::COMMUNITY_MIN_DUPLICATE_NAME_PRECISION,
        min_top_community_stability: metric_digest::COMMUNITY_MIN_TOP_COMMUNITY_STABILITY,
        diagnostics,
    }
}
/// Resolves how many ranked neighbors to return.
///
/// A missing or zero `limit` selects `GRAPH_DB_RANKED_NEIGHBOR_CAP`; any other
/// limit is clamped to the range `1..=GRAPH_DB_RANKED_NEIGHBOR_CAP`.
fn graph_db_ranked_neighbor_cap(limit: Option<usize>) -> usize {
    match limit.filter(|requested| *requested > 0) {
        Some(requested) => requested.clamp(1, GRAPH_DB_RANKED_NEIGHBOR_CAP),
        None => GRAPH_DB_RANKED_NEIGHBOR_CAP,
    }
}
/// Returns the ranked neighbors of `center_id` within the given nodes and
/// edges, limited by `cap`.
///
/// This is a direct delegation to `resolution::ranked_neighbors_capped`; the
/// ranking itself is defined there.
fn graph_db_ranked_neighbors(
center_id: &str,
nodes: &[SubstrateGraphNode],
edges: &[SubstrateGraphEdge],
cap: usize,
) -> Vec<GraphDbRankedNeighbor> {
resolution::ranked_neighbors_capped(center_id, nodes, edges, cap)
}
/// Returns the key identifying an edge: its own id when set, otherwise the
/// stable key computed from the edge's endpoints and kind.
fn graph_db_edge_key(edge: &SubstrateGraphEdge) -> String {
    if !edge.id.is_empty() {
        return edge.id.clone();
    }
    substrate::ConvexEdgeRow::stable_key(&edge.from_id, &edge.to_id, &edge.kind)
}
/// Builds the static schema description returned by the graph-db `schema`
/// operation.
///
/// The schema has four sections: versioned contracts, node record fields,
/// edge record fields, and the supported operations with their command
/// synopses. All text here is emitted to callers as-is.
fn graph_db_schema() -> GraphDbSchema {
GraphDbSchema {
// Versioned contracts; each version comes from its own constant.
contract_versions: vec![
GraphDbSchemaContract {
name: "graph_db_evidence",
version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION,
description: "graph-db evidence JSON packet including packet_id, projection hash, worker context, source handles, worker results, semantic rows, replay commands, and repair commands",
},
GraphDbSchemaContract {
name: "worker_prompt_packet",
version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
description: "conflict-matrix worker prompt packet with owned scope, scheduler fields, stable graph handles, expected tests, expansion commands, token budget, semantic ranking reasons, worker feedback closure controls, and fail-closed prompt text",
},
GraphDbSchemaContract {
name: "conflict_matrix",
version: CONFLICT_MATRIX_CONTRACT_VERSION,
description: "parallel-dispatch decision report keyed by graph evidence packets, scheduler block fields, hard file/symbol/test/config gates, and soft worker-feedback closure ranking",
},
GraphDbSchemaContract {
name: "context_pack_graph_orchestration",
version: CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION,
description: "context-pack graph orchestration summary with projection freshness, evidence packet ids, ownership blocks, and follow-up graph commands",
},
GraphDbSchemaContract {
name: "session_review_follow_up",
version: SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION,
description: "session-review next-context follow-up command contract for resumable digest/context-pack commands",
},
GraphDbSchemaContract {
name: "dispatch_trace",
version: DISPATCH_TRACE_CONTRACT_VERSION,
description: "operator review trace linking backlog, job packets, worker results, source handles, semantic rows, scheduler fields, evidence packet ids, worker feedback closure controls, and worker prompt packets",
},
GraphDbSchemaContract {
name: "dependency_dag",
version: DEPENDENCY_DAG_CONTRACT_VERSION,
description: "topological planning DAG for agent-doc backlog targets with replayable dependency edges, topo batches, and cycle diagnostics",
},
],
// Fields of a node record.
node_fields: vec![
GraphDbSchemaField {
name: "id",
value_type: "string",
description: "Stable provider-neutral node id",
},
GraphDbSchemaField {
name: "kind",
value_type: "string",
description: "Application-defined node family such as file, symbol, or backlog",
},
GraphDbSchemaField {
name: "label",
value_type: "string",
description: "Human-readable label",
},
GraphDbSchemaField {
name: "properties",
value_type: "object<string,string>",
description: "Adapter-specific string properties",
},
GraphDbSchemaField {
name: "provenance",
value_type: "array",
description: "Source system and source reference metadata",
},
GraphDbSchemaField {
name: "freshness",
value_type: "object|null",
description: "Optional content hash and observed timestamp",
},
],
// Fields of an edge record.
edge_fields: vec![
GraphDbSchemaField {
name: "id",
value_type: "string",
description: "Stable provider-neutral edge id derived from from_id, kind, and to_id",
},
GraphDbSchemaField {
name: "from_id",
value_type: "string",
description: "Source node id",
},
GraphDbSchemaField {
name: "to_id",
value_type: "string",
description: "Target node id",
},
GraphDbSchemaField {
name: "kind",
value_type: "string",
description: "Application-defined edge relation",
},
GraphDbSchemaField {
name: "properties",
value_type: "object<string,string>",
description: "Adapter-specific string properties",
},
GraphDbSchemaField {
name: "provenance",
value_type: "array",
description: "Source system and source reference metadata",
},
GraphDbSchemaField {
name: "freshness",
value_type: "object|null",
description: "Optional content hash and observed timestamp",
},
],
// Supported operations, each with its command synopsis.
operations: vec![
GraphDbSchemaOperation {
command: "refresh",
description: "Materialize .tsift/graph.db explicitly with delta upserts/deletes, row hash watermarks, tombstone pruning, projection metadata, row counts, and operator next commands",
},
GraphDbSchemaOperation {
command: "status",
description: "Inspect .tsift/graph.db freshness, projection metadata, row counts, tombstone counts, file-size impact, and operator next commands without refreshing",
},
GraphDbSchemaOperation {
command: "doctor",
description: "Validate graph.db or Convex snapshot health and return fail-closed repair diagnostics plus non-fatal SQLite tombstone-retention warnings",
},
GraphDbSchemaOperation {
command: "drift",
description: "Compare local SQLite projection rows with a Convex snapshot and return upsert, tombstone, metadata, duplicate, orphan, and next-command diagnostics",
},
GraphDbSchemaOperation {
command: "compact [--apply] [--prune-tombstones --confirmed-convex-reconciled]",
description: "Return or apply the post-reconciliation SQLite graph compaction policy, including WAL checkpoint/VACUUM proof and guarded tombstone pruning",
},
GraphDbSchemaOperation {
command: "backend-eval [--candidate duckdb-duckpgq|falkordb|ladybug|kuzu] [--target ID] [--full-projection]",
description: "Benchmark experimental read-only GraphStore backend prototypes against SQLite on bounded real, optional full-project, and synthetic projections across refresh/status/path tiers/evidence/conflict-matrix/dispatch-trace and emit promotion hold/eligibility gates",
},
GraphDbSchemaOperation {
command: "evidence <target> [--depth N] [--limit N]",
description: "Return a bounded versioned graph-db handoff packet for a backlog id or job packet handle, including packet_id, projection hash, worker_context rows, source_handle rows, worker_result rows, semantic_concept/entity rows, shortest paths, replay commands, repair commands, and next commands",
},
GraphDbSchemaOperation {
command: "related <phrase> [--kind concept|entity|all] [--depth N] [--seed-limit N] [--limit N]",
description: "Resolve a natural-language phrase to cached semantic concept/entity seed nodes, then return an incident/outgoing GraphStore neighborhood around those seeds for general knowledge retrieval without changing stable neighborhood pagination defaults",
},
GraphDbSchemaOperation {
command: "dispatch-trace [target...] --path <session> [--format json|html]",
description: "Export a compact graph-backed dispatch trace with evidence packet ids, worker-result feedback closure summaries, graph links, and conflict-matrix worker prompt packets",
},
GraphDbSchemaOperation {
command: "dependency-dag [target...] --path <session>",
description: "Extract a versioned agent-doc dependency DAG from backlog ids, explicit depends-on text, shared file/symbol/test/config evidence, semantic overlap, and worker-result follow-up ids",
},
GraphDbSchemaOperation {
command: "schema",
description: "Return record and operation schemas",
},
GraphDbSchemaOperation {
command: "node <id>",
description: "Return one node by stable id",
},
GraphDbSchemaOperation {
command: "edge <id>",
description: "Return one edge by stable edge id",
},
GraphDbSchemaOperation {
command: "edges [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
description: "Return edge records ordered by stable edge id with SQLite-pushed edge-property filtering and cursor pagination",
},
GraphDbSchemaOperation {
command: "incident <id> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor EDGE_ID] [--limit N]",
description: "Return incoming and outgoing edges incident to one node, ordered by stable edge id with optional kind and edge-property filters",
},
GraphDbSchemaOperation {
command: "kind <kind> [--property KEY=VALUE] [--cursor ID] [--limit N]",
description: "Return nodes of one kind ordered by id with SQLite-pushed property filtering/cursor pagination and query-plan diagnostics",
},
GraphDbSchemaOperation {
command: "neighborhood <id> --depth <n> [--edge-kind <kind>] [--property KEY=VALUE] [--cursor ID] [--limit N]",
description: "Return a directed outgoing subgraph around a node using batched SQLite recursive traversal plus pushed filters/paging when available; JSON also includes additive ranked_neighbors while default nodes remain stable-id ordered",
},
GraphDbSchemaOperation {
command: "path <from> <to> [--edge-kind <kind>] [--max-hops N]",
description: "Return the shortest directed path by node id, optionally bounded by hop count",
},
],
}
}
/// Evaluates projection freshness for `scope` from the SQLite graph store.
///
/// The result is `"missing"` (fail-closed) when no projection metadata is
/// stored, `"stale"` (fail-closed) when the stored projection version differs
/// from `GRAPH_PROJECTION_VERSION` or the content hash is absent, and
/// `"current"` otherwise.
///
/// # Errors
///
/// Propagates a failure to read the projection version from the store.
pub(crate) fn sqlite_graph_freshness(
    store: &SqliteGraphStore,
    scope: &str,
) -> Result<GraphDbFreshnessReport> {
    let Some(version) = store.projection_version(scope)? else {
        return Ok(GraphDbFreshnessReport {
            status: "missing".to_string(),
            fail_closed: true,
            projection_version: None,
            content_hash: None,
            source_watermark: None,
            diagnostics: vec![
                "graph projection metadata is missing; rebuild the graph before trusting reads"
                    .to_string(),
            ],
        });
    };
    let version_mismatch = version.projection_version != GRAPH_PROJECTION_VERSION;
    let hash_missing = version.content_hash.is_none();
    let fail_closed = version_mismatch || hash_missing;
    let mut diagnostics = Vec::new();
    if version_mismatch {
        diagnostics.push(format!(
            "projection version mismatch: expected {} got {}",
            GRAPH_PROJECTION_VERSION, version.projection_version
        ));
    }
    if hash_missing {
        diagnostics.push("projection content hash is missing".to_string());
    }
    let status = if fail_closed { "stale" } else { "current" };
    Ok(GraphDbFreshnessReport {
        status: status.to_string(),
        fail_closed,
        projection_version: Some(version.projection_version),
        content_hash: version.content_hash,
        source_watermark: version.source_watermark,
        diagnostics,
    })
}
/// Converts the result of `convex_projection_freshness` for `local` versus
/// `snapshot` into a `GraphDbFreshnessReport`.
///
/// The report always carries the current `GRAPH_PROJECTION_VERSION`, uses the
/// snapshot hash as its content hash, and never sets a source watermark.
pub(crate) fn convex_graph_freshness(
    local: &ConvexProjectionRows,
    snapshot: &ConvexProjectionRows,
    scope: Option<&str>,
) -> GraphDbFreshnessReport {
    let comparison = convex_projection_freshness(local, Some(snapshot), scope);
    let projection_version = GRAPH_PROJECTION_VERSION.to_string();
    GraphDbFreshnessReport {
        status: comparison.status,
        fail_closed: comparison.fail_closed,
        projection_version: Some(projection_version),
        content_hash: comparison.snapshot_hash,
        source_watermark: None,
        diagnostics: comparison.diagnostics,
    }
}
/// Builds the freshness report for the read-only tokensave adapter.
///
/// The report is always `"current"` and never fails closed; it records the
/// database path as the source watermark and the node/edge/file counts as a
/// single diagnostic line.
///
/// # Errors
///
/// Propagates errors from `store.graph_counts` and `store.file_count`.
pub(crate) fn tokensave_graph_freshness(store: &TokensaveDb) -> Result<GraphDbFreshnessReport> {
    let (node_count, edge_count) = store.graph_counts()?;
    let file_count = store.file_count()?;
    let db_location = store.db_path().to_string_lossy().to_string();
    let summary = format!(
        "tokensave read-only adapter opened {} node(s), {} edge(s), {} file(s)",
        node_count, edge_count, file_count
    );
    Ok(GraphDbFreshnessReport {
        status: "current".to_string(),
        fail_closed: false,
        projection_version: Some("tokensave-readonly".to_string()),
        content_hash: None,
        source_watermark: Some(db_location),
        diagnostics: vec![summary],
    })
}
/// Appends the tokensave doctor checks for `root` to `report`.
///
/// When discovery finds no database a `tokensave_db_exists` check is pushed;
/// when discovery errors a `tokensave_db_open` check carrying the error text is
/// pushed. Otherwise an `ok` `tokensave_db_open` check is recorded, followed by
/// a `tokensave_counts` check that is `ok` only if the node, edge, and file
/// counts could all be read.
pub(crate) fn append_tokensave_graph_doctor_checks(report: &mut GraphDbDoctorReport, root: &Path) {
    let store = match TokensaveDb::discover(root) {
        Ok(Some(store)) => store,
        Ok(None) => {
            report.push_check(graph_db_doctor_check(
                "tokensave_db_exists",
                vec![format!(
                    "tokensave database is missing at {}",
                    root.join(".tokensave").join("tokensave.db").display()
                )],
                Vec::new(),
            ));
            return;
        }
        Err(err) => {
            report.push_check(graph_db_doctor_check(
                "tokensave_db_open",
                vec![err.to_string()],
                Vec::new(),
            ));
            return;
        }
    };

    report.push_check(GraphDbDoctorCheck {
        name: "tokensave_db_open".to_string(),
        status: "ok".to_string(),
        fail_closed: false,
        diagnostics: vec![format!(
            "opened tokensave database at {}",
            store.db_path().display()
        )],
        repair_commands: Vec::new(),
    });

    // All three counts are queried up front so a failure report can show
    // which of them errored.
    let counts = (store.node_count(), store.edge_count(), store.file_count());
    match counts {
        (Ok(nodes), Ok(edges), Ok(files)) => {
            report.push_check(GraphDbDoctorCheck {
                name: "tokensave_counts".to_string(),
                status: "ok".to_string(),
                fail_closed: false,
                diagnostics: vec![format!(
                    "tokensave contains {} node(s), {} edge(s), {} file(s)",
                    nodes, edges, files
                )],
                repair_commands: Vec::new(),
            });
        }
        (nodes, edges, files) => {
            report.push_check(graph_db_doctor_check(
                "tokensave_counts",
                vec![format!(
                    "tokensave count inspection failed: nodes={:?} edges={:?} files={:?}",
                    nodes.err(),
                    edges.err(),
                    files.err()
                )],
                Vec::new(),
            ));
        }
    }
}
/// Resolves `target` to a graph node by delegating to
/// `store.resolve_evidence_target` with the fixed list of node kinds accepted
/// as evidence targets.
///
/// Returns `Ok(None)` when the store reports no match.
pub(crate) fn graph_db_resolve_evidence_target(
    store: &impl GraphStore,
    target: &str,
) -> Result<Option<SubstrateGraphNode>> {
    // Kinds are handed to the store in this order.
    const EVIDENCE_TARGET_KINDS: [&str; 5] = [
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
    ];
    store.resolve_evidence_target(target, &EVIDENCE_TARGET_KINDS)
}
/// Forwards to `store.reachable_nodes_by_kind`, returning each matching node
/// paired with a `substrate::GraphPath`.
///
/// `from_id`, `kind`, `depth`, and `limit` are passed through unchanged; their
/// exact semantics are defined by the store implementation.
// NOTE(review): presumably the path is the route from `from_id` to the node — confirm against `GraphStore`.
fn graph_db_reachable_nodes_by_kind(
store: &impl GraphStore,
from_id: &str,
kind: &str,
depth: usize,
limit: usize,
) -> Result<Vec<(SubstrateGraphNode, substrate::GraphPath)>> {
store.reachable_nodes_by_kind(from_id, kind, depth, limit)
}
/// Detects "queue-head drift": the target already has a completed
/// `worker_result`, yet `job_packet` nodes for the same `ref_id` whose label
/// starts with `do #` still exist in the store.
///
/// Returns at most one warning string, which names the target, counts the
/// matching job packets, and includes a repair hint. The hint is a concrete
/// `agent-doc` command when the target carries a `path` property and a generic
/// instruction otherwise.
///
/// Targets without a `ref_id` property never produce a warning: there is
/// nothing to correlate on, and comparing two absent ids would otherwise treat
/// every node lacking `ref_id` as a match.
///
/// # Errors
///
/// Propagates errors from `store.nodes_by_kind`.
fn graph_db_evidence_completed_queue_drift_warnings(
    store: &impl GraphStore,
    target: &SubstrateGraphNode,
    worker_results: &[SubstrateGraphNode],
) -> Result<Vec<String>> {
    let Some(ref_id) = target.properties.get("ref_id").map(String::as_str) else {
        return Ok(Vec::new());
    };

    let has_completed_result = worker_results.iter().any(|node| {
        node.properties.get("status").map(String::as_str) == Some("completed")
            && node.properties.get("ref_id").map(String::as_str) == Some(ref_id)
    });
    if !has_completed_result {
        return Ok(Vec::new());
    }

    // Only the number of matching packets is reported, so count instead of collecting.
    let active_job_count = store
        .nodes_by_kind("job_packet")?
        .into_iter()
        .filter(|node| {
            node.properties.get("ref_id").map(String::as_str) == Some(ref_id)
                && node.label.starts_with("do #")
        })
        .count();
    if active_job_count == 0 {
        return Ok(Vec::new());
    }

    let repair = match target.properties.get("path") {
        Some(path) => format!(
            "repair with `agent-doc write --commit {} --done {}` or the next `agent-doc finalize --done {}` closeout",
            shell_quote(path),
            shell_quote(ref_id),
            shell_quote(ref_id)
        ),
        None => {
            "repair by marking the queue item done/reaping it in the agent-doc session".to_string()
        }
    };
    Ok(vec![format!(
        "queue-head drift: target {} has {} active queued do packet(s) but already has a completed worker_result; {repair}; do not redispatch or reactivate the completed item",
        target.label, active_job_count
    )])
}
/// Collects the follow-up commands suggested by an evidence packet.
///
/// The result contains every `expand` property found on the target and on the
/// supplied worker-context, source-handle, worker-result, and semantic nodes,
/// plus the `graph-db status` and `graph-db doctor` commands for `root` and
/// `scope`. Commands are de-duplicated and returned in sorted order.
fn graph_db_evidence_next_commands(
    root: &Path,
    scope: Option<&str>,
    target: &SubstrateGraphNode,
    worker_context: &[SubstrateGraphNode],
    source_handles: &[SubstrateGraphNode],
    worker_results: &[SubstrateGraphNode],
    semantic_related: &[SubstrateGraphNode],
) -> Vec<String> {
    let related_rows = worker_context
        .iter()
        .chain(source_handles)
        .chain(worker_results)
        .chain(semantic_related);

    // A BTreeSet both removes duplicates and yields the commands sorted.
    let mut commands: BTreeSet<String> = std::iter::once(target)
        .chain(related_rows)
        .filter_map(|node| node.properties.get("expand").cloned())
        .collect();

    let root_text = root.to_string_lossy();
    commands.insert(format!(
        "tsift graph-db --path {}{} status --json",
        shell_quote(root_text.as_ref()),
        graph_db_scope_arg(scope)
    ));
    commands.insert(format!(
        "tsift graph-db --path {}{} doctor --json",
        shell_quote(root_text.as_ref()),
        graph_db_scope_arg(scope)
    ));

    commands.into_iter().collect()
}
/// Returns the two repair commands for a graph projection, in order:
/// `graph-db refresh` followed by `graph-db doctor`, both for `root` and `scope`.
fn graph_db_repair_commands(root: &Path, scope: Option<&str>) -> Vec<String> {
    let root_text = root.to_string_lossy();
    let refresh = format!(
        "tsift graph-db --path {}{} refresh --json",
        shell_quote(root_text.as_ref()),
        graph_db_scope_arg(scope)
    );
    let doctor = format!(
        "tsift graph-db --path {}{} doctor --json",
        shell_quote(root_text.as_ref()),
        graph_db_scope_arg(scope)
    );
    vec![refresh, doctor]
}
/// Returns the commands that replay an evidence query, in order: the
/// `graph-db evidence` invocation with the same target, depth, and limit, then
/// the `conflict-matrix` invocation for the same target.
fn graph_db_evidence_replay_commands(
    root: &Path,
    scope: Option<&str>,
    target: &str,
    depth: usize,
    limit: usize,
) -> Vec<String> {
    let root_text = root.to_string_lossy();
    let mut commands = Vec::with_capacity(2);
    commands.push(format!(
        "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
        shell_quote(root_text.as_ref()),
        graph_db_scope_arg(scope),
        shell_quote(target),
        depth,
        limit
    ));
    commands.push(format!(
        "tsift conflict-matrix --path {} {} --json",
        shell_quote(root_text.as_ref()),
        shell_quote(target)
    ));
    commands
}
/// Derives the packet id for an evidence report.
///
/// The id is a `stable_handle` with the `gevd` prefix over the evidence
/// contract version, the requested target text, the resolved node id, and the
/// projection content hash (`"no-hash"` when the freshness report has none).
fn graph_db_evidence_packet_id(
    target: &str,
    target_node: &SubstrateGraphNode,
    freshness: &GraphDbFreshnessReport,
) -> String {
    let projection_hash = freshness.content_hash.as_deref().unwrap_or("no-hash");
    let seed = format!(
        "{}:{}:{}:{}",
        GRAPH_DB_EVIDENCE_CONTRACT_VERSION, target, target_node.id, projection_hash
    );
    stable_handle("gevd", &seed)
}
/// Assembles the evidence packet for one target from a graph store.
///
/// Steps, in order:
/// 1. bail when the supplied freshness report fails closed (the error text
///    includes the repair commands);
/// 2. resolve `target` to a node, erroring when it is not found;
/// 3. fetch worker-context, source-handle, worker-result, and semantic rows
///    reachable from the target within `depth`, capped at `limit` rows
///    (`limit == 0` means no cap);
/// 4. merge the two semantic kinds, order them by hops, kind, label, and id,
///    and apply the cap to the merged list;
/// 5. append queue-drift warnings and a warning when no rows were found;
/// 6. build the path list plus the next/replay/repair command lists.
///
/// # Errors
///
/// Fails closed on stale projections, on an unresolvable target, and on any
/// store error raised while reading rows.
pub(crate) fn graph_db_evidence_report_from_store<S: GraphStore>(
    input: GraphDbEvidenceInput<'_, S>,
) -> Result<GraphDbEvidenceReport> {
    // Node kinds gathered around the target, in the order handed to the store.
    const EVIDENCE_ROW_KINDS: [&str; 5] = [
        "worker_context",
        "source_handle",
        "worker_result",
        "semantic_concept",
        "semantic_entity",
    ];

    // Clones the node half of every `(node, path)` row, keeping row order.
    fn nodes_of<N: Clone, P>(rows: &[(N, P)]) -> Vec<N> {
        rows.iter().map(|(node, _)| node.clone()).collect()
    }

    let GraphDbEvidenceInput {
        root,
        scope,
        backend,
        target,
        depth,
        limit,
        store,
        freshness,
        mut warnings,
    } = input;

    let repair_commands = graph_db_repair_commands(root, scope);
    if freshness.fail_closed {
        bail!(
            "graph database evidence failed closed for {} backend: {}; repair: {}",
            backend,
            freshness.diagnostics.join("; "),
            repair_commands.join("; ")
        );
    }

    let target_node = graph_db_resolve_evidence_target(store, target)?
        .with_context(|| format!("graph-db evidence target not found: {target}"))?;

    // A limit of zero requests an uncapped result.
    let max_rows = match limit {
        0 => usize::MAX,
        bounded => bounded,
    };
    let mut reachable =
        store.reachable_nodes_by_kinds(&target_node.id, &EVIDENCE_ROW_KINDS, depth, max_rows)?;
    let worker_paths = reachable.remove("worker_context").unwrap_or_default();
    let source_paths = reachable.remove("source_handle").unwrap_or_default();
    let worker_result_paths = reachable.remove("worker_result").unwrap_or_default();

    // Concepts and entities are reported together as one semantic list.
    let mut semantic_paths = reachable.remove("semantic_concept").unwrap_or_default();
    semantic_paths.extend(reachable.remove("semantic_entity").unwrap_or_default());
    semantic_paths.sort_by(|(left_node, left_path), (right_node, right_path)| {
        left_path
            .hops
            .cmp(&right_path.hops)
            .then_with(|| left_node.kind.cmp(&right_node.kind))
            .then_with(|| left_node.label.cmp(&right_node.label))
            .then_with(|| left_node.id.cmp(&right_node.id))
    });
    // The merged list can exceed the cap; truncating is a no-op when it does not.
    semantic_paths.truncate(max_rows);

    let worker_context = nodes_of(&worker_paths);
    let source_handles = nodes_of(&source_paths);
    let worker_results = nodes_of(&worker_result_paths);
    let semantic_related = nodes_of(&semantic_paths);

    warnings.extend(graph_db_evidence_completed_queue_drift_warnings(
        store,
        &target_node,
        &worker_results,
    )?);

    let no_linked_rows = worker_context.is_empty()
        && source_handles.is_empty()
        && worker_results.is_empty()
        && semantic_related.is_empty();
    if no_linked_rows {
        warnings.push(format!(
            "graph-db evidence target {} resolved to a {} node but has no projection-linked context rows; add source/file tokens to the backlog text or rerun graph-db refresh after the session document is indexed",
            target, target_node.kind
        ));
    }

    let shortest_paths = worker_paths
        .iter()
        .chain(source_paths.iter())
        .chain(worker_result_paths.iter())
        .chain(semantic_paths.iter())
        .map(|(node, path)| GraphDbEvidencePath {
            to: node.id.clone(),
            kind: node.kind.clone(),
            label: node.label.clone(),
            path: Some(path.clone()),
            expand: node.properties.get("expand").cloned(),
        })
        .collect::<Vec<_>>();

    let next_commands = graph_db_evidence_next_commands(
        root,
        scope,
        &target_node,
        &worker_context,
        &source_handles,
        &worker_results,
        &semantic_related,
    );
    let replay_commands = graph_db_evidence_replay_commands(root, scope, target, depth, limit);
    let packet_id = graph_db_evidence_packet_id(target, &target_node, &freshness);
    let projection_hash = freshness.content_hash.clone();

    let fixture_coverage = GraphDbFixtureCoverage {
        test: "graph_db_evidence_packet_covers_backlog_job_worker_context_and_source_handles"
            .to_string(),
        fixture: "tests/graph_db_conformance.rs::graph_db_project".to_string(),
        assertions: vec![
            "backlog id and job packet handle resolve to graph nodes".to_string(),
            "worker_context rows are reachable from queued work".to_string(),
            "source_handle rows are reachable through bounded shortest paths".to_string(),
            "worker_result rows are reachable from completed or blocked work".to_string(),
        ],
    };

    Ok(GraphDbEvidenceReport {
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        backend: backend.to_string(),
        contract_version: GRAPH_DB_EVIDENCE_CONTRACT_VERSION.to_string(),
        target: target.to_string(),
        packet_id,
        projection_hash,
        freshness,
        target_node,
        worker_context,
        source_handles,
        worker_results,
        semantic_related,
        shortest_paths,
        next_commands,
        replay_commands,
        repair_commands,
        fixture_coverage,
        warnings,
    })
}
/// Prints an evidence report to stdout in the line-oriented human format:
/// a header line, a row-count line, one `path:` line per entry that carries a
/// path, then `next:` and `warning:` lines.
fn print_graph_db_evidence_human(report: &GraphDbEvidenceReport) {
    let target = &report.target_node;
    println!(
        "graph-db evidence backend: {} target: {} [{}] packet:{}",
        report.backend, target.id, target.kind, report.packet_id
    );
    println!(
        "evidence: {} worker_context row(s), {} source_handle row(s), {} worker_result row(s), {} semantic row(s), {} path(s)",
        report.worker_context.len(),
        report.source_handles.len(),
        report.worker_results.len(),
        report.semantic_related.len(),
        report.shortest_paths.len()
    );

    // Entries without a recorded path are skipped.
    let recorded_paths = report
        .shortest_paths
        .iter()
        .filter_map(|entry| entry.path.as_ref());
    for graph_path in recorded_paths {
        println!(
            "path: {} hop(s) {}",
            graph_path.hops,
            graph_path.nodes.join(" -> ")
        );
    }

    report
        .next_commands
        .iter()
        .for_each(|command| println!("next: {command}"));
    report
        .warnings
        .iter()
        .for_each(|warning| println!("warning: {warning}"));
}
/// Emits an evidence report in the requested output format.
///
/// With `format.json_output` set the report goes through
/// `print_json_or_envelope` together with a one-line summary, per-section row
/// counts as metrics, and the report's next commands; otherwise the human
/// line format is printed.
pub(crate) fn print_graph_db_evidence_report(
    report: &GraphDbEvidenceReport,
    format: OutputFormat,
) -> Result<()> {
    if !format.json_output {
        print_graph_db_evidence_human(report);
        return Ok(());
    }

    let worker_context_rows = report.worker_context.len();
    let source_handle_rows = report.source_handles.len();
    let worker_result_rows = report.worker_results.len();
    let semantic_rows = report.semantic_related.len();
    let path_rows = report.shortest_paths.len();

    let text = format!(
        "Graph DB evidence for {} returned {} worker context row(s), {} source handle(s), {} worker result row(s), {} semantic row(s), and {} shortest path(s)",
        report.target,
        worker_context_rows,
        source_handle_rows,
        worker_result_rows,
        semantic_rows,
        path_rows
    );
    let metrics = vec![
        envelope_metric("backend", &report.backend),
        envelope_metric("worker_context", worker_context_rows),
        envelope_metric("source_handles", source_handle_rows),
        envelope_metric("worker_results", worker_result_rows),
        envelope_metric("semantic_related", semantic_rows),
        envelope_metric("paths", path_rows),
    ];

    print_json_or_envelope(
        report,
        &format,
        "graph-db",
        "evidence",
        ToolEnvelopeSummary { text, metrics },
        false,
        report.next_commands.clone(),
    )
}
/// Runs one read-style `graph-db` query against `store` and returns the result
/// as a `GraphDbReport`.
///
/// The report starts out empty (apart from root, scope, backend, the debug
/// rendering of `query`, the freshness report, and the caller's `warnings`) and
/// each query variant fills in only the fields it produces.
///
/// # Errors
///
/// - Bails when `freshness.fail_closed` is set.
/// - Bails for `Refresh`, `Status`, `Doctor`, `Drift`, `Compact`,
///   `BackendEval`, and `Evidence`, which must be served by their own command
///   paths rather than this function.
/// - Propagates errors from the store and from query-option parsing.
pub(crate) fn graph_db_report_from_store(
root: &Path,
scope: Option<&str>,
backend: &str,
query: GraphDbQuery,
store: &impl GraphStore,
freshness: GraphDbFreshnessReport,
warnings: Vec<String>,
) -> Result<GraphDbReport> {
// Refuse to answer from a projection the freshness check does not trust.
if freshness.fail_closed {
bail!(
"graph database read failed closed for {} backend: {}",
backend,
freshness.diagnostics.join("; ")
);
}
let mut report = GraphDbReport {
root: root.to_string_lossy().to_string(),
scope: scope.map(str::to_string),
backend: backend.to_string(),
// The query is recorded via its `Debug` rendering before it is consumed below.
query: format!("{query:?}"),
freshness,
schema: None,
node: None,
edge: None,
nodes: Vec::new(),
edges: Vec::new(),
ranked_neighbors: Vec::new(),
semantic_related: Vec::new(),
neighborhood_ranking_gate: None,
knowledge_retrieval: None,
path: None,
page: None,
warnings,
};
match query {
// The following variants are dispatched elsewhere; reaching them here is an error.
GraphDbQuery::Refresh => {
bail!("graph-db refresh must be handled by the refresh command path");
}
GraphDbQuery::Status => {
bail!("graph-db status must be handled by the status command path");
}
GraphDbQuery::Doctor => {
bail!("graph-db doctor must be handled by the doctor command path");
}
GraphDbQuery::Drift => {
bail!("graph-db drift must be handled by the drift command path");
}
GraphDbQuery::Compact { .. } => {
bail!("graph-db compact must be handled by the compact command path");
}
GraphDbQuery::BackendEval { .. } => {
bail!("graph-db backend-eval must be handled by the benchmark command path");
}
GraphDbQuery::Evidence { .. } => {
bail!("graph-db evidence must be handled by the evidence command path");
}
// Semantic lookup first, then a neighborhood expansion seeded by the matched handles.
GraphDbQuery::Related {
query,
kind,
depth,
seed_limit,
limit,
} => {
let semantic =
semantic_related_report_from_store(root, scope, &query, seed_limit, kind, store)?;
let SemanticRelatedReport {
items,
warnings: semantic_warnings,
..
} = semantic;
report.warnings.extend(semantic_warnings);
let seed_ids = items
.iter()
.map(|item| item.handle.clone())
.collect::<Vec<_>>();
let subgraph = graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit)?;
let seed_count = seed_ids.len();
report.semantic_related = items;
report.nodes = subgraph.nodes;
report.edges = subgraph.edges;
// Ranked neighbors are computed relative to the first seed only; with no seeds
// both the ranking and its gate stay unset.
if let Some(seed_id) = seed_ids.first() {
let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(Some(limit));
report.ranked_neighbors = graph_db_ranked_neighbors(
seed_id,
&report.nodes,
&report.edges,
ranked_neighbor_cap,
);
report.neighborhood_ranking_gate =
Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
}
report.knowledge_retrieval = Some(GraphDbKnowledgeRetrieval {
mode: "semantic_seeded_neighborhood".to_string(),
query,
seed_kind: semantic_related_kind_name(kind).to_string(),
seed_limit,
seed_count,
depth,
limit,
node_count: report.nodes.len(),
edge_count: report.edges.len(),
truncated: subgraph.truncated,
traversal: "incident_plus_outgoing_edges".to_string(),
freshness_boundary:
"semantic rows must come from refreshed summary graph records".to_string(),
privacy_boundary:
"GraphStore stores substrate records only; user consent, deletion policy, persona policy, and LiveKit session state stay in the avatar/agent adapter"
.to_string(),
diagnostics: subgraph.diagnostics,
});
}
GraphDbQuery::Schema => {
report.schema = Some(graph_db_schema());
}
// Single-record lookups: the store's result is stored as-is.
GraphDbQuery::Node { id } => {
report.node = store.node(&id)?;
}
GraphDbQuery::Edge { id } => {
report.edge = store.edge(&id)?;
}
// Paged listings: parse cursor/limit/property filters once, hand them to the
// store, and echo the property filters back in the page report.
GraphDbQuery::Edges {
edge_kind,
cursor,
limit,
property_filters,
} => {
let options = graph_db_query_options(cursor, limit, &property_filters)?;
let paged = store.paged_edges(
edge_kind.as_deref(),
graph_db_query_options_for_store(&options),
)?;
report.edges = paged.edges;
report.page = Some(graph_db_page_report_from_store(
paged.page,
options.property_filters,
));
}
GraphDbQuery::Incident {
id,
edge_kind,
cursor,
limit,
property_filters,
} => {
let options = graph_db_query_options(cursor, limit, &property_filters)?;
let paged = store.paged_incident_edges(
&id,
edge_kind.as_deref(),
graph_db_query_options_for_store(&options),
)?;
report.edges = paged.edges;
report.page = Some(graph_db_page_report_from_store(
paged.page,
options.property_filters,
));
}
GraphDbQuery::Kind {
kind,
cursor,
limit,
property_filters,
} => {
let options = graph_db_query_options(cursor, limit, &property_filters)?;
let paged =
store.paged_nodes_by_kind(&kind, graph_db_query_options_for_store(&options))?;
report.nodes = paged.nodes;
report.edges = paged.edges;
report.page = Some(graph_db_page_report_from_store(
paged.page,
options.property_filters,
));
}
GraphDbQuery::Neighborhood {
id,
depth,
edge_kind,
cursor,
limit,
property_filters,
} => {
let options = graph_db_query_options(cursor, limit, &property_filters)?;
// When the store returns `None` the report keeps its empty nodes/edges and
// no page or ranking information is attached.
if let Some(paged) = store.paged_neighborhood(
&id,
depth,
edge_kind.as_deref(),
graph_db_query_options_for_store(&options),
)? {
report.nodes = paged.nodes;
report.edges = paged.edges;
let ranked_neighbor_cap = graph_db_ranked_neighbor_cap(options.limit);
report.ranked_neighbors = graph_db_ranked_neighbors(
&id,
&report.nodes,
&report.edges,
ranked_neighbor_cap,
);
report.neighborhood_ranking_gate =
Some(graph_db_neighborhood_ranking_gate(ranked_neighbor_cap));
report.page = Some(graph_db_page_report_from_store(
paged.page,
options.property_filters,
));
}
}
GraphDbQuery::Path {
from,
to,
edge_kind,
max_hops,
} => {
report.path =
store.shortest_path_with_max_hops(&from, &to, edge_kind.as_deref(), max_hops)?;
// A missing path is only called out when the caller bounded the search.
if let Some(max_hops) = max_hops
&& report.path.is_none()
{
report.warnings.push(format!(
"no directed path found within --max-hops {}",
max_hops
));
}
}
}
Ok(report)
}
/// Prints a graph-db report to stdout in human-readable form.
///
/// With `compact` set only a single summary line is printed; the node and edge
/// totals on that line include the single-record `node`/`edge` fields. The
/// full form prints backend and freshness first, then every populated section
/// of the report, ending with paging details and warnings.
pub(crate) fn print_graph_db_human(report: &GraphDbReport, compact: bool) {
    if compact {
        let node_total = report.nodes.len() + usize::from(report.node.is_some());
        let edge_total = report.edges.len() + usize::from(report.edge.is_some());
        println!(
            "graph-db backend:{} query:{} nodes:{} edges:{} freshness:{}",
            report.backend, report.query, node_total, edge_total, report.freshness.status
        );
        return;
    }

    println!("graph-db backend: {}", report.backend);
    println!("freshness: {}", report.freshness.status);

    if let Some(schema) = report.schema.as_ref() {
        println!(
            "schema: {} node fields, {} edge fields, {} operations",
            schema.node_fields.len(),
            schema.edge_fields.len(),
            schema.operations.len()
        );
    }
    if let Some(node) = report.node.as_ref() {
        println!("node: {} [{}] {}", node.id, node.kind, node.label);
    }
    if let Some(edge) = report.edge.as_ref() {
        println!(
            "edge: {} {} -{}-> {}",
            graph_db_edge_key(edge),
            edge.from_id,
            edge.kind,
            edge.to_id
        );
    }
    if let Some(knowledge) = report.knowledge_retrieval.as_ref() {
        println!(
            "knowledge_retrieval: {} seeds:{} depth:{} traversal:{}",
            knowledge.mode, knowledge.seed_count, knowledge.depth, knowledge.traversal
        );
    }

    for item in report.semantic_related.iter() {
        println!(
            "semantic_seed: {:.3} [{}] {} ({})",
            item.score, item.kind, item.label, item.handle
        );
    }
    for node in report.nodes.iter() {
        println!("node: {} [{}] {}", node.id, node.kind, node.label);
    }
    for edge in report.edges.iter() {
        println!(
            "edge: {} {} -{}-> {}",
            graph_db_edge_key(edge),
            edge.from_id,
            edge.kind,
            edge.to_id
        );
    }
    for neighbor in report.ranked_neighbors.iter() {
        // Neighbors without a recorded depth are shown as "unknown".
        let depth_text = match neighbor.depth {
            Some(depth) => depth.to_string(),
            None => "unknown".to_string(),
        };
        println!(
            "ranked_neighbor: #{} score:{} depth:{} {} [{}] {}",
            neighbor.rank,
            neighbor.score,
            depth_text,
            neighbor.node_id,
            neighbor.kind,
            neighbor.label
        );
    }

    if let Some(gate) = report.neighborhood_ranking_gate.as_ref() {
        println!(
            "neighborhood_ranking_gate: {} default_order:{} ranked_output_default:{}",
            gate.status, gate.default_order, gate.ranked_output_default
        );
    }
    if let Some(path) = report.path.as_ref() {
        println!("path: {} hop(s) {}", path.hops, path.nodes.join(" -> "));
    }
    if let Some(page) = report.page.as_ref() {
        if let Some(next_cursor) = page.next_cursor.as_ref() {
            println!("next_cursor: {next_cursor}");
        }
        page.diagnostics
            .iter()
            .for_each(|diagnostic| println!("page: {diagnostic}"));
    }
    report
        .warnings
        .iter()
        .for_each(|warning| println!("warning: {warning}"));
}
/// Builds one phase-timing record from a phase name, its duration in
/// microseconds, and a free-form detail string.
pub(crate) fn graph_db_backend_eval_phase_timing(
    name: &str,
    duration_micros: u128,
    detail: &str,
) -> GraphDbBackendEvalPhaseTiming {
    let (name, detail) = (name.to_owned(), detail.to_owned());
    GraphDbBackendEvalPhaseTiming {
        name,
        duration_micros,
        detail,
    }
}
/// Runs `run`, appends a timing record for it to `phases`, and returns its result.
///
/// The timing record is appended whether `run` succeeds or fails.
pub(crate) fn graph_db_backend_eval_timed_phase<T>(
    phases: &mut Vec<GraphDbBackendEvalPhaseTiming>,
    name: &str,
    detail: &str,
    run: impl FnOnce() -> Result<T>,
) -> Result<T> {
    let started = Instant::now();
    let outcome = run();
    let elapsed_micros = started.elapsed().as_micros();
    phases.push(graph_db_backend_eval_phase_timing(
        name,
        elapsed_micros,
        detail,
    ));
    outcome
}
/// Sums the durations of all phases except the one named
/// `conflict_matrix_preparation`.
pub(crate) fn graph_db_backend_eval_refresh_total_micros(
    phases: &[GraphDbBackendEvalPhaseTiming],
) -> u128 {
    let mut total_micros = 0u128;
    for phase in phases {
        if phase.name == "conflict_matrix_preparation" {
            continue;
        }
        total_micros += phase.duration_micros;
    }
    total_micros
}
/// Tries to reuse the existing `graph.db` projection instead of rebuilding it.
///
/// Returns `Ok(None)` — meaning "rebuild" — when no source watermark was
/// supplied, the database file does not exist, it cannot be opened read-only,
/// it has user triggers (or that check fails), the projection fails closed, or
/// its stored source watermark differs from `source_watermark`.
///
/// On a hit the result is a default `TraversalGraphBuild`, a
/// `SqliteProjectionRefresh` with every counter at zero, and three phase
/// records; only `source_graph_build` carries a measured duration, the other
/// two are reported as zero.
///
/// # Errors
///
/// Propagates errors from `sqlite_graph_freshness`.
pub(crate) fn graph_db_backend_eval_cached_refresh(
    root: &Path,
    scope: Option<&str>,
    source_watermark: Option<&str>,
) -> Result<
    Option<(
        TraversalGraphBuild,
        SqliteProjectionRefresh,
        Vec<GraphDbBackendEvalPhaseTiming>,
    )>,
> {
    let source_watermark = match source_watermark {
        Some(watermark) => watermark,
        None => return Ok(None),
    };
    let graph_db = graph_substrate_db_path(root, scope);
    if !graph_db.exists() {
        return Ok(None);
    }

    let started = Instant::now();
    let Ok(store) = SqliteGraphStore::open_read_only_resilient(&graph_db) else {
        return Ok(None);
    };
    // A failed trigger check is treated the same as triggers being present.
    if store.has_user_triggers().unwrap_or(true) {
        return Ok(None);
    }

    let scope_name = scope.unwrap_or("root");
    let freshness = sqlite_graph_freshness(&store, scope_name)?;
    let watermark_matches = freshness.source_watermark.as_deref() == Some(source_watermark);
    if freshness.fail_closed || !watermark_matches {
        return Ok(None);
    }

    let phases = vec![
        graph_db_backend_eval_phase_timing(
            "source_graph_build",
            started.elapsed().as_micros(),
            "reused current graph.db projection because the source watermark matched; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
        ),
        graph_db_backend_eval_phase_timing(
            "projection_rows",
            0,
            "reused cached provider-neutral projection rows from graph.db",
        ),
        graph_db_backend_eval_phase_timing(
            "sqlite_open",
            0,
            "reused existing graph.db projection without opening a write transaction",
        ),
    ];

    let projection_version = freshness
        .projection_version
        .unwrap_or_else(|| GRAPH_PROJECTION_VERSION.to_string());
    let refresh = SqliteProjectionRefresh {
        scope: scope_name.to_string(),
        projection_version,
        source_watermark: Some(source_watermark.to_string()),
        tombstoned_nodes: Vec::new(),
        tombstoned_edges: Vec::new(),
        upserted_nodes: 0,
        upserted_edges: 0,
        unchanged_nodes: 0,
        unchanged_edges: 0,
        upserted_properties: 0,
        unchanged_properties: 0,
        deleted_properties: 0,
        deleted_nodes: 0,
        deleted_edges: 0,
        pruned_tombstones: 0,
        file_size_bytes_before: None,
        file_size_bytes_after: None,
        phase_timings: Vec::new(),
    };
    Ok(Some((TraversalGraphBuild::default(), refresh, phases)))
}
/// Reports whether `phases` contains a `source_graph_build` phase whose detail
/// text says the current graph.db projection was reused.
pub(crate) fn graph_db_backend_eval_reused_cached_projection(
    phases: &[GraphDbBackendEvalPhaseTiming],
) -> bool {
    for phase in phases {
        let is_source_build = phase.name == "source_graph_build";
        if is_source_build && phase.detail.contains("reused current graph.db projection") {
            return true;
        }
    }
    false
}
/// Recomputes the traversal source watermark and stores it on the projection
/// metadata for `scope` (`"root"` when no scope is given).
///
/// Does nothing when no watermark can be computed.
///
/// # Errors
///
/// Propagates errors from computing the watermark, opening the SQLite store,
/// and updating the stored watermark.
pub(crate) fn graph_db_backend_eval_update_source_watermark(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<()> {
    if let Some(source_watermark) = traversal_source_watermark(root, path_hint, scope, false)? {
        let graph_db = graph_substrate_db_path(root, scope);
        let mut store = SqliteGraphStore::open(&graph_db)?;
        store
            .update_projection_source_watermark(scope.unwrap_or("root"), Some(source_watermark))?;
    }
    Ok(())
}
/// Refreshes the SQLite graph projection and returns the source graph, the
/// refresh summary, and per-phase timings.
///
/// When the cached-refresh probe reports a hit its result is returned as-is.
/// Otherwise the source graph is built, projected into rows, and written to
/// `graph.db`, with each step recorded as a timed phase followed by the phase
/// timings reported by the store itself.
///
/// The watermark stored with the projection is, in order of preference: the
/// watermark recomputed after the build, the one computed before it, or the
/// projection's content hash.
///
/// # Errors
///
/// Propagates errors from watermark computation, the cache probe, graph
/// construction, projection, opening the store, and replacing the projection.
pub(crate) fn graph_db_backend_eval_refresh_with_profile(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<(
    TraversalGraphBuild,
    SqliteProjectionRefresh,
    Vec<GraphDbBackendEvalPhaseTiming>,
)> {
    let source_watermark = traversal_source_watermark(root, path_hint, scope, false)?;
    let cached = graph_db_backend_eval_cached_refresh(root, scope, source_watermark.as_deref())?;
    if let Some(cached) = cached {
        return Ok(cached);
    }

    let mut phases = Vec::new();
    let source_graph_detail = match hinted_markdown_file(root, path_hint) {
        Some(_) => {
            "bounded session projection: index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads; skips global call-edge materialization because full-projection is the complete-call-graph regression guard"
        }
        None => {
            "index/source loading plus agent-doc session markdown scan, source-handle construction, and semantic summary reads when summaries are cached"
        }
    };
    let source_graph = graph_db_backend_eval_timed_phase(
        &mut phases,
        "source_graph_build",
        source_graph_detail,
        || build_traversal_graph_source_with_options(root, path_hint, scope, false),
    )?;
    let projection = graph_db_backend_eval_timed_phase(
        &mut phases,
        "projection_rows",
        "provider-neutral GraphStore node/edge row construction before SQLite persistence",
        || traversal_projection_from_graph(root, scope, &source_graph),
    )?;
    let graph_db = graph_substrate_db_path(root, scope);
    let mut store = graph_db_backend_eval_timed_phase(
        &mut phases,
        "sqlite_open",
        "open the local SQLite graph.db with WAL and busy-timeout settings",
        || SqliteGraphStore::open(&graph_db),
    )?;

    // The watermark is recomputed after the build; a failure here is ignored
    // and the earlier value is used instead.
    let refreshed_source_watermark = traversal_source_watermark(root, path_hint, scope, false)
        .ok()
        .flatten();
    let stored_watermark = refreshed_source_watermark
        .or(source_watermark)
        .or_else(|| graph_projection_content_hash(&projection));
    let refresh = store.replace_projection_with_version(
        scope.unwrap_or("root"),
        &projection,
        Some(GRAPH_PROJECTION_VERSION),
        stored_watermark,
    )?;

    for phase in &refresh.phase_timings {
        phases.push(GraphDbBackendEvalPhaseTiming {
            name: phase.name.clone(),
            duration_micros: phase.duration_micros,
            detail: phase.detail.clone(),
        });
    }
    Ok((source_graph, refresh, phases))
}
/// Returns the backend-eval disk cache directory, `.tsift/backend-eval-cache`
/// under `root`.
fn graph_db_backend_eval_disk_cache_dir(root: &Path) -> PathBuf {
    let mut cache_dir = root.to_path_buf();
    cache_dir.push(".tsift/backend-eval-cache");
    cache_dir
}
/// Returns the path of the gzip-compressed cache entry for `key` inside the
/// `kind` subdirectory of the backend-eval cache: `<cache>/<kind>/<key>.json.gz`.
fn graph_db_backend_eval_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
    let file_name = format!("{key}.json.gz");
    let mut entry_path = graph_db_backend_eval_disk_cache_dir(root);
    entry_path.push(kind);
    entry_path.push(file_name);
    entry_path
}
/// Returns the path of the legacy uncompressed cache entry for `key` inside
/// the `kind` subdirectory of the backend-eval cache: `<cache>/<kind>/<key>.json`.
fn graph_db_backend_eval_legacy_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
    let file_name = format!("{key}.json");
    let mut entry_path = graph_db_backend_eval_disk_cache_dir(root);
    entry_path.push(kind);
    entry_path.push(file_name);
    entry_path
}
/// Timing breakdown recorded while reading one backend-eval disk cache entry.
/// All durations are in microseconds.
#[derive(Default, Clone)]
struct GraphDbBackendEvalDiskCacheReadProfile {
/// Time spent in `fs::read`; includes the legacy-file read when the reader falls back to it.
file_read_micros: u128,
/// Time spent decompressing the gzip payload (stays zero if the compressed file could not be read).
gzip_decode_micros: u128,
/// Time spent in `serde_json` deserialization; includes the legacy-file parse on fallback.
serde_decode_micros: u128,
/// Set when the value was loaded from the legacy uncompressed `.json` entry.
legacy: bool,
}
/// Loads a cached value for `kind`/`key` from the backend-eval disk cache.
///
/// The gzip-compressed entry is tried first. If it cannot be read,
/// decompressed, or parsed, the legacy uncompressed `.json` entry is tried.
///
/// On success returns the value, the number of bytes read from disk, the
/// number of decoded JSON bytes, and the timing profile. For a legacy entry
/// both byte counts are the file size and `profile.legacy` is set. Returns
/// `None` when neither entry yields a value.
fn graph_db_backend_eval_read_disk_cache<T: for<'de> Deserialize<'de>>(
    root: &Path,
    kind: &str,
    key: &str,
) -> Option<(T, u64, u64, GraphDbBackendEvalDiskCacheReadProfile)> {
    let mut profile = GraphDbBackendEvalDiskCacheReadProfile::default();

    let compressed_path = graph_db_backend_eval_disk_cache_path(root, kind, key);
    let compressed_read_started = Instant::now();
    let compressed_read = fs::read(&compressed_path);
    profile.file_read_micros = compressed_read_started.elapsed().as_micros();

    if let Ok(compressed) = compressed_read {
        let gunzip_started = Instant::now();
        let mut json = Vec::new();
        let gunzip_outcome = GzDecoder::new(compressed.as_slice()).read_to_end(&mut json);
        profile.gzip_decode_micros = gunzip_started.elapsed().as_micros();

        if gunzip_outcome.is_ok() {
            let parse_started = Instant::now();
            let parsed = serde_json::from_slice::<T>(&json);
            profile.serde_decode_micros = parse_started.elapsed().as_micros();
            if let Ok(value) = parsed {
                return Some((value, compressed.len() as u64, json.len() as u64, profile));
            }
        }
    }

    // Fall back to the legacy uncompressed entry; its timings are added on top
    // of whatever the compressed attempt already recorded.
    let legacy_path = graph_db_backend_eval_legacy_disk_cache_path(root, kind, key);
    let legacy_read_started = Instant::now();
    let raw = fs::read(legacy_path).ok()?;
    let legacy_read_micros = legacy_read_started.elapsed().as_micros();
    profile.file_read_micros = profile.file_read_micros.saturating_add(legacy_read_micros);

    let legacy_parse_started = Instant::now();
    let value = serde_json::from_slice(&raw).ok()?;
    let legacy_parse_micros = legacy_parse_started.elapsed().as_micros();
    profile.serde_decode_micros = profile
        .serde_decode_micros
        .saturating_add(legacy_parse_micros);
    profile.legacy = true;

    let raw_len = raw.len() as u64;
    Some((value, raw_len, raw_len, profile))
}
/// Timing breakdown recorded while writing one backend-eval disk cache entry.
/// All durations are in microseconds.
#[derive(Default, Clone)]
struct GraphDbBackendEvalDiskCacheWriteProfile {
/// Time spent serializing the value to JSON bytes.
serde_encode_micros: u128,
/// Time spent gzip-compressing the JSON bytes.
gzip_encode_micros: u128,
/// Time spent writing the compressed bytes to disk.
file_write_micros: u128,
}
/// Serializes `value` to JSON, gzip-compresses it, and writes it to the
/// backend-eval disk cache entry for `kind`/`key`, creating parent directories
/// as needed.
///
/// Returns the compressed size, the uncompressed JSON size, and the timing
/// profile. Any failure (directory creation, serialization, compression, or
/// the file write) yields `None`.
fn graph_db_backend_eval_write_disk_cache<T: Serialize>(
    root: &Path,
    kind: &str,
    key: &str,
    value: &T,
) -> Option<(u64, u64, GraphDbBackendEvalDiskCacheWriteProfile)> {
    let mut profile = GraphDbBackendEvalDiskCacheWriteProfile::default();
    let entry_path = graph_db_backend_eval_disk_cache_path(root, kind, key);
    fs::create_dir_all(entry_path.parent()?).ok()?;

    let serde_started = Instant::now();
    let json = serde_json::to_vec(value).ok()?;
    profile.serde_encode_micros = serde_started.elapsed().as_micros();

    let gzip_started = Instant::now();
    let mut encoder = GzEncoder::new(Vec::new(), Compression::fast());
    encoder.write_all(&json).ok()?;
    let compressed = encoder.finish().ok()?;
    profile.gzip_encode_micros = gzip_started.elapsed().as_micros();

    let write_started = Instant::now();
    fs::write(&entry_path, &compressed).ok()?;
    profile.file_write_micros = write_started.elapsed().as_micros();

    Some((compressed.len() as u64, json.len() as u64, profile))
}
/// Deletes stale entries from the `kind` subdirectory of the backend-eval
/// disk cache, keeping only `<keep_key>.json.gz`.
///
/// Only regular files whose name ends in `.json` or `.json.gz` are removed, so
/// the legacy `<keep_key>.json` entry is pruned as well. Returns the number of
/// files removed and their combined size in bytes (a file whose metadata
/// cannot be read counts as zero bytes). An unreadable directory yields `(0, 0)`.
fn graph_db_backend_eval_prune_disk_cache(root: &Path, kind: &str, keep_key: &str) -> (usize, u64) {
    let cache_dir = graph_db_backend_eval_disk_cache_dir(root).join(kind);
    let entries = match fs::read_dir(cache_dir) {
        Ok(entries) => entries,
        Err(_) => return (0, 0),
    };
    let keep_name = format!("{keep_key}.json.gz");

    let mut pruned = (0usize, 0u64);
    for entry in entries.flatten() {
        let path = entry.path();
        if !path.is_file() {
            continue;
        }
        let Some(name) = path.file_name().and_then(|name| name.to_str()) else {
            continue;
        };
        let is_cache_file = name.ends_with(".json") || name.ends_with(".json.gz");
        if name == keep_name || !is_cache_file {
            continue;
        }
        // The size is read before deletion so it can be reported afterwards.
        let size = entry.metadata().map(|metadata| metadata.len()).unwrap_or(0);
        if fs::remove_file(&path).is_ok() {
            pruned.0 += 1;
            pruned.1 += size;
        }
    }
    pruned
}
/// Builds one watermark row (path, byte length, content hash) for every file
/// found by walking `source_root`, in path order, skipping files that
/// `traversal_path_is_generated_artifact` classifies as generated.
///
/// # Errors
///
/// Fails when the walk fails, when a file cannot be read (the error names the
/// file), or when hashing its content fails. Processing stops at the first error.
fn graph_db_backend_eval_full_projection_raw_watermark_rows(
    root: &Path,
    source_root: &Path,
) -> Result<Vec<GraphDbBackendEvalRawSourceWatermarkRow>> {
    let mut entries = walk::walk_files(source_root)?;
    entries.sort_by(|left, right| left.path.cmp(&right.path));
    entries
        .into_iter()
        .filter(|entry| !traversal_path_is_generated_artifact(root, source_root, &entry.path))
        .map(|entry| -> Result<GraphDbBackendEvalRawSourceWatermarkRow> {
            let bytes = fs::read(&entry.path)
                .with_context(|| format!("reading source input {}", entry.path.display()))?;
            Ok(GraphDbBackendEvalRawSourceWatermarkRow {
                path: traversal_watermark_path(root, &entry.path),
                bytes: bytes.len() as u64,
                content_hash: content_hash(&bytes)?,
            })
        })
        .collect()
}
/// Computes the source watermark used to decide whether a cached full projection is current.
///
/// The watermark value is `content_hash` over an ordered list of string parts:
/// the projection and cache versions, a fixed watermark kind, the scope (`"root"` when
/// `scope` is `None`), the path hint (the project root), and then one of two input digests:
///
/// * indexed mode, when the index gate reports a database path that exists on disk: hashes of
///   all symbols, stored call edges, and routes, each filtered to drop rows for which
///   `traversal_path_is_generated_artifact` returns true;
/// * raw fallback otherwise: a hash of the per-file rows from the raw source scan.
///
/// A fixed session-markdown marker and the parts appended by
/// `push_traversal_summaries_watermark_part` complete the list. The order of the parts is
/// significant because it feeds the final hash.
///
/// Returns the watermark value together with a space-separated `detail` string listing the
/// mode and the component hashes.
///
/// # Errors
/// Propagates failures from opening or querying the index, scanning raw sources, collecting
/// the summary parts, or hashing.
fn graph_db_backend_eval_full_projection_source_watermark(
    root: &Path,
    scope: Option<&str>,
) -> Result<GraphDbBackendEvalFullProjectionSourceWatermark> {
    let path_hint = root;
    let scope_label = scope.unwrap_or("root");
    let mut parts = vec![
        format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
        format!("cache_version:{GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION}"),
        "watermark_kind:stable_full_projection_inputs".to_string(),
        format!("scope:{scope_label}"),
        format!("path_hint:{}", traversal_watermark_path(root, path_hint)),
    ];
    let mut detail_parts = Vec::new();

    let gate = prepare_agent_doc_index_gate(root, path_hint, scope, "full-projection cache key");
    let source_root = &gate.source_root;
    let usable_db_path = gate.db_path.as_ref().filter(|candidate| candidate.exists());

    if let Some(db_path) = usable_db_path {
        let db = index::IndexDb::open_read_only_resilient(db_path)?;
        detail_parts.push("mode=indexed".to_string());
        parts.push("index_mode:indexed".to_string());
        parts.push(format!(
            "index_source_root:{}",
            traversal_watermark_path(root, source_root)
        ));

        let kept_symbols = db
            .all_symbols()?
            .into_iter()
            .filter(|symbol| {
                !traversal_path_is_generated_artifact(root, source_root, Path::new(&symbol.file))
            })
            .collect::<Vec<_>>();
        let symbols_hash = content_hash(&kept_symbols)?;
        detail_parts.push(format!("symbols={symbols_hash}"));
        parts.push(format!("index_symbols:{symbols_hash}"));

        let kept_edges = db
            .all_stored_edges()?
            .into_iter()
            .filter(|edge| {
                !traversal_path_is_generated_artifact(
                    root,
                    source_root,
                    Path::new(&edge.caller_file),
                )
            })
            .collect::<Vec<_>>();
        let edges_hash = content_hash(&kept_edges)?;
        detail_parts.push(format!("call_edges={edges_hash}"));
        parts.push(format!("index_call_edges:{edges_hash}"));

        let kept_routes = db
            .all_routes()?
            .into_iter()
            .filter(|route| {
                !traversal_path_is_generated_artifact(root, source_root, Path::new(&route.file))
            })
            .collect::<Vec<_>>();
        let routes_hash = content_hash(&kept_routes)?;
        detail_parts.push(format!("routes={routes_hash}"));
        parts.push(format!("index_routes:{routes_hash}"));
    } else {
        detail_parts.push("mode=raw_fallback".to_string());
        parts.push("index_mode:raw_fallback".to_string());
        parts.push(format!(
            "raw_source_root:{}",
            traversal_watermark_path(root, source_root)
        ));
        let raw_rows =
            graph_db_backend_eval_full_projection_raw_watermark_rows(root, source_root)?;
        let raw_hash = content_hash(&raw_rows)?;
        detail_parts.push(format!("raw_source_files={raw_hash}"));
        parts.push(format!("raw_source_files:{raw_hash}"));
    }

    parts.push("agent_doc_session_markdown:bounded_real_dataset_only".to_string());
    detail_parts.push("session_markdown=bounded_real_dataset_only".to_string());

    // Hash only the parts the summaries helper appended so the detail string can report them
    // separately from the overall watermark.
    let first_summary_part = parts.len();
    push_traversal_summaries_watermark_part(root, &mut parts)?;
    let summary_parts = parts[first_summary_part..].to_vec();
    let summaries_hash = content_hash(&summary_parts)?;
    detail_parts.push(format!("summaries={summaries_hash}"));

    let value = content_hash(&parts)?;
    detail_parts.push(format!("watermark={value}"));
    let detail = detail_parts.join(" ");
    Ok(GraphDbBackendEvalFullProjectionSourceWatermark { value, detail })
}
fn graph_db_backend_eval_full_projection_cache_key(
root: &Path,
scope: Option<&str>,
) -> Result<(String, String, String)> {
let source_watermark = graph_db_backend_eval_full_projection_source_watermark(root, scope)?;
let key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
root,
scope,
&source_watermark.value,
)?;
Ok((source_watermark.value, key, source_watermark.detail))
}
/// Hashes the cache version, the displayed root path, the scope (`"root"` when `scope` is
/// `None`), and an already-computed source watermark into a cache key.
///
/// # Errors
/// Propagates any failure from `content_hash`.
fn graph_db_backend_eval_full_projection_cache_key_for_watermark(
    root: &Path,
    scope: Option<&str>,
    source_watermark: &str,
) -> Result<String> {
    let root_display = root.display().to_string();
    let scope_label = scope.unwrap_or("root");
    let payload = serde_json::json!({
        "version": GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION,
        "root": root_display,
        "scope": scope_label,
        "source_watermark": source_watermark,
    });
    content_hash(&payload)
}
/// Loads the full-project graph projection from the disk cache, or builds and caches it.
///
/// Returns `(projection, warnings, phase timings, cache stats)`.
///
/// * **Cache hit** — a cache entry exists under the current key and its stored version, key,
///   and source watermark all match. The cached projection and warnings are returned, sibling
///   cache files are pruned, and the timings report the read/gunzip/deserialize/prune phases.
///   The two build phases are reported with a duration of zero.
/// * **Cache miss** — the source graph and projection rows are built (timed), the source
///   watermark is recomputed after the build, and the result is written under the key derived
///   from that refreshed watermark. If recomputing the watermark fails, the watermark from
///   before the build is used instead. A failed cache write is tolerated: the write phases and
///   the size stats are simply not reported. Sibling cache files are pruned either way.
///
/// # Errors
/// Fails if the cache key cannot be computed, if the source graph or projection build fails,
/// or if the write key cannot be hashed.
pub(crate) fn graph_db_backend_eval_full_projection_with_profile(
    root: &Path,
    scope: Option<&str>,
) -> Result<(
    GraphProjection,
    Vec<String>,
    Vec<GraphDbBackendEvalPhaseTiming>,
    GraphDbBackendEvalFullProjectionCacheStats,
)> {
    let (source_watermark, key, source_watermark_detail) =
        graph_db_backend_eval_full_projection_cache_key(root, scope)?;
    let lookup_started = Instant::now();
    if let Some((cached, disk_bytes, json_bytes, read_profile)) =
        graph_db_backend_eval_read_disk_cache::<GraphDbBackendEvalFullProjectionCache>(
            root,
            "full_projection",
            &key,
        )
        && cached.version == GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION
        && cached.key == key
        && cached.source_watermark == source_watermark
    {
        // The lookup phase reports only the overhead around the load: the read, gunzip, and
        // deserialize times are subtracted because they are reported as their own phases.
        let lookup_overhead_micros = lookup_started
            .elapsed()
            .as_micros()
            .saturating_sub(read_profile.file_read_micros)
            .saturating_sub(read_profile.gzip_decode_micros)
            .saturating_sub(read_profile.serde_decode_micros);
        let prune_started = Instant::now();
        let (pruned_files, pruned_bytes) =
            graph_db_backend_eval_prune_disk_cache(root, "full_projection", &key);
        let prune_micros = prune_started.elapsed().as_micros();
        let cache_stats = GraphDbBackendEvalFullProjectionCacheStats {
            hit: true,
            disk_bytes,
            json_bytes,
            pruned_files,
            pruned_bytes,
        };
        let read_detail_suffix = if read_profile.legacy {
            " (legacy uncompressed cache path)"
        } else {
            ""
        };
        return Ok((
            cached.projection,
            cached.warnings,
            vec![
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache_lookup",
                    lookup_overhead_micros,
                    &format!(
                        "watermark/version check overhead around the cache load phases; {source_watermark_detail}"
                    ),
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.file_read",
                    read_profile.file_read_micros,
                    &format!(
                        "read compressed cache bytes from .tsift/backend-eval-cache{read_detail_suffix}"
                    ),
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.gzip_decode",
                    read_profile.gzip_decode_micros,
                    "gunzip the compressed projection cache bytes",
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.serde_decode",
                    read_profile.serde_decode_micros,
                    "serde_json deserialize the decoded projection cache payload",
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.cache.prune",
                    prune_micros,
                    "prune sibling cache files older than the current key",
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.source_graph_build",
                    0,
                    "reused cached full-project source graph; skipped code-index loading, session markdown scanning, source-handle construction, and semantic summary reads",
                ),
                graph_db_backend_eval_phase_timing(
                    "full_projection.projection_rows",
                    0,
                    "reused cached provider-neutral full-project projection rows",
                ),
            ],
            cache_stats,
        ));
    }

    // Cache miss: build from scratch, timing each phase.
    let mut cache_stats = GraphDbBackendEvalFullProjectionCacheStats::default();
    let mut phases = vec![graph_db_backend_eval_phase_timing(
        "full_projection.cache_lookup",
        lookup_started.elapsed().as_micros(),
        &format!(
            "no full-project projection cache entry matched the source watermark; {source_watermark_detail}"
        ),
    )];
    let full_source = graph_db_backend_eval_timed_phase(
        &mut phases,
        "full_projection.source_graph_build",
        "opt-in full-project source graph build; uses the project root as the path hint so bounded session projections cannot hide full-graph regressions",
        || build_traversal_graph_source_with_options(root, root, scope, false),
    )?;
    let projection = graph_db_backend_eval_timed_phase(
        &mut phases,
        "full_projection.projection_rows",
        "provider-neutral row construction for the opt-in full-project projection dataset",
        || traversal_projection_from_graph(root, scope, &full_source),
    )?;

    // Recompute the watermark after the build and key the cache entry on it. Fall back to the
    // pre-build watermark if the recomputation fails; it is moved, not cloned, because nothing
    // else uses it past this point.
    let refreshed_source_watermark =
        graph_db_backend_eval_full_projection_source_watermark(root, scope)
            .map(|watermark| watermark.value)
            .unwrap_or(source_watermark);
    let write_key = graph_db_backend_eval_full_projection_cache_key_for_watermark(
        root,
        scope,
        &refreshed_source_watermark,
    )?;

    // Move the projection and warnings into the cache payload instead of deep-cloning them;
    // they are moved back out for the return value once the payload has been serialized.
    let cache = GraphDbBackendEvalFullProjectionCache {
        version: GRAPH_DB_BACKEND_EVAL_FULL_PROJECTION_CACHE_VERSION.to_string(),
        key: write_key.clone(),
        source_watermark: refreshed_source_watermark,
        projection,
        warnings: full_source.warnings,
    };
    if let Some((disk_bytes, json_bytes, write_profile)) =
        graph_db_backend_eval_write_disk_cache(root, "full_projection", &write_key, &cache)
    {
        cache_stats.disk_bytes = disk_bytes;
        cache_stats.json_bytes = json_bytes;
        phases.push(graph_db_backend_eval_phase_timing(
            "full_projection.cache.serde_encode",
            write_profile.serde_encode_micros,
            "serde_json serialize the projection cache payload before compression",
        ));
        phases.push(graph_db_backend_eval_phase_timing(
            "full_projection.cache.gzip_encode",
            write_profile.gzip_encode_micros,
            "gzip-compress the serialized projection cache payload",
        ));
        phases.push(graph_db_backend_eval_phase_timing(
            "full_projection.cache.file_write",
            write_profile.file_write_micros,
            "write the compressed projection cache bytes to .tsift/backend-eval-cache",
        ));
    }

    let prune_started = Instant::now();
    let (pruned_files, pruned_bytes) =
        graph_db_backend_eval_prune_disk_cache(root, "full_projection", &write_key);
    phases.push(graph_db_backend_eval_phase_timing(
        "full_projection.cache.prune",
        prune_started.elapsed().as_micros(),
        "prune sibling cache files older than the current key",
    ));
    cache_stats.pruned_files = pruned_files;
    cache_stats.pruned_bytes = pruned_bytes;
    Ok((cache.projection, cache.warnings, phases, cache_stats))
}
/// Runs one benchmark probe and records its wall-clock duration.
///
/// On success the operation is reported as supported with status `"ok"` and the row count
/// returned by `run`, and the returned JSON value becomes the operation's signature. On
/// failure the operation is reported as unsupported with status `"error"`, no row count, the
/// error rendered with its full context chain, and no signature.
fn graph_db_backend_eval_timed(
    name: &str,
    run: impl FnOnce() -> Result<(Option<usize>, serde_json::Value)>,
) -> (
    GraphDbBackendEvalOperation,
    Option<GraphDbBackendEvalSignature>,
) {
    let started = Instant::now();
    let outcome = run();
    let duration_micros = started.elapsed().as_micros();
    match outcome {
        Ok((rows, value)) => {
            let operation = GraphDbBackendEvalOperation {
                name: name.to_string(),
                supported: true,
                status: "ok".to_string(),
                duration_micros,
                rows,
                error: None,
            };
            let signature = GraphDbBackendEvalSignature {
                operation: name.to_string(),
                value,
            };
            (operation, Some(signature))
        }
        Err(err) => {
            let operation = GraphDbBackendEvalOperation {
                name: name.to_string(),
                supported: false,
                status: "error".to_string(),
                duration_micros,
                rows: None,
                error: Some(format!("{err:#}")),
            };
            (operation, None)
        }
    }
}
/// Compares a candidate backend's operation signatures against the SQLite baseline.
///
/// With no baseline (`sqlite_signatures` is `None`) the result is a match with no
/// diagnostics. Otherwise every baseline operation, visited in operation-name order, must have
/// a candidate signature with an equal value; a differing value or a missing signature each
/// add one diagnostic. Candidate operations absent from the baseline are not checked.
fn graph_db_backend_eval_parity(
    sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
    candidate_signatures: &[GraphDbBackendEvalSignature],
) -> GraphDbBackendEvalParity {
    let baseline = match sqlite_signatures {
        Some(baseline) => baseline,
        None => {
            return GraphDbBackendEvalParity {
                matches_sqlite: true,
                diagnostics: Vec::new(),
            };
        }
    };
    let expected_by_operation = baseline
        .iter()
        .map(|entry| (entry.operation.as_str(), &entry.value))
        .collect::<BTreeMap<_, _>>();
    let actual_by_operation = candidate_signatures
        .iter()
        .map(|entry| (entry.operation.as_str(), &entry.value))
        .collect::<BTreeMap<_, _>>();
    let diagnostics = expected_by_operation
        .into_iter()
        .filter_map(
            |(operation, expected)| match actual_by_operation.get(operation) {
                Some(actual) if *actual == expected => None,
                Some(_) => Some(format!("{operation} output differed from SQLite")),
                None => Some(format!(
                    "{operation} did not complete for candidate backend"
                )),
            },
        )
        .collect::<Vec<_>>();
    GraphDbBackendEvalParity {
        matches_sqlite: diagnostics.is_empty(),
        diagnostics,
    }
}
/// Chooses the targets the backend evaluation should probe.
///
/// Requested targets that survive `normalize_conflict_target` win. If none survive, the first
/// node of kind `backlog` (or, failing that, `job_packet`) supplies a single target: its
/// `ref_id` property when present, otherwise its node id. With no such node the list is empty.
///
/// # Errors
/// Propagates store failures from `nodes_by_kind`.
pub(crate) fn graph_db_backend_eval_targets(
    store: &impl GraphStore,
    requested: &[String],
) -> Result<Vec<String>> {
    let explicit = requested
        .iter()
        .filter_map(|raw| normalize_conflict_target(raw))
        .collect::<Vec<_>>();
    if !explicit.is_empty() {
        return Ok(explicit);
    }
    for kind in ["backlog", "job_packet"] {
        let candidates = store.nodes_by_kind(kind)?;
        if let Some(first) = candidates.first() {
            let target = match first.properties.get("ref_id") {
                Some(ref_id) => ref_id.clone(),
                None => first.id.clone(),
            };
            return Ok(vec![target]);
        }
    }
    Ok(Vec::new())
}
/// Picks the `(from, to, max hops)` triple for the shortest-path probe.
///
/// * If both `gsym-synthetic-0000` and `gsym-synthetic-<max_hops, zero-padded to 4>` exist:
///   when the first symbol has more than one outgoing edge, the endpoints of its first
///   outgoing edge are used with the direct-path hop limit; otherwise the two synthetic
///   symbols are used with `max_hops`.
/// * Otherwise any sampled edge's endpoints are used with the direct-path hop limit, or
///   `None` is returned when the store has no edge to sample.
///
/// # Errors
/// Propagates store lookup failures.
fn graph_db_backend_eval_path_targets(
    store: &impl GraphStore,
    max_hops: usize,
) -> Result<Option<(String, String, usize)>> {
    let synthetic_from = "gsym-synthetic-0000";
    let synthetic_to = format!("gsym-synthetic-{max_hops:04}");
    let has_synthetic_endpoints =
        store.node(synthetic_from)?.is_some() && store.node(&synthetic_to)?.is_some();
    if !has_synthetic_endpoints {
        let sampled = store.sample_edge(None)?;
        return Ok(sampled.map(|edge| {
            (
                edge.from_id,
                edge.to_id,
                GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
            )
        }));
    }
    let outgoing = store.outgoing_edges(synthetic_from, None)?;
    let targets = match outgoing.first() {
        Some(edge) if outgoing.len() > 1 => (
            edge.from_id.clone(),
            edge.to_id.clone(),
            GRAPH_DB_BACKEND_EVAL_DIRECT_PATH_HOPS,
        ),
        _ => (synthetic_from.to_string(), synthetic_to, max_hops),
    };
    Ok(Some(targets))
}
/// Times a bounded shortest-path probe for one configured hop tier.
///
/// The operation is named `path_max_hops` for the default tier and
/// `path_max_hops_<configured_max_hops>` for any other tier. The probe fails when the store
/// offers no edge to derive endpoints from. The row count is the number of nodes on the found
/// path, or `None` when no path was found.
///
/// The signature carries a `warning` when the tier exceeds the default hop limit, or when no
/// path was found while the effective hop limit equals the configured one.
fn graph_db_backend_eval_path_operation<S: GraphStore>(
    store: &S,
    configured_max_hops: usize,
) -> (
    GraphDbBackendEvalOperation,
    Option<GraphDbBackendEvalSignature>,
) {
    let operation_name = if configured_max_hops != GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS {
        format!("path_max_hops_{configured_max_hops}")
    } else {
        "path_max_hops".to_string()
    };
    graph_db_backend_eval_timed(&operation_name, || {
        let (from, to, effective_max_hops) =
            graph_db_backend_eval_path_targets(store, configured_max_hops)?
                .context("backend-eval path probe requires at least one traversable edge")?;
        let path = store.shortest_path_with_max_hops(&from, &to, None, Some(effective_max_hops))?;
        let route = path.as_ref();
        let beyond_default_tier = configured_max_hops > GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS;
        let exhausted_configured_hops =
            route.is_none() && effective_max_hops == configured_max_hops;
        let warning = match (beyond_default_tier, exhausted_configured_hops) {
            (true, _) => Some(format!(
                "{configured_max_hops}-hop tier is measured only; keep user-facing defaults at {} until repeated samples and SQLite query-plan checks pass",
                GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS
            )),
            (false, true) => Some(format!(
                "path probe truncated at {configured_max_hops} hops before a route was found"
            )),
            (false, false) => None,
        };
        let rows = route.map(|found| found.nodes.len());
        let signature = serde_json::json!({
            "from": from,
            "to": to,
            "configured_max_hops": configured_max_hops,
            "effective_max_hops": effective_max_hops,
            "hops": route.map(|found| found.hops),
            "nodes": route.map(|found| &found.nodes),
            "found": route.is_some(),
            "warning": warning,
        });
        Ok((rows, signature))
    })
}
fn graph_db_backend_eval_neighborhood_operation<S: GraphStore>(
store: &S,
depth: usize,
limit: usize,
) -> (
GraphDbBackendEvalOperation,
Option<GraphDbBackendEvalSignature>,
) {
graph_db_backend_eval_timed("neighborhood", || {
let edge = match store.sample_edge(Some("calls"))? {
Some(edge) => edge,
None => store.sample_edge(None)?.context(
"backend-eval neighborhood probe requires at least one traversable edge",
)?,
};
let page = store
.paged_neighborhood(
&edge.from_id,
depth,
Some(&edge.kind),
GraphQueryOptions {
limit: Some(limit.max(1)),
..GraphQueryOptions::default()
},
)?
.with_context(|| {
format!(
"backend-eval neighborhood target not found: {}",
edge.from_id
)
})?;
Ok((
Some(page.nodes.len() + page.edges.len()),
serde_json::json!({
"center": edge.from_id,
"kind": edge.kind,
"depth": depth,
"limit": limit.max(1),
"node_ids": page.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
"edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
"truncated": page.page.truncated,
}),
))
})
}
fn graph_db_backend_eval_related_operation<S: GraphStore>(
root: &Path,
scope: Option<&str>,
store: &S,
depth: usize,
limit: usize,
) -> (
GraphDbBackendEvalOperation,
Option<GraphDbBackendEvalSignature>,
) {
graph_db_backend_eval_timed("related", || {
let query = "backend evaluation";
let semantic = semantic_related_report_from_store(
root,
scope,
query,
3,
SemanticRelatedKind::All,
store,
)?;
let seed_ids = semantic
.items
.iter()
.map(|item| item.handle.clone())
.collect::<Vec<_>>();
let subgraph =
graph_db_semantic_seeded_neighborhood(store, &seed_ids, depth, limit.max(1))?;
Ok((
Some(subgraph.nodes.len() + subgraph.edges.len()),
serde_json::json!({
"query": query,
"seed_ids": seed_ids,
"node_ids": subgraph.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
"edge_ids": subgraph.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
"truncated": subgraph.truncated,
"warnings": semantic.warnings,
"diagnostics": subgraph.diagnostics,
}),
))
})
}
/// Reduces an evidence report to the JSON signature used for backend parity checks.
///
/// The signature keeps the target, the target node's id and kind, the node ids of each
/// evidence list, and the number of shortest paths.
fn graph_db_backend_eval_evidence_signature(report: &GraphDbEvidenceReport) -> serde_json::Value {
    let worker_context = report
        .worker_context
        .iter()
        .map(|node| &node.id)
        .collect::<Vec<_>>();
    let source_handles = report
        .source_handles
        .iter()
        .map(|node| &node.id)
        .collect::<Vec<_>>();
    let worker_results = report
        .worker_results
        .iter()
        .map(|node| &node.id)
        .collect::<Vec<_>>();
    let semantic_related = report
        .semantic_related
        .iter()
        .map(|node| &node.id)
        .collect::<Vec<_>>();
    let path_count = report.shortest_paths.len();
    serde_json::json!({
        "target": report.target,
        "target_node_id": report.target_node.id,
        "target_kind": report.target_node.kind,
        "worker_context": worker_context,
        "source_handles": source_handles,
        "worker_results": worker_results,
        "semantic_related": semantic_related,
        "path_count": path_count,
    })
}
/// Builds the parity signature for resolved evidence targets: one entry per
/// `(target, node)` pair, in input order, holding the target and the node's id, kind, and
/// label.
fn graph_db_backend_eval_target_resolution_signature(
    resolved: &[(String, SubstrateGraphNode)],
) -> serde_json::Value {
    let mut targets = Vec::with_capacity(resolved.len());
    for (target, node) in resolved {
        targets.push(serde_json::json!({
            "target": target,
            "target_node_id": node.id,
            "target_kind": node.kind,
            "target_label": node.label,
        }));
    }
    serde_json::json!({
        "targets": targets,
    })
}
/// Reduces a conflict-matrix report to the JSON signature used for backend parity checks.
///
/// The signature keeps the report-level flags, the targets listed in
/// `per_target_fail_closed`, a per-candidate summary (risk label, owned files and symbols,
/// source handle ids, completion and parallel-safety flags), and each conflict pair with its
/// risk label.
fn graph_db_backend_eval_conflict_signature(report: &ConflictMatrixReport) -> serde_json::Value {
    let per_target_fail_closed = report
        .per_target_fail_closed
        .iter()
        .map(|entry| &entry.target)
        .collect::<Vec<_>>();
    let candidates = report
        .candidates
        .iter()
        .map(|candidate| {
            let source_handles = candidate
                .source_handles
                .iter()
                .map(|handle| &handle.handle)
                .collect::<Vec<_>>();
            serde_json::json!({
                "target": candidate.target,
                "risk": conflict_risk_label(candidate.risk),
                "owned_files": candidate.owned_files,
                "owned_symbols": candidate.owned_symbols,
                "source_handles": source_handles,
                "previously_completed": candidate.previously_completed,
                "parallel_safe": candidate.parallel_safe,
            })
        })
        .collect::<Vec<_>>();
    let conflicts = report
        .conflicts
        .iter()
        .map(|pair| {
            serde_json::json!({
                "left": pair.left,
                "right": pair.right,
                "risk": conflict_risk_label(pair.risk),
            })
        })
        .collect::<Vec<_>>();
    serde_json::json!({
        "targets": report.targets,
        "can_parallel": report.can_parallel,
        "fail_closed": report.fail_closed,
        "cross_target_parallel_safe": report.cross_target_parallel_safe,
        "per_target_fail_closed": per_target_fail_closed,
        "candidates": candidates,
        "conflicts": conflicts,
    })
}
fn graph_db_backend_eval_dispatch_signature(report: &DispatchTraceReport) -> serde_json::Value {
serde_json::json!({
"targets": report.targets,
"node_ids": report.nodes.iter().map(|node| &node.id).collect::<Vec<_>>(),
"edge_keys": report.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
"evidence_packet_ids": report.evidence_packet_ids,
"worker_prompt_targets": report.worker_prompt_packets.iter().map(|packet| &packet.target).collect::<Vec<_>>(),
"truncated": report.truncated,
})
}
/// Selects the edge and property filters for the edge property scan probe.
///
/// Prefers an edge that carries a property, returned with a single filter on that property.
/// Falls back to any sampled edge with no filters.
///
/// # Errors
/// Fails when the store has no edge at all, or when a store lookup fails.
fn graph_db_backend_eval_edge_scan_probe(
    store: &impl GraphStore,
) -> Result<(SubstrateGraphEdge, Vec<GraphPropertyFilter>)> {
    match store.sample_edge_with_property()? {
        Some((edge, filter)) => Ok((edge, vec![filter])),
        None => {
            let edge = store
                .sample_edge(None)?
                .context("backend-eval edge scan requires at least one edge")?;
            Ok((edge, Vec::new()))
        }
    }
}
/// Runs the full backend-eval probe suite against one graph store and assembles its report.
///
/// The caller supplies the already-measured `refresh` operation (and its signature, if any);
/// this function then times, in order: `status`, `edge_lookup`, `edge_property_scan`,
/// `incident_edges`, `neighborhood`, `related`, one path probe per hop tier,
/// `evidence_target_resolution`, `evidence`, `conflict_matrix`, and `dispatch_trace`.
/// A failing probe is recorded as an error operation and does not abort the suite.
///
/// The last three probes hand state to each other through captured `Option`s, so their order
/// matters: `evidence` stores its packets for `conflict_matrix`, which in turn stores its
/// report and graph snapshot for `dispatch_trace`.
///
/// Returns the backend report (operations, summed duration, parity against
/// `sqlite_signatures`) together with the collected signatures.
#[allow(clippy::too_many_arguments)]
fn graph_db_backend_eval_report_for_store<S: GraphStore>(
backend: &str,
adapter: &str,
read_only: bool,
root: &Path,
path: &Path,
scope: Option<&str>,
targets: &[String],
depth: usize,
limit: usize,
impact_limit: usize,
store: &S,
freshness: GraphDbFreshnessReport,
refresh_operation: GraphDbBackendEvalOperation,
refresh_signature: Option<GraphDbBackendEvalSignature>,
sqlite_signatures: Option<&[GraphDbBackendEvalSignature]>,
extra_warnings: Vec<String>,
prepared: &ConflictMatrixPreparedInputs,
projection_load: &str,
lock_behavior: &str,
install_portability: &str,
) -> (
GraphDbBackendEvalBackendReport,
Vec<GraphDbBackendEvalSignature>,
) {
// Seed the result lists with the refresh measurement taken by the caller.
let mut operations = vec![refresh_operation];
let mut signatures = refresh_signature.into_iter().collect::<Vec<_>>();
// status: node and edge counts plus the freshness status.
let (operation, signature) = graph_db_backend_eval_timed("status", || {
let (nodes, edges) = store.graph_counts()?;
Ok((
Some(nodes + edges),
serde_json::json!({
"freshness": freshness.status,
"nodes": nodes,
"edges": edges,
}),
))
});
operations.push(operation);
signatures.extend(signature);
// edge_lookup: sample an edge, then fetch it back by its key.
let (operation, signature) = graph_db_backend_eval_timed("edge_lookup", || {
let edge = store
.sample_edge(None)?
.context("backend-eval edge lookup requires at least one edge")?;
let edge_id = graph_db_edge_key(&edge);
let found = store
.edge(&edge_id)?
.with_context(|| format!("backend-eval edge lookup missed {edge_id}"))?;
Ok((
Some(1),
serde_json::json!({
"edge_id": edge_id,
"from_id": found.from_id,
"to_id": found.to_id,
"kind": found.kind,
}),
))
});
operations.push(operation);
signatures.extend(signature);
// edge_property_scan: page edges of the sampled kind, filtered by a sampled property when
// one is available.
let (operation, signature) = graph_db_backend_eval_timed("edge_property_scan", || {
let (edge, filters) = graph_db_backend_eval_edge_scan_probe(store)?;
let page = store.paged_edges(
Some(&edge.kind),
GraphQueryOptions {
limit: Some(limit.max(1)),
property_filters: filters.clone(),
..GraphQueryOptions::default()
},
)?;
Ok((
Some(page.edges.len()),
serde_json::json!({
"kind": edge.kind,
"filters": filters.iter().map(|filter| format!("{}={}", filter.key, filter.value)).collect::<Vec<_>>(),
"edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
"truncated": page.page.truncated,
}),
))
});
operations.push(operation);
signatures.extend(signature);
// incident_edges: page the edges of the sampled kind incident to the sampled edge's source.
let (operation, signature) = graph_db_backend_eval_timed("incident_edges", || {
let edge = store
.sample_edge(None)?
.context("backend-eval incident edge scan requires at least one edge")?;
let page = store.paged_incident_edges(
&edge.from_id,
Some(&edge.kind),
GraphQueryOptions {
limit: Some(limit.max(1)),
..GraphQueryOptions::default()
},
)?;
Ok((
Some(page.edges.len()),
serde_json::json!({
"node_id": edge.from_id,
"kind": edge.kind,
"edge_ids": page.edges.iter().map(graph_db_edge_key).collect::<Vec<_>>(),
"truncated": page.page.truncated,
}),
))
});
operations.push(operation);
signatures.extend(signature);
let (operation, signature) = graph_db_backend_eval_neighborhood_operation(store, depth, limit);
operations.push(operation);
signatures.extend(signature);
let (operation, signature) =
graph_db_backend_eval_related_operation(root, scope, store, depth, limit);
operations.push(operation);
signatures.extend(signature);
// Path probes: the default hop tier first, then each extended tier.
for configured_max_hops in std::iter::once(GRAPH_DB_BACKEND_EVAL_PATH_MAX_HOPS)
.chain(GRAPH_DB_BACKEND_EVAL_EXTENDED_PATH_HOPS)
{
let (operation, signature) =
graph_db_backend_eval_path_operation(store, configured_max_hops);
operations.push(operation);
signatures.extend(signature);
}
// evidence_target_resolution: every requested target must resolve to a node.
let (operation, signature) = graph_db_backend_eval_timed("evidence_target_resolution", || {
let resolved = targets
.iter()
.map(|target| {
let node = graph_db_resolve_evidence_target(store, target)?
.with_context(|| format!("backend-eval target not found: {target}"))?;
Ok((target.clone(), node))
})
.collect::<Result<Vec<_>>>()?;
let signature = graph_db_backend_eval_target_resolution_signature(&resolved);
Ok((Some(resolved.len()), signature))
});
operations.push(operation);
signatures.extend(signature);
// Hand-off slots filled by one probe and consumed by a later one.
let mut evidence_for_report = None;
let mut graph_snapshot_for_trace = None;
// evidence: collect evidence packets for the resolved targets. Rows are summed over all
// packets, but the signature is taken from the first packet's report only.
let (operation, signature) = graph_db_backend_eval_timed("evidence", || {
let resolved_targets =
resolve_conflict_matrix_targets(store, targets, &prepared.context_pack)?;
let evidence = collect_conflict_matrix_evidence_packets(
root,
scope,
backend,
&resolved_targets,
depth,
limit,
store,
freshness.clone(),
)?;
let report = &evidence
.first()
.context("backend-eval evidence requires at least one target")?
.report;
let rows = evidence
.iter()
.map(|entry| {
entry.report.worker_context.len()
+ entry.report.source_handles.len()
+ entry.report.worker_results.len()
+ entry.report.semantic_related.len()
})
.sum();
let signature = graph_db_backend_eval_evidence_signature(report);
evidence_for_report = Some((resolved_targets, evidence));
Ok((Some(rows), signature))
});
operations.push(operation);
signatures.extend(signature);
let mut conflict_for_trace = None;
// conflict_matrix: reuse the evidence gathered above when that probe succeeded; otherwise
// run the full graph preparation from scratch.
let (operation, signature) = graph_db_backend_eval_timed("conflict_matrix", || {
let graph_prepared = if let Some((targets, evidence)) = evidence_for_report.take() {
let graph =
conflict_matrix_target_scoped_graph_snapshot(store, &evidence, depth, limit)?;
let shared_preparation =
conflict_matrix_shared_preparation_summary(&graph, &evidence, "memory_reuse");
ConflictMatrixGraphPreparedInputs {
targets,
graph,
evidence,
shared_preparation,
}
} else {
prepare_conflict_matrix_graph_orchestration(
root,
scope,
backend,
targets,
prepared,
depth,
limit,
store,
freshness.clone(),
)?
};
let report = build_conflict_matrix_report_from_prepared_graph(
root,
path,
scope,
depth,
limit,
impact_limit,
freshness.clone(),
extra_warnings.clone(),
prepared,
&graph_prepared,
)?;
let signature = graph_db_backend_eval_conflict_signature(&report);
let rows = report.candidates.len() + report.conflicts.len();
// Keep the report and graph snapshot for the dispatch-trace probe below.
conflict_for_trace = Some(report);
graph_snapshot_for_trace = Some(graph_prepared.graph);
Ok((Some(rows), signature))
});
operations.push(operation);
signatures.extend(signature);
// dispatch_trace: requires the conflict-matrix probe to have succeeded; fails otherwise.
let (operation, signature) = graph_db_backend_eval_timed("dispatch_trace", || {
let conflict = conflict_for_trace
.take()
.context("backend-eval dispatch-trace requires a completed conflict-matrix report")?;
let graph = graph_snapshot_for_trace
.take()
.context("backend-eval dispatch-trace requires conflict-matrix graph preparation")?;
let report = build_dispatch_trace_report_from_conflict_snapshot(
root,
scope,
conflict,
graph.nodes,
graph.edges,
depth,
limit,
Vec::new(),
)?;
Ok((
Some(report.nodes.len() + report.edges.len()),
graph_db_backend_eval_dispatch_signature(&report),
))
});
operations.push(operation);
signatures.extend(signature);
// The backend total is the sum of all operation durations, including the refresh.
let total_micros = operations
.iter()
.map(|operation| operation.duration_micros)
.sum();
let parity = graph_db_backend_eval_parity(sqlite_signatures, &signatures);
(
GraphDbBackendEvalBackendReport {
backend: backend.to_string(),
adapter: adapter.to_string(),
read_only,
projection_load: projection_load.to_string(),
operations,
total_micros,
parity,
lock_behavior: lock_behavior.to_string(),
install_portability: install_portability.to_string(),
},
signatures,
)
}
/// Wraps an externally measured refresh into a successful `refresh` operation record and its
/// matching signature.
pub(crate) fn graph_db_backend_eval_refresh_operation(
    duration_micros: u128,
    rows: usize,
    value: serde_json::Value,
) -> (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature) {
    let name = "refresh";
    let operation = GraphDbBackendEvalOperation {
        name: name.to_string(),
        supported: true,
        status: "ok".to_string(),
        duration_micros,
        rows: Some(rows),
        error: None,
    };
    let signature = GraphDbBackendEvalSignature {
        operation: name.to_string(),
        value,
    };
    (operation, signature)
}
/// Builds a deterministic synthetic projection for backend benchmarks.
///
/// `nodes` is raised to at least 12. Nine fixed scaffold nodes come first (projection meta,
/// session, backlog, job packet, worker context, source handle, file, semantic concept,
/// worker result); the remaining `nodes - 9` are symbols named `gsym-synthetic-NNNN`.
///
/// Nine fixed edges wire the scaffold together and attach the file to symbol `0000`. Each
/// symbol then gets `calls` edges to the next `fanout` symbols (at least one, at most the
/// symbol count), wrapping around and skipping self-edges. Every edge carries the property
/// `dataset = synthetic` and the synthetic provenance; among the nodes only the projection
/// meta node carries that provenance.
pub(crate) fn graph_db_backend_eval_synthetic_projection(
    nodes: usize,
    fanout: usize,
) -> GraphProjection {
    let nodes = nodes.max(12);
    let symbol_count = nodes.saturating_sub(9).max(1);
    let provenance = GraphProvenance::new("backend-eval", "synthetic");

    let meta_node = SubstrateGraphNode::new(
        "projection:tsift-traversal:synthetic",
        GRAPH_PROJECTION_META_KIND,
        "synthetic projection",
    )
    .with_property("projection_version", GRAPH_PROJECTION_VERSION)
    .with_property(
        "content_hash",
        format!("synthetic-{nodes}-{fanout}-{symbol_count}"),
    )
    .with_provenance(provenance.clone());
    let session_node = SubstrateGraphNode::new("gses-synthetic", "session", "synthetic session")
        .with_property("ref_id", "synthetic-session");
    let backlog_node = SubstrateGraphNode::new("gbak-synthetic", "backlog", "#synthetic")
        .with_property("ref_id", "synthetic")
        .with_property("path", "tasks/software/synthetic.md")
        .with_property("line", "1")
        .with_property(
            "expand",
            "tsift source-read tasks/software/synthetic.md --start 1 --lines 40",
        );
    let job_node = SubstrateGraphNode::new("gjob-synthetic", "job_packet", "do #synthetic")
        .with_property("ref_id", "synthetic");
    let context_node =
        SubstrateGraphNode::new("gwctx-synthetic", "worker_context", "synthetic context")
            .with_property("target", "synthetic")
            .with_property("summary", "Synthetic worker owns synthetic.rs")
            .with_property(
                "expand",
                "tsift source-read synthetic.rs --start 1 --lines 80",
            );
    let source_handle_node =
        SubstrateGraphNode::new("gsrc-synthetic", "source_handle", "synthetic.rs:1-80")
            .with_property("file", "synthetic.rs")
            .with_property("start", "1")
            .with_property("end", "80")
            .with_property(
                "expand",
                "tsift source-read synthetic.rs --start 1 --lines 80",
            );
    let file_node = SubstrateGraphNode::new("gfil-synthetic", "file", "synthetic.rs")
        .with_property("path", "synthetic.rs");
    let concept_node =
        SubstrateGraphNode::new("gsem-synthetic", "semantic_concept", "backend evaluation")
            .with_property("handle", "gsem-synthetic")
            .with_property("label", "backend evaluation")
            .with_property("embedding_model", SEMANTIC_EMBEDDING_MODEL)
            .with_property(
                "embedding",
                semantic_embedding_property("backend evaluation"),
            );
    let result_node =
        SubstrateGraphNode::new("gwres-synthetic", "worker_result", "completed #synthetic")
            .with_property("ref_id", "synthetic")
            .with_property("status", "completed")
            .with_property("touched_files", "synthetic.rs")
            .with_property("expected_tests", "cargo test --test graph_db_conformance");

    let mut projection_nodes = vec![
        meta_node,
        session_node,
        backlog_node,
        job_node,
        context_node,
        source_handle_node,
        file_node,
        concept_node,
        result_node,
    ];
    projection_nodes.extend((0..symbol_count).map(|idx| {
        let symbol_name = format!("synthetic_symbol_{idx:04}");
        SubstrateGraphNode::new(
            format!("gsym-synthetic-{idx:04}"),
            "symbol",
            symbol_name.clone(),
        )
        .with_property("ref_id", symbol_name)
        .with_property("path", "synthetic.rs")
        .with_property("line", (idx + 1).to_string())
    }));

    let mut projection_edges = vec![
        SubstrateGraphEdge::new("gses-synthetic", "gbak-synthetic", "contains"),
        SubstrateGraphEdge::new("gses-synthetic", "gjob-synthetic", "queues"),
        SubstrateGraphEdge::new("gbak-synthetic", "gwctx-synthetic", "has_context"),
        SubstrateGraphEdge::new("gjob-synthetic", "gwctx-synthetic", "has_context"),
        SubstrateGraphEdge::new("gwctx-synthetic", "gsrc-synthetic", "uses_source"),
        SubstrateGraphEdge::new("gbak-synthetic", "gwres-synthetic", "has_worker_result"),
        SubstrateGraphEdge::new("gbak-synthetic", "gsem-synthetic", "mentions_concept"),
        SubstrateGraphEdge::new("gsrc-synthetic", "gfil-synthetic", "reads_file"),
        SubstrateGraphEdge::new("gfil-synthetic", "gsym-synthetic-0000", "defines"),
    ];
    // Ring of call edges: each symbol calls the next `reach` symbols, wrapping around. The
    // self-edge that appears when the offset equals the symbol count is dropped.
    let reach = fanout.max(1).min(symbol_count);
    projection_edges.extend((0..symbol_count).flat_map(|idx| {
        (1..=reach).filter_map(move |offset| {
            let to_idx = (idx + offset) % symbol_count;
            (to_idx != idx).then(|| {
                SubstrateGraphEdge::new(
                    format!("gsym-synthetic-{idx:04}"),
                    format!("gsym-synthetic-{to_idx:04}"),
                    "calls",
                )
            })
        })
    }));

    let mut tagged_edges = Vec::with_capacity(projection_edges.len());
    for edge in projection_edges {
        tagged_edges.push(
            edge.with_property("dataset", "synthetic")
                .with_provenance(provenance.clone()),
        );
    }
    GraphProjection {
        nodes: projection_nodes,
        edges: tagged_edges,
    }
}
/// Decides, per candidate backend, whether the benchmark evidence allows promotion.
///
/// For every dataset the candidate must have run, match SQLite parity, finish with a lower
/// total time than SQLite, beat SQLite on every operation both backends report, and have no
/// operation with a status other than `"ok"`. Each violation adds a reason; a dataset without
/// a SQLite baseline fails both the parity and the speed requirement.
///
/// A candidate with a prototype hold reason is always `"hold"`. Otherwise it is `"eligible"`
/// only when both requirements held on every dataset, and `"hold"` if not. Reasons are
/// de-duplicated while keeping their first-seen order.
///
/// NOTE(review): with an empty `datasets` slice both requirements hold vacuously, so a
/// candidate without a prototype hold reason is reported as `"eligible"` — confirm callers
/// never pass an empty slice, or that this is intended.
pub(crate) fn graph_db_backend_eval_promotion(
    datasets: &[GraphDbBackendEvalDataset],
    candidates: &[GraphDbExperimentalBackend],
) -> Vec<GraphDbBackendPromotionDecision> {
    let mut decisions = Vec::new();
    for candidate in candidates {
        let mut reasons = Vec::new();
        let mut faster_everywhere = true;
        let mut parity_everywhere = true;
        for dataset in datasets {
            let baseline = dataset
                .backends
                .iter()
                .find(|report| report.backend == "sqlite");
            let sqlite_report = match baseline {
                Some(sqlite_report) => sqlite_report,
                None => {
                    parity_everywhere = false;
                    faster_everywhere = false;
                    reasons.push(format!(
                        "{} dataset is missing SQLite baseline",
                        dataset.name
                    ));
                    continue;
                }
            };
            let sqlite_total = sqlite_report.total_micros;
            let contender = dataset
                .backends
                .iter()
                .find(|report| report.backend == candidate.name());
            let candidate_report = match contender {
                Some(candidate_report) => candidate_report,
                None => {
                    parity_everywhere = false;
                    reasons.push(format!("{} dataset did not run", dataset.name));
                    continue;
                }
            };
            if !candidate_report.parity.matches_sqlite {
                parity_everywhere = false;
                reasons.push(format!("{} parity differed from SQLite", dataset.name));
            }
            if candidate_report.total_micros >= sqlite_total {
                faster_everywhere = false;
                reasons.push(format!(
                    "{} total {}us did not beat SQLite {}us",
                    dataset.name, candidate_report.total_micros, sqlite_total
                ));
            }
            // Per-operation comparison, limited to operations SQLite also reported.
            let sqlite_durations = sqlite_report
                .operations
                .iter()
                .map(|operation| (operation.name.as_str(), operation.duration_micros))
                .collect::<BTreeMap<_, _>>();
            for operation in &candidate_report.operations {
                let Some(sqlite_duration) = sqlite_durations.get(operation.name.as_str()) else {
                    continue;
                };
                if operation.duration_micros < *sqlite_duration {
                    continue;
                }
                faster_everywhere = false;
                reasons.push(format!(
                    "{} {} operation {}us did not beat SQLite {}us",
                    dataset.name, operation.name, operation.duration_micros, sqlite_duration
                ));
            }
            let all_operations_ok = candidate_report
                .operations
                .iter()
                .all(|operation| operation.status == "ok");
            if !all_operations_ok {
                parity_everywhere = false;
                reasons.push(format!("{} has failed benchmark operations", dataset.name));
            }
        }
        let decision = match candidate.prototype_hold_reason() {
            Some(reason) => {
                reasons.push(reason.to_string());
                reasons.push(
                    "current bounded prototype timings are benchmark evidence, not a backend switch approval"
                        .to_string(),
                );
                "hold"
            }
            None if parity_everywhere && faster_everywhere => {
                reasons.push(
                    "prototype gate passed; production promotion still requires the real engine adapter to preserve SQLite's bundled install and multi-process lock behavior"
                        .to_string(),
                );
                "eligible"
            }
            None => {
                reasons.push(
                    "production promotion requires SQLite parity plus lower total time for every measured operation on every dataset without worse lock behavior or install portability"
                        .to_string(),
                );
                "hold"
            }
        };
        decisions.push(GraphDbBackendPromotionDecision {
            backend: candidate.name().to_string(),
            decision: decision.to_string(),
            reasons: dedupe_preserve_order(reasons),
            gate: candidate.promotion_gate(),
        });
    }
    decisions
}
/// Flattens backend-eval datasets into a `metric name -> value` map.
///
/// Per dataset it records node, edge and graph-row counts; per backend the total duration
/// (raw and normalized per graph rows); per operation the duration (raw and normalized)
/// and, when present, the row count. Dashes in backend names become underscores in keys.
pub(crate) fn graph_db_backend_eval_metrics(
    datasets: &[GraphDbBackendEvalDataset],
) -> BTreeMap<String, f64> {
    let mut metrics = BTreeMap::new();
    for dataset in datasets {
        let dataset_name = &dataset.name;
        let graph_rows = graph_db_backend_eval_graph_rows(dataset);
        metrics.extend([
            (format!("{dataset_name}.nodes"), dataset.nodes as f64),
            (format!("{dataset_name}.edges"), dataset.edges as f64),
            (format!("{dataset_name}.graph_rows"), graph_rows as f64),
        ]);
        for backend in &dataset.backends {
            let backend_key = backend.backend.replace('-', "_");
            let prefix = format!("{dataset_name}.{backend_key}");
            metrics.insert(
                format!("{prefix}.total_duration_micros"),
                backend.total_micros as f64,
            );
            append_graph_db_backend_eval_normalized_duration_metric(
                &mut metrics,
                &format!("{prefix}.total_duration_micros_per_1k_graph_rows"),
                backend.total_micros,
                graph_rows,
            );
            for operation in &backend.operations {
                let operation_prefix = format!("{prefix}.{}", operation.name);
                metrics.insert(
                    format!("{operation_prefix}.duration_micros"),
                    operation.duration_micros as f64,
                );
                append_graph_db_backend_eval_normalized_duration_metric(
                    &mut metrics,
                    &format!("{operation_prefix}.duration_micros_per_1k_graph_rows"),
                    operation.duration_micros,
                    graph_rows,
                );
                if let Some(rows) = operation.rows {
                    metrics.insert(format!("{operation_prefix}.rows"), rows as f64);
                }
            }
        }
    }
    metrics
}
/// Returns the dataset's graph size expressed as rows: node count plus edge count.
pub(crate) fn graph_db_backend_eval_graph_rows(dataset: &GraphDbBackendEvalDataset) -> usize {
    let node_rows = dataset.nodes;
    let edge_rows = dataset.edges;
    node_rows + edge_rows
}
/// Inserts `key` with the duration scaled to
/// `GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT` graph rows.
///
/// Nothing is inserted when `graph_rows` is zero, so no division by zero occurs.
pub(crate) fn append_graph_db_backend_eval_normalized_duration_metric(
    metrics: &mut BTreeMap<String, f64>,
    key: &str,
    duration_micros: u128,
    graph_rows: usize,
) {
    if graph_rows != 0 {
        let micros_per_row = duration_micros as f64 / graph_rows as f64;
        let normalized = micros_per_row * GRAPH_DB_BACKEND_EVAL_NORMALIZATION_ROW_UNIT;
        metrics.insert(key.to_string(), normalized);
    }
}
/// Records refresh-phase timings for `dataset` under `{dataset}.refresh_phase.{phase}`:
/// the raw duration plus the per-graph-rows normalized duration for every phase.
pub(crate) fn append_graph_db_backend_eval_phase_metrics(
    metrics: &mut BTreeMap<String, f64>,
    dataset: &str,
    graph_rows: usize,
    phases: &[GraphDbBackendEvalPhaseTiming],
) {
    for phase in phases {
        let phase_prefix = format!("{dataset}.refresh_phase.{}", phase.name);
        let elapsed = phase.duration_micros;
        metrics.insert(format!("{phase_prefix}.duration_micros"), elapsed as f64);
        append_graph_db_backend_eval_normalized_duration_metric(
            metrics,
            &format!("{phase_prefix}.duration_micros_per_1k_graph_rows"),
            elapsed,
            graph_rows,
        );
    }
}
/// Renders the `tsift graph-db ... backend-eval` command line for `root`, including the
/// scope argument produced by `graph_db_scope_arg` and, when requested, the
/// `--full-projection` flag.
fn graph_db_backend_eval_base_command(
    root: &Path,
    scope: Option<&str>,
    full_projection: bool,
) -> String {
    let quoted_root = shell_quote(root.to_string_lossy().as_ref());
    let scope_arg = graph_db_scope_arg(scope);
    let full_projection_arg = match full_projection {
        true => " --full-projection",
        false => "",
    };
    format!("tsift graph-db --path {quoted_root}{scope_arg} --json backend-eval{full_projection_arg}")
}
/// Renders the backend-eval command piped into `tsift metric-digest` against the
/// performance-history baseline fixture.
pub(crate) fn graph_db_backend_eval_metric_digest_command(
    root: &Path,
    scope: Option<&str>,
    full_projection: bool,
) -> String {
    let eval_command = graph_db_backend_eval_base_command(root, scope, full_projection);
    format!(
        "{eval_command} | tsift metric-digest --baseline fixtures/graph-db-performance-history.json"
    )
}
/// Renders a shell loop that runs the backend-eval command three times and pipes all
/// samples into `tsift metric-digest` against the performance-history baseline fixture.
fn graph_db_backend_eval_repeated_sample_command(
    root: &Path,
    scope: Option<&str>,
    full_projection: bool,
) -> String {
    let eval_command = graph_db_backend_eval_base_command(root, scope, full_projection);
    format!(
        "for sample in 1 2 3; do {eval_command}; done | tsift metric-digest --baseline fixtures/graph-db-performance-history.json"
    )
}
/// Describes the gate that must pass before the default hop cap may be raised.
///
/// For every required workload the gate lists the duration and row metrics of the
/// default `path_max_hops` operation followed by the same pair for each candidate tier.
fn graph_db_backend_eval_hop_cap_promotion_gate() -> GraphDbHopCapPromotionGate {
    let required_workloads = perf_gate::HOP_CAP_REQUIRED_WORKLOADS
        .iter()
        .map(|workload| (*workload).to_string())
        .collect();

    let mut required_metrics = Vec::new();
    for workload in perf_gate::HOP_CAP_REQUIRED_WORKLOADS {
        required_metrics.extend([
            format!("{workload}.sqlite.path_max_hops.duration_micros"),
            format!("{workload}.sqlite.path_max_hops.rows"),
        ]);
        for hops in perf_gate::HOP_CAP_CANDIDATE_TIERS {
            required_metrics.extend([
                format!("{workload}.sqlite.path_max_hops_{hops}.duration_micros"),
                format!("{workload}.sqlite.path_max_hops_{hops}.rows"),
            ]);
        }
    }

    let decision_rule = "keep 64 as the user-facing default until each candidate tier has repeated real, full_projection, and synthetic_deep_chain SQLite samples within the latency-regression budget and returning useful path rows; full_projection samples are binding only after a cold populate leg proves a cache-hit leg";

    GraphDbHopCapPromotionGate {
        status: String::from("hold_64_default_until_gate_passes"),
        current_default_hops: perf_gate::HOP_CAP_CURRENT_DEFAULT,
        candidate_hop_tiers: perf_gate::HOP_CAP_CANDIDATE_TIERS.to_vec(),
        required_backend: perf_gate::BASELINE_BACKEND.to_string(),
        required_workloads,
        required_metrics,
        allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
        minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
        decision_rule: decision_rule.to_string(),
    }
}
fn graph_db_backend_eval_backend_adapter_spike_gate() -> GraphDbBackendAdapterSpikeGate {
let candidate_backends = [
GraphDbExperimentalBackend::Falkordb,
GraphDbExperimentalBackend::Kuzu,
]
.into_iter()
.map(|backend| GraphDbBackendAdapterSpikeCandidate {
backend: backend.name().to_string(),
adapter_label: backend.adapter_label().to_string(),
projection_load: backend.projection_load().to_string(),
lock_behavior: backend.lock_behavior().to_string(),
install_portability: backend.install_portability().to_string(),
})
.collect();
GraphDbBackendAdapterSpikeGate {
status: "hold_real_optional_adapter_required".to_string(),
candidate_backends,
required_workloads: perf_gate::GATE_WORKLOAD_PREFIXES
.iter()
.map(|workload| (*workload).to_string())
.collect(),
required_checks: vec![
"real_optional_adapter_behind_graphstore_without_default_build_dependency".to_string(),
"projection_load_writes_provider_neutral_rows_without_sqlite_row_replay".to_string(),
"freshness_and_full_parity_match_sqlite_on_every_graphstore_operation".to_string(),
"lock_semantics_match_or_beat_sqlite_for_writer_and_read_only_workflows".to_string(),
"install_portability_preserves_cargo_build_install_without_external_service_or_native_toolchain"
.to_string(),
"full_projection_cache_hit_sample_before_backend_or_hop_cap_changes".to_string(),
"beats_sqlite_on_every_required_workload_and_metric_in_backend_eval".to_string(),
],
decision_rule:
"do not promote a read-only prototype; FalkorDB or Kuzu can only advance after a real optional adapter proves projection writes/load, lock semantics, install portability, full parity, and faster-than-SQLite results across every required workload"
.to_string(),
evidence_plan: "plans/gback-evidence.md".to_string(),
}
}
/// Assembles the performance-gate description attached to a backend-eval report.
///
/// The required metric list always contains the `real.*` and `synthetic_*` SQLite
/// metrics; the `full_projection.*` metrics are appended only when `full_projection` is
/// set. The digest and repeated-sample commands are rendered for the same `root`,
/// `scope` and `full_projection` values.
pub(crate) fn graph_db_backend_eval_performance_gate(
    root: &Path,
    scope: Option<&str>,
    full_projection: bool,
) -> GraphDbBackendEvalPerformanceGate {
    // Metrics required on every run, in reporting order.
    const BASELINE_METRICS: &[&str] = &[
        "real.sqlite.refresh.duration_micros",
        "real.sqlite.refresh.duration_micros_per_1k_graph_rows",
        "real.sqlite.edge_lookup.duration_micros_per_1k_graph_rows",
        "real.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows",
        "real.sqlite.incident_edges.duration_micros_per_1k_graph_rows",
        "real.sqlite.neighborhood.duration_micros_per_1k_graph_rows",
        "real.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows",
        "real.sqlite.evidence.duration_micros_per_1k_graph_rows",
        "real.sqlite.total_duration_micros_per_1k_graph_rows",
        "real.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows",
        "real.refresh_phase.sqlite_delta_write.duration_micros",
        "real.refresh_phase.sqlite_property_row_staging.duration_micros",
        "real.refresh_phase.sqlite_edge_property_row_staging.duration_micros",
        "real.sqlite.conflict_matrix.duration_micros",
        "real.sqlite.dispatch_trace.duration_micros",
        "real.sqlite.path_max_hops.duration_micros",
        "real.sqlite.path_max_hops_128.duration_micros",
        "real.sqlite.path_max_hops_256.duration_micros",
        "real.sqlite.path_max_hops_512.duration_micros",
        "real.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows",
        "real.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows",
        "real.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows",
        "synthetic_high_degree.sqlite.total_duration_micros",
        "synthetic_high_degree.sqlite.total_duration_micros_per_1k_graph_rows",
        "synthetic_high_degree.sqlite.neighborhood.duration_micros_per_1k_graph_rows",
        "synthetic_high_degree.sqlite.edge_property_scan.duration_micros_per_1k_graph_rows",
        "synthetic_high_degree.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.incident_edges.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.neighborhood.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.path_max_hops.duration_micros",
        "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros",
        "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros",
        "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros",
        "synthetic_deep_chain.sqlite.evidence_target_resolution.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.path_max_hops.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.path_max_hops_128.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.path_max_hops_256.duration_micros_per_1k_graph_rows",
        "synthetic_deep_chain.sqlite.path_max_hops_512.duration_micros_per_1k_graph_rows",
    ];
    // Metrics additionally required when the full-project dataset is evaluated.
    const FULL_PROJECTION_METRICS: &[&str] = &[
        "full_projection.cache.hit",
        "full_projection.cache.disk_bytes",
        "full_projection.cache.compression_ratio",
        "full_projection.refresh_phase.cache_lookup.duration_micros",
        "full_projection.sqlite.total_duration_micros_per_1k_graph_rows",
        "full_projection.refresh_phase.source_graph_build.duration_micros_per_1k_graph_rows",
        "full_projection.refresh_phase.projection_rows.duration_micros_per_1k_graph_rows",
        "full_projection.sqlite.sqlite_delta_write.duration_micros",
        "full_projection.sqlite.sqlite_edge_staging.duration_micros",
        "full_projection.sqlite.post_write_reads.duration_micros",
        "full_projection.sqlite.neighborhood.duration_micros",
        "full_projection.sqlite.evidence_target_resolution.duration_micros",
        "full_projection.sqlite.evidence.duration_micros",
        "full_projection.sqlite.path_max_hops.duration_micros",
        "full_projection.sqlite.path_max_hops_128.duration_micros",
        "full_projection.sqlite.path_max_hops_256.duration_micros",
        "full_projection.sqlite.path_max_hops_512.duration_micros",
        "full_projection.sqlite.conflict_matrix.duration_micros",
        "full_projection.sqlite.dispatch_trace.duration_micros",
    ];

    let optional_metrics: &[&str] = if full_projection {
        FULL_PROJECTION_METRICS
    } else {
        &[]
    };
    let required_metrics: Vec<String> = BASELINE_METRICS
        .iter()
        .chain(optional_metrics)
        .map(|metric| (*metric).to_string())
        .collect();

    let full_projection_cache_hit_gate = match full_projection {
        true => "binding full_projection performance evidence requires a cold populate leg followed by cache-leg samples with full_projection.cache.hit=1; cache-miss samples are diagnostics, not backend or hop-cap promotion proof",
        false => "not evaluated until --full-projection is enabled",
    };

    GraphDbBackendEvalPerformanceGate {
        baseline_fixture: String::from("fixtures/graph-db-performance-history.json"),
        ci_profile: String::from(
            "synthetic_high_degree + synthetic_deep_chain metrics are CI-safe and bounded",
        ),
        opt_in_real_profile: String::from(
            "pass --full-projection to add the full-project dataset when checking for large projection regressions",
        ),
        full_projection_cache_hit_gate: full_projection_cache_hit_gate.to_string(),
        allowed_regression_percent: GRAPH_DB_BACKEND_EVAL_ALLOWED_REGRESSION_PERCENT,
        minimum_sample_runs: GRAPH_DB_BACKEND_EVAL_MIN_SAMPLE_RUNS,
        normalized_metric_unit: String::from("duration_micros_per_1k_graph_rows"),
        required_metrics,
        digest_command: graph_db_backend_eval_metric_digest_command(root, scope, full_projection),
        repeated_sample_command: graph_db_backend_eval_repeated_sample_command(
            root,
            scope,
            full_projection,
        ),
        hop_cap_promotion: graph_db_backend_eval_hop_cap_promotion_gate(),
        backend_adapter_spike: graph_db_backend_eval_backend_adapter_spike_gate(),
    }
}
/// Borrowed option bundle for a backend-eval run.
///
/// NOTE(review): the consumer of this struct is outside this chunk; the field notes
/// below are inferred from names and types only — confirm against the caller.
pub(crate) struct GraphDbBackendEvalOptions<'a> {
// Filesystem path the evaluation is run against (presumably the `--path` argument).
path: &'a Path,
// Optional scope name; other code in this file falls back to "root" when it is `None`.
scope: Option<&'a str>,
// Candidate backend names as raw strings (presumably parsed into backends later).
candidates: &'a [String],
// Target identifiers to evaluate (semantics defined by the caller — TODO confirm).
targets: &'a [String],
// Whether the full-projection dataset is requested.
full_projection: bool,
}
/// Builds one backend-eval dataset named `name`.
///
/// The SQLite store provides the node/edge counts and the baseline report. Each entry in
/// `candidates` is then materialized as an `ExperimentalReadOnlyGraphStore` from
/// `sqlite_rows` and evaluated with the same targets and limits, comparing against the
/// signatures produced by the SQLite report.
///
/// # Errors
///
/// Propagates failures from `graph_counts`, from
/// `ExperimentalReadOnlyGraphStore::from_rows`, and from `sqlite_graph_freshness`.
#[allow(clippy::too_many_arguments)]
pub(crate) fn graph_db_backend_eval_dataset(
name: &str,
root: &Path,
path: &Path,
scope: Option<&str>,
targets: &[String],
depth: usize,
limit: usize,
impact_limit: usize,
candidates: &[GraphDbExperimentalBackend],
sqlite_store: &SqliteGraphStore,
sqlite_freshness: GraphDbFreshnessReport,
sqlite_refresh: (GraphDbBackendEvalOperation, GraphDbBackendEvalSignature),
sqlite_rows: ConvexProjectionRows,
extra_warnings: Vec<String>,
prepared: &ConflictMatrixPreparedInputs,
) -> Result<GraphDbBackendEvalDataset> {
let (nodes, edges) = sqlite_store.graph_counts()?;
let (sqlite_operation, sqlite_signature) = sqlite_refresh;
// Baseline report: no signatures to compare against (`None`), and its own signatures
// are kept so every candidate can be checked against them.
let (sqlite_report, sqlite_signatures) = graph_db_backend_eval_report_for_store(
"sqlite",
"SQLite GraphStore correctness baseline",
false,
root,
path,
scope,
targets,
depth,
limit,
impact_limit,
sqlite_store,
sqlite_freshness,
sqlite_operation,
Some(sqlite_signature),
None,
extra_warnings.clone(),
prepared,
"SQLite refresh writes provider-neutral projection rows into graph.db transactionally",
"SQLite WAL correctness store; refresh uses one transactional writer and read-only queries use snapshot recovery",
"bundled rusqlite baseline; no external service or runtime required",
);
let mut backends = vec![sqlite_report];
for candidate in candidates {
// The timer must start before `from_rows`: the candidate's "refresh" duration covers
// building the store from the projection rows plus counting its nodes and edges.
let started = Instant::now();
let store = ExperimentalReadOnlyGraphStore::from_rows(*candidate, &sqlite_rows)?;
let (candidate_nodes, candidate_edges) = store.graph_counts()?;
let rows = candidate_nodes + candidate_edges;
let refresh = graph_db_backend_eval_refresh_operation(
started.elapsed().as_micros(),
rows,
serde_json::json!({
"nodes": candidate_nodes,
"edges": candidate_edges,
}),
);
// Freshness is read from the SQLite store, not from the candidate store.
let freshness = sqlite_graph_freshness(sqlite_store, scope.unwrap_or("root"))?;
let (candidate_report, _signatures) = graph_db_backend_eval_report_for_store(
candidate.name(),
candidate.adapter_label(),
true,
root,
path,
scope,
targets,
depth,
limit,
impact_limit,
&store,
freshness,
refresh.0,
Some(refresh.1),
Some(&sqlite_signatures),
extra_warnings.clone(),
prepared,
candidate.projection_load(),
candidate.lock_behavior(),
candidate.install_portability(),
);
backends.push(candidate_report);
}
Ok(GraphDbBackendEvalDataset {
name: name.to_string(),
target_count: targets.len(),
nodes,
edges,
backends,
})
}
/// Prints a backend-eval report to stdout in a line-oriented, human-readable form:
/// header, phase timings, per-dataset backend results, promotion decisions, and the
/// follow-up commands.
pub(crate) fn print_graph_db_backend_eval_human(report: &GraphDbBackendEvalReport) {
    let candidate_list = report.candidates.join(", ");
    println!(
        "graph-db backend-eval baseline:{} candidates:{}",
        report.baseline_backend, candidate_list
    );

    for timing in &report.phase_timings {
        println!(
            "phase:{} {}us {}",
            timing.name, timing.duration_micros, timing.detail
        );
    }

    for dataset in &report.datasets {
        let row_count = dataset.nodes + dataset.edges;
        println!(
            "dataset:{} targets:{} rows:{}",
            dataset.name, dataset.target_count, row_count
        );
        for backend in &dataset.backends {
            println!(
                " backend:{} total:{}us parity:{}",
                backend.backend, backend.total_micros, backend.parity.matches_sqlite
            );
            println!(" projection-load: {}", backend.projection_load);
            println!(" lock-behavior: {}", backend.lock_behavior);
            println!(" install-portability: {}", backend.install_portability);
            backend.operations.iter().for_each(|operation| {
                println!(
                    " {} {} {}us",
                    operation.name, operation.status, operation.duration_micros
                );
            });
            backend.parity.diagnostics.iter().for_each(|diagnostic| {
                println!(" parity: {diagnostic}");
            });
        }
    }

    for outcome in &report.promotion {
        println!("promotion {}: {}", outcome.backend, outcome.decision);
        println!(" gate: {}", outcome.gate.status);
        outcome.reasons.iter().for_each(|reason| {
            println!(" reason: {reason}");
        });
        outcome.gate.required_checks.iter().for_each(|check| {
            println!(" check: {check}");
        });
    }

    println!("metric-digest: {}", report.metric_digest_command);
    println!(
        "repeat-samples: {}",
        report.performance_gate.repeated_sample_command
    );
}
/// Renders the `tsift traverse` command that expands `handle` one hop (limit 50) under
/// `root`, with both arguments shell-quoted.
fn traversal_expand_command(root: &Path, handle: &str) -> String {
    let quoted_handle = shell_quote(handle);
    let quoted_root = shell_quote(root.to_string_lossy().as_ref());
    format!("tsift traverse {quoted_handle} --path {quoted_root} --depth 1 --limit 50")
}
/// Builds the traversal node for a file. The root-relative path serves as label,
/// reference id and path, and also seeds the stable `gfil` handle.
fn traversal_file_node(root: &Path, file: &str) -> TraversalNode {
    let relative = relativize(file, root);
    let handle = stable_handle("gfil", &format!("file:{relative}"));
    let expand = traversal_expand_command(root, &handle);
    TraversalNode {
        handle,
        kind: String::from("file"),
        label: relative.clone(),
        ref_id: Some(relative.clone()),
        path: Some(relative),
        line: None,
        detail: None,
        properties: BTreeMap::new(),
        expand,
    }
}
/// Builds a file node for the raw-source fallback: when the node has a path, its detail
/// notes that graph evidence is unavailable and its expand command reads the first 80
/// source lines instead of traversing.
fn traversal_raw_source_file_node(root: &Path, file: &str) -> TraversalNode {
    let mut node = traversal_file_node(root, file);
    let Some(path) = node.path.clone() else {
        return node;
    };
    node.detail = Some(String::from(
        "raw source fallback; graph evidence unavailable",
    ));
    node.expand = source_read_command(root, &path, 1, 80);
    node
}
/// Builds the traversal node for an indexed symbol. The stable `gsym` handle is derived
/// from the root-relative file, the line and the symbol name.
fn traversal_symbol_node(root: &Path, symbol: &index::StoredSymbol) -> TraversalNode {
    let relative_file = relativize(&symbol.file, root);
    let handle = stable_handle(
        "gsym",
        &format!("symbol:{relative_file}:{}:{}", symbol.line, symbol.name),
    );
    let expand = traversal_expand_command(root, &handle);
    let detail = format!("{} {}", symbol.language, symbol.kind);
    TraversalNode {
        handle,
        kind: String::from("symbol"),
        label: symbol.name.clone(),
        ref_id: Some(symbol.name.clone()),
        path: Some(relative_file),
        line: Some(symbol.line),
        detail: Some(detail),
        properties: BTreeMap::new(),
        expand,
    }
}
/// Builds a placeholder symbol node for a call target that could not be resolved to an
/// indexed symbol; it carries no path or line and its handle depends on the name only.
fn traversal_unresolved_symbol_node(root: &Path, name: &str) -> TraversalNode {
    let handle = stable_handle("gsym", &format!("symbol:{name}"));
    let expand = traversal_expand_command(root, &handle);
    TraversalNode {
        handle,
        kind: String::from("symbol"),
        label: name.to_owned(),
        ref_id: Some(name.to_owned()),
        path: None,
        line: None,
        detail: Some(String::from("unresolved call target")),
        properties: BTreeMap::new(),
        expand,
    }
}
/// Builds the traversal node for an indexed route. A missing HTTP method is rendered as
/// `any` in the handle key and as `ANY` in the label.
fn traversal_route_node(root: &Path, route: &index::StoredRoute) -> TraversalNode {
    let relative_file = relativize(&route.file, root);
    let method = route.method.as_deref().unwrap_or("any");
    let handle = stable_handle(
        "grte",
        &format!(
            "route:{relative_file}:{}:{}:{}",
            route.line, method, route.route_path
        ),
    );
    let expand = traversal_expand_command(root, &handle);
    let label = format!("{} {}", method.to_uppercase(), route.route_path);
    let detail = format!(
        "{} route handled by {}",
        route.framework, route.handler_name
    );
    TraversalNode {
        handle,
        kind: String::from("route"),
        label,
        ref_id: Some(route.route_path.clone()),
        path: Some(relative_file),
        line: Some(route.line),
        detail: Some(detail),
        properties: BTreeMap::new(),
        expand,
    }
}
/// Builds the traversal node for an agent-doc session markdown file. The label is the
/// session id when known, otherwise the root-relative, forward-slash path.
fn traversal_session_node(
    root: &Path,
    markdown_path: &Path,
    session_id: Option<&str>,
) -> TraversalNode {
    let relative = relativize_pathbuf(markdown_path, root);
    let display = relative.to_string_lossy().replace('\\', "/");
    let handle = stable_handle("gses", &format!("session:{display}"));
    let expand = traversal_expand_command(root, &handle);
    let label = match session_id {
        Some(id) => id.to_string(),
        None => display.clone(),
    };
    TraversalNode {
        handle,
        kind: String::from("session"),
        label,
        ref_id: session_id.map(str::to_string),
        path: Some(display),
        line: None,
        detail: Some(String::from("agent-doc session artifact")),
        properties: BTreeMap::new(),
        expand,
    }
}
/// Builds the traversal node for a backlog item `#id` found at `line` of a markdown
/// file; the item text becomes the node detail.
fn traversal_backlog_node(
    root: &Path,
    markdown_path: &Path,
    id: &str,
    text: &str,
    line: i64,
) -> TraversalNode {
    let relative = relativize_pathbuf(markdown_path, root);
    let display = relative.to_string_lossy().replace('\\', "/");
    let handle = stable_handle("gbak", &format!("backlog:{display}:#{id}"));
    let expand = traversal_expand_command(root, &handle);
    TraversalNode {
        handle,
        kind: String::from("backlog"),
        label: format!("#{id}"),
        ref_id: Some(id.to_owned()),
        path: Some(display),
        line: Some(line),
        detail: Some(text.to_owned()),
        properties: BTreeMap::new(),
        expand,
    }
}
/// Builds the traversal node for a job packet found at `line` of a markdown file. The
/// stable `gjob` handle is derived from the file, the line and the label.
fn traversal_job_packet_node(
    root: &Path,
    markdown_path: &Path,
    label: &str,
    ref_id: Option<&str>,
    detail: &str,
    line: i64,
) -> TraversalNode {
    let relative = relativize_pathbuf(markdown_path, root);
    let display = relative.to_string_lossy().replace('\\', "/");
    let handle = stable_handle("gjob", &format!("job:{display}:{line}:{label}"));
    let expand = traversal_expand_command(root, &handle);
    TraversalNode {
        handle,
        kind: String::from("job_packet"),
        label: label.to_owned(),
        ref_id: ref_id.map(str::to_string),
        path: Some(display),
        line: Some(line),
        detail: Some(detail.to_owned()),
        properties: BTreeMap::new(),
        expand,
    }
}
/// One worker result extracted from a single markdown line by
/// `parse_worker_result_line`; a line mentioning several ids yields one value per id.
#[derive(Clone, Debug)]
struct ParsedWorkerResult {
// Target id this result refers to.
id: String,
// Either "completed" or "blocked", as classified by the parser.
status: String,
// Known indexed file paths mentioned on the line.
touched_files: Vec<String>,
// Code spans on the line whose text contains "test" (case-insensitive).
tests: Vec<String>,
// Ids mentioned on the line that are not themselves result ids of that line.
follow_up_ids: Vec<String>,
}
/// Builds the traversal node for a parsed worker result. The status is always stored as
/// a property; touched files, expected tests and follow-up ids are stored only when
/// non-empty, joined with `,`, ` && ` and `,` respectively.
fn traversal_worker_result_node(
    root: &Path,
    markdown_path: &Path,
    parsed: &ParsedWorkerResult,
    line_text: &str,
    line: i64,
) -> TraversalNode {
    let relative = relativize_pathbuf(markdown_path, root);
    let display = relative.to_string_lossy().replace('\\', "/");
    let handle_key = format!(
        "worker-result:{display}:{}:{}:{}",
        parsed.id, parsed.status, line
    );
    let handle = stable_handle("wres", &handle_key);
    let expand = traversal_expand_command(root, &handle);

    let mut properties = BTreeMap::from([("status".to_string(), parsed.status.clone())]);
    let optional_lists = [
        ("touched_files", &parsed.touched_files, ","),
        ("expected_tests", &parsed.tests, " && "),
        ("follow_up_ids", &parsed.follow_up_ids, ","),
    ];
    for (key, values, separator) in optional_lists {
        if !values.is_empty() {
            properties.insert(key.to_string(), values.join(separator));
        }
    }

    TraversalNode {
        handle,
        kind: String::from("worker_result"),
        label: format!("{} #{}", parsed.status, parsed.id),
        ref_id: Some(parsed.id.clone()),
        path: Some(display),
        line: Some(line),
        detail: Some(line_text.trim().to_string()),
        properties,
        expand,
    }
}
/// Splits `input` into lowercase search tokens.
///
/// Every character that is not an ASCII letter or digit (including `_` and `-`) acts as
/// a separator; fragments shorter than three bytes are dropped.
fn traversal_tokens(input: &str) -> BTreeSet<String> {
    let mut tokens = BTreeSet::new();
    for fragment in input.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if fragment.len() >= 3 {
            tokens.insert(fragment.to_ascii_lowercase());
        }
    }
    tokens
}
/// Collects the search tokens of a node from its label and, when present, its reference
/// id, path and detail.
fn traversal_node_tokens(node: &TraversalNode) -> BTreeSet<String> {
    let optional_texts = [
        node.ref_id.as_deref(),
        node.path.as_deref(),
        node.detail.as_deref(),
    ];
    let mut tokens = traversal_tokens(&node.label);
    for text in optional_texts.into_iter().flatten() {
        tokens.extend(traversal_tokens(text));
    }
    tokens
}
/// Returns the value of the first `agent_doc_session:` line that has a non-empty value.
///
/// Lines are trimmed before matching; lines with the prefix but an empty value are
/// skipped and the search continues.
fn parse_agent_doc_session_id(content: &str) -> Option<String> {
    for line in content.lines() {
        let Some(raw_value) = line.trim().strip_prefix("agent_doc_session:") else {
            continue;
        };
        let value = raw_value.trim();
        if !value.is_empty() {
            return Some(value.to_string());
        }
    }
    None
}
/// Parses a markdown checklist line of the form `- [ ] [#id] text` into `(id, text)`.
///
/// Returns `None` when the trimmed line does not start with `- [`, has no `[#...]` tag,
/// or the tag's id is empty after trimming.
fn parse_backlog_line(line: &str) -> Option<(String, String)> {
    let item = line.trim();
    if !item.starts_with("- [") {
        return None;
    }
    // Everything after the first "[#" up to the next ']' is the id; the remainder is text.
    let (_, after_marker) = item.split_once("[#")?;
    let (raw_id, raw_text) = after_marker.split_once(']')?;
    match raw_id.trim() {
        "" => None,
        id => Some((id.to_string(), raw_text.trim().to_string())),
    }
}
/// Returns the argument of a queue line starting with `dispatch ` or `preset ` (after
/// trimming), or `None` when neither prefix matches or the argument is empty.
fn parse_queue_dispatch_line(line: &str) -> Option<String> {
    let trimmed = line.trim();
    for prefix in ["dispatch ", "preset "] {
        let Some(argument) = trimmed.strip_prefix(prefix) else {
            continue;
        };
        let argument = argument.trim();
        if !argument.is_empty() {
            return Some(argument.to_string());
        }
    }
    None
}
/// Extracts the id from a queue line of the form `- do [#id] ...`; returns `None` when
/// the prefix or closing bracket is missing or the id is empty after trimming.
fn parse_queue_do_line(line: &str) -> Option<String> {
    let after_prefix = line.trim().strip_prefix("- do [#")?;
    let (raw_id, _) = after_prefix.split_once(']')?;
    match raw_id.trim() {
        "" => None,
        id => Some(id.to_string()),
    }
}
/// Returns the trimmed, non-empty contents of every single-backtick code span in `input`.
///
/// Only spans with both an opening and a closing backtick are returned: text following
/// an unmatched trailing backtick is ordinary prose, not code, and is ignored.
fn markdown_code_spans(input: &str) -> Vec<String> {
    let segments: Vec<&str> = input.split('`').collect();
    // N backticks yield N + 1 segments. An even segment count therefore means an odd
    // number of backticks, i.e. the final odd-indexed segment was never closed.
    let closed_len = if segments.len() % 2 == 0 {
        segments.len() - 1
    } else {
        segments.len()
    };
    segments[..closed_len]
        .iter()
        .skip(1)
        .step_by(2)
        .map(|span| span.trim())
        .filter(|span| !span.is_empty())
        .map(str::to_string)
        .collect()
}
/// Appends `entry_index` to the posting list of every token in `tokens`, creating lists
/// on demand.
fn push_traversal_token_index(
    index: &mut HashMap<String, Vec<usize>>,
    tokens: &BTreeSet<String>,
    entry_index: usize,
) {
    tokens.iter().for_each(|token| {
        let postings = index.entry(token.clone()).or_default();
        postings.push(entry_index);
    });
}
impl<'a> TraversalCodeLookup<'a> {
    /// Builds the lookup over the given symbol, file and route entries: one token index
    /// per entry kind (token -> positions in the slice) plus a map of known file paths.
    fn new(
        symbols: &'a [TraversalSymbolIndexEntry],
        files: &'a [TraversalFileIndexEntry],
        routes: &'a [TraversalRouteIndexEntry],
    ) -> Self {
        let mut symbol_index = HashMap::new();
        for (position, symbol) in symbols.iter().enumerate() {
            push_traversal_token_index(&mut symbol_index, &symbol.tokens, position);
        }

        let mut file_index = HashMap::new();
        for (position, file) in files.iter().enumerate() {
            push_traversal_token_index(&mut file_index, &file.tokens, position);
        }
        // Every file node that carries a path is registered under that path.
        let file_path_index = files
            .iter()
            .filter_map(|file| file.node.path.as_ref())
            .map(|path| (path.clone(), path.clone()))
            .collect();

        let mut route_index = HashMap::new();
        for (position, route) in routes.iter().enumerate() {
            push_traversal_token_index(&mut route_index, &route.tokens, position);
        }

        Self {
            symbols,
            files,
            routes,
            symbol_index,
            file_index,
            route_index,
            file_path_index,
        }
    }

    /// Returns the known file paths mentioned on `line`, sorted and de-duplicated.
    ///
    /// Both backtick code spans and whitespace-separated words are considered, each
    /// expanded through `traversal_path_candidates`.
    fn touched_files_for_line(&self, line: &str) -> Vec<String> {
        let code_spans = markdown_code_spans(line);
        let mentions = code_spans
            .iter()
            .map(String::as_str)
            .chain(line.split_whitespace());
        let touched: BTreeSet<String> = mentions
            .flat_map(|mention| traversal_path_candidates(mention))
            .filter_map(|path| self.file_path_index.get(&path).cloned())
            .collect();
        touched.into_iter().collect()
    }
}
/// Turns a word or code span into candidate file paths.
///
/// Surrounding quotes, brackets and sentence punctuation are stripped. The result holds
/// the stripped text and, when it ends in `:<ascii digits>` (possibly no digits) after a
/// non-empty prefix, that prefix as well. Empty input after stripping yields no
/// candidates.
fn traversal_path_candidates(candidate: &str) -> Vec<String> {
    const WRAPPING: &[char] = &[
        '`', '"', '\'', ',', ';', '.', '!', '?', '(', ')', '[', ']', '{', '}',
    ];
    let core = candidate.trim_matches(WRAPPING);
    if core.is_empty() {
        return Vec::new();
    }
    let without_line_suffix = core.rsplit_once(':').and_then(|(path, suffix)| {
        let numeric_suffix = suffix.bytes().all(|byte| byte.is_ascii_digit());
        (!path.is_empty() && numeric_suffix).then(|| path.to_string())
    });
    std::iter::once(core.to_string())
        .chain(without_line_suffix)
        .collect()
}
/// Extracts worker results from one markdown line.
///
/// Checklist lines (`- [` after leading whitespace) and lines without a completion or
/// blocked keyword produce nothing. Result ids are the target refs found before the
/// first `follow-up` / `follow up` / `next:` marker; refs anywhere on the line that are
/// not result ids become follow-up ids. One value is returned per result id, all
/// sharing the same status, touched files, tests and follow-up ids.
fn parse_worker_result_line(
    line: &str,
    lookup: &TraversalCodeLookup<'_>,
) -> Vec<ParsedWorkerResult> {
    if line.trim_start().starts_with("- [") {
        return Vec::new();
    }
    // ASCII lowercasing keeps byte offsets aligned with `line`, so offsets found in
    // `lower` can be used to slice `line` below.
    let lower = line.to_ascii_lowercase();
    let mentions_any =
        |needles: &[&str]| needles.iter().any(|needle| lower.contains(*needle));
    let status = if mentions_any(&["completed", "code-complete", "done"]) {
        "completed"
    } else if mentions_any(&["blocked", "externally blocked"]) {
        "blocked"
    } else {
        return Vec::new();
    };

    let result_prefix_end = ["follow-up", "follow up", "next:"]
        .into_iter()
        .filter_map(|marker| lower.find(marker))
        .min()
        .unwrap_or(line.len());
    let ids = extract_conflict_target_refs(&line[..result_prefix_end]);
    if ids.is_empty() {
        return Vec::new();
    }

    let result_ids = ids.iter().cloned().collect::<BTreeSet<_>>();
    // Every result id is in `result_ids`, so excluding that set also excludes the id a
    // given result is built for; the follow-up list is identical for all of them.
    let follow_up_ids: Vec<String> = extract_conflict_target_refs(line)
        .iter()
        .filter(|other| !result_ids.contains(*other))
        .cloned()
        .collect();
    let touched_files = lookup.touched_files_for_line(line);
    let tests: Vec<String> = markdown_code_spans(line)
        .into_iter()
        .filter(|span| span.to_ascii_lowercase().contains("test"))
        .collect();

    ids.iter()
        .map(|id| ParsedWorkerResult {
            id: id.clone(),
            status: status.to_string(),
            touched_files: touched_files.clone(),
            tests: tests.clone(),
            follow_up_ids: follow_up_ids.clone(),
        })
        .collect()
}
/// Resolves `path_hint` (absolute, or relative to `root`) and returns it only when it
/// names an existing regular file with the `md` extension.
fn hinted_markdown_file(root: &Path, path_hint: &Path) -> Option<PathBuf> {
    // `Path::join` returns the argument unchanged when it is absolute, so this covers
    // both the absolute and the root-relative hint.
    let resolved = root.join(path_hint);
    let has_md_extension = resolved.extension().and_then(|ext| ext.to_str()) == Some("md");
    if has_md_extension && resolved.is_file() {
        Some(resolved)
    } else {
        None
    }
}
/// Heuristically decides whether markdown content is an agent-doc session: it either
/// declares a session id or contains one of the exchange/backlog markers.
fn traversal_markdown_content_looks_like_session(content: &str) -> bool {
    if parse_agent_doc_session_id(content).is_some() {
        return true;
    }
    ["<!-- agent:exchange", "<!-- agent:backlog", "## Backlog"]
        .into_iter()
        .any(|marker| content.contains(marker))
}
/// Lists the markdown files relevant for traversal.
///
/// When `path_hint` itself resolves to a markdown file, only that file is returned.
/// Otherwise `root` is walked with the `ignore` crate's walker, generated artifacts are
/// skipped, and all `.md` files are returned in sorted order.
///
/// # Errors
///
/// Fails when the directory walk reports an error for an entry.
fn markdown_files_for_traversal(root: &Path, path_hint: &Path) -> Result<Vec<PathBuf>> {
    if let Some(hinted_path) = hinted_markdown_file(root, path_hint) {
        return Ok(vec![hinted_path]);
    }

    let mut builder = ignore::WalkBuilder::new(root);
    builder
        .hidden(true)
        .git_ignore(true)
        .git_global(true)
        .git_exclude(true);

    let mut markdown_files = Vec::new();
    for result in builder.build() {
        let entry =
            result.with_context(|| format!("walking markdown files under {}", root.display()))?;
        let is_regular_file = entry.file_type().is_some_and(|ft| ft.is_file());
        if !is_regular_file || traversal_path_is_generated_artifact(root, root, entry.path()) {
            continue;
        }
        let candidate = entry.path();
        if candidate.extension().and_then(|ext| ext.to_str()) == Some("md") {
            markdown_files.push(candidate.to_path_buf());
        }
    }
    markdown_files.sort();
    Ok(markdown_files)
}
/// Renders `path` for use in a watermark: relative to `root` when it lies beneath it
/// (unchanged otherwise), lossily converted to UTF-8, with backslashes turned into
/// forward slashes.
fn traversal_watermark_path(root: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown.to_string_lossy().replace('\\', "/")
}
/// Appends a watermark part describing `path` by its filesystem metadata.
///
/// The part contains the file length and modification time (seconds and nanoseconds
/// since the Unix epoch, `0.0` when unavailable), or `missing` when the metadata cannot
/// be read.
fn push_traversal_metadata_watermark_part(
    root: &Path,
    path: &Path,
    label: &str,
    parts: &mut Vec<String>,
) {
    let display = traversal_watermark_path(root, path);
    let part = match fs::metadata(path) {
        Err(_) => format!("{label}:{display}:missing"),
        Ok(metadata) => {
            let since_epoch = metadata
                .modified()
                .ok()
                .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok());
            let (secs, nanos) = match since_epoch {
                Some(elapsed) => (elapsed.as_secs(), elapsed.subsec_nanos()),
                None => (0, 0),
            };
            let len = metadata.len();
            format!("{label}:{display}:len={len}:mtime={secs}.{nanos}")
        }
    };
    parts.push(part);
}
/// Borrowed, serializable view of one summary row, limited to the fields that take part
/// in the summaries watermark hash built by `push_traversal_summaries_watermark_part`.
#[derive(Serialize)]
struct TraversalSummaryWatermarkRow<'a> {
// Name of the summarized symbol.
symbol_name: &'a str,
// File the summary belongs to.
file_path: &'a str,
// Optional entities attached to the summary.
entities: &'a Option<Vec<summarize::Entity>>,
// Optional relationships attached to the summary.
relationships: &'a Option<Vec<summarize::Relationship>>,
// Optional concept labels attached to the summary.
concept_labels: &'a Option<Vec<String>>,
}
/// Appends the watermark part for `.tsift/summaries.db` under `root`.
///
/// - database file absent: `summaries_db:absent`;
/// - database readable: row count plus a content hash over the watermark-relevant
///   fields of every summary;
/// - database present but unreadable: a metadata-based part labelled
///   `summaries_db_unreadable`.
///
/// # Errors
///
/// Fails only when hashing the summary rows fails.
fn push_traversal_summaries_watermark_part(root: &Path, parts: &mut Vec<String>) -> Result<()> {
    let summaries_db = root.join(".tsift/summaries.db");
    if !summaries_db.exists() {
        parts.push(String::from("summaries_db:absent"));
        return Ok(());
    }

    let loaded = summarize::SummaryDb::open_read_only_resilient(&summaries_db)
        .and_then(|summary_db| summary_db.all());
    let Ok(summaries) = loaded else {
        // Fall back to file metadata so changes to an unreadable database still alter
        // the watermark.
        push_traversal_metadata_watermark_part(
            root,
            &summaries_db,
            "summaries_db_unreadable",
            parts,
        );
        return Ok(());
    };

    let mut rows = Vec::new();
    for summary in summaries.iter() {
        rows.push(TraversalSummaryWatermarkRow {
            symbol_name: &summary.symbol_name,
            file_path: &summary.file_path,
            entities: &summary.entities,
            relationships: &summary.relationships,
            concept_labels: &summary.concept_labels,
        });
    }
    let row_count = rows.len();
    let semantic_hash = content_hash(&rows)?;
    parts.push(format!(
        "summaries_db:rows={row_count}:semantic_hash={semantic_hash}"
    ));
    Ok(())
}
/// Test-only shim that forwards to `resolution::relative_path_is_generated_artifact`.
#[cfg(test)]
fn traversal_relative_path_is_generated_artifact(relative: &str) -> bool {
resolution::relative_path_is_generated_artifact(relative)
}
/// Forwards to `resolution::path_is_generated_artifact`; traversal code uses the result
/// to skip such paths.
fn traversal_path_is_generated_artifact(root: &Path, source_root: &Path, path: &Path) -> bool {
resolution::path_is_generated_artifact(root, source_root, path)
}
/// Forwards to `resolution::index_snapshot_part_is_generated`; the traversal watermark
/// leaves out snapshot parts for which this returns `true`.
fn traversal_index_snapshot_part_is_generated(root: &Path, source_root: &Path, part: &str) -> bool {
resolution::index_snapshot_part_is_generated(root, source_root, part)
}
/// Computes a content hash over everything the traversal projection depends on.
///
/// The watermark always covers the projection version, scope, path hint, the
/// `session_only` flag, the metadata of every traversal markdown file and the summaries
/// database. Unless the run is session-only *and* the hint is a markdown file, it also
/// covers the search index (label, scope, source root and non-generated snapshot parts).
///
/// Returns `Ok(None)` when the index part is required but no index target can be
/// resolved or the index database cannot be opened.
///
/// # Errors
///
/// Fails when reading the index snapshot parts, walking markdown files, or hashing
/// fails.
pub(crate) fn traversal_source_watermark(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
    session_only: bool,
) -> Result<Option<String>> {
    let scope_label = scope.unwrap_or("root");
    let hint_label = traversal_watermark_path(root, path_hint);
    let mut parts = vec![
        format!("projection_version:{GRAPH_PROJECTION_VERSION}"),
        format!("scope:{scope_label}"),
        format!("path_hint:{hint_label}"),
        format!("session_only:{session_only}"),
    ];

    let needs_index = !session_only || hinted_markdown_file(root, path_hint).is_none();
    if needs_index {
        let Ok(targets) = resolve_search_index_targets(root, path_hint, scope, false) else {
            return Ok(None);
        };
        // Only the first resolved target contributes to the watermark.
        let Some(target) = targets.into_iter().next() else {
            return Ok(None);
        };
        let Ok(db) = index::IndexDb::open_read_only_resilient(&target.db_path) else {
            return Ok(None);
        };

        let index_scope = target.scope_name.as_deref().unwrap_or("root");
        let index_source_root = traversal_watermark_path(root, &target.source_root);
        parts.push(format!("index_label:{}", target.label));
        parts.push(format!("index_scope:{index_scope}"));
        parts.push(format!("index_source_root:{index_source_root}"));

        let parts_before_snapshot = parts.len();
        for part in db.source_snapshot_parts()? {
            if !traversal_index_snapshot_part_is_generated(root, &target.source_root, &part) {
                parts.push(format!("index_snapshot:{part}"));
            }
        }
        let snapshot_rows = parts.len() - parts_before_snapshot;
        parts.push(format!("index_snapshot_rows:{snapshot_rows}"));
    }

    let markdown_files = markdown_files_for_traversal(root, path_hint)?;
    parts.push(format!("markdown_count:{}", markdown_files.len()));
    for markdown_path in &markdown_files {
        push_traversal_metadata_watermark_part(root, markdown_path, "markdown", &mut parts);
    }

    push_traversal_summaries_watermark_part(root, &mut parts)?;
    let watermark = content_hash(&parts)?;
    Ok(Some(watermark))
}
/// Shared ranking core for the symbol/file/route lookups.
///
/// Each query token found in `index` adds one point to every entry position
/// listed for that token. Scored entries are returned as `(score, entry)`
/// pairs ordered by descending score, then by the `(label, handle)` pair
/// produced by `sort_key`.
///
/// Positions stored in `index` are used to index `entries` directly, so an
/// out-of-range position panics.
fn rank_entries_by_token_hits<'a, T>(
    query_tokens: &BTreeSet<String>,
    entries: &'a [T],
    index: &HashMap<String, Vec<usize>>,
    sort_key: impl Fn(&'a T) -> (&'a str, &'a str),
) -> Vec<(usize, &'a T)> {
    let mut scores = BTreeMap::<usize, usize>::new();
    for idx in query_tokens
        .iter()
        .filter_map(|token| index.get(token))
        .flatten()
    {
        *scores.entry(*idx).or_default() += 1;
    }
    let mut matches = scores
        .into_iter()
        .map(|(idx, score)| (score, &entries[idx]))
        .collect::<Vec<_>>();
    matches.sort_by(|(left_score, left), (right_score, right)| {
        right_score
            .cmp(left_score)
            .then_with(|| sort_key(*left).cmp(&sort_key(*right)))
    });
    matches
}

/// Ranks symbol entries by the number of query tokens that hit them.
///
/// Returns `(score, entry)` pairs, best score first; ties are broken by node
/// label and then by handle.
fn ranked_symbol_matches<'a>(
    query_tokens: &BTreeSet<String>,
    entries: &'a [TraversalSymbolIndexEntry],
    index: &HashMap<String, Vec<usize>>,
) -> Vec<(usize, &'a TraversalSymbolIndexEntry)> {
    rank_entries_by_token_hits(query_tokens, entries, index, |entry| {
        (entry.node.label.as_str(), entry.handle.as_str())
    })
}

/// Ranks file entries by the number of query tokens that hit them.
///
/// Returns `(score, entry)` pairs, best score first; ties are broken by node
/// label and then by handle.
fn ranked_file_matches<'a>(
    query_tokens: &BTreeSet<String>,
    entries: &'a [TraversalFileIndexEntry],
    index: &HashMap<String, Vec<usize>>,
) -> Vec<(usize, &'a TraversalFileIndexEntry)> {
    rank_entries_by_token_hits(query_tokens, entries, index, |entry| {
        (entry.node.label.as_str(), entry.handle.as_str())
    })
}

/// Ranks route entries by the number of query tokens that hit them.
///
/// Returns `(score, entry)` pairs, best score first; ties are broken by node
/// label and then by handle.
fn ranked_route_matches<'a>(
    query_tokens: &BTreeSet<String>,
    entries: &'a [TraversalRouteIndexEntry],
    index: &HashMap<String, Vec<usize>>,
) -> Vec<(usize, &'a TraversalRouteIndexEntry)> {
    rank_entries_by_token_hits(query_tokens, entries, index, |entry| {
        (entry.node.label.as_str(), entry.handle.as_str())
    })
}
/// Adds `mentions` edges from `backlog` to the code nodes whose tokens overlap
/// the given text.
///
/// The query tokens come from `text` plus the node's `ref_id` when present.
/// At most `limit` symbol matches are linked, and at most `limit.min(5)` file
/// matches and route matches each. Each edge carries the match score.
/// Nothing is added when the text yields no tokens.
fn link_backlog_to_code_nodes(
    graph: &mut TraversalGraphBuild,
    backlog: &TraversalNode,
    text: &str,
    lookup: &TraversalCodeLookup<'_>,
    limit: usize,
) {
    let mut query_tokens = traversal_tokens(text);
    if let Some(ref_id) = backlog.ref_id.as_ref() {
        query_tokens.extend(traversal_tokens(ref_id));
    }
    if query_tokens.is_empty() {
        return;
    }

    // Files and routes get a tighter cap than symbols.
    let secondary_limit = limit.min(5);

    let symbol_hits = ranked_symbol_matches(&query_tokens, lookup.symbols, &lookup.symbol_index);
    for (score, entry) in symbol_hits.into_iter().take(limit) {
        let reason = Some("backlog text matches symbol tokens".to_string());
        graph.add_edge(&backlog.handle, &entry.handle, "mentions", reason, score);
    }

    let file_hits = ranked_file_matches(&query_tokens, lookup.files, &lookup.file_index);
    for (score, entry) in file_hits.into_iter().take(secondary_limit) {
        let reason = Some("backlog text matches file tokens".to_string());
        graph.add_edge(&backlog.handle, &entry.handle, "mentions", reason, score);
    }

    let route_hits = ranked_route_matches(&query_tokens, lookup.routes, &lookup.route_index);
    for (score, entry) in route_hits.into_iter().take(secondary_limit) {
        let reason = Some("backlog text matches route tokens".to_string());
        graph.add_edge(&backlog.handle, &entry.handle, "mentions", reason, score);
    }
}
/// Adds session, backlog, job-packet and worker-result nodes parsed from the
/// markdown files selected by `markdown_files_for_traversal`.
///
/// Per file: content that cannot be read is recorded in `graph.warnings` and
/// skipped; content rejected by `traversal_markdown_content_looks_like_session`
/// is skipped silently. Accepted files are scanned three times over the same
/// line list (backlog items, queue section, worker results) so the later
/// passes can link back to nodes created by the earlier ones.
///
/// # Errors
/// Only a failure of `markdown_files_for_traversal` is propagated.
fn load_agent_doc_traversal_nodes(
root: &Path,
path_hint: &Path,
graph: &mut TraversalGraphBuild,
lookup: &TraversalCodeLookup<'_>,
) -> Result<()> {
for markdown_path in markdown_files_for_traversal(root, path_hint)? {
let content = match fs::read_to_string(&markdown_path) {
Ok(content) => content,
Err(err) => {
// Best effort: surface the unreadable file as a warning and keep going.
graph.warnings.push(format!(
"session artifact unavailable: {}: {err}",
markdown_path.display()
));
continue;
}
};
if !traversal_markdown_content_looks_like_session(&content) {
continue;
}
let session_id = parse_agent_doc_session_id(&content);
let session = traversal_session_node(root, &markdown_path, session_id.as_deref());
graph.add_node(session.clone());
let lines = content.lines().collect::<Vec<_>>();
// Pass 1: backlog items. Each becomes a node contained in the session and is
// linked to matching code nodes (limit 8).
let mut backlog_by_id = BTreeMap::<String, TraversalNode>::new();
for (idx, line) in lines.iter().enumerate() {
let Some((id, text)) = parse_backlog_line(line) else {
continue;
};
// Line numbers handed to the node constructors are 1-based.
let backlog = traversal_backlog_node(root, &markdown_path, &id, &text, idx as i64 + 1);
graph.add_node(backlog.clone());
backlog_by_id.insert(id.clone(), backlog.clone());
graph.add_edge(
&session.handle,
&backlog.handle,
"contains",
Some("session backlog item".to_string()),
1,
);
link_backlog_to_code_nodes(graph, &backlog, &text, lookup, 8);
}
// Pass 2: queue entries. Only lines between the `<!-- agent:queue` and
// `<!-- /agent:queue` markers are considered.
let mut in_queue = false;
let mut job_by_id = BTreeMap::<String, TraversalNode>::new();
for (idx, line) in lines.iter().enumerate() {
let trimmed = line.trim();
if trimmed.starts_with("<!-- agent:queue") {
in_queue = true;
continue;
}
if trimmed.starts_with("<!-- /agent:queue") {
in_queue = false;
continue;
}
if !in_queue {
continue;
}
// Dispatch lines become job packets contained in the session; they are not
// recorded in `job_by_id`.
if let Some(dispatch) = parse_queue_dispatch_line(line) {
// The node reference drops a leading `#` from the dispatch name.
let dispatch_ref = dispatch.strip_prefix('#').unwrap_or(dispatch.as_str());
let node = traversal_job_packet_node(
root,
&markdown_path,
&format!("dispatch {dispatch}"),
Some(dispatch_ref),
"agent-doc dispatch preset",
idx as i64 + 1,
);
graph.add_node(node.clone());
graph.add_edge(
&session.handle,
&node.handle,
"contains",
Some("session queued dispatch".to_string()),
1,
);
continue;
}
// `do` lines become job packets that target the backlog item with the same
// id, when pass 1 found one.
if let Some(id) = parse_queue_do_line(line) {
// Reuse the backlog node's detail text when available.
let detail = backlog_by_id
.get(&id)
.and_then(|node| node.detail.clone())
.unwrap_or_else(|| "queued backlog item".to_string());
let node = traversal_job_packet_node(
root,
&markdown_path,
&format!("do #{id}"),
Some(&id),
&detail,
idx as i64 + 1,
);
graph.add_node(node.clone());
graph.add_edge(
&session.handle,
&node.handle,
"contains",
Some("session queued job packet".to_string()),
1,
);
if let Some(backlog) = backlog_by_id.get(&id) {
graph.add_edge(
&node.handle,
&backlog.handle,
"targets",
Some("queued backlog item".to_string()),
1,
);
}
job_by_id.insert(id, node);
}
}
// Pass 3: worker results. The dedup key includes the line number, so only
// repeated (id, status) pairs parsed from the same line are dropped.
let mut seen_results = BTreeSet::<(String, String, i64)>::new();
for (idx, line) in lines.iter().enumerate() {
for parsed in parse_worker_result_line(line, lookup) {
let line_no = idx as i64 + 1;
if !seen_results.insert((parsed.id.clone(), parsed.status.clone(), line_no)) {
continue;
}
let result =
traversal_worker_result_node(root, &markdown_path, &parsed, line, line_no);
graph.add_node(result.clone());
graph.add_edge(
&session.handle,
&result.handle,
"contains",
Some("session worker result".to_string()),
1,
);
if let Some(backlog) = backlog_by_id.get(&parsed.id) {
graph.add_edge(
&backlog.handle,
&result.handle,
"has_result",
Some(format!("worker result {}", parsed.status)),
1,
);
}
if let Some(job) = job_by_id.get(&parsed.id) {
graph.add_edge(
&job.handle,
&result.handle,
"has_result",
Some(format!("queued worker result {}", parsed.status)),
1,
);
}
// Touched files are appended to the match text so they take part in the
// code-node linking below.
let mut result_text = line.to_string();
if !parsed.touched_files.is_empty() {
result_text.push(' ');
result_text.push_str(&parsed.touched_files.join(" "));
}
link_backlog_to_code_nodes(graph, &result, &result_text, lookup, 8);
}
}
}
Ok(())
}
/// Outcome of checking (and, if needed, refreshing) the code index before a
/// packet is built.
#[derive(Debug, Clone)]
struct AgentDocIndexGate {
/// Index database to read from; `None` when `prepare_agent_doc_index_gate`
/// falls back to raw source file nodes.
db_path: Option<PathBuf>,
/// Source root of the resolved index target, or the fallback source root
/// when no target could be resolved.
source_root: PathBuf,
/// Human-readable notes about refreshes or fallbacks; the traversal graph
/// builder appends them to its warnings.
diagnostics: Vec<String>,
}
/// Key of the in-process index gate cache: the full argument tuple passed to
/// `prepare_agent_doc_index_gate_cached`.
#[derive(Clone, Hash, PartialEq, Eq)]
struct AgentDocIndexGateCacheKey {
/// Project root the gate was prepared for.
root: PathBuf,
/// Path hint the gate was prepared for.
path_hint: PathBuf,
/// Explicit scope name, if one was supplied.
scope: Option<String>,
/// Packet label used in the gate's diagnostics.
packet_label: String,
}
fn agent_doc_index_gate_cache() -> &'static std::sync::Mutex<
std::collections::HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>,
> {
static CACHE: std::sync::OnceLock<
std::sync::Mutex<std::collections::HashMap<AgentDocIndexGateCacheKey, AgentDocIndexGate>>,
> = std::sync::OnceLock::new();
CACHE.get_or_init(|| std::sync::Mutex::new(std::collections::HashMap::new()))
}
/// Memoizing front-end for `prepare_agent_doc_index_gate`.
///
/// Returns the gate together with a short note saying whether it came from
/// the in-process cache or from a fresh preparation. The cache key is the
/// full `(root, path_hint, scope, packet_label)` tuple. A cache lock that
/// cannot be acquired is treated as a miss and the result is simply not
/// stored.
fn prepare_agent_doc_index_gate_cached(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
    packet_label: &str,
) -> (AgentDocIndexGate, String) {
    let key = AgentDocIndexGateCacheKey {
        root: root.to_path_buf(),
        path_hint: path_hint.to_path_buf(),
        scope: scope.map(str::to_string),
        packet_label: packet_label.to_string(),
    };
    let cache = agent_doc_index_gate_cache();

    // The guard lives only for the lookup, so the lock is not held while a
    // fresh gate is being prepared.
    let cached = cache
        .lock()
        .ok()
        .and_then(|entries| entries.get(&key).cloned());
    if let Some(gate) = cached {
        let note = "reused from in-process index gate cache by root/path_hint/scope key";
        return (gate, note.to_string());
    }

    let gate = prepare_agent_doc_index_gate(root, path_hint, scope, packet_label);
    if let Ok(mut entries) = cache.lock() {
        entries.insert(key, gate.clone());
    }
    let note = "fresh inspection/refresh — cache miss on this preparation key";
    (gate, note.to_string())
}
/// Maps an inspected index state to the reason it needs rebuilding.
///
/// Returns `None` for a fresh index.
fn index_reason_for_state(state: SearchIndexState) -> Option<RebuildSearchReason> {
    let reason = match state {
        SearchIndexState::Fresh => return None,
        SearchIndexState::Missing => RebuildSearchReason::Missing,
        SearchIndexState::Stale { stale_files } => RebuildSearchReason::Stale { stale_files },
    };
    Some(reason)
}
/// Renders the rebuild detail text for `target` and `reason` via
/// `rebuild_search_target_detail`.
fn index_reason_detail(target: &SearchIndexTarget, reason: RebuildSearchReason) -> String {
    let rebuild_target = RebuildSearchTarget {
        label: target.label.clone(),
        reason,
        reindex_cmd: target.reindex_cmd.clone(),
    };
    rebuild_search_target_detail(&rebuild_target)
}
/// Builds the diagnostic emitted after a successful index refresh.
///
/// The changed-file count is the sum of the summary's new, modified and
/// deleted counts; "file" is pluralized unless that sum is exactly one.
fn index_refresh_diagnostic(
    target: &SearchIndexTarget,
    reason: RebuildSearchReason,
    summary: &index::IndexSummary,
    packet_label: &str,
) -> String {
    let changed = summary.new + summary.modified + summary.deleted;
    let plural = if changed == 1 { "" } else { "s" };
    let detail = index_reason_detail(target, reason);
    format!(
        "index refreshed: {detail}; updated {changed} changed file{plural} before {packet_label}"
    )
}
/// Builds the diagnostic emitted when an index refresh fails and the caller
/// falls back to raw source file nodes. The error is rendered with the
/// alternate (`{:#}`) format.
fn index_refresh_fallback_diagnostic(
    target: &SearchIndexTarget,
    reason: RebuildSearchReason,
    err: &anyhow::Error,
    packet_label: &str,
) -> String {
    let detail = index_reason_detail(target, reason);
    format!(
        "{detail}; could not refresh before {packet_label}: {err:#}; falling back to raw source file nodes"
    )
}
/// Picks the source root to walk when no index target is available.
///
/// Candidates are tried in order and the first that resolves wins:
/// 1. the explicitly requested scope,
/// 2. a submodule inferred from `path_hint`,
/// 3. a submodule inferred by `infer_agent_doc_task_submodule`,
/// 4. `root` itself.
///
/// Resolution errors are treated the same as "no match".
fn graph_fallback_source_root(root: &Path, path_hint: &Path, scope: Option<&str>) -> PathBuf {
    scope
        .and_then(|scope_name| config::Config::resolve_submodule(root, scope_name).ok())
        .map(|resolved| resolved.source_root)
        .or_else(|| {
            config::Config::infer_submodule_from_path(root, path_hint)
                .ok()
                .flatten()
                .map(|inferred| inferred.source_root)
        })
        .or_else(|| {
            infer_agent_doc_task_submodule(root, path_hint)
                .ok()
                .flatten()
                .map(|inferred| inferred.source_root)
        })
        .unwrap_or_else(|| root.to_path_buf())
}
fn prepare_agent_doc_index_gate(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
packet_label: &str,
) -> AgentDocIndexGate {
let fallback_source_root = graph_fallback_source_root(root, path_hint, scope);
let targets = match resolve_search_index_targets(root, path_hint, scope, false) {
Ok(targets) => targets,
Err(err) => {
return AgentDocIndexGate {
db_path: None,
source_root: fallback_source_root,
diagnostics: vec![format!(
"code index unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
)],
};
}
};
let Some(target) = targets.into_iter().next() else {
return AgentDocIndexGate {
db_path: None,
source_root: fallback_source_root,
diagnostics: vec![format!(
"code index unavailable before {packet_label}: no index target resolved; falling back to raw source file nodes"
)],
};
};
let state = match inspect_search_index(&target) {
Ok(state) => state,
Err(err) => {
return AgentDocIndexGate {
db_path: None,
source_root: target.source_root,
diagnostics: vec![format!(
"code index freshness unavailable before {packet_label}: {err:#}; falling back to raw source file nodes"
)],
};
}
};
let Some(reason) = index_reason_for_state(state) else {
return AgentDocIndexGate {
db_path: Some(target.db_path),
source_root: target.source_root,
diagnostics: Vec::new(),
};
};
match apply_search_index_update(root, &target) {
Ok(summary) => {
// #gdbgatecold: the index was just rewritten, so any cached
// pre-refresh inspection result for this scope (held by the
// active `InspectScopeGuard`) is stale. Drop it so the next
// `inspect_read_only` re-reads the fresh index.
index::inspect_scope_invalidate_all();
let diagnostics = vec![index_refresh_diagnostic(
&target,
reason,
&summary,
packet_label,
)];
AgentDocIndexGate {
db_path: Some(target.db_path),
source_root: target.source_root,
diagnostics,
}
}
Err(err) => {
let diagnostics = vec![index_refresh_fallback_diagnostic(
&target,
reason,
&err,
packet_label,
)];
AgentDocIndexGate {
db_path: None,
source_root: target.source_root,
diagnostics,
}
}
}
}
/// Fallback used when no code index is available: walks `source_root` and
/// registers one raw file node per walked file.
///
/// Files are processed in path order. Each node is added to `graph` and a
/// matching lookup entry is appended to `file_entries`.
///
/// # Errors
/// Propagates a failure of `walk::walk_files`.
fn add_raw_source_file_nodes(
    root: &Path,
    source_root: &Path,
    graph: &mut TraversalGraphBuild,
    file_entries: &mut Vec<TraversalFileIndexEntry>,
) -> Result<()> {
    let mut walked = walk::walk_files(source_root)?;
    walked.sort_by(|a, b| a.path.cmp(&b.path));
    for walked_file in walked {
        let file = walked_file.path.to_string_lossy();
        let node = traversal_raw_source_file_node(root, file.as_ref());
        let handle = node.handle.clone();
        let tokens = traversal_node_tokens(&node);
        graph.add_node(node.clone());
        file_entries.push(TraversalFileIndexEntry {
            handle,
            tokens,
            node,
        });
    }
    Ok(())
}
fn build_traversal_graph_source_with_options(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
session_only: bool,
) -> Result<TraversalGraphBuild> {
let mut graph = TraversalGraphBuild::default();
let mut symbol_entries = Vec::new();
let mut file_entries = Vec::new();
let mut route_entries = Vec::new();
let bounded_session_projection = hinted_markdown_file(root, path_hint).is_some();
if !session_only || hinted_markdown_file(root, path_hint).is_none() {
let (gate, _cache_detail) =
prepare_agent_doc_index_gate_cached(root, path_hint, scope, "graph traversal packet");
graph.warnings.extend(gate.diagnostics);
match gate.db_path {
Some(db_path) if db_path.exists() => {
let db = index::IndexDb::open_read_only_resilient(&db_path)?;
let file_paths = db.file_paths()?;
let mut file_handle_by_path = HashMap::<String, String>::new();
for file in file_paths {
if traversal_path_is_generated_artifact(
root,
&gate.source_root,
Path::new(&file),
) {
continue;
}
let node = traversal_file_node(root, &file);
let entry = TraversalFileIndexEntry {
handle: node.handle.clone(),
tokens: traversal_node_tokens(&node),
node: node.clone(),
};
if let Some(path) = entry.node.path.as_ref() {
file_handle_by_path.insert(path.clone(), entry.handle.clone());
}
graph.add_node(node);
file_entries.push(entry);
}
let symbols = db.all_symbols()?;
let mut symbol_by_file_name_line = HashMap::new();
let mut first_symbol_by_name = BTreeMap::<String, String>::new();
for symbol in symbols.iter().filter(|symbol| {
!traversal_path_is_generated_artifact(
root,
&gate.source_root,
Path::new(&symbol.file),
)
}) {
let node = traversal_symbol_node(root, symbol);
let file = relativize(&symbol.file, root);
symbol_by_file_name_line.insert(
format!("{file}:{}:{}", symbol.line, symbol.name),
node.handle.clone(),
);
first_symbol_by_name
.entry(symbol.name.clone())
.or_insert_with(|| node.handle.clone());
let entry = TraversalSymbolIndexEntry {
handle: node.handle.clone(),
tokens: traversal_node_tokens(&node),
node: node.clone(),
};
graph.add_node(node.clone());
if let Some(file_handle) = file_handle_by_path.get(&file) {
graph.add_edge(
file_handle,
&node.handle,
"defines",
Some("file defines symbol".to_string()),
1,
);
}
symbol_entries.push(entry);
}
if !bounded_session_projection {
for edge in db.all_stored_edges()? {
if traversal_path_is_generated_artifact(
root,
&gate.source_root,
Path::new(&edge.caller_file),
) {
continue;
}
let caller_file = relativize(&edge.caller_file, root);
let caller_key =
format!("{caller_file}:{}:{}", edge.caller_line, edge.caller_name);
let Some(caller_handle) =
symbol_by_file_name_line.get(&caller_key).cloned()
else {
continue;
};
let callee_handle = if let Some(handle) =
first_symbol_by_name.get(&edge.callee_name)
{
handle.clone()
} else {
let node = traversal_unresolved_symbol_node(root, &edge.callee_name);
let handle = node.handle.clone();
graph.add_node(node);
handle
};
graph.add_edge(
&caller_handle,
&callee_handle,
"calls",
Some(format!("call site {}:{}", caller_file, edge.call_site_line)),
1,
);
}
}
for route in db.all_routes()? {
if traversal_path_is_generated_artifact(
root,
&gate.source_root,
Path::new(&route.file),
) {
continue;
}
let node = traversal_route_node(root, &route);
let entry = TraversalRouteIndexEntry {
handle: node.handle.clone(),
tokens: traversal_node_tokens(&node),
node: node.clone(),
};
graph.add_node(node.clone());
if let Some(path) = node.path.as_ref()
&& let Some(file_handle) = file_handle_by_path.get(path)
{
graph.add_edge(
file_handle,
&node.handle,
"defines_route",
Some("file declares route".to_string()),
1,
);
}
let handler_handle =
if let Some(handle) = first_symbol_by_name.get(&route.handler_name) {
handle.clone()
} else {
let node = traversal_unresolved_symbol_node(root, &route.handler_name);
let handle = node.handle.clone();
graph.add_node(node);
handle
};
graph.add_edge(
&entry.handle,
&handler_handle,
"handled_by",
Some("route handler reference".to_string()),
1,
);
route_entries.push(entry);
}
}
_ => {
add_raw_source_file_nodes(root, &gate.source_root, &mut graph, &mut file_entries)
.with_context(|| {
format!(
"loading raw source fallback nodes from {}",
gate.source_root.display()
)
})?;
}
}
}
let code_lookup = TraversalCodeLookup::new(&symbol_entries, &file_entries, &route_entries);
load_agent_doc_traversal_nodes(root, path_hint, &mut graph, &code_lookup)?;
Ok(graph)
}
/// Test-only convenience wrapper: builds the traversal graph source with
/// `session_only` set to `false`.
#[cfg(test)]
fn build_traversal_graph_source(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
) -> Result<TraversalGraphBuild> {
build_traversal_graph_source_with_options(root, path_hint, scope, false)
}
/// Builds the traversal graph and writes its projection into the SQLite
/// graph store for `scope` (stored under `"root"` when no scope is given).
///
/// The projection is stamped with `GRAPH_PROJECTION_VERSION` and a source
/// watermark. If the watermark cannot be computed (error or `None`), the
/// result of `graph_projection_content_hash` is used in its place.
///
/// Returns the freshly built source graph together with the store's refresh
/// report.
///
/// # Errors
/// Propagates failures from building the graph, projecting it, opening the
/// store and replacing the projection.
pub(crate) fn write_traversal_graph_store_with_options(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
    session_only: bool,
) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
    let built = build_traversal_graph_source_with_options(root, path_hint, scope, session_only)?;
    let projection = traversal_projection_from_graph(root, scope, &built)?;
    let store_path = graph_substrate_db_path(root, scope);
    let mut store = SqliteGraphStore::open(&store_path)?;

    let watermark = match traversal_source_watermark(root, path_hint, scope, session_only) {
        Ok(Some(mark)) => Some(mark),
        // Best effort: fall back to hashing the projection itself.
        Ok(None) | Err(_) => graph_projection_content_hash(&projection),
    };

    let scope_name = scope.unwrap_or("root");
    let refresh = store.replace_projection_with_version(
        scope_name,
        &projection,
        Some(GRAPH_PROJECTION_VERSION),
        watermark,
    )?;
    Ok((built, refresh))
}
/// Convenience wrapper around `write_traversal_graph_store_with_options`
/// with `session_only` set to `false`.
pub(crate) fn write_traversal_graph_store(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
write_traversal_graph_store_with_options(root, path_hint, scope, false)
}
/// Rewrites the graph store and then reloads the graph from it.
///
/// The returned graph is the one read back from the store, with its
/// warnings replaced by those collected while building the source graph.
///
/// # Errors
/// Propagates failures from writing the store, reopening it read-only and
/// loading the graph from it.
fn refresh_traversal_graph_store_with_options(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
    session_only: bool,
) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
    let (built, refresh) =
        write_traversal_graph_store_with_options(root, path_hint, scope, session_only)?;
    let store_path = graph_substrate_db_path(root, scope);
    let store = SqliteGraphStore::open_read_only_resilient(&store_path)?;
    let mut stored_graph = traversal_graph_from_store(root, &store)?;
    stored_graph.warnings = built.warnings;
    Ok((stored_graph, refresh))
}
/// Convenience wrapper around `refresh_traversal_graph_store_with_options`
/// with `session_only` set to `false`.
fn refresh_traversal_graph_store(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
) -> Result<(TraversalGraphBuild, SqliteProjectionRefresh)> {
refresh_traversal_graph_store_with_options(root, path_hint, scope, false)
}
/// Refreshes the graph store and returns the reloaded graph, discarding the
/// refresh report.
///
/// # Errors
/// Propagates any failure of `refresh_traversal_graph_store`.
pub(crate) fn build_traversal_graph(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
) -> Result<TraversalGraphBuild> {
    refresh_traversal_graph_store(root, path_hint, scope).map(|(graph, _refresh)| graph)
}
/// Returns the tie-break priority of a node kind when resolving a query;
/// lower values win. Unknown kinds share the lowest priority (9).
fn traversal_query_kind_priority(kind: &str) -> usize {
    // A kind's position in this table is its priority.
    const KINDS_BY_PRIORITY: [&str; 9] = [
        "backlog",
        "job_packet",
        "worker_result",
        "symbol",
        "file",
        "route",
        "session",
        "semantic_concept",
        "semantic_entity",
    ];
    KINDS_BY_PRIORITY
        .iter()
        .position(|known| *known == kind)
        .unwrap_or(KINDS_BY_PRIORITY.len())
}
/// Ranks how well `node` matches a user query; lower tuples are better.
///
/// The first tuple element is the match tier:
/// * `0`: the handle equals the query,
/// * `1`: the node path equals the query (file nodes rank ahead of other
///   kinds on the same path),
/// * `2`: the reference id equals the query, with or without leading `#`,
/// * `3`: the label equals the query (for symbols, also with leading `#`
///   characters stripped from the query).
///
/// The second element is the tie-break priority and the third is the node
/// handle. Returns `None` for a blank query or when no tier applies.
fn traversal_node_match_rank(node: &TraversalNode, query: &str) -> Option<(usize, usize, String)> {
    let needle = query.trim();
    if needle.is_empty() {
        return None;
    }
    let kind_priority = traversal_query_kind_priority(&node.kind);
    let without_hash = needle.trim_start_matches('#');

    let (tier, tie_break) = if node.handle == needle {
        (0, kind_priority)
    } else if node.path.as_deref() == Some(needle) {
        let path_priority = if node.kind == "file" {
            0
        } else {
            kind_priority.saturating_add(1)
        };
        (1, path_priority)
    } else if matches!(node.ref_id.as_deref(), Some(id) if id == needle || id == without_hash) {
        (2, kind_priority)
    } else if node.label == needle || (node.kind == "symbol" && node.label == without_hash) {
        (3, kind_priority)
    } else {
        return None;
    };
    Some((tier, tie_break, node.handle.clone()))
}
/// Finds the graph node that best matches `query`.
///
/// Every node is ranked with `traversal_node_match_rank`; the node with the
/// smallest rank wins, and among equal ranks the first one encountered is
/// kept. Returns `None` when no node matches.
fn resolve_traversal_node<'a>(
    graph: &'a TraversalGraphBuild,
    query: &str,
) -> Option<&'a TraversalNode> {
    let mut best: Option<((usize, usize, String), &'a TraversalNode)> = None;
    for node in graph.nodes.values() {
        let Some(rank) = traversal_node_match_rank(node, query) else {
            continue;
        };
        // Strictly-less keeps the earliest node on ties.
        let improves = match &best {
            Some((best_rank, _)) => rank < *best_rank,
            None => true,
        };
        if improves {
            best = Some((rank, node));
        }
    }
    best.map(|(_, node)| node)
}
/// Builds an undirected adjacency list from the edge list.
///
/// Every edge contributes both directions. Each node's neighbor list is
/// deduplicated and sorted.
fn traversal_adjacency(edges: &[TraversalEdge]) -> BTreeMap<String, Vec<String>> {
    let mut neighbor_sets = BTreeMap::<String, BTreeSet<String>>::new();
    for edge in edges {
        for (node, neighbor) in [(&edge.from, &edge.to), (&edge.to, &edge.from)] {
            neighbor_sets
                .entry(node.clone())
                .or_default()
                .insert(neighbor.clone());
        }
    }

    let mut adjacency = BTreeMap::new();
    for (node, neighbors) in neighbor_sets {
        adjacency.insert(node, neighbors.into_iter().collect::<Vec<_>>());
    }
    adjacency
}
/// Finds a shortest path between two handles, treating edges as undirected.
///
/// Returns the handles along the path from `from` to `to`, inclusive.
/// `from == to` yields a single-element path without consulting the edges.
/// Returns `None` when either endpoint has no edges or the two are not
/// connected.
fn traversal_shortest_handles(
    edges: &[TraversalEdge],
    from: &str,
    to: &str,
) -> Option<Vec<String>> {
    if from == to {
        return Some(vec![from.to_string()]);
    }
    let adj = traversal_adjacency(edges);
    if !adj.contains_key(from) || !adj.contains_key(to) {
        return None;
    }

    // Breadth-first search. The bookkeeping borrows handles from `adj` (and
    // the arguments), so the only allocations are for the returned path.
    let mut visited = BTreeSet::<&str>::new();
    let mut queue = VecDeque::<&str>::new();
    let mut parent = BTreeMap::<&str, &str>::new();
    visited.insert(from);
    queue.push_back(from);
    while let Some(current) = queue.pop_front() {
        let Some(neighbors) = adj.get(current) else {
            continue;
        };
        for neighbor in neighbors {
            let neighbor = neighbor.as_str();
            if !visited.insert(neighbor) {
                continue;
            }
            parent.insert(neighbor, current);
            if neighbor == to {
                // Walk the parent chain back to `from`, which has no parent
                // entry, then flip it into forward order.
                let mut path = vec![to.to_string()];
                let mut cursor = to;
                while let Some(prev) = parent.get(cursor) {
                    path.push((*prev).to_string());
                    cursor = *prev;
                }
                path.reverse();
                return Some(path);
            }
            queue.push_back(neighbor);
        }
    }
    None
}
/// Lists the neighbors of `current`, best-connected first.
///
/// Edges are treated as undirected. Each neighbor is scored by the highest
/// `traversal_relation_score` among the edges joining it to `current`;
/// neighbors are returned by descending score, then by handle.
fn traversal_scored_neighbors(edges: &[TraversalEdge], current: &str) -> Vec<String> {
    let mut best_scores = BTreeMap::<String, usize>::new();
    for edge in edges {
        let other = match (edge.from == current, edge.to == current) {
            (true, _) => &edge.to,
            (false, true) => &edge.from,
            (false, false) => continue,
        };
        let score = traversal_relation_score(edge, current);
        let slot = best_scores.entry(other.clone()).or_insert(score);
        *slot = (*slot).max(score);
    }

    let mut ranked: Vec<(String, usize)> = best_scores.into_iter().collect();
    ranked.sort_by(|left, right| {
        right
            .1
            .cmp(&left.1)
            .then_with(|| left.0.cmp(&right.0))
    });
    ranked.into_iter().map(|(handle, _)| handle).collect()
}
/// Collects the handles reachable from `origin` within `depth` hops.
///
/// The walk is breadth-first and visits each node's neighbors in the order
/// given by `traversal_scored_neighbors`. `origin` is always included. When
/// `limit` is non-zero the walk stops as soon as `limit` handles have been
/// collected; `limit == 0` means unlimited.
fn traversal_neighborhood_handles(
    edges: &[TraversalEdge],
    origin: &str,
    depth: usize,
    limit: usize,
) -> BTreeSet<String> {
    let mut seen = BTreeSet::new();
    let mut queue = VecDeque::new();
    seen.insert(origin.to_string());
    queue.push_back((origin.to_string(), 0usize));
    while let Some((current, current_depth)) = queue.pop_front() {
        // Nodes already at the depth bound are kept but not expanded.
        if current_depth >= depth {
            continue;
        }
        for neighbor in traversal_scored_neighbors(edges, &current) {
            if limit > 0 && seen.len() >= limit {
                return seen;
            }
            if seen.insert(neighbor.clone()) {
                queue.push_back((neighbor, current_depth + 1));
            }
        }
    }
    seen
}
/// Returns clones of the edges whose two endpoints are both in `handles`,
/// in their original order.
fn traversal_edges_between(
    handles: &BTreeSet<String>,
    edges: &[TraversalEdge],
) -> Vec<TraversalEdge> {
    let mut induced = Vec::new();
    for edge in edges {
        let both_selected = handles.contains(&edge.from) && handles.contains(&edge.to);
        if both_selected {
            induced.push(edge.clone());
        }
    }
    induced
}
/// Picks one edge for every consecutive pair of handles in `path`.
///
/// For each hop, the first edge in `edges` joining the two handles in either
/// direction is cloned into the result; hops with no such edge are skipped.
fn traversal_path_edges(path: &[String], edges: &[TraversalEdge]) -> Vec<TraversalEdge> {
    path.windows(2)
        .filter_map(|hop| {
            // `windows(2)` only ever yields two-element slices.
            let [a, b] = hop else {
                return None;
            };
            edges
                .iter()
                .find(|edge| {
                    let forward = edge.from == *a && edge.to == *b;
                    let backward = edge.from == *b && edge.to == *a;
                    forward || backward
                })
                .cloned()
        })
        .collect()
}
/// Clones the given nodes into a vector ordered by kind, label, path and
/// finally handle.
fn sorted_traversal_nodes<'a>(
    nodes: impl IntoIterator<Item = &'a TraversalNode>,
) -> Vec<TraversalNode> {
    let mut sorted: Vec<TraversalNode> = nodes.into_iter().cloned().collect();
    sorted.sort_by(|left, right| {
        // Tuples compare lexicographically, field by field.
        let left_key = (&left.kind, &left.label, &left.path, &left.handle);
        let right_key = (&right.kind, &right.label, &right.path, &right.handle);
        left_key.cmp(&right_key)
    });
    sorted
}
/// Scores an edge for ranking purposes: a base value chosen by relation
/// kind plus the edge's own weight.
///
/// `calls` and `defines` score slightly higher when the edge leaves
/// `origin` than when it arrives there. Unknown relations get a base of 10.
fn traversal_relation_score(edge: &TraversalEdge, origin: &str) -> usize {
    let outgoing = edge.from == origin;
    let base = match edge.relation.as_str() {
        "mentions" => 100,
        "contains" => 80,
        "calls" if outgoing => 70,
        "calls" => 65,
        "handled_by" => 68,
        "mentions_concept" | "mentions_entity" => 66,
        "semantic_relation" => 64,
        "defines_route" => 62,
        "defines" if outgoing => 60,
        "defines" => 55,
        "tagged_concept" | "related_concept" => 58,
        _ => 10,
    };
    base + edge.weight
}
/// Produces the human-readable reason shown for a recommended neighbor.
///
/// The wording depends on the edge relation and, for directional relations,
/// on whether the edge leaves `origin`. Unknown relations fall back to
/// `connected by <relation>`.
fn traversal_recommendation_reason(edge: &TraversalEdge, origin: &str) -> String {
    let outgoing = edge.from == origin;
    let reason = match (edge.relation.as_str(), outgoing) {
        ("mentions", _) => "matched from backlog/session text",
        ("contains", _) => "contained in the selected session artifact",
        ("defines", true) => "symbol defined in selected file",
        ("defines", false) => "file that defines the selected symbol",
        ("defines_route", true) => "route declared in selected file",
        ("defines_route", false) => "file that declares the selected route",
        ("handled_by", true) => "handler for the selected route",
        ("handled_by", false) => "route handled by the selected symbol",
        ("mentions_concept", _) => "cached summary concept for the selected source",
        ("mentions_entity", _) => "cached summary entity for the selected source",
        ("semantic_relation", _) => "LLM-extracted semantic relationship",
        ("tagged_concept", _) => "concept label attached to the selected entity",
        ("related_concept", _) => "co-occurring cached summary concept",
        ("calls", true) => "callee from the selected symbol",
        ("calls", false) => "caller of the selected symbol",
        (other, _) => return format!("connected by {other}"),
    };
    reason.to_string()
}
fn traversal_recommendations(
graph: &TraversalGraphBuild,
origin: Option<&str>,
shortest_path: Option<&[String]>,
limit: usize,
) -> Vec<TraversalRecommendation> {
let Some(origin) = origin else {
return Vec::new();
};
let mut recommendations = Vec::new();
let mut seen = BTreeSet::new();
if let Some(path) = shortest_path
&& path.len() > 1
&& path.first().is_some_and(|handle| handle == origin)
&& let Some(next) = graph.nodes.get(&path[1])
{
seen.insert(next.handle.clone());
recommendations.push(TraversalRecommendation {
handle: next.handle.clone(),
kind: next.kind.clone(),
label: next.label.clone(),
reason: "next hop on shortest path".to_string(),
score: 1_000,
expand: next.expand.clone(),
});
}
let mut candidates = graph
.edges
.iter()
.filter_map(|edge| {
let neighbor = if edge.from == origin {
edge.to.as_str()
} else if edge.to == origin {
edge.from.as_str()
} else {
return None;
};
let node = graph.nodes.get(neighbor)?;
Some((traversal_relation_score(edge, origin), edge, node))
})
.collect::<Vec<_>>();
candidates.sort_by(|(left_score, _, left), (right_score, _, right)| {
right_score
.cmp(left_score)
.then_with(|| left.kind.cmp(&right.kind))
.then_with(|| left.label.cmp(&right.label))
.then_with(|| left.handle.cmp(&right.handle))
});
let max = if limit == 0 { usize::MAX } else { limit };
for (score, edge, node) in candidates {
if recommendations.len() >= max {
break;
}
if seen.insert(node.handle.clone()) {
recommendations.push(TraversalRecommendation {
handle: node.handle.clone(),
kind: node.kind.clone(),
label: node.label.clone(),
reason: traversal_recommendation_reason(edge, origin),
score,
expand: node.expand.clone(),
});
}
}
recommendations
}
/// Chooses the exploration budget from the graph size.
///
/// The size is `nodes + edges` (saturating). Up to 80 is "small", up to 800
/// is "medium", anything larger is "large"; larger projects get fewer and
/// shorter source windows and a lower relationship limit.
fn exploration_budget_for_counts(nodes: usize, edges: usize) -> ExplorationBudget {
    let scale = nodes.saturating_add(edges);
    let (project_size, max_source_windows, lines_per_window, relationship_limit) = match scale {
        0..=80 => ("small", 8, 96, 40),
        81..=800 => ("medium", 6, 80, 32),
        _ => ("large", 4, 64, 24),
    };
    ExplorationBudget {
        project_size: project_size.to_string(),
        max_source_windows,
        lines_per_window,
        relationship_limit,
    }
}
fn exploration_node_label(node: &TraversalNode) -> String {
format!("{}:{}", node.kind, node.label)
}
/// Derives the source window to read around a node.
///
/// Returns `None` for nodes without a path. The anchor is the node's line
/// plus one, or 1 when the line is absent, negative or overflows. The window
/// starts a third of the window length before the anchor (never below
/// line 1) and spans `budget.lines_per_window` lines.
fn exploration_source_window_for_node(
    root: &Path,
    node: &TraversalNode,
    budget: &ExplorationBudget,
) -> Option<ExplorationSourceWindow> {
    let file = node.path.as_ref()?;
    let window_len = budget.lines_per_window;

    let anchor = match node.line.and_then(|line| usize::try_from(line).ok()) {
        Some(line) => line.checked_add(1).unwrap_or(1),
        None => 1,
    };
    let start = anchor.saturating_sub(window_len / 3).max(1);
    let end = start.saturating_add(window_len).saturating_sub(1);

    let window_key = format!("{file}:{start}:{end}:{}", node.handle);
    Some(ExplorationSourceWindow {
        handle: stable_handle("xwin", &window_key),
        file: file.clone(),
        start,
        end,
        reason: format!("cluster around {}", exploration_node_label(node)),
        expand: source_read_command(root, file, start, window_len),
    })
}
fn build_exploration_packet(
root: &Path,
totals: &TraversalTotals,
selected_nodes: &[TraversalNode],
selected_edges: &[TraversalEdge],
) -> ExplorationPacket {
let budget = exploration_budget_for_counts(totals.nodes, totals.edges);
let node_by_handle = selected_nodes
.iter()
.map(|node| (node.handle.as_str(), node))
.collect::<BTreeMap<_, _>>();
let relationship_map = selected_edges
.iter()
.take(budget.relationship_limit)
.filter_map(|edge| {
let from = node_by_handle.get(edge.from.as_str())?;
let to = node_by_handle.get(edge.to.as_str())?;
Some(ExplorationRelation {
from: exploration_node_label(from),
relation: edge.relation.clone(),
to: exploration_node_label(to),
label: edge.label.clone(),
})
})
.collect::<Vec<_>>();
let mut seen_windows = BTreeSet::new();
let mut source_windows = Vec::new();
for node in selected_nodes {
if source_windows.len() >= budget.max_source_windows {
break;
}
let Some(window) = exploration_source_window_for_node(root, node, &budget) else {
continue;
};
let key = (window.file.clone(), window.start, window.end);
if seen_windows.insert(key) {
source_windows.push(window);
}
}
ExplorationPacket {
budget,
relationship_map,
source_windows,
worker_context: Vec::new(),
no_reread_guidance:
"Use the source_windows expand commands for line-numbered context; avoid whole-file reads unless the needed line is outside every listed window."
.to_string(),
}
}
/// Builds a [`TraversalReport`] for `graph` in one of three modes.
///
/// - `"path"`: both `query` and `target` resolve to nodes. The report holds
///   the shortest path between them plus every edge between the path's nodes.
///   When no path exists, only the two endpoints are reported, with no edges.
/// - `"neighborhood"`: only `query` resolves. The report covers the nodes
///   reachable from it within `depth`, bounded by `limit`.
/// - `"export"`: no `query`. All nodes are listed in sorted order; nodes and
///   edges are each cut to `limit` entries when `limit` is non-zero.
///
/// # Errors
///
/// Fails when `query` or `target` is supplied but cannot be resolved to a node.
pub(crate) fn traversal_report(
    root: &Path,
    scope: Option<&str>,
    graph: TraversalGraphBuild,
    query: Option<&str>,
    target: Option<&str>,
    depth: usize,
    limit: usize,
) -> Result<TraversalReport> {
    let totals = TraversalTotals {
        nodes: graph.nodes.len(),
        edges: graph.edges.len(),
    };

    let origin_node = query.and_then(|value| resolve_traversal_node(&graph, value));
    let target_node = target.and_then(|value| resolve_traversal_node(&graph, value));
    if let (Some(query), None) = (query, origin_node) {
        bail!("traversal node not found: {}", query);
    }
    if let (Some(target), None) = (target, target_node) {
        bail!("traversal target not found: {}", target);
    }

    let (mode, selected_nodes, selected_edges, shortest_path) = match (origin_node, target_node) {
        (Some(origin), Some(target)) => {
            match traversal_shortest_handles(&graph.edges, &origin.handle, &target.handle) {
                Some(handles) => {
                    let nodes: Vec<_> = handles
                        .iter()
                        .filter_map(|handle| graph.nodes.get(handle).cloned())
                        .collect();
                    let path = TraversalPathReport {
                        from: origin.clone(),
                        to: target.clone(),
                        hops: handles.len().saturating_sub(1),
                        nodes: nodes.clone(),
                        edges: traversal_path_edges(&handles, &graph.edges),
                    };
                    // The report's edge list is every edge among the path's
                    // nodes, which can be a superset of the path edges.
                    let handle_set: BTreeSet<_> = handles.iter().cloned().collect();
                    let edges = traversal_edges_between(&handle_set, &graph.edges);
                    (String::from("path"), nodes, edges, Some(path))
                }
                None => (
                    String::from("path"),
                    vec![origin.clone(), target.clone()],
                    Vec::new(),
                    None,
                ),
            }
        }
        (Some(origin), None) => {
            let handles =
                traversal_neighborhood_handles(&graph.edges, &origin.handle, depth, limit);
            let nodes =
                sorted_traversal_nodes(handles.iter().filter_map(|handle| graph.nodes.get(handle)));
            let edges = traversal_edges_between(&handles, &graph.edges);
            (String::from("neighborhood"), nodes, edges, None)
        }
        (None, _) => {
            let mut nodes = sorted_traversal_nodes(graph.nodes.values());
            // A zero limit means "no limit".
            if limit > 0 {
                nodes.truncate(limit);
            }
            let handles: BTreeSet<_> = nodes.iter().map(|node| node.handle.clone()).collect();
            let mut edges = traversal_edges_between(&handles, &graph.edges);
            if limit > 0 {
                edges.truncate(limit);
            }
            (String::from("export"), nodes, edges, None)
        }
    };

    let shortest_handles = shortest_path.as_ref().map(|path| {
        path.nodes
            .iter()
            .map(|node| node.handle.clone())
            .collect::<Vec<_>>()
    });
    // Recommendations are capped at 10 regardless of the requested limit.
    let recommendation_limit = match limit {
        0 => 10,
        requested => requested.min(10),
    };
    let recommendations = traversal_recommendations(
        &graph,
        origin_node.map(|node| node.handle.as_str()),
        shortest_handles.as_deref(),
        recommendation_limit,
    );

    let exploration = build_exploration_packet(root, &totals, &selected_nodes, &selected_edges);
    let truncated = selected_nodes.len() < totals.nodes || selected_edges.len() < totals.edges;

    Ok(TraversalReport {
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        mode,
        totals,
        query: query.map(str::to_string),
        target: target.map(str::to_string),
        nodes: selected_nodes,
        edges: selected_edges,
        shortest_path,
        recommendations,
        exploration,
        truncated,
        warnings: graph.warnings,
    })
}
/// Escapes `input` for safe inclusion in HTML text or a quoted attribute.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with their entity forms in a single
/// pass, so an ampersand that is already part of an entity in the input is
/// escaped again rather than passed through.
fn html_escape(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Renders `report` as a self-contained HTML page with an interactive graph.
///
/// The report is serialized to JSON and embedded in a
/// `<script type="application/json">` element; an inline script parses it,
/// lays the nodes out on rings grouped by kind, and draws them into an SVG
/// with a filter box, a legend, a node list and a detail panel.
///
/// Report data is treated as untrusted text: every `<` in the embedded JSON
/// is written as `\u003c`, and every report string interpolated into
/// `innerHTML` goes through the page's `escapeHtml` helper.
///
/// # Errors
///
/// Fails when the report cannot be serialized to JSON.
pub(crate) fn traversal_report_html(report: &TraversalReport) -> Result<String> {
    // `<` only occurs inside JSON strings, so replacing it with its JSON
    // escape keeps the document valid while preventing `</script>`, `<!--`
    // and `<script` sequences in the data from affecting HTML parsing.
    let json = serde_json::to_string(report)?.replace('<', "\\u003c");
    let mut html = String::new();
    html.push_str(
        "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift traversal graph</title>",
    );
    html.push_str(
        r#"<style>
:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#ffffff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e;--semantic:#9a3412}
@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf;--semantic:#fb923c}}
*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.toolbar{display:flex;gap:8px;align-items:center}.toolbar input{min-width:220px;border:1px solid var(--line);border-radius:6px;background:var(--panel);color:var(--text);padding:8px 10px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 320px;gap:14px;min-height:650px}.graph-panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.graph-panel{position:relative}.legend{position:absolute;left:12px;top:12px;display:flex;flex-wrap:wrap;gap:6px;max-width:calc(100% - 24px)}.legend span{font-size:12px;background:color-mix(in srgb,var(--panel) 86%,transparent);border:1px solid var(--line);border-radius:999px;padding:4px 8px}.side{padding:14px;overflow:auto}.side h2{font-size:15px;margin:0 0 8px}.selected{border-top:1px solid var(--line);margin-top:12px;padding-top:12px}.list{display:grid;gap:8px}.row{border:1px solid 
var(--line);border-radius:6px;padding:8px;cursor:pointer}.row:hover{border-color:var(--accent)}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted)}svg{width:100%;height:650px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.edge.semantic{stroke:var(--semantic);stroke-width:1.8}.node{stroke:var(--panel);stroke-width:2;cursor:pointer}.node.semantic{stroke:var(--semantic);stroke-width:2.5}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text);pointer-events:none}.hidden{display:none}@media(max-width:900px){.top{display:block}.toolbar{margin-top:12px}.layout{grid-template-columns:1fr}.side{max-height:360px}svg{height:560px}}
</style>"#,
    );
    html.push_str("</head><body>");
    html.push_str("<div class=\"page\">");
    html.push_str(&format!(
        "<header class=\"top\"><div><h1>tsift traversal graph</h1><div class=\"meta\">mode <code>{}</code> | nodes <code>{}</code>/<code>{}</code> | edges <code>{}</code>/<code>{}</code></div></div><div class=\"toolbar\"><input id=\"filter\" type=\"search\" placeholder=\"Filter nodes\"></div></header>",
        html_escape(&report.mode),
        report.nodes.len(),
        report.totals.nodes,
        report.edges.len(),
        report.totals.edges
    ));
    html.push_str(
        r#"<main class="layout"><section class="graph-panel"><div id="legend" class="legend"></div><svg id="graph-canvas" role="img" aria-label="Traversal graph"></svg></section><aside class="side"><h2>Nodes</h2><div id="node-list" class="list"></div><div id="selected" class="selected"></div></aside></main>"#,
    );
    html.push_str("<script id=\"graph-data\" type=\"application/json\">");
    html.push_str(&json);
    // The page script below builds markup with template literals; every value
    // that originates from the report is passed through `escapeHtml` first.
    html.push_str(
        r##"</script><script>
const report = JSON.parse(document.getElementById("graph-data").textContent);
const svg = document.getElementById("graph-canvas");
const list = document.getElementById("node-list");
const selected = document.getElementById("selected");
const filter = document.getElementById("filter");
const legend = document.getElementById("legend");
const nodes = report.nodes.map((node, index) => ({...node, index}));
const nodeByHandle = new Map(nodes.map(node => [node.handle, node]));
const edges = report.edges.filter(edge => nodeByHandle.has(edge.from) && nodeByHandle.has(edge.to));
const colorByKind = new Map([
["file", "#2563eb"], ["symbol", "#16a34a"], ["route", "#7c3aed"],
["session", "#0891b2"], ["backlog", "#dc2626"], ["job_packet", "#ea580c"],
["semantic_concept", "#9a3412"], ["semantic_entity", "#b45309"],
["source_handle", "#64748b"], ["worker_context", "#475569"], ["worker_result", "#15803d"]
]);
function color(kind){ return colorByKind.get(kind) || "#6b7280"; }
function isSemantic(edge){ return edge.relation.includes("concept") || edge.relation.includes("entity") || edge.relation.includes("semantic"); }
function text(value){ return value == null ? "" : String(value); }
function matches(node, query){
if (!query) return true;
const haystack = [node.kind,node.label,node.handle,node.ref_id,node.path,node.detail].map(text).join(" ").toLowerCase();
return haystack.includes(query);
}
function layout(){
const rect = svg.getBoundingClientRect();
const width = rect.width || 900;
const height = rect.height || 650;
const cx = width / 2;
const cy = height / 2;
const kinds = [...new Set(nodes.map(node => node.kind))].sort();
const counts = new Map();
for (const node of nodes) counts.set(node.kind, (counts.get(node.kind) || 0) + 1);
const offsets = new Map();
for (const node of nodes) {
const group = kinds.indexOf(node.kind);
const index = offsets.get(node.kind) || 0;
offsets.set(node.kind, index + 1);
const groupCount = counts.get(node.kind) || 1;
const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
const angle = (Math.PI * 2 * index / Math.max(groupCount, 1)) + (group * 0.47);
node.x = cx + Math.cos(angle) * ring;
node.y = cy + Math.sin(angle) * ring;
}
}
function draw(){
const query = filter.value.trim().toLowerCase();
const visible = new Set(nodes.filter(node => matches(node, query)).map(node => node.handle));
svg.innerHTML = "";
for (const edge of edges) {
if (!visible.has(edge.from) || !visible.has(edge.to)) continue;
const from = nodeByHandle.get(edge.from);
const to = nodeByHandle.get(edge.to);
const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
line.setAttribute("class", "edge" + (isSemantic(edge) ? " semantic" : ""));
line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.relation + (edge.label ? ": " + edge.label : "");
svg.appendChild(line);
}
for (const node of nodes) {
if (!visible.has(node.handle)) continue;
const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
circle.setAttribute("fill", color(node.kind));
circle.setAttribute("class", "node" + (node.kind.startsWith("semantic_") ? " semantic" : ""));
circle.addEventListener("click", () => selectNode(node));
circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
svg.appendChild(circle);
const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
label.setAttribute("class", "node-label");
label.textContent = node.label.length > 34 ? node.label.slice(0, 31) + "..." : node.label;
svg.appendChild(label);
}
renderList(query);
}
function renderLegend(){
const kinds = [...new Set(nodes.map(node => node.kind))].sort();
legend.innerHTML = kinds.map(kind => `<span><b style="color:${color(kind)}">●</b> ${escapeHtml(kind)}</span>`).join("");
}
function renderList(query){
const rows = nodes.filter(node => matches(node, query)).slice(0, 120);
list.innerHTML = rows.map(node => `<div class="row" data-handle="${escapeHtml(node.handle)}"><div class="kind">${escapeHtml(node.kind)}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${escapeHtml(node.handle)}</div></div>`).join("");
for (const row of list.querySelectorAll(".row")) {
row.addEventListener("click", () => selectNode(nodeByHandle.get(row.dataset.handle)));
}
}
function selectNode(node){
if (!node) return;
const adjacent = edges.filter(edge => edge.from === node.handle || edge.to === node.handle).slice(0, 20);
selected.innerHTML = `<h2>${escapeHtml(node.label)}</h2><div class="kind">${escapeHtml(node.kind)}</div><p class="handle">${escapeHtml(node.handle)}</p>${node.path ? `<p>${escapeHtml(node.path)}${node.line != null ? ":" + escapeHtml(node.line) : ""}</p>` : ""}${node.detail ? `<p>${escapeHtml(node.detail)}</p>` : ""}<p><code>${escapeHtml(node.expand)}</code></p><h2>Edges</h2><div class="list">${adjacent.map(edge => `<div class="row"><div class="kind">${escapeHtml(edge.relation)}</div><div>${escapeHtml(edge.from)} -> ${escapeHtml(edge.to)}</div>${edge.label ? `<div>${escapeHtml(edge.label)}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No visible edges.</div>"}</div>`;
}
function escapeHtml(value){
return text(value).replace(/[&<>"']/g, ch => ({"&":"&amp;","<":"&lt;",">":"&gt;","\"":"&quot;","'":"&#39;"}[ch]));
}
filter.addEventListener("input", draw);
window.addEventListener("resize", () => { layout(); draw(); });
renderLegend();
layout();
draw();
if (nodes.length) selectNode(nodes[0]);
</script></div></body></html>"##,
    );
    Ok(html)
}
/// Ranks the semantic nodes in `store` by similarity to `query`.
///
/// Nodes of the kinds selected by `kind` are loaded from the store. Those
/// without a parseable `embedding` property are skipped. The rest are scored
/// with `semantic_cosine` against the query embedding and sorted by
/// descending score, then by kind, label and handle. A non-zero `limit`
/// caps the number of returned items.
///
/// A warning is added to the report when nothing matched.
///
/// # Errors
///
/// Fails when `query` is blank or when the store cannot list nodes.
fn semantic_related_report_from_store(
    root: &Path,
    scope: Option<&str>,
    query: &str,
    limit: usize,
    kind: SemanticRelatedKind,
    store: &impl GraphStore,
) -> Result<SemanticRelatedReport> {
    if query.trim().is_empty() {
        bail!("semantic query cannot be empty");
    }
    let query_embedding = semantic_embedding(query);
    let node_kinds: &[&str] = match kind {
        SemanticRelatedKind::Concept => &["semantic_concept"],
        SemanticRelatedKind::Entity => &["semantic_entity"],
        SemanticRelatedKind::All => &["semantic_concept", "semantic_entity"],
    };

    let mut items = Vec::new();
    for node_kind in node_kinds {
        for node in store.nodes_by_kind(node_kind)? {
            let parsed = node
                .properties
                .get("embedding")
                .and_then(|value| parse_semantic_embedding_property(value));
            let embedding = match parsed {
                Some(embedding) => embedding,
                None => continue,
            };
            let score = semantic_cosine(&query_embedding, &embedding);

            // Explicit properties win; the node id and a generated traversal
            // command are the fallbacks.
            let handle = match node.properties.get("handle") {
                Some(handle) => handle.clone(),
                None => node.id.clone(),
            };
            let file_path = node
                .properties
                .get("source_file")
                .or_else(|| node.properties.get("path"))
                .cloned();
            let source_symbol = node.properties.get("source_symbol").cloned();
            let detail = node
                .properties
                .get("description")
                .or_else(|| node.properties.get("detail"))
                .cloned();
            let expand = match node.properties.get("expand") {
                Some(command) => command.clone(),
                None => traversal_expand_command(root, &node.id),
            };

            items.push(SemanticRelatedItem {
                handle,
                kind: node.kind,
                label: node.label,
                score,
                file_path,
                source_symbol,
                detail,
                expand,
            });
        }
    }

    items.sort_by(|left, right| {
        // Highest score first; incomparable scores are treated as equal.
        let by_score = right
            .score
            .partial_cmp(&left.score)
            .unwrap_or(Ordering::Equal);
        by_score
            .then_with(|| left.kind.cmp(&right.kind))
            .then_with(|| left.label.cmp(&right.label))
            .then_with(|| left.handle.cmp(&right.handle))
    });
    // A zero limit means "no limit".
    if limit > 0 {
        items.truncate(limit);
    }

    let warnings = if items.is_empty() {
        vec![
            "no semantic graph rows found; run `tsift summarize --extract <path>` first"
                .to_string(),
        ]
    } else {
        Vec::new()
    };

    Ok(SemanticRelatedReport {
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        query: query.to_string(),
        embedding_model: SEMANTIC_EMBEDDING_MODEL.to_string(),
        count: items.len(),
        items,
        warnings,
    })
}
/// Returns how many edges may be scanned per expanded node.
///
/// A `limit` of zero yields zero, which the caller treats as "unbounded".
/// Otherwise the cap is four times the limit, clamped to the configured
/// minimum and maximum scan caps.
fn graph_db_semantic_edge_scan_cap(limit: usize) -> usize {
    match limit {
        0 => 0,
        requested => requested.saturating_mul(4).clamp(
            GRAPH_DB_SEMANTIC_MIN_EDGE_SCAN_CAP,
            GRAPH_DB_SEMANTIC_MAX_EDGE_SCAN_CAP,
        ),
    }
}
/// Returns the maximum number of nodes seed expansion may discover.
///
/// A `limit` of zero yields `usize::MAX` (no cap). Otherwise the cap is the
/// largest of three times the limit, the limit itself, and `seed_count`, so
/// the cap is never smaller than the number of seeds.
fn graph_db_semantic_node_discovery_cap(seed_count: usize, limit: usize) -> usize {
    match limit {
        0 => usize::MAX,
        requested => {
            let headroom = requested.saturating_mul(3);
            headroom.max(requested).max(seed_count)
        }
    }
}
/// Returns the endpoint of `edge` opposite to `current_id`.
///
/// Yields `to_id` when the edge starts at `current_id` (including self
/// loops), `from_id` when it ends there, and `None` when the edge does not
/// touch `current_id` at all.
fn graph_db_semantic_edge_other_id<'a>(
    edge: &'a SubstrateGraphEdge,
    current_id: &str,
) -> Option<&'a str> {
    let (from, to) = (edge.from_id.as_str(), edge.to_id.as_str());
    match (from == current_id, to == current_id) {
        (true, _) => Some(to),
        (false, true) => Some(from),
        (false, false) => None,
    }
}
/// Scores `edge` for expansion from the node `current_id`; higher is better.
///
/// The score is the sum of three parts: ten times the edge kind's rank score,
/// a direction bonus (8 when the edge leaves `current_id`, otherwise 4), and
/// a fixed bonus for selected edge kinds.
fn graph_db_semantic_edge_score(edge: &SubstrateGraphEdge, current_id: &str) -> i64 {
    let rank_component = resolution::edge_kind_rank_score(&edge.kind).saturating_mul(10);
    let direction_component = if edge.from_id == current_id { 8 } else { 4 };
    let kind_component = match edge.kind.as_str() {
        "mentions_concept" | "mentions_entity" | "tagged_concept" | "tagged_entity"
        | "related_concept" => 30,
        "semantic_relation" => 28,
        "calls" => 24,
        "mentions" => 22,
        "requests_context" | "scopes_context" | "scopes_source" | "explains_result" => 18,
        "defines" | "contains" | "belongs_to" => 12,
        _ => 0,
    };
    rank_component + direction_component + kind_component
}
fn graph_db_semantic_seeded_neighborhood(
store: &impl GraphStore,
seed_ids: &[String],
depth: usize,
limit: usize,
) -> Result<GraphDbSemanticSeededSubgraph> {
let seed_rank = seed_ids
.iter()
.enumerate()
.map(|(idx, seed)| (seed.clone(), idx))
.collect::<BTreeMap<_, _>>();
let mut nodes = BTreeMap::<String, SubstrateGraphNode>::new();
let mut edges = BTreeMap::<String, SubstrateGraphEdge>::new();
let mut node_score_by_id = BTreeMap::<String, i64>::new();
let mut queue = VecDeque::<(String, usize)>::new();
let mut seen_at_depth = BTreeMap::<String, usize>::new();
let edge_scan_cap = graph_db_semantic_edge_scan_cap(limit);
let node_discovery_cap = graph_db_semantic_node_discovery_cap(seed_ids.len(), limit);
let mut skipped_by_edge_cap = 0usize;
let mut skipped_by_node_cap = 0usize;
let mut diagnostics = vec![
"semantic-seeded retrieval uses phrase similarity to pick graph seeds".to_string(),
"seed expansion traverses both outgoing and incident edges so code, markdown, conversation, and memory adapters can link into semantic rows without reversing their edge direction".to_string(),
format!(
"seed expansion ranks incident/outgoing edges before caps; per-node edge scan cap={} node discovery cap={}",
if edge_scan_cap == 0 {
"unbounded".to_string()
} else {
edge_scan_cap.to_string()
},
if node_discovery_cap == usize::MAX {
"unbounded".to_string()
} else {
node_discovery_cap.to_string()
}
),
];
for (idx, seed_id) in seed_ids.iter().enumerate() {
if let Some(node) = store.node(seed_id)? {
nodes.entry(seed_id.clone()).or_insert(node);
node_score_by_id
.entry(seed_id.clone())
.or_insert(1_000_000i64.saturating_sub(idx as i64));
queue.push_back((seed_id.clone(), 0));
seen_at_depth.entry(seed_id.clone()).or_insert(0);
} else {
diagnostics.push(format!(
"semantic seed {seed_id} was not present in the graph store"
));
}
}
while let Some((current_id, current_depth)) = queue.pop_front() {
if current_depth >= depth {
continue;
}
let mut expansion_edges_by_key = BTreeMap::<String, SubstrateGraphEdge>::new();
for edge in store.outgoing_edges(¤t_id, None)? {
expansion_edges_by_key
.entry(graph_db_edge_key(&edge))
.or_insert(edge);
}
for edge in store.incident_edges(¤t_id, None)? {
expansion_edges_by_key
.entry(graph_db_edge_key(&edge))
.or_insert(edge);
}
let mut expansion_edges = expansion_edges_by_key.into_values().collect::<Vec<_>>();
expansion_edges.sort_by(|left, right| {
graph_db_semantic_edge_score(right, ¤t_id)
.cmp(&graph_db_semantic_edge_score(left, ¤t_id))
.then_with(|| graph_db_edge_key(left).cmp(&graph_db_edge_key(right)))
});
if edge_scan_cap > 0 && expansion_edges.len() > edge_scan_cap {
skipped_by_edge_cap += expansion_edges.len() - edge_scan_cap;
expansion_edges.truncate(edge_scan_cap);
}
for edge in expansion_edges {
let Some(other_id) = graph_db_semantic_edge_other_id(&edge, ¤t_id) else {
continue;
};
let other_known = nodes.contains_key(other_id);
if !other_known && nodes.len() >= node_discovery_cap {
skipped_by_node_cap += 1;
continue;
}
let other_id = other_id.to_string();
let edge_score = graph_db_semantic_edge_score(&edge, ¤t_id)
.saturating_add((depth.saturating_sub(current_depth) as i64).saturating_mul(5));
node_score_by_id
.entry(other_id.clone())
.and_modify(|score| *score = (*score).max(edge_score))
.or_insert(edge_score);
let edge_key = graph_db_edge_key(&edge);
edges.entry(edge_key).or_insert_with(|| edge.clone());
if let std::collections::btree_map::Entry::Vacant(entry) = nodes.entry(other_id.clone())
&& let Some(node) = store.node(&other_id)?
{
entry.insert(node);
}
if !nodes.contains_key(&other_id) {
continue;
}
let next_depth = current_depth + 1;
let should_queue = seen_at_depth
.get(&other_id)
.is_none_or(|seen_depth| next_depth < *seen_depth);
if should_queue {
seen_at_depth.insert(other_id.clone(), next_depth);
queue.push_back((other_id, next_depth));
}
}
}
if skipped_by_edge_cap > 0 {
diagnostics.push(format!(
"semantic-seeded expansion skipped {skipped_by_edge_cap} lower-scoring incident/outgoing edge(s) after per-node caps"
));
}
if skipped_by_node_cap > 0 {
diagnostics.push(format!(
"semantic-seeded expansion skipped {skipped_by_node_cap} lower-scoring node discovery edge(s) after the discovery cap"
));
}
let mut nodes = nodes.into_values().collect::<Vec<_>>();
nodes.sort_by(|left, right| {
seed_rank
.get(&left.id)
.copied()
.unwrap_or(usize::MAX)
.cmp(&seed_rank.get(&right.id).copied().unwrap_or(usize::MAX))
.then_with(|| {
node_score_by_id
.get(&right.id)
.copied()
.unwrap_or_default()
.cmp(&node_score_by_id.get(&left.id).copied().unwrap_or_default())
})
.then(left.id.cmp(&right.id))
});
let before_limit = nodes.len();
let truncated = limit > 0 && nodes.len() > limit;
if truncated {
nodes.truncate(limit);
diagnostics.push(format!(
"semantic-seeded neighborhood truncated from {before_limit} to {limit} node(s)"
));
}
let node_ids = nodes
.iter()
.map(|node| node.id.as_str())
.collect::<BTreeSet<_>>();
let mut edges = edges
.into_values()
.filter(|edge| {
node_ids.contains(edge.from_id.as_str()) && node_ids.contains(edge.to_id.as_str())
})
.collect::<Vec<_>>();
edges.sort_by_key(graph_db_edge_key);
Ok(GraphDbSemanticSeededSubgraph {
nodes,
edges,
truncated,
diagnostics,
})
}
/// Runs the semantic-related query and prints the ranked results.
///
/// The traversal graph store for `path`/`scope` is refreshed and reopened
/// read-only before it is queried. Output is JSON when `json_output` is set,
/// tab-separated rows when `compact` is set, and a multi-line listing
/// otherwise. In the two text modes warnings go to stderr; in JSON mode they
/// are only part of the serialized report.
#[allow(clippy::too_many_arguments)]
fn cmd_semantic_related(
    query: &str,
    path: &Path,
    scope: Option<&str>,
    limit: usize,
    kind: SemanticRelatedKind,
    json_output: bool,
    compact: bool,
    pretty: bool,
    terse: bool,
    schema: bool,
) -> Result<()> {
    let root = lint::resolve_project_root_or_canonical_path(path)?;
    write_traversal_graph_store(&root, path, scope)?;
    let graph_db = graph_substrate_db_path(&root, scope);
    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)?;
    let mut report = semantic_related_report_from_store(&root, scope, query, limit, kind, &store)?;
    // Surface any recovery the read-only open had to perform.
    if let Some(recovery) = store.read_only_recovery() {
        report
            .warnings
            .push(graph_db_read_recovery_diagnostic(recovery));
    }

    if json_output {
        println!("{}", to_json_schema(&report, pretty, terse, schema)?);
        return Ok(());
    }

    if compact {
        for item in &report.items {
            println!(
                "{:.3}\t{}\t{}\t{}",
                item.score, item.kind, item.label, item.handle
            );
        }
    } else {
        println!(
            "Related semantic graph rows for {:?} ({})",
            report.query, report.embedding_model
        );
        for item in &report.items {
            println!(
                "  {:.3} [{}] {} ({})",
                item.score, item.kind, item.label, item.handle
            );
            if let Some(detail) = &item.detail {
                println!("    {}", detail);
            }
            if let Some(file_path) = &item.file_path {
                println!("    file: {}", file_path);
            }
            println!("    expand: {}", item.expand);
        }
    }

    // Both text modes finish by reporting warnings on stderr.
    for warning in &report.warnings {
        eprintln!("warning: {warning}");
    }
    Ok(())
}
/// One line of a source window as emitted by `source-read`.
#[derive(Serialize)]
struct SourceLinePreview {
/// 1-based line number of this line within the file.
line: usize,
/// Line content, truncated to the response budget's preview byte limit.
text: String,
}
/// Describes which part of a file a source window covers.
#[derive(Serialize)]
struct SourceRangePreview {
/// 1-based first line of the window.
start: usize,
/// Last line of the window: the requested end clamped to `total_lines`.
end: usize,
/// Number of lines in the whole file.
total_lines: usize,
/// True when the window does not start at line 1.
truncated_before: bool,
/// True when the window ends before the last line of the file.
truncated_after: bool,
}
/// Follow-up `tsift source-read` commands for widening a source window.
#[derive(Serialize)]
struct SourceExpandCommands {
/// Command reading the lines just before the window; omitted from the
/// serialized output when the window already starts at line 1.
#[serde(skip_serializing_if = "Option::is_none")]
before: Option<String>,
/// Command reading the lines just after the window; omitted from the
/// serialized output when the window already reaches the end of the file.
#[serde(skip_serializing_if = "Option::is_none")]
after: Option<String>,
/// Command reading the file from line 1.
file: String,
}
/// Reference to an indexed symbol that overlaps a source window.
#[derive(Serialize)]
struct SourceSymbolRef {
/// Stable `ssym` handle derived from the display path, symbol name and line.
handle: String,
/// Symbol name, truncated to the response budget's preview byte limit.
name: String,
/// Symbol kind as stored in the index.
kind: String,
/// Language as stored in the index.
language: String,
/// Display path of the file containing the symbol.
file: String,
/// 1-based line on which the symbol starts.
line: usize,
/// 1-based line on which the symbol ends, when the index recorded one;
/// omitted from the serialized output otherwise.
#[serde(skip_serializing_if = "Option::is_none")]
end_line: Option<usize>,
/// Symbol signature, truncated to the preview byte limit; omitted from the
/// serialized output when the index has none.
#[serde(skip_serializing_if = "Option::is_none")]
signature: Option<String>,
/// `tsift --envelope explain` command for this symbol.
expand: String,
}
/// Reference to a stored summary for a symbol in the file being read.
#[derive(Serialize)]
struct SourceSummaryRef {
/// Stable `sum` handle derived from the summary's file path, symbol name and id.
handle: String,
/// Name of the summarized symbol, truncated to the preview byte limit.
symbol_name: String,
/// File path as recorded in the summary database.
file_path: String,
/// Summary text, truncated to the preview byte limit.
summary: String,
/// `tsift summarize ... --json` command for this symbol.
expand: String,
}
/// Complete result of a `source-read` invocation.
#[derive(Serialize)]
struct SourceReadReport {
/// Stable `swin` handle derived from the display path and the line range.
handle: String,
/// Project root the file was resolved against.
root: String,
/// Display path of the file (absolute or root-relative, as requested).
file: String,
/// The line range covered by `preview`.
range: SourceRangePreview,
/// The lines of the window, in file order.
preview: Vec<SourceLinePreview>,
/// Indexed symbols overlapping the window.
symbols: Vec<SourceSymbolRef>,
/// Stored summaries for the file.
summaries: Vec<SourceSummaryRef>,
/// Follow-up commands for reading more of the file.
expand: SourceExpandCommands,
/// Non-fatal problems encountered while loading index or summary refs;
/// omitted from the serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Resolves `file` to a canonical path that must lie inside `root`.
///
/// Relative paths are interpreted against `root`; absolute paths are used as
/// given. The result has symlinks resolved.
///
/// # Errors
///
/// Fails when either path cannot be canonicalized, when the target is not a
/// regular file, or when the canonical file is outside the canonical root.
fn resolve_source_file(root: &Path, file: &Path) -> Result<PathBuf> {
    // `Path::join` returns `file` unchanged when it is absolute, so a single
    // join covers both the relative and the absolute case.
    let candidate = root.join(file);

    let canonical = candidate
        .canonicalize()
        .with_context(|| format!("canonicalizing source file {}", candidate.display()))?;
    if !canonical.is_file() {
        bail!("source file is not a regular file: {}", canonical.display());
    }

    let canonical_root = root
        .canonicalize()
        .with_context(|| format!("canonicalizing project root {}", root.display()))?;
    // Compare canonical forms so `..` segments and symlinks cannot escape.
    if canonical.starts_with(&canonical_root) {
        return Ok(canonical);
    }
    bail!(
        "source file {} is outside project root {}",
        canonical.display(),
        canonical_root.display()
    );
}
/// Formats the `tsift source-read` command that reads `lines` lines of `file`
/// starting at line `start`, with the file and root arguments shell-quoted.
fn source_read_command(root: &Path, file: &str, start: usize, lines: usize) -> String {
    let quoted_file = shell_quote(file);
    let quoted_root = shell_quote(&root.to_string_lossy());
    format!(
        "tsift source-read {quoted_file} --path {quoted_root} --start {start} --lines {lines} --budget normal"
    )
}
/// Formats the `tsift --envelope explain` command for `symbol`, with the
/// symbol and root arguments shell-quoted.
fn source_symbol_expand_command(root: &Path, symbol: &str) -> String {
    let quoted_symbol = shell_quote(symbol);
    let quoted_root = shell_quote(&root.to_string_lossy());
    format!("tsift --envelope explain {quoted_symbol} --path {quoted_root} --budget normal")
}
/// Formats the `tsift summarize ... --json` command for `symbol`, with the
/// symbol and root arguments shell-quoted.
fn source_summary_expand_command(root: &Path, symbol: &str) -> String {
    let quoted_symbol = shell_quote(symbol);
    let quoted_root = shell_quote(&root.to_string_lossy());
    format!("tsift summarize {quoted_symbol} --path {quoted_root} --json")
}
/// Returns the symbol's start line as a 1-based line number.
///
/// The stored line is incremented by one. When it does not fit in `usize`,
/// or the increment would overflow, line 1 is returned instead.
fn source_symbol_line(symbol: &index::StoredSymbol) -> usize {
    match usize::try_from(symbol.line) {
        Ok(stored) => stored.checked_add(1).unwrap_or(1),
        Err(_) => 1,
    }
}
/// Returns the symbol's end line as a 1-based line number, if known.
///
/// Yields `None` when no end line is stored, when it does not fit in
/// `usize`, or when adding one would overflow.
fn source_symbol_end_line(symbol: &index::StoredSymbol) -> Option<usize> {
    let stored = usize::try_from(symbol.end_line?).ok()?;
    stored.checked_add(1)
}
/// Reports whether the symbol's line span overlaps the inclusive, 1-based
/// range `start..=end`.
///
/// A symbol without a recorded end line is treated as a single line. An
/// `end` of zero denotes an empty range and never matches.
fn source_symbol_intersects(symbol: &index::StoredSymbol, start: usize, end: usize) -> bool {
    if end == 0 {
        return false;
    }
    let first = source_symbol_line(symbol);
    let last = source_symbol_end_line(symbol).unwrap_or(first);
    // Two inclusive ranges overlap unless one lies entirely past the other.
    !(first > end || last < start)
}
/// Loads index symbols of `file_abs` that overlap lines `start..=end`.
///
/// This is best effort: when the index database cannot be located, opened or
/// queried, a message is appended to `warnings` and an empty list is
/// returned. At most `limit` symbols are returned; names and signatures are
/// truncated to `max_bytes`.
#[allow(clippy::too_many_arguments)]
fn load_source_symbols(
    root: &Path,
    file_abs: &Path,
    file_display: &str,
    scope: Option<&str>,
    start: usize,
    end: usize,
    limit: usize,
    max_bytes: usize,
    warnings: &mut Vec<String>,
) -> Vec<SourceSymbolRef> {
    // Every failure along the lookup path becomes a warning message.
    let lookup = || -> std::result::Result<_, String> {
        let db_path = resolve_query_db_path(root, file_abs, scope)
            .map_err(|err| format!("index refs unavailable: {err:#}"))?;
        if !db_path.exists() {
            return Err(format!(
                "index refs unavailable: no index found at {}",
                db_path.display()
            ));
        }
        let db = index::IndexDb::open_read_only_resilient(&db_path)
            .map_err(|err| format!("index refs unavailable: {err:#}"))?;
        // The index is queried by the absolute path.
        let file_key = file_abs.to_string_lossy().to_string();
        db.symbols_for_file(&file_key)
            .map_err(|err| format!("symbol refs unavailable: {err:#}"))
    };
    let symbols = match lookup() {
        Ok(symbols) => symbols,
        Err(message) => {
            warnings.push(message);
            return Vec::new();
        }
    };

    let overlapping = symbols
        .into_iter()
        .filter(|symbol| source_symbol_intersects(symbol, start, end))
        .take(limit);
    let mut refs = Vec::new();
    for symbol in overlapping {
        let line = source_symbol_line(&symbol);
        let end_line = source_symbol_end_line(&symbol);
        let handle = stable_handle(
            "ssym",
            &format!("{}:{}:{}", file_display, symbol.name, line),
        );
        let name = truncate_for_budget(&symbol.name, max_bytes);
        // The expand command uses the full, untruncated symbol name.
        let expand = source_symbol_expand_command(root, &symbol.name);
        refs.push(SourceSymbolRef {
            handle,
            name,
            kind: symbol.kind,
            language: symbol.language,
            file: file_display.to_string(),
            line,
            end_line,
            signature: symbol
                .signature
                .map(|signature| truncate_for_budget(&signature, max_bytes)),
            expand,
        });
    }
    refs
}
/// Loads stored summaries for `file_display` from `.tsift/summaries.db`.
///
/// A missing database is not an error and yields an empty list without a
/// warning. When the database cannot be opened or queried, a message is
/// appended to `warnings` and an empty list is returned. At most `limit`
/// summaries are returned; symbol names and summary text are truncated to
/// `max_bytes`.
fn load_source_summaries(
    root: &Path,
    file_display: &str,
    limit: usize,
    max_bytes: usize,
    warnings: &mut Vec<String>,
) -> Vec<SourceSummaryRef> {
    let db_path = root.join(".tsift/summaries.db");
    if !db_path.exists() {
        return Vec::new();
    }

    // Open and query failures share the same warning prefix.
    let fetch = || -> std::result::Result<_, String> {
        let db = summarize::SummaryDb::open_read_only_resilient(&db_path)
            .map_err(|err| format!("summary refs unavailable: {err:#}"))?;
        db.get_by_file(file_display)
            .map_err(|err| format!("summary refs unavailable: {err:#}"))
    };
    let summaries = match fetch() {
        Ok(summaries) => summaries,
        Err(message) => {
            warnings.push(message);
            return Vec::new();
        }
    };

    let mut refs = Vec::new();
    for summary in summaries.into_iter().take(limit) {
        let handle = stable_handle(
            "sum",
            &format!(
                "{}:{}:{}",
                summary.file_path, summary.symbol_name, summary.id
            ),
        );
        let symbol_name = truncate_for_budget(&summary.symbol_name, max_bytes);
        let text = truncate_for_budget(&summary.summary, max_bytes);
        // The expand command uses the full, untruncated symbol name.
        let expand = source_summary_expand_command(root, &summary.symbol_name);
        refs.push(SourceSummaryRef {
            handle,
            symbol_name,
            file_path: summary.file_path,
            summary: text,
            expand,
        });
    }
    refs
}
#[allow(clippy::too_many_arguments)]
fn cmd_source_read(
file: &Path,
path: &Path,
start: usize,
lines: usize,
end: Option<usize>,
scope: Option<&str>,
format: OutputFormat,
absolute: bool,
budget: ResponseBudget,
) -> Result<()> {
if start == 0 {
bail!("--start is 1-based and must be greater than zero");
}
if lines == 0 {
bail!("--lines must be greater than zero");
}
if let Some(end) = end
&& end < start
{
bail!("--end must be greater than or equal to --start");
}
let root = lint::resolve_project_root_or_canonical_path(path)?;
let file_abs = resolve_source_file(&root, file)?;
let file_display = if absolute {
file_abs.to_string_lossy().to_string()
} else {
relativize_pathbuf(&file_abs, &root)
.to_string_lossy()
.to_string()
};
let source = fs::read(&file_abs).with_context(|| format!("reading {}", file_abs.display()))?;
let text = String::from_utf8_lossy(&source);
let all_lines: Vec<&str> = text.lines().collect();
let total_lines = all_lines.len();
if total_lines > 0 && start > total_lines {
bail!(
"--start {} is beyond end of {} ({} lines)",
start,
file_display,
total_lines
);
}
let requested_end = end.unwrap_or_else(|| start.saturating_add(lines).saturating_sub(1));
let end_line = requested_end.min(total_lines);
let max_bytes = budget.preview_bytes();
let preview = if total_lines == 0 {
Vec::new()
} else {
all_lines[(start - 1)..end_line]
.iter()
.enumerate()
.map(|(idx, line)| SourceLinePreview {
line: start + idx,
text: truncate_for_budget(line, max_bytes),
})
.collect()
};
let mut warnings = Vec::new();
let max_items = budget.preview_items();
let symbols = load_source_symbols(
&root,
&file_abs,
&file_display,
scope,
start,
end_line,
max_items,
max_bytes,
&mut warnings,
);
let summaries =
load_source_summaries(&root, &file_display, max_items, max_bytes, &mut warnings);
let effective_lines = end_line.saturating_sub(start).saturating_add(1).max(1);
let expand = SourceExpandCommands {
before: (start > 1).then(|| {
let before_start = start.saturating_sub(lines).max(1);
source_read_command(&root, &file_display, before_start, start - before_start)
}),
after: (end_line < total_lines)
.then(|| source_read_command(&root, &file_display, end_line + 1, lines)),
file: source_read_command(&root, &file_display, 1, total_lines.max(effective_lines)),
};
let report = SourceReadReport {
handle: stable_handle("swin", &format!("{file_display}:{start}:{end_line}")),
root: root.to_string_lossy().to_string(),
file: file_display,
range: SourceRangePreview {
start,
end: end_line,
total_lines,
truncated_before: start > 1,
truncated_after: end_line < total_lines,
},
preview,
symbols,
summaries,
expand,
warnings,
};
if format.json_output {
let truncated = report.range.truncated_before || report.range.truncated_after;
let follow_up = [
report.expand.before.clone(),
report.expand.after.clone(),
Some(report.expand.file.clone()),
]
.into_iter()
.flatten()
.collect::<Vec<_>>();
print_json_or_envelope(
&report,
&format,
"source-read",
"window",
ToolEnvelopeSummary {
text: format!(
"source window {}:{}-{}",
report.file, report.range.start, report.range.end
),
metrics: vec![
envelope_metric("lines", report.preview.len()),
envelope_metric("symbols", report.symbols.len()),
envelope_metric("summaries", report.summaries.len()),
],
},
truncated,
follow_up,
)?;
} else if format.compact {
println!(
"source {}:{}-{} / {} handle:{}",
report.file,
report.range.start,
report.range.end,
report.range.total_lines,
report.handle
);
for line in &report.preview {
println!("{:>5} {}", line.line, line.text);
}
if !report.symbols.is_empty() {
println!("syms[{}]:", report.symbols.len());
for symbol in &report.symbols {
println!(" {} {}:{}", symbol.name, symbol.file, symbol.line);
}
}
if report.range.truncated_before || report.range.truncated_after {
println!("expand: {}", report.expand.file);
}
} else {
println!(
"Source window `{}` lines {}-{} of {} ({})",
report.file,
report.range.start,
report.range.end,
report.range.total_lines,
report.handle
);
for line in &report.preview {
println!("{:>5} | {}", line.line, line.text);
}
if !report.symbols.is_empty() {
println!();
println!("Symbol refs:");
for symbol in &report.symbols {
println!(
" {} `{}` {}:{} — {}",
symbol.handle, symbol.name, symbol.file, symbol.line, symbol.expand
);
}
}
if !report.summaries.is_empty() {
println!();
println!("Summary refs:");
for summary in &report.summaries {
println!(
" {} `{}` — {}",
summary.handle, summary.symbol_name, summary.expand
);
}
}
if report.range.truncated_before || report.range.truncated_after {
println!();
println!("Expand:");
if let Some(before) = &report.expand.before {
println!(" before: {}", before);
}
if let Some(after) = &report.expand.after {
println!(" after: {}", after);
}
println!(" file: {}", report.expand.file);
}
for warning in &report.warnings {
eprintln!("warning: {warning}");
}
}
Ok(())
}
/// One definition row in an `explain` budget preview.
///
/// `build_explain_budget_report` creates one per indexed symbol, up to the
/// budget's item limit. The previous `#[allow(clippy::too_many_arguments)]`
/// was dropped: that lint only applies to function signatures, so on a struct
/// it suppressed nothing.
#[derive(Serialize)]
struct ExplainBudgetDefinitionPreview {
    /// Compact handle returned by `build_compact_symbol_ref` (prefix `edef`).
    handle: String,
    /// Tag alias returned by `build_compact_symbol_ref`; omitted from the
    /// serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tag_alias: Option<String>,
    /// Symbol kind copied from the stored symbol.
    kind: String,
    /// Symbol name as returned by `build_compact_symbol_ref`.
    name: String,
    /// Defining file, passed through `truncate_for_budget`.
    file: String,
    /// Line recorded on the stored symbol.
    line: i64,
    /// `tsift search … --exact --path … --limit 20` follow-up command.
    expand: String,
}
/// Caller or callee row in an `explain` budget preview.
///
/// `build_explain_budget_report` fills one per call edge: `handle`,
/// `tag_alias`, and `name` come from `build_compact_symbol_ref`, `file` is the
/// budget-truncated caller file of the edge, `line` is the call-site line, and
/// `expand` is a `tsift explain …` follow-up command.
#[derive(Serialize)]
struct ExplainBudgetEdgePreview {
handle: String,
// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
tag_alias: Option<String>,
name: String,
file: String,
line: i64,
expand: String,
}
/// Community section of an `explain` budget preview.
///
/// As built by `build_explain_budget_report`, `size` is the full member count
/// while `members` holds only the first budget-limited, truncated names.
#[derive(Serialize)]
struct ExplainBudgetCommunityPreview {
size: usize,
members: Vec<String>,
}
/// Budget-limited `explain` report produced by `build_explain_budget_report`
/// and rendered by `print_explain_budget_human`.
///
/// `max_items` / `max_bytes` record the budget limits that were applied. The
/// `*_total` fields carry the un-previewed counts, and `truncated` is true
/// when any section holds more entries than were previewed.
#[derive(Serialize)]
struct ExplainBudgetReport {
symbol: String,
max_items: usize,
max_bytes: usize,
definition_total: usize,
callers_total: usize,
callers_truncated_by_limit: bool,
callees_total: usize,
callees_truncated_by_limit: bool,
truncated: bool,
definitions: Vec<ExplainBudgetDefinitionPreview>,
callers: Vec<ExplainBudgetEdgePreview>,
callees: Vec<ExplainBudgetEdgePreview>,
// Omitted from the serialized output when no community was supplied.
#[serde(skip_serializing_if = "Option::is_none")]
community: Option<ExplainBudgetCommunityPreview>,
}
/// Assembles the budget-limited `explain` report for `symbol`.
///
/// At most `budget.preview_items()` definitions, callers, callees, and
/// community members are previewed; names and paths are shortened with
/// `budget.preview_bytes()`. `callers_total`, `callees_total`, and the two
/// `*_truncated_by_limit` flags are copied through unchanged. `_root` is
/// currently unused.
///
/// The returned report's `truncated` flag is set when any section has more
/// entries than were previewed.
#[allow(clippy::too_many_arguments)]
pub(crate) fn build_explain_budget_report(
    symbol: &str,
    _root: &Path,
    symbols: &[index::StoredSymbol],
    callers: &[index::StoredEdge],
    callers_total: usize,
    callers_truncated_by_limit: bool,
    callees: &[index::StoredEdge],
    callees_total: usize,
    callees_truncated_by_limit: bool,
    community: Option<&graph::Community>,
    budget: ResponseBudget,
) -> ExplainBudgetReport {
    let max_items = budget.preview_items();
    let max_bytes = budget.preview_bytes();

    // Definition rows: one per stored symbol, capped at the item budget.
    let mut definitions = Vec::with_capacity(symbols.len().min(max_items));
    for entry in symbols.iter().take(max_items) {
        let identity = format!(
            "{}:{}:{}:{}",
            entry.kind, entry.name, entry.file, entry.line
        );
        let symbol_ref = build_compact_symbol_ref(
            "edef",
            &identity,
            &entry.name,
            entry.tags.as_deref(),
            max_bytes,
        );
        definitions.push(ExplainBudgetDefinitionPreview {
            handle: symbol_ref.handle,
            tag_alias: symbol_ref.tag_alias,
            kind: entry.kind.clone(),
            name: symbol_ref.name,
            file: truncate_for_budget(&entry.file, max_bytes),
            line: entry.line,
            expand: format!(
                "tsift search {} --exact --path {} --limit 20",
                shell_quote(&entry.name),
                shell_quote(&entry.file)
            ),
        });
    }

    // Caller rows: keyed on the calling symbol and its call site.
    let mut callers_preview: Vec<ExplainBudgetEdgePreview> =
        Vec::with_capacity(callers.len().min(max_items));
    for entry in callers.iter().take(max_items) {
        let identity = format!(
            "{}:{}:{}:{}",
            entry.caller_name, entry.caller_file, entry.call_site_line, symbol
        );
        let symbol_ref =
            build_compact_symbol_ref("ecall", &identity, &entry.caller_name, None, max_bytes);
        callers_preview.push(ExplainBudgetEdgePreview {
            handle: symbol_ref.handle,
            tag_alias: symbol_ref.tag_alias,
            name: symbol_ref.name,
            file: truncate_for_budget(&entry.caller_file, max_bytes),
            line: entry.call_site_line,
            expand: format!(
                "tsift explain {} --path {} --limit 0",
                shell_quote(&entry.caller_name),
                shell_quote(&entry.caller_file)
            ),
        });
    }

    // Callee rows: keyed on the called symbol, located at the call site.
    let mut callees_preview: Vec<ExplainBudgetEdgePreview> =
        Vec::with_capacity(callees.len().min(max_items));
    for entry in callees.iter().take(max_items) {
        let identity = format!(
            "{}:{}:{}:{}",
            entry.callee_name, entry.caller_file, entry.call_site_line, symbol
        );
        let symbol_ref =
            build_compact_symbol_ref("eces", &identity, &entry.callee_name, None, max_bytes);
        callees_preview.push(ExplainBudgetEdgePreview {
            handle: symbol_ref.handle,
            tag_alias: symbol_ref.tag_alias,
            name: symbol_ref.name,
            file: truncate_for_budget(&entry.caller_file, max_bytes),
            line: entry.call_site_line,
            expand: format!(
                "tsift explain {} --path {} --limit 0",
                shell_quote(&entry.callee_name),
                shell_quote(&entry.caller_file)
            ),
        });
    }

    let community_preview = community.map(|entry| {
        let mut members = Vec::new();
        for member in entry.members.iter().take(max_items) {
            members.push(truncate_for_budget(&member.name, max_bytes));
        }
        ExplainBudgetCommunityPreview {
            size: entry.members.len(),
            members,
        }
    });

    // Any section with more entries than previewed marks the report truncated.
    let definitions_cut = symbols.len() > max_items;
    let callers_cut = callers_total > callers_preview.len();
    let callees_cut = callees_total > callees_preview.len();
    let community_cut = community.is_some_and(|entry| entry.members.len() > max_items);

    ExplainBudgetReport {
        symbol: symbol.to_string(),
        max_items,
        max_bytes,
        definition_total: symbols.len(),
        callers_total,
        callers_truncated_by_limit,
        callees_total,
        callees_truncated_by_limit,
        truncated: definitions_cut || callers_cut || callees_cut || community_cut,
        definitions,
        callers: callers_preview,
        callees: callees_preview,
        community: community_preview,
    }
}
pub(crate) fn print_explain_budget_human(report: &ExplainBudgetReport) {
println!(
"explain-budget sym:{} defs:{}/{} crs:{}/{} ces:{}/{}",
shell_quote(&report.symbol),
report.definitions.len(),
report.definition_total,
report.callers.len(),
report.callers_total,
report.callees.len(),
report.callees_total
);
for entry in &report.definitions {
println!(
"def {} {} {}:{} expand:{}",
format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
entry.kind,
entry.file,
entry.line,
entry.expand
);
}
for entry in &report.callers {
println!(
"caller {} {}:{} expand:{}",
format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
entry.file,
entry.line,
entry.expand
);
}
for entry in &report.callees {
println!(
"callee {} {}:{} expand:{}",
format_symbol_preview_line(&entry.handle, &entry.name, entry.tag_alias.as_deref()),
entry.file,
entry.line,
entry.expand
);
}
if let Some(community) = &report.community {
println!(
"community size:{} members:{}",
community.size,
community.members.join(", ")
);
}
if report.truncated {
println!(
"budget truncated items:{} bytes:{}",
report.max_items, report.max_bytes
);
}
}
/// Directory names that `tagpath_audit_policy_hints` reports as
/// `skip_dir:<name>` hints when they appear among a path's parent components.
///
/// NOTE(review): presumably mirrors tagpath's hard-coded `SKIP_DIRS` — confirm
/// against the tagpath crate.
///
/// Background: the tagpath audit reconciles the tsift symbol index against the
/// tagpath `.naming/index.json` source set and reports files covered by one
/// but not the other.
///
/// Today silent recall loss happens when tagpath's `[exclude]` / `extends`
/// chain or its hard-coded `SKIP_DIRS` skip files or languages that tsift
/// still indexes — the tsift symbols in those files cannot resolve a
/// `tagpath_handle` even with a fresh tagpath index. This audit surfaces
/// the diff so operators can decide whether to broaden the tagpath walk,
/// add an `[exclude]` to tsift, or accept the gap.
const TAGPATH_AUDIT_SKIP_DIRS: &[&str] = &[
".git",
"node_modules",
"target",
"__pycache__",
".venv",
"vendor",
];
/// Built-in file extensions (lowercase, no leading dot) that seed the set
/// returned by `tagpath_audit_supported_extensions`; extensions declared by
/// grammars in `.naming.toml` are added on top of this list there.
const TAGPATH_AUDIT_SOURCE_EXTENSIONS: &[&str] = &[
"rs", "py", "ts", "js", "go", "java", "rb", "c", "cpp", "h", "hpp", "cs", "swift", "kt",
"scala", "zig", "nim", "ex", "exs", "erl", "hs", "ml", "clj", "r", "lua", "php", "pl", "d",
"cr", "dart", "jl", "v", "odin", "gleam", "rkt", "scm", "lisp", "lsp", "f", "fs", "fsi", "fsx",
"sh", "bash", "zsh", "sql", "css", "tsx",
];
/// Returns the set of file extensions the tagpath audit treats as supported.
///
/// Starts from `TAGPATH_AUDIT_SOURCE_EXTENSIONS` and, when `<root>/.naming.toml`
/// exists and resolves, adds every normalized extension declared by its
/// grammars. A config that fails to resolve is reported on stderr and the
/// built-in set is returned as is.
pub(crate) fn tagpath_audit_supported_extensions(root: &Path) -> BTreeSet<String> {
    let mut extensions: BTreeSet<String> = TAGPATH_AUDIT_SOURCE_EXTENSIONS
        .iter()
        .copied()
        .map(str::to_string)
        .collect();

    let config_path = root.join(".naming.toml");
    if !config_path.exists() {
        return extensions;
    }

    let config = match tagpath::config::resolve(&config_path) {
        Ok(config) => config,
        Err(err) => {
            eprintln!("tagpath_policy_hint_config_unreadable: {err}");
            return extensions;
        }
    };

    // No grammar section means nothing beyond the built-in list.
    let Some(grammars) = config.grammars else {
        return extensions;
    };
    for grammar in grammars.languages.values() {
        for ext in &grammar.extensions {
            if let Some(normalized) = normalize_extension(ext) {
                extensions.insert(normalized);
            }
        }
    }
    extensions
}
/// Computes policy hints explaining why tagpath may not cover `rel_path`.
///
/// Emits `skip_dir:<name>` for every parent directory component listed in
/// `TAGPATH_AUDIT_SKIP_DIRS`, and `extension_unsupported` when the file has an
/// extension that is not in `supported_extensions`. Hints are returned sorted
/// and de-duplicated.
pub(crate) fn tagpath_audit_policy_hints(
    rel_path: &str,
    supported_extensions: &BTreeSet<String>,
) -> Vec<String> {
    let path = Path::new(rel_path);
    let mut hints = BTreeSet::new();

    // Only directory components are inspected; the file name itself is not.
    let parent_components = path
        .parent()
        .into_iter()
        .flat_map(|parent| parent.components());
    for component in parent_components {
        let std::path::Component::Normal(name) = component else {
            continue;
        };
        let name = name.to_string_lossy();
        if TAGPATH_AUDIT_SKIP_DIRS.contains(&name.as_ref()) {
            hints.insert(format!("skip_dir:{name}"));
        }
    }

    let extension_unsupported = path
        .extension()
        .and_then(|ext| ext.to_str())
        .and_then(normalize_extension)
        .is_some_and(|ext| !supported_extensions.contains(&ext));
    if extension_unsupported {
        hints.insert("extension_unsupported".to_string());
    }

    hints.into_iter().collect()
}
/// Normalizes a file extension: trims surrounding whitespace, drops leading
/// dots, and lowercases ASCII letters. Returns `None` when nothing is left.
fn normalize_extension(ext: &str) -> Option<String> {
    let stripped = ext.trim().trim_start_matches('.');
    if stripped.is_empty() {
        return None;
    }
    Some(stripped.to_ascii_lowercase())
}
/// Maps a diff-digest file status to its lowercase display label.
pub(crate) fn diff_digest_status_label(status: diff_digest::DiffDigestFileStatus) -> &'static str {
match status {
diff_digest::DiffDigestFileStatus::Added => "added",
diff_digest::DiffDigestFileStatus::Modified => "modified",
diff_digest::DiffDigestFileStatus::Deleted => "deleted",
}
}
/// Maps a diff-digest summary state to its lowercase display label.
pub(crate) fn diff_digest_summary_label(
state: diff_digest::DiffDigestSummaryState,
) -> &'static str {
match state {
diff_digest::DiffDigestSummaryState::Current => "current",
diff_digest::DiffDigestSummaryState::Stale => "stale",
diff_digest::DiffDigestSummaryState::Missing => "missing",
diff_digest::DiffDigestSummaryState::Unavailable => "unavailable",
}
}
/// Maps a test-digest summary state to its lowercase display label; used by
/// `render_test_digest_from_input` for the cached-summary column.
fn test_digest_summary_label(state: test_digest::TestDigestSummaryState) -> &'static str {
match state {
test_digest::TestDigestSummaryState::Current => "current",
test_digest::TestDigestSummaryState::Stale => "stale",
test_digest::TestDigestSummaryState::Missing => "missing",
test_digest::TestDigestSummaryState::Unavailable => "unavailable",
}
}
/// Maps a log-digest summary state to its lowercase display label.
fn log_digest_summary_label(state: log_digest::LogDigestSummaryState) -> &'static str {
match state {
log_digest::LogDigestSummaryState::Current => "current",
log_digest::LogDigestSummaryState::Stale => "stale",
log_digest::LogDigestSummaryState::Missing => "missing",
log_digest::LogDigestSummaryState::Unavailable => "unavailable",
}
}
/// Maps a diff-digest mode to its short machine-style label
/// (`worktree`, `cached`, `revision`); `cmd_impact` prints it in its headers.
pub(crate) fn diff_digest_mode_label(mode: diff_digest::DiffDigestMode) -> &'static str {
match mode {
diff_digest::DiffDigestMode::WorkingTree => "worktree",
diff_digest::DiffDigestMode::Cached => "cached",
diff_digest::DiffDigestMode::Revision => "revision",
}
}
/// Human-readable description of the diff mode a report was computed for.
///
/// Revision mode includes the revision string when the report carries one.
pub(crate) fn diff_digest_mode_display(report: &diff_digest::DiffDigestReport) -> String {
    match report.mode {
        diff_digest::DiffDigestMode::WorkingTree => String::from("working tree"),
        diff_digest::DiffDigestMode::Cached => String::from("staged index"),
        diff_digest::DiffDigestMode::Revision => match &report.revision {
            Some(revision) => format!("revision {revision}"),
            None => String::from("revision"),
        },
    }
}
/// Message shown when a diff digest contains no changes, worded for the mode
/// (and, in revision mode, the revision) the report was computed for.
pub(crate) fn diff_digest_empty_message(report: &diff_digest::DiffDigestReport) -> String {
    match report.mode {
        diff_digest::DiffDigestMode::WorkingTree => String::from("No git changes found."),
        diff_digest::DiffDigestMode::Cached => String::from("No staged git changes found."),
        diff_digest::DiffDigestMode::Revision => match &report.revision {
            Some(revision) => format!("No diff found for revision {revision}."),
            None => String::from("No revision diff found."),
        },
    }
}
/// Runs the impact computation for `path` and prints the result.
///
/// Output is JSON when `format.json_output` is set, one-line-per-target when
/// `format.compact` is set, and a multi-line human listing otherwise. Warnings
/// are printed to stdout in the compact and human modes.
///
/// # Errors
/// Propagates failures from `impact::compute` and from JSON rendering.
fn cmd_impact(
    path: &Path,
    cached: bool,
    revision: Option<&str>,
    scope: Option<&str>,
    limit: usize,
    format: OutputFormat,
) -> Result<()> {
    let options = impact::ImpactOptions {
        cached,
        revision,
        scope,
        limit,
    };
    let report = impact::compute(path, options)?;

    if format.json_output {
        let rendered = to_json_schema(&report, format.pretty, format.terse, format.schema)?;
        println!("{rendered}");
    } else if format.compact {
        println!(
            "impact mode:{} changed:{} symbols:{} tests:{}/{}",
            diff_digest_mode_label(report.mode),
            report.changed_files.len(),
            report.changed_symbols.len(),
            report.affected_tests.len(),
            report.affected_tests_total
        );
        for target in &report.affected_tests {
            let commands = target.commands.join(" && ");
            println!(
                "{} reasons:{} command:{}",
                target.path,
                target.reasons.len(),
                commands
            );
        }
        // Compact warnings intentionally use the colon-less prefix.
        for warning in &report.warnings {
            println!("warning {warning}");
        }
    } else {
        println!("Impact ({})", diff_digest_mode_label(report.mode));
        println!(" changed files: {}", report.changed_files.len());
        println!(" changed symbols: {}", report.changed_symbols.len());
        println!(
            " affected tests: {}/{}",
            report.affected_tests.len(),
            report.affected_tests_total
        );
        for target in &report.affected_tests {
            println!();
            println!("{}", target.path);
            for reason in &target.reasons {
                println!(" - {reason}");
            }
            if !target.symbols.is_empty() {
                println!(" symbols: {}", target.symbols.join(", "));
            }
            for command in &target.commands {
                println!(" run: {command}");
            }
        }
        for warning in &report.warnings {
            println!("warning: {warning}");
        }
    }
    Ok(())
}
/// Computes a test digest from raw runner output and prints it.
///
/// JSON mode prints the serialized report and nothing else. Otherwise a
/// "no failures" line, a compact per-group listing, or a multi-line human
/// listing is printed, followed in every non-JSON case by the report's
/// warnings on stdout.
///
/// # Errors
/// Propagates failures from `test_digest::compute` and from JSON rendering.
pub(crate) fn render_test_digest_from_input(
    path: &Path,
    input: &str,
    runner: Option<&str>,
    format: OutputFormat,
) -> Result<()> {
    let report = test_digest::compute(path, input, runner)?;

    if format.json_output {
        let rendered = to_json_schema(&report, format.pretty, format.terse, format.schema)?;
        println!("{rendered}");
        return Ok(());
    }

    if report.failure_groups.is_empty() {
        // The empty case takes precedence over compact/human formatting.
        println!("No failures detected (runner: {}).", report.runner);
    } else if format.compact {
        // NOTE(review): the `failed` fallback is the group count rather than
        // `report.failures` — confirm this is intended.
        println!(
            "test runner:{} failures:{} groups:{} passed:{} failed:{} skipped:{}",
            report.runner,
            report.failures,
            report.grouped_failures,
            report.counts.passed.unwrap_or(0),
            report.counts.failed.unwrap_or(report.grouped_failures),
            report.counts.skipped.unwrap_or(0),
        );
        for failure in &report.failure_groups {
            let location = match (&failure.path, failure.line) {
                (Some(path), Some(line)) => format!("{path}:{line}"),
                (Some(path), None) => path.clone(),
                _ => "-".to_string(),
            };
            let tests = truncate_for_compact(&failure.tests.join(","), 60);
            let summaries = test_digest_summary_label(failure.summary_state);
            let message = truncate_for_compact(&failure.message, 80);
            println!(
                "{} tests:{} count:{} summaries:{} msg:{}",
                location, tests, failure.occurrences, summaries, message
            );
        }
    } else {
        println!("Test digest ({})", report.runner);
        println!(" failures: {}", report.failures);
        println!(" failure groups: {}", report.grouped_failures);
        // Counts are only shown when the runner output provided them.
        if let Some(passed) = report.counts.passed {
            println!(" passed: {passed}");
        }
        if let Some(failed) = report.counts.failed {
            println!(" failed: {failed}");
        }
        if let Some(skipped) = report.counts.skipped {
            println!(" skipped: {skipped}");
        }
        for failure in &report.failure_groups {
            println!();
            match (&failure.path, failure.line, failure.column) {
                (Some(path), Some(line), Some(column)) => println!("{path}:{line}:{column}"),
                (Some(path), Some(line), None) => println!("{path}:{line}"),
                (Some(path), None, _) => println!("{path}"),
                (None, _, _) => println!("(no file anchor)"),
            }
            println!(" tests: {}", failure.tests.join(", "));
            println!(" occurrences: {}", failure.occurrences);
            println!(" message: {}", failure.message);
            println!(
                " cached summaries: {}",
                test_digest_summary_label(failure.summary_state)
            );
            for summary in &failure.current_summaries {
                let text = truncate_for_compact(&summary.summary, 160);
                println!(" - {}: {}", summary.symbol, text);
            }
        }
    }

    for warning in &report.warnings {
        println!("warning: {warning}");
    }
    Ok(())
}
/// Risk level attached to conflict-matrix candidates and pairs.
///
/// The derived `Ord` follows declaration order, so
/// `Low < Medium < High < FailClosed`. Serialized in snake_case
/// (for example `fail_closed`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
#[serde(rename_all = "snake_case")]
enum ConflictMatrixRisk {
Low,
Medium,
High,
FailClosed,
}
/// Lists of overlapping files, symbols, tests, and config files.
///
/// `conflict_matrix_staged_overlap` builds it from a cached diff: `files` and
/// `symbols` are sorted intersections, `config_files` is the subset of
/// `files` accepted by `is_planner_config_path`, and `tests` is left empty.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixOverlap {
files: Vec<String>,
symbols: Vec<String>,
tests: Vec<String>,
config_files: Vec<String>,
}
/// Source range reference derived from a graph node's properties.
///
/// Built by `conflict_matrix_source_handle`: `start` falls back to 1 and
/// `end` to `start` when the property is missing or unparsable; `reason` and
/// `expand` fall back to empty strings.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixSourceHandle {
handle: String,
file: String,
start: usize,
end: usize,
reason: String,
expand: String,
}
/// Reference to a semantic graph node, built by `conflict_matrix_semantic_ref`.
///
/// `kind` and `label` are copied from the node; the remaining fields come
/// from node properties with fallbacks described on that function.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixSemanticRef {
handle: String,
kind: String,
label: String,
// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
source_file: Option<String>,
// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
source_symbol: Option<String>,
expand: String,
}
/// Token and size budget figures embedded in `ConflictMatrixOwnershipBlock`
/// and `ConflictMatrixWorkerPromptPacket`.
///
/// NOTE(review): how the estimates and limits are computed is decided where
/// this struct is populated, outside this section.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixTokenBudget {
prompt_estimated_tokens: usize,
max_prompt_tokens: usize,
source_window_count: usize,
source_window_lines: usize,
max_context_bytes: usize,
}
/// Context references (files, handles, expansion commands) carried by
/// `ConflictMatrixCandidate` and `ConflictMatrixWorkerPromptPacket` as
/// `required_context`. Populated outside this section.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixRequiredContext {
read_only_files: Vec<String>,
source_handles: Vec<String>,
worker_context_handles: Vec<String>,
semantic_handles: Vec<String>,
expansion_commands: Vec<String>,
}
/// Graph identifiers and handle lists carried by `ConflictMatrixCandidate`
/// and `ConflictMatrixWorkerPromptPacket` as `graph_handles`. Populated
/// outside this section.
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixGraphHandles {
target_node_id: String,
evidence_packet_id: String,
worker_prompt_packet_id: String,
// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
projection_hash: Option<String>,
source_handles: Vec<String>,
worker_context_handles: Vec<String>,
semantic_handles: Vec<String>,
}
/// Aggregated feedback from earlier worker results for one target.
///
/// Returned by `conflict_matrix_worker_feedback`, which derives it from
/// worker-result graph nodes (status counts plus list-valued properties).
#[derive(Clone, Debug, Default, Serialize)]
struct ConflictMatrixWorkerFeedback {
total: usize,
completed: usize,
blocked: usize,
touched_files: Vec<String>,
expected_tests: Vec<String>,
follow_up_ids: Vec<String>,
outcome_history: Vec<String>,
repeated_blockage: bool,
stale_expected_tests: Vec<String>,
follow_up_debt: Vec<String>,
closure_rank_score: usize,
closure_rank_reasons: Vec<String>,
// Omitted from the serialized output when empty. `default` is a
// deserialization attribute; this type only derives `Serialize`.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Ownership description embedded in `ConflictMatrixCandidate` as
/// `ownership`: owned, read-only, and forbidden file lists, expected tests,
/// expansion commands, a token budget, and a prompt string. Populated outside
/// this section.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixOwnershipBlock {
contract_version: &'static str,
title: String,
owned_files: Vec<String>,
owned_symbols: Vec<String>,
read_only_context: Vec<String>,
read_only_files: Vec<String>,
forbidden_files: Vec<String>,
expected_tests: Vec<String>,
expansion_commands: Vec<String>,
token_budget: ConflictMatrixTokenBudget,
prompt: String,
}
/// Serialized worker prompt packet for one target; listed in
/// `ConflictMatrixReport::worker_prompt_packets`. Populated outside this
/// section.
///
/// NOTE(review): many fields share names with `ConflictMatrixCandidate` and
/// `ConflictMatrixOwnershipBlock` fields and presumably mirror them — confirm
/// at the construction site.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixWorkerPromptPacket {
contract_version: &'static str,
packet_id: String,
target: String,
rank: usize,
risk: ConflictMatrixRisk,
previously_completed: bool,
parallel_safe: bool,
blocks: Vec<String>,
blocked_by: Vec<String>,
required_context: ConflictMatrixRequiredContext,
graph_handles: ConflictMatrixGraphHandles,
// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
projection_hash: Option<String>,
title: String,
owned_files: Vec<String>,
owned_symbols: Vec<String>,
read_only_context: Vec<String>,
forbidden_files: Vec<String>,
expected_tests: Vec<String>,
expansion_commands: Vec<String>,
token_budget: ConflictMatrixTokenBudget,
semantic_dispatch_score: usize,
semantic_dispatch_reasons: Vec<String>,
worker_feedback: ConflictMatrixWorkerFeedback,
prompt: String,
}
/// Per-target entry of the conflict matrix; listed in
/// `ConflictMatrixReport::candidates`. Populated outside this section.
///
/// Several field types are produced by helpers in this file:
/// `semantic_related` entries by `conflict_matrix_semantic_ref`,
/// `source_handles` entries by `conflict_matrix_source_handle`,
/// `staged_overlap` by `conflict_matrix_staged_overlap`, and
/// `worker_feedback` by `conflict_matrix_worker_feedback`.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixCandidate {
rank: usize,
target: String,
evidence_packet_id: String,
// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
projection_hash: Option<String>,
target_node_id: String,
target_kind: String,
target_label: String,
risk: ConflictMatrixRisk,
previously_completed: bool,
parallel_safe: bool,
blocks: Vec<String>,
blocked_by: Vec<String>,
required_context: ConflictMatrixRequiredContext,
graph_handles: ConflictMatrixGraphHandles,
risk_score: usize,
risk_reasons: Vec<String>,
owned_files: Vec<String>,
owned_symbols: Vec<String>,
config_files: Vec<String>,
affected_tests: Vec<String>,
worker_context: Vec<String>,
semantic_related: Vec<ConflictMatrixSemanticRef>,
semantic_dispatch_score: usize,
semantic_dispatch_reasons: Vec<String>,
worker_feedback: ConflictMatrixWorkerFeedback,
source_handles: Vec<ConflictMatrixSourceHandle>,
worker_context_handles: Vec<String>,
staged_overlap: ConflictMatrixOverlap,
ownership: ConflictMatrixOwnershipBlock,
}
/// Record relating two targets (`left`, `right`) with the files, symbols,
/// tests, and config files they share, plus a risk level, score, and verdict
/// string. Listed in `ConflictMatrixReport::conflicts`; populated outside
/// this section.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixPair {
left: String,
right: String,
risk: ConflictMatrixRisk,
risk_score: usize,
shared_files: Vec<String>,
shared_symbols: Vec<String>,
shared_tests: Vec<String>,
shared_config_files: Vec<String>,
verdict: String,
}
/// Summary of the inputs behind a conflict-matrix report: evidence targets
/// and packets, preparation and cache summaries, phase timings, and the
/// command strings for the context-pack, cached-diff, and impact inputs.
/// Embedded in `ConflictMatrixReport::inputs`; populated outside this section.
#[derive(Serialize)]
struct ConflictMatrixInputSummary {
graph_db_evidence_targets: Vec<String>,
evidence_packets: Vec<ConflictMatrixEvidencePacketSummary>,
shared_preparation: ConflictMatrixSharedPreparationSummary,
preparation_cache: ConflictMatrixPreparationCacheSummary,
preparation_timings: Vec<GraphDbBackendEvalPhaseTiming>,
context_pack_command: String,
cached_diff_command: String,
impact_command: String,
}
/// File and line range copied from a context-pack source window by
/// `ConflictMatrixPreparedContext::from_context_pack`.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparedSourceWindow {
file: String,
start: usize,
end: usize,
}
/// Subset of a `ContextPackReport` kept for conflict-matrix preparation.
///
/// Built by `from_context_pack`. `prompt_targets` and `worker_context` are
/// the texts that `conflict_targets_from_context_pack` scans for target
/// references. Derives both `Serialize` and `Deserialize`.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparedContext {
target: String,
target_kind: String,
status_reminders: Vec<String>,
prompt_targets: Vec<String>,
touched_files: Vec<String>,
touched_symbols: Vec<String>,
files_changed: usize,
worker_context: Vec<String>,
source_windows: Vec<ConflictMatrixPreparedSourceWindow>,
}
impl ConflictMatrixPreparedContext {
    /// Copies the fields the conflict matrix needs out of a context-pack
    /// report: target identity, status reminders, next-context lists, the
    /// changed-file count, worker summaries, and source-window ranges.
    fn from_context_pack(context_pack: &ContextPackReport) -> Self {
        let next_context = &context_pack.next_context;
        let exploration = &context_pack.exploration;

        // Only the summary text of each worker entry is retained.
        let worker_context: Vec<String> = exploration
            .worker_context
            .iter()
            .map(|worker| worker.summary.clone())
            .collect();

        let mut source_windows = Vec::new();
        for window in exploration.source_windows.iter() {
            source_windows.push(ConflictMatrixPreparedSourceWindow {
                file: window.file.clone(),
                start: window.start,
                end: window.end,
            });
        }

        Self {
            target: context_pack.target.clone(),
            target_kind: context_pack.target_kind.clone(),
            status_reminders: context_pack.status_reminders.clone(),
            prompt_targets: next_context.prompt_targets.clone(),
            touched_files: next_context.touched_files.clone(),
            touched_symbols: next_context.touched_symbols.clone(),
            files_changed: context_pack.diff_digest.files_changed,
            worker_context,
            source_windows,
        }
    }
}
/// Identifiers and replay command for one evidence packet; listed in
/// `ConflictMatrixInputSummary::evidence_packets`. Populated outside this
/// section.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixEvidencePacketSummary {
target: String,
packet_id: String,
target_node_id: String,
// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
projection_hash: Option<String>,
replay_command: String,
}
/// Counters and cache status describing the shared preparation step;
/// embedded in `ConflictMatrixInputSummary::shared_preparation`. Populated
/// outside this section.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixSharedPreparationSummary {
evidence_cache_status: String,
graph_nodes: usize,
graph_edges: usize,
evidence_packets: usize,
source_handles: usize,
worker_context: usize,
worker_results: usize,
semantic_rows: usize,
dispatch_trace_snapshot_nodes: usize,
dispatch_trace_snapshot_edges: usize,
}
/// Version, key, status, and watermark strings for the preparation cache;
/// embedded in `ConflictMatrixInputSummary::preparation_cache`. Populated
/// outside this section.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparationCacheSummary {
version: String,
key: String,
status: String,
source_watermark: String,
document_watermark: String,
staged_diff_watermark: String,
}
/// Context-pack summary embedded in `ConflictMatrixReport::context_pack`.
///
/// Field names match `ConflictMatrixPreparedContext`, except that
/// `source_windows` is a list of strings here. Populated outside this section.
#[derive(Serialize)]
struct ConflictMatrixContextSummary {
target: String,
target_kind: String,
prompt_targets: Vec<String>,
touched_files: Vec<String>,
touched_symbols: Vec<String>,
files_changed: usize,
worker_context: Vec<String>,
source_windows: Vec<String>,
status_reminders: Vec<String>,
}
/// Per-target entry of `ConflictMatrixReport::per_target_fail_closed`,
/// carrying the target, its risk reasons, owned files, and a source-handle
/// count. Populated outside this section.
#[derive(Clone, Debug, Serialize)]
struct ConflictMatrixPerTargetFailClosed {
target: String,
previously_completed: bool,
risk_reasons: Vec<String>,
owned_files: Vec<String>,
source_handle_count: usize,
}
/// Orchestration section of the conflict-matrix report: projection
/// freshness, projection hashes, evidence packet ids, decisions, ownership
/// blocks, and follow-up commands, all in serialized form. Embedded in
/// `ConflictMatrixReport::orchestration`; populated outside this section.
#[derive(Serialize)]
struct ConflictMatrixOrchestrationObservability {
contract_version: &'static str,
projection_freshness: GraphDbFreshnessReport,
projection_hashes: Vec<String>,
evidence_packet_ids: Vec<String>,
conflict_matrix_decisions: Vec<String>,
worker_ownership_blocks: Vec<String>,
follow_up_commands: Vec<String>,
}
/// Top-level serialized conflict-matrix report.
///
/// Combines the resolved targets, parallel-safety flags, input summaries, the
/// cached diff and impact reports, per-target candidates, worker prompt
/// packets, pairwise conflicts, and follow-up commands. Populated outside
/// this section.
#[derive(Serialize)]
struct ConflictMatrixReport {
contract_version: &'static str,
root: String,
// Omitted from the serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
targets: Vec<String>,
can_parallel: bool,
fail_closed: bool,
cross_target_parallel_safe: bool,
per_target_fail_closed: Vec<ConflictMatrixPerTargetFailClosed>,
inputs: ConflictMatrixInputSummary,
context_pack: ConflictMatrixContextSummary,
cached_diff: diff_digest::DiffDigestReport,
impact: impact::ImpactReport,
candidates: Vec<ConflictMatrixCandidate>,
worker_prompt_packets: Vec<ConflictMatrixWorkerPromptPacket>,
conflicts: Vec<ConflictMatrixPair>,
orchestration: ConflictMatrixOrchestrationObservability,
next_commands: Vec<String>,
// Omitted from the serialized output when empty. `default` is a
// deserialization attribute; this type only derives `Serialize`.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Maps a risk level to its snake_case label; the strings match the
/// `rename_all = "snake_case"` serialization of `ConflictMatrixRisk`.
fn conflict_risk_label(risk: ConflictMatrixRisk) -> &'static str {
match risk {
ConflictMatrixRisk::Low => "low",
ConflictMatrixRisk::Medium => "medium",
ConflictMatrixRisk::High => "high",
ConflictMatrixRisk::FailClosed => "fail_closed",
}
}
/// Copies the set's elements into a `Vec`, preserving the set's ascending order.
fn sorted_set(values: &BTreeSet<String>) -> Vec<String> {
    let mut ordered = Vec::with_capacity(values.len());
    ordered.extend(values.iter().cloned());
    ordered
}
/// Returns the elements present in both sets, in ascending order.
fn sorted_intersection(left: &BTreeSet<String>, right: &BTreeSet<String>) -> Vec<String> {
    left.iter()
        .filter(|value| right.contains(*value))
        .cloned()
        .collect()
}
/// Normalizes a raw conflict-matrix target reference to its bare identifier.
///
/// Accepts forms such as `#123`, `[#123]`, `` `#job-7`, `` or a plain id, and
/// strips the decoration around the identifier:
///
/// * leading: whitespace, backticks, `,` `;` `.`, `[` `]`, and `#`;
/// * trailing: whitespace, backticks, `,` `;` `.`, `[` `]` (a trailing `#` is
///   kept, as before).
///
/// Wrapper characters are trimmed repeatedly rather than one layer at a time,
/// so unbalanced or spaced forms such as `[#abc` or `# 42` no longer leak a
/// `#` or whitespace into the id. Characters inside the identifier are never
/// touched.
///
/// Returns `None` when nothing remains after trimming.
fn normalize_conflict_target(raw: &str) -> Option<String> {
    let is_wrapper =
        |ch: char| ch.is_whitespace() || matches!(ch, '`' | ',' | ';' | '.' | '[' | ']');
    let normalized = raw
        .trim_start_matches(|ch: char| ch == '#' || is_wrapper(ch))
        .trim_end_matches(is_wrapper);
    (!normalized.is_empty()).then(|| normalized.to_string())
}
/// Extracts `#`-style target references from free text.
///
/// The input is split on every character that is not an ASCII alphanumeric,
/// `#`, `_`, `-`, `[`, or `]`. For each token containing `#`, the slice from
/// the first `#` onward is passed to `normalize_conflict_target`; tokens that
/// normalize to nothing are dropped. Duplicates are kept, in input order.
fn extract_conflict_target_refs(input: &str) -> Vec<String> {
    let is_token_char = |ch: char| {
        ch.is_ascii_alphanumeric() || matches!(ch, '#' | '_' | '-' | '[' | ']')
    };
    let mut refs = Vec::new();
    for token in input.split(|ch: char| !is_token_char(ch)) {
        if let Some(hash) = token.find('#') {
            refs.extend(normalize_conflict_target(&token[hash..]));
        }
    }
    refs
}
/// Derives conflict-matrix targets from a prepared context pack.
///
/// Scans the prompt targets and then the worker-context texts for target
/// references, keeps the first occurrence of each, and retains only those
/// that `graph_db_resolve_evidence_target` can resolve in `store`.
///
/// # Errors
/// Propagates any error from target resolution.
fn conflict_targets_from_context_pack(
    store: &impl GraphStore,
    context_pack: &ConflictMatrixPreparedContext,
) -> Result<Vec<String>> {
    let candidates = context_pack
        .prompt_targets
        .iter()
        .chain(&context_pack.worker_context)
        .flat_map(|text| extract_conflict_target_refs(text));

    let mut seen = BTreeSet::new();
    let mut targets = Vec::new();
    for candidate in candidates {
        // Repeats are skipped before touching the store.
        let first_sighting = seen.insert(candidate.clone());
        if first_sighting && graph_db_resolve_evidence_target(store, &candidate)?.is_some() {
            targets.push(candidate);
        }
    }
    Ok(targets)
}
/// Determines the final, de-duplicated list of conflict-matrix targets.
///
/// Explicit `raw_targets` are normalized and used as given; only when none of
/// them survives normalization are targets derived from the context pack.
/// First occurrences win when de-duplicating.
///
/// # Errors
/// Fails when no target remains, or when deriving targets from the context
/// pack fails.
fn resolve_conflict_matrix_targets(
    store: &impl GraphStore,
    raw_targets: &[String],
    context_pack: &ConflictMatrixPreparedContext,
) -> Result<Vec<String>> {
    let explicit: Vec<String> = raw_targets
        .iter()
        .filter_map(|target| normalize_conflict_target(target))
        .collect();
    let candidates = if explicit.is_empty() {
        conflict_targets_from_context_pack(store, context_pack)?
    } else {
        explicit
    };

    let mut seen = BTreeSet::new();
    let targets: Vec<String> = candidates
        .into_iter()
        .filter(|target| seen.insert(target.clone()))
        .collect();

    if targets.is_empty() {
        bail!(
            "conflict-matrix needs at least one resolvable backlog id, job handle, or graph node id"
        );
    }
    Ok(targets)
}
/// Thin wrapper that forwards to `resolution::is_planner_config_path`.
fn is_planner_config_path(path: &str) -> bool {
resolution::is_planner_config_path(path)
}
/// Builds a source handle from a graph node's properties.
///
/// Returns `None` when the node has no `file` property. `start` defaults to 1
/// and `end` to `start` when missing or not parsable as `usize`; `handle`
/// falls back to the node id, and `reason` / `expand` to empty strings.
fn conflict_matrix_source_handle(node: &SubstrateGraphNode) -> Option<ConflictMatrixSourceHandle> {
    let props = &node.properties;
    let file = props.get("file")?.clone();

    let line_number = |key: &str| props.get(key).and_then(|value| value.parse::<usize>().ok());
    let text = |key: &str| props.get(key).cloned();

    let start = line_number("start").unwrap_or(1);
    let end = line_number("end").unwrap_or(start);

    Some(ConflictMatrixSourceHandle {
        handle: text("handle").unwrap_or_else(|| node.id.clone()),
        file,
        start,
        end,
        reason: text("reason").unwrap_or_default(),
        expand: text("expand").unwrap_or_default(),
    })
}
/// Builds a semantic reference from a graph node.
///
/// `handle` falls back to the node id, `source_file` falls back from the
/// `source_file` property to `path`, and `expand` falls back to
/// `traversal_expand_command(root, node.id)` when the property is absent.
fn conflict_matrix_semantic_ref(
    root: &Path,
    node: &SubstrateGraphNode,
) -> ConflictMatrixSemanticRef {
    let props = &node.properties;

    let handle = match props.get("handle") {
        Some(handle) => handle.clone(),
        None => node.id.clone(),
    };
    let source_file = props
        .get("source_file")
        .or_else(|| props.get("path"))
        .cloned();
    let expand = match props.get("expand") {
        Some(command) => command.clone(),
        None => traversal_expand_command(root, &node.id),
    };

    ConflictMatrixSemanticRef {
        handle,
        kind: node.kind.clone(),
        label: node.label.clone(),
        source_file,
        source_symbol: props.get("source_symbol").cloned(),
        expand,
    }
}
/// Lookup from file path to the sorted, de-duplicated labels of the `symbol`
/// graph nodes recorded for that file. Built by `conflict_matrix_graph_index`
/// and read by `conflict_matrix_symbols_for_files`.
#[derive(Clone)]
struct ConflictMatrixGraphIndex {
symbols_by_file: BTreeMap<String, Vec<String>>,
}
fn conflict_matrix_graph_index(graph_nodes: &[SubstrateGraphNode]) -> ConflictMatrixGraphIndex {
let mut symbols_by_file = BTreeMap::<String, Vec<String>>::new();
for node in graph_nodes {
if node.kind != "symbol" {
continue;
}
if let Some(path) = node.properties.get("path") {
symbols_by_file
.entry(path.clone())
.or_default()
.push(node.label.clone());
}
}
for symbols in symbols_by_file.values_mut() {
symbols.sort();
symbols.dedup();
}
ConflictMatrixGraphIndex { symbols_by_file }
}
/// Collects the symbols associated with a target: every indexed symbol in
/// `files`, plus the target node's own label when the node is a `symbol`.
fn conflict_matrix_symbols_for_files(
    graph_index: &ConflictMatrixGraphIndex,
    files: &BTreeSet<String>,
    target_node: &SubstrateGraphNode,
) -> BTreeSet<String> {
    let mut symbols: BTreeSet<String> = files
        .iter()
        .filter_map(|file| graph_index.symbols_by_file.get(file))
        .flatten()
        .cloned()
        .collect();
    if target_node.kind == "symbol" {
        symbols.insert(target_node.label.clone());
    }
    symbols
}
/// Returns the commands recorded for an impact test target, or the target's
/// path as a single entry when it has no commands.
fn conflict_matrix_test_commands(target: &impact::ImpactTestTarget) -> Vec<String> {
    match target.commands.is_empty() {
        true => vec![target.path.clone()],
        false => target.commands.clone(),
    }
}
/// Selects the test commands relevant to a candidate.
///
/// A test target matches when its path is one of `files` or any of its
/// symbols is in `symbols`. If nothing matches but the candidate overlaps the
/// staged diff (files, symbols, or config files), every affected test from
/// the impact report is included instead. The result is sorted and unique.
fn conflict_matrix_affected_tests(
    impact_report: &impact::ImpactReport,
    files: &BTreeSet<String>,
    symbols: &BTreeSet<String>,
    staged_overlap: &ConflictMatrixOverlap,
) -> Vec<String> {
    let mut tests: BTreeSet<String> = impact_report
        .affected_tests
        .iter()
        .filter(|target| {
            files.contains(&target.path)
                || target.symbols.iter().any(|symbol| symbols.contains(symbol))
        })
        .flat_map(|target| conflict_matrix_test_commands(target))
        .collect();

    let overlaps_staged = !(staged_overlap.files.is_empty()
        && staged_overlap.symbols.is_empty()
        && staged_overlap.config_files.is_empty());
    if tests.is_empty() && overlaps_staged {
        // Fallback: no direct match, so take every affected test.
        tests.extend(
            impact_report
                .affected_tests
                .iter()
                .flat_map(|target| conflict_matrix_test_commands(target)),
        );
    }
    tests.into_iter().collect()
}
/// Scores a candidate's semantic references and explains each contribution.
///
/// Every reference earns a base of 8 (`semantic_concept`), 6
/// (`semantic_entity`), or 3 (anything else), plus 4 when its source file is
/// in `files` and 2 when its source symbol is in `symbols`. Returns the total
/// and one `+<points> <details>` reason per reference, in input order.
fn conflict_matrix_semantic_dispatch_score(
    semantic_related: &[ConflictMatrixSemanticRef],
    files: &BTreeSet<String>,
    symbols: &BTreeSet<String>,
) -> (usize, Vec<String>) {
    let mut total = 0usize;
    let mut reasons = Vec::with_capacity(semantic_related.len());
    for semantic in semantic_related {
        let owns_file = semantic
            .source_file
            .as_ref()
            .is_some_and(|file| files.contains(file));
        let owns_symbol = semantic
            .source_symbol
            .as_ref()
            .is_some_and(|symbol| symbols.contains(symbol));

        let mut points: usize = match semantic.kind.as_str() {
            "semantic_concept" => 8,
            "semantic_entity" => 6,
            _ => 3,
        };
        let mut detail = format!("{} {}", semantic.kind, semantic.label);
        if owns_file {
            points += 4;
            detail.push_str(" / owned file");
        }
        if owns_symbol {
            points += 2;
            detail.push_str(" / owned symbol");
        }

        total += points;
        reasons.push(format!("+{points} {detail}"));
    }
    (total, reasons)
}
/// Computes which of a candidate's files and symbols also appear in the
/// cached (staged) diff.
///
/// `config_files` is the subset of overlapping files accepted by
/// `is_planner_config_path`; `tests` is always left empty here.
fn conflict_matrix_staged_overlap(
    files: &BTreeSet<String>,
    symbols: &BTreeSet<String>,
    cached_diff: &diff_digest::DiffDigestReport,
) -> ConflictMatrixOverlap {
    // One pass over the diff gathers both the staged paths and their symbols.
    let mut staged_files = BTreeSet::new();
    let mut staged_symbols = BTreeSet::new();
    for file in cached_diff.files.iter() {
        staged_files.insert(file.path.clone());
        staged_symbols.extend(file.touched_symbols.iter().cloned());
    }

    let overlapping_files = sorted_intersection(files, &staged_files);
    let overlapping_symbols = sorted_intersection(symbols, &staged_symbols);

    let mut config_files = Vec::new();
    for file in &overlapping_files {
        if is_planner_config_path(file) {
            config_files.push(file.clone());
        }
    }

    ConflictMatrixOverlap {
        files: overlapping_files,
        symbols: overlapping_symbols,
        tests: Vec::new(),
        config_files,
    }
}
/// Reads a list-valued property from a graph node.
///
/// The property text is split on `,`, `;` and `&&`; each piece is trimmed and
/// empty pieces are dropped. A missing property yields an empty list.
fn graph_node_list_property(node: &SubstrateGraphNode, key: &str) -> Vec<String> {
    let Some(raw) = node.properties.get(key) else {
        return Vec::new();
    };
    let mut items = Vec::new();
    for chunk in raw.split([',', ';']) {
        for piece in chunk.split("&&") {
            let piece = piece.trim();
            if !piece.is_empty() {
                items.push(piece.to_string());
            }
        }
    }
    items
}
/// Aggregates `worker_result` graph nodes into a feedback summary.
///
/// Nodes are processed in order of their numeric `line` property (nodes whose
/// `line` is missing or unparsable sort first), with the node id as the
/// tie-breaker. Counts `completed` / `blocked` statuses, unions the
/// `touched_files`, `expected_tests` and `follow_up_ids` list properties, and
/// records one outcome line per node. More than one blocked row sets
/// `repeated_blockage` and adds a warning. The closure-related fields are
/// left empty / zero here.
fn conflict_matrix_worker_feedback(
    worker_results: &[SubstrateGraphNode],
) -> ConflictMatrixWorkerFeedback {
    let line_of = |node: &SubstrateGraphNode| -> Option<i64> {
        node.properties
            .get("line")
            .and_then(|value| value.parse::<i64>().ok())
    };

    let mut ordered: Vec<&SubstrateGraphNode> = worker_results.iter().collect();
    ordered.sort_by(|left, right| {
        line_of(*left)
            .cmp(&line_of(*right))
            .then_with(|| left.id.cmp(&right.id))
    });

    let mut touched_files = BTreeSet::new();
    let mut expected_tests = BTreeSet::new();
    let mut follow_up_ids = BTreeSet::new();
    let mut outcome_history = Vec::new();
    let mut completed = 0usize;
    let mut blocked = 0usize;

    for node in ordered {
        let status = node
            .properties
            .get("status")
            .map_or("unknown", String::as_str);
        if status == "completed" {
            completed += 1;
        } else if status == "blocked" {
            blocked += 1;
        }

        touched_files.extend(graph_node_list_property(node, "touched_files"));
        expected_tests.extend(graph_node_list_property(node, "expected_tests"));
        follow_up_ids.extend(graph_node_list_property(node, "follow_up_ids"));

        // Prefer `path:line`, then `path`, and fall back to the node id.
        let location = match node.properties.get("path") {
            Some(path) => match node.properties.get("line") {
                Some(line) => format!("{path}:{line}"),
                None => path.clone(),
            },
            None => node.id.clone(),
        };
        let detail = match node.properties.get("detail") {
            Some(detail) => detail.clone(),
            None => node.label.clone(),
        };
        outcome_history.push(format!("{status} at {location}: {detail}"));
    }

    let repeated_blockage = blocked > 1;
    let mut warnings = Vec::new();
    if repeated_blockage {
        warnings.push(format!(
            "repeated blockage observed in {blocked} worker_result rows; inspect outcome_history before redispatch"
        ));
    }

    ConflictMatrixWorkerFeedback {
        total: worker_results.len(),
        completed,
        blocked,
        touched_files: touched_files.into_iter().collect(),
        expected_tests: expected_tests.into_iter().collect(),
        follow_up_ids: follow_up_ids.into_iter().collect(),
        outcome_history,
        repeated_blockage,
        stale_expected_tests: Vec::new(),
        follow_up_debt: Vec::new(),
        closure_rank_score: 0,
        closure_rank_reasons: Vec::new(),
        warnings,
    }
}
/// Renders a list of references as a comma-separated string, or `none` when
/// the list is empty.
fn feedback_ref_list(values: &[String]) -> String {
    match values {
        [] => String::from("none"),
        refs => refs.join(","),
    }
}
/// Lists the worker-reported expected tests that are absent from the
/// candidate's current affected tests.
///
/// Returns an empty list when the worker reported no expected tests, and all
/// of the expected tests when the candidate currently has no affected tests.
fn stale_expected_tests_for_candidate(candidate: &ConflictMatrixCandidate) -> Vec<String> {
    let expected = &candidate.worker_feedback.expected_tests;
    if expected.is_empty() {
        return Vec::new();
    }
    if candidate.affected_tests.is_empty() {
        return expected.clone();
    }

    let current: BTreeSet<&String> = candidate.affected_tests.iter().collect();
    let mut stale = Vec::new();
    for test in expected.iter() {
        if !current.contains(test) {
            stale.push(test.clone());
        }
    }
    stale
}
/// Derives the closure-ranking fields of every candidate's worker feedback.
///
/// For each candidate this fills `stale_expected_tests`, `follow_up_debt`,
/// `closure_rank_score` and `closure_rank_reasons`, and appends redispatch
/// warnings before passing the warning list through `dedupe_preserve_order`.
///
/// Score weights: 40 per blocked row (only when `repeated_blockage` is set),
/// 25 per stale expected test, 10 per follow-up id.
fn apply_conflict_matrix_worker_feedback_controls(candidates: &mut [ConflictMatrixCandidate]) {
for candidate in candidates.iter_mut() {
let stale_expected_tests = stale_expected_tests_for_candidate(candidate);
// Every follow-up id reported by workers is treated as outstanding debt.
let follow_up_debt = candidate.worker_feedback.follow_up_ids.clone();
let mut score = 0usize;
let mut reasons = Vec::new();
if candidate.worker_feedback.repeated_blockage {
score += candidate.worker_feedback.blocked.saturating_mul(40);
reasons.push(format!(
"repeated blockage: {} blocked worker_result rows",
candidate.worker_feedback.blocked
));
}
if !stale_expected_tests.is_empty() {
score += stale_expected_tests.len().saturating_mul(25);
// The wording depends on whether there are any current affected tests to compare against.
let reason = if candidate.affected_tests.is_empty() {
format!(
"stale expected tests: {} no longer match current impact output",
feedback_ref_list(&stale_expected_tests)
)
} else {
format!(
"stale expected tests: {} not in current impacted tests {}",
feedback_ref_list(&stale_expected_tests),
feedback_ref_list(&candidate.affected_tests)
)
};
reasons.push(reason.clone());
candidate.worker_feedback.warnings.push(format!(
"{reason}; refresh impact or rerun the listed tests before redispatch"
));
}
if !follow_up_debt.is_empty() {
score += follow_up_debt.len().saturating_mul(10);
let reason = format!("follow-up debt: {}", feedback_ref_list(&follow_up_debt));
reasons.push(reason.clone());
candidate.worker_feedback.warnings.push(format!(
"{reason}; include or resolve the referenced backlog ids before closing dispatch"
));
}
candidate.worker_feedback.stale_expected_tests = stale_expected_tests;
candidate.worker_feedback.follow_up_debt = follow_up_debt;
candidate.worker_feedback.closure_rank_score = score;
candidate.worker_feedback.closure_rank_reasons = reasons;
// Take the warnings out, de-duplicate them, and store the result back.
candidate.worker_feedback.warnings =
dedupe_preserve_order(std::mem::take(&mut candidate.worker_feedback.warnings));
}
}
/// Builds a placeholder ownership block for `target`: only the contract
/// version and the title are populated; every list is empty, the token
/// budget is the default and the prompt is an empty string.
fn empty_conflict_matrix_ownership(target: &str) -> ConflictMatrixOwnershipBlock {
    let title = format!("Worker ownership for {target}");
    ConflictMatrixOwnershipBlock {
        contract_version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
        title,
        owned_files: vec![],
        owned_symbols: vec![],
        read_only_context: vec![],
        read_only_files: vec![],
        forbidden_files: vec![],
        expected_tests: vec![],
        expansion_commands: vec![],
        token_budget: ConflictMatrixTokenBudget::default(),
        prompt: String::new(),
    }
}
/// Builds one conflict-matrix candidate from a graph-db evidence report.
///
/// Owned files come from the evidence source handles, plus the target node's
/// `path` property when the target is a `file`, `symbol` or `route`. From
/// those files the function derives owned symbols, config files, overlap with
/// the staged diff, affected tests, worker feedback, a numeric risk score
/// with reasons, and a risk tier.
///
/// The returned candidate has `rank` 0, `parallel_safe` false, empty
/// `blocks` / `blocked_by`, default `required_context` / `graph_handles`, and
/// a placeholder ownership block from `empty_conflict_matrix_ownership`.
fn conflict_matrix_candidate_from_evidence(
root: &Path,
evidence: &GraphDbEvidenceReport,
graph_index: &ConflictMatrixGraphIndex,
cached_diff: &diff_digest::DiffDigestReport,
impact_report: &impact::ImpactReport,
) -> ConflictMatrixCandidate {
let mut files = BTreeSet::new();
// Side effect inside the closure: every resolvable source handle also records its file as owned.
let source_handles = evidence
.source_handles
.iter()
.filter_map(|node| {
let handle = conflict_matrix_source_handle(node)?;
files.insert(handle.file.clone());
Some(handle)
})
.collect::<Vec<_>>();
if matches!(
evidence.target_node.kind.as_str(),
"file" | "symbol" | "route"
) && let Some(path) = evidence.target_node.properties.get("path")
{
files.insert(path.clone());
}
let symbols = conflict_matrix_symbols_for_files(graph_index, &files, &evidence.target_node);
let config_files = files
.iter()
.filter(|file| is_planner_config_path(file))
.cloned()
.collect::<BTreeSet<_>>();
let mut staged_overlap = conflict_matrix_staged_overlap(&files, &symbols, cached_diff);
let affected_tests =
conflict_matrix_affected_tests(impact_report, &files, &symbols, &staged_overlap);
// The overlap's test list is simply the candidate's affected tests.
staged_overlap.tests = affected_tests.clone();
let mut worker_feedback = conflict_matrix_worker_feedback(&evidence.worker_results);
let previously_completed = worker_feedback.completed > 0;
let mut risk_score = 0usize;
let mut risk_reasons = Vec::new();
// No owned files normally fails closed (+120); a previously completed target only gets a warning.
if files.is_empty() && previously_completed {
worker_feedback.warnings.push(format!(
"previously completed: {} completed worker_result row(s) exist without source ownership evidence; treating no-owned-files as informational instead of per-target fail-closed",
worker_feedback.completed
));
} else if files.is_empty() {
risk_score += 120;
risk_reasons.push("no source ownership evidence; fail closed before dispatch".to_string());
}
// Additive score weights: 80 per owned config file, 100 per staged config file,
// 70 per staged file, 35 per staged symbol, 5 per affected test when there is more than one.
if !config_files.is_empty() {
risk_score += 80 * config_files.len();
risk_reasons.push("candidate owns config or workflow files".to_string());
}
if !staged_overlap.config_files.is_empty() {
risk_score += 100 * staged_overlap.config_files.len();
risk_reasons.push("staged diff already touches candidate config files".to_string());
}
if !staged_overlap.files.is_empty() {
risk_score += 70 * staged_overlap.files.len();
risk_reasons.push("staged diff already touches candidate files".to_string());
}
if !staged_overlap.symbols.is_empty() {
risk_score += 35 * staged_overlap.symbols.len();
risk_reasons.push("staged diff already touches candidate symbols".to_string());
}
if affected_tests.len() > 1 {
risk_score += affected_tests.len() * 5;
risk_reasons.push("candidate fans into multiple affected test commands".to_string());
}
// The tier is decided from the same conditions, independently of the numeric score.
let risk = if (files.is_empty() && !previously_completed)
|| !staged_overlap.config_files.is_empty()
|| !staged_overlap.files.is_empty()
{
ConflictMatrixRisk::FailClosed
} else if !config_files.is_empty() || !staged_overlap.symbols.is_empty() {
ConflictMatrixRisk::High
} else if affected_tests.len() > 1 {
ConflictMatrixRisk::Medium
} else {
ConflictMatrixRisk::Low
};
// Worker context text: the `summary` property, falling back to the node label.
let worker_context = evidence
.worker_context
.iter()
.map(|node| {
node.properties
.get("summary")
.cloned()
.unwrap_or_else(|| node.label.clone())
})
.collect::<Vec<_>>();
// Worker context handles: the `handle` property, falling back to the node id.
let worker_context_handles = evidence
.worker_context
.iter()
.map(|node| {
node.properties
.get("handle")
.cloned()
.unwrap_or_else(|| node.id.clone())
})
.collect::<Vec<_>>();
let semantic_related = evidence
.semantic_related
.iter()
.map(|node| conflict_matrix_semantic_ref(root, node))
.collect::<Vec<_>>();
let (semantic_dispatch_score, semantic_dispatch_reasons) =
conflict_matrix_semantic_dispatch_score(&semantic_related, &files, &symbols);
ConflictMatrixCandidate {
rank: 0,
target: evidence.target.clone(),
evidence_packet_id: evidence.packet_id.clone(),
projection_hash: evidence.projection_hash.clone(),
target_node_id: evidence.target_node.id.clone(),
target_kind: evidence.target_node.kind.clone(),
target_label: evidence.target_node.label.clone(),
risk,
previously_completed,
parallel_safe: false,
blocks: Vec::new(),
blocked_by: Vec::new(),
required_context: ConflictMatrixRequiredContext::default(),
graph_handles: ConflictMatrixGraphHandles::default(),
risk_score,
risk_reasons,
owned_files: sorted_set(&files),
owned_symbols: sorted_set(&symbols),
config_files: sorted_set(&config_files),
affected_tests,
worker_context,
semantic_related,
semantic_dispatch_score,
semantic_dispatch_reasons,
worker_feedback,
source_handles,
worker_context_handles,
staged_overlap,
ownership: empty_conflict_matrix_ownership(&evidence.target),
}
}
/// Copies a slice of strings into an ordered, de-duplicated set.
fn set_from_vec(values: &[String]) -> BTreeSet<String> {
    let mut set = BTreeSet::new();
    for value in values {
        set.insert(value.clone());
    }
    set
}
/// Classifies the overlap between two candidates.
///
/// Returns the risk tier, a numeric score (100 per shared file, 100 per
/// shared config file, 40 per shared symbol, 10 per shared test) and a
/// verdict sentence. Any shared file or config file fails closed; otherwise
/// shared symbols are high risk, shared tests medium, and no overlap low.
fn conflict_pair_risk(
    shared_files: &[String],
    shared_symbols: &[String],
    shared_tests: &[String],
    shared_config_files: &[String],
) -> (ConflictMatrixRisk, usize, String) {
    let score = shared_files.len() * 100
        + shared_config_files.len() * 100
        + shared_symbols.len() * 40
        + shared_tests.len() * 10;

    let file_level_overlap = !(shared_files.is_empty() && shared_config_files.is_empty());
    let (risk, verdict) = if file_level_overlap {
        (
            ConflictMatrixRisk::FailClosed,
            "serialize or assign one worker as the sole owner of the shared files",
        )
    } else if !shared_symbols.is_empty() {
        (
            ConflictMatrixRisk::High,
            "split by file or serialize; shared symbols are not safe parallel ownership",
        )
    } else if !shared_tests.is_empty() {
        (
            ConflictMatrixRisk::Medium,
            "parallel work is possible, but keep a shared test gate after merge",
        )
    } else {
        (
            ConflictMatrixRisk::Low,
            "no direct file, symbol, config, or test overlap found",
        )
    };
    (risk, score, verdict.to_string())
}
/// Compares every unordered pair of candidates and reports what they share.
///
/// For each pair the shared owned files, owned symbols, affected tests and
/// config files are computed and classified by `conflict_pair_risk`. The
/// result is sorted by risk (highest first), then risk score (highest first),
/// then by the left and right target names.
///
/// The per-candidate lookup sets are built once up front rather than once per
/// pair, so each candidate's vectors are cloned a single time instead of once
/// for every pair the candidate takes part in.
fn build_conflict_matrix_pairs(candidates: &[ConflictMatrixCandidate]) -> Vec<ConflictMatrixPair> {
    // (owned files, owned symbols, affected tests, config files) per candidate.
    let ownership_sets = candidates
        .iter()
        .map(|candidate| {
            (
                set_from_vec(&candidate.owned_files),
                set_from_vec(&candidate.owned_symbols),
                set_from_vec(&candidate.affected_tests),
                set_from_vec(&candidate.config_files),
            )
        })
        .collect::<Vec<_>>();

    let mut pairs = Vec::new();
    for (left_idx, (left, left_sets)) in candidates.iter().zip(&ownership_sets).enumerate() {
        let (left_files, left_symbols, left_tests, left_config) = left_sets;
        // Only pair with later candidates so each unordered pair is emitted once.
        for (right, right_sets) in candidates.iter().zip(&ownership_sets).skip(left_idx + 1) {
            let (right_files, right_symbols, right_tests, right_config) = right_sets;
            let shared_files = sorted_intersection(left_files, right_files);
            let shared_symbols = sorted_intersection(left_symbols, right_symbols);
            let shared_tests = sorted_intersection(left_tests, right_tests);
            let shared_config_files = sorted_intersection(left_config, right_config);
            let (risk, risk_score, verdict) = conflict_pair_risk(
                &shared_files,
                &shared_symbols,
                &shared_tests,
                &shared_config_files,
            );
            pairs.push(ConflictMatrixPair {
                left: left.target.clone(),
                right: right.target.clone(),
                risk,
                risk_score,
                shared_files,
                shared_symbols,
                shared_tests,
                shared_config_files,
                verdict,
            });
        }
    }

    pairs.sort_by(|left, right| {
        right
            .risk
            .cmp(&left.risk)
            .then_with(|| right.risk_score.cmp(&left.risk_score))
            .then_with(|| left.left.cmp(&right.left))
            .then_with(|| left.right.cmp(&right.right))
    });
    pairs
}
/// Summarises every candidate whose own risk tier is `FailClosed`, keeping
/// the candidates' original order.
fn conflict_matrix_per_target_fail_closed(
    candidates: &[ConflictMatrixCandidate],
) -> Vec<ConflictMatrixPerTargetFailClosed> {
    let mut failing = Vec::new();
    for candidate in candidates {
        if candidate.risk != ConflictMatrixRisk::FailClosed {
            continue;
        }
        failing.push(ConflictMatrixPerTargetFailClosed {
            target: candidate.target.clone(),
            previously_completed: candidate.previously_completed,
            risk_reasons: candidate.risk_reasons.clone(),
            owned_files: candidate.owned_files.clone(),
            source_handle_count: candidate.source_handles.len(),
        });
    }
    failing
}
/// Renders values as a Markdown bullet list, one `- value` line per entry.
/// An empty slice renders as the single line `- none`.
fn markdown_list(values: &[String]) -> String {
    let Some((first, rest)) = values.split_first() else {
        return "- none".to_string();
    };
    let mut rendered = format!("- {first}");
    for value in rest {
        rendered.push_str("\n- ");
        rendered.push_str(value);
    }
    rendered
}
/// Lists the follow-up commands a worker can run to expand its context.
///
/// The list is the non-blank `expand` commands of the candidate's source
/// handles, then the non-blank `expand` commands of its semantic references,
/// then its affected test commands, passed through `dedupe_preserve_order`.
/// If that leaves nothing, a single `tsift graph-db evidence` command for the
/// target is used as the fallback.
///
/// Blank semantic `expand` values are skipped in the same way as blank
/// source-handle values, so an empty entry can neither appear in the list nor
/// prevent the fallback command from being emitted.
fn conflict_matrix_expansion_commands(candidate: &ConflictMatrixCandidate) -> Vec<String> {
    let source_expansions = candidate
        .source_handles
        .iter()
        .map(|handle| &handle.expand);
    let semantic_expansions = candidate
        .semantic_related
        .iter()
        .map(|semantic| &semantic.expand);

    let mut commands = source_expansions
        .chain(semantic_expansions)
        .filter(|command| !command.trim().is_empty())
        .cloned()
        .chain(candidate.affected_tests.iter().cloned())
        .collect::<Vec<_>>();

    if commands.is_empty() {
        commands.push(format!(
            "tsift graph-db evidence {} --depth 3 --limit 8 --json",
            shell_quote(&candidate.target)
        ));
    }
    dedupe_preserve_order(commands)
}
/// Estimates the token budget for a worker prompt.
///
/// Each source handle contributes `end - start + 1` lines (saturating). The
/// context byte ceiling is the larger of 120 bytes per source line and the
/// prompt's own byte length; token figures come from
/// `estimated_tokens_from_bytes`.
fn conflict_matrix_token_budget(
    prompt: &str,
    source_handles: &[ConflictMatrixSourceHandle],
) -> ConflictMatrixTokenBudget {
    let mut source_window_lines = 0usize;
    for handle in source_handles {
        source_window_lines += handle.end.saturating_sub(handle.start).saturating_add(1);
    }
    let prompt_bytes = prompt.len();
    let max_context_bytes = usize::max(source_window_lines.saturating_mul(120), prompt_bytes);
    ConflictMatrixTokenBudget {
        prompt_estimated_tokens: estimated_tokens_from_bytes(prompt_bytes),
        max_prompt_tokens: estimated_tokens_from_bytes(max_context_bytes),
        source_window_count: source_handles.len(),
        source_window_lines,
        max_context_bytes,
    }
}
/// Derives the worker prompt packet id for a candidate via `stable_handle`
/// with the `wpp` prefix.
///
/// The seed joins the packet contract version, the target, the target node id
/// and the projection hash (or `no-hash` when absent) with `:`.
fn conflict_matrix_worker_prompt_packet_id(candidate: &ConflictMatrixCandidate) -> String {
    let projection_hash = candidate.projection_hash.as_deref().unwrap_or("no-hash");
    let seed = format!(
        "{}:{}:{}:{}",
        WORKER_PROMPT_PACKET_CONTRACT_VERSION,
        candidate.target,
        candidate.target_node_id,
        projection_hash
    );
    stable_handle("wpp", &seed)
}
/// Gathers the context a worker needs for a candidate: the ownership block's
/// read-only files and expansion commands, plus the handles of the
/// candidate's source windows, worker context and semantic references.
fn conflict_matrix_required_context(
    candidate: &ConflictMatrixCandidate,
) -> ConflictMatrixRequiredContext {
    let ownership = &candidate.ownership;
    let source_handles = candidate
        .source_handles
        .iter()
        .map(|entry| entry.handle.clone())
        .collect();
    let semantic_handles = candidate
        .semantic_related
        .iter()
        .map(|entry| entry.handle.clone())
        .collect();
    ConflictMatrixRequiredContext {
        read_only_files: ownership.read_only_files.clone(),
        source_handles,
        worker_context_handles: candidate.worker_context_handles.clone(),
        semantic_handles,
        expansion_commands: ownership.expansion_commands.clone(),
    }
}
/// Collects the graph identifiers attached to a candidate: target node id,
/// evidence packet id, worker prompt packet id, projection hash, and the
/// handles of its source windows, worker context and semantic references.
fn conflict_matrix_graph_handles(
    candidate: &ConflictMatrixCandidate,
) -> ConflictMatrixGraphHandles {
    let source_handles = candidate
        .source_handles
        .iter()
        .map(|entry| entry.handle.clone())
        .collect();
    let semantic_handles = candidate
        .semantic_related
        .iter()
        .map(|entry| entry.handle.clone())
        .collect();
    ConflictMatrixGraphHandles {
        target_node_id: candidate.target_node_id.clone(),
        evidence_packet_id: candidate.evidence_packet_id.clone(),
        worker_prompt_packet_id: conflict_matrix_worker_prompt_packet_id(candidate),
        projection_hash: candidate.projection_hash.clone(),
        source_handles,
        worker_context_handles: candidate.worker_context_handles.clone(),
        semantic_handles,
    }
}
/// Fills in the `ownership` block (including the rendered worker prompt) of
/// every candidate.
///
/// For each candidate:
/// - read-only files are the files owned by every candidate with a different
///   target name;
/// - forbidden files are those read-only files plus the candidate's own
///   staged-overlap files and config files;
/// - read-only context lists the read-only files followed by worker context
///   summaries, semantic references, semantic ranking reasons and worker
///   feedback lines, passed through `dedupe_preserve_order`;
/// - the token budget is computed from the prompt body, and a token budget
///   line is then appended to form the final prompt.
fn apply_conflict_matrix_ownership_blocks(candidates: &mut [ConflictMatrixCandidate]) {
// Snapshot every candidate's owned files first so the mutable loop below can read them.
let all_files_by_target = candidates
.iter()
.map(|candidate| {
(
candidate.target.clone(),
candidate
.owned_files
.iter()
.cloned()
.collect::<BTreeSet<_>>(),
)
})
.collect::<Vec<_>>();
for candidate in candidates.iter_mut() {
let mut read_only = BTreeSet::new();
for (target, files) in &all_files_by_target {
// Entries are matched by target name, so entries sharing this candidate's target are skipped too.
if target != &candidate.target {
read_only.extend(files.iter().cloned());
}
}
let mut forbidden = read_only.clone();
forbidden.extend(candidate.staged_overlap.files.iter().cloned());
forbidden.extend(candidate.staged_overlap.config_files.iter().cloned());
let read_only_files = sorted_set(&read_only);
let forbidden_files = sorted_set(&forbidden);
let expected_tests = candidate.affected_tests.clone();
let mut read_only_context = read_only_files.clone();
read_only_context.extend(
candidate
.worker_context
.iter()
.map(|summary| format!("worker_context: {summary}")),
);
read_only_context.extend(candidate.semantic_related.iter().map(|semantic| {
format!(
"semantic:{}:{}{}",
semantic.kind,
semantic.label,
semantic
.source_file
.as_ref()
.map(|file| format!(" ({file})"))
.unwrap_or_default()
)
}));
read_only_context.extend(
candidate
.semantic_dispatch_reasons
.iter()
.map(|reason| format!("semantic_rank: {reason}")),
);
// Worker feedback lines are only added when there is something to report.
if candidate.worker_feedback.total > 0 {
read_only_context.push(format!(
"worker_feedback: completed={} blocked={} touched_files={} expected_tests={} follow_up_ids={}",
candidate.worker_feedback.completed,
candidate.worker_feedback.blocked,
feedback_ref_list(&candidate.worker_feedback.touched_files),
feedback_ref_list(&candidate.worker_feedback.expected_tests),
feedback_ref_list(&candidate.worker_feedback.follow_up_ids),
));
}
if candidate.worker_feedback.closure_rank_score > 0 {
read_only_context.push(format!(
"worker_feedback_closure: score={} stale_expected_tests={} follow_up_debt={}",
candidate.worker_feedback.closure_rank_score,
feedback_ref_list(&candidate.worker_feedback.stale_expected_tests),
feedback_ref_list(&candidate.worker_feedback.follow_up_debt),
));
}
read_only_context.extend(
candidate
.worker_feedback
.warnings
.iter()
.map(|warning| format!("worker_feedback_warning: {warning}")),
);
read_only_context = dedupe_preserve_order(read_only_context);
let expansion_commands = conflict_matrix_expansion_commands(candidate);
// The title embeds the candidate's current rank, whatever it is when this function runs.
let title = format!(
"Worker {} owns {} ({})",
candidate.rank, candidate.target, candidate.target_label
);
let prompt_body = format!(
"{title}\n\nOwned files:\n{}\n\nOwned symbols:\n{}\n\nRead-only context:\n{}\n\nForbidden files:\n{}\n\nExpected tests:\n{}\n\nExpansion commands:\n{}\n\nSemantic dispatch score: {}\n{}\n\nFail closed if the task requires a forbidden/shared file, an unowned config file, or a public contract change outside this ownership block.",
markdown_list(&candidate.owned_files),
markdown_list(&candidate.owned_symbols),
markdown_list(&read_only_context),
markdown_list(&forbidden_files),
markdown_list(&expected_tests),
markdown_list(&expansion_commands),
candidate.semantic_dispatch_score,
markdown_list(&candidate.semantic_dispatch_reasons),
);
// The budget is measured on the body alone; the budget line appended below is not counted.
let token_budget = conflict_matrix_token_budget(&prompt_body, &candidate.source_handles);
let prompt = format!(
"{prompt_body}\n\nToken budget: prompt_estimated_tokens={} max_prompt_tokens={} source_windows={} source_window_lines={} max_context_bytes={}",
token_budget.prompt_estimated_tokens,
token_budget.max_prompt_tokens,
token_budget.source_window_count,
token_budget.source_window_lines,
token_budget.max_context_bytes,
);
candidate.ownership = ConflictMatrixOwnershipBlock {
contract_version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
title,
owned_files: candidate.owned_files.clone(),
owned_symbols: candidate.owned_symbols.clone(),
read_only_context,
read_only_files,
forbidden_files,
expected_tests,
expansion_commands,
token_budget,
prompt,
};
}
}
/// Reports whether a pair must be serialized, i.e. its risk tier is `High` or
/// `FailClosed`.
fn conflict_matrix_pair_requires_serial(pair: &ConflictMatrixPair) -> bool {
    pair.risk == ConflictMatrixRisk::High || pair.risk == ConflictMatrixRisk::FailClosed
}
/// Populates the scheduling fields of every candidate: `blocks`,
/// `blocked_by`, `parallel_safe`, `required_context` and `graph_handles`.
///
/// Serial edges come from two sources:
/// - every conflict pair accepted by `conflict_matrix_pair_requires_serial`,
///   where the target with the lower rank (the left one on ties) blocks the
///   other;
/// - every follow-up debt id of a candidate, which the candidate blocks; the
///   reverse `blocked_by` edge is only recorded when that id is itself a
///   candidate target.
///
/// A candidate is `parallel_safe` only when its risk is not `FailClosed` and
/// it has no serial edge in either direction.
fn apply_conflict_matrix_scheduler_fields(
candidates: &mut [ConflictMatrixCandidate],
conflicts: &[ConflictMatrixPair],
) {
let rank_by_target = candidates
.iter()
.map(|candidate| (candidate.target.clone(), candidate.rank))
.collect::<BTreeMap<_, _>>();
let mut blocks = BTreeMap::<String, BTreeSet<String>>::new();
let mut blocked_by = BTreeMap::<String, BTreeSet<String>>::new();
for pair in conflicts {
if !conflict_matrix_pair_requires_serial(pair) {
continue;
}
// Targets missing from the rank map sort last.
let left_rank = rank_by_target
.get(&pair.left)
.copied()
.unwrap_or(usize::MAX);
let right_rank = rank_by_target
.get(&pair.right)
.copied()
.unwrap_or(usize::MAX);
let (blocker, blocked) = if left_rank <= right_rank {
(&pair.left, &pair.right)
} else {
(&pair.right, &pair.left)
};
blocks
.entry(blocker.clone())
.or_default()
.insert(blocked.clone());
blocked_by
.entry(blocked.clone())
.or_default()
.insert(blocker.clone());
}
for candidate in candidates.iter() {
for follow_up in &candidate.worker_feedback.follow_up_debt {
blocks
.entry(candidate.target.clone())
.or_default()
.insert(follow_up.clone());
if rank_by_target.contains_key(follow_up) {
blocked_by
.entry(follow_up.clone())
.or_default()
.insert(candidate.target.clone());
}
}
}
for candidate in candidates.iter_mut() {
// `remove` hands the edge set to the first candidate with this target name.
let candidate_blocks: Vec<String> = blocks
.remove(&candidate.target)
.map(|values| values.into_iter().collect())
.unwrap_or_default();
let candidate_blocked_by: Vec<String> = blocked_by
.remove(&candidate.target)
.map(|values| values.into_iter().collect())
.unwrap_or_default();
let has_serial_edges = !candidate_blocks.is_empty() || !candidate_blocked_by.is_empty();
candidate.parallel_safe =
candidate.risk != ConflictMatrixRisk::FailClosed && !has_serial_edges;
candidate.blocks = candidate_blocks;
candidate.blocked_by = candidate_blocked_by;
// Both are derived from the candidate's current ownership block and handles.
candidate.required_context = conflict_matrix_required_context(candidate);
candidate.graph_handles = conflict_matrix_graph_handles(candidate);
}
}
/// Produces one worker prompt packet per candidate, in candidate order,
/// copying the scheduling fields from the candidate and the ownership fields
/// from its ownership block.
fn conflict_matrix_worker_prompt_packets(
    candidates: &[ConflictMatrixCandidate],
) -> Vec<ConflictMatrixWorkerPromptPacket> {
    let mut packets = Vec::with_capacity(candidates.len());
    for candidate in candidates {
        let ownership = &candidate.ownership;
        packets.push(ConflictMatrixWorkerPromptPacket {
            contract_version: WORKER_PROMPT_PACKET_CONTRACT_VERSION,
            packet_id: conflict_matrix_worker_prompt_packet_id(candidate),
            target: candidate.target.clone(),
            rank: candidate.rank,
            risk: candidate.risk,
            previously_completed: candidate.previously_completed,
            parallel_safe: candidate.parallel_safe,
            blocks: candidate.blocks.clone(),
            blocked_by: candidate.blocked_by.clone(),
            required_context: candidate.required_context.clone(),
            graph_handles: candidate.graph_handles.clone(),
            projection_hash: candidate.projection_hash.clone(),
            title: ownership.title.clone(),
            owned_files: ownership.owned_files.clone(),
            owned_symbols: ownership.owned_symbols.clone(),
            read_only_context: ownership.read_only_context.clone(),
            forbidden_files: ownership.forbidden_files.clone(),
            expected_tests: ownership.expected_tests.clone(),
            expansion_commands: ownership.expansion_commands.clone(),
            token_budget: ownership.token_budget.clone(),
            semantic_dispatch_score: candidate.semantic_dispatch_score,
            semantic_dispatch_reasons: candidate.semantic_dispatch_reasons.clone(),
            worker_feedback: candidate.worker_feedback.clone(),
            prompt: ownership.prompt.clone(),
        });
    }
    packets
}
/// Assembles the observability record for a conflict-matrix run.
///
/// It carries the projection freshness, the sorted unique projection hashes,
/// every candidate's evidence packet id and ownership title, one decision
/// line per candidate followed by one per conflict pair, and the follow-up
/// commands.
fn conflict_matrix_orchestration_observability(
    freshness: &GraphDbFreshnessReport,
    candidates: &[ConflictMatrixCandidate],
    conflicts: &[ConflictMatrixPair],
    next_commands: &[String],
) -> ConflictMatrixOrchestrationObservability {
    let mut evidence_packet_ids = Vec::with_capacity(candidates.len());
    let mut unique_hashes = BTreeSet::new();
    let mut conflict_matrix_decisions = Vec::with_capacity(candidates.len() + conflicts.len());
    let mut worker_ownership_blocks = Vec::with_capacity(candidates.len());

    for candidate in candidates {
        evidence_packet_ids.push(candidate.evidence_packet_id.clone());
        if let Some(hash) = candidate.projection_hash.clone() {
            unique_hashes.insert(hash);
        }
        conflict_matrix_decisions.push(format!(
            "candidate #{} {} risk={} previously_completed={} closure_score={} semantic_score={} owned_files={} forbidden_files={}",
            candidate.rank,
            candidate.target,
            conflict_risk_label(candidate.risk),
            candidate.previously_completed,
            candidate.worker_feedback.closure_rank_score,
            candidate.semantic_dispatch_score,
            candidate.ownership.owned_files.len(),
            candidate.ownership.forbidden_files.len()
        ));
        worker_ownership_blocks.push(candidate.ownership.title.clone());
    }

    for pair in conflicts {
        conflict_matrix_decisions.push(format!(
            "pair {}<->{} risk={} verdict={}",
            pair.left,
            pair.right,
            conflict_risk_label(pair.risk),
            pair.verdict
        ));
    }

    let projection_hashes = unique_hashes.into_iter().collect::<Vec<_>>();
    ConflictMatrixOrchestrationObservability {
        contract_version: CONFLICT_MATRIX_CONTRACT_VERSION,
        projection_freshness: freshness.clone(),
        projection_hashes,
        evidence_packet_ids,
        conflict_matrix_decisions,
        worker_ownership_blocks,
        follow_up_commands: next_commands.to_vec(),
    }
}
/// Condenses a prepared context pack into its summary form. Source windows
/// are rendered as `file:start-end` strings; all other fields are copied.
fn conflict_matrix_context_summary(
    context_pack: &ConflictMatrixPreparedContext,
) -> ConflictMatrixContextSummary {
    let source_windows = context_pack
        .source_windows
        .iter()
        .map(|window| format!("{}:{}-{}", window.file, window.start, window.end))
        .collect();
    ConflictMatrixContextSummary {
        target: context_pack.target.clone(),
        target_kind: context_pack.target_kind.clone(),
        prompt_targets: context_pack.prompt_targets.clone(),
        touched_files: context_pack.touched_files.clone(),
        touched_symbols: context_pack.touched_symbols.clone(),
        files_changed: context_pack.files_changed,
        worker_context: context_pack.worker_context.clone(),
        source_windows,
        status_reminders: context_pack.status_reminders.clone(),
    }
}
/// Builds the suggested follow-up commands for a conflict-matrix run.
///
/// The list holds one `graph-db evidence` command per target, then a
/// `context-pack` command for `path`, a cached `diff-digest` command and a
/// cached `impact` command (with `--scope` when a scope is given), and is
/// finally passed through `dedupe_preserve_order`.
fn conflict_matrix_next_commands(
    root: &Path,
    path: &Path,
    scope: Option<&str>,
    targets: &[String],
    depth: usize,
    limit: usize,
    impact_limit: usize,
) -> Vec<String> {
    let root_text = root.to_string_lossy();
    let mut commands: Vec<String> = targets
        .iter()
        .map(|target| {
            format!(
                "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
                shell_quote(root_text.as_ref()),
                graph_db_scope_arg(scope),
                shell_quote(target),
                depth,
                limit
            )
        })
        .collect();

    commands.extend([
        format!(
            "tsift --envelope context-pack {} --budget normal",
            shell_quote(path.to_string_lossy().as_ref())
        ),
        format!(
            "tsift diff-digest --cached {} --json",
            shell_quote(root_text.as_ref())
        ),
        format!(
            "tsift impact {} --cached{} --limit {} --json",
            shell_quote(root_text.as_ref()),
            match scope {
                Some(scope) => format!(" --scope {}", shell_quote(scope)),
                None => String::new(),
            },
            impact_limit
        ),
    ]);
    dedupe_preserve_order(commands)
}
/// Prints a conflict-matrix report to stdout in human-readable form.
///
/// `compact` selects a single summary line for the header; otherwise a
/// multi-line header is printed. The rest of the output is the same in both
/// modes: candidates (with reasons and worker feedback), conflict pairs (with
/// shared files and symbols), next commands, worker prompt titles, warnings,
/// and finally the per-target fail-closed details when there are any.
fn print_conflict_matrix_human(report: &ConflictMatrixReport, compact: bool) {
if compact {
println!(
"conflict-matrix targets:{} candidates:{} conflicts:{} can_parallel:{} fail_closed:{} cross_safe:{} per_target_fail_closed:{}",
report.targets.len(),
report.candidates.len(),
report.conflicts.len(),
report.can_parallel,
report.fail_closed,
report.cross_target_parallel_safe,
report.per_target_fail_closed.len()
);
} else {
println!("Conflict matrix");
println!(" targets: {}", report.targets.join(", "));
println!(" can parallel: {}", report.can_parallel);
println!(" fail closed: {}", report.fail_closed);
println!(
" cross target parallel safe: {}",
report.cross_target_parallel_safe
);
println!(
" per target fail closed: {}",
report.per_target_fail_closed.len()
);
}
for candidate in &report.candidates {
println!(
"candidate #{} {} risk:{} score:{} semantic:{} files:{} symbols:{} tests:{}",
candidate.rank,
candidate.target,
conflict_risk_label(candidate.risk),
candidate.risk_score,
candidate.semantic_dispatch_score,
candidate.owned_files.len(),
candidate.owned_symbols.len(),
candidate.affected_tests.len()
);
if candidate.previously_completed {
println!(" previously completed: true");
}
for reason in &candidate.risk_reasons {
println!(" reason: {reason}");
}
// Worker feedback, closure reasons and warnings are only shown when feedback rows exist.
if candidate.worker_feedback.total > 0 {
println!(
" worker feedback: completed:{} blocked:{} files:{} tests:{} follow-ups:{} closure:{}",
candidate.worker_feedback.completed,
candidate.worker_feedback.blocked,
candidate.worker_feedback.touched_files.len(),
candidate.worker_feedback.expected_tests.len(),
candidate.worker_feedback.follow_up_ids.len(),
candidate.worker_feedback.closure_rank_score
);
for reason in &candidate.worker_feedback.closure_rank_reasons {
println!(" closure: {reason}");
}
for warning in &candidate.worker_feedback.warnings {
println!(" warning: {warning}");
}
}
}
for pair in &report.conflicts {
println!(
"conflict {} <-> {} risk:{} score:{} verdict:{}",
pair.left,
pair.right,
conflict_risk_label(pair.risk),
pair.risk_score,
pair.verdict
);
// Only shared files and symbols are listed; shared tests and config files are not printed.
for file in &pair.shared_files {
println!(" shared file: {file}");
}
for symbol in &pair.shared_symbols {
println!(" shared symbol: {symbol}");
}
}
for command in &report.next_commands {
println!("next: {command}");
}
for packet in &report.worker_prompt_packets {
println!("worker-prompt #{} {}", packet.rank, packet.title);
}
for warning in &report.warnings {
println!("warning: {warning}");
}
if !report.per_target_fail_closed.is_empty() {
println!(
"per-target fail closed: {} target(s)",
report.per_target_fail_closed.len()
);
for target in &report.per_target_fail_closed {
println!(
" {} source_handles:{} owned_files:{} reasons:{}",
target.target,
target.source_handle_count,
target.owned_files.len(),
target.risk_reasons.join("; ")
);
}
}
}
/// Prepared inputs for a conflict-matrix run: the context pack, the staged
/// (cached) diff digest and the impact report, together with the cache
/// summary and the timings recorded while preparing them.
///
/// Values of this type are what `CONFLICT_MATRIX_PREPARATION_CACHE` stores;
/// the type is serializable.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparedInputs {
context_pack: ConflictMatrixPreparedContext,
cached_diff: diff_digest::DiffDigestReport,
impact_report: impact::ImpactReport,
// Cache key, status and watermarks describing how these inputs were obtained.
preparation_cache: ConflictMatrixPreparationCacheSummary,
// NOTE(review): presumably one timing entry per preparation phase; confirm against the producer.
preparation_timings: Vec<GraphDbBackendEvalPhaseTiming>,
}
/// Graph nodes and edges held together with a `ConflictMatrixGraphIndex`.
// NOTE(review): `index` is presumably derived from `nodes` and `edges`; confirm where the snapshot is built.
struct ConflictMatrixGraphSnapshot {
nodes: Vec<SubstrateGraphNode>,
edges: Vec<SubstrateGraphEdge>,
index: ConflictMatrixGraphIndex,
}
/// A graph-db evidence report paired with its packet summary. Serializable.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixPreparedEvidence {
report: GraphDbEvidenceReport,
summary: ConflictMatrixEvidencePacketSummary,
}
/// Graph-side inputs for a conflict-matrix run: the target list, the graph
/// snapshot, the prepared evidence entries and the shared-preparation summary.
// NOTE(review): `evidence` presumably holds one entry per target; confirm against the builder.
struct ConflictMatrixGraphPreparedInputs {
targets: Vec<String>,
graph: ConflictMatrixGraphSnapshot,
evidence: Vec<ConflictMatrixPreparedEvidence>,
shared_preparation: ConflictMatrixSharedPreparationSummary,
}
/// Serializable record carrying the same data as
/// `ConflictMatrixGraphPreparedInputs`, with the graph stored as plain node
/// and edge lists (no index) and tagged with a cache `version` and `key`.
// NOTE(review): presumably the form written to and read from the on-disk cache; confirm against the callers.
#[derive(Clone, Serialize, Deserialize)]
struct ConflictMatrixGraphPreparedCache {
version: String,
key: String,
targets: Vec<String>,
nodes: Vec<SubstrateGraphNode>,
edges: Vec<SubstrateGraphEdge>,
evidence: Vec<ConflictMatrixPreparedEvidence>,
shared_preparation: ConflictMatrixSharedPreparationSummary,
}
/// Process-wide, lazily initialised map from a string key to prepared
/// conflict-matrix inputs, guarded by a mutex. Obtain it through
/// `conflict_matrix_preparation_cache()`.
static CONFLICT_MATRIX_PREPARATION_CACHE: OnceLock<
Mutex<BTreeMap<String, ConflictMatrixPreparedInputs>>,
> = OnceLock::new();
/// Returns the shared in-memory preparation cache, creating the empty map on
/// first use.
fn conflict_matrix_preparation_cache()
-> &'static Mutex<BTreeMap<String, ConflictMatrixPreparedInputs>> {
    CONFLICT_MATRIX_PREPARATION_CACHE.get_or_init(Mutex::default)
}
/// Returns the BLAKE3 hash of `bytes` as a hexadecimal string.
fn hash_bytes_hex(bytes: &[u8]) -> String {
    let digest = blake3::hash(bytes);
    digest.to_hex().to_string()
}
/// Returns the conflict-matrix disk cache directory under `root`
/// (`.tsift/conflict-matrix-cache`).
fn conflict_matrix_disk_cache_dir(root: &Path) -> PathBuf {
    let mut dir = root.to_path_buf();
    dir.push(".tsift/conflict-matrix-cache");
    dir
}
/// Returns the path of one disk cache entry:
/// `<cache dir>/<kind>/<key>.json`.
fn conflict_matrix_disk_cache_path(root: &Path, kind: &str, key: &str) -> PathBuf {
    let mut path = conflict_matrix_disk_cache_dir(root);
    path.push(kind);
    path.push(format!("{key}.json"));
    path
}
/// Loads a disk cache entry and decodes it from JSON.
///
/// Returns `None` when the file cannot be read or does not decode as `T`.
fn conflict_matrix_read_disk_cache<T: for<'de> Deserialize<'de>>(
    root: &Path,
    kind: &str,
    key: &str,
) -> Option<T> {
    let path = conflict_matrix_disk_cache_path(root, kind, key);
    match fs::read(path) {
        Ok(bytes) => serde_json::from_slice(&bytes).ok(),
        Err(_) => None,
    }
}
/// Writes a disk cache entry as JSON on a best-effort basis.
///
/// The parent directory is created if needed. Any failure (no parent,
/// directory creation, serialization, or the write itself) is ignored.
fn conflict_matrix_write_disk_cache<T: Serialize>(root: &Path, kind: &str, key: &str, value: &T) {
    let path = conflict_matrix_disk_cache_path(root, kind, key);
    // Each `?` abandons the attempt silently; the outcome is deliberately discarded.
    let attempt = || -> Option<()> {
        let parent = path.parent()?;
        fs::create_dir_all(parent).ok()?;
        let bytes = serde_json::to_vec(value).ok()?;
        fs::write(&path, bytes).ok()
    };
    let _ = attempt();
}
/// Computes a watermark hash for the conflict-matrix document at `path`.
///
/// For a regular path the hash covers the file's bytes. For a directory it
/// covers only the string `directory:<canonical path>` (falling back to the
/// given path when canonicalization fails), not the directory's contents.
///
/// # Errors
/// Returns an error when `path` is not a directory and cannot be read.
fn conflict_matrix_document_watermark(path: &Path) -> Result<String> {
    if !path.is_dir() {
        let contents = fs::read(path)
            .with_context(|| format!("reading conflict-matrix document {}", path.display()))?;
        return Ok(hash_bytes_hex(&contents));
    }
    let resolved = match path.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => path.to_path_buf(),
    };
    let label = format!("directory:{}", resolved.display());
    Ok(hash_bytes_hex(label.as_bytes()))
}
/// Computes a watermark hash for the staged (index) diff of the repository at
/// `root`.
///
/// Runs `git -C <root> diff --cached --raw --no-ext-diff` and hashes the exit
/// status text followed by stdout and stderr. If `git` cannot be spawned, the
/// hash covers a `git-diff-cached-unavailable:<error>` marker instead.
fn conflict_matrix_staged_diff_watermark(root: &Path) -> String {
    let mut git = Command::new("git");
    git.arg("-C")
        .arg(root)
        .args(["diff", "--cached", "--raw", "--no-ext-diff"]);
    let output = match git.output() {
        Ok(output) => output,
        Err(err) => {
            return hash_bytes_hex(format!("git-diff-cached-unavailable:{err:#}").as_bytes());
        }
    };
    let status = output.status.to_string();
    let material = [
        status.as_bytes(),
        output.stdout.as_slice(),
        output.stderr.as_slice(),
    ]
    .concat();
    hash_bytes_hex(&material)
}
/// Builds the cache summary (watermarks plus derived key) for a conflict-matrix
/// preparation.
///
/// The key hashes the cache version, `root`, `path`, the scope (`"root"` when
/// absent), and the source, document, and staged-diff watermarks. The returned
/// summary starts with status `"memory_miss"`.
///
/// # Errors
/// Propagates failures from the source watermark, the document watermark, and
/// the key hashing.
fn conflict_matrix_preparation_cache_summary(
    root: &Path,
    path: &Path,
    scope: Option<&str>,
) -> Result<ConflictMatrixPreparationCacheSummary> {
    let source_watermark = match traversal_source_watermark(root, path, scope, false)? {
        Some(watermark) => watermark,
        None => "unavailable".to_string(),
    };
    let document_watermark = conflict_matrix_document_watermark(path)?;
    let staged_diff_watermark = conflict_matrix_staged_diff_watermark(root);
    let scope_label = scope.unwrap_or("root");
    let key_parts = vec![
        format!("version:{CONFLICT_MATRIX_PREPARATION_CACHE_VERSION}"),
        format!("root:{}", root.display()),
        format!("path:{}", path.display()),
        format!("scope:{scope_label}"),
        format!("source:{source_watermark}"),
        format!("document:{document_watermark}"),
        format!("staged_diff:{staged_diff_watermark}"),
    ];
    let key = content_hash(&key_parts)?;
    Ok(ConflictMatrixPreparationCacheSummary {
        version: CONFLICT_MATRIX_PREPARATION_CACHE_VERSION.to_string(),
        key,
        status: "memory_miss".to_string(),
        source_watermark,
        document_watermark,
        staged_diff_watermark,
    })
}
/// Rewrites a cached preparation packet so it reports a cache hit.
///
/// Sets the cache status to `status` and replaces the timings with a single
/// `preparation_cache_lookup` entry (carrying `duration_micros` and `detail`)
/// followed by zero-duration entries for every phase that was skipped.
fn conflict_matrix_prepared_inputs_cache_hit(
    mut cached: ConflictMatrixPreparedInputs,
    status: &str,
    duration_micros: u128,
    detail: &str,
) -> ConflictMatrixPreparedInputs {
    // Phases reported with zero duration on a hit, in emission order.
    const SKIPPED_PHASES: &[&str] = &[
        "session_review_compute",
        "session_review_compute.target_context_build",
        "session_review_compute.session_discovery",
        "session_review_compute.session_digest_total",
        "session_review_compute.session_cost_total",
        "session_review_compute.session_aggregation",
        "session_review_compute.report_assembly",
        "status_index_gate",
        "status_index_gate.prepare_agent_doc_index_gate",
        "status_index_gate.context_pack_status_reminders",
        "status_index_gate.load_tag_ontology_preview_context",
        "context_pack_diff",
        "exploration_materialization",
        "graph_orchestration",
        "staged_diff",
        "impact",
        "impact.context_resolution",
        "impact.diff_digest",
        "impact.test_path_scan",
        "impact.index_open",
        "impact.call_edge_impacts",
        "impact.route_handler_impacts",
        "impact.import_impacts",
        "impact.report_assembly",
    ];

    let reuse_note = format!(
        "reused from {status} conflict-matrix preparation cache by source/document/staged-diff watermark; cost accounted in preparation_cache_lookup"
    );

    let mut timings = Vec::with_capacity(SKIPPED_PHASES.len() + 1);
    timings.push(graph_db_backend_eval_phase_timing(
        "preparation_cache_lookup",
        duration_micros,
        detail,
    ));
    timings.extend(
        SKIPPED_PHASES
            .iter()
            .map(|&phase| graph_db_backend_eval_phase_timing(phase, 0, &reuse_note)),
    );

    cached.preparation_cache.status = status.to_string();
    cached.preparation_timings = timings;
    cached
}
pub(crate) fn prepare_conflict_matrix_inputs(
root: &Path,
path: &Path,
scope: Option<&str>,
impact_limit: usize,
) -> Result<ConflictMatrixPreparedInputs> {
let cache_lookup_started = Instant::now();
let mut cache_summary = conflict_matrix_preparation_cache_summary(root, path, scope)?;
if let Some(cached) = conflict_matrix_preparation_cache()
.lock()
.map_err(|_| anyhow::anyhow!("conflict-matrix preparation cache lock poisoned"))?
.get(&cache_summary.key)
.cloned()
{
return Ok(conflict_matrix_prepared_inputs_cache_hit(
cached,
"memory_hit",
cache_lookup_started.elapsed().as_micros(),
"reused prepared context-pack, staged diff, and impact packet from memory by source/document/staged-diff watermark",
));
}
if let Some(cached) = conflict_matrix_read_disk_cache::<ConflictMatrixPreparedInputs>(
root,
"inputs",
&cache_summary.key,
) {
let cached = conflict_matrix_prepared_inputs_cache_hit(
cached,
"disk_hit",
cache_lookup_started.elapsed().as_micros(),
"reused prepared context-pack, staged diff, and impact packet from .tsift/conflict-matrix-cache by source/document/staged-diff watermark",
);
conflict_matrix_preparation_cache()
.lock()
.map_err(|_| anyhow::anyhow!("conflict-matrix preparation cache lock poisoned"))?
.insert(cached.preparation_cache.key.clone(), cached.clone());
return Ok(cached);
}
let mut preparation_timings = vec![graph_db_backend_eval_phase_timing(
"preparation_cache_lookup",
cache_lookup_started.elapsed().as_micros(),
"no prepared packet matched the source/document/staged-diff watermark",
)];
cache_summary.status = "computed".to_string();
let (context_pack_report, context_pack_timings) = build_context_pack_report_with_profile(
path,
None,
None,
None,
ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Normal), false),
)?;
preparation_timings.extend(context_pack_timings);
let context_pack = ConflictMatrixPreparedContext::from_context_pack(&context_pack_report);
let cached_diff = graph_db_backend_eval_timed_phase(
&mut preparation_timings,
"staged_diff",
"cached/staged diff digest used for ownership overlap checks",
|| {
diff_digest::compute(
root,
diff_digest::DiffDigestOptions {
cached: true,
revision: None,
max_parsed_files: None,
},
)
.with_context(|| format!("computing cached diff digest for {}", root.display()))
},
)?;
let impact_started = Instant::now();
let (impact_report, impact_sub_phases) = impact::compute_with_phases(
root,
impact::ImpactOptions {
cached: true,
revision: None,
scope,
limit: impact_limit,
},
)
.with_context(|| format!("computing cached impact report for {}", root.display()))?;
let impact_total_micros = impact_started.elapsed().as_micros();
preparation_timings.push(graph_db_backend_eval_phase_timing(
"impact",
impact_total_micros,
"cached impact analysis used for affected-test ownership checks",
));
for sub in &impact_sub_phases {
preparation_timings.push(graph_db_backend_eval_phase_timing(
&format!("impact.{}", sub.name),
sub.duration_micros,
&sub.detail,
));
}
let prepared = ConflictMatrixPreparedInputs {
context_pack,
cached_diff,
impact_report,
preparation_cache: cache_summary,
preparation_timings,
};
conflict_matrix_preparation_cache()
.lock()
.map_err(|_| anyhow::anyhow!("conflict-matrix preparation cache lock poisoned"))?
.insert(prepared.preparation_cache.key.clone(), prepared.clone());
conflict_matrix_write_disk_cache(root, "inputs", &prepared.preparation_cache.key, &prepared);
Ok(prepared)
}
/// Condenses an evidence report into its packet summary.
///
/// The replay command is the report's first replay command when present;
/// otherwise a `tsift graph-db ... evidence` command is synthesized from
/// `root`, `scope`, `target`, `depth`, and `limit`.
fn conflict_matrix_evidence_packet_summary(
    root: &Path,
    scope: Option<&str>,
    target: &str,
    depth: usize,
    limit: usize,
    evidence: &GraphDbEvidenceReport,
) -> ConflictMatrixEvidencePacketSummary {
    let replay_command = match evidence.replay_commands.first() {
        Some(recorded) => recorded.clone(),
        None => format!(
            "tsift graph-db --path {}{} evidence {} --depth {} --limit {} --json",
            shell_quote(root.to_string_lossy().as_ref()),
            graph_db_scope_arg(scope),
            shell_quote(target),
            depth,
            limit
        ),
    };
    ConflictMatrixEvidencePacketSummary {
        target: evidence.target.clone(),
        packet_id: evidence.packet_id.clone(),
        target_node_id: evidence.target_node.id.clone(),
        projection_hash: evidence.projection_hash.clone(),
        replay_command,
    }
}
/// Summarizes a graph preparation: snapshot sizes plus totals of source
/// handles, worker context, worker results, and semantic rows across all
/// evidence packets.
///
/// The dispatch-trace snapshot counts are filled with the same node and edge
/// counts as the graph itself.
fn conflict_matrix_shared_preparation_summary(
    graph: &ConflictMatrixGraphSnapshot,
    evidence: &[ConflictMatrixPreparedEvidence],
    evidence_cache_status: &str,
) -> ConflictMatrixSharedPreparationSummary {
    let node_total = graph.nodes.len();
    let edge_total = graph.edges.len();

    let mut source_handles = 0;
    let mut worker_context = 0;
    let mut worker_results = 0;
    let mut semantic_rows = 0;
    for packet in evidence {
        let report = &packet.report;
        source_handles += report.source_handles.len();
        worker_context += report.worker_context.len();
        worker_results += report.worker_results.len();
        semantic_rows += report.semantic_related.len();
    }

    ConflictMatrixSharedPreparationSummary {
        evidence_cache_status: evidence_cache_status.to_string(),
        graph_nodes: node_total,
        graph_edges: edge_total,
        evidence_packets: evidence.len(),
        source_handles,
        worker_context,
        worker_results,
        semantic_rows,
        dispatch_trace_snapshot_nodes: node_total,
        dispatch_trace_snapshot_edges: edge_total,
    }
}
/// Snapshots the entire store: every node and edge, plus the index built over
/// the nodes.
///
/// # Errors
/// Propagates failures from reading nodes or edges out of the store.
#[allow(dead_code)]
fn conflict_matrix_graph_snapshot(store: &impl GraphStore) -> Result<ConflictMatrixGraphSnapshot> {
    let (nodes, edges) = (store.all_nodes()?, store.all_edges()?);
    let index = conflict_matrix_graph_index(&nodes);
    let snapshot = ConflictMatrixGraphSnapshot {
        nodes,
        edges,
        index,
    };
    Ok(snapshot)
}
/// Adds `node` to the map under its id unless that id is already present; an
/// existing entry is never replaced.
fn insert_conflict_graph_node(
    nodes: &mut BTreeMap<String, SubstrateGraphNode>,
    node: SubstrateGraphNode,
) {
    if let std::collections::btree_map::Entry::Vacant(slot) = nodes.entry(node.id.clone()) {
        slot.insert(node);
    }
}
/// Adds `edge` to the map under its `(from_id, kind, to_id)` key unless that
/// key is already present; an existing entry is never replaced.
fn insert_conflict_graph_edge(
    edges: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
    edge: SubstrateGraphEdge,
) {
    let identity = (edge.from_id.clone(), edge.kind.clone(), edge.to_id.clone());
    if let std::collections::btree_map::Entry::Vacant(slot) = edges.entry(identity) {
        slot.insert(edge);
    }
}
/// Collects the file paths an evidence report refers to.
///
/// Includes the target node's `path` property when the target is a `file`,
/// `symbol`, or `route` node, plus the file of every source handle that
/// `conflict_matrix_source_handle` can resolve.
fn conflict_matrix_files_from_evidence(evidence: &GraphDbEvidenceReport) -> BTreeSet<String> {
    let mut files = BTreeSet::new();
    let target = &evidence.target_node;
    let target_is_path_backed = matches!(target.kind.as_str(), "file" | "symbol" | "route");
    if target_is_path_backed {
        if let Some(path) = target.properties.get("path") {
            files.insert(path.clone());
        }
    }
    files.extend(
        evidence
            .source_handles
            .iter()
            .filter_map(|node| conflict_matrix_source_handle(node))
            .map(|handle| handle.file),
    );
    files
}
/// Adds every node that lies on one of the report's shortest paths and is not
/// already in `nodes`, looking each one up in the store by id.
///
/// Shortest-path entries without a path, and ids the store does not know, are
/// skipped.
///
/// # Errors
/// Propagates store lookup failures.
fn conflict_matrix_add_path_nodes<S: GraphStore>(
    store: &S,
    nodes: &mut BTreeMap<String, SubstrateGraphNode>,
    evidence: &GraphDbEvidenceReport,
) -> Result<()> {
    let resolved_paths = evidence
        .shortest_paths
        .iter()
        .filter_map(|entry| entry.path.as_ref());
    for graph_path in resolved_paths {
        for id in &graph_path.nodes {
            // Only hit the store for ids we have not collected yet.
            if !nodes.contains_key(id) {
                if let Some(found) = store.node(id)? {
                    insert_conflict_graph_node(nodes, found);
                }
            }
        }
    }
    Ok(())
}
/// For each file, adds the `file`, `route`, and `symbol` nodes whose `path`
/// property equals that file.
///
/// # Errors
/// Propagates store query failures.
fn conflict_matrix_add_file_symbol_nodes<S: GraphStore>(
    store: &S,
    nodes: &mut BTreeMap<String, SubstrateGraphNode>,
    files: &BTreeSet<String>,
) -> Result<()> {
    const PATH_KEYED_KINDS: [&str; 3] = ["file", "route", "symbol"];
    for file in files {
        for kind in PATH_KEYED_KINDS {
            let by_path = GraphPropertyFilter {
                key: "path".to_string(),
                value: file.clone(),
            };
            let options = GraphQueryOptions {
                property_filters: vec![by_path],
                ..GraphQueryOptions::default()
            };
            let page = store.paged_nodes_by_kind(kind, options)?;
            for found in page.nodes {
                insert_conflict_graph_node(nodes, found);
            }
        }
    }
    Ok(())
}
/// Adds the `backlog`, `job_packet`, and `worker_result` nodes that share the
/// target node's `ref_id` property. Does nothing when the target has no
/// `ref_id`.
///
/// # Errors
/// Propagates store query failures.
fn conflict_matrix_add_target_ref_nodes<S: GraphStore>(
    store: &S,
    nodes: &mut BTreeMap<String, SubstrateGraphNode>,
    target_node: &SubstrateGraphNode,
) -> Result<()> {
    const REF_KEYED_KINDS: [&str; 3] = ["backlog", "job_packet", "worker_result"];
    if let Some(ref_id) = target_node.properties.get("ref_id") {
        for kind in REF_KEYED_KINDS {
            let by_ref = GraphPropertyFilter {
                key: "ref_id".to_string(),
                value: ref_id.clone(),
            };
            let options = GraphQueryOptions {
                property_filters: vec![by_ref],
                ..GraphQueryOptions::default()
            };
            let page = store.paged_nodes_by_kind(kind, options)?;
            for found in page.nodes {
                insert_conflict_graph_node(nodes, found);
            }
        }
    }
    Ok(())
}
/// Adds the nodes and edges of the target's neighborhood (up to `depth`) to
/// the working maps.
///
/// A `limit` of zero requests no node cap; otherwise the cap is
/// `limit * max(depth, 1) * 8`, with a floor of 64 (saturating on overflow).
///
/// # Errors
/// Propagates store query failures.
fn conflict_matrix_add_target_neighborhood<S: GraphStore>(
    store: &S,
    nodes: &mut BTreeMap<String, SubstrateGraphNode>,
    edges: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
    target_node: &SubstrateGraphNode,
    depth: usize,
    limit: usize,
) -> Result<()> {
    let node_limit = (limit != 0).then(|| {
        limit
            .saturating_mul(depth.max(1))
            .saturating_mul(8)
            .max(64)
    });
    let options = GraphQueryOptions {
        limit: node_limit,
        ..GraphQueryOptions::default()
    };
    let Some(page) = store.paged_neighborhood(&target_node.id, depth, None, options)? else {
        return Ok(());
    };
    for found in page.nodes {
        insert_conflict_graph_node(nodes, found);
    }
    for found in page.edges {
        insert_conflict_graph_edge(edges, found);
    }
    Ok(())
}
/// Adds every store edge whose endpoints are both among the collected nodes.
///
/// # Errors
/// Propagates store query failures.
fn conflict_matrix_add_scoped_edges<S: GraphStore>(
    store: &S,
    nodes: &BTreeMap<String, SubstrateGraphNode>,
    edges: &mut BTreeMap<(String, String, String), SubstrateGraphEdge>,
) -> Result<()> {
    let node_ids: BTreeSet<String> = nodes.keys().cloned().collect();
    let scoped = store.edges_between_nodes(&node_ids)?;
    for edge in scoped {
        insert_conflict_graph_edge(edges, edge);
    }
    Ok(())
}
/// Builds a graph snapshot limited to what the evidence packets touch, instead
/// of the whole store.
///
/// Per packet it seeds the target node and the nodes carried in the report,
/// then pulls in ref-linked nodes, shortest-path nodes, and the target
/// neighborhood. After all packets it adds nodes for the referenced files and
/// finally the store edges between the collected nodes. Nodes and edges come
/// out in map-key order.
///
/// # Errors
/// Propagates store query failures.
fn conflict_matrix_target_scoped_graph_snapshot<S: GraphStore>(
    store: &S,
    evidence: &[ConflictMatrixPreparedEvidence],
    depth: usize,
    limit: usize,
) -> Result<ConflictMatrixGraphSnapshot> {
    let mut node_map: BTreeMap<String, SubstrateGraphNode> = BTreeMap::new();
    let mut edge_map: BTreeMap<(String, String, String), SubstrateGraphEdge> = BTreeMap::new();
    let mut files = BTreeSet::new();

    for packet in evidence {
        let report = &packet.report;
        let target = &report.target_node;
        insert_conflict_graph_node(&mut node_map, target.clone());

        let carried = report
            .worker_context
            .iter()
            .chain(report.source_handles.iter())
            .chain(report.worker_results.iter())
            .chain(report.semantic_related.iter());
        for node in carried {
            insert_conflict_graph_node(&mut node_map, node.clone());
        }

        files.extend(conflict_matrix_files_from_evidence(report));
        conflict_matrix_add_target_ref_nodes(store, &mut node_map, target)?;
        conflict_matrix_add_path_nodes(store, &mut node_map, report)?;
        conflict_matrix_add_target_neighborhood(
            store,
            &mut node_map,
            &mut edge_map,
            target,
            depth,
            limit,
        )?;
    }

    // File-derived nodes must be in place before edges are scoped to the node set.
    conflict_matrix_add_file_symbol_nodes(store, &mut node_map, &files)?;
    conflict_matrix_add_scoped_edges(store, &node_map, &mut edge_map)?;

    let nodes: Vec<_> = node_map.into_values().collect();
    let edges: Vec<_> = edge_map.into_values().collect();
    let index = conflict_matrix_graph_index(&nodes);
    Ok(ConflictMatrixGraphSnapshot {
        nodes,
        edges,
        index,
    })
}
/// Collects one evidence report and packet summary per target, in target
/// order, stopping at the first failure.
///
/// # Errors
/// Returns the first evidence-collection error, annotated with the target it
/// was collected for.
#[allow(clippy::too_many_arguments)]
fn collect_conflict_matrix_evidence_packets<S: GraphStore>(
    root: &Path,
    scope: Option<&str>,
    backend: &str,
    targets: &[String],
    depth: usize,
    limit: usize,
    store: &S,
    freshness: GraphDbFreshnessReport,
) -> Result<Vec<ConflictMatrixPreparedEvidence>> {
    targets
        .iter()
        .map(|target| -> Result<ConflictMatrixPreparedEvidence> {
            let input = GraphDbEvidenceInput {
                root,
                scope,
                backend,
                target,
                depth,
                limit,
                store,
                freshness: freshness.clone(),
                warnings: Vec::new(),
            };
            let report = graph_db_evidence_report_from_store(input)
                .with_context(|| format!("collecting graph-db evidence for {target}"))?;
            let summary =
                conflict_matrix_evidence_packet_summary(root, scope, target, depth, limit, &report);
            Ok(ConflictMatrixPreparedEvidence { report, summary })
        })
        .collect()
}
/// Derives the graph-preparation cache key by hashing the cache version, the
/// prepared-inputs key, scope (`"root"` when absent), backend, targets, depth,
/// limit, and the projection's version, content hash, and source watermark.
///
/// # Errors
/// Propagates hashing failures.
fn conflict_matrix_graph_preparation_cache_key(
    prepared: &ConflictMatrixPreparedInputs,
    scope: Option<&str>,
    backend: &str,
    targets: &[String],
    depth: usize,
    limit: usize,
    freshness: &GraphDbFreshnessReport,
) -> Result<String> {
    let prepared_inputs_key = prepared.preparation_cache.key.as_str();
    let scope_label = scope.unwrap_or("root");
    let projection_version = freshness.projection_version.as_deref();
    let projection_hash = freshness.content_hash.as_deref();
    let source_watermark = freshness.source_watermark.as_deref();
    let key_material = serde_json::json!({
        "version": CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION,
        "prepared_inputs_key": prepared_inputs_key,
        "scope": scope_label,
        "backend": backend,
        "targets": targets,
        "depth": depth,
        "limit": limit,
        "projection_version": projection_version,
        "projection_hash": projection_hash,
        "source_watermark": source_watermark,
    });
    content_hash(&key_material)
}
fn conflict_matrix_graph_prepared_cache_hit(
cached: ConflictMatrixGraphPreparedCache,
status: &str,
) -> ConflictMatrixGraphPreparedInputs {
let mut shared_preparation = cached.shared_preparation;
shared_preparation.evidence_cache_status = status.to_string();
let index = conflict_matrix_graph_index(&cached.nodes);
ConflictMatrixGraphPreparedInputs {
targets: cached.targets,
graph: ConflictMatrixGraphSnapshot {
nodes: cached.nodes,
edges: cached.edges,
index,
},
evidence: cached.evidence,
shared_preparation,
}
}
fn conflict_matrix_graph_prepared_cache_from_inputs(
key: &str,
prepared: &ConflictMatrixGraphPreparedInputs,
) -> ConflictMatrixGraphPreparedCache {
ConflictMatrixGraphPreparedCache {
version: CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION.to_string(),
key: key.to_string(),
targets: prepared.targets.clone(),
nodes: prepared.graph.nodes.clone(),
edges: prepared.graph.edges.clone(),
evidence: prepared.evidence.clone(),
shared_preparation: prepared.shared_preparation.clone(),
}
}
/// Prepares the graph-side conflict-matrix inputs for the resolved targets,
/// using the disk cache when possible.
///
/// A cached entry is accepted only if its version, key, and targets all match
/// the current request. Otherwise evidence is collected, a target-scoped
/// snapshot is built, and the result is written to the disk cache.
///
/// # Errors
/// Propagates target resolution, key hashing, evidence collection, and
/// snapshot failures.
#[allow(clippy::too_many_arguments)]
fn prepare_conflict_matrix_graph_orchestration<S: GraphStore>(
    root: &Path,
    scope: Option<&str>,
    backend: &str,
    raw_targets: &[String],
    prepared: &ConflictMatrixPreparedInputs,
    depth: usize,
    limit: usize,
    store: &S,
    freshness: GraphDbFreshnessReport,
) -> Result<ConflictMatrixGraphPreparedInputs> {
    let targets = resolve_conflict_matrix_targets(store, raw_targets, &prepared.context_pack)?;
    let graph_cache_key = conflict_matrix_graph_preparation_cache_key(
        prepared, scope, backend, &targets, depth, limit, &freshness,
    )?;

    let reusable = conflict_matrix_read_disk_cache::<ConflictMatrixGraphPreparedCache>(
        root,
        "graph",
        &graph_cache_key,
    )
    .filter(|entry| {
        entry.version == CONFLICT_MATRIX_GRAPH_PREPARATION_CACHE_VERSION
            && entry.key == graph_cache_key
            && entry.targets == targets
    });
    if let Some(entry) = reusable {
        return Ok(conflict_matrix_graph_prepared_cache_hit(entry, "disk_hit"));
    }

    let evidence = collect_conflict_matrix_evidence_packets(
        root, scope, backend, &targets, depth, limit, store, freshness,
    )?;
    let graph = conflict_matrix_target_scoped_graph_snapshot(store, &evidence, depth, limit)?;
    let shared_preparation =
        conflict_matrix_shared_preparation_summary(&graph, &evidence, "computed");
    let prepared_graph = ConflictMatrixGraphPreparedInputs {
        targets,
        graph,
        evidence,
        shared_preparation,
    };

    let entry = conflict_matrix_graph_prepared_cache_from_inputs(&graph_cache_key, &prepared_graph);
    conflict_matrix_write_disk_cache(root, "graph", &graph_cache_key, &entry);
    Ok(prepared_graph)
}
/// Assembles the conflict-matrix report from already-prepared inputs and graph
/// data.
///
/// Builds one candidate per evidence packet, applies worker-feedback controls,
/// sorts and ranks the candidates, derives pairwise conflicts, then applies
/// ownership blocks and scheduler fields before computing the parallel/
/// fail-closed verdicts and the replay commands.
///
/// Warnings are accumulated in this order: context-pack status reminders,
/// `extra_warnings`, per-packet evidence warnings, then worker-feedback
/// warnings prefixed with the candidate target.
#[allow(clippy::too_many_arguments)]
fn build_conflict_matrix_report_from_prepared_graph(
    root: &Path,
    path: &Path,
    scope: Option<&str>,
    depth: usize,
    limit: usize,
    impact_limit: usize,
    freshness: GraphDbFreshnessReport,
    extra_warnings: Vec<String>,
    prepared: &ConflictMatrixPreparedInputs,
    graph_prepared: &ConflictMatrixGraphPreparedInputs,
) -> Result<ConflictMatrixReport> {
    let context_pack = &prepared.context_pack;
    let graph_index = &graph_prepared.graph.index;
    let targets = graph_prepared.targets.clone();

    let mut warnings = context_pack.status_reminders.clone();
    warnings.extend(extra_warnings);

    let mut evidence_packets = Vec::new();
    let mut candidates = Vec::new();
    for entry in &graph_prepared.evidence {
        let report = &entry.report;
        warnings.extend(report.warnings.clone());
        evidence_packets.push(entry.summary.clone());
        let candidate = conflict_matrix_candidate_from_evidence(
            root,
            report,
            graph_index,
            &prepared.cached_diff,
            &prepared.impact_report,
        );
        candidates.push(candidate);
    }

    // Feedback controls run before sorting; the sort below reads candidate
    // fields such as risk and the closure rank score.
    apply_conflict_matrix_worker_feedback_controls(&mut candidates);
    candidates.sort_by(|a, b| {
        // Ascending risk and risk score; descending closure rank and semantic
        // dispatch score; target name as the final tie-break.
        a.risk
            .cmp(&b.risk)
            .then_with(|| a.risk_score.cmp(&b.risk_score))
            .then_with(|| {
                b.worker_feedback
                    .closure_rank_score
                    .cmp(&a.worker_feedback.closure_rank_score)
            })
            .then_with(|| b.semantic_dispatch_score.cmp(&a.semantic_dispatch_score))
            .then_with(|| a.target.cmp(&b.target))
    });
    for (position, candidate) in candidates.iter_mut().enumerate() {
        candidate.rank = position + 1;
    }
    for candidate in &candidates {
        for warning in candidate.worker_feedback.warnings.iter() {
            warnings.push(format!("{}: {warning}", candidate.target));
        }
    }

    let conflicts = build_conflict_matrix_pairs(&candidates);
    apply_conflict_matrix_ownership_blocks(&mut candidates);
    apply_conflict_matrix_scheduler_fields(&mut candidates, &conflicts);
    let worker_prompt_packets = conflict_matrix_worker_prompt_packets(&candidates);
    let per_target_fail_closed = conflict_matrix_per_target_fail_closed(&candidates);

    let cross_target_parallel_safe = conflicts
        .iter()
        .all(|pair| pair.risk <= ConflictMatrixRisk::Medium);
    let any_pair_fail_closed = conflicts
        .iter()
        .any(|pair| pair.risk == ConflictMatrixRisk::FailClosed);
    let fail_closed = !per_target_fail_closed.is_empty() || any_pair_fail_closed;
    let can_parallel = !fail_closed && cross_target_parallel_safe;

    let next_commands =
        conflict_matrix_next_commands(root, path, scope, &targets, depth, limit, impact_limit);
    let orchestration = conflict_matrix_orchestration_observability(
        &freshness,
        &candidates,
        &conflicts,
        &next_commands,
    );

    let quoted_root = shell_quote(root.to_string_lossy().as_ref());
    let scope_flag = match scope {
        Some(value) => format!(" --scope {}", shell_quote(value)),
        None => String::new(),
    };
    let context_pack_command = format!(
        "tsift --envelope context-pack {} --budget normal",
        shell_quote(path.to_string_lossy().as_ref())
    );
    let cached_diff_command = format!("tsift diff-digest --cached {} --json", quoted_root);
    let impact_command = format!(
        "tsift impact {} --cached{} --limit {} --json",
        quoted_root, scope_flag, impact_limit
    );
    let inputs = ConflictMatrixInputSummary {
        graph_db_evidence_targets: targets.clone(),
        evidence_packets,
        shared_preparation: graph_prepared.shared_preparation.clone(),
        preparation_cache: prepared.preparation_cache.clone(),
        preparation_timings: prepared.preparation_timings.clone(),
        context_pack_command,
        cached_diff_command,
        impact_command,
    };

    let context_summary = conflict_matrix_context_summary(context_pack);
    Ok(ConflictMatrixReport {
        contract_version: CONFLICT_MATRIX_CONTRACT_VERSION,
        root: root.to_string_lossy().to_string(),
        scope: scope.map(str::to_string),
        targets,
        can_parallel,
        fail_closed,
        cross_target_parallel_safe,
        per_target_fail_closed,
        inputs,
        context_pack: context_summary,
        cached_diff: prepared.cached_diff.clone(),
        impact: prepared.impact_report.clone(),
        candidates,
        worker_prompt_packets,
        conflicts,
        orchestration,
        next_commands,
        warnings,
    })
}
/// Builds the report from already-prepared inputs: prepares the graph-side
/// data against `store` (always with the `"sqlite"` backend label), then
/// assembles the report from it.
///
/// # Errors
/// Propagates graph preparation and report assembly failures.
#[allow(clippy::too_many_arguments)]
fn build_conflict_matrix_report_with_prepared<S: GraphStore>(
    root: &Path,
    path: &Path,
    scope: Option<&str>,
    raw_targets: &[String],
    depth: usize,
    limit: usize,
    impact_limit: usize,
    store: &S,
    freshness: GraphDbFreshnessReport,
    extra_warnings: Vec<String>,
    prepared: &ConflictMatrixPreparedInputs,
) -> Result<ConflictMatrixReport> {
    // Graph preparation takes its own copy; the original goes to report assembly.
    let preparation_freshness = freshness.clone();
    let graph_prepared = prepare_conflict_matrix_graph_orchestration(
        root,
        scope,
        "sqlite",
        raw_targets,
        prepared,
        depth,
        limit,
        store,
        preparation_freshness,
    )?;
    build_conflict_matrix_report_from_prepared_graph(
        root,
        path,
        scope,
        depth,
        limit,
        impact_limit,
        freshness,
        extra_warnings,
        prepared,
        &graph_prepared,
    )
}
/// Builds the report against an already-open store: prepares (or reuses) the
/// shared inputs, then delegates to the prepared-inputs variant.
///
/// # Errors
/// Propagates input preparation and report building failures.
#[allow(clippy::too_many_arguments)]
fn build_conflict_matrix_report_with_store<S: GraphStore>(
    root: &Path,
    path: &Path,
    scope: Option<&str>,
    raw_targets: &[String],
    depth: usize,
    limit: usize,
    impact_limit: usize,
    store: &S,
    freshness: GraphDbFreshnessReport,
    extra_warnings: Vec<String>,
) -> Result<ConflictMatrixReport> {
    let shared_inputs = prepare_conflict_matrix_inputs(root, path, scope, impact_limit)?;
    build_conflict_matrix_report_with_prepared(
        root,
        path,
        scope,
        raw_targets,
        depth,
        limit,
        impact_limit,
        store,
        freshness,
        extra_warnings,
        &shared_inputs,
    )
}
fn build_conflict_matrix_report(
path: &Path,
scope: Option<&str>,
raw_targets: &[String],
depth: usize,
limit: usize,
impact_limit: usize,
) -> Result<ConflictMatrixReport> {
let root = lint::resolve_project_root_or_canonical_path(path)?;
let source_watermark = traversal_source_watermark(&root, path, scope, false)?;
if graph_db_backend_eval_cached_refresh(&root, scope, source_watermark.as_deref())?.is_none() {
write_traversal_graph_store(&root, path, scope)
.with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
}
let graph_db = graph_substrate_db_path(&root, scope);
let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
.with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
let mut warnings = Vec::new();
if let Some(recovery) = store.read_only_recovery() {
warnings.push(graph_db_read_recovery_diagnostic(recovery));
}
build_conflict_matrix_report_with_store(
&root,
path,
scope,
raw_targets,
depth,
limit,
impact_limit,
&store,
freshness,
warnings,
)
}
/// CLI handler for `conflict-matrix`: builds the report and prints it either
/// as JSON/envelope output or in the human-readable form.
///
/// # Errors
/// Propagates report building and JSON/envelope printing failures.
fn cmd_conflict_matrix(
    path: &Path,
    scope: Option<&str>,
    raw_targets: &[String],
    depth: usize,
    limit: usize,
    impact_limit: usize,
    format: OutputFormat,
) -> Result<()> {
    let report =
        build_conflict_matrix_report(path, scope, raw_targets, depth, limit, impact_limit)?;

    if !format.json_output {
        print_conflict_matrix_human(&report, format.compact);
        return Ok(());
    }

    let target_count = report.targets.len();
    let blocked_count = report.per_target_fail_closed.len();
    let text = format!(
        "Conflict matrix for {} target(s): can_parallel={} fail_closed={} cross_target_parallel_safe={} per_target_fail_closed={}",
        target_count,
        report.can_parallel,
        report.fail_closed,
        report.cross_target_parallel_safe,
        blocked_count
    );
    let metrics = vec![
        envelope_metric("targets", target_count),
        envelope_metric("candidates", report.candidates.len()),
        envelope_metric("conflicts", report.conflicts.len()),
        envelope_metric("can_parallel", report.can_parallel),
        envelope_metric("fail_closed", report.fail_closed),
        envelope_metric(
            "cross_target_parallel_safe",
            report.cross_target_parallel_safe,
        ),
        envelope_metric("per_target_fail_closed", blocked_count),
    ];
    print_json_or_envelope(
        &report,
        &format,
        "conflict-matrix",
        "parallel-planning",
        ToolEnvelopeSummary { text, metrics },
        report.fail_closed,
        report.next_commands.clone(),
    )
}
/// Per-kind node counts for a dispatch trace.
#[derive(Serialize)]
struct DispatchTraceSummary {
/// Number of `backlog` nodes.
backlog: usize,
/// Number of `job_packet` nodes.
job_packet: usize,
/// Number of `worker_result` nodes.
worker_result: usize,
/// Number of `worker_context` nodes.
worker_context: usize,
/// Number of `source_handle` nodes.
source_handle: usize,
/// Number of `semantic_concept` and `semantic_entity` nodes combined.
semantic_rows: usize,
}
/// Serialized dispatch-trace report: the conflict-matrix orchestration data
/// together with the filtered graph nodes and edges that make up the trace.
#[derive(Serialize)]
struct DispatchTraceReport {
/// Version tag of the dispatch-trace output contract.
contract_version: &'static str,
/// Project root, taken from the conflict-matrix report.
root: String,
/// Scope, omitted from the output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
/// Targets, taken from the conflict-matrix report.
targets: Vec<String>,
/// Freshness of the graph-db projection the trace was built from.
projection_freshness: GraphDbFreshnessReport,
/// Projection hashes from the conflict-matrix orchestration data.
projection_hashes: Vec<String>,
/// Evidence packet ids from the conflict-matrix orchestration data.
evidence_packet_ids: Vec<String>,
/// Counts and cache status describing the shared preparation.
shared_preparation: ConflictMatrixSharedPreparationSummary,
/// Worker prompt packets carried over from the conflict-matrix report.
worker_prompt_packets: Vec<ConflictMatrixWorkerPromptPacket>,
/// Worker feedback, one entry per conflict-matrix candidate.
worker_feedback: Vec<ConflictMatrixWorkerFeedback>,
/// Per-kind counts over `nodes`.
summary: DispatchTraceSummary,
/// Trace nodes, sorted by kind rank and then id.
nodes: Vec<SubstrateGraphNode>,
/// Edges whose endpoints are both in `nodes`, sorted by from id, kind, to id.
edges: Vec<SubstrateGraphEdge>,
/// Decisions from the conflict-matrix orchestration data.
conflict_matrix_decisions: Vec<String>,
/// The conflict-matrix report's next commands.
replay_commands: Vec<String>,
/// Graph-db repair commands for this root and scope.
repair_commands: Vec<String>,
/// True when id collection stopped at the node cap.
truncated: bool,
/// Conflict-matrix warnings plus extra warnings; omitted when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Returns whether nodes of `kind` may appear in a dispatch trace. The match is
/// exact and case-sensitive.
fn dispatch_trace_allowed_node_kind(kind: &str) -> bool {
    const ALLOWED_KINDS: [&str; 11] = [
        "session",
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "semantic_concept",
        "semantic_entity",
        "file",
        "symbol",
        "route",
    ];
    ALLOWED_KINDS.contains(&kind)
}
/// Returns the sort rank of a node kind for dispatch-trace output: the kind's
/// position in the fixed ordering below, or 99 for any other kind.
fn dispatch_trace_kind_rank(kind: &str) -> usize {
    // Index in this array is the rank.
    const RANKED_KINDS: [&str; 11] = [
        "backlog",
        "job_packet",
        "worker_result",
        "worker_context",
        "source_handle",
        "file",
        "symbol",
        "route",
        "semantic_concept",
        "semantic_entity",
        "session",
    ];
    RANKED_KINDS
        .iter()
        .position(|known| *known == kind)
        .unwrap_or(99)
}
/// Counts the nodes of each summarized kind. `semantic_concept` and
/// `semantic_entity` are counted together; other kinds are ignored.
fn dispatch_trace_summary(nodes: &[SubstrateGraphNode]) -> DispatchTraceSummary {
    let mut summary = DispatchTraceSummary {
        backlog: 0,
        job_packet: 0,
        worker_result: 0,
        worker_context: 0,
        source_handle: 0,
        semantic_rows: 0,
    };
    for node in nodes {
        match node.kind.as_str() {
            "backlog" => summary.backlog += 1,
            "job_packet" => summary.job_packet += 1,
            "worker_result" => summary.worker_result += 1,
            "worker_context" => summary.worker_context += 1,
            "source_handle" => summary.source_handle += 1,
            "semantic_concept" | "semantic_entity" => summary.semantic_rows += 1,
            _ => {}
        }
    }
    summary
}
/// Builds the shared-preparation summary for a dispatch trace.
///
/// The evidence cache status comes from the conflict-matrix report, node and
/// edge counts from the supplied snapshot, the packet count from the
/// orchestration evidence ids, and the remaining totals are summed over the
/// report's candidates.
fn dispatch_trace_shared_preparation_summary(
    graph_nodes: &[SubstrateGraphNode],
    graph_edges: &[SubstrateGraphEdge],
    conflict: &ConflictMatrixReport,
) -> ConflictMatrixSharedPreparationSummary {
    let node_total = graph_nodes.len();
    let edge_total = graph_edges.len();

    let mut source_handles = 0;
    let mut worker_context = 0;
    let mut worker_results = 0;
    let mut semantic_rows = 0;
    for candidate in &conflict.candidates {
        source_handles += candidate.source_handles.len();
        worker_context += candidate.worker_context_handles.len();
        worker_results += candidate.worker_feedback.total;
        semantic_rows += candidate.semantic_related.len();
    }

    ConflictMatrixSharedPreparationSummary {
        evidence_cache_status: conflict
            .inputs
            .shared_preparation
            .evidence_cache_status
            .clone(),
        graph_nodes: node_total,
        graph_edges: edge_total,
        evidence_packets: conflict.orchestration.evidence_packet_ids.len(),
        source_handles,
        worker_context,
        worker_results,
        semantic_rows,
        dispatch_trace_snapshot_nodes: node_total,
        dispatch_trace_snapshot_edges: edge_total,
    }
}
/// Collects the node ids that belong in a dispatch trace and reports whether
/// collection was cut off by the node cap.
///
/// Seeds are each candidate's target node and its source, worker-context, and
/// semantic handles, plus every allowed-kind node whose `ref_id` matches a
/// target (with leading `#` characters stripped). The set then grows for up to
/// `max(depth, 1)` rounds: for every edge touching an id known at the start of
/// the round, both endpoints are added if they are known nodes of an allowed
/// kind.
///
/// The cap is unlimited when `limit` is zero, otherwise
/// `limit * max(targets, 1) * 12` with a floor of 64. It is checked before each
/// edge, so the seed set and the last processed edge can push the result above
/// the cap.
fn dispatch_trace_collect_ids(
    targets: &[String],
    candidates: &[ConflictMatrixCandidate],
    graph_nodes: &[SubstrateGraphNode],
    graph_edges: &[SubstrateGraphEdge],
    depth: usize,
    limit: usize,
) -> (BTreeSet<String>, bool) {
    let target_refs: BTreeSet<String> = targets
        .iter()
        .map(|target| target.trim_start_matches('#').to_string())
        .collect();

    let mut ids: BTreeSet<String> = BTreeSet::new();
    for candidate in candidates {
        ids.insert(candidate.target_node_id.clone());
        ids.extend(
            candidate
                .source_handles
                .iter()
                .map(|source| source.handle.clone()),
        );
        ids.extend(candidate.worker_context_handles.iter().cloned());
        ids.extend(
            candidate
                .semantic_related
                .iter()
                .map(|semantic| semantic.handle.clone()),
        );
    }
    ids.extend(
        graph_nodes
            .iter()
            .filter(|node| dispatch_trace_allowed_node_kind(&node.kind))
            .filter(|node| {
                node.properties
                    .get("ref_id")
                    .is_some_and(|ref_id| target_refs.contains(ref_id))
            })
            .map(|node| node.id.clone()),
    );

    let node_by_id: BTreeMap<&str, &SubstrateGraphNode> = graph_nodes
        .iter()
        .map(|node| (node.id.as_str(), node))
        .collect();
    let max_nodes = match limit {
        0 => usize::MAX,
        _ => limit
            .saturating_mul(targets.len().max(1))
            .saturating_mul(12)
            .max(64),
    };

    let mut truncated = false;
    for _ in 0..depth.max(1) {
        let size_before = ids.len();
        // Expand only from ids known when the round started.
        let frontier = ids.clone();
        for edge in graph_edges {
            if ids.len() >= max_nodes {
                truncated = true;
                break;
            }
            if !(frontier.contains(&edge.from_id) || frontier.contains(&edge.to_id)) {
                continue;
            }
            for endpoint in [&edge.from_id, &edge.to_id] {
                let allowed = node_by_id
                    .get(endpoint.as_str())
                    .is_some_and(|node| dispatch_trace_allowed_node_kind(&node.kind));
                if allowed {
                    ids.insert(endpoint.clone());
                }
            }
        }
        // Stop once the cap was hit or a round added nothing new.
        if truncated || ids.len() == size_before {
            break;
        }
    }
    (ids, truncated)
}
/// Builds the dispatch-trace report from a conflict-matrix report and a graph
/// snapshot.
///
/// Keeps only the snapshot nodes selected by `dispatch_trace_collect_ids`
/// (sorted by kind rank, then id) and the edges whose endpoints both survive
/// (sorted by from id, kind, to id). Warnings are the conflict-matrix warnings
/// followed by `extra_warnings`.
#[allow(clippy::too_many_arguments)]
fn build_dispatch_trace_report_from_conflict_snapshot(
    root: &Path,
    scope: Option<&str>,
    conflict: ConflictMatrixReport,
    graph_nodes: Vec<SubstrateGraphNode>,
    graph_edges: Vec<SubstrateGraphEdge>,
    depth: usize,
    limit: usize,
    extra_warnings: Vec<String>,
) -> Result<DispatchTraceReport> {
    // The summary counts the full snapshot, so compute it before filtering.
    let shared_preparation =
        dispatch_trace_shared_preparation_summary(&graph_nodes, &graph_edges, &conflict);
    let (ids, truncated) = dispatch_trace_collect_ids(
        &conflict.targets,
        &conflict.candidates,
        &graph_nodes,
        &graph_edges,
        depth,
        limit,
    );

    let mut nodes: Vec<SubstrateGraphNode> = graph_nodes
        .into_iter()
        .filter(|node| ids.contains(&node.id))
        .collect();
    nodes.sort_by(|a, b| {
        let by_kind = dispatch_trace_kind_rank(&a.kind).cmp(&dispatch_trace_kind_rank(&b.kind));
        by_kind.then_with(|| a.id.cmp(&b.id))
    });

    let kept_ids: BTreeSet<&str> = nodes.iter().map(|node| node.id.as_str()).collect();
    let mut edges: Vec<SubstrateGraphEdge> = graph_edges
        .into_iter()
        .filter(|edge| {
            kept_ids.contains(edge.from_id.as_str()) && kept_ids.contains(edge.to_id.as_str())
        })
        .collect();
    edges.sort_by(|a, b| {
        (&a.from_id, &a.kind, &a.to_id).cmp(&(&b.from_id, &b.kind, &b.to_id))
    });

    let worker_feedback: Vec<_> = conflict
        .candidates
        .iter()
        .map(|candidate| candidate.worker_feedback.clone())
        .collect();
    let summary = dispatch_trace_summary(&nodes);
    let mut warnings = conflict.warnings;
    warnings.extend(extra_warnings);
    let orchestration = conflict.orchestration;

    Ok(DispatchTraceReport {
        contract_version: DISPATCH_TRACE_CONTRACT_VERSION,
        root: conflict.root,
        scope: conflict.scope,
        targets: conflict.targets,
        projection_freshness: orchestration.projection_freshness,
        projection_hashes: orchestration.projection_hashes,
        evidence_packet_ids: orchestration.evidence_packet_ids,
        shared_preparation,
        worker_prompt_packets: conflict.worker_prompt_packets,
        worker_feedback,
        summary,
        nodes,
        edges,
        conflict_matrix_decisions: orchestration.conflict_matrix_decisions,
        replay_commands: conflict.next_commands,
        repair_commands: graph_db_repair_commands(root, scope),
        truncated,
        warnings,
    })
}
fn build_dispatch_trace_report(
path: &Path,
scope: Option<&str>,
raw_targets: &[String],
depth: usize,
limit: usize,
impact_limit: usize,
) -> Result<DispatchTraceReport> {
let root = lint::resolve_project_root_or_canonical_path(path)?;
let source_watermark = traversal_source_watermark(&root, path, scope, false)?;
if graph_db_backend_eval_cached_refresh(&root, scope, source_watermark.as_deref())?.is_none() {
write_traversal_graph_store(&root, path, scope)
.with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
}
let graph_db = graph_substrate_db_path(&root, scope);
let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
.with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
let extra_warnings = store
.read_only_recovery()
.map(graph_db_read_recovery_diagnostic)
.into_iter()
.collect::<Vec<_>>();
let prepared = prepare_conflict_matrix_inputs(&root, path, scope, impact_limit)?;
let graph_prepared = prepare_conflict_matrix_graph_orchestration(
&root,
scope,
"sqlite",
raw_targets,
&prepared,
depth,
limit,
&store,
freshness.clone(),
)?;
let conflict = build_conflict_matrix_report_from_prepared_graph(
&root,
path,
scope,
depth,
limit,
impact_limit,
freshness,
extra_warnings.clone(),
&prepared,
&graph_prepared,
)?;
build_dispatch_trace_report_from_conflict_snapshot(
&root,
scope,
conflict,
graph_prepared.graph.nodes,
graph_prepared.graph.edges,
depth,
limit,
extra_warnings,
)
}
/// Renders `report` as a self-contained HTML page.
///
/// The report is serialized to JSON and embedded in a `<script type="application/json">`
/// data block; an inline script parses it, lays the nodes out on rings grouped by kind,
/// draws the graph as SVG and fills the side panel lists.
///
/// Escaping happens at two layers:
/// * the embedded JSON has `</` and `<!--` neutralised so report text can neither close
///   the data block early nor push the HTML parser into its escaped-script state;
/// * every string interpolated into `innerHTML` by the page script goes through the
///   script's `escapeHtml`, which maps `& < > " '` to HTML entities.
///
/// # Errors
/// Returns the `serde_json` error if the report cannot be serialized.
fn dispatch_trace_html(report: &DispatchTraceReport) -> Result<String> {
    // `<` only occurs inside JSON string values, so both rewrites stay valid JSON:
    // `\/` and `\u0021` are legal string escapes that `JSON.parse` turns back into `/` and `!`.
    let json = serde_json::to_string(report)?
        .replace("</", "<\\/")
        .replace("<!--", "<\\u0021--");
    let mut html = String::new();
    html.push_str(
        "<!doctype html><html><head><meta charset=\"utf-8\"><title>tsift dispatch trace</title>",
    );
    html.push_str(
        r#"<style>
:root{color-scheme:light dark;--bg:#f7f8fb;--panel:#fff;--text:#17202a;--muted:#5c6674;--line:#d7dce3;--edge:#8b98a8;--accent:#0f766e}
@media (prefers-color-scheme:dark){:root{--bg:#111318;--panel:#1b2028;--text:#ecf1f7;--muted:#a8b3c1;--line:#323946;--edge:#667386;--accent:#2dd4bf}}
*{box-sizing:border-box}body{margin:0;background:var(--bg);color:var(--text);font-family:Inter,ui-sans-serif,system-ui,sans-serif;line-height:1.4}.page{max-width:1280px;margin:0 auto;padding:20px}.top{display:flex;align-items:flex-end;justify-content:space-between;gap:16px;margin-bottom:14px}.top h1{font-size:22px;margin:0}.meta{color:var(--muted);font-size:13px}.layout{display:grid;grid-template-columns:minmax(0,1fr) 360px;gap:14px}.panel,.side{background:var(--panel);border:1px solid var(--line);border-radius:8px;overflow:hidden}.side{padding:14px;overflow:auto;max-height:720px}.side h2{font-size:15px;margin:12px 0 8px}.side h2:first-child{margin-top:0}.list{display:grid;gap:8px}.row{border:1px solid var(--line);border-radius:6px;padding:8px}.kind{font-size:11px;text-transform:uppercase;color:var(--muted);letter-spacing:.04em}.label{font-weight:650;overflow-wrap:anywhere}.handle,code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:12px;color:var(--muted);overflow-wrap:anywhere}svg{width:100%;height:680px;display:block}.edge{stroke:var(--edge);stroke-width:1.4;opacity:.72}.node{stroke:var(--panel);stroke-width:2}.node-label{font-size:12px;paint-order:stroke;stroke:var(--panel);stroke-width:4px;stroke-linejoin:round;fill:var(--text)}@media(max-width:900px){.top{display:block}.layout{grid-template-columns:1fr}.side{max-height:none}svg{height:560px}}
</style>"#,
    );
    html.push_str("</head><body><div class=\"page\">");
    // Header values are escaped on the Rust side because they are written straight into markup.
    html.push_str(&format!(
        "<header class=\"top\"><div><h1>tsift dispatch trace</h1><div class=\"meta\">targets <code>{}</code> | evidence <code>{}</code> | nodes <code>{}</code> | worker_prompt_packets <code>{}</code></div></div><div class=\"meta\"><code>{}</code></div></header>",
        html_escape(&report.targets.join(", ")),
        report.evidence_packet_ids.len(),
        report.nodes.len(),
        report.worker_prompt_packets.len(),
        html_escape(report.contract_version)
    ));
    html.push_str(
        r#"<main class="layout"><section class="panel"><svg id="graph-canvas" role="img" aria-label="Dispatch trace graph"></svg></section><aside class="side"><h2>Worker Prompt Packets</h2><div id="packets" class="list"></div><h2>Worker Feedback</h2><div id="feedback" class="list"></div><h2>Nodes</h2><div id="nodes" class="list"></div></aside></main>"#,
    );
    html.push_str("<script id=\"trace-data\" type=\"application/json\">");
    html.push_str(&json);
    // The page script. `escapeHtml` must emit real entities: the lists below are assigned
    // through `innerHTML`, so labels, ids and file names from the report are untrusted markup.
    html.push_str(
        r##"</script><script>
const report = JSON.parse(document.getElementById("trace-data").textContent);
const svg = document.getElementById("graph-canvas");
const nodeList = document.getElementById("nodes");
const packets = document.getElementById("packets");
const feedback = document.getElementById("feedback");
const nodes = report.nodes.map((node, index) => ({...node, index}));
const nodeById = new Map(nodes.map(node => [node.id, node]));
const edges = report.edges.filter(edge => nodeById.has(edge.from_id) && nodeById.has(edge.to_id));
const colorByKind = new Map([["backlog","#dc2626"],["job_packet","#ea580c"],["worker_result","#15803d"],["worker_context","#475569"],["source_handle","#64748b"],["semantic_concept","#9a3412"],["semantic_entity","#b45309"],["file","#2563eb"],["symbol","#16a34a"],["route","#7c3aed"],["session","#0891b2"]]);
function color(kind){return colorByKind.get(kind)||"#6b7280";}
function text(value){return value == null ? "" : String(value);}
function escapeHtml(value){return text(value).replace(/[&<>"']/g, ch => ({"&":"&amp;","<":"&lt;",">":"&gt;","\"":"&quot;","'":"&#39;"}[ch]));}
function layout(){
const rect = svg.getBoundingClientRect();
const width = rect.width || 900, height = rect.height || 680, cx = width / 2, cy = height / 2;
const kinds = [...new Set(nodes.map(node => node.kind))].sort();
const counts = new Map();
for (const node of nodes) counts.set(node.kind, (counts.get(node.kind)||0)+1);
const offsets = new Map();
for (const node of nodes) {
const group = kinds.indexOf(node.kind);
const index = offsets.get(node.kind) || 0;
offsets.set(node.kind, index + 1);
const total = counts.get(node.kind) || 1;
const ring = Math.min(width, height) * (0.18 + ((group % 4) * 0.09));
const angle = Math.PI * 2 * index / Math.max(total, 1) + group * 0.53;
node.x = cx + Math.cos(angle) * ring;
node.y = cy + Math.sin(angle) * ring;
}
}
function draw(){
svg.innerHTML = "";
for (const edge of edges) {
const from = nodeById.get(edge.from_id), to = nodeById.get(edge.to_id);
const line = document.createElementNS("http://www.w3.org/2000/svg", "line");
line.setAttribute("x1", from.x); line.setAttribute("y1", from.y);
line.setAttribute("x2", to.x); line.setAttribute("y2", to.y);
line.setAttribute("class", "edge");
line.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = edge.kind;
svg.appendChild(line);
}
for (const node of nodes) {
const circle = document.createElementNS("http://www.w3.org/2000/svg", "circle");
circle.setAttribute("cx", node.x); circle.setAttribute("cy", node.y);
circle.setAttribute("r", node.kind.startsWith("semantic_") ? 8 : 6);
circle.setAttribute("fill", color(node.kind));
circle.setAttribute("class", "node");
circle.appendChild(document.createElementNS("http://www.w3.org/2000/svg", "title")).textContent = node.kind + ": " + node.label;
svg.appendChild(circle);
const label = document.createElementNS("http://www.w3.org/2000/svg", "text");
label.setAttribute("x", node.x + 9); label.setAttribute("y", node.y + 4);
label.setAttribute("class", "node-label");
label.textContent = node.label.length > 34 ? node.label.slice(0,31) + "..." : node.label;
svg.appendChild(label);
}
}
packets.innerHTML = report.worker_prompt_packets.map(packet => `<div class="row"><div class="kind">${escapeHtml(packet.contract_version)} - ${escapeHtml(packet.risk)} - parallel_safe ${packet.parallel_safe ? "true" : "false"} - closure ${packet.worker_feedback ? packet.worker_feedback.closure_rank_score : 0}</div><div class="label">${escapeHtml(packet.title)}</div><div class="handle">${escapeHtml(packet.packet_id)}</div><div class="handle">blocks ${escapeHtml((packet.blocks||[]).join(", ") || "none")} | blocked_by ${escapeHtml((packet.blocked_by||[]).join(", ") || "none")}</div></div>`).join("") || "<div class=\"meta\">No packets.</div>";
feedback.innerHTML = report.worker_feedback.map(item => `<div class="row"><div class="kind">completed ${item.completed} - blocked ${item.blocked} - closure ${item.closure_rank_score}</div><div>files ${escapeHtml((item.touched_files||[]).join(", ") || "none")}</div><div>tests ${escapeHtml((item.expected_tests||[]).join(" && ") || "none")}</div>${item.repeated_blockage ? "<div class=\"label\">Repeated blockage</div>" : ""}${(item.stale_expected_tests||[]).length ? `<div class="label">Stale tests: ${escapeHtml(item.stale_expected_tests.join(", "))}</div>` : ""}${(item.follow_up_debt||[]).length ? `<div class="label">Follow-up debt: ${escapeHtml(item.follow_up_debt.join(", "))}</div>` : ""}</div>`).join("") || "<div class=\"meta\">No worker results.</div>";
nodeList.innerHTML = nodes.map(node => `<div class="row"><div class="kind">${escapeHtml(node.kind)}</div><div class="label">${escapeHtml(node.label)}</div><div class="handle">${escapeHtml(node.id)}</div></div>`).join("");
window.addEventListener("resize", () => { layout(); draw(); });
layout(); draw();
</script></div></body></html>"##,
    );
    Ok(html)
}
/// Borrowed argument bundle for `cmd_dispatch_trace`.
///
/// Every field except `trace_format` is forwarded unchanged to
/// `build_dispatch_trace_report`.
struct DispatchTraceOptions<'a> {
/// Path used to resolve the project root and to refresh the graph projection.
path: &'a Path,
/// Optional projection scope; the freshness probe falls back to `"root"` when absent.
scope: Option<&'a str>,
/// Targets as supplied by the caller, before any normalization.
raw_targets: &'a [String],
/// Traversal depth handed to the conflict-matrix and trace builders.
depth: usize,
/// Row/node limit handed to the conflict-matrix and trace builders.
limit: usize,
/// Limit forwarded to conflict-matrix input preparation.
impact_limit: usize,
/// Selects JSON or HTML rendering of the finished report.
trace_format: DispatchTraceFormat,
}
fn cmd_dispatch_trace(
options: DispatchTraceOptions<'_>,
output_format: OutputFormat,
) -> Result<()> {
let report = build_dispatch_trace_report(
options.path,
options.scope,
options.raw_targets,
options.depth,
options.limit,
options.impact_limit,
)?;
match options.trace_format {
DispatchTraceFormat::Json => {
if output_format.envelope {
print_json_or_envelope(
&report,
&output_format,
"dispatch-trace",
"operator-review",
ToolEnvelopeSummary {
text: format!(
"Dispatch trace for {} target(s): {} graph node(s), {} worker prompt packet(s)",
report.targets.len(),
report.nodes.len(),
report.worker_prompt_packets.len()
),
metrics: vec![
envelope_metric("targets", report.targets.len()),
envelope_metric("nodes", report.nodes.len()),
envelope_metric("edges", report.edges.len()),
envelope_metric(
"worker_prompt_packets",
report.worker_prompt_packets.len(),
),
],
},
report.truncated,
report.replay_commands.clone(),
)
} else {
println!(
"{}",
to_json_schema(
&report,
output_format.pretty,
output_format.terse,
output_format.schema
)?
);
Ok(())
}
}
DispatchTraceFormat::Html => {
println!("{}", dispatch_trace_html(&report)?);
Ok(())
}
}
}
/// Working record for one backlog target while the dependency DAG is being built.
///
/// Produced by `dependency_dag_node_profile` and later converted into a serializable
/// `DependencyDagNode`. Collections are ordered sets/maps so that intersections between
/// two profiles and the final output are deterministic.
#[derive(Clone, Debug)]
struct DependencyDagProfile {
// Target id as returned by `dependency_dag_node_id` (the `ref_id` property, or the label without leading `#`).
id: String,
// Id of the backing node in the graph projection.
graph_node_id: String,
// Label of the backlog node.
label: String,
// `path` property of the backlog node, when present.
path: Option<String>,
// `line` property of the backlog node, when present and parseable as an integer.
line: Option<i64>,
// `detail` property of the backlog node; scanned for dependency markers.
detail: Option<String>,
// Paths of mentioned file/route/symbol nodes plus files of reachable source handles.
source_files: BTreeSet<String>,
// Labels of symbol nodes the backlog node mentions.
source_symbols: BTreeSet<String>,
// Subset of `source_files` accepted by `is_planner_config_path`.
config_files: BTreeSet<String>,
// Expected tests copied from the aggregated worker feedback.
expected_tests: BTreeSet<String>,
// Reachable semantic concepts/entities keyed by `"{kind}:{label}"`.
semantic_refs: BTreeMap<String, ConflictMatrixSemanticRef>,
// Feedback aggregated from `worker_result` nodes whose `ref_id` matches `id`.
worker_feedback: ConflictMatrixWorkerFeedback,
}
/// Serialized form of one backlog target in a `DependencyDagReport`.
///
/// Mirrors `DependencyDagProfile` field for field, with the ordered sets flattened into
/// vectors; `path`, `line` and `detail` are omitted from the output when absent.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagNode {
/// Target id used in edges and topological batches.
id: String,
/// Id of the backing node in the graph projection.
graph_node_id: String,
/// Label of the backlog node.
label: String,
/// `path` property of the backlog node.
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<String>,
/// `line` property of the backlog node.
#[serde(skip_serializing_if = "Option::is_none")]
line: Option<i64>,
/// `detail` property of the backlog node.
#[serde(skip_serializing_if = "Option::is_none")]
detail: Option<String>,
/// Files associated with the target.
source_files: Vec<String>,
/// Symbol labels the target mentions.
source_symbols: Vec<String>,
/// Planner configuration files among `source_files`.
config_files: Vec<String>,
/// Tests expected by recorded worker results.
expected_tests: Vec<String>,
/// Semantic concepts/entities reachable from the target.
semantic_refs: Vec<ConflictMatrixSemanticRef>,
/// Aggregated worker feedback for the target.
worker_feedback: ConflictMatrixWorkerFeedback,
}
/// Directed ordering edge between two targets: `from` is scheduled before `to`.
///
/// The builders in this file emit five kinds: `explicit_depends_on` (weight 1000),
/// `explicit_before` (900), `worker_result_follow_up` (700), and the overlap kinds
/// `shared_resource` / `semantic_relation`, whose weight is computed from how much the two
/// targets share. The `shared_*` lists are only filled for overlap edges and are left out
/// of the serialized output when empty.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagEdge {
/// Id of the target that comes first.
from: String,
/// Id of the target that comes after `from`.
to: String,
/// Edge kind; one of the strings listed on the struct.
kind: String,
/// Strength of the edge; larger for explicit declarations than for inferred overlap.
weight: usize,
/// Human-readable explanations of why the edge exists.
reasons: Vec<String>,
/// Files associated with both targets.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
shared_files: Vec<String>,
/// Symbol labels mentioned by both targets.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
shared_symbols: Vec<String>,
/// Expected tests common to both targets.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
shared_tests: Vec<String>,
/// Planner configuration files common to both targets.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
shared_config_files: Vec<String>,
/// Semantic reference keys (`"{kind}:{label}"`) common to both targets.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
shared_semantic_refs: Vec<String>,
}
/// One layer of the topological schedule produced by `dependency_dag_topo_batches`.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagTopoBatch {
/// 1-based position of this batch in the schedule.
batch: usize,
/// Targets whose prerequisites were all satisfied by earlier batches.
targets: Vec<String>,
}
/// Describes the targets that could not be placed in any topological batch.
#[derive(Clone, Debug, Serialize)]
struct DependencyDagCycleDiagnostics {
/// True when at least one target was left unscheduled.
has_cycles: bool,
/// Targets left unscheduled after batching stopped making progress.
blocked_nodes: Vec<String>,
/// Edges whose two endpoints are both in `blocked_nodes`.
cycle_edges: Vec<DependencyDagEdge>,
}
/// Headline counters for a `DependencyDagReport`.
#[derive(Serialize)]
struct DependencyDagSummary {
/// Number of nodes in the report.
nodes: usize,
/// Number of edges in the report.
edges: usize,
/// Number of topological batches.
topo_batches: usize,
/// Copy of `cycle_diagnostics.has_cycles`.
has_cycles: bool,
}
/// Complete output of the dependency-dag command, as built by
/// `build_dependency_dag_report`.
#[derive(Serialize)]
struct DependencyDagReport {
/// Set from `DEPENDENCY_DAG_CONTRACT_VERSION`.
contract_version: &'static str,
/// Resolved project root.
root: String,
/// Projection scope, omitted from the output when none was given.
#[serde(skip_serializing_if = "Option::is_none")]
scope: Option<String>,
/// The path the command was invoked with.
path: String,
/// Resolved target ids, in the order of their backlog nodes.
targets: Vec<String>,
/// Freshness probe result for the graph projection the report was built from.
projection_freshness: GraphDbFreshnessReport,
/// Content hash of the projection, when the freshness probe reported one.
projection_hashes: Vec<String>,
/// One entry per target.
nodes: Vec<DependencyDagNode>,
/// Ordering edges, sorted by `(from, to, kind)`.
edges: Vec<DependencyDagEdge>,
/// Topological schedule of the targets.
topo_batches: Vec<DependencyDagTopoBatch>,
/// Targets and edges that prevented a complete schedule.
cycle_diagnostics: DependencyDagCycleDiagnostics,
/// Headline counters.
summary: DependencyDagSummary,
/// Shell commands that reproduce this report.
replay_commands: Vec<String>,
/// Shell commands suggested for repairing the graph projection.
repair_commands: Vec<String>,
/// Non-fatal diagnostics, omitted from the output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Resolves `target` to the backlog node it stands for.
///
/// A target that already resolves to a `backlog` node is returned as is. Any other node
/// must carry a `ref_id` property; the backlog node with the same `ref_id` is returned,
/// preferring the smallest `line` property (nodes without a parseable line sort first)
/// and breaking ties by node id.
///
/// # Errors
/// Fails when the target cannot be resolved, when the resolved node has no `ref_id`, or
/// when no backlog node shares that `ref_id`.
fn dependency_dag_backlog_node_for_target(
    store: &impl GraphStore,
    target: &str,
) -> Result<SubstrateGraphNode> {
    let resolved = graph_db_resolve_evidence_target(store, target)?
        .with_context(|| format!("dependency-dag target not found: {target}"))?;
    if resolved.kind == "backlog" {
        return Ok(resolved);
    }

    let ref_id = match resolved.properties.get("ref_id") {
        Some(value) => value.clone(),
        None => bail!(
            "dependency-dag target {} resolved to {} without a backlog ref_id",
            target,
            resolved.kind
        ),
    };

    let line_of = |node: &SubstrateGraphNode| {
        node.properties
            .get("line")
            .and_then(|raw| raw.parse::<i64>().ok())
    };
    store
        .nodes_by_kind("backlog")?
        .into_iter()
        .filter(|node| node.properties.get("ref_id") == Some(&ref_id))
        .min_by(|a, b| line_of(a).cmp(&line_of(b)).then_with(|| a.id.cmp(&b.id)))
        .with_context(|| format!("dependency-dag backlog node not found for #{ref_id}"))
}
/// Chooses the backlog nodes a dependency DAG is built over.
///
/// * With explicit `raw_targets`, each one is normalized (falling back to the raw text)
///   and resolved through `dependency_dag_backlog_node_for_target`.
/// * Without targets, every backlog node in the store is used. When `path` names a
///   Markdown file, only backlog nodes whose `path` property equals that file (relative to
///   `root`, with `/` separators) are used; if none match, the unfiltered set is used.
///
/// Duplicate node ids are dropped and the result is ordered by the `line` property and
/// then by node id.
///
/// # Errors
/// Fails when a target cannot be resolved or when no backlog node is found at all.
fn dependency_dag_resolve_backlog_nodes(
    root: &Path,
    path: &Path,
    store: &impl GraphStore,
    raw_targets: &[String],
) -> Result<Vec<SubstrateGraphNode>> {
    // Appends `node` unless a node with the same id was already taken.
    fn push_unique(
        nodes: &mut Vec<SubstrateGraphNode>,
        seen: &mut BTreeSet<String>,
        node: SubstrateGraphNode,
    ) {
        if seen.insert(node.id.clone()) {
            nodes.push(node);
        }
    }

    let mut seen = BTreeSet::new();
    let mut nodes = Vec::new();

    if !raw_targets.is_empty() {
        for target in raw_targets {
            let normalized = normalize_conflict_target(target).unwrap_or_else(|| target.clone());
            let node = dependency_dag_backlog_node_for_target(store, &normalized)?;
            push_unique(&mut nodes, &mut seen, node);
        }
    } else {
        let absolute = if path.is_absolute() {
            path.to_path_buf()
        } else {
            root.join(path)
        };
        let is_markdown = absolute
            .extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| ext.eq_ignore_ascii_case("md"));
        let hinted_rel = if is_markdown {
            Some(
                relativize_pathbuf(&absolute, root)
                    .to_string_lossy()
                    .replace('\\', "/"),
            )
        } else {
            None
        };

        for node in store.nodes_by_kind("backlog")? {
            let matches_hint = match &hinted_rel {
                Some(expected_path) => node.properties.get("path") == Some(expected_path),
                None => true,
            };
            if matches_hint {
                push_unique(&mut nodes, &mut seen, node);
            }
        }
        // The Markdown hint matched nothing: fall back to every backlog node.
        if hinted_rel.is_some() && nodes.is_empty() {
            for node in store.nodes_by_kind("backlog")? {
                push_unique(&mut nodes, &mut seen, node);
            }
        }
    }

    if nodes.is_empty() {
        bail!("dependency-dag needs at least one resolvable backlog id");
    }

    let line_of = |node: &SubstrateGraphNode| {
        node.properties
            .get("line")
            .and_then(|raw| raw.parse::<i64>().ok())
    };
    nodes.sort_by(|a, b| line_of(a).cmp(&line_of(b)).then_with(|| a.id.cmp(&b.id)));
    Ok(nodes)
}
/// Returns the DAG id for a backlog node: its `ref_id` property when present, otherwise
/// the label with any leading `#` characters removed.
fn dependency_dag_node_id(node: &SubstrateGraphNode) -> String {
    match node.properties.get("ref_id") {
        Some(ref_id) => ref_id.clone(),
        None => node.label.trim_start_matches('#').to_string(),
    }
}
/// Collects everything the DAG builder needs to know about one backlog node.
///
/// Sources of information, in the order they are gathered:
/// 1. outgoing `mentions` edges to `file`/`route` nodes (their `path`) and `symbol` nodes
///    (their label, plus `path`);
/// 2. `source_handle` nodes reachable within `depth`, contributing their file;
/// 3. `worker_result` nodes whose `ref_id` equals this node's id, aggregated into worker
///    feedback (which also supplies the expected tests);
/// 4. reachable `semantic_concept` / `semantic_entity` nodes, keyed by `"{kind}:{label}"`
///    with the first occurrence of a key winning.
///
/// A `limit` of `0` means no cap on rows fetched from the store.
///
/// # Errors
/// Propagates store errors from the reachability queries.
fn dependency_dag_node_profile(
    root: &Path,
    store: &impl GraphStore,
    node: &SubstrateGraphNode,
    graph_nodes_by_id: &BTreeMap<String, SubstrateGraphNode>,
    graph_edges: &[SubstrateGraphEdge],
    depth: usize,
    limit: usize,
) -> Result<DependencyDagProfile> {
    let id = dependency_dag_node_id(node);
    let max_rows = match limit {
        0 => usize::MAX,
        capped => capped,
    };

    let mut source_files = BTreeSet::new();
    let mut source_symbols = BTreeSet::new();
    let mentioned = graph_edges
        .iter()
        .filter(|edge| edge.from_id == node.id && edge.kind == "mentions")
        .filter_map(|edge| graph_nodes_by_id.get(&edge.to_id));
    for target in mentioned {
        let target_path = target.properties.get("path").cloned();
        match target.kind.as_str() {
            "symbol" => {
                source_symbols.insert(target.label.clone());
                source_files.extend(target_path);
            }
            "file" | "route" => source_files.extend(target_path),
            _ => {}
        }
    }

    let reachable_handles =
        graph_db_reachable_nodes_by_kind(store, &node.id, "source_handle", depth, max_rows)?;
    for (source, _) in reachable_handles {
        if let Some(handle) = conflict_matrix_source_handle(&source) {
            source_files.insert(handle.file);
        }
    }

    let worker_results: Vec<_> = graph_nodes_by_id
        .values()
        .filter(|candidate| candidate.kind == "worker_result")
        .filter(|candidate| {
            candidate.properties.get("ref_id").map(String::as_str) == Some(id.as_str())
        })
        .cloned()
        .collect();
    let worker_feedback = conflict_matrix_worker_feedback(&worker_results);
    let expected_tests: BTreeSet<String> =
        worker_feedback.expected_tests.iter().cloned().collect();

    // Config files are derived only after every file source above has been merged.
    let config_files: BTreeSet<String> = source_files
        .iter()
        .filter(|file| is_planner_config_path(file))
        .cloned()
        .collect();

    let mut semantic_refs = BTreeMap::new();
    for kind in ["semantic_concept", "semantic_entity"] {
        let reachable = graph_db_reachable_nodes_by_kind(store, &node.id, kind, depth, max_rows)?;
        for (semantic, _) in reachable {
            let item = conflict_matrix_semantic_ref(root, &semantic);
            let key = format!("{}:{}", item.kind, item.label);
            semantic_refs.entry(key).or_insert(item);
        }
    }

    let line = node
        .properties
        .get("line")
        .and_then(|raw| raw.parse::<i64>().ok());
    Ok(DependencyDagProfile {
        id,
        graph_node_id: node.id.clone(),
        label: node.label.clone(),
        path: node.properties.get("path").cloned(),
        line,
        detail: node.properties.get("detail").cloned(),
        source_files,
        source_symbols,
        config_files,
        expected_tests,
        semantic_refs,
        worker_feedback,
    })
}
/// Extracts the target references that follow any of `markers` in `text`.
///
/// Matching is ASCII case-insensitive on the text side only, so markers must be given in
/// lowercase. For every non-overlapping occurrence of a marker, the text after it up to
/// the next `.` or newline is passed to `extract_conflict_target_refs`. Results are
/// returned in discovery order (marker by marker) with duplicates removed.
///
/// Empty markers are ignored: they would match at every position and carry no meaning.
fn dependency_dag_marker_refs(text: &str, markers: &[&str]) -> Vec<String> {
    // ASCII lowercasing keeps byte offsets identical, so positions found in `lower`
    // can be used to slice the original `text`.
    let lower = text.to_ascii_lowercase();
    let mut refs = Vec::new();
    for &marker in markers {
        if marker.is_empty() {
            continue;
        }
        for (pos, _) in lower.match_indices(marker) {
            let tail = &text[pos + marker.len()..];
            let segment = tail.split(['\n', '.']).next().unwrap_or(tail);
            refs.extend(extract_conflict_target_refs(segment));
        }
    }
    dedupe_preserve_order(refs)
}
/// Appends `edge` to `edges` unless it is a self-loop or an edge with the same
/// `(from, to, kind)` triple has been recorded in `seen` before.
fn dependency_dag_push_edge(
    edges: &mut Vec<DependencyDagEdge>,
    seen: &mut BTreeSet<(String, String, String)>,
    edge: DependencyDagEdge,
) {
    let is_self_loop = edge.from == edge.to;
    if is_self_loop {
        return;
    }
    let key = (edge.from.clone(), edge.to.clone(), edge.kind.clone());
    let first_time = seen.insert(key);
    if first_time {
        edges.push(edge);
    }
}
/// Adds edges for ordering that backlog items declare in their `detail` text.
///
/// * After a "depends on"-style marker, each referenced target that is part of
///   `target_ids` yields `referenced -> profile` (`explicit_depends_on`, weight 1000).
/// * After "before"/"unblocks", each referenced target yields `profile -> referenced`
///   (`explicit_before`, weight 900).
///
/// References to ids outside `target_ids` are ignored.
fn dependency_dag_explicit_edges(
    profiles: &[DependencyDagProfile],
    target_ids: &BTreeSet<String>,
    edges: &mut Vec<DependencyDagEdge>,
    seen: &mut BTreeSet<(String, String, String)>,
) {
    const UPSTREAM_MARKERS: [&str; 6] = [
        "depends on",
        "depends-on",
        "deps:",
        "after",
        "blocked by",
        "requires",
    ];
    const DOWNSTREAM_MARKERS: [&str; 2] = ["before", "unblocks"];

    // Explicit edges never carry shared-resource lists.
    let declared_edge = |from: String, to: String, kind: &str, weight: usize, reason: String| {
        DependencyDagEdge {
            from,
            to,
            kind: kind.to_string(),
            weight,
            reasons: vec![reason],
            shared_files: Vec::new(),
            shared_symbols: Vec::new(),
            shared_tests: Vec::new(),
            shared_config_files: Vec::new(),
            shared_semantic_refs: Vec::new(),
        }
    };

    for profile in profiles {
        let detail = profile.detail.as_deref().unwrap_or_default();

        let upstream = dependency_dag_marker_refs(detail, &UPSTREAM_MARKERS)
            .into_iter()
            .filter(|dep| target_ids.contains(dep));
        for dep in upstream {
            let reason = format!("{} declares dependency on #{dep}", profile.id);
            let edge = declared_edge(
                dep,
                profile.id.clone(),
                "explicit_depends_on",
                1000,
                reason,
            );
            dependency_dag_push_edge(edges, seen, edge);
        }

        let downstream_refs = dependency_dag_marker_refs(detail, &DOWNSTREAM_MARKERS)
            .into_iter()
            .filter(|downstream| target_ids.contains(downstream));
        for downstream in downstream_refs {
            let reason = format!(
                "{} declares it should run before #{downstream}",
                profile.id
            );
            let edge = declared_edge(
                profile.id.clone(),
                downstream,
                "explicit_before",
                900,
                reason,
            );
            dependency_dag_push_edge(edges, seen, edge);
        }
    }
}
fn dependency_dag_worker_follow_up_edges(
profiles: &[DependencyDagProfile],
target_ids: &BTreeSet<String>,
edges: &mut Vec<DependencyDagEdge>,
seen: &mut BTreeSet<(String, String, String)>,
) {
for profile in profiles {
for follow_up in &profile.worker_feedback.follow_up_ids {
if target_ids.contains(follow_up) {
dependency_dag_push_edge(
edges,
seen,
DependencyDagEdge {
from: profile.id.clone(),
to: follow_up.clone(),
kind: "worker_result_follow_up".to_string(),
weight: 700,
reasons: vec![format!(
"worker_result for #{} references follow-up #{}",
profile.id, follow_up
)],
shared_files: Vec::new(),
shared_symbols: Vec::new(),
shared_tests: Vec::new(),
shared_config_files: Vec::new(),
shared_semantic_refs: Vec::new(),
},
);
}
}
}
}
/// Adds inferred edges between every pair of profiles that share something.
///
/// The edge always points from the profile that comes first in `profiles` to the later
/// one. Its kind is `shared_resource` when the pair shares files, symbols, tests or config
/// files, and `semantic_relation` when the only overlap is in semantic references. The
/// weight is `100` per shared file and per shared config file, `40` per symbol, `10` per
/// test and `5` per semantic reference.
///
/// NOTE(review): because the direction follows slice order, an overlap edge can point the
/// opposite way from an explicit edge between the same two targets; confirm that the
/// resulting two-node cycle in the topological pass is intended.
fn dependency_dag_overlap_edges(
    profiles: &[DependencyDagProfile],
    edges: &mut Vec<DependencyDagEdge>,
    seen: &mut BTreeSet<(String, String, String)>,
) {
    for (left_idx, left) in profiles.iter().enumerate() {
        let left_semantic: BTreeSet<String> = left.semantic_refs.keys().cloned().collect();
        for right in &profiles[left_idx + 1..] {
            let right_semantic: BTreeSet<String> = right.semantic_refs.keys().cloned().collect();

            let shared_files = sorted_intersection(&left.source_files, &right.source_files);
            let shared_symbols = sorted_intersection(&left.source_symbols, &right.source_symbols);
            let shared_tests = sorted_intersection(&left.expected_tests, &right.expected_tests);
            let shared_config_files = sorted_intersection(&left.config_files, &right.config_files);
            let shared_semantic_refs = sorted_intersection(&left_semantic, &right_semantic);

            let shares_resources = !(shared_files.is_empty()
                && shared_symbols.is_empty()
                && shared_tests.is_empty()
                && shared_config_files.is_empty());
            if !shares_resources && shared_semantic_refs.is_empty() {
                continue;
            }
            let kind = if shares_resources {
                "shared_resource"
            } else {
                "semantic_relation"
            };

            // (label, items, separator) in the order the reasons are reported.
            let reasons: Vec<String> = [
                ("shared files", &shared_files, ", "),
                ("shared symbols", &shared_symbols, ", "),
                ("shared tests", &shared_tests, " && "),
                ("shared config files", &shared_config_files, ", "),
                ("shared semantic refs", &shared_semantic_refs, ", "),
            ]
            .into_iter()
            .filter(|(_, items, _)| !items.is_empty())
            .map(|(label, items, separator)| format!("{label}: {}", items.join(separator)))
            .collect();

            let weight = 100 * shared_files.len()
                + 100 * shared_config_files.len()
                + 40 * shared_symbols.len()
                + 10 * shared_tests.len()
                + 5 * shared_semantic_refs.len();

            let edge = DependencyDagEdge {
                from: left.id.clone(),
                to: right.id.clone(),
                kind: kind.to_string(),
                weight,
                reasons,
                shared_files,
                shared_symbols,
                shared_tests,
                shared_config_files,
                shared_semantic_refs,
            };
            dependency_dag_push_edge(edges, seen, edge);
        }
    }
}
/// Layers `targets` into topological batches (Kahn's algorithm, one batch per round).
///
/// Each batch holds every not-yet-scheduled target whose prerequisites were all scheduled
/// in earlier batches, listed in the order the targets were given. Edges that mention ids
/// outside `targets` are ignored, and several edges between the same ordered pair count
/// once.
///
/// Repeated ids in `targets` are collapsed to their first occurrence. Without that, a
/// duplicated id would be scheduled twice and would decrement its successors' pending
/// counts twice, releasing a successor before its other prerequisites had run.
///
/// Returns the batches together with diagnostics for whatever could not be scheduled:
/// `blocked_nodes` are the leftover targets (members of a cycle, or downstream of one) and
/// `cycle_edges` are the input edges whose two endpoints are both blocked.
fn dependency_dag_topo_batches(
    targets: &[String],
    edges: &[DependencyDagEdge],
) -> (Vec<DependencyDagTopoBatch>, DependencyDagCycleDiagnostics) {
    // Dense index per distinct target, in first-seen order.
    let mut index_of = BTreeMap::<&str, usize>::new();
    let mut unique = Vec::<&str>::new();
    for id in targets {
        if !index_of.contains_key(id.as_str()) {
            index_of.insert(id.as_str(), unique.len());
            unique.push(id.as_str());
        }
    }

    let mut pending = vec![0usize; unique.len()];
    let mut successors = vec![Vec::<usize>::new(); unique.len()];
    let mut seen_pairs = BTreeSet::<(usize, usize)>::new();
    for edge in edges {
        let (Some(&from), Some(&to)) = (
            index_of.get(edge.from.as_str()),
            index_of.get(edge.to.as_str()),
        ) else {
            continue;
        };
        if seen_pairs.insert((from, to)) {
            pending[to] += 1;
            successors[from].push(to);
        }
    }

    let mut scheduled = vec![false; unique.len()];
    let mut batches = Vec::new();
    loop {
        let ready: Vec<usize> = (0..unique.len())
            .filter(|&idx| !scheduled[idx] && pending[idx] == 0)
            .collect();
        if ready.is_empty() {
            break;
        }
        for &idx in &ready {
            scheduled[idx] = true;
            for &next in &successors[idx] {
                pending[next] = pending[next].saturating_sub(1);
            }
        }
        let batch = batches.len() + 1;
        batches.push(DependencyDagTopoBatch {
            batch,
            targets: ready.iter().map(|&idx| unique[idx].to_string()).collect(),
        });
    }

    let blocked_nodes: Vec<String> = (0..unique.len())
        .filter(|&idx| !scheduled[idx])
        .map(|idx| unique[idx].to_string())
        .collect();
    let blocked: BTreeSet<&str> = blocked_nodes.iter().map(String::as_str).collect();
    let cycle_edges: Vec<DependencyDagEdge> = edges
        .iter()
        .filter(|edge| blocked.contains(edge.from.as_str()) && blocked.contains(edge.to.as_str()))
        .cloned()
        .collect();

    (
        batches,
        DependencyDagCycleDiagnostics {
            has_cycles: !blocked_nodes.is_empty(),
            blocked_nodes,
            cycle_edges,
        },
    )
}
/// Builds the single shell command that reproduces a dependency-dag run.
///
/// The path, the optional scope and every target are passed through `shell_quote`;
/// targets are appended after `--json`, separated by spaces.
fn dependency_dag_replay_commands(
    path: &Path,
    scope: Option<&str>,
    targets: &[String],
    depth: usize,
    limit: usize,
) -> Vec<String> {
    let quoted_path = shell_quote(path.to_string_lossy().as_ref());
    let scope_arg = match scope {
        Some(scope) => format!(" --scope {}", shell_quote(scope)),
        None => String::new(),
    };
    let mut command = format!(
        "tsift dependency-dag --path {}{} --depth {} --limit {} --json",
        quoted_path, scope_arg, depth, limit
    );

    let quoted_targets: Vec<String> = targets.iter().map(|target| shell_quote(target)).collect();
    let target_args = quoted_targets.join(" ");
    if !target_args.is_empty() {
        command.push(' ');
        command.push_str(&target_args);
    }
    vec![command]
}
fn build_dependency_dag_report(
path: &Path,
scope: Option<&str>,
raw_targets: &[String],
depth: usize,
limit: usize,
) -> Result<DependencyDagReport> {
let root = lint::resolve_project_root_or_canonical_path(path)?;
write_traversal_graph_store(&root, path, scope)
.with_context(|| format!("refreshing graph-db projection for {}", root.display()))?;
let graph_db = graph_substrate_db_path(&root, scope);
let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
.with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
let mut warnings = Vec::new();
if let Some(recovery) = store.read_only_recovery() {
warnings.push(graph_db_read_recovery_diagnostic(recovery));
}
let freshness = sqlite_graph_freshness(&store, scope.unwrap_or("root"))?;
if freshness.fail_closed {
bail!(
"dependency-dag graph projection failed closed: {}; repair: {}",
freshness.diagnostics.join("; "),
graph_db_repair_commands(&root, scope).join("; ")
);
}
let target_nodes = dependency_dag_resolve_backlog_nodes(&root, path, &store, raw_targets)?;
let graph_nodes = store.all_nodes()?;
let graph_edges = store.all_edges()?;
let graph_nodes_by_id = graph_nodes
.into_iter()
.map(|node| (node.id.clone(), node))
.collect::<BTreeMap<_, _>>();
let profiles = target_nodes
.iter()
.map(|node| {
dependency_dag_node_profile(
&root,
&store,
node,
&graph_nodes_by_id,
&graph_edges,
depth,
limit,
)
})
.collect::<Result<Vec<_>>>()?;
let targets = profiles
.iter()
.map(|profile| profile.id.clone())
.collect::<Vec<_>>();
let target_ids = targets.iter().cloned().collect::<BTreeSet<_>>();
let mut edges = Vec::new();
let mut seen_edges = BTreeSet::new();
dependency_dag_explicit_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
dependency_dag_worker_follow_up_edges(&profiles, &target_ids, &mut edges, &mut seen_edges);
dependency_dag_overlap_edges(&profiles, &mut edges, &mut seen_edges);
edges.sort_by(|left, right| {
left.from
.cmp(&right.from)
.then(left.to.cmp(&right.to))
.then(left.kind.cmp(&right.kind))
});
let (topo_batches, cycle_diagnostics) = dependency_dag_topo_batches(&targets, &edges);
let nodes = profiles
.into_iter()
.map(|profile| DependencyDagNode {
id: profile.id,
graph_node_id: profile.graph_node_id,
label: profile.label,
path: profile.path,
line: profile.line,
detail: profile.detail,
source_files: sorted_set(&profile.source_files),
source_symbols: sorted_set(&profile.source_symbols),
config_files: sorted_set(&profile.config_files),
expected_tests: sorted_set(&profile.expected_tests),
semantic_refs: profile.semantic_refs.into_values().collect(),
worker_feedback: profile.worker_feedback,
})
.collect::<Vec<_>>();
let projection_hashes = freshness
.content_hash
.clone()
.into_iter()
.collect::<Vec<_>>();
let replay_commands = dependency_dag_replay_commands(path, scope, &targets, depth, limit);
let repair_commands = graph_db_repair_commands(&root, scope);
let summary = DependencyDagSummary {
nodes: nodes.len(),
edges: edges.len(),
topo_batches: topo_batches.len(),
has_cycles: cycle_diagnostics.has_cycles,
};
Ok(DependencyDagReport {
contract_version: DEPENDENCY_DAG_CONTRACT_VERSION,
root: root.to_string_lossy().to_string(),
scope: scope.map(str::to_string),
path: path.to_string_lossy().to_string(),
targets,
projection_freshness: freshness,
projection_hashes,
nodes,
edges,
topo_batches,
cycle_diagnostics,
summary,
replay_commands,
repair_commands,
warnings,
})
}
fn print_dependency_dag_human(report: &DependencyDagReport, compact: bool) {
if compact {
println!(
"dependency-dag targets:{} edges:{} batches:{} cycles:{}",
report.targets.len(),
report.edges.len(),
report.topo_batches.len(),
report.cycle_diagnostics.has_cycles
);
} else {
println!("Dependency DAG");
println!(" targets: {}", report.targets.join(", "));
println!(" edges: {}", report.edges.len());
println!(" cycles: {}", report.cycle_diagnostics.has_cycles);
}
for batch in &report.topo_batches {
println!("batch #{}: {}", batch.batch, batch.targets.join(", "));
}
for edge in &report.edges {
println!(
"edge {} -> {} kind:{} weight:{}",
edge.from, edge.to, edge.kind, edge.weight
);
for reason in &edge.reasons {
println!(" reason: {reason}");
}
}
if report.cycle_diagnostics.has_cycles {
println!(
"cycle blocked nodes: {}",
report.cycle_diagnostics.blocked_nodes.join(", ")
);
}
for command in &report.replay_commands {
println!("replay: {command}");
}
for command in &report.repair_commands {
println!("repair: {command}");
}
for warning in &report.warnings {
println!("warning: {warning}");
}
}
/// Entry point for the dependency-dag command: builds the report and prints it.
///
/// With `format.json_output` the report goes through `print_json_or_envelope`, with the
/// cycle flag passed in the position where the dispatch-trace command passes its
/// truncation flag. Otherwise the human-readable rendering is printed.
///
/// # Errors
/// Propagates failures from building or serializing the report.
fn cmd_dependency_dag(
    path: &Path,
    scope: Option<&str>,
    raw_targets: &[String],
    depth: usize,
    limit: usize,
    format: OutputFormat,
) -> Result<()> {
    let report = build_dependency_dag_report(path, scope, raw_targets, depth, limit)?;
    if !format.json_output {
        print_dependency_dag_human(&report, format.compact);
        return Ok(());
    }

    let target_count = report.targets.len();
    let edge_count = report.edges.len();
    let batch_count = report.topo_batches.len();
    let has_cycles = report.cycle_diagnostics.has_cycles;
    let summary = ToolEnvelopeSummary {
        text: format!(
            "Dependency DAG for {} target(s): edges={} batches={} cycles={}",
            target_count, edge_count, batch_count, has_cycles
        ),
        metrics: vec![
            envelope_metric("targets", target_count),
            envelope_metric("edges", edge_count),
            envelope_metric("topo_batches", batch_count),
            envelope_metric("has_cycles", has_cycles),
        ],
    };
    print_json_or_envelope(
        &report,
        &format,
        "dependency-dag",
        "topological-planning",
        summary,
        has_cycles,
        report.replay_commands.clone(),
    )
}
/// Computes a log digest for `input` with `log_digest::compute` and prints it
/// to stdout in the representation selected by `format`.
///
/// * `format.json_output` — prints the report through `to_json_schema`.
/// * `format.compact` — prints one header line followed by one line per
///   signal, repeated line, symbol reference and warning.
/// * otherwise — prints the sectioned human layout (totals, signals, repeated
///   lines, anchored files, symbol candidates, stack groups, warnings).
///
/// # Errors
/// Propagates failures from `log_digest::compute` and `to_json_schema`.
pub(crate) fn render_log_digest_from_input(
    path: &Path,
    input: &str,
    format: OutputFormat,
) -> Result<()> {
    let report = log_digest::compute(path, input)?;

    if format.json_output {
        let rendered = to_json_schema(&report, format.pretty, format.terse, format.schema)?;
        println!("{}", rendered);
        return Ok(());
    }

    if format.compact {
        println!(
            "log lines:{} signals:{} repeats:{} files:{} syms:{} stacks:{}",
            report.non_empty_lines,
            report.signal_groups,
            report.repeated_line_groups,
            report.file_ref_groups,
            report.symbol_ref_groups,
            report.stack_groups
        );
        for signal in &report.signals {
            // A line number without a file is not a usable anchor, so it falls back to "-".
            let location = match (&signal.path, signal.line) {
                (None, _) => "-".to_string(),
                (Some(file), None) => file.clone(),
                (Some(file), Some(line)) => format!("{file}:{line}"),
            };
            println!(
                "{} sev:{} count:{} sums:{} msg:{}",
                location,
                signal.severity,
                signal.occurrences,
                log_digest_summary_label(signal.summary_state),
                truncate_for_compact(&signal.message, 80)
            );
        }
        for repeated in &report.repeated_lines {
            let line = truncate_for_compact(&repeated.line, 80);
            println!("repeat count:{} line:{}", repeated.occurrences, line);
        }
        for symbol in &report.symbol_refs {
            let state = log_digest_summary_label(symbol.summary_state);
            println!(
                "sym:{} count:{} sums:{}",
                symbol.symbol, symbol.occurrences, state
            );
        }
        for warning in &report.warnings {
            println!("warning: {warning}");
        }
        return Ok(());
    }

    // Full human layout: totals first, then one section per non-empty group.
    println!("Log digest");
    println!(" lines: {}", report.total_lines);
    println!(" non-empty lines: {}", report.non_empty_lines);
    println!(" signal groups: {}", report.signal_groups);
    println!(" repeated lines: {}", report.repeated_line_groups);
    println!(" repeated line instances: {}", report.repeated_line_occurrences);
    println!(" file refs: {}", report.file_ref_groups);
    println!(" symbol refs: {}", report.symbol_ref_groups);
    println!(" stack groups: {}", report.stack_groups);

    if !report.signals.is_empty() {
        println!("\nSignals:");
        for signal in &report.signals {
            let anchor = match (&signal.path, signal.line, signal.column) {
                (None, _, _) => "(no file anchor)".to_string(),
                (Some(file), None, _) => format!("{file}"),
                (Some(file), Some(line), None) => format!("{file}:{line}"),
                (Some(file), Some(line), Some(column)) => format!("{file}:{line}:{column}"),
            };
            println!("{anchor}");
            println!(" severity: {}", signal.severity);
            println!(" occurrences: {}", signal.occurrences);
            println!(" message: {}", signal.message);
            println!(
                " cached summaries: {}",
                log_digest_summary_label(signal.summary_state)
            );
            for snippet in &signal.current_summaries {
                let text = truncate_for_compact(&snippet.summary, 160);
                println!(" - {}: {}", snippet.symbol, text);
            }
        }
    }

    if !report.repeated_lines.is_empty() {
        println!("\nRepeated lines:");
        for repeated in &report.repeated_lines {
            let line = truncate_for_compact(&repeated.line, 180);
            println!(" {}x {}", repeated.occurrences, line);
        }
    }

    if !report.file_refs.is_empty() {
        println!("\nAnchored files:");
        for file_ref in &report.file_refs {
            let anchor = match (file_ref.line, file_ref.column) {
                (None, _) => format!("{}", file_ref.path),
                (Some(line), None) => format!("{}:{}", file_ref.path, line),
                (Some(line), Some(column)) => format!("{}:{}:{}", file_ref.path, line, column),
            };
            println!("{anchor}");
            println!(" occurrences: {}", file_ref.occurrences);
            println!(
                " cached summaries: {}",
                log_digest_summary_label(file_ref.summary_state)
            );
            for snippet in &file_ref.current_summaries {
                let text = truncate_for_compact(&snippet.summary, 160);
                println!(" - {}: {}", snippet.symbol, text);
            }
        }
    }

    if !report.symbol_refs.is_empty() {
        println!("\nSymbol candidates:");
        for candidate in &report.symbol_refs {
            println!("{}", candidate.symbol);
            println!(" occurrences: {}", candidate.occurrences);
            println!(
                " cached summaries: {}",
                log_digest_summary_label(candidate.summary_state)
            );
            for snippet in &candidate.current_summaries {
                let text = truncate_for_compact(&snippet.summary, 160);
                println!(" - {}: {}", snippet.symbol, text);
            }
        }
    }

    if !report.stack_traces.is_empty() {
        println!("\nStack groups:");
        for stack in &report.stack_traces {
            println!(" occurrences: {}", stack.occurrences);
            for frame in &stack.frames {
                println!(" - {frame}");
            }
        }
    }

    for warning in &report.warnings {
        println!("warning: {warning}");
    }
    Ok(())
}
pub(crate) fn metric_digest_trend_label(trend: metric_digest::MetricDigestTrend) -> &'static str {
match trend {
metric_digest::MetricDigestTrend::Improved => "improved",
metric_digest::MetricDigestTrend::Regressed => "regressed",
metric_digest::MetricDigestTrend::Flat => "flat",
metric_digest::MetricDigestTrend::Unknown => "changed",
}
}
pub(crate) fn metric_digest_gate_label(
decision: metric_digest::CommunitySearchGateDecision,
) -> &'static str {
match decision {
metric_digest::CommunitySearchGateDecision::Pass => "pass",
metric_digest::CommunitySearchGateDecision::Block => "block",
}
}
/// Reads the fixture at `fixture_path`, runs `dci_benchmark::compute` on its
/// contents and prints the resulting report.
///
/// Output is JSON (via `to_json_schema`) when `format.json_output` is set, a
/// terse line-per-strategy listing when `format.compact` is set, and a
/// sectioned human report otherwise.
///
/// # Errors
/// Fails when the fixture cannot be read (with the path added as context), or
/// when `dci_benchmark::compute` / `to_json_schema` fail.
fn cmd_dci_benchmark(fixture_path: &Path, format: OutputFormat) -> Result<()> {
    let input = fs::read_to_string(fixture_path)
        .with_context(|| format!("reading dci-benchmark fixture: {}", fixture_path.display()))?;
    let report = dci_benchmark::compute(&input)?;

    if format.json_output {
        let rendered = to_json_schema(&report, format.pretty, format.terse, format.schema)?;
        println!("{}", rendered);
        return Ok(());
    }

    if format.compact {
        println!(
            "dci tasks:{} strategies:{} warnings:{}",
            report.tasks_loaded,
            report.strategies_compared,
            report.warnings.len()
        );
        for entry in &report.strategy_summaries {
            let rate = dci_benchmark::format_number(entry.localization_rate * 100.0);
            let calls = dci_benchmark::format_number(entry.avg_tool_calls);
            let latency = dci_benchmark::format_number(entry.avg_latency_ms);
            let tokens = dci_benchmark::format_number(entry.avg_estimated_tokens);
            println!(
                "{} rank:{} loc:{}/{} rate:{} calls:{} latency_ms:{} tokens:{}",
                entry.strategy,
                entry.rank,
                entry.localized,
                entry.task_runs,
                rate,
                calls,
                latency,
                tokens
            );
        }
        for warning in &report.warnings {
            println!("warning: {warning}");
        }
        return Ok(());
    }

    println!("DCI benchmark");
    if let Some(description) = &report.description {
        println!(" description: {description}");
    }
    println!(" tasks loaded: {}", report.tasks_loaded);
    println!(" strategies compared: {}", report.strategies_compared);

    println!("\nStrategy summary:");
    for entry in &report.strategy_summaries {
        // The percentage is formatted directly here (one decimal place),
        // unlike the compact view which goes through `format_number`.
        let percent = entry.localization_rate * 100.0;
        println!(
            " #{} {}: localization {}/{} ({:.1}%), avg calls {}, avg latency {}ms, avg tokens {}",
            entry.rank,
            entry.strategy,
            entry.localized,
            entry.task_runs,
            percent,
            dci_benchmark::format_number(entry.avg_tool_calls),
            dci_benchmark::format_number(entry.avg_latency_ms),
            dci_benchmark::format_number(entry.avg_estimated_tokens)
        );
    }

    println!("\nTask winners:");
    for row in &report.task_rows {
        let label = match &row.label {
            Some(value) => format!(" ({value})"),
            None => String::new(),
        };
        println!(" {}{}", row.task_id, label);
        println!(" localized: {}", row.best_localization.join(", "));

        let fewest_calls = row.lowest_tool_calls.as_deref().unwrap_or("-");
        let fastest = row.lowest_latency.as_deref().unwrap_or("-");
        let cheapest = row.lowest_token_budget.as_deref().unwrap_or("-");
        println!(
            " lowest calls: {}, lowest latency: {}, lowest tokens: {}",
            fewest_calls, fastest, cheapest
        );
    }

    for warning in &report.warnings {
        println!("warning: {warning}");
    }
    Ok(())
}
/// Budget-limited preview of one session entry, as assembled by
/// `build_session_review_budget_report`.
#[derive(Clone, Serialize)]
struct SessionReviewBudgetSessionPreview {
/// Stable handle derived from the session's source, path and total tokens.
handle: String,
source: String,
/// Session path, truncated to the preview byte budget.
path: String,
/// Match reasons, capped at the preview item count and truncated per entry.
matched_by: Vec<String>,
total_tokens: u64,
largest_turn_total_tokens: u64,
/// Number of prompt targets recorded for the session.
prompt_targets: usize,
/// Number of failure groups recorded for the session.
failures: usize,
/// `tsift session-digest` command for this session.
expand: String,
}
/// Budget-limited preview of one prompt target, as assembled by
/// `build_session_review_budget_report`.
#[derive(Clone, Serialize)]
struct SessionReviewBudgetPromptPreview {
/// Stable handle derived from the untruncated prompt text.
handle: String,
/// Prompt text, truncated to the preview byte budget.
text: String,
occurrences: usize,
/// `tsift session-review … --json` command for the reviewed target.
expand: String,
}
/// Budget-limited preview of one failure entry; used by both the
/// session-review budget report and the next-context budget report.
#[derive(Clone, Serialize)]
struct SessionReviewBudgetFailurePreview {
/// Stable handle derived from the failure kind and untruncated message.
handle: String,
kind: String,
/// Failure message, truncated to the preview byte budget.
message: String,
occurrences: usize,
/// Associated command, truncated; omitted from serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
command: Option<String>,
/// Associated session path, truncated; omitted from serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
session_path: Option<String>,
/// `tsift session-review` command for the reviewed target.
expand: String,
}
/// Budget-limited view of a session-review report, produced by
/// `build_session_review_budget_report`.
#[derive(Clone, Serialize)]
struct SessionReviewBudgetReport {
target: String,
target_kind: String,
/// Item cap applied to every preview list below.
max_items: usize,
/// Byte cap applied when truncating individual strings.
max_bytes: usize,
/// Total sessions matched by the underlying review (not capped).
sessions_matched: usize,
prompt_tokens: u64,
cached_input_tokens: u64,
total_tokens: u64,
/// Total tokens of the latest session cost entry; omitted when there is none.
#[serde(skip_serializing_if = "Option::is_none")]
latest_session_total_tokens: Option<u64>,
/// Largest-turn tokens of the latest session cost entry; omitted when there is none.
#[serde(skip_serializing_if = "Option::is_none")]
latest_session_largest_turn_total_tokens: Option<u64>,
/// True when any of the source lists held more than `max_items` entries.
truncated: bool,
sessions: Vec<SessionReviewBudgetSessionPreview>,
prompt_targets: Vec<SessionReviewBudgetPromptPreview>,
failures: Vec<SessionReviewBudgetFailurePreview>,
/// Guardrail messages, truncated to the byte budget.
guardrails: Vec<String>,
warnings: Vec<String>,
}
/// Follow-up action derived from a session-review guardrail by
/// `build_next_token_actions`.
#[derive(Clone, Serialize)]
struct SessionReviewNextTokenAction {
/// Rank assigned by `token_action_priority`; lower values sort first.
priority: usize,
/// Guardrail kind this action was derived from.
kind: String,
severity: String,
/// Guardrail message, truncated to the byte budget.
message: String,
/// Guardrail guidance, truncated to the byte budget.
guidance: String,
/// `agent-doc compact` command; only set when the review target is a `.md` file.
#[serde(skip_serializing_if = "Option::is_none")]
compact_command: Option<String>,
/// `agent-doc start` command; only set when the review target is a `.md` file.
#[serde(skip_serializing_if = "Option::is_none")]
restart_command: Option<String>,
/// `tsift --envelope` follow-up commands for the review target.
digest_commands: Vec<String>,
}
/// Budget-limited view of a session review's next-context section, produced by
/// `build_session_review_next_context_budget_report`.
#[derive(Clone, Serialize)]
struct SessionReviewNextContextBudgetReport {
contract_version: &'static str,
target: String,
/// Item cap applied to the preview lists.
max_items: usize,
/// Byte cap applied when truncating individual strings.
max_bytes: usize,
/// Untruncated count of active prompt targets.
prompt_target_total: usize,
/// Untruncated count of touched files.
touched_file_total: usize,
/// Untruncated count of touched symbols.
touched_symbol_total: usize,
/// Count of unresolved failures after filtering, before the item cap.
unresolved_failure_total: usize,
/// True when any list was cut short by its cap.
truncated: bool,
prompt_targets: Vec<String>,
touched_files: Vec<String>,
touched_symbols: Vec<String>,
/// Compact symbol references for the touched symbols; omitted when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
touched_symbol_refs: Vec<CompactSymbolRefPreview>,
unresolved_failures: Vec<SessionReviewBudgetFailurePreview>,
/// Guardrail-derived follow-up actions; omitted when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
next_token_actions: Vec<SessionReviewNextTokenAction>,
/// Follow-up digest commands, capped by the budget's follow-up item count.
next_digest_commands: Vec<String>,
}
/// Graph-orchestration section embedded in `ContextPackReport`.
// NOTE(review): the code that fills this struct is outside this part of the
// file; field meanings below are limited to what the declaration shows.
#[derive(Clone, Serialize)]
struct ContextPackGraphOrchestration {
contract_version: &'static str,
graph_db_command: String,
projection_freshness: GraphDbFreshnessReport,
projection_hashes: Vec<String>,
evidence_packet_ids: Vec<String>,
conflict_matrix_decisions: Vec<String>,
worker_ownership_blocks: Vec<String>,
follow_up_commands: Vec<String>,
/// Omitted from serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
warnings: Vec<String>,
}
/// Serializable context-pack report that bundles the next-context, diff, test,
/// log, exploration and graph-orchestration sections for one target.
#[derive(Clone, Serialize)]
struct ContextPackReport {
root: String,
target: String,
target_kind: String,
max_items: usize,
max_bytes: usize,
/// Omitted from serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
status_reminders: Vec<String>,
/// Omitted from serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
ontology_refs: Vec<CompactOntologyRefPreview>,
next_context: SessionReviewNextContextBudgetReport,
diff_digest: ContextPackDiffPreview,
/// Optional section: carries a status and command even when no report is attached.
test_digest: ContextPackOptionalSection<ContextPackTestPreview>,
/// Optional section: carries a status and command even when no report is attached.
log_digest: ContextPackOptionalSection<ContextPackLogPreview>,
exploration: ExplorationPacket,
graph_orchestration: ContextPackGraphOrchestration,
resume_commands: Vec<String>,
}
/// Wrapper for a context-pack section whose report of type `T` may be absent;
/// used for the test-digest and log-digest sections of `ContextPackReport`.
#[derive(Clone, Serialize)]
struct ContextPackOptionalSection<T> {
status: String,
command: String,
/// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
source: Option<String>,
/// The section payload; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
report: Option<T>,
}
/// Budget-limited preview of a diff digest, produced by
/// `build_context_pack_diff_preview`.
#[derive(Clone, Serialize)]
struct ContextPackDiffPreview {
/// Label of the diff-digest mode.
mode: String,
files_changed: usize,
files_with_current_summaries: usize,
symbols_touched: usize,
call_edges_added: usize,
call_edges_removed: usize,
/// True when the digest listed more files than the preview item cap.
truncated: bool,
/// Per-file previews, capped at the preview item count.
files: Vec<ContextPackDiffFilePreview>,
}
/// Budget-limited preview of one changed file inside `ContextPackDiffPreview`.
#[derive(Clone, Serialize)]
struct ContextPackDiffFilePreview {
/// File path, truncated to the preview byte budget.
path: String,
/// Label of the file's diff status.
status: String,
/// Touched symbol names, capped and truncated.
touched_symbols: Vec<String>,
/// Compact references for the touched symbols; omitted when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
touched_symbol_refs: Vec<CompactSymbolRefPreview>,
/// Label of the file's summary state.
summary_state: String,
/// Summary previews for the file; omitted when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
summary_refs: Vec<ContextPackSummaryRefPreview>,
/// Count of added call edges (the edges themselves are not included).
added_call_edges: usize,
/// Count of removed call edges (the edges themselves are not included).
removed_call_edges: usize,
warnings: Vec<String>,
}
/// Budget-limited preview of one cached summary, produced by
/// `build_context_summary_refs`.
#[derive(Clone, Serialize)]
struct ContextPackSummaryRefPreview {
/// Stable handle derived from the key scope, symbol and summary text.
handle: String,
/// Symbol name, truncated to the preview byte budget.
symbol: String,
/// Tag alias derived from the symbol name, if any; omitted when absent.
#[serde(skip_serializing_if = "Option::is_none")]
tag_alias: Option<String>,
/// Ontology references looked up for the tag alias; omitted when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
ontology_refs: Vec<CompactOntologyRefPreview>,
/// Summary text, truncated to the preview byte budget.
summary: String,
/// `tsift summarize` command for the file or symbol.
expand: String,
}
/// Serializable preview of a test digest for the context pack.
// NOTE(review): the builder for this struct is outside this part of the file.
#[derive(Clone, Serialize)]
struct ContextPackTestPreview {
runner: String,
failures: usize,
grouped_failures: usize,
counts: ContextPackTestCounts,
truncated: bool,
failure_groups: Vec<ContextPackTestFailurePreview>,
warnings: Vec<String>,
}
/// Optional pass/fail/skip counts for a test preview; each count is omitted
/// from serialized output when it is `None`.
#[derive(Clone, Serialize)]
struct ContextPackTestCounts {
#[serde(skip_serializing_if = "Option::is_none")]
passed: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
failed: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
skipped: Option<usize>,
}
/// Serializable preview of one failure group inside `ContextPackTestPreview`.
#[derive(Clone, Serialize)]
struct ContextPackTestFailurePreview {
tests: Vec<String>,
message: String,
/// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<String>,
/// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
line: Option<usize>,
occurrences: usize,
summary_state: String,
/// Omitted from serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
summary_refs: Vec<ContextPackSummaryRefPreview>,
}
/// Serializable preview of a log digest for the context pack: group counts
/// plus preview lists of signals, repeated lines, file refs and symbol refs.
// NOTE(review): the builder for this struct is outside this part of the file.
#[derive(Clone, Serialize)]
struct ContextPackLogPreview {
total_lines: usize,
non_empty_lines: usize,
signal_groups: usize,
repeated_line_groups: usize,
file_ref_groups: usize,
symbol_ref_groups: usize,
stack_groups: usize,
truncated: bool,
signals: Vec<ContextPackLogSignalPreview>,
repeated_lines: Vec<ContextPackLogRepeatedLinePreview>,
file_refs: Vec<ContextPackLogFileRefPreview>,
symbol_refs: Vec<ContextPackLogSymbolRefPreview>,
warnings: Vec<String>,
}
/// Serializable preview of one log signal inside `ContextPackLogPreview`.
#[derive(Clone, Serialize)]
struct ContextPackLogSignalPreview {
severity: String,
message: String,
/// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<String>,
/// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
line: Option<usize>,
occurrences: usize,
summary_state: String,
/// Omitted from serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
summary_refs: Vec<ContextPackSummaryRefPreview>,
}
/// Serializable preview of one repeated log line and how often it occurred.
#[derive(Clone, Serialize)]
struct ContextPackLogRepeatedLinePreview {
line: String,
occurrences: usize,
}
/// Serializable preview of one file reference inside `ContextPackLogPreview`.
#[derive(Clone, Serialize)]
struct ContextPackLogFileRefPreview {
path: String,
/// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
line: Option<usize>,
occurrences: usize,
summary_state: String,
/// Omitted from serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
summary_refs: Vec<ContextPackSummaryRefPreview>,
}
/// Serializable preview of one symbol reference inside `ContextPackLogPreview`.
#[derive(Clone, Serialize)]
struct ContextPackLogSymbolRefPreview {
handle: String,
symbol: String,
/// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
tag_alias: Option<String>,
/// Omitted from serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
ontology_refs: Vec<CompactOntologyRefPreview>,
occurrences: usize,
summary_state: String,
/// Omitted from serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
summary_refs: Vec<ContextPackSummaryRefPreview>,
}
/// Maps a session source identifier to the value used for the `--source`
/// flag of `tsift session-digest`.
///
/// Any identifier not listed in the table falls back to `"markdown"`.
fn session_review_source_flag(source: &str) -> &'static str {
    const KNOWN_SOURCES: [(&str, &str); 3] = [
        ("claude_jsonl", "claude-jsonl"),
        ("codex_jsonl", "codex-jsonl"),
        ("agent_doc_log", "agent-doc-log"),
    ];

    KNOWN_SOURCES
        .iter()
        .find(|&&(identifier, _)| identifier == source)
        .map_or("markdown", |&(_, flag)| flag)
}
/// Condenses a full session-review report into a budget-limited preview.
///
/// Every list is capped at `budget.preview_items()` entries and every string
/// is truncated with `truncate_for_budget` using `budget.preview_bytes()`.
/// Handles are derived from the untruncated values, so they do not depend on
/// the byte budget. `truncated` is set when any source list was longer than
/// the item cap.
pub(crate) fn build_session_review_budget_report(
    report: &session_review::SessionReviewReport,
    budget: ResponseBudget,
) -> SessionReviewBudgetReport {
    let max_items = budget.preview_items();
    let max_bytes = budget.preview_bytes();
    let review_expand = format!(
        "tsift session-review {} --json",
        shell_quote(&report.target)
    );

    let mut sessions = Vec::new();
    for session in report.sessions.iter().take(max_items) {
        let handle_key = format!(
            "{}:{}:{}",
            session.source, session.path, session.total_tokens
        );
        let matched_by: Vec<String> = session
            .matched_by
            .iter()
            .take(max_items)
            .map(|reason| truncate_for_budget(reason, max_bytes))
            .collect();
        sessions.push(SessionReviewBudgetSessionPreview {
            handle: stable_handle("srev", &handle_key),
            source: session.source.clone(),
            path: truncate_for_budget(&session.path, max_bytes),
            matched_by,
            total_tokens: session.total_tokens,
            largest_turn_total_tokens: session.largest_turn_total_tokens,
            prompt_targets: session.prompt_target_count,
            failures: session.failure_groups,
            expand: format!(
                "tsift session-digest --path {} --input {} --source {}",
                shell_quote(&report.root),
                shell_quote(&session.path),
                session_review_source_flag(&session.source)
            ),
        });
    }

    let mut prompt_targets = Vec::new();
    for prompt in report.prompt_targets.iter().take(max_items) {
        prompt_targets.push(SessionReviewBudgetPromptPreview {
            handle: stable_handle("spt", &prompt.text),
            text: truncate_for_budget(&prompt.text, max_bytes),
            occurrences: prompt.occurrences,
            expand: review_expand.clone(),
        });
    }

    let mut failures = Vec::new();
    for failure in report.failures.iter().take(max_items) {
        let handle_key = format!("{}:{}", failure.kind, failure.message);
        failures.push(SessionReviewBudgetFailurePreview {
            handle: stable_handle("sfl", &handle_key),
            kind: failure.kind.clone(),
            message: truncate_for_budget(&failure.message, max_bytes),
            occurrences: failure.occurrences,
            command: failure
                .command
                .as_ref()
                .map(|command| truncate_for_budget(command, max_bytes)),
            session_path: failure
                .session_path
                .as_ref()
                .map(|path| truncate_for_budget(path, max_bytes)),
            expand: review_expand.clone(),
        });
    }

    let guardrails: Vec<String> = report
        .guardrails
        .iter()
        .take(max_items)
        .map(|guardrail| truncate_for_budget(&guardrail.message, max_bytes))
        .collect();
    let warnings: Vec<String> = report
        .warnings
        .iter()
        .take(max_items)
        .map(|warning| truncate_for_budget(warning, max_bytes))
        .collect();

    // Only list length is considered here; string truncation is not reported.
    let source_lengths = [
        report.sessions.len(),
        report.prompt_targets.len(),
        report.failures.len(),
        report.guardrails.len(),
        report.warnings.len(),
    ];
    let truncated = source_lengths.iter().any(|&length| length > max_items);

    let latest_cost = report.latest_session_cost.as_ref();

    SessionReviewBudgetReport {
        target: report.target.clone(),
        target_kind: report.target_kind.clone(),
        max_items,
        max_bytes,
        sessions_matched: report.sessions_matched,
        prompt_tokens: report.prompt_tokens,
        cached_input_tokens: report.cached_input_tokens,
        total_tokens: report.total_tokens,
        latest_session_total_tokens: latest_cost.map(|cost| cost.total_tokens),
        latest_session_largest_turn_total_tokens: latest_cost
            .map(|cost| cost.largest_turn_total_tokens),
        truncated,
        sessions,
        prompt_targets,
        failures,
        guardrails,
        warnings,
    }
}
pub(crate) fn build_session_review_next_context_budget_report(
report: &session_review::SessionReviewReport,
budget: ResponseBudget,
ontology: Option<&TagOntologyPreviewContext>,
) -> SessionReviewNextContextBudgetReport {
let max_items = budget.preview_items();
let max_bytes = budget.preview_bytes();
let follow_up_items = budget.follow_up_items();
let next_token_actions = build_next_token_actions(report, max_items, max_bytes);
let actionable_guardrail_failures = next_token_actions
.iter()
.map(|action| format!("guardrail:{}", action.kind))
.collect::<BTreeSet<_>>();
let unresolved_failures = report
.next_context
.unresolved_failures
.iter()
.filter(|entry| !actionable_guardrail_failures.contains(&entry.kind))
.collect::<Vec<_>>();
let unresolved_failure_total = unresolved_failures.len();
SessionReviewNextContextBudgetReport {
contract_version: SESSION_REVIEW_FOLLOW_UP_CONTRACT_VERSION,
target: report.next_context.target.clone(),
max_items,
max_bytes,
prompt_target_total: report.next_context.active_prompt_targets.len(),
touched_file_total: report.next_context.touched_files.len(),
touched_symbol_total: report.next_context.touched_symbols.len(),
unresolved_failure_total,
truncated: report.next_context.active_prompt_targets.len() > max_items
|| report.next_context.touched_files.len() > max_items
|| report.next_context.touched_symbols.len() > max_items
|| unresolved_failure_total > max_items
|| report.next_context.next_digest_commands.len() > follow_up_items,
prompt_targets: report
.next_context
.active_prompt_targets
.iter()
.take(max_items)
.map(|entry| truncate_for_budget(entry, max_bytes))
.collect(),
touched_files: report
.next_context
.touched_files
.iter()
.take(max_items)
.map(|entry| truncate_for_budget(entry, max_bytes))
.collect(),
touched_symbols: report
.next_context
.touched_symbols
.iter()
.take(max_items)
.map(|entry| truncate_for_budget(entry, max_bytes))
.collect(),
touched_symbol_refs: report
.next_context
.touched_symbols
.iter()
.take(max_items)
.map(|entry| {
build_compact_symbol_ref_with_ontology(
"ncsym",
&format!("{}:{}", report.next_context.target, entry),
entry,
None,
max_bytes,
ontology,
)
})
.collect(),
unresolved_failures: unresolved_failures
.iter()
.take(max_items)
.map(|entry| SessionReviewBudgetFailurePreview {
handle: stable_handle("snf", &format!("{}:{}", entry.kind, entry.message)),
kind: entry.kind.clone(),
message: truncate_for_budget(&entry.message, max_bytes),
occurrences: entry.occurrences,
command: entry
.command
.as_ref()
.map(|command| truncate_for_budget(command, max_bytes)),
session_path: entry
.session_path
.as_ref()
.map(|path| truncate_for_budget(path, max_bytes)),
expand: format!(
"tsift session-review {} --next-context --json",
shell_quote(&report.target)
),
})
.collect(),
next_token_actions,
next_digest_commands: report
.next_context
.next_digest_commands
.iter()
.take(follow_up_items)
.cloned()
.collect(),
}
}
/// Turns the review's guardrails into prioritized follow-up actions.
///
/// Only guardrail kinds known to `token_action_priority` produce an action.
/// `agent-doc` compact/restart commands are attached only when the review
/// target is a file whose name ends in `.md`. The result is sorted by
/// priority (then kind), reduced to one action per kind, and capped at
/// `max_items`.
fn build_next_token_actions(
    report: &session_review::SessionReviewReport,
    max_items: usize,
    max_bytes: usize,
) -> Vec<SessionReviewNextTokenAction> {
    let quoted_target = shell_quote(&report.target);
    let is_markdown_doc = report.target_kind == "file" && report.target.ends_with(".md");

    // These commands depend only on the target, so build them once.
    let (compact_command, restart_command) = if is_markdown_doc {
        (
            Some(format!("agent-doc compact {quoted_target} --commit")),
            Some(format!("agent-doc start {quoted_target}")),
        )
    } else {
        (None, None)
    };
    let digest_commands = vec![
        format!("tsift --envelope session-review {quoted_target} --next-context --budget normal"),
        format!("tsift --envelope context-pack {quoted_target} --budget normal"),
    ];

    let mut actions = Vec::new();
    for guardrail in &report.guardrails {
        let Some(priority) = token_action_priority(&guardrail.kind) else {
            continue;
        };
        actions.push(SessionReviewNextTokenAction {
            priority,
            kind: guardrail.kind.clone(),
            severity: guardrail.severity.clone(),
            message: truncate_for_budget(&guardrail.message, max_bytes),
            guidance: truncate_for_budget(&guardrail.guidance, max_bytes),
            compact_command: compact_command.clone(),
            restart_command: restart_command.clone(),
            digest_commands: digest_commands.clone(),
        });
    }

    // Stable sort keeps the first guardrail of each kind ahead of its
    // duplicates, so the dedup below retains that first occurrence.
    actions.sort_by(|a, b| (a.priority, &a.kind).cmp(&(b.priority, &b.kind)));
    actions.dedup_by(|later, earlier| later.kind == earlier.kind);
    actions.truncate(max_items);
    actions
}
/// Returns the 1-based priority of a guardrail kind that yields a token
/// action, or `None` for any other kind.
///
/// The order of `ACTIONABLE_KINDS` defines the priority: the first entry has
/// priority 1.
fn token_action_priority(kind: &str) -> Option<usize> {
    const ACTIONABLE_KINDS: [&str; 4] = [
        "prompt_budget",
        "cache_resend",
        "restart_loop",
        "noop_closeout",
    ];

    ACTIONABLE_KINDS
        .iter()
        .position(|known| *known == kind)
        .map(|index| index + 1)
}
/// Prints a budget-limited session-review report to stdout, one record per
/// line: a header, then sessions, prompts, failures, guardrails and warnings,
/// followed by a truncation notice when `report.truncated` is set.
///
/// Missing latest-session token counts are shown as `-`.
pub(crate) fn print_session_review_budget_human(report: &SessionReviewBudgetReport) {
    let dash = || "-".to_string();
    let latest_total = report
        .latest_session_total_tokens
        .map_or_else(dash, format_compact_count);
    let latest_largest_turn = report
        .latest_session_largest_turn_total_tokens
        .map_or_else(dash, format_compact_count);

    println!(
        "session-review-budget target:{} kind:{} sessions:{}/{} aggregate_prompt:{} aggregate_cached:{} aggregate_total:{} latest_total:{} latest_largest_turn:{}",
        shell_quote(&report.target),
        report.target_kind,
        report.sessions.len(),
        report.sessions_matched,
        format_compact_count(report.prompt_tokens),
        format_compact_count(report.cached_input_tokens),
        format_compact_count(report.total_tokens),
        latest_total,
        latest_largest_turn
    );

    for entry in &report.sessions {
        let total = format_compact_count(entry.total_tokens);
        let largest_turn = format_compact_count(entry.largest_turn_total_tokens);
        println!(
            "session {} {} total:{} largest_turn:{} prompts:{} fails:{} expand:{}",
            entry.handle,
            entry.path,
            total,
            largest_turn,
            entry.prompt_targets,
            entry.failures,
            entry.expand
        );
    }

    for entry in &report.prompt_targets {
        println!(
            "prompt {} count:{} {} expand:{}",
            entry.handle, entry.occurrences, entry.text, entry.expand
        );
    }

    for entry in &report.failures {
        // Optional parts carry their own leading space so that absent ones
        // leave no gap in the line.
        let command_part = match &entry.command {
            Some(command) => format!(" command:{command}"),
            None => String::new(),
        };
        let session_part = match &entry.session_path {
            Some(path) => format!(" session:{path}"),
            None => String::new(),
        };
        println!(
            "fail {} {} count:{} {}{}{} expand:{}",
            entry.handle,
            entry.kind,
            entry.occurrences,
            entry.message,
            command_part,
            session_part,
            entry.expand
        );
    }

    for guardrail in &report.guardrails {
        println!("guardrail {guardrail}");
    }
    for warning in &report.warnings {
        println!("warning {warning}");
    }

    if report.truncated {
        println!(
            "budget truncated items:{} bytes:{}",
            report.max_items, report.max_bytes
        );
    }
}
/// Prints a budget-limited next-context report to stdout, one record per
/// line: a header with shown/total counts, then prompts, files, symbols,
/// failures, token actions (with their commands) and follow-up commands,
/// followed by a truncation notice when `report.truncated` is set.
///
/// A symbol is rendered through `format_symbol_preview_line` when a symbol
/// reference with the same name exists, and as its bare name otherwise.
pub(crate) fn print_session_review_next_context_budget_human(
    report: &SessionReviewNextContextBudgetReport,
) {
    println!(
        "next-context-budget target:{} prompts:{}/{} files:{}/{} symbols:{}/{} failures:{}/{}",
        shell_quote(&report.target),
        report.prompt_targets.len(),
        report.prompt_target_total,
        report.touched_files.len(),
        report.touched_file_total,
        report.touched_symbols.len(),
        report.touched_symbol_total,
        report.unresolved_failures.len(),
        report.unresolved_failure_total
    );

    for prompt in &report.prompt_targets {
        println!("prompt {prompt}");
    }
    for file in &report.touched_files {
        println!("file {file}");
    }

    for symbol in &report.touched_symbols {
        let matching_ref = report
            .touched_symbol_refs
            .iter()
            .find(|candidate| candidate.name == *symbol);
        match matching_ref {
            Some(symbol_ref) => {
                let line = format_symbol_preview_line(
                    &symbol_ref.handle,
                    &symbol_ref.name,
                    symbol_ref.tag_alias.as_deref(),
                );
                println!("symbol {}", line);
            }
            None => println!("symbol {symbol}"),
        }
    }

    for entry in &report.unresolved_failures {
        // Optional parts carry their own leading space so that absent ones
        // leave no gap in the line.
        let command_part = match &entry.command {
            Some(command) => format!(" command:{command}"),
            None => String::new(),
        };
        let session_part = match &entry.session_path {
            Some(path) => format!(" session:{path}"),
            None => String::new(),
        };
        println!(
            "fail {} {} count:{} {}{}{} expand:{}",
            entry.handle,
            entry.kind,
            entry.occurrences,
            entry.message,
            command_part,
            session_part,
            entry.expand
        );
    }

    for action in &report.next_token_actions {
        println!(
            "token-action {} {} severity:{} {} guidance:{}",
            action.priority, action.kind, action.severity, action.message, action.guidance
        );
        if let Some(compact) = action.compact_command.as_ref() {
            println!("token-action-command {} compact {}", action.kind, compact);
        }
        if let Some(restart) = action.restart_command.as_ref() {
            println!("token-action-command {} restart {}", action.kind, restart);
        }
        for digest in &action.digest_commands {
            println!("token-action-command {} digest {}", action.kind, digest);
        }
    }

    for command in &report.next_digest_commands {
        println!("next {command}");
    }

    if report.truncated {
        println!(
            "budget truncated items:{} bytes:{}",
            report.max_items, report.max_bytes
        );
    }
}
/// Builds a new `ResponseBudget` whose item and byte limits are set
/// explicitly to the preview limits of `budget`.
fn effective_context_budget(budget: ResponseBudget) -> ResponseBudget {
    let items = budget.preview_items();
    let bytes = budget.preview_bytes();
    ResponseBudget::new(Some(items), Some(bytes))
}
/// Builds budget-limited summary previews from `(symbol, summary)` pairs.
///
/// At most `budget.preview_items()` pairs are consumed, and the symbol, tag
/// alias and summary text are truncated to `budget.preview_bytes()`. The
/// handle is derived from `prefix` plus `key_scope`, the untruncated symbol
/// and the untruncated summary. The `expand` command summarizes `file_path`
/// when one is given and the symbol otherwise.
fn build_context_summary_refs<'a>(
    prefix: &str,
    key_scope: &str,
    file_path: Option<&str>,
    snippets: impl Iterator<Item = (&'a str, &'a str)>,
    budget: ResponseBudget,
    ontology: Option<&TagOntologyPreviewContext>,
) -> Vec<ContextPackSummaryRefPreview> {
    let max_items = budget.preview_items();
    let max_bytes = budget.preview_bytes();

    let mut previews = Vec::new();
    for (symbol, summary) in snippets.take(max_items) {
        let tag_alias = tag_alias_from_name(symbol);
        // The ontology lookup uses the untruncated alias; only the stored
        // alias is shortened.
        let ontology_refs = match tag_alias.as_deref() {
            Some(alias) => ontology_refs_for_alias(ontology, alias),
            None => Default::default(),
        };
        let expand = if let Some(path) = file_path {
            format!("tsift summarize --file {}", shell_quote(path))
        } else {
            format!("tsift summarize {}", shell_quote(symbol))
        };
        let handle_key = format!("{key_scope}:{symbol}:{summary}");

        previews.push(ContextPackSummaryRefPreview {
            handle: stable_handle(prefix, &handle_key),
            symbol: truncate_for_budget(symbol, max_bytes),
            tag_alias: tag_alias.map(|alias| truncate_for_budget(&alias, max_bytes)),
            ontology_refs,
            summary: truncate_for_budget(summary, max_bytes),
            expand,
        });
    }
    previews
}
/// Condenses a diff digest into a budget-limited preview for the context pack.
///
/// Files, and each file's touched symbols and warnings, are capped at
/// `budget.preview_items()`; strings are truncated to
/// `budget.preview_bytes()`. Call edges are reported as counts only.
/// `truncated` reflects the file list alone.
fn build_context_pack_diff_preview(
    report: &diff_digest::DiffDigestReport,
    budget: ResponseBudget,
    ontology: Option<&TagOntologyPreviewContext>,
) -> ContextPackDiffPreview {
    let max_items = budget.preview_items();
    let max_bytes = budget.preview_bytes();

    let mut files = Vec::new();
    for changed in report.files.iter().take(max_items) {
        let touched_symbols: Vec<String> = changed
            .touched_symbols
            .iter()
            .take(max_items)
            .map(|symbol| truncate_for_budget(symbol, max_bytes))
            .collect();

        let mut touched_symbol_refs = Vec::new();
        for symbol in changed.touched_symbols.iter().take(max_items) {
            touched_symbol_refs.push(build_compact_symbol_ref_with_ontology(
                "cdsym",
                &format!("{}:{}", changed.path, symbol),
                symbol,
                None,
                max_bytes,
                ontology,
            ));
        }

        let summary_pairs = changed
            .current_summaries
            .iter()
            .map(|snippet| (snippet.symbol.as_str(), snippet.summary.as_str()));
        let summary_refs = build_context_summary_refs(
            "cdsum",
            &changed.path,
            Some(&changed.path),
            summary_pairs,
            budget,
            ontology,
        );

        let warnings: Vec<String> = changed
            .warnings
            .iter()
            .take(max_items)
            .map(|warning| truncate_for_budget(warning, max_bytes))
            .collect();

        files.push(ContextPackDiffFilePreview {
            path: truncate_for_budget(&changed.path, max_bytes),
            status: diff_digest_status_label(changed.status).to_string(),
            touched_symbols,
            touched_symbol_refs,
            summary_state: diff_digest_summary_label(changed.summary_state).to_string(),
            summary_refs,
            added_call_edges: changed.added_call_edges.len(),
            removed_call_edges: changed.removed_call_edges.len(),
            warnings,
        });
    }

    ContextPackDiffPreview {
        mode: diff_digest_mode_label(report.mode).to_string(),
        files_changed: report.files_changed,
        files_with_current_summaries: report.files_with_current_summaries,
        symbols_touched: report.symbols_touched,
        call_edges_added: report.call_edges_added,
        call_edges_removed: report.call_edges_removed,
        truncated: report.files.len() > max_items,
        files,
    }
}
/// Appends symbols touched by the diff preview to the next-context report.
///
/// Starting from the symbols already listed in `next_context`, every diff
/// symbol name not yet present is added in encounter order. The merged list
/// is then re-capped to `next_context.max_items`, the symbol references are
/// rebuilt from it, the total is raised to at least the merged length, and
/// `truncated` is set when the merged list exceeds the cap. Nothing changes
/// when the merged list is empty.
fn enrich_next_context_with_diff_symbols(
    next_context: &mut SessionReviewNextContextBudgetReport,
    diff_digest: &ContextPackDiffPreview,
    ontology: Option<&TagOntologyPreviewContext>,
) {
    let mut merged = next_context.touched_symbols.clone();
    let diff_symbol_names = diff_digest
        .files
        .iter()
        .flat_map(|file| file.touched_symbol_refs.iter())
        .map(|symbol_ref| &symbol_ref.name);
    for name in diff_symbol_names {
        if !merged.contains(name) {
            merged.push(name.clone());
        }
    }
    if merged.is_empty() {
        return;
    }

    let max_items = next_context.max_items;
    let max_bytes = next_context.max_bytes;
    let merged_len = merged.len();

    next_context.touched_symbol_total = next_context.touched_symbol_total.max(merged_len);
    if merged_len > max_items {
        next_context.truncated = true;
    }

    let shown = || merged.iter().take(max_items);

    let names: Vec<String> = shown()
        .map(|entry| truncate_for_budget(entry, max_bytes))
        .collect();
    next_context.touched_symbols = names;

    let mut refs = Vec::new();
    for entry in shown() {
        refs.push(build_compact_symbol_ref_with_ontology(
            "ncsym",
            &format!("{}:{}", next_context.target, entry),
            entry,
            None,
            max_bytes,
            ontology,
        ));
    }
    next_context.touched_symbol_refs = refs;
}
/// Builds a source window covering the top of `file`: it starts at line 1 and
/// ends at `budget.lines_per_window`.
///
/// The handle is derived from the file, the line range and `reason`; `expand`
/// is the command returned by `source_read_command` for that window.
fn context_exploration_source_window(
    root: &Path,
    file: &str,
    reason: String,
    budget: &ExplorationBudget,
) -> ExplorationSourceWindow {
    let first_line = 1;
    let last_line = budget.lines_per_window;
    let handle_key = format!("context:{file}:{first_line}:{last_line}:{reason}");
    let handle = stable_handle("xwin", &handle_key);
    let expand = source_read_command(root, file, first_line, budget.lines_per_window);

    ExplorationSourceWindow {
        handle,
        file: file.to_owned(),
        start: first_line,
        end: last_line,
        reason,
        expand,
    }
}
/// Builds the exploration packet attached to a context-pack handoff.
///
/// The packet is derived from the session next-context report and the
/// working-tree diff preview:
///
/// * `relationship_map` holds one `touches_symbol` relation per symbol touched
///   in a previewed diff file, followed by one `mentions_symbol` relation per
///   next-context symbol, capped at `budget.relationship_limit` in total.
/// * `source_windows` holds one window per distinct file path (diff files
///   first, then session-touched files), capped at `budget.max_source_windows`.
/// * `worker_context` holds one entry per prompt target, or per next digest
///   command when there are no prompt targets, capped at
///   `budget.relationship_limit`.
///
/// The budget itself comes from `exploration_budget_for_counts`, fed with
/// saturating node and edge totals taken from both inputs.
fn build_context_pack_exploration_packet(
    root: &Path,
    next_context: &SessionReviewNextContextBudgetReport,
    diff_digest: &ContextPackDiffPreview,
) -> ExplorationPacket {
    let previewed_symbol_refs = diff_digest
        .files
        .iter()
        .map(|file| file.touched_symbol_refs.len())
        .sum::<usize>();
    let node_count = diff_digest
        .files_changed
        .saturating_add(next_context.touched_file_total)
        .saturating_add(next_context.touched_symbol_total);
    let edge_count = diff_digest
        .call_edges_added
        .saturating_add(diff_digest.call_edges_removed)
        .saturating_add(previewed_symbol_refs);
    let budget = exploration_budget_for_counts(node_count, edge_count);

    // Diff relations come first so they win the shared relationship cap.
    let diff_relations = diff_digest.files.iter().flat_map(|file| {
        file.touched_symbol_refs
            .iter()
            .map(move |symbol| ExplorationRelation {
                from: format!("file:{}", file.path),
                relation: "touches_symbol".to_string(),
                to: format!("symbol:{}", symbol.name),
                label: Some(format!("{} diff", file.status)),
            })
    });
    let context_relations = next_context
        .touched_symbol_refs
        .iter()
        .map(|symbol| ExplorationRelation {
            from: format!("context:{}", next_context.target),
            relation: "mentions_symbol".to_string(),
            to: format!("symbol:{}", symbol.name),
            label: Some("session next-context symbol".to_string()),
        });
    let relationship_map: Vec<_> = diff_relations
        .chain(context_relations)
        .take(budget.relationship_limit)
        .collect();

    // Candidate windows in priority order: changed files, then session files.
    let window_candidates = diff_digest
        .files
        .iter()
        .map(|file| (&file.path, format!("changed file ({})", file.status)))
        .chain(
            next_context
                .touched_files
                .iter()
                .map(|file| (file, "session touched file".to_string())),
        );
    let mut seen_files = BTreeSet::new();
    let mut source_windows = Vec::new();
    for (file_path, reason) in window_candidates {
        if source_windows.len() >= budget.max_source_windows {
            break;
        }
        // A path already windowed (e.g. both changed and session-touched) is skipped.
        if !seen_files.insert(file_path.clone()) {
            continue;
        }
        source_windows.push(context_exploration_source_window(
            root, file_path, reason, &budget,
        ));
    }

    // Prompt targets seed the worker contexts; digest commands are the fallback.
    let worker_seeds = if next_context.prompt_targets.is_empty() {
        &next_context.next_digest_commands
    } else {
        &next_context.prompt_targets
    };
    let worker_context: Vec<_> = worker_seeds
        .iter()
        .take(budget.relationship_limit)
        .enumerate()
        .map(|(idx, prompt)| {
            let summary = truncate_for_budget(prompt, next_context.max_bytes);
            ExplorationWorkerContext {
                handle: stable_handle(
                    "xwrk",
                    &format!("{}:{}:{}", next_context.target, idx, prompt),
                ),
                target: next_context.target.clone(),
                summary,
                expand: format!(
                    "tsift --envelope context-pack {} --budget normal",
                    shell_quote(&next_context.target)
                ),
            }
        })
        .collect();

    ExplorationPacket {
        budget,
        relationship_map,
        source_windows,
        worker_context,
        no_reread_guidance:
            "Use worker_context for bounded handoff scope, then source_windows expand commands before broad file reads; relationship_map explains why each window is in the handoff."
                .to_string(),
    }
}
/// Returns the graph node id for an exploration reference label by hashing
/// the label through `stable_handle` under the `xref` prefix.
fn exploration_ref_id(label: &str) -> String {
    const EXPLORATION_REF_PREFIX: &str = "xref";
    stable_handle(EXPLORATION_REF_PREFIX, label)
}
fn context_pack_exploration_projection(packet: &ExplorationPacket) -> Result<GraphProjection> {
let provenance = GraphProvenance::new("tsift.context-pack", "exploration");
let mut nodes = BTreeMap::<String, SubstrateGraphNode>::new();
let mut edges = Vec::new();
for relation in &packet.relationship_map {
for label in [&relation.from, &relation.to] {
let id = exploration_ref_id(label);
nodes.entry(id.clone()).or_insert_with(|| {
SubstrateGraphNode::new(id, "exploration_ref", label.clone())
.with_property("label", label.clone())
.with_provenance(provenance.clone())
});
}
let mut edge = SubstrateGraphEdge::new(
exploration_ref_id(&relation.from),
exploration_ref_id(&relation.to),
relation.relation.clone(),
)
.with_provenance(provenance.clone());
if let Some(label) = &relation.label {
edge = edge.with_property("label", label.clone());
}
edges.push(edge_with_content_freshness(edge)?);
}
for window in &packet.source_windows {
let label = format!("{}:{}-{}", window.file, window.start, window.end);
let node = SubstrateGraphNode::new(window.handle.clone(), "source_handle", label)
.with_property("handle", window.handle.clone())
.with_property("file", window.file.clone())
.with_property("start", window.start.to_string())
.with_property("end", window.end.to_string())
.with_property("reason", window.reason.clone())
.with_property("expand", window.expand.clone())
.with_provenance(provenance.clone());
nodes.insert(window.handle.clone(), node_with_content_freshness(node)?);
let file_ref = format!("file:{}", window.file);
let file_ref_id = exploration_ref_id(&file_ref);
nodes.entry(file_ref_id.clone()).or_insert_with(|| {
SubstrateGraphNode::new(file_ref_id.clone(), "exploration_ref", file_ref.clone())
.with_property("label", file_ref.clone())
.with_provenance(provenance.clone())
});
let edge = SubstrateGraphEdge::new(window.handle.clone(), file_ref_id, "expands_source")
.with_property("label", window.reason.clone())
.with_provenance(provenance.clone());
edges.push(edge_with_content_freshness(edge)?);
}
for worker in &packet.worker_context {
let node = SubstrateGraphNode::new(
worker.handle.clone(),
"worker_context",
worker.summary.clone(),
)
.with_property("handle", worker.handle.clone())
.with_property("target", worker.target.clone())
.with_property("summary", worker.summary.clone())
.with_property("expand", worker.expand.clone())
.with_provenance(provenance.clone());
nodes.insert(worker.handle.clone(), node_with_content_freshness(node)?);
let target_ref = format!("context:{}", worker.target);
let target_ref_id = exploration_ref_id(&target_ref);
nodes.entry(target_ref_id.clone()).or_insert_with(|| {
SubstrateGraphNode::new(target_ref_id.clone(), "exploration_ref", target_ref.clone())
.with_property("label", target_ref.clone())
.with_provenance(provenance.clone())
});
edges.push(edge_with_content_freshness(
SubstrateGraphEdge::new(worker.handle.clone(), target_ref_id, "scopes_context")
.with_property("label", "bounded worker context".to_string())
.with_provenance(provenance.clone()),
)?);
for window in &packet.source_windows {
edges.push(edge_with_content_freshness(
SubstrateGraphEdge::new(
worker.handle.clone(),
window.handle.clone(),
"scopes_source",
)
.with_property("label", window.reason.clone())
.with_provenance(provenance.clone()),
)?);
}
}
let mut nodes = nodes.into_values().collect::<Vec<_>>();
for node in &mut nodes {
if node.freshness.is_none() {
let fresh = node_with_content_freshness(node.clone())?;
*node = fresh;
}
}
Ok(GraphProjection { nodes, edges })
}
/// Rebuilds an `ExplorationSourceWindow` from a stored `source_handle` node.
///
/// `file`, `start` and `end` are required properties. `handle` falls back to
/// the node id, `reason` to `"source context"`, and `expand` to an empty
/// string when the property is absent.
///
/// # Errors
///
/// Fails when `file`, `start` or `end` is missing, or when `start`/`end` does
/// not parse as `usize`.
fn source_window_from_graph_node(node: SubstrateGraphNode) -> Result<ExplorationSourceWindow> {
    let properties = &node.properties;
    let owned_property = |key: &str| properties.get(key).cloned();
    let line_property = |key: &str| -> Result<usize> {
        properties
            .get(key)
            .with_context(|| format!("source handle {} missing {key} property", node.id))?
            .parse::<usize>()
            .with_context(|| format!("source handle {} has invalid {key}", node.id))
    };

    // Validation order (file, start, end) decides which error surfaces first.
    let file = owned_property("file")
        .with_context(|| format!("source handle {} missing file property", node.id))?;
    let start = line_property("start")?;
    let end = line_property("end")?;

    let handle = owned_property("handle").unwrap_or_else(|| node.id.clone());
    let reason = owned_property("reason").unwrap_or_else(|| "source context".to_string());
    let expand = owned_property("expand").unwrap_or_default();
    Ok(ExplorationSourceWindow {
        handle,
        file,
        start,
        end,
        reason,
        expand,
    })
}
fn materialize_context_pack_exploration_packet(
root: &Path,
packet: ExplorationPacket,
) -> Result<ExplorationPacket> {
let projection = context_pack_exploration_projection(&packet)?;
let graph_db = graph_substrate_db_path(root, None);
let mut store = SqliteGraphStore::open(&graph_db)?;
store.upsert_projection(&projection)?;
let mut source_windows = Vec::new();
for window in &packet.source_windows {
let node = store
.node(&window.handle)?
.with_context(|| format!("source handle {} was not materialized", window.handle))?;
source_windows.push(source_window_from_graph_node(node)?);
}
let mut relationship_map = Vec::new();
for relation in &packet.relationship_map {
let from_id = exploration_ref_id(&relation.from);
let to_id = exploration_ref_id(&relation.to);
let from = store
.node(&from_id)?
.with_context(|| format!("exploration ref {} was not materialized", relation.from))?;
let to = store
.node(&to_id)?
.with_context(|| format!("exploration ref {} was not materialized", relation.to))?;
let edge = store
.outgoing_edges(&from_id, Some(&relation.relation))?
.into_iter()
.find(|edge| edge.to_id == to_id)
.with_context(|| {
format!(
"exploration relation {} -> {} ({}) was not materialized",
relation.from, relation.to, relation.relation
)
})?;
relationship_map.push(ExplorationRelation {
from: from.label,
relation: edge.kind,
to: to.label,
label: edge.properties.get("label").cloned(),
});
}
Ok(ExplorationPacket {
budget: packet.budget,
relationship_map,
source_windows,
worker_context: packet.worker_context,
no_reread_guidance: packet.no_reread_guidance,
})
}
/// Shrinks a test-digest report to the preview budget.
///
/// At most `budget.preview_items()` failure groups, tests per group and
/// warnings are kept, and every previewed string goes through
/// `truncate_for_budget` with `budget.preview_bytes()`. `truncated` is set
/// when the report has more failure groups or more warnings than the item
/// cap.
fn build_context_pack_test_preview(
    report: &test_digest::TestDigestReport,
    budget: ResponseBudget,
    ontology: Option<&TagOntologyPreviewContext>,
) -> ContextPackTestPreview {
    let item_cap = budget.preview_items();
    let byte_cap = budget.preview_bytes();
    let truncated = report.failure_groups.len() > item_cap || report.warnings.len() > item_cap;

    let failure_groups = report
        .failure_groups
        .iter()
        .take(item_cap)
        .map(|group| {
            let anchor = group.path.as_deref();
            ContextPackTestFailurePreview {
                tests: group
                    .tests
                    .iter()
                    .take(item_cap)
                    .map(|name| truncate_for_budget(name, byte_cap))
                    .collect(),
                message: truncate_for_budget(&group.message, byte_cap),
                path: group
                    .path
                    .as_ref()
                    .map(|location| truncate_for_budget(location, byte_cap)),
                line: group.line,
                occurrences: group.occurrences,
                summary_state: test_digest_summary_label(group.summary_state).to_string(),
                // Failures without a file anchor share the "test-failure" key.
                summary_refs: build_context_summary_refs(
                    "ctsum",
                    anchor.unwrap_or("test-failure"),
                    anchor,
                    group
                        .current_summaries
                        .iter()
                        .map(|entry| (entry.symbol.as_str(), entry.summary.as_str())),
                    budget,
                    ontology,
                ),
            }
        })
        .collect();

    let warnings = report
        .warnings
        .iter()
        .take(item_cap)
        .map(|text| truncate_for_budget(text, byte_cap))
        .collect();

    ContextPackTestPreview {
        runner: report.runner.clone(),
        failures: report.failures,
        grouped_failures: report.grouped_failures,
        counts: ContextPackTestCounts {
            passed: report.counts.passed,
            failed: report.counts.failed,
            skipped: report.counts.skipped,
        },
        truncated,
        failure_groups,
        warnings,
    }
}
/// Shrinks a log-digest report to the preview budget.
///
/// Signals, repeated lines, file refs, symbol refs and warnings are each
/// limited to `budget.preview_items()` entries, and previewed strings go
/// through `truncate_for_budget` with `budget.preview_bytes()`. The group
/// totals are copied from the report unchanged. `truncated` is set when any
/// of the five lists is longer than the item cap.
fn build_context_pack_log_preview(
    report: &log_digest::LogDigestReport,
    budget: ResponseBudget,
    ontology: Option<&TagOntologyPreviewContext>,
) -> ContextPackLogPreview {
    let item_cap = budget.preview_items();
    let byte_cap = budget.preview_bytes();
    let list_lengths = [
        report.signals.len(),
        report.repeated_lines.len(),
        report.file_refs.len(),
        report.symbol_refs.len(),
        report.warnings.len(),
    ];
    let truncated = list_lengths.iter().any(|&length| length > item_cap);

    let signals = report
        .signals
        .iter()
        .take(item_cap)
        .map(|entry| {
            let anchor = entry.path.as_deref();
            ContextPackLogSignalPreview {
                severity: entry.severity.clone(),
                message: truncate_for_budget(&entry.message, byte_cap),
                path: entry
                    .path
                    .as_ref()
                    .map(|location| truncate_for_budget(location, byte_cap)),
                line: entry.line,
                occurrences: entry.occurrences,
                summary_state: log_digest_summary_label(entry.summary_state).to_string(),
                // Signals without a file anchor share the "log-signal" key.
                summary_refs: build_context_summary_refs(
                    "clsum",
                    anchor.unwrap_or("log-signal"),
                    anchor,
                    entry
                        .current_summaries
                        .iter()
                        .map(|item| (item.symbol.as_str(), item.summary.as_str())),
                    budget,
                    ontology,
                ),
            }
        })
        .collect();

    let repeated_lines = report
        .repeated_lines
        .iter()
        .take(item_cap)
        .map(|entry| ContextPackLogRepeatedLinePreview {
            line: truncate_for_budget(&entry.line, byte_cap),
            occurrences: entry.occurrences,
        })
        .collect();

    let file_refs = report
        .file_refs
        .iter()
        .take(item_cap)
        .map(|entry| ContextPackLogFileRefPreview {
            path: truncate_for_budget(&entry.path, byte_cap),
            line: entry.line,
            occurrences: entry.occurrences,
            summary_state: log_digest_summary_label(entry.summary_state).to_string(),
            summary_refs: build_context_summary_refs(
                "clfsum",
                &entry.path,
                Some(&entry.path),
                entry
                    .current_summaries
                    .iter()
                    .map(|item| (item.symbol.as_str(), item.summary.as_str())),
                budget,
                ontology,
            ),
        })
        .collect();

    let symbol_refs = report
        .symbol_refs
        .iter()
        .take(item_cap)
        .map(|entry| ContextPackLogSymbolRefPreview {
            // The handle is derived from the full symbol, not the truncated preview.
            handle: stable_handle("clsym", &entry.symbol),
            symbol: truncate_for_budget(&entry.symbol, byte_cap),
            tag_alias: tag_alias_from_name(&entry.symbol)
                .map(|alias| truncate_for_budget(&alias, byte_cap)),
            ontology_refs: tag_alias_from_name(&entry.symbol)
                .as_deref()
                .map(|alias| ontology_refs_for_alias(ontology, alias))
                .unwrap_or_default(),
            occurrences: entry.occurrences,
            summary_state: log_digest_summary_label(entry.summary_state).to_string(),
            summary_refs: build_context_summary_refs(
                "clssum",
                &entry.symbol,
                None,
                entry
                    .current_summaries
                    .iter()
                    .map(|item| (item.symbol.as_str(), item.summary.as_str())),
                budget,
                ontology,
            ),
        })
        .collect();

    let warnings = report
        .warnings
        .iter()
        .take(item_cap)
        .map(|text| truncate_for_budget(text, byte_cap))
        .collect();

    ContextPackLogPreview {
        total_lines: report.total_lines,
        non_empty_lines: report.non_empty_lines,
        signal_groups: report.signal_groups,
        repeated_line_groups: report.repeated_line_groups,
        file_ref_groups: report.file_ref_groups,
        symbol_ref_groups: report.symbol_ref_groups,
        stack_groups: report.stack_groups,
        truncated,
        signals,
        repeated_lines,
        file_refs,
        symbol_refs,
        warnings,
    }
}
fn enrich_log_preview_with_diff_symbols(
log_preview: &mut ContextPackLogPreview,
diff_digest: &ContextPackDiffPreview,
ontology: Option<&TagOntologyPreviewContext>,
) {
if !log_preview.symbol_refs.is_empty() {
return;
}
let mut symbols = Vec::new();
for file in &diff_digest.files {
for symbol in &file.touched_symbol_refs {
if !symbols
.iter()
.any(|existing: &String| existing == &symbol.name)
{
symbols.push(symbol.name.clone());
}
}
}
if symbols.is_empty() {
return;
}
log_preview.symbol_ref_groups = log_preview.symbol_ref_groups.max(symbols.len());
log_preview.symbol_refs = symbols
.into_iter()
.map(|symbol| ContextPackLogSymbolRefPreview {
handle: stable_handle("clsym", &symbol),
symbol: symbol.clone(),
tag_alias: tag_alias_from_name(&symbol),
ontology_refs: tag_alias_from_name(&symbol)
.as_deref()
.map(|alias| ontology_refs_for_alias(ontology, alias))
.unwrap_or_default(),
occurrences: 1,
summary_state: "unavailable".to_string(),
summary_refs: Vec::new(),
})
.collect();
}
/// Adds each candidate to `refs` keyed by its handle, keeping whichever entry
/// was stored first when a handle is already present.
fn insert_ontology_refs(
    refs: &mut BTreeMap<String, CompactOntologyRefPreview>,
    candidates: &[CompactOntologyRefPreview],
) {
    for candidate in candidates {
        if let std::collections::btree_map::Entry::Vacant(slot) =
            refs.entry(candidate.handle.clone())
        {
            slot.insert(candidate.clone());
        }
    }
}
/// Gathers the ontology refs mentioned anywhere in a context pack, deduplicated
/// by handle and returned in handle order.
///
/// Sources are visited in this order, and the first ref seen for a handle is
/// the one kept: next-context symbols; per diff file, its touched symbols then
/// its summaries; test failure summaries; log signal summaries; log file-ref
/// summaries; and per log symbol, its own refs then its summaries.
fn collect_context_pack_ontology_refs(
    next_context: &SessionReviewNextContextBudgetReport,
    diff_digest: &ContextPackDiffPreview,
    test_digest: &ContextPackOptionalSection<ContextPackTestPreview>,
    log_digest: &ContextPackOptionalSection<ContextPackLogPreview>,
) -> Vec<CompactOntologyRefPreview> {
    let mut by_handle = BTreeMap::new();

    next_context
        .touched_symbol_refs
        .iter()
        .for_each(|symbol| insert_ontology_refs(&mut by_handle, &symbol.ontology_refs));

    for file in &diff_digest.files {
        file.touched_symbol_refs
            .iter()
            .for_each(|symbol| insert_ontology_refs(&mut by_handle, &symbol.ontology_refs));
        file.summary_refs
            .iter()
            .for_each(|summary| insert_ontology_refs(&mut by_handle, &summary.ontology_refs));
    }

    // An absent test section contributes nothing.
    test_digest
        .report
        .iter()
        .flat_map(|test| test.failure_groups.iter())
        .flat_map(|failure| failure.summary_refs.iter())
        .for_each(|summary| insert_ontology_refs(&mut by_handle, &summary.ontology_refs));

    if let Some(log) = log_digest.report.as_ref() {
        log.signals
            .iter()
            .flat_map(|signal| signal.summary_refs.iter())
            .for_each(|summary| insert_ontology_refs(&mut by_handle, &summary.ontology_refs));
        log.file_refs
            .iter()
            .flat_map(|file| file.summary_refs.iter())
            .for_each(|summary| insert_ontology_refs(&mut by_handle, &summary.ontology_refs));
        for symbol in &log.symbol_refs {
            insert_ontology_refs(&mut by_handle, &symbol.ontology_refs);
            symbol
                .summary_refs
                .iter()
                .for_each(|summary| insert_ontology_refs(&mut by_handle, &summary.ontology_refs));
        }
    }

    by_handle.into_values().collect()
}
/// Builds the context-pack report for `path`, discarding the phase timings
/// that `build_context_pack_report_with_profile` also returns.
///
/// # Errors
///
/// Propagates any error from `build_context_pack_report_with_profile`.
pub(crate) fn build_context_pack_report(
    path: &Path,
    test_input: Option<&Path>,
    runner: Option<&str>,
    log_input: Option<&Path>,
    budget: ResponseBudget,
) -> Result<ContextPackReport> {
    let (report, _phase_timings) =
        build_context_pack_report_with_profile(path, test_input, runner, log_input, budget)?;
    Ok(report)
}
/// Builds the context-pack report for `path` together with per-phase timings.
///
/// Phases run in this order: session review, index gate + status reminders +
/// ontology load, working-tree diff preview, optional test digest, optional
/// log digest, exploration packet materialization, and graph orchestration.
/// Timed phases (and their sub-phases) are appended to the returned timing
/// list in the order they complete.
///
/// * `test_input` / `log_input` — files holding captured test or log output;
///   when `None` the matching section is reported as `not_provided` with a
///   capture command hint.
/// * `runner` — optional test runner name forwarded to the test digest and
///   echoed in the emitted replay command.
/// * `budget` — requested response budget; it is first passed through
///   `effective_context_budget`.
///
/// # Errors
///
/// Fails when the session review or diff digest cannot be computed, when a
/// provided test/log input cannot be read or is blank, when a digest fails, or
/// when exploration materialization or graph orchestration fails.
fn build_context_pack_report_with_profile(
    path: &Path,
    test_input: Option<&Path>,
    runner: Option<&str>,
    log_input: Option<&Path>,
    budget: ResponseBudget,
) -> Result<(ContextPackReport, Vec<GraphDbBackendEvalPhaseTiming>)> {
    // #gdbgatecold: trusted scope share — `prepare_agent_doc_index_gate_cached`
    // and `context_pack_status_reminders` both call `IndexDb::inspect_read_only`
    // on the same `(root, .tsift/index.db)` cold path. While this guard is
    // alive, the second call reuses the cached inspection on the same thread
    // instead of paying the disk/SQLite walk a second time. Search runs
    // entirely outside this scope, so freshness re-checks after a file
    // mutation are unaffected.
    let _inspect_scope = index::InspectScopeGuard::new();
    let budget = effective_context_budget(budget);
    let mut phases = Vec::new();

    let session_review_started = Instant::now();
    let (review, session_review_sub_phases) = session_review::compute_with_phases(path)?;
    let session_review_total_micros = session_review_started.elapsed().as_micros();
    phases.push(graph_db_backend_eval_phase_timing(
        "session_review_compute",
        session_review_total_micros,
        "session-review prompt/touched-file/touched-symbol/failure aggregation for the context-pack handoff",
    ));
    for sub_phase in &session_review_sub_phases {
        phases.push(graph_db_backend_eval_phase_timing(
            &format!("session_review_compute.{}", sub_phase.name),
            sub_phase.duration_micros,
            &sub_phase.detail,
        ));
    }

    let root = PathBuf::from(&review.root);
    let status_index_gate_started = Instant::now();
    let mut status_index_gate_sub_phases: Vec<(String, u128, String)> = Vec::with_capacity(3);
    let index_gate_started = Instant::now();
    let (gate, gate_cache_detail) =
        prepare_agent_doc_index_gate_cached(&root, path, None, "context-pack handoff");
    let index_gate_micros = index_gate_started.elapsed().as_micros();
    status_index_gate_sub_phases.push((
        "prepare_agent_doc_index_gate".to_string(),
        index_gate_micros,
        gate_cache_detail,
    ));
    let reminders_started = Instant::now();
    // Gate diagnostics lead the reminder list; status reminders follow.
    let mut status_reminders = gate.diagnostics.clone();
    status_reminders.extend(context_pack_status_reminders(&root));
    let reminders_micros = reminders_started.elapsed().as_micros();
    status_index_gate_sub_phases.push((
        "context_pack_status_reminders".to_string(),
        reminders_micros,
        "tsift status reminders for the cached preparation context".to_string(),
    ));
    let ontology_started = Instant::now();
    let ontology = load_tag_ontology_preview_context(&root);
    let ontology_micros = ontology_started.elapsed().as_micros();
    status_index_gate_sub_phases.push((
        "load_tag_ontology_preview_context".to_string(),
        ontology_micros,
        "tag ontology preview context load".to_string(),
    ));
    let status_index_gate_total_micros = status_index_gate_started.elapsed().as_micros();
    phases.push(graph_db_backend_eval_phase_timing(
        "status_index_gate",
        status_index_gate_total_micros,
        "agent-doc index gate, tsift status reminders, and ontology preview loading",
    ));
    for (name, micros, detail) in &status_index_gate_sub_phases {
        phases.push(graph_db_backend_eval_phase_timing(
            &format!("status_index_gate.{name}"),
            *micros,
            detail,
        ));
    }

    let ontology_ref = ontology.as_ref();
    let mut next_context =
        build_session_review_next_context_budget_report(&review, budget, ontology_ref);
    // #gdbprephot: cap working-tree diff_digest parsing to the preview budget.
    // build_context_pack_diff_preview only emits files.take(preview_items),
    // and enrich_next_context_with_diff_symbols / build_context_pack_exploration_packet
    // only iterate diff_digest.files (the preview window). The full-fat parse
    // of every working-tree changed file dominated context_pack_diff cost on
    // repos with many unstaged edits.
    let diff_parse_budget = budget.preview_items();
    let diff_digest = graph_db_backend_eval_timed_phase(
        &mut phases,
        "context_pack_diff",
        "working-tree diff digest preview used to enrich next-context symbols",
        || {
            Ok(build_context_pack_diff_preview(
                &diff_digest::compute(
                    &root,
                    diff_digest::DiffDigestOptions {
                        cached: false,
                        revision: None,
                        max_parsed_files: Some(diff_parse_budget),
                    },
                )
                .with_context(|| {
                    format!("computing context-pack diff digest for {}", root.display())
                })?,
                budget,
                ontology_ref,
            ))
        },
    )?;
    enrich_next_context_with_diff_symbols(&mut next_context, &diff_digest, ontology_ref);

    let test_digest = match test_input {
        Some(file_path) => {
            let input = fs::read_to_string(file_path)
                .with_context(|| format!("reading test output: {}", file_path.display()))?;
            if input.trim().is_empty() {
                bail!("no test output provided in {}", file_path.display());
            }
            let report = test_digest::compute(&root, &input, runner)?;
            ContextPackOptionalSection {
                status: "included".to_string(),
                // Lossy conversion keeps a usable path in the replay command
                // even when the input path is not valid UTF-8.
                command: format!(
                    "tsift test-digest --path . --input {}{}",
                    shell_quote(file_path.to_string_lossy().as_ref()),
                    runner
                        .map(|value| format!(" --runner {}", shell_quote(value)))
                        .unwrap_or_default()
                ),
                source: Some(file_path.display().to_string()),
                report: Some(build_context_pack_test_preview(
                    &report,
                    budget,
                    ontology_ref,
                )),
            }
        }
        None => ContextPackOptionalSection {
            status: "not_provided".to_string(),
            command: "tsift test-digest --path . < test.log".to_string(),
            source: None,
            report: None,
        },
    };

    let log_digest = match log_input {
        Some(file_path) => {
            let input = fs::read_to_string(file_path)
                .with_context(|| format!("reading log output: {}", file_path.display()))?;
            if input.trim().is_empty() {
                bail!("no log output provided in {}", file_path.display());
            }
            let report = log_digest::compute(&root, &input)?;
            let mut preview = build_context_pack_log_preview(&report, budget, ontology_ref);
            enrich_log_preview_with_diff_symbols(&mut preview, &diff_digest, ontology_ref);
            ContextPackOptionalSection {
                status: "included".to_string(),
                // Same lossy path handling as the test-digest replay command.
                command: format!(
                    "tsift log-digest --path . --input {}",
                    shell_quote(file_path.to_string_lossy().as_ref())
                ),
                source: Some(file_path.display().to_string()),
                report: Some(preview),
            }
        }
        None => ContextPackOptionalSection {
            status: "not_provided".to_string(),
            command: "tsift log-digest --path . < build.log".to_string(),
            source: None,
            report: None,
        },
    };

    let ontology_refs =
        collect_context_pack_ontology_refs(&next_context, &diff_digest, &test_digest, &log_digest);
    let exploration = graph_db_backend_eval_timed_phase(
        &mut phases,
        "exploration_materialization",
        "context-pack source-window and worker-context exploration packet projection",
        || {
            materialize_context_pack_exploration_packet(
                &root,
                build_context_pack_exploration_packet(&root, &next_context, &diff_digest),
            )
        },
    )?;
    let graph_orchestration = graph_db_backend_eval_timed_phase(
        &mut phases,
        "graph_orchestration",
        "context-pack graph freshness, evidence packet ids, and conflict-matrix follow-up commands",
        || context_pack_graph_orchestration(&root, path, &next_context, &exploration),
    )?;

    Ok((
        ContextPackReport {
            root: review.root,
            target: review.target,
            target_kind: review.target_kind,
            max_items: budget.preview_items(),
            max_bytes: budget.preview_bytes(),
            status_reminders,
            ontology_refs,
            next_context,
            diff_digest,
            test_digest,
            log_digest,
            exploration,
            graph_orchestration,
            // Resume commands come from the unbudgeted session review.
            resume_commands: review.next_context.next_digest_commands,
        },
        phases,
    ))
}
/// Returns the reminders from `status::check_status` for `root`, or an empty
/// list when the status check yields no report.
fn context_pack_status_reminders(root: &Path) -> Vec<String> {
    let reminders = status::check_status(root).map(|report| report.reminders);
    reminders.unwrap_or_default()
}
/// Summarizes graph-db state and follow-up commands for a context pack.
///
/// Conflict targets are extracted from the next-context prompt targets, or
/// from the worker-context summaries when the prompts yield none, then
/// deduplicated in order. Each target is resolved against the read-only graph
/// store: resolved targets produce an evidence packet id plus follow-up
/// `graph-db evidence` commands and a single `conflict-matrix` command;
/// unresolved targets produce a warning.
///
/// Warnings start with the projection freshness diagnostics, followed by a
/// read-only recovery diagnostic when the store reports one.
///
/// # Errors
///
/// Fails when the graph-db projection cannot be opened, its freshness cannot
/// be read, or target resolution returns an error.
fn context_pack_graph_orchestration(
    root: &Path,
    path: &Path,
    next_context: &SessionReviewNextContextBudgetReport,
    exploration: &ExplorationPacket,
) -> Result<ContextPackGraphOrchestration> {
    let graph_db = graph_substrate_db_path(root, None);
    let store = SqliteGraphStore::open_read_only_resilient(&graph_db)
        .with_context(|| format!("opening graph-db projection: {}", graph_db.display()))?;
    let projection_freshness = sqlite_graph_freshness(&store, "root")?;

    let mut warnings = projection_freshness.diagnostics.clone();
    warnings.extend(
        store
            .read_only_recovery()
            .map(|recovery| graph_db_read_recovery_diagnostic(recovery)),
    );

    // Prompt targets take priority; worker summaries are only a fallback.
    let prompt_refs = next_context
        .prompt_targets
        .iter()
        .flat_map(|prompt| extract_conflict_target_refs(prompt))
        .collect::<Vec<_>>();
    let candidate_refs = if prompt_refs.is_empty() {
        exploration
            .worker_context
            .iter()
            .flat_map(|worker| extract_conflict_target_refs(&worker.summary))
            .collect::<Vec<_>>()
    } else {
        prompt_refs
    };
    let targets = dedupe_preserve_order(candidate_refs);

    let mut evidence_packet_ids = Vec::new();
    let mut resolvable_targets = Vec::new();
    for target in &targets {
        let Some(node) = graph_db_resolve_evidence_target(&store, target)? else {
            warnings.push(format!("graph evidence target not found: {target}"));
            continue;
        };
        evidence_packet_ids.push(graph_db_evidence_packet_id(
            target,
            &node,
            &projection_freshness,
        ));
        resolvable_targets.push(target.clone());
    }

    let quoted_root = shell_quote(root.to_string_lossy().as_ref());
    let status_command = format!("tsift graph-db --path {} status --json", quoted_root);
    let mut follow_up_commands = vec![status_command.clone()];
    follow_up_commands.extend(resolvable_targets.iter().map(|target| {
        format!(
            "tsift graph-db --path {} evidence {} --depth 3 --limit 8 --json",
            quoted_root,
            shell_quote(target)
        )
    }));

    let conflict_matrix_decisions = if resolvable_targets.is_empty() {
        vec!["no resolvable backlog/job targets found for conflict-matrix".to_string()]
    } else {
        let quoted_targets = resolvable_targets
            .iter()
            .map(|target| shell_quote(target))
            .collect::<Vec<_>>()
            .join(" ");
        follow_up_commands.push(format!(
            "tsift conflict-matrix --path {} {} --json",
            shell_quote(path.to_string_lossy().as_ref()),
            quoted_targets
        ));
        vec![format!(
            "run conflict-matrix before parallel dispatch for {} target(s)",
            resolvable_targets.len()
        )]
    };

    let worker_ownership_blocks = exploration
        .worker_context
        .iter()
        .map(|worker| format!("{} scopes {}", worker.handle, worker.summary))
        .collect::<Vec<_>>();
    let projection_hashes = projection_freshness
        .content_hash
        .clone()
        .into_iter()
        .collect();

    Ok(ContextPackGraphOrchestration {
        contract_version: CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION,
        graph_db_command: status_command,
        projection_freshness,
        projection_hashes,
        evidence_packet_ids,
        conflict_matrix_decisions,
        worker_ownership_blocks,
        follow_up_commands: dedupe_preserve_order(follow_up_commands),
        warnings,
    })
}
/// Prints a context-pack report to stdout for a human reader.
///
/// With `compact` set, the report is written as terse one-line records
/// (summary, reminders, prompts, token actions, diff files, test, log,
/// exploration and graph-orchestration counts). Otherwise it is written as
/// titled sections separated by blank lines, ending with the resume commands.
pub(crate) fn print_context_pack_human(report: &ContextPackReport, compact: bool) {
    /// One-line-per-record rendering used for `compact` output.
    fn print_compact(report: &ContextPackReport) {
        println!(
            "context-pack target:{} prompts:{}/{} diff:{}/{} test:{} log:{}",
            shell_quote(&report.target),
            report.next_context.prompt_targets.len(),
            report.next_context.prompt_target_total,
            report.diff_digest.files.len(),
            report.diff_digest.files_changed,
            report.test_digest.status,
            report.log_digest.status
        );
        for reminder in &report.status_reminders {
            println!("reminder {reminder}");
        }
        for prompt in &report.next_context.prompt_targets {
            println!("prompt {prompt}");
        }
        for action in &report.next_context.next_token_actions {
            // Digest commands plus the optional compact and restart commands.
            let command_total = action.digest_commands.len()
                + usize::from(action.compact_command.is_some())
                + usize::from(action.restart_command.is_some());
            println!(
                "token-action {} {} commands:{}",
                action.priority, action.kind, command_total
            );
        }
        for file in &report.diff_digest.files {
            let symbol_tokens = if file.touched_symbol_refs.is_empty() {
                "-".to_string()
            } else {
                file.touched_symbol_refs
                    .iter()
                    .map(compact_symbol_ref_token)
                    .collect::<Vec<_>>()
                    .join(",")
            };
            let summary_handles = if file.summary_refs.is_empty() {
                "-".to_string()
            } else {
                file.summary_refs
                    .iter()
                    .map(|summary| summary.handle.as_str())
                    .collect::<Vec<_>>()
                    .join(",")
            };
            println!(
                "diff {} status:{} syms:{} sums:{}",
                file.path, file.status, symbol_tokens, summary_handles
            );
        }
        match &report.test_digest.report {
            Some(test) => println!(
                "test runner:{} failures:{} groups:{}",
                test.runner, test.failures, test.grouped_failures
            ),
            None => println!("test {}", report.test_digest.command),
        }
        match &report.log_digest.report {
            Some(log) => println!(
                "log lines:{} signals:{} files:{} syms:{}",
                log.non_empty_lines, log.signal_groups, log.file_ref_groups, log.symbol_ref_groups
            ),
            None => println!("log {}", report.log_digest.command),
        }
        println!(
            "explore windows:{} relations:{} budget:{}",
            report.exploration.source_windows.len(),
            report.exploration.relationship_map.len(),
            report.exploration.budget.project_size
        );
        println!(
            "graph-orchestration freshness:{} evidence:{} ownership:{}",
            report.graph_orchestration.projection_freshness.status,
            report.graph_orchestration.evidence_packet_ids.len(),
            report.graph_orchestration.worker_ownership_blocks.len()
        );
    }

    /// Header block: target, root, preview budget and status reminders.
    fn print_overview(report: &ContextPackReport) {
        println!("Context pack");
        println!("  target: {}", report.target);
        println!("  target kind: {}", report.target_kind);
        println!("  root: {}", report.root);
        println!(
            "  preview budget: {} items / {} bytes",
            report.max_items, report.max_bytes
        );
        if !report.status_reminders.is_empty() {
            println!("  status reminders:");
            for reminder in &report.status_reminders {
                println!("    - {reminder}");
            }
        }
    }

    /// "Next context" section: shown/total counts, items and token actions.
    fn print_next_context(report: &ContextPackReport) {
        let next = &report.next_context;
        println!("Next context");
        println!(
            "  prompt targets: {}/{}",
            next.prompt_targets.len(),
            next.prompt_target_total
        );
        println!(
            "  touched files: {}/{}",
            next.touched_files.len(),
            next.touched_file_total
        );
        println!(
            "  touched symbols: {}/{}",
            next.touched_symbols.len(),
            next.touched_symbol_total
        );
        println!(
            "  unresolved failures: {}/{}",
            next.unresolved_failures.len(),
            next.unresolved_failure_total
        );
        for prompt in &next.prompt_targets {
            println!("  - prompt: {prompt}");
        }
        for path in &next.touched_files {
            println!("  - file: {path}");
        }
        // Symbol refs are only listed when the plain symbol list is non-empty.
        if !next.touched_symbols.is_empty() {
            for symbol in &next.touched_symbol_refs {
                println!(
                    "  - symbol: {}",
                    format_symbol_preview_line(
                        &symbol.handle,
                        &symbol.name,
                        symbol.tag_alias.as_deref()
                    )
                );
            }
        }
        if !next.next_token_actions.is_empty() {
            println!("  token actions:");
            for action in &next.next_token_actions {
                println!(
                    "    - [{}:{}] {} | guidance: {}",
                    action.priority, action.kind, action.message, action.guidance
                );
                if let Some(command) = &action.compact_command {
                    println!("      compact: {command}");
                }
                if let Some(command) = &action.restart_command {
                    println!("      restart: {command}");
                }
                for command in &action.digest_commands {
                    println!("      digest: {command}");
                }
            }
        }
    }

    /// "Diff digest" section: totals followed by one entry per previewed file.
    fn print_diff_digest(report: &ContextPackReport) {
        let diff = &report.diff_digest;
        println!("Diff digest");
        println!("  mode: {}", diff.mode);
        println!(
            "  files changed: {}/{}",
            diff.files.len(),
            diff.files_changed
        );
        println!("  touched symbols: {}", diff.symbols_touched);
        println!(
            "  call edges: +{} / -{}",
            diff.call_edges_added, diff.call_edges_removed
        );
        for file in &diff.files {
            println!("  - {} [{}]", file.path, file.status);
            if !file.touched_symbol_refs.is_empty() {
                let symbol_lines = file
                    .touched_symbol_refs
                    .iter()
                    .map(|symbol| {
                        format_symbol_preview_line(
                            &symbol.handle,
                            &symbol.name,
                            symbol.tag_alias.as_deref(),
                        )
                    })
                    .collect::<Vec<_>>()
                    .join(" | ");
                println!("    symbols: {}", symbol_lines);
            }
            if !file.warnings.is_empty() {
                println!("    warnings: {}", file.warnings.join(" | "));
            }
            if !file.summary_refs.is_empty() {
                let summary_lines = file
                    .summary_refs
                    .iter()
                    .map(format_summary_ref_line)
                    .collect::<Vec<_>>()
                    .join(" | ");
                println!("    summaries: {}", summary_lines);
            }
        }
    }

    /// "Test digest" section, or the capture command when no report exists.
    fn print_test_digest(report: &ContextPackReport) {
        println!("Test digest");
        println!("  status: {}", report.test_digest.status);
        let Some(test) = &report.test_digest.report else {
            println!("  capture: {}", report.test_digest.command);
            return;
        };
        println!("  runner: {}", test.runner);
        println!("  failures: {}", test.failures);
        println!("  failure groups: {}", test.grouped_failures);
        for failure in &test.failure_groups {
            let location = match (&failure.path, failure.line) {
                (Some(path), Some(line)) => format!("{path}:{line}"),
                (Some(path), None) => path.clone(),
                _ => "(no file anchor)".to_string(),
            };
            println!(
                "  - {} count:{} msg:{}",
                location, failure.occurrences, failure.message
            );
            if !failure.summary_refs.is_empty() {
                let summary_lines = failure
                    .summary_refs
                    .iter()
                    .map(format_summary_ref_line)
                    .collect::<Vec<_>>()
                    .join(" | ");
                println!("    summaries: {}", summary_lines);
            }
        }
    }

    /// "Log digest" section, or the capture command when no report exists.
    fn print_log_digest(report: &ContextPackReport) {
        println!("Log digest");
        println!("  status: {}", report.log_digest.status);
        let Some(log) = &report.log_digest.report else {
            println!("  capture: {}", report.log_digest.command);
            return;
        };
        println!("  non-empty lines: {}", log.non_empty_lines);
        println!("  signal groups: {}", log.signal_groups);
        println!("  file refs: {}", log.file_ref_groups);
        println!("  symbol refs: {}", log.symbol_ref_groups);
        for signal in &log.signals {
            let location = match (&signal.path, signal.line) {
                (Some(path), Some(line)) => format!("{path}:{line}"),
                (Some(path), None) => path.clone(),
                _ => "(no file anchor)".to_string(),
            };
            println!(
                "  - {} {} count:{} msg:{}",
                location, signal.severity, signal.occurrences, signal.message
            );
            if !signal.summary_refs.is_empty() {
                let summary_lines = signal
                    .summary_refs
                    .iter()
                    .map(format_summary_ref_line)
                    .collect::<Vec<_>>()
                    .join(" | ");
                println!("    summaries: {}", summary_lines);
            }
        }
        for symbol in &log.symbol_refs {
            println!(
                "  - symbol: {} count:{} state:{}",
                format_symbol_preview_line(
                    &symbol.handle,
                    &symbol.symbol,
                    symbol.tag_alias.as_deref()
                ),
                symbol.occurrences,
                symbol.summary_state
            );
            if !symbol.summary_refs.is_empty() {
                let summary_lines = symbol
                    .summary_refs
                    .iter()
                    .map(format_summary_ref_line)
                    .collect::<Vec<_>>()
                    .join(" | ");
                println!("    summaries: {}", summary_lines);
            }
        }
    }

    /// "Exploration packet" section: budget, source windows, relations.
    fn print_exploration(report: &ContextPackReport) {
        let exploration = &report.exploration;
        println!("Exploration packet");
        println!(
            "  budget: {} ({} windows x {} lines)",
            exploration.budget.project_size,
            exploration.budget.max_source_windows,
            exploration.budget.lines_per_window
        );
        for window in &exploration.source_windows {
            println!(
                "  - window {}:{}-{} ({})",
                window.file, window.start, window.end, window.reason
            );
            println!("    expand: {}", window.expand);
        }
        for relation in &exploration.relationship_map {
            println!(
                "  - relation {} -{}-> {}",
                relation.from, relation.relation, relation.to
            );
        }
    }

    /// "Graph orchestration" section: freshness, evidence, decisions, next steps.
    fn print_graph_orchestration(report: &ContextPackReport) {
        let orchestration = &report.graph_orchestration;
        println!("Graph orchestration");
        println!(
            "  projection freshness: {}",
            orchestration.projection_freshness.status
        );
        for evidence in &orchestration.evidence_packet_ids {
            println!("  - evidence: {evidence}");
        }
        for decision in &orchestration.conflict_matrix_decisions {
            println!("  - decision: {decision}");
        }
        for block in &orchestration.worker_ownership_blocks {
            println!("  - ownership: {block}");
        }
        for command in &orchestration.follow_up_commands {
            println!("  - next: {command}");
        }
    }

    /// Trailing list of commands for resuming the session.
    fn print_resume_commands(report: &ContextPackReport) {
        println!("Resume commands:");
        for command in &report.resume_commands {
            println!("  - {command}");
        }
    }

    if compact {
        print_compact(report);
        return;
    }

    // Sections are separated by a single blank line.
    print_overview(report);
    println!();
    print_next_context(report);
    println!();
    print_diff_digest(report);
    println!();
    print_test_digest(report);
    println!();
    print_log_digest(report);
    println!();
    print_exploration(report);
    println!();
    print_graph_orchestration(report);
    println!();
    print_resume_commands(report);
}
/// Formats a count compactly: plain digits below 1 000, one decimal with a
/// `K` suffix below one million, and one decimal with an `M` suffix above.
///
/// Values just under one million that round up to `1000.0` thousands are
/// promoted to `1.0M`, so the output never reads `1000.0K`.
pub(crate) fn format_compact_count(value: u64) -> String {
    if value >= 1_000_000 {
        return format!("{:.1}M", value as f64 / 1_000_000.0);
    }
    if value >= 1_000 {
        let thousands = format!("{:.1}", value as f64 / 1_000.0);
        // One-decimal rounding can carry 999.95+ up to the next unit.
        if thousands == "1000.0" {
            return "1.0M".to_string();
        }
        return format!("{thousands}K");
    }
    value.to_string()
}
/// Runs `shell_command` through the digest runner and reports a digest of what it printed.
///
/// * `kind` – parsed by `DigestRunnerKind::parse`; selects the test or the log digest path.
/// * `path` – used to resolve the transcript artifact root and passed to the digest helpers.
/// * `runner` – optional test-runner name; only forwarded on the test path.
/// * `shell_command` – the command line handed to `run_digest_runner_command`.
/// * `format` – when both `json_output` and `envelope` are set, a JSON report/envelope is
///   printed; otherwise the digest is rendered via the `render_*_digest_from_input` helpers.
///
/// Returns `Ok(())` when the wrapped command succeeded. When it failed with an exit code this
/// function terminates the process with that same code; when it has no exit code it returns an
/// error stating the command was terminated by a signal.
fn cmd_digest_runner(
kind: &str,
path: &Path,
runner: Option<&str>,
shell_command: &str,
format: OutputFormat,
) -> Result<()> {
let digest_kind = DigestRunnerKind::parse(kind)?;
let root = transcript_artifact_root(path)?;
let execution = run_digest_runner_command(shell_command)?;
let output = &execution.output;
// Only stdout is read here; invalid UTF-8 is replaced rather than rejected.
let captured = String::from_utf8_lossy(&output.stdout).into_owned();
// -1 stands in for "no exit code available".
let exit_code = output.status.code().unwrap_or(-1);
if format.json_output && format.envelope {
// The key mixes the digest kind, both command spellings and the captured text; it is fed
// to `stable_handle` below and to `persist_transcript_artifact`.
let artifact_key = format!(
"{}:{}:{}:{}",
digest_kind.as_str(),
shell_command,
execution.executed_command,
captured
);
// Nothing is persisted when the command printed only whitespace (or nothing at all).
let artifact = if captured.trim().is_empty() {
None
} else {
// `expand` is the follow-up command that re-digests the stored artifact file.
// NOTE(review): the artifact path is rebuilt here from `stable_handle` + suffix and is
// presumably expected to match what `persist_transcript_artifact` writes — confirm.
let (suffix, expand) = match digest_kind {
DigestRunnerKind::Test => (
"test.log",
format!(
"tsift test-digest --path {} --input {}{} --json",
shell_quote(root.to_string_lossy().as_ref()),
shell_quote(
root.join(".tsift/artifacts")
.join(format!("{}.test.log", stable_handle("tart", &artifact_key)))
.to_string_lossy()
.as_ref()
),
runner
.map(|value| format!(" --runner {}", shell_quote(value)))
.unwrap_or_default()
),
),
DigestRunnerKind::Log => (
"log",
format!(
"tsift log-digest --path {} --input {} --json",
shell_quote(root.to_string_lossy().as_ref()),
shell_quote(
root.join(".tsift/artifacts")
.join(format!("{}.log", stable_handle("tart", &artifact_key)))
.to_string_lossy()
.as_ref()
)
),
),
};
Some(persist_transcript_artifact(
&root,
"tart",
suffix,
&artifact_key,
&captured,
expand,
)?)
};
let filter_report = execution.filter.as_ref().map(DigestRunnerFilter::to_json);
match digest_kind {
DigestRunnerKind::Test => {
let digest_report = test_digest::compute(path, &captured, runner)?;
let report = serde_json::json!({
"kind": digest_kind.as_str(),
"command": shell_command,
"executed_command": execution.executed_command,
"exit_code": exit_code,
"success": output.status.success(),
"filter": filter_report,
"artifact": artifact,
"digest": digest_report,
});
// Follow-ups: the artifact's expand command (if any) first, then a rewrite hint.
let mut follow_up = artifact
.as_ref()
.map(|entry| vec![entry.expand.clone()])
.unwrap_or_default();
follow_up.push(format!(
"tsift rewrite --run {}",
shell_quote(shell_command)
));
let summary_text = if output.status.success() && digest_report.failures == 0 {
format!("test run passed for {}", runner.unwrap_or("auto"))
} else {
format!("test run captured {} failure(s)", digest_report.failures)
};
print_json_or_envelope(
&report,
&format,
"digest-runner",
"test-run",
ToolEnvelopeSummary {
text: summary_text,
metrics: vec![
envelope_metric("runner", &digest_report.runner),
envelope_metric("exit_code", exit_code),
envelope_metric("filter", execution.filter_label()),
envelope_metric("failures", digest_report.failures),
envelope_metric("groups", digest_report.grouped_failures),
envelope_metric(
"artifact",
artifact
.as_ref()
.map(|entry| entry.handle.as_str())
.unwrap_or("-"),
),
],
},
false,
follow_up,
)?;
}
DigestRunnerKind::Log => {
let digest_report = log_digest::compute(path, &captured)?;
let report = serde_json::json!({
"kind": digest_kind.as_str(),
"command": shell_command,
"executed_command": execution.executed_command,
"exit_code": exit_code,
"success": output.status.success(),
"filter": filter_report,
"artifact": artifact,
"digest": digest_report,
});
// Follow-ups: the artifact's expand command (if any) first, then a rewrite hint.
let mut follow_up = artifact
.as_ref()
.map(|entry| vec![entry.expand.clone()])
.unwrap_or_default();
follow_up.push(format!(
"tsift rewrite --run {}",
shell_quote(shell_command)
));
let summary_text = if output.status.success() && digest_report.signal_groups == 0 {
"command finished without log signals".to_string()
} else {
format!(
"command emitted {} log signal group(s)",
digest_report.signal_groups
)
};
print_json_or_envelope(
&report,
&format,
"digest-runner",
"command-run",
ToolEnvelopeSummary {
text: summary_text,
metrics: vec![
envelope_metric("exit_code", exit_code),
envelope_metric("filter", execution.filter_label()),
envelope_metric("signals", digest_report.signal_groups),
envelope_metric("file_refs", digest_report.file_ref_groups),
envelope_metric(
"artifact",
artifact
.as_ref()
.map(|entry| entry.handle.as_str())
.unwrap_or("-"),
),
],
},
false,
follow_up,
)?;
}
}
// Mirror the wrapped command's outcome: success returns, a failing exit code is re-used as
// this process's exit code, and a missing exit code becomes an error.
if output.status.success() {
return Ok(());
}
if let Some(code) = output.status.code() {
std::process::exit(code);
}
bail!("digest-wrapped command terminated by signal: {shell_command}");
}
// Non-envelope path: render the digest through the dedicated render helpers.
if captured.trim().is_empty() {
let label = match digest_kind {
DigestRunnerKind::Test => "test",
DigestRunnerKind::Log => "log",
};
println!("No {label} output captured.");
} else {
match digest_kind {
DigestRunnerKind::Test => {
render_test_digest_from_input(path, &captured, runner, format)?
}
DigestRunnerKind::Log => render_log_digest_from_input(path, &captured, format)?,
}
}
// Same exit-status mirroring as in the envelope path above.
if output.status.success() {
return Ok(());
}
if let Some(code) = output.status.code() {
std::process::exit(code);
}
bail!("digest-wrapped command terminated by signal: {shell_command}");
}
/// Outcome of running a digest-wrapped shell command.
struct DigestRunnerExecution {
    /// Raw process output (exit status plus captured streams).
    output: std::process::Output,
    /// The command line that was actually executed, after any filter rewrite.
    executed_command: String,
    /// The rewrite filter that produced `executed_command`, if one was applied.
    filter: Option<DigestRunnerFilter>,
}

impl DigestRunnerExecution {
    /// Name of the filter tool that rewrote the command, or `"none"` when no filter applied.
    fn filter_label(&self) -> &'static str {
        match &self.filter {
            Some(applied) => applied.tool,
            None => "none",
        }
    }
}
/// Describes a command rewrite applied before running a digest-wrapped command.
struct DigestRunnerFilter {
    /// Name of the tool that performed the rewrite.
    tool: &'static str,
    /// The rewritten command line.
    command: String,
}

impl DigestRunnerFilter {
    /// Serializes the filter as a JSON object with `tool` and `command` keys.
    fn to_json(&self) -> serde_json::Value {
        let Self { tool, command } = self;
        serde_json::json!({
            "tool": tool,
            "command": command,
        })
    }
}
/// Executes `shell_command` (or its filter-rewritten form) under `sh -lc`.
///
/// The command is wrapped in a subshell with `2>&1`, so stderr is merged into the captured
/// stdout. Returns the process output together with the command that actually ran and the
/// filter that rewrote it, if any.
fn run_digest_runner_command(shell_command: &str) -> Result<DigestRunnerExecution> {
    let filter = rtk_rewrite_for_digest_runner(shell_command);
    let executed_command = match &filter {
        Some(rewrite) => rewrite.command.clone(),
        None => shell_command.to_owned(),
    };
    let wrapped = format!("({executed_command}) 2>&1");
    let output = Command::new("sh")
        .args(["-lc", wrapped.as_str()])
        .stdout(Stdio::piped())
        .output()
        .with_context(|| format!("running digest-wrapped command: {executed_command}"))?;
    Ok(DigestRunnerExecution {
        output,
        executed_command,
        filter,
    })
}
/// Asks the `rtk` tool to rewrite `shell_command`, if that is applicable.
///
/// Returns `None` when the command already starts with `rtk `, when `rtk` is not found on
/// `PATH`, when `rtk rewrite` cannot be run or exits unsuccessfully, or when the rewrite is
/// empty or identical to the input.
fn rtk_rewrite_for_digest_runner(shell_command: &str) -> Option<DigestRunnerFilter> {
    if shell_command.trim_start().starts_with("rtk ") {
        return None;
    }
    // Only the presence of the binary matters here; the resolved path is not used.
    find_command_on_path("rtk")?;

    let output = Command::new("rtk")
        .args(["rewrite", shell_command])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    let rewritten = String::from_utf8_lossy(&output.stdout).trim().to_string();
    let is_real_rewrite = !rewritten.is_empty() && rewritten != shell_command;
    is_real_rewrite.then(|| DigestRunnerFilter {
        tool: "rtk",
        command: rewritten,
    })
}
/// Looks for a regular file named `command` in each directory listed in `PATH`.
///
/// Returns the first match in `PATH` order, or `None` when `PATH` is unset or no directory
/// contains such a file.
fn find_command_on_path(command: &str) -> Option<PathBuf> {
    let search_path = std::env::var_os("PATH")?;
    for dir in std::env::split_paths(&search_path) {
        let candidate = dir.join(command);
        if candidate.is_file() {
            return Some(candidate);
        }
    }
    None
}
/// Opens the summary database at `db_path` read-only.
///
/// Fails with a hint to run `tsift summarize --extract` when the file does not exist.
pub(crate) fn open_existing_summary_db_read_only(db_path: &Path) -> Result<summarize::SummaryDb> {
    if db_path.exists() {
        return summarize::SummaryDb::open_read_only_resilient(db_path);
    }
    bail!("no summaries.db found — run `tsift summarize --extract <path>` first")
}
/// Returns `true` unless the report's index status is `Fresh`.
fn status_index_needs_fix(report: &status::StatusReport) -> bool {
    let index_is_fresh = matches!(report.index, status::IndexStatus::Fresh { .. });
    !index_is_fresh
}
/// Returns `true` unless the report's instruction status is `Current`.
fn status_instructions_need_fix(report: &status::StatusReport) -> bool {
    let instructions_are_current =
        matches!(report.instructions, init::InstructionStatus::Current { .. });
    !instructions_are_current
}
/// Applies the repairs suggested by a status report.
///
/// Instructions are refreshed first when they are not current. If the index is not fresh, the
/// root index is updated when the workspace has no submodule scopes; otherwise every submodule
/// whose source root exists is updated, and missing ones are reported and skipped.
pub(crate) fn apply_status_fixes(root: &Path, report: &status::StatusReport) -> Result<()> {
    if status_instructions_need_fix(report) {
        eprintln!("status fix: refreshing tsift instructions");
        init::init(root, false, false)?;
    }

    if status_index_needs_fix(report) {
        let scopes = config::Config::submodule_dirs(root)?;
        if scopes.is_empty() {
            eprintln!("status fix: refreshing index");
            let db_path = root.join(".tsift/index.db");
            let action = "status --fix refreshing index".to_string();
            run_index_update(&db_path, root, action, root, None, false, false)?;
        } else {
            let cfg = config::Config::load(root)?;
            for scope in scopes {
                if scope.source_root.exists() {
                    eprintln!("status fix: refreshing submodule `{}` index", scope.id);
                    run_index_update(
                        &cfg.db_path_for(root, &scope.id),
                        &scope.source_root,
                        format!("status --fix refreshing submodule `{}` index", scope.id),
                        root,
                        Some(scope.id.as_str()),
                        false,
                        false,
                    )?;
                } else {
                    eprintln!(
                        "status fix: skipping missing submodule `{}` ({})",
                        scope.id,
                        scope.source_root.display()
                    );
                }
            }
        }
    }

    Ok(())
}
/// Reports whether the status report lists any missing workspace scopes.
///
/// Every index status variant carries a `missing_scopes` list, so the variant itself does not
/// matter here.
pub(crate) fn status_missing_workspace_scopes(report: &status::StatusReport) -> bool {
    let (status::IndexStatus::Fresh { missing_scopes, .. }
    | status::IndexStatus::Stale { missing_scopes, .. }
    | status::IndexStatus::Missing { missing_scopes }) = &report.index;
    !missing_scopes.is_empty()
}
pub(crate) fn autoindex_missing_workspace_scopes(
root: &Path,
report: &status::StatusReport,
) -> Result<()> {
let missing_scopes = match &report.index {
status::IndexStatus::Fresh { missing_scopes, .. }
| status::IndexStatus::Stale { missing_scopes, .. }
| status::IndexStatus::Missing { missing_scopes } => missing_scopes,
};
if missing_scopes.is_empty() {
return Ok(());
}
let missing_scope_ids = missing_scopes
.iter()
.map(|scope| scope.scope.as_str())
.collect::<std::collections::HashSet<_>>();
let cfg = config::Config::load(root)?;
for scope in config::Config::submodule_dirs(root)? {
if !missing_scope_ids.contains(scope.id.as_str()) || !scope.source_root.exists() {
continue;
}
let db_path = cfg.db_path_for(root, &scope.id);
run_index_update(
&db_path,
&scope.source_root,
format!(
"autoindexing missing submodule `{}` during status",
scope.id
),
root,
Some(scope.id.as_str()),
false,
false,
)?;
}
Ok(())
}
/// Prints each summarize-stats warning to stderr, with its path made relative to `root`.
pub(crate) fn emit_summary_stats_warnings(stats: &summarize::SummaryStats, root: &Path) {
    stats.warnings.iter().for_each(|warning| {
        let shown_path = relativize_pathbuf(&warning.path, root);
        eprintln!(
            "warning: summarize stats {}: {}",
            shown_path.display(),
            warning.message
        );
    });
}
/// Wraps `err` in an additional layer of human-readable `context`.
///
/// The returned error reports `context` as its message and keeps `err` as its source, which
/// is exactly what `anyhow::Error::context` provides; no round-trip through `Result` is
/// needed.
fn contextualize_error(err: anyhow::Error, context: String) -> anyhow::Error {
    err.context(context)
}
fn should_attach_lock_diagnostics(err: &anyhow::Error) -> bool {
let message = err.to_string();
message.contains("another tsift index writer is already active")
|| substrate::error_mentions_locked_db(err)
}
/// Adds `action` as context to a failed write, plus lock diagnostics when relevant.
///
/// Diagnostics are appended only when the error looks lock-related and the lock report can be
/// collected; in every other case the error is wrapped with `action` alone.
fn add_write_lock_context(
    err: anyhow::Error,
    action: String,
    root: &std::path::Path,
    scope: Option<&str>,
) -> anyhow::Error {
    if should_attach_lock_diagnostics(&err) {
        if let Ok(report) = status::check_locks(root, None, scope) {
            let diagnostics = status::format_locks_human(&report, false);
            let message = format!(
                "{}\n\nlock diagnostics:\n{}",
                action,
                diagnostics.trim_end()
            );
            return contextualize_error(err, message);
        }
    }
    contextualize_error(err, action)
}
/// Opens the index at `db_path` and brings it up to date with `source_root`.
///
/// `rebuild` takes precedence over `prune`; with neither set, a plain incremental update is
/// applied. Failures are wrapped with `action` (and lock diagnostics where applicable), and
/// any warnings in the resulting summary are printed to stderr.
pub(crate) fn run_index_update(
    db_path: &std::path::Path,
    source_root: &std::path::Path,
    action: String,
    root: &std::path::Path,
    scope: Option<&str>,
    rebuild: bool,
    prune: bool,
) -> Result<index::IndexSummary> {
    let update = || -> Result<index::IndexSummary> {
        let db = index::IndexDb::open(db_path)?;
        match (rebuild, prune) {
            (true, _) => db.rebuild(source_root),
            (false, true) => db.apply_changes_pruned(source_root),
            (false, false) => db.apply_changes(source_root),
        }
    };
    let summary = match update() {
        Ok(summary) => summary,
        Err(err) => return Err(add_write_lock_context(err, action, root, scope)),
    };
    emit_index_warnings(&summary, source_root, scope);
    Ok(summary)
}
/// Rewrites every change and warning path in `summary` to be relative to `root`.
pub(crate) fn relativize_index_summary(summary: &mut index::IndexSummary, root: &Path) {
    summary
        .changes
        .iter_mut()
        .for_each(|change| change.path = relativize_pathbuf(&change.path, root));
    summary
        .warnings
        .iter_mut()
        .for_each(|warning| warning.path = relativize_pathbuf(&warning.path, root));
}
/// Prints every warning from an index summary to stderr.
///
/// Each line carries an optional `[scope] ` prefix, the path relative to `root`, an optional
/// ` [language]` suffix, a description of the failing stage, and the warning message.
fn emit_index_warnings(summary: &index::IndexSummary, root: &Path, scope: Option<&str>) {
    // The scope prefix is identical for every warning, so build it once.
    let scope_prefix = match scope {
        Some(name) => format!("[{}] ", name),
        None => String::new(),
    };
    for warning in &summary.warnings {
        let stage = match warning.stage {
            index::IndexWarningStage::ReadSource => "read failed",
            index::IndexWarningStage::ExtractSymbols => "symbol extraction failed",
            index::IndexWarningStage::ExtractCallSites => "call extraction failed",
            index::IndexWarningStage::ExtractRoutes => "route extraction failed",
        };
        let lang_suffix = match warning.language.as_deref() {
            Some(lang) => format!(" [{}]", lang),
            None => String::new(),
        };
        let shown_path = relativize_pathbuf(&warning.path, root);
        eprintln!(
            "warning: {}{}{}: {}: {}",
            scope_prefix,
            shown_path.display(),
            lang_suffix,
            stage,
            warning.message
        );
    }
}
/// Loads the `[summarize]` section of `<root>/.tsift/config.toml`.
///
/// Any field missing from the section falls back to `SummarizeConfig::default()`, as does the
/// whole configuration when the file or the section is absent. An unreadable or malformed
/// config file also yields the defaults, but a warning is printed to stderr so the fallback
/// is not silent.
pub(crate) fn load_summarize_config(root: &std::path::Path) -> summarize::SummarizeConfig {
    #[derive(serde::Deserialize, Default)]
    struct RawConfig {
        #[serde(default)]
        summarize: Option<RawSummarize>,
    }
    #[derive(serde::Deserialize)]
    struct RawSummarize {
        model: Option<String>,
        max_file_tokens: Option<usize>,
        api_key_env: Option<String>,
    }

    let defaults = summarize::SummarizeConfig::default();
    let config_path = root.join(".tsift/config.toml");
    if !config_path.exists() {
        return defaults;
    }

    let content = match std::fs::read_to_string(&config_path) {
        Ok(content) => content,
        Err(err) => {
            eprintln!(
                "warning: could not read {}: {err}; using default summarize settings",
                config_path.display()
            );
            return defaults;
        }
    };
    let raw: RawConfig = match toml::from_str(&content) {
        Ok(raw) => raw,
        Err(err) => {
            eprintln!(
                "warning: could not parse {}: {err}; using default summarize settings",
                config_path.display()
            );
            return defaults;
        }
    };

    match raw.summarize {
        Some(section) => summarize::SummarizeConfig {
            model: section.model.unwrap_or(defaults.model),
            max_file_tokens: section.max_file_tokens.unwrap_or(defaults.max_file_tokens),
            api_key_env: section.api_key_env.unwrap_or(defaults.api_key_env),
        },
        None => defaults,
    }
}
/// Index database selected for a file, as returned by `find_symbols_db_for_file`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExtractSymbolContext {
/// Path of the index database that covers the file.
db_path: PathBuf,
/// Root directory the database belongs to (a submodule source root or the workspace root).
source_root: PathBuf,
}
/// Finds the index database that covers `file_path`.
///
/// Submodule scopes are tried deepest-first (by number of path components in their source
/// root); the first one that contains the file and has an existing database wins. If none
/// matches, the root `.tsift/index.db` is used when it exists and the file lies under `root`.
/// Returns `Ok(None)` when no database applies.
pub(crate) fn find_symbols_db_for_file(
    root: &Path,
    file_path: &Path,
) -> Result<Option<ExtractSymbolContext>> {
    let cfg = config::Config::load(root)?;
    let mut submodules = config::Config::submodule_dirs(root)?;
    submodules.sort_by_key(|scope| std::cmp::Reverse(scope.source_root.components().count()));

    for scope in submodules {
        if file_path.starts_with(&scope.source_root) {
            let db_path = cfg.db_path_for(root, &scope.id);
            if db_path.exists() {
                let context = ExtractSymbolContext {
                    db_path,
                    source_root: scope.source_root,
                };
                return Ok(Some(context));
            }
        }
    }

    let root_db = root.join(".tsift/index.db");
    if !(root_db.exists() && file_path.starts_with(root)) {
        return Ok(None);
    }
    Ok(Some(ExtractSymbolContext {
        db_path: root_db,
        source_root: root.to_path_buf(),
    }))
}
/// Resolves the base directory for an extract path.
///
/// The path is canonicalized; a directory is returned as-is, while any other path yields its
/// parent directory (or the path itself when it has no parent).
pub(crate) fn resolve_extract_base(path: &Path) -> Result<PathBuf> {
    let canonical = path
        .canonicalize()
        .with_context(|| format!("canonicalizing {}", path.display()))?;
    if canonical.is_dir() {
        return Ok(canonical);
    }
    let parent = canonical.parent().map(Path::to_path_buf);
    Ok(parent.unwrap_or(canonical))
}
/// Normalizes an extract scope path.
///
/// Existing paths are canonicalized through the filesystem; paths that do not exist are
/// normalized lexically instead.
fn normalize_extract_scope_path(path: &Path) -> Result<PathBuf> {
    if !path.exists() {
        return Ok(summarize::normalize_lexical_path(path));
    }
    path.canonicalize()
        .with_context(|| format!("canonicalizing extract scope {}", path.display()))
}
/// Resolves `extract_path` against `root` (when it is relative) and normalizes the result.
pub(crate) fn resolve_extract_scope(root: &Path, extract_path: &Path) -> Result<PathBuf> {
    let anchored = match extract_path.is_absolute() {
        true => extract_path.to_path_buf(),
        false => root.join(extract_path),
    };
    normalize_extract_scope_path(&anchored)
}
/// Reports whether `changed_path`, once normalized, lies under `extract_scope`.
///
/// If filesystem normalization fails, the path is normalized lexically instead.
pub(crate) fn summarize_diff_matches_scope(changed_path: &Path, extract_scope: &Path) -> bool {
    let normalized = match normalize_extract_scope_path(changed_path) {
        Ok(resolved) => resolved,
        Err(_) => summarize::normalize_lexical_path(changed_path),
    };
    normalized.starts_with(extract_scope)
}
/// Builds the summary file key for `file_path`, relative to `root` when it lies under it.
///
/// Paths outside `root` are keyed by the full path as given.
pub(crate) fn summarize_relative_file_path(root: &Path, file_path: &Path) -> String {
    let relative = match file_path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => file_path,
    };
    summarize::normalize_summary_file_key(relative)
}
/// Lists cached summary paths inside `extract_scope` that are no longer being extracted.
///
/// A cached path counts as deleted when it falls within the scope but its key is not among
/// the keys derived from `files_to_extract`.
pub(crate) fn summarize_full_extract_deleted_summary_paths(
    summary_db: &summarize::SummaryDb,
    root: &Path,
    extract_scope: &Path,
    files_to_extract: &[PathBuf],
) -> Result<BTreeSet<String>> {
    let live_paths: BTreeSet<_> = files_to_extract
        .iter()
        .map(|file_path| summarize_relative_file_path(root, file_path))
        .collect();
    let deleted = summary_db
        .cached_file_paths()?
        .into_iter()
        .filter(|cached_path| {
            summarize_diff_matches_scope(&root.join(cached_path), extract_scope)
                && !live_paths.contains(cached_path)
        })
        .collect();
    Ok(deleted)
}
/// One index database that a search should consult, as produced by
/// `resolve_search_index_targets`.
#[derive(Debug, Clone)]
struct SearchIndexTarget {
/// Human-readable name used in messages, e.g. "index" or "submodule `<id>` index".
label: String,
/// Location of the index database file.
db_path: PathBuf,
/// Source tree the index describes.
source_root: PathBuf,
/// Submodule scope id, or `None` for the shared root index.
scope_name: Option<String>,
/// `tsift index ...` command suggested to the user for rebuilding this index.
reindex_cmd: String,
}
/// Freshness of a search index, as determined by `inspect_search_index`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SearchIndexState {
/// The source root or the database file does not exist.
Missing,
/// Inspection found no new, modified, or deleted files.
Fresh,
/// Inspection found `stale_files` new, modified, or deleted files in total.
Stale { stale_files: usize },
}
fn resolve_search_index_targets(
root: &Path,
path_hint: &Path,
scope: Option<&str>,
federated: bool,
) -> Result<Vec<SearchIndexTarget>> {
if let Some(scope_name) = scope {
let scope = config::Config::resolve_submodule(root, scope_name)?;
let cfg = config::Config::load(root)?;
return Ok(vec![SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
}]);
}
if federated {
let cfg = config::Config::load(root)?;
let mut targets = Vec::new();
for scope in config::Config::submodule_dirs(root)? {
if !cfg.federation_for_scope(&scope) {
continue;
}
targets.push(SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --workspace {}", root.display()),
});
}
return Ok(targets);
}
if let Some(scope) = config::Config::infer_submodule_from_path(root, path_hint)? {
let cfg = config::Config::load(root)?;
return Ok(vec![SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
}]);
}
if let Some(scope) = infer_agent_doc_task_submodule(root, path_hint)? {
let cfg = config::Config::load(root)?;
return Ok(vec![SearchIndexTarget {
label: format!("submodule `{}` index", scope.id),
db_path: cfg.db_path_for(root, &scope.id),
source_root: scope.source_root.clone(),
scope_name: Some(scope.id.clone()),
reindex_cmd: format!("tsift index --submodule {} {}", scope.id, root.display()),
}]);
}
let scopes = config::Config::submodule_dirs(root)?;
if !scopes.is_empty() {
let root_db = root.join(".tsift/index.db");
if !root_db.exists() {
let available_scopes = scopes
.iter()
.map(|scope| scope.id.as_str())
.collect::<Vec<_>>()
.join(", ");
let cfg = config::Config::load(root)?;
let indexed_scopes = scopes
.iter()
.filter(|scope| cfg.db_path_for(root, &scope.id).exists())
.map(|scope| scope.id.as_str())
.collect::<Vec<_>>();
let indexed_label = if indexed_scopes.is_empty() {
"none".to_string()
} else {
indexed_scopes.join(", ")
};
bail!(
"workspace root {} has no shared root index at {}. Default search requires `--scope <scope>` or `--federated` when the workspace uses scoped `.tsift/indexes/*/index.db` files. Available scopes: {}. Indexed scopes: {}.",
root.display(),
root_db.display(),
available_scopes,
indexed_label,
);
}
}
Ok(vec![SearchIndexTarget {
label: "index".to_string(),
db_path: root.join(".tsift/index.db"),
source_root: root.to_path_buf(),
scope_name: None,
reindex_cmd: format!("tsift index {}", root.display()),
}])
}
/// Classifies a search index as missing, fresh, or stale.
///
/// The index is missing when its source root or database file does not exist. Otherwise a
/// read-only inspection counts new, modified, and deleted files; a total of zero means fresh.
fn inspect_search_index(target: &SearchIndexTarget) -> Result<SearchIndexState> {
    let is_missing = !target.source_root.exists() || !target.db_path.exists();
    if is_missing {
        return Ok(SearchIndexState::Missing);
    }
    let inspection =
        index::IndexDb::inspect_read_only(&target.db_path, &target.source_root, false)?;
    let counts = &inspection.summary;
    Ok(match counts.new + counts.modified + counts.deleted {
        0 => SearchIndexState::Fresh,
        stale_files => SearchIndexState::Stale { stale_files },
    })
}
/// An index that needs rebuilding before search results can be trusted.
#[derive(Debug, Clone, PartialEq, Eq)]
struct RebuildSearchTarget {
/// Label copied from the originating `SearchIndexTarget`.
label: String,
/// Why the index needs a rebuild.
reason: RebuildSearchReason,
/// Command suggested to the user for rebuilding the index.
reindex_cmd: String,
}
/// Reason an index needs rebuilding; mirrors the non-fresh `SearchIndexState` variants.
#[derive(Debug, Clone, PartialEq, Eq)]
enum RebuildSearchReason {
/// The index was classified as missing.
Missing,
/// The index exists but `stale_files` files are out of date.
Stale { stale_files: usize },
}
/// An index whose automatic update was skipped because another tsift writer was active
/// (see `precheck_search_indexes`), so the search proceeds in a degraded mode.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DegradedSearchTarget {
/// Label copied from the originating `SearchIndexTarget`.
label: String,
/// State the index was in when the update was skipped.
reason: RebuildSearchReason,
/// Command suggested to the user for retrying the index update later.
reindex_cmd: String,
}
/// How a search continues when some indexes could not be refreshed
/// (chosen by `degraded_search_mode`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DegradedSearchMode {
/// At least one degraded index is stale rather than missing; the note emitted for this mode
/// describes a read-only search over the current index snapshot.
ReadOnly,
/// Every degraded index is missing; the note emitted for this mode describes an exact
/// live-file search.
Exact,
}
/// Result of `precheck_search_indexes`.
#[derive(Debug)]
struct SearchPrecheck {
/// All index targets resolved for the search.
targets: Vec<SearchIndexTarget>,
/// Targets whose automatic update was skipped because of an active writer lock.
degraded_targets: Vec<DegradedSearchTarget>,
}
/// Reports whether any error in `err`'s cause chain mentions an active tsift index writer.
fn is_active_writer_lock_error(err: &anyhow::Error) -> bool {
    for cause in err.chain() {
        let message = cause.to_string();
        if message.contains("another tsift index writer is already active") {
            return true;
        }
    }
    false
}
/// Infers a submodule from a task-document path hint.
///
/// The hint (resolved against `root` when relative) must lie under `root` and its first path
/// component must be `tasks`. The file stem of the hint is then looked up as a submodule
/// name. Returns `Ok(None)` whenever one of these preconditions does not hold.
fn infer_agent_doc_task_submodule(
    root: &Path,
    path_hint: &Path,
) -> Result<Option<config::WorkspaceScope>> {
    let hinted_path = match path_hint.is_absolute() {
        true => path_hint.to_path_buf(),
        false => root.join(path_hint),
    };
    let Ok(relative) = hinted_path.strip_prefix(root) else {
        return Ok(None);
    };

    let under_tasks = matches!(
        relative.components().next(),
        Some(std::path::Component::Normal(first)) if first == "tasks"
    );
    if !under_tasks {
        return Ok(None);
    }

    match relative.file_stem().and_then(|stem| stem.to_str()) {
        Some(file_stem) => config::Config::find_submodule(root, file_stem),
        None => Ok(None),
    }
}
fn degraded_search_target(
target: &SearchIndexTarget,
reason: RebuildSearchReason,
) -> DegradedSearchTarget {
DegradedSearchTarget {
label: target.label.clone(),
reason,
reindex_cmd: target.reindex_cmd.clone(),
}
}
/// Runs an incremental (non-rebuild, non-prune) index update for a search target.
fn apply_search_index_update(
    root: &Path,
    target: &SearchIndexTarget,
) -> Result<index::IndexSummary> {
    let action = format!("autoindexing {}", target.label);
    let scope = target.scope_name.as_deref();
    run_index_update(
        &target.db_path,
        &target.source_root,
        action,
        root,
        scope,
        false,
        false,
    )
}
/// Inspects every target and collects the ones that are missing or stale.
///
/// Fresh targets are omitted. The first inspection error aborts the scan and is returned.
fn collect_rebuild_search_targets(
    targets: &[SearchIndexTarget],
) -> Result<Vec<RebuildSearchTarget>> {
    targets
        .iter()
        .filter_map(|target| {
            let reason = match inspect_search_index(target) {
                Err(err) => return Some(Err(err)),
                Ok(SearchIndexState::Fresh) => return None,
                Ok(SearchIndexState::Missing) => RebuildSearchReason::Missing,
                Ok(SearchIndexState::Stale { stale_files }) => {
                    RebuildSearchReason::Stale { stale_files }
                }
            };
            Some(Ok(RebuildSearchTarget {
                label: target.label.clone(),
                reason,
                reindex_cmd: target.reindex_cmd.clone(),
            }))
        })
        .collect()
}
/// Describes why a single index needs rebuilding, e.g. "index is stale (3 files)".
fn rebuild_search_target_detail(target: &RebuildSearchTarget) -> String {
    let label = &target.label;
    match &target.reason {
        RebuildSearchReason::Missing => format!("{} is missing", label),
        RebuildSearchReason::Stale { stale_files } => {
            let file_suffix = match *stale_files {
                1 => "",
                _ => "s",
            };
            format!("{} is stale ({} file{})", label, stale_files, file_suffix)
        }
    }
}
/// Builds the user-facing message explaining which indexes need a rebuild.
///
/// A single target is described in full. For several targets, up to three are described and
/// the remainder summarized as `+N more`; the suggested command is the first target's
/// `reindex_cmd`. An empty slice yields a short "nothing to rebuild" message instead of
/// panicking on an out-of-bounds index.
fn rebuild_search_targets_message(rebuild_targets: &[RebuildSearchTarget]) -> String {
    // Number of targets described individually before collapsing the rest into "+N more".
    const MAX_LISTED: usize = 3;

    let Some(first) = rebuild_targets.first() else {
        return "no indexes need rebuild.".to_string();
    };
    if rebuild_targets.len() == 1 {
        return format!(
            "{}. Run `{}` to rebuild before retrying.",
            rebuild_search_target_detail(first),
            first.reindex_cmd
        );
    }

    let listed: Vec<String> = rebuild_targets
        .iter()
        .take(MAX_LISTED)
        .map(rebuild_search_target_detail)
        .collect();
    let overflow = rebuild_targets.len().saturating_sub(listed.len());
    let mut details = listed.join(", ");
    if overflow > 0 {
        details.push_str(&format!(", +{} more", overflow));
    }
    format!(
        "{} indexes need rebuild: {}. Run `{}` to rebuild before retrying.",
        rebuild_targets.len(),
        details,
        first.reindex_cmd
    )
}
/// Resolves the search targets and makes sure their indexes are usable.
///
/// With `autoindex` enabled, missing and stale indexes are updated in place. An update that
/// fails because another tsift writer is active marks the target as degraded; any other
/// failure is returned. With `autoindex` disabled, stale indexes abort the search with an
/// error that names the rebuild command, while missing indexes are left untouched.
pub(crate) fn precheck_search_indexes(
    root: &Path,
    path_hint: &Path,
    scope: Option<&str>,
    federated: bool,
    autoindex: bool,
) -> Result<SearchPrecheck> {
    let targets = resolve_search_index_targets(root, path_hint, scope, federated)?;
    let mut stale_targets = Vec::new();
    let mut degraded_targets = Vec::new();

    for target in &targets {
        let reason = match inspect_search_index(target)? {
            SearchIndexState::Fresh => continue,
            SearchIndexState::Missing => RebuildSearchReason::Missing,
            SearchIndexState::Stale { stale_files } => RebuildSearchReason::Stale { stale_files },
        };

        if !autoindex {
            // Only stale indexes block the search; a missing index is not reported here.
            if matches!(reason, RebuildSearchReason::Stale { .. }) {
                stale_targets.push(RebuildSearchTarget {
                    label: target.label.clone(),
                    reason,
                    reindex_cmd: target.reindex_cmd.clone(),
                });
            }
            continue;
        }

        match apply_search_index_update(root, target) {
            Ok(_) => {}
            Err(err) if is_active_writer_lock_error(&err) => {
                degraded_targets.push(degraded_search_target(target, reason));
            }
            Err(err) => return Err(err),
        }
    }

    if !stale_targets.is_empty() {
        bail!(
            "tsift search aborted: {} \
             or re-run without `--no-autoindex`.",
            rebuild_search_targets_message(&stale_targets),
        );
    }
    Ok(SearchPrecheck {
        targets,
        degraded_targets,
    })
}
/// Picks the degraded search mode for a set of degraded targets.
///
/// Returns `None` when there are no degraded targets, `Exact` when every target is missing,
/// and `ReadOnly` as soon as at least one target is stale.
pub(crate) fn degraded_search_mode(targets: &[DegradedSearchTarget]) -> Option<DegradedSearchMode> {
    if targets.is_empty() {
        return None;
    }
    let any_stale = targets
        .iter()
        .any(|target| !matches!(target.reason, RebuildSearchReason::Missing));
    Some(if any_stale {
        DegradedSearchMode::ReadOnly
    } else {
        DegradedSearchMode::Exact
    })
}
/// Summarizes degraded targets for the user-facing note.
///
/// A single target is described by label; otherwise the summary counts stale and missing
/// indexes, e.g. "2 stale indexes, 1 missing index". An empty slice yields an empty string.
fn degraded_search_targets_summary(targets: &[DegradedSearchTarget]) -> String {
    if let [only] = targets {
        return match &only.reason {
            RebuildSearchReason::Missing => format!("{} is missing", only.label),
            RebuildSearchReason::Stale { stale_files } => {
                let file_suffix = if *stale_files == 1 { "" } else { "s" };
                format!(
                    "{} is stale ({} file{})",
                    only.label, stale_files, file_suffix
                )
            }
        };
    }

    let missing = targets
        .iter()
        .filter(|target| matches!(target.reason, RebuildSearchReason::Missing))
        .count();
    let stale = targets.len().saturating_sub(missing);
    let plural = |count: usize| if count == 1 { "" } else { "es" };

    let mut parts = Vec::with_capacity(2);
    if stale > 0 {
        let suffix = plural(stale);
        parts.push(format!("{stale} stale index{suffix}"));
    }
    if missing > 0 {
        let suffix = plural(missing);
        parts.push(format!("{missing} missing index{suffix}"));
    }
    parts.join(", ")
}
/// Prints a note to stderr explaining that autoindexing was skipped and how search continues.
///
/// The retry command shown is the first target's `reindex_cmd`. With no targets there is
/// nothing to report, so the function returns without printing rather than panicking on an
/// out-of-bounds index.
pub(crate) fn emit_degraded_search_note(
    targets: &[DegradedSearchTarget],
    mode: DegradedSearchMode,
) {
    let Some(first) = targets.first() else {
        return;
    };
    let summary = degraded_search_targets_summary(targets);
    let reindex_cmd = &first.reindex_cmd;
    match mode {
        DegradedSearchMode::ReadOnly => eprintln!(
            "note: active tsift writer detected; skipping autoindex because {}. \
             Continuing with read-only search and the current index snapshot; symbol hits may lag. \
             Retry `{}` after the active writer finishes for fresh index results.",
            summary, reindex_cmd
        ),
        DegradedSearchMode::Exact => eprintln!(
            "note: active tsift writer detected; skipping autoindex because {}. \
             Continuing with exact live-file search. Retry `{}` after the active writer finishes \
             for indexed symbol hits.",
            summary, reindex_cmd
        ),
    }
}
/// Builds the error text shown when a search times out.
///
/// The targets are re-inspected: if any needs a rebuild, the message points at the rebuild
/// command; otherwise it suggests disabling the timeout, narrowing the search, or switching
/// strategy.
fn search_timeout_message(
    timeout_secs: u64,
    strategy: &str,
    targets: &[SearchIndexTarget],
) -> Result<String> {
    let rebuild_targets = collect_rebuild_search_targets(targets)?;
    let message = if rebuild_targets.is_empty() {
        format!(
            "tsift search timed out after {}s (strategy: {}). \
             The search root looks fresh, so reindexing is unlikely to help. \
             Re-run with `--timeout 0` to disable the timeout, narrow `--path` / `--scope`, \
             or try a different strategy.",
            timeout_secs, strategy,
        )
    } else {
        format!(
            "tsift search timed out after {}s (strategy: {}). {}",
            timeout_secs,
            strategy,
            rebuild_search_targets_message(&rebuild_targets),
        )
    };
    Ok(message)
}
/// Punctuation that marks a query as identifier- or path-like.
fn is_exact_preferring_query_char(ch: char) -> bool {
    match ch {
        '-' | '_' | '/' | '\\' | '.' | ':' | '#' | '@' => true,
        _ => false,
    }
}

/// Reports whether a query looks like a single identifier or path.
///
/// After trimming, the query must be non-empty, consist solely of alphanumerics and the
/// marker punctuation above, and contain at least one character of each kind.
fn query_prefers_exact_search(query: &str) -> bool {
    let mut saw_alphanumeric = false;
    let mut saw_marker = false;
    for ch in query.trim().chars() {
        if ch.is_alphanumeric() {
            saw_alphanumeric = true;
        } else if is_exact_preferring_query_char(ch) {
            saw_marker = true;
        } else {
            // Whitespace or any other character disqualifies the query.
            return false;
        }
    }
    saw_alphanumeric && saw_marker
}

/// Chooses the search strategy: an explicit `strategy` wins, otherwise `"exact"` for
/// identifier-like queries and `"lexical"` for everything else.
pub(crate) fn resolve_search_strategy(query: &str, strategy: Option<String>) -> String {
    match strategy {
        Some(explicit) => explicit,
        None if query_prefers_exact_search(query) => "exact".to_string(),
        None => "lexical".to_string(),
    }
}
/// Serialized preview of one symbol entry in a budgeted search report.
///
/// NOTE(review): the code that fills this struct is outside this section; the `*_count`
/// fields presumably aggregate a family of related symbol matches — confirm against
/// `build_search_budget_report`.
#[derive(Serialize)]
struct SearchBudgetSymbolPreview {
handle: String,
// Omitted from the serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
tag_alias: Option<String>,
match_type: String,
kind: String,
name: String,
file: String,
line: i64,
score: f64,
match_count: usize,
surface_count: usize,
file_count: usize,
// Omitted from the serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
surface_examples: Vec<String>,
/// Follow-up command for this entry; `build_search_scale_guard` reuses it as a narrowing
/// command.
expand: String,
}
/// Serialized preview of one search hit in a budgeted search report.
///
/// NOTE(review): populated outside this section; field meanings beyond their names should be
/// confirmed against `build_search_budget_report`.
#[derive(Serialize)]
struct SearchBudgetHitPreview {
handle: String,
rank: usize,
/// Path of the hit; `build_search_scale_guard` uses it to build a path-narrowed command.
path: String,
confidence: String,
score: f64,
preview: String,
expand: String,
}
/// Raw numbers behind a scale-guard warning, as filled in by `build_search_scale_guard`.
#[derive(Serialize)]
struct SearchScaleSignals {
/// Number of symbol previews included in the report.
preview_symbols: usize,
/// Total symbol count passed in as `symbol_total`.
symbol_families: usize,
/// Total symbol count passed in as `raw_symbol_total`.
raw_symbol_matches: usize,
/// Number of hit previews included in the report.
preview_hits: usize,
/// Total hit count passed in as `hit_total`.
returned_hits: usize,
/// Copied from the search response.
indexed_artifacts: usize,
/// Copied from the search response.
skipped_artifacts: usize,
/// Item budget the totals were compared against.
max_items: usize,
/// Byte budget in effect for the report.
max_bytes: usize,
}
/// Warning attached to a search report when the result surface is broad
/// (see `build_search_scale_guard`).
#[derive(Serialize)]
struct SearchScaleGuard {
/// "high-hit" when symbol or hit totals exceed the item budget, otherwise "corpus-size".
level: String,
/// Human-readable warning text.
warning: String,
/// The numbers that triggered the guard.
signals: SearchScaleSignals,
/// De-duplicated follow-up commands that narrow the search.
narrow_commands: Vec<String>,
}
/// Serialized, budget-limited search report.
///
/// NOTE(review): populated by `build_search_budget_report`, whose body is outside this
/// section; field meanings beyond their names should be confirmed there.
#[derive(Serialize)]
struct SearchBudgetReport {
query: String,
strategy: String,
indexed_artifacts: usize,
skipped_artifacts: usize,
max_items: usize,
max_bytes: usize,
symbol_total: usize,
raw_symbol_total: usize,
hit_total: usize,
truncated: bool,
// Omitted from the serialized output when no guard was raised.
#[serde(skip_serializing_if = "Option::is_none")]
scale_guard: Option<SearchScaleGuard>,
symbols: Vec<SearchBudgetSymbolPreview>,
hits: Vec<SearchBudgetHitPreview>,
}
// NOTE(review): presumably caps how many `surface_examples` are kept per symbol family —
// the use site is outside this section; confirm.
const SEARCH_BUDGET_SURFACE_PREVIEW_LIMIT: usize = 3;
/// Working accumulator for a group of related symbol hits while a budgeted search report is
/// being built.
///
/// NOTE(review): the grouping logic lives in `build_search_budget_report`, outside this
/// section; the `representative_*` fields presumably describe the hit chosen to stand for
/// the family, and the `seen_*` sets presumably de-duplicate surfaces and files — confirm.
struct SearchBudgetSymbolFamily {
canonical_family: Option<String>,
canonical_tag_alias: Option<String>,
representative_name: String,
representative_kind: String,
representative_match_type: String,
representative_file: String,
representative_line: i64,
representative_score: f64,
seen_surfaces: HashSet<String>,
seen_files: HashSet<String>,
surface_examples: Vec<String>,
match_count: usize,
}
fn search_budget_family_query(tag_alias: Option<&str>, fallback_name: &str) -> String {
if let Some(alias) = tag_alias
&& let Some(query) = family_query_from_tag_alias(alias)
{
return query;
}
fallback_name.to_string()
}
/// Builds the follow-up search command that expands a symbol family.
///
/// An `"exact"` strategy is downgraded to `"lexical"` for the follow-up; every other strategy
/// is passed through unchanged.
fn build_search_budget_family_expand(
    strategy: &str,
    path: &str,
    tag_alias: Option<&str>,
    fallback_name: &str,
) -> String {
    let effective_strategy = match strategy {
        "exact" => "lexical",
        other => other,
    };
    let query = search_budget_family_query(tag_alias, fallback_name);
    build_search_budget_follow_up(&query, effective_strategy, path)
}
/// Formats a symbol name for the preview, noting additional variants when the family has
/// more than one surface, then truncates it to the byte budget.
fn format_search_budget_symbol_name(name: &str, surface_count: usize, max_bytes: usize) -> String {
    let preview = match surface_count.checked_sub(1) {
        Some(extra) if extra > 0 => {
            let label = if extra == 1 { "variant" } else { "variants" };
            format!("{name} (+{extra} {label})")
        }
        _ => name.to_string(),
    };
    truncate_for_budget(&preview, max_bytes)
}
/// Formats a file path for the preview, noting additional files when the family spans more
/// than one, then truncates it to the byte budget.
fn format_search_budget_symbol_file(file: &str, file_count: usize, max_bytes: usize) -> String {
    let preview = match file_count.checked_sub(1) {
        Some(extra) if extra > 0 => {
            let label = if extra == 1 { "file" } else { "files" };
            format!("{file} (+{extra} {label})")
        }
        _ => file.to_string(),
    };
    truncate_for_budget(&preview, max_bytes)
}
/// Builds a `tsift search` follow-up command for `query` restricted to `path`.
///
/// The command always uses `--limit 20`. `"exact"` adds `--exact`, `"lexical"` adds nothing,
/// and any other strategy is passed via `--strategy`.
pub(crate) fn build_search_budget_follow_up(query: &str, strategy: &str, path: &str) -> String {
    let mut command = format!(
        "tsift search {} --path {} --limit 20",
        shell_quote(query),
        shell_quote(path)
    );
    match strategy {
        "lexical" => {}
        "exact" => command.push_str(" --exact"),
        other => command.push_str(&format!(" --strategy {}", shell_quote(other))),
    }
    command
}
/// Builds an exact-match `tsift search` command limited to `max_items` results (at least 1).
fn build_search_exact_narrow_command(query: &str, path: &str, max_items: usize) -> String {
    let quoted_query = shell_quote(query);
    let quoted_path = shell_quote(path);
    let limit = max_items.max(1);
    format!(
        "tsift search {} --path {} --limit {} --exact",
        quoted_query, quoted_path, limit
    )
}
fn build_search_path_narrow_command(query: &str, strategy: &str, path: &str) -> String {
let mut command = format!(
"tsift search {} --path {} --limit 20",
shell_quote(query),
shell_quote(path)
);
if strategy == "exact" {
command.push_str(" --exact");
} else if strategy != "lexical" {
command.push_str(&format!(" --strategy {}", shell_quote(strategy)));
}
command
}
/// Builds a scale-guard warning when a search result surface is broad.
///
/// A guard is raised when symbol totals or the hit total exceed `max_items`, or when the
/// response covers at least 250 artifacts (indexed plus skipped). The guard carries the raw
/// signals and a de-duplicated list of narrowing commands: an exact search (unless the
/// strategy is already `"exact"`), the first symbol's expand command, a search narrowed to
/// the first hit's path, and a workflow hint. Returns `None` when none of the conditions
/// hold.
#[allow(clippy::too_many_arguments)]
fn build_search_scale_guard(
    query: &str,
    strategy: &str,
    root: &Path,
    response: &sift::SearchResponse,
    symbol_total: usize,
    raw_symbol_total: usize,
    hit_total: usize,
    max_items: usize,
    max_bytes: usize,
    symbols: &[SearchBudgetSymbolPreview],
    hits: &[SearchBudgetHitPreview],
) -> Option<SearchScaleGuard> {
    let over_budget = |count: usize| count > max_items;
    let broad_symbols = over_budget(symbol_total) || over_budget(raw_symbol_total);
    let broad_hits = over_budget(hit_total);
    let corpus_size = response
        .indexed_artifacts
        .saturating_add(response.skipped_artifacts);
    let broad_corpus = corpus_size >= 250;
    if !(broad_symbols || broad_hits || broad_corpus) {
        return None;
    }

    let level = if broad_hits || broad_symbols {
        "high-hit"
    } else {
        "corpus-size"
    };

    let root_path = root.to_string_lossy();
    let mut narrow_commands = Vec::new();
    if strategy != "exact" {
        narrow_commands.push(build_search_exact_narrow_command(
            query,
            root_path.as_ref(),
            max_items,
        ));
    }
    narrow_commands.extend(symbols.first().map(|symbol| symbol.expand.clone()));
    narrow_commands.extend(
        hits.first()
            .map(|hit| build_search_path_narrow_command(query, strategy, &hit.path)),
    );
    narrow_commands.push(
        "tsift workflow search --json # preserve handles, expand only cited parents".to_string(),
    );

    let signals = SearchScaleSignals {
        preview_symbols: symbols.len(),
        symbol_families: symbol_total,
        raw_symbol_matches: raw_symbol_total,
        preview_hits: hits.len(),
        returned_hits: hit_total,
        indexed_artifacts: response.indexed_artifacts,
        skipped_artifacts: response.skipped_artifacts,
        max_items,
        max_bytes,
    };
    Some(SearchScaleGuard {
        level: level.to_string(),
        warning: "Broad search surface: inspect the preview first and run a narrowing command before dispatching parallel agents."
            .to_string(),
        signals,
        narrow_commands: dedupe_preserve_order(narrow_commands),
    })
}
/// Builds the budget-limited preview report for one search.
///
/// Symbol hits are grouped into families in first-seen order. The grouping
/// key is the canonical family name when `canonical_tag_family_from_symbol`
/// yields one, otherwise the symbol's own name. The first hit of each family
/// supplies the representative name, kind, match type, file, line and score.
///
/// At most `budget.preview_items()` families and the same number of
/// `response.hits` are previewed; preview strings are passed through
/// `truncate_for_budget` with `budget.preview_bytes()`. When `absolute` is
/// false, paths are passed through `relativize` against `root` before display.
///
/// `truncated` reflects item-count overflow only (more families or hits than
/// the item budget).
pub(crate) fn build_search_budget_report(
query: &str,
strategy: &str,
root: &Path,
response: &sift::SearchResponse,
symbol_hits: &[index::SymbolHit],
absolute: bool,
budget: ResponseBudget,
) -> SearchBudgetReport {
let max_items = budget.preview_items();
let max_bytes = budget.preview_bytes();
let raw_symbol_total = symbol_hits.len();
let hit_total = response.hits.len();
// `family_positions` maps a family key to its index in `families`, so
// `families` keeps first-seen order while lookups stay keyed.
let mut family_positions = HashMap::new();
let mut families = Vec::new();
for hit in symbol_hits {
let display_file = if absolute {
hit.file.clone()
} else {
relativize(&hit.file, root)
};
let canonical_family = canonical_tag_family_from_symbol(&hit.name, hit.tags.as_deref());
// Hits without a canonical family are grouped by their literal name.
let family_key = canonical_family
.as_ref()
.map(|family| family.canonical.clone())
.unwrap_or_else(|| hit.name.clone());
// The closure runs only for a new key: it registers the family with this
// hit as its representative and returns the new index.
let position = *family_positions.entry(family_key).or_insert_with(|| {
families.push(SearchBudgetSymbolFamily {
canonical_family: canonical_family
.as_ref()
.map(|family| family.canonical.clone()),
canonical_tag_alias: canonical_family
.as_ref()
.map(|family| family.tag_alias.clone()),
representative_name: hit.name.clone(),
representative_kind: hit.kind.clone(),
representative_match_type: hit.match_type.clone(),
representative_file: display_file.clone(),
representative_line: hit.line,
representative_score: hit.score,
seen_surfaces: HashSet::new(),
seen_files: HashSet::new(),
surface_examples: Vec::new(),
match_count: 0,
});
families.len() - 1
});
let family = &mut families[position];
family.match_count += 1;
// Record each distinct surface name once; keep examples only up to the
// preview limit while `seen_surfaces` still counts every distinct name.
if family.seen_surfaces.insert(hit.name.clone())
&& family.surface_examples.len() < SEARCH_BUDGET_SURFACE_PREVIEW_LIMIT
{
family
.surface_examples
.push(truncate_for_budget(&hit.name, max_bytes));
}
family.seen_files.insert(display_file);
}
// Total is taken before the item budget is applied below.
let symbol_total = families.len();
let symbols: Vec<SearchBudgetSymbolPreview> = families
.into_iter()
.take(max_items)
.map(|family| {
let file_count = family.seen_files.len();
let surface_count = family.seen_surfaces.len();
// Handle key: family identity (canonical name, else tag alias, else
// representative name), alias, kind, file, line, query and strategy.
let key = format!(
"{}:{}:{}:{}:{}:{}:{}",
family
.canonical_family
.as_deref()
.or(family.canonical_tag_alias.as_deref())
.unwrap_or(&family.representative_name),
family.canonical_tag_alias.as_deref().unwrap_or(""),
family.representative_kind,
family.representative_file,
family.representative_line,
query,
strategy
);
SearchBudgetSymbolPreview {
handle: stable_handle("sfam", &key),
tag_alias: family
.canonical_tag_alias
.as_deref()
.map(|alias| truncate_for_budget(alias, max_bytes)),
match_type: family.representative_match_type,
kind: family.representative_kind,
name: format_search_budget_symbol_name(
&family.representative_name,
surface_count,
max_bytes,
),
file: format_search_budget_symbol_file(
&family.representative_file,
file_count,
max_bytes,
),
line: family.representative_line,
score: family.representative_score,
match_count: family.match_count,
surface_count,
file_count,
surface_examples: family.surface_examples,
expand: build_search_budget_family_expand(
strategy,
root.to_string_lossy().as_ref(),
family.canonical_tag_alias.as_deref(),
&family.representative_name,
),
}
})
.collect();
let hits: Vec<SearchBudgetHitPreview> = response
.hits
.iter()
.take(max_items)
.map(|hit| {
let display_path = if absolute {
hit.path.clone()
} else {
relativize(&hit.path, root)
};
// Handle key: display path, rank, score and query.
let key = format!("{}:{}:{}:{}", display_path, hit.rank, hit.score, query);
// Missing snippets become an empty preview string.
let preview = compact_snippet(&hit.snippet)
.map(|snippet| truncate_for_budget(&snippet, max_bytes))
.unwrap_or_default();
SearchBudgetHitPreview {
handle: stable_handle("shit", &key),
rank: hit.rank,
// Only the displayed path is truncated; `expand` below is built from
// the untruncated `display_path`.
path: truncate_for_budget(&display_path, max_bytes),
confidence: format!("{:?}", hit.confidence),
score: hit.score,
preview,
expand: build_search_budget_follow_up(query, strategy, &display_path),
}
})
.collect();
let scale_guard = build_search_scale_guard(
query,
strategy,
root,
response,
symbol_total,
raw_symbol_total,
hit_total,
max_items,
max_bytes,
&symbols,
&hits,
);
SearchBudgetReport {
query: query.to_string(),
strategy: strategy.to_string(),
indexed_artifacts: response.indexed_artifacts,
skipped_artifacts: response.skipped_artifacts,
max_items,
max_bytes,
symbol_total,
raw_symbol_total,
hit_total,
truncated: symbol_total > max_items || hit_total > max_items,
scale_guard,
symbols,
hits,
}
}
/// Prints the human-readable rendering of a budgeted search report to stdout.
///
/// Output order: one `search-budget` header line, one `sym` line per
/// previewed symbol family, one `hit` line per previewed hit, a
/// `budget truncated` line when `report.truncated` is set, and finally the
/// scale-guard warning, its signals and its `narrow:` commands when a guard
/// is attached.
pub(crate) fn print_search_budget_human(report: &SearchBudgetReport) {
    let quoted_query = shell_quote(&report.query);
    println!(
        "search-budget q:{} strategy:{} symbols:{}/{} raw-symbols:{} hits:{}/{} indexed:{} skipped:{}",
        quoted_query,
        report.strategy,
        report.symbols.len(),
        report.symbol_total,
        report.raw_symbol_total,
        report.hits.len(),
        report.hit_total,
        report.indexed_artifacts,
        report.skipped_artifacts
    );

    for family in report.symbols.iter() {
        // The variants suffix is omitted entirely when no examples were kept.
        let variants = match family.surface_examples.as_slice() {
            [] => String::new(),
            examples => format!(" variants:{}", examples.join(", ")),
        };
        let headline =
            format_symbol_preview_line(&family.handle, &family.name, family.tag_alias.as_deref());
        let score = format_score(family.score, true);
        println!(
            "sym {} [{}] {} {}:{} sc:{} matches:{} files:{}{} expand:{}",
            headline,
            family.match_type,
            family.kind,
            family.file,
            family.line,
            score,
            family.match_count,
            family.file_count,
            variants,
            family.expand
        );
    }

    for entry in report.hits.iter() {
        let score = format_score(entry.score, true);
        if !entry.preview.is_empty() {
            println!(
                "hit {} #{} {} [{} {}] {} expand:{}",
                entry.handle,
                entry.rank,
                entry.path,
                entry.confidence,
                score,
                entry.preview,
                entry.expand
            );
        } else {
            println!(
                "hit {} #{} {} [{} {}] expand:{}",
                entry.handle, entry.rank, entry.path, entry.confidence, score, entry.expand
            );
        }
    }

    if report.truncated {
        println!(
            "budget truncated items:{} bytes:{}",
            report.max_items, report.max_bytes
        );
    }

    let Some(guard) = &report.scale_guard else {
        return;
    };
    let signals = &guard.signals;
    println!("scale guard [{}]: {}", guard.level, guard.warning);
    println!(
        "signals preview-symbols:{} symbol-families:{} raw-symbols:{} preview-hits:{} hits:{} indexed:{} skipped:{} budget-items:{} budget-bytes:{}",
        signals.preview_symbols,
        signals.symbol_families,
        signals.raw_symbol_matches,
        signals.preview_hits,
        signals.returned_hits,
        signals.indexed_artifacts,
        signals.skipped_artifacts,
        signals.max_items,
        signals.max_bytes
    );
    for command in guard.narrow_commands.iter() {
        println!("narrow: {command}");
    }
}
/// Collects the source files found at or below `path`.
///
/// A `path` that is itself a file is returned as the only entry, whatever its
/// extension. Otherwise the tree is walked with `ignore::WalkBuilder` (hidden
/// and gitignore filters enabled) and regular files with one of the listed
/// source extensions are kept. Extension matching is case-sensitive.
///
/// # Errors
/// Propagates the first error reported by the directory walker.
pub(crate) fn collect_source_files(path: &std::path::Path) -> Result<Vec<PathBuf>> {
    if path.is_file() {
        return Ok(vec![path.to_path_buf()]);
    }

    let mut collected = Vec::new();
    let walk = ignore::WalkBuilder::new(path)
        .hidden(true)
        .git_ignore(true)
        .build();
    for visited in walk {
        let visited = visited?;
        let is_regular_file = visited.file_type().is_some_and(|kind| kind.is_file());
        if !is_regular_file {
            continue;
        }
        let candidate = visited.path();
        let Some(extension) = candidate.extension() else {
            continue;
        };
        let extension = extension.to_string_lossy();
        let is_source = matches!(
            &*extension,
            "rs" | "py" | "ts" | "tsx" | "js" | "jsx" | "kt" | "kts" | "zig" | "sh" | "bash" | "zsh"
        );
        if is_source {
            collected.push(candidate.to_path_buf());
        }
    }
    Ok(collected)
}
#[cfg(test)]
mod tests {
use super::*;
use std::cell::RefCell;
use substrate::{ConvexEdgeRow, ConvexGraphClient, ConvexGraphStore, ConvexNodeRow};
// Parses `itr` with `Cli::parse_from` on a dedicated thread named `cli-parse`
// with a 16 MiB stack. Panics if the thread cannot be spawned or joined.
// NOTE(review): presumably parsing needs more stack than the default test
// thread provides — confirm.
fn parse_cli<I, T>(itr: I) -> Cli
where
    I: IntoIterator<Item = T> + Send + 'static,
    T: Into<std::ffi::OsString> + Clone + Send + 'static,
{
    let worker = std::thread::Builder::new()
        .name("cli-parse".to_string())
        .stack_size(16 * 1024 * 1024)
        .spawn(move || Cli::parse_from(itr))
        .unwrap();
    worker.join().unwrap()
}
// Fallible counterpart of `parse_cli`: runs `Cli::try_parse_from` on a
// dedicated thread named `cli-try-parse` with a 16 MiB stack and returns the
// parse result. Panics only if the thread cannot be spawned or joined.
fn try_parse_cli<I, T>(itr: I) -> std::result::Result<Cli, clap::Error>
where
    I: IntoIterator<Item = T> + Send + 'static,
    T: Into<std::ffi::OsString> + Clone + Send + 'static,
{
    let worker = std::thread::Builder::new()
        .name("cli-try-parse".to_string())
        .stack_size(16 * 1024 * 1024)
        .spawn(move || Cli::try_parse_from(itr))
        .unwrap();
    worker.join().unwrap()
}
// In-memory `ConvexGraphClient` for tests. Node rows are keyed by
// `external_id` and edge rows by `edge_key`, each in a `RefCell<BTreeMap>`,
// so listings come back in key order.
#[derive(Default)]
struct MemoryConvexGraphClient {
    nodes: RefCell<BTreeMap<String, ConvexNodeRow>>,
    edges: RefCell<BTreeMap<String, ConvexEdgeRow>>,
}

impl ConvexGraphClient for MemoryConvexGraphClient {
    // Inserts or replaces the node stored under `row.external_id`.
    fn upsert_node_row(&self, row: &ConvexNodeRow) -> Result<()> {
        let mut nodes = self.nodes.borrow_mut();
        nodes.insert(row.external_id.clone(), row.clone());
        Ok(())
    }

    // Inserts or replaces the edge stored under `row.edge_key`.
    fn upsert_edge_row(&self, row: &ConvexEdgeRow) -> Result<()> {
        let mut edges = self.edges.borrow_mut();
        edges.insert(row.edge_key.clone(), row.clone());
        Ok(())
    }

    // Returns 1 when a node was removed and 0 when the id was unknown.
    fn delete_node_row(&self, external_id: &str) -> Result<usize> {
        let removed = self.nodes.borrow_mut().remove(external_id);
        Ok(if removed.is_some() { 1 } else { 0 })
    }

    // Returns 1 when an edge was removed and 0 when the key was unknown.
    fn delete_edge_row(&self, edge_key: &str) -> Result<usize> {
        let removed = self.edges.borrow_mut().remove(edge_key);
        Ok(if removed.is_some() { 1 } else { 0 })
    }

    fn node_row(&self, external_id: &str) -> Result<Option<ConvexNodeRow>> {
        let nodes = self.nodes.borrow();
        Ok(nodes.get(external_id).cloned())
    }

    fn node_rows(&self) -> Result<Vec<ConvexNodeRow>> {
        let nodes = self.nodes.borrow();
        Ok(nodes.values().cloned().collect())
    }

    fn edge_rows(&self) -> Result<Vec<ConvexEdgeRow>> {
        let edges = self.edges.borrow();
        Ok(edges.values().cloned().collect())
    }

    fn node_rows_by_kind(&self, kind: &str) -> Result<Vec<ConvexNodeRow>> {
        let nodes = self.nodes.borrow();
        let matching: Vec<ConvexNodeRow> = nodes
            .values()
            .filter(|row| row.kind == kind)
            .cloned()
            .collect();
        Ok(matching)
    }

    // Edges leaving `from_external_id`; `kind: None` matches every edge kind.
    fn outgoing_edge_rows(
        &self,
        from_external_id: &str,
        kind: Option<&str>,
    ) -> Result<Vec<ConvexEdgeRow>> {
        let edges = self.edges.borrow();
        let matching: Vec<ConvexEdgeRow> = edges
            .values()
            .filter(|row| {
                row.from_external_id == from_external_id
                    && kind.is_none_or(|wanted| row.kind == wanted)
            })
            .cloned()
            .collect();
        Ok(matching)
    }
}
// Turns `path` into a git repository with one commit: runs `git init`,
// `git add .` and a quiet `git commit -m init` with a fixed test identity.
// Panics if git cannot be started or any step exits unsuccessfully.
fn init_git_repo(path: &Path) {
    let run_git = |args: &[&str], failure: &str| {
        let status = std::process::Command::new("git")
            .args(args)
            .current_dir(path)
            .status()
            .unwrap();
        assert!(status.success(), "{failure}");
    };

    run_git(&["init"], "git init failed");
    run_git(&["add", "."], "git add failed");
    run_git(
        &[
            "-c",
            "user.name=tsift-tests",
            "-c",
            "user.email=tsift-tests@example.com",
            "commit",
            "--quiet",
            "-m",
            "init",
        ],
        "git commit failed",
    );
}
// Creates `<root>/.tsift/index.db` as an empty file, creating the directory
// when needed.
fn write_empty_root_index(root: &Path) {
    let tsift_dir = root.join(".tsift");
    fs::create_dir_all(&tsift_dir).unwrap();
    let db_file = tsift_dir.join("index.db");
    fs::write(db_file, "").unwrap();
}
// Writes `line` to `path` `lines` times, newline-separated with one trailing
// newline, creating parent directories first. Returns `path` as a `PathBuf`.
// With `lines == 0` the file contains a single newline.
fn write_repeated_lines(path: &Path, line: &str, lines: usize) -> PathBuf {
    match path.parent() {
        Some(parent) => fs::create_dir_all(parent).unwrap(),
        None => {}
    }
    let mut body = vec![line; lines].join("\n");
    body.push('\n');
    fs::write(path, body).unwrap();
    path.to_path_buf()
}
// --- classify_task ---
// A search-style task is expected to route to the haiku tier with a haiku model.
#[test]
fn route_search_defaults_to_haiku() {
    let routed = classify_task("find all uses of authenticate");
    assert_eq!(routed.0, "haiku");
    assert!(
        routed.1.contains("haiku"),
        "expected haiku model, got {}",
        routed.1
    );
}
// Each of these edit-style task descriptions is expected to route to sonnet.
#[test]
fn route_edit_keywords_to_sonnet() {
    let edit_tasks = [
        "edit the file",
        "fix the bug",
        "update the config",
        "remove dead code",
        "create a new module",
    ];
    for task in edit_tasks {
        let (tier, _model) = classify_task(task);
        assert_eq!(tier, "sonnet", "expected sonnet for {:?}", task);
    }
}
// Each of these architecture-style task descriptions is expected to route to opus.
#[test]
fn route_architecture_keywords_to_opus() {
    let architecture_tasks = [
        "design the API",
        "architecture review",
        "plan the migration",
        "analyze the system",
        "evaluate trade-offs",
    ];
    for task in architecture_tasks {
        let (tier, _model) = classify_task(task);
        assert_eq!(tier, "opus", "expected opus for {:?}", task);
    }
}
// A task containing both "design" and "implement" must still route to opus.
#[test]
fn route_architecture_beats_edit() {
    // "design and implement" — architecture signal wins (checked first)
    let routed = classify_task("design and implement the new auth service");
    assert_eq!(routed.0, "opus");
}
// `--compact` placed before the subcommand sets `compact` and still selects `status`.
#[test]
fn cli_accepts_global_compact_flag() {
    let parsed = parse_cli(["tsift", "--compact", "status"]);
    let is_status = matches!(parsed.command, Some(Commands::Status { .. }));
    assert!(parsed.compact);
    assert!(is_status);
}
// A relative directory scope resolved against the root matches files under
// that directory and rejects files in a sibling directory.
#[test]
fn summarize_diff_scope_matches_relative_directory() {
    let scope = resolve_extract_scope(Path::new("/repo"), Path::new("src/feature")).unwrap();
    let inside = Path::new("/repo/src/feature/main.rs");
    let outside = Path::new("/repo/src/other/main.rs");
    assert!(summarize_diff_matches_scope(inside, &scope));
    assert!(!summarize_diff_matches_scope(outside, &scope));
}
// A relative file scope matches exactly that file and rejects a sibling file.
#[test]
fn summarize_diff_scope_matches_relative_file() {
    let scope =
        resolve_extract_scope(Path::new("/repo"), Path::new("src/feature/main.rs")).unwrap();
    let scoped_file = Path::new("/repo/src/feature/main.rs");
    let sibling_file = Path::new("/repo/src/feature/lib.rs");
    assert!(summarize_diff_matches_scope(scoped_file, &scope));
    assert!(!summarize_diff_matches_scope(sibling_file, &scope));
}
// Resolving the relative scope `src` against the temp root and walking it
// yields exactly the one source file written there.
#[test]
fn summarize_extract_scope_walks_relative_paths_from_root() {
    let workspace = tempfile::tempdir().unwrap();
    let src_dir = workspace.path().join("src");
    std::fs::create_dir_all(&src_dir).unwrap();
    let entry_point = src_dir.join("main.rs");
    std::fs::write(&entry_point, "fn alpha() {}\n").unwrap();

    let scope = resolve_extract_scope(workspace.path(), Path::new("src")).unwrap();
    let walked = collect_source_files(&scope).unwrap();
    assert_eq!(walked, vec![entry_point]);
}
// The extract base for a nested directory is that directory itself, so the
// `.` scope only sees the nested file and not `root.rs` at the top level.
#[test]
fn summarize_extract_base_uses_nested_path_instead_of_project_root() {
    let workspace = tempfile::tempdir().unwrap();
    let nested_dir = workspace.path().join("src/nested");
    std::fs::create_dir_all(&nested_dir).unwrap();
    std::fs::write(workspace.path().join("root.rs"), "fn root_level() {}\n").unwrap();
    let nested_source = nested_dir.join("main.rs");
    std::fs::write(&nested_source, "fn nested_only() {}\n").unwrap();

    let base = resolve_extract_base(&nested_dir).unwrap();
    let scope = resolve_extract_scope(&base, Path::new(".")).unwrap();
    let walked = collect_source_files(&scope).unwrap();
    assert_eq!(scope, nested_dir);
    assert_eq!(walked, vec![nested_source]);
}
// When given a file, the extract base is the file's parent directory.
#[test]
fn summarize_extract_base_uses_parent_of_file_path() {
    let workspace = tempfile::tempdir().unwrap();
    let parent_dir = workspace.path().join("src/nested");
    std::fs::create_dir_all(&parent_dir).unwrap();
    let source_file = parent_dir.join("main.rs");
    std::fs::write(&source_file, "fn nested_only() {}\n").unwrap();

    let base = resolve_extract_base(&source_file).unwrap();
    assert_eq!(base, parent_dir);
}
// A scope written as `src/../src` resolves to the canonical `src` directory
// and matches a file path beneath the un-canonicalized `src`.
#[test]
fn summarize_extract_scope_normalizes_dotdot_segments() {
    let workspace = tempfile::tempdir().unwrap();
    let src_dir = workspace.path().join("src");
    std::fs::create_dir_all(&src_dir).unwrap();

    let scope = resolve_extract_scope(workspace.path(), Path::new("src/../src")).unwrap();
    assert_eq!(scope, src_dir.canonicalize().unwrap());
    let candidate = src_dir.join("main.rs");
    assert!(summarize_diff_matches_scope(&candidate, &scope));
}
// An absolute scope that is a symlink to `real/src` resolves to the canonical
// target directory, and files under the real directory match it.
#[cfg(unix)]
#[test]
fn summarize_extract_scope_canonicalizes_absolute_symlink_paths() {
    use std::os::unix::fs::symlink;

    let workspace = tempfile::tempdir().unwrap();
    let real_root = workspace.path().join("real");
    let real_src = real_root.join("src");
    std::fs::create_dir_all(&real_src).unwrap();
    let link = workspace.path().join("scope-link");
    symlink(&real_src, &link).unwrap();

    let scope = resolve_extract_scope(&real_root, &link).unwrap();
    assert_eq!(scope, real_src.canonicalize().unwrap());
    let candidate = real_src.join("lib.rs");
    assert!(summarize_diff_matches_scope(&candidate, &scope));
}
// A file created after the initial commit and never added is reported as an
// existing changed file, with nothing reported as deleted.
#[test]
fn summarize_diff_extract_includes_untracked_files() {
    let repo = tempfile::tempdir().unwrap();
    std::fs::write(repo.path().join("README.md"), "# repo\n").unwrap();
    init_git_repo(repo.path());

    let src_dir = repo.path().join("src");
    std::fs::create_dir_all(&src_dir).unwrap();
    let untracked = src_dir.join("new.rs");
    std::fs::write(&untracked, "fn alpha_helper() {}\n").unwrap();

    let changed = summarize::git_changed_files(repo.path()).unwrap();
    assert_eq!(changed.existing, vec![untracked]);
    assert!(changed.deleted.is_empty());
}
// In a repository with `git init` but no commit, the new file is still
// reported as an existing changed file and nothing is reported as deleted.
#[test]
fn summarize_diff_extract_treats_unborn_head_as_untracked_only() {
    let repo = tempfile::tempdir().unwrap();
    let init_status = std::process::Command::new("git")
        .arg("init")
        .current_dir(repo.path())
        .status()
        .unwrap();
    assert!(init_status.success(), "git init failed");

    let src_dir = repo.path().join("src");
    std::fs::create_dir_all(&src_dir).unwrap();
    let untracked = src_dir.join("new.rs");
    std::fs::write(&untracked, "fn alpha_helper() {}\n").unwrap();

    let changed = summarize::git_changed_files(repo.path()).unwrap();
    assert_eq!(changed.existing, vec![untracked]);
    assert!(changed.deleted.is_empty());
}
// Removing a committed file from disk reports it as deleted, with no
// existing changed files.
#[test]
fn summarize_diff_extract_tracks_deleted_files() {
    let repo = tempfile::tempdir().unwrap();
    let src_dir = repo.path().join("src");
    std::fs::create_dir_all(&src_dir).unwrap();
    let removed = src_dir.join("gone.rs");
    std::fs::write(&removed, "fn stale() {}\n").unwrap();
    init_git_repo(repo.path());
    std::fs::remove_file(&removed).unwrap();

    let changed = summarize::git_changed_files(repo.path()).unwrap();
    assert!(changed.existing.is_empty());
    assert_eq!(changed.deleted, vec![removed]);
}
// After `git mv`, the new path is reported as existing and the old path as deleted.
#[test]
fn summarize_diff_extract_tracks_git_renames() {
    let repo = tempfile::tempdir().unwrap();
    let src_dir = repo.path().join("src");
    std::fs::create_dir_all(&src_dir).unwrap();
    let before = src_dir.join("old.rs");
    let after = src_dir.join("new.rs");
    std::fs::write(&before, "fn stale() {}\n").unwrap();
    init_git_repo(repo.path());

    let mv_status = std::process::Command::new("git")
        .args(["mv", "src/old.rs", "src/new.rs"])
        .current_dir(repo.path())
        .status()
        .unwrap();
    assert!(mv_status.success(), "git mv failed");

    let changed = summarize::git_changed_files(repo.path()).unwrap();
    assert_eq!(changed.existing, vec![after]);
    assert_eq!(changed.deleted, vec![before]);
}
// Commits `src/gone.rs`, stores a summary row for it, deletes the file from
// disk, then runs `cmd_summarize` scoped to `src` and expects the stored row
// for `src/gone.rs` to be gone.
#[test]
fn summarize_diff_extract_deletes_removed_summary_rows() {
let dir = tempfile::tempdir().unwrap();
let source_dir = dir.path().join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let deleted_file = source_dir.join("gone.rs");
std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
init_git_repo(dir.path());
let summary_db =
summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
// Seed a cached summary keyed by the repo-relative path of the file.
summary_db
.insert(&summarize::Summary {
id: 0,
symbol_name: "stale".to_string(),
file_path: "src/gone.rs".to_string(),
content_hash: "hash1".to_string(),
summary: "stale summary".to_string(),
entities: None,
relationships: None,
concept_labels: None,
extracted_at: "1700000000".to_string(),
model: "test".to_string(),
tokens_input: Some(100),
tokens_output: Some(50),
})
.unwrap();
// The deletion happens after the commit, so it shows up as a working-tree change.
std::fs::remove_file(&deleted_file).unwrap();
// NOTE(review): the positional arguments follow cmd_summarize's signature,
// which is not visible in this section — confirm their meaning there.
cmd_summarize(
None,
None,
Some(PathBuf::from("src")),
true,
false,
dir.path(),
false,
true,
false,
false,
false,
)
.unwrap();
assert!(summary_db.get_by_file("src/gone.rs").unwrap().is_empty());
}
// Commits `src/old.rs`, stores a summary row for it, renames the file with
// `git mv`, then runs `cmd_summarize` scoped to `src` and expects the stored
// row for the old path to be gone.
#[test]
fn summarize_diff_extract_deletes_renamed_summary_rows() {
let dir = tempfile::tempdir().unwrap();
let source_dir = dir.path().join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let old_file = source_dir.join("old.rs");
std::fs::write(&old_file, "fn stale() {}\n").unwrap();
std::fs::write(dir.path().join("README.md"), "# repo\n").unwrap();
init_git_repo(dir.path());
let summary_db =
summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
// Seed a cached summary keyed by the pre-rename path.
summary_db
.insert(&summarize::Summary {
id: 0,
symbol_name: "stale".to_string(),
file_path: "src/old.rs".to_string(),
content_hash: "hash1".to_string(),
summary: "stale summary".to_string(),
entities: None,
relationships: None,
concept_labels: None,
extracted_at: "1700000000".to_string(),
model: "test".to_string(),
tokens_input: Some(100),
tokens_output: Some(50),
})
.unwrap();
let status = std::process::Command::new("git")
.args(["mv", "src/old.rs", "src/new.rs"])
.current_dir(dir.path())
.status()
.unwrap();
assert!(status.success(), "git mv failed");
// NOTE(review): the positional arguments follow cmd_summarize's signature,
// which is not visible in this section — confirm their meaning there.
cmd_summarize(
None,
None,
Some(PathBuf::from("src")),
true,
false,
dir.path(),
false,
true,
false,
false,
false,
)
.unwrap();
assert!(summary_db.get_by_file("src/old.rs").unwrap().is_empty());
}
// Without a git repository: stores a summary row for `src/gone.rs`, deletes
// the only file in `src`, then runs `cmd_summarize` scoped to `src` and
// expects the stored row to be gone even though the scope holds no files.
#[test]
fn summarize_full_extract_deletes_removed_summary_rows_when_scope_is_empty() {
let dir = tempfile::tempdir().unwrap();
let source_dir = dir.path().join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let deleted_file = source_dir.join("gone.rs");
std::fs::write(&deleted_file, "fn stale() {}\n").unwrap();
let summary_db =
summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
summary_db
.insert(&summarize::Summary {
id: 0,
symbol_name: "stale".to_string(),
file_path: "src/gone.rs".to_string(),
content_hash: "hash1".to_string(),
summary: "stale summary".to_string(),
entities: None,
relationships: None,
concept_labels: None,
extracted_at: "1700000000".to_string(),
model: "test".to_string(),
tokens_input: Some(100),
tokens_output: Some(50),
})
.unwrap();
// After this removal `src` exists but contains no source files.
std::fs::remove_file(&deleted_file).unwrap();
// NOTE(review): the positional arguments follow cmd_summarize's signature,
// which is not visible in this section — confirm their meaning there.
cmd_summarize(
None,
None,
Some(PathBuf::from("src")),
false,
false,
dir.path(),
false,
true,
false,
false,
false,
)
.unwrap();
assert!(summary_db.get_by_file("src/gone.rs").unwrap().is_empty());
}
// Holds the summary writer lock while running `cmd_summarize` scoped to `src`
// and expects an error naming the already-active extractor and the
// `tsift summarize --extract` command.
#[test]
fn summarize_extract_fails_fast_when_summary_writer_lock_is_live() {
let dir = tempfile::tempdir().unwrap();
let source_dir = dir.path().join("src");
std::fs::create_dir_all(&source_dir).unwrap();
let file = source_dir.join("lib.rs");
std::fs::write(&file, "fn helper() {}\n").unwrap();
let content = std::fs::read(&file).unwrap();
let summary_db =
summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
// The seeded row's hash is computed from the file's current bytes.
summary_db
.insert(&summarize::Summary {
id: 0,
symbol_name: "lib.rs".to_string(),
file_path: "src/lib.rs".to_string(),
content_hash: summarize::content_hash(&content),
summary: "cached summary".to_string(),
entities: None,
relationships: None,
concept_labels: None,
extracted_at: "1700000000".to_string(),
model: "test".to_string(),
tokens_input: Some(100),
tokens_output: Some(50),
})
.unwrap();
// Close the database handle before the lock is taken.
drop(summary_db);
let lock_path = summarize::writer_lock_path(&dir.path().join(".tsift/summaries.db"));
// `_lock` stays bound until the end of the test, so the lock is held during the call.
let _lock = hold_writer_lock(&lock_path);
// NOTE(review): the positional arguments follow cmd_summarize's signature,
// which is not visible in this section — confirm their meaning there.
let err = cmd_summarize(
None,
None,
Some(PathBuf::from("src")),
false,
false,
dir.path(),
false,
true,
false,
false,
false,
)
.unwrap_err();
let message = err.to_string();
assert!(message.contains("another tsift summarize extractor is already active"));
assert!(message.contains("tsift summarize --extract"));
}
// Runs `cmd_summarize` in an empty directory and expects a
// "no summaries.db found" error, with no `.tsift/summaries.db` created as a
// side effect.
#[test]
fn summarize_stats_fails_closed_when_cache_missing() {
let dir = tempfile::tempdir().unwrap();
// NOTE(review): the positional arguments follow cmd_summarize's signature,
// which is not visible in this section — confirm their meaning there.
let err = cmd_summarize(
None,
None,
None,
false,
true,
dir.path(),
false,
false,
false,
false,
false,
)
.unwrap_err();
assert!(
err.to_string().contains("no summaries.db found"),
"got: {err}"
);
// The failed call must not have created the database.
assert!(!dir.path().join(".tsift/summaries.db").exists());
}
// Seeds one summary row, closes the database, holds the lock returned by
// `hold_rollback_journal_lock`, and expects `cmd_summarize` to succeed anyway.
#[test]
fn summarize_stats_uses_snapshot_fallback_when_rollback_journal_is_locked() {
let dir = tempfile::tempdir().unwrap();
let summary_db =
summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
summary_db
.insert(&summarize::Summary {
id: 0,
symbol_name: "alpha_helper".to_string(),
file_path: "src/lib.rs".to_string(),
content_hash: "hash1".to_string(),
summary: "cached summary".to_string(),
entities: None,
relationships: None,
concept_labels: None,
extracted_at: "1700000000".to_string(),
model: "claude-haiku-4-5-20251001".to_string(),
tokens_input: Some(100),
tokens_output: Some(40),
})
.unwrap();
drop(summary_db);
// `_lock` stays bound until the end of the test, so the lock is held during the call.
let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/summaries.db"));
// NOTE(review): the positional arguments follow cmd_summarize's signature,
// which is not visible in this section — confirm their meaning there.
let result = cmd_summarize(
None,
None,
None,
false,
true,
dir.path(),
false,
false,
false,
false,
false,
);
assert!(result.is_ok());
}
// Seeds a summary row for `alpha_helper`, closes the database, holds the lock
// returned by `hold_rollback_journal_lock`, and expects a `cmd_summarize`
// query for that symbol to succeed anyway.
#[test]
fn summarize_symbol_query_uses_snapshot_fallback_when_rollback_journal_is_locked() {
let dir = tempfile::tempdir().unwrap();
let summary_db =
summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
summary_db
.insert(&summarize::Summary {
id: 0,
symbol_name: "alpha_helper".to_string(),
file_path: "src/lib.rs".to_string(),
content_hash: "hash1".to_string(),
summary: "cached summary".to_string(),
entities: None,
relationships: None,
concept_labels: None,
extracted_at: "1700000000".to_string(),
model: "claude-haiku-4-5-20251001".to_string(),
tokens_input: Some(100),
tokens_output: Some(40),
})
.unwrap();
drop(summary_db);
// `_lock` stays bound until the end of the test, so the lock is held during the call.
let _lock = hold_rollback_journal_lock(&dir.path().join(".tsift/summaries.db"));
// NOTE(review): the positional arguments follow cmd_summarize's signature,
// which is not visible in this section — confirm their meaning there.
let result = cmd_summarize(
Some("alpha_helper".to_string()),
None,
None,
false,
false,
dir.path(),
false,
true,
false,
false,
false,
);
assert!(result.is_ok());
}
// The summary database lives at the temp root, while `cmd_summarize` is called
// with the nested `src/nested` directory. The query must succeed and must not
// create a second `.tsift/summaries.db` under the nested directory.
#[test]
fn summarize_cmd_uses_ancestor_project_root_for_nested_paths() {
let dir = tempfile::tempdir().unwrap();
let nested = dir.path().join("src/nested");
std::fs::create_dir_all(&nested).unwrap();
let summary_db =
summarize::SummaryDb::open(&dir.path().join(".tsift/summaries.db")).unwrap();
summary_db
.insert(&summarize::Summary {
id: 0,
symbol_name: "alpha_helper".to_string(),
file_path: "src/lib.rs".to_string(),
content_hash: "hash1".to_string(),
summary: "cached summary".to_string(),
entities: None,
relationships: None,
concept_labels: None,
extracted_at: "1700000000".to_string(),
model: "claude-haiku-4-5-20251001".to_string(),
tokens_input: Some(100),
tokens_output: Some(40),
})
.unwrap();
// NOTE(review): the positional arguments follow cmd_summarize's signature,
// which is not visible in this section — confirm their meaning there.
let result = cmd_summarize(
Some("alpha_helper".to_string()),
None,
None,
false,
false,
&nested,
false,
true,
false,
false,
false,
);
assert!(result.is_ok());
assert!(!nested.join(".tsift/summaries.db").exists());
}
// Lays out a workspace with two submodules (`src/alpha`, `src/beta`) declared
// in `.gitmodules`, each with an empty scoped index under `.tsift/indexes/`,
// and expects `find_symbols_db_for_file` to pick the beta index and the beta
// source root for a file inside `src/beta`.
#[test]
fn summarize_extract_uses_matching_scoped_index_for_workspace_file() {
let dir = tempfile::tempdir().unwrap();
std::fs::write(
dir.path().join(".gitmodules"),
r#"[submodule "src/alpha"]
path = src/alpha
url = https://example.com/alpha
[submodule "src/beta"]
path = src/beta
url = https://example.com/beta
"#,
)
.unwrap();
let alpha_root = dir.path().join("src/alpha");
let beta_root = dir.path().join("src/beta");
std::fs::create_dir_all(alpha_root.join("src")).unwrap();
std::fs::create_dir_all(beta_root.join("src")).unwrap();
std::fs::create_dir_all(dir.path().join(".tsift/indexes/alpha")).unwrap();
std::fs::create_dir_all(dir.path().join(".tsift/indexes/beta")).unwrap();
std::fs::write(alpha_root.join("src/lib.rs"), "fn alpha_helper() {}\n").unwrap();
let beta_file = beta_root.join("src/lib.rs");
std::fs::write(&beta_file, "fn beta_helper() {}\n").unwrap();
// The index files are empty placeholders; only their locations matter here.
std::fs::write(dir.path().join(".tsift/indexes/alpha/index.db"), "").unwrap();
std::fs::write(dir.path().join(".tsift/indexes/beta/index.db"), "").unwrap();
let context = find_symbols_db_for_file(dir.path(), &beta_file)
.unwrap()
.expect("expected matching scoped index");
assert_eq!(
context.db_path,
dir.path().join(".tsift/indexes/beta/index.db")
);
assert_eq!(context.source_root, beta_root);
}
// --- apply_edit_op ---
// Builds an `EditOp` against the placeholder file `dummy.txt` for the
// `apply_edit_op` tests.
fn make_op(old: &str, new: &str, replace_all: bool) -> EditOp {
    let file = PathBuf::from("dummy.txt");
    let (old, new) = (old.to_owned(), new.to_owned());
    EditOp {
        file,
        old,
        new,
        replace_all,
    }
}
// A unique match is replaced and reported as one replacement.
#[test]
fn edit_replaces_single_occurrence() {
    let op = make_op("world", "rust", false);
    let (edited, replacements) = apply_edit_op("hello world", &op).unwrap();
    assert_eq!(edited, "hello rust");
    assert_eq!(replacements, 1);
}
// With `replace_all`, every occurrence is replaced and counted.
#[test]
fn edit_replace_all_replaces_every_occurrence() {
    let op = make_op("foo", "bar", true);
    let (edited, replacements) = apply_edit_op("foo foo foo", &op).unwrap();
    assert_eq!(edited, "bar bar bar");
    assert_eq!(replacements, 3);
}
// An `old` string that does not occur in the content is an error.
#[test]
fn edit_fails_when_old_not_found() {
    let op = make_op("missing", "x", false);
    let outcome = apply_edit_op("hello world", &op);
    assert!(outcome.is_err());
}
// Two matches without `replace_all` is an error whose message mentions "2 times".
#[test]
fn edit_fails_when_ambiguous_without_replace_all() {
    let op = make_op("foo", "bar", false);
    let failure = apply_edit_op("foo foo", &op).unwrap_err();
    let mentions_count = failure.to_string().contains("2 times");
    assert!(mentions_count, "got: {}", failure);
}
// Replacing a string with itself is rejected.
#[test]
fn edit_fails_when_old_equals_new() {
    let op = make_op("hello", "hello", false);
    let outcome = apply_edit_op("hello", &op);
    assert!(outcome.is_err());
}
// Plans a two-file batch edit, injects a failure through the callback when it
// is invoked with index 1, and expects the error to surface with both files
// left at their original contents.
#[test]
fn edit_batch_rolls_back_when_later_swap_fails() {
let dir = tempfile::tempdir().unwrap();
let alpha = dir.path().join("alpha.txt");
let beta = dir.path().join("beta.txt");
fs::write(&alpha, "alpha old\n").unwrap();
fs::write(&beta, "beta old\n").unwrap();
let batch = EditBatch {
edits: vec![
EditOp {
file: alpha.clone(),
old: "old".to_string(),
new: "new".to_string(),
replace_all: false,
},
EditOp {
file: beta.clone(),
old: "old".to_string(),
new: "new".to_string(),
replace_all: false,
},
],
};
let plan = build_edit_plan(&batch).unwrap();
// The callback succeeds for index 0 and fails for index 1.
// NOTE(review): presumably the index identifies the file being swapped — confirm
// against apply_edit_plan_atomically_inner.
let err = match apply_edit_plan_atomically_inner(plan, |commit_index, _| {
if commit_index == 1 {
bail!("simulated swap failure");
}
Ok(())
}) {
Ok(_) => panic!("expected simulated swap failure"),
Err(err) => err,
};
assert!(err.to_string().contains("simulated swap failure"));
// Neither file may keep the edit, including the one processed before the failure.
assert_eq!(fs::read_to_string(&alpha).unwrap(), "alpha old\n");
assert_eq!(fs::read_to_string(&beta).unwrap(), "beta old\n");
}
// --- SQL introspection ---
// Creates a SQLite database in a named temp file with a `users` table (two
// rows) and a `posts` table (three rows, `user_id` referencing `users.id`).
// Returns the temp-file handle together with the open connection.
// NOTE(review): presumably the handle is returned so the backing file outlives
// the connection — confirm against callers.
fn setup_test_db() -> (tempfile::NamedTempFile, Connection) {
let tmp = tempfile::NamedTempFile::new().unwrap();
let conn = Connection::open(tmp.path()).unwrap();
// The fixture rows include NULLs in the nullable `email` and `body` columns.
conn.execute_batch(
"CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT);
INSERT INTO users VALUES (1, 'Alice', 'alice@example.com');
INSERT INTO users VALUES (2, 'Bob', NULL);
CREATE TABLE posts (id INTEGER PRIMARY KEY, user_id INTEGER NOT NULL, title TEXT NOT NULL, body TEXT,
FOREIGN KEY(user_id) REFERENCES users(id));
INSERT INTO posts VALUES (1, 1, 'Hello World', 'First post');
INSERT INTO posts VALUES (2, 1, 'Second', NULL);
INSERT INTO posts VALUES (3, 2, 'Bob post', 'Content here');"
).unwrap();
(tmp, conn)
}
// --- rewrite_command ---
// A bare `rg <pattern>` becomes an enveloped exact search with the normal budget.
#[test]
fn rewrite_rg_simple_pattern() {
    let rewritten = rewrite_command("rg authenticate");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope search \"authenticate\" --exact --budget normal")
    );
}
// A trailing positional after the pattern is carried over as `--path`.
#[test]
fn rewrite_rg_with_path() {
    let rewritten = rewrite_command("rg authenticate src/");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\"")
    );
}
// The `-i` flag does not appear in the rewritten command.
#[test]
fn rewrite_rg_with_flags_ignored() {
    let rewritten = rewrite_command("rg -i authenticate src/");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\"")
    );
}
// `-t rs` consumes its value, so `authenticate` is still taken as the pattern.
#[test]
fn rewrite_rg_with_type_flag() {
    // -t rs takes a value, should be skipped; pattern is next positional
    let rewritten = rewrite_command("rg -t rs authenticate");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope search \"authenticate\" --exact --budget normal")
    );
}
// A piped `rg` command is not rewritten.
#[test]
fn rewrite_rg_pipe_passthrough() {
    // Pipe chains can't be translated — pass through
    assert_eq!(rewrite_command("rg authenticate | head -5"), None);
}
// `rg --files` is not rewritten.
#[test]
fn rewrite_rg_files_passthrough() {
    assert_eq!(rewrite_command("rg --files src/tsift .agent-doc logs"), None);
}
// `find` invocations are not rewritten.
#[test]
fn rewrite_find_passthrough() {
    let command = "find src/tsift .agent-doc -type f -name '*.rs'";
    assert_eq!(rewrite_command(command), None);
}
// `grep -r <pattern> <path>` becomes an enveloped exact search scoped to the path.
#[test]
fn rewrite_grep_recursive() {
    let rewritten = rewrite_command("grep -r authenticate src/");
    assert_eq!(
        rewritten.as_deref(),
        Some("tsift --envelope search \"authenticate\" --exact --budget normal --path \"src/\"")
    );
}
// `grep` without `-r` is not rewritten.
#[test]
fn rewrite_grep_non_recursive_passthrough() {
    assert_eq!(rewrite_command("grep authenticate file.txt"), None);
}
// A command that is already a `tsift` invocation should come back unchanged.
#[test]
fn rewrite_tsift_passthrough() {
    let command = "tsift search \"foo\"";
    assert_eq!(rewrite_command(command), Some(command.to_string()));
}
// A `tsift search` without `--timeout` should gain `--timeout 0` when run.
#[test]
fn rewrite_run_tsift_search_disables_timeout_by_default() {
    let effective = effective_rewrite_run_command("tsift search hookcaps --exact --path /tmp/x");
    assert_eq!(
        effective,
        "tsift search hookcaps --exact --path /tmp/x --timeout 0"
    );
}
// An explicit `--timeout` on the command should be kept exactly as given.
#[test]
fn rewrite_run_preserves_explicit_search_timeout() {
    let command = "tsift search hookcaps --exact --path /tmp/x --timeout 5";
    let effective = effective_rewrite_run_command(command);
    assert_eq!(effective, command);
}
// `echo cargo build` is an `echo` command, so no rewrite is expected.
#[test]
fn rewrite_unrelated_passthrough() {
    assert_eq!(rewrite_command("echo cargo build"), None);
}
// A double-quoted multi-word pattern should stay a single search pattern.
#[test]
fn rewrite_rg_quoted_pattern() {
    let expected = "tsift --envelope search \"fn main\" --exact --budget normal";
    let rewritten = rewrite_command("rg \"fn main\"");
    assert_eq!(rewritten, Some(expected.to_string()));
}
// Plain `git diff` should map to `tsift diff-digest` on the current directory.
#[test]
fn rewrite_git_diff_to_diff_digest() {
    let rewritten = rewrite_command("git diff");
    assert_eq!(rewritten.as_deref(), Some("tsift diff-digest ."));
}
// `--cached` should be carried over to the digest command.
#[test]
fn rewrite_git_diff_cached_to_diff_digest() {
    let rewritten = rewrite_command("git diff --cached");
    assert_eq!(rewritten.as_deref(), Some("tsift diff-digest --cached ."));
}
// A pathspec after `--` should become the digest target, quoted.
#[test]
fn rewrite_git_diff_with_path_to_diff_digest() {
    let rewritten = rewrite_command("git diff -- src/");
    assert_eq!(rewritten.as_deref(), Some("tsift diff-digest \"src/\""));
}
// `git diff <revision>` is expected to pass through without a rewrite.
#[test]
fn rewrite_git_diff_with_revision_passthrough() {
    assert_eq!(rewrite_command("git diff HEAD~1"), None);
}
// `git show <rev>` should become a revision-scoped diff digest.
#[test]
fn rewrite_git_show_to_revision_diff_digest() {
    let expected = "tsift diff-digest --revision \"HEAD~1\" .";
    let rewritten = rewrite_command("git show HEAD~1");
    assert_eq!(rewritten, Some(expected.to_string()));
}
// `git log -p -1 <rev>` should also become a revision-scoped diff digest.
#[test]
fn rewrite_git_log_patch_history_to_revision_diff_digest() {
    let expected = "tsift diff-digest --revision \"HEAD~2\" .";
    let rewritten = rewrite_command("git log -p -1 HEAD~2");
    assert_eq!(rewritten, Some(expected.to_string()));
}
// `cat` of a long agent-doc markdown session (front matter plus 90 prompt
// lines) should be redirected to `tsift session-digest --source markdown`.
#[test]
fn rewrite_cat_long_agent_doc_session_to_session_digest() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("tsift.md");
    let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
    body.extend((0..90).map(|index| format!("❯ prompt {index}?\n")));
    fs::write(&session, body).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("cat {session_arg}"));

    let context_arg = shell_quote(&resolve_digest_context_path(&session));
    let expected = format!(
        "tsift session-digest --path {context_arg} --input {session_arg} --source markdown"
    );
    assert_eq!(rewritten, Some(expected));
}
// `head -n 120` over a 120-line Claude-style JSONL transcript should be
// redirected to `tsift session-digest --source claude-jsonl`.
#[test]
fn rewrite_head_long_claude_jsonl_to_session_digest() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("session.jsonl");
    let line =
        r#"{"message":{"role":"assistant","content":[{"type":"text","text":"❯ do [#yyhd]"}]}}"#;
    let mut body = vec![line; 120].join("\n");
    body.push('\n');
    fs::write(&session, body).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("head -n 120 {session_arg}"));

    let context_arg = shell_quote(&resolve_digest_context_path(&session));
    let expected = format!(
        "tsift session-digest --path {context_arg} --input {session_arg} --source claude-jsonl"
    );
    assert_eq!(rewritten, Some(expected));
}
// `head -n 120` over a 120-line Codex-style JSONL transcript should be
// redirected to `tsift session-digest --source codex-jsonl`.
#[test]
fn rewrite_head_long_codex_jsonl_to_session_digest() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("codex.jsonl");
    let line = r#"{"type":"event_msg","payload":{"type":"user_message","message":"do [#cdxlog]. spec-test-build-install-commit-push"}}"#;
    let mut body = vec![line; 120].join("\n");
    body.push('\n');
    fs::write(&session, body).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("head -n 120 {session_arg}"));

    let context_arg = shell_quote(&resolve_digest_context_path(&session));
    let expected = format!(
        "tsift session-digest --path {context_arg} --input {session_arg} --source codex-jsonl"
    );
    assert_eq!(rewritten, Some(expected));
}
// `tail -n 20` on a 120-line transcript is expected to pass through.
#[test]
fn rewrite_small_transcript_window_passthrough() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("session.jsonl");
    let line = r#"{"message":{"role":"assistant","content":[{"type":"text","text":"hello"}]}}"#;
    let mut body = vec![line; 120].join("\n");
    body.push('\n');
    fs::write(&session, body).unwrap();

    let command = format!("tail -n 20 {}", shell_quote(session.to_str().unwrap()));
    assert_eq!(rewrite_command(&command), None);
}
// A 120-line `sed -n` range over an agent-doc markdown session should be
// redirected to `tsift session-digest --source markdown`.
#[test]
fn rewrite_sed_large_agent_doc_range_to_session_digest() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("tsift.md");
    let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
    body.extend((0..120).map(|index| format!("### Re: topic {index}\n")));
    fs::write(&session, body).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("sed -n '1,120p' {session_arg}"));

    let context_arg = shell_quote(&resolve_digest_context_path(&session));
    let expected = format!(
        "tsift session-digest --path {context_arg} --input {session_arg} --source markdown"
    );
    assert_eq!(rewritten, Some(expected));
}
// `cat` of a 120-line agent-doc `.log` file should be redirected to
// `tsift session-digest --source agent-doc-log`.
#[test]
fn rewrite_cat_large_agent_doc_log_to_session_digest() {
    let dir = tempfile::tempdir().unwrap();
    let session = dir.path().join("tsift.log");
    let line = "[1776528398] claude_start mode=fresh_restart restart_count=1";
    let mut body = vec![line; 120].join("\n");
    body.push('\n');
    fs::write(&session, body).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("cat {session_arg}"));

    let context_arg = shell_quote(&resolve_digest_context_path(&session));
    let expected = format!(
        "tsift session-digest --path {context_arg} --input {session_arg} --source agent-doc-log"
    );
    assert_eq!(rewritten, Some(expected));
}
// When the session file lives inside a declared submodule (`src/tsift` with
// its own `.git` file), the digest `--path` should be the submodule directory.
#[test]
fn rewrite_session_reads_prefer_submodule_root_for_digest_path() {
    let gitmodules = r#"[submodule "src/tsift"]
path = src/tsift
url = https://example.com/tsift
"#;
    let dir = tempfile::tempdir().unwrap();
    fs::write(dir.path().join(".gitmodules"), gitmodules).unwrap();

    let submodule = dir.path().join("src/tsift");
    fs::create_dir_all(submodule.join("tasks")).unwrap();
    fs::write(
        submodule.join(".git"),
        "gitdir: ../../.git/modules/src/tsift\n",
    )
    .unwrap();

    let session = submodule.join("tasks/plan.md");
    let mut body = String::from("---\nagent_doc_session: tsift-v0.1\n---\n\n## Exchange\n");
    body.extend((0..90).map(|index| format!("❯ prompt {index}?\n")));
    fs::write(&session, body).unwrap();

    let session_arg = shell_quote(session.to_str().unwrap());
    let rewritten = rewrite_command(&format!("cat {session_arg}"));

    let submodule_arg = shell_quote(submodule.to_str().unwrap());
    let expected = format!(
        "tsift session-digest --path {submodule_arg} --input {session_arg} --source markdown"
    );
    assert_eq!(rewritten, Some(expected));
}
// A long markdown file without agent-doc front matter is expected to pass through.
#[test]
fn rewrite_regular_markdown_read_passthrough() {
    let dir = tempfile::tempdir().unwrap();
    let readme = dir.path().join("README.md");
    let mut body = vec!["plain markdown"; 120].join("\n");
    body.push('\n');
    fs::write(&readme, body).unwrap();

    let command = format!("cat {}", shell_quote(readme.to_str().unwrap()));
    assert_eq!(rewrite_command(&command), None);
}
// `cat` of a 120-line source file in an indexed repo should become an
// 80-line `source-read` window starting at line 1.
#[test]
fn rewrite_cat_large_source_to_source_read_in_indexed_repo() {
    let dir = tempfile::tempdir().unwrap();
    write_empty_root_index(dir.path());
    let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 120);

    let command = format!("cat {}", shell_quote(source.to_str().unwrap()));
    let rewritten = rewrite_command(&command);

    let root_arg = shell_quote(&dir.path().to_string_lossy());
    let expected = format!(
        "tsift --envelope source-read \"src/lib.rs\" --path {root_arg} --start 1 --lines 80 --budget normal"
    );
    assert_eq!(rewritten, Some(expected));
}
// A 20-line `head` window on a source file is expected to pass through.
#[test]
fn rewrite_head_small_source_window_passthrough() {
    let dir = tempfile::tempdir().unwrap();
    write_empty_root_index(dir.path());
    let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 120);

    let command = format!("head -n 20 {}", shell_quote(source.to_str().unwrap()));
    assert_eq!(rewrite_command(&command), None);
}
// `sed -n '40,160p'` should map to a `source-read` at line 40 spanning 121
// lines, i.e. the inclusive range 40..=160.
#[test]
fn rewrite_sed_large_source_range_to_source_read() {
    let dir = tempfile::tempdir().unwrap();
    write_empty_root_index(dir.path());
    let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 200);

    let command = format!("sed -n '40,160p' {}", shell_quote(source.to_str().unwrap()));
    let rewritten = rewrite_command(&command);

    let root_arg = shell_quote(&dir.path().to_string_lossy());
    let expected = format!(
        "tsift --envelope source-read \"src/lib.rs\" --path {root_arg} --start 40 --lines 121 --budget normal"
    );
    assert_eq!(rewritten, Some(expected));
}
// `tail -n 120` on a 200-line file should start the `source-read` at line 81
// so the window still ends at the last line.
#[test]
fn rewrite_tail_large_source_window_preserves_tail_anchor() {
    let dir = tempfile::tempdir().unwrap();
    write_empty_root_index(dir.path());
    let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 200);

    let command = format!("tail -n 120 {}", shell_quote(source.to_str().unwrap()));
    let rewritten = rewrite_command(&command);

    let root_arg = shell_quote(&dir.path().to_string_lossy());
    let expected = format!(
        "tsift --envelope source-read \"src/lib.rs\" --path {root_arg} --start 81 --lines 120 --budget normal"
    );
    assert_eq!(rewritten, Some(expected));
}
// A large `.txt` file is expected to pass through even when a root index exists.
#[test]
fn rewrite_large_non_source_read_passthrough_even_when_indexed() {
    let dir = tempfile::tempdir().unwrap();
    write_empty_root_index(dir.path());
    let notes = write_repeated_lines(&dir.path().join("notes.txt"), "plain text", 120);

    let command = format!("cat {}", shell_quote(notes.to_str().unwrap()));
    assert_eq!(rewrite_command(&command), None);
}
// With no root index written, a large source read is expected to pass through.
#[test]
fn rewrite_large_source_read_passthrough_without_index() {
    let dir = tempfile::tempdir().unwrap();
    let source = write_repeated_lines(&dir.path().join("src/lib.rs"), "fn demo() {}", 120);

    let command = format!("cat {}", shell_quote(source.to_str().unwrap()));
    assert_eq!(rewrite_command(&command), None);
}
// `cargo test` should be wrapped in the `test` digest runner with runner `cargo`.
#[test]
fn rewrite_cargo_test_to_digest_runner() {
    let expected = "tsift --envelope __digest-runner --kind \"test\" --path \".\" --shell-command \"cargo test --lib\" --runner \"cargo\"";
    let rewritten = rewrite_command("cargo test --lib");
    assert_eq!(rewritten, Some(expected.to_string()));
}
// A direct `pytest` call should be wrapped in the `test` digest runner with runner `pytest`.
#[test]
fn rewrite_pytest_to_digest_runner() {
    let expected = "tsift --envelope __digest-runner --kind \"test\" --path \".\" --shell-command \"pytest -q tests/test_cli.py\" --runner \"pytest\"";
    let rewritten = rewrite_command("pytest -q tests/test_cli.py");
    assert_eq!(rewritten, Some(expected.to_string()));
}
// `python -m pytest` should be recognised as the `pytest` runner as well.
#[test]
fn rewrite_python_m_pytest_to_digest_runner() {
    let expected = "tsift --envelope __digest-runner --kind \"test\" --path \".\" --shell-command \"python -m pytest tests/test_cli.py\" --runner \"pytest\"";
    let rewritten = rewrite_command("python -m pytest tests/test_cli.py");
    assert_eq!(rewritten, Some(expected.to_string()));
}
// `cargo build` should be wrapped in the `log` digest runner, with no `--runner`.
#[test]
fn rewrite_cargo_build_to_log_digest_runner() {
    let expected = "tsift --envelope __digest-runner --kind \"log\" --path \".\" --shell-command \"cargo build --release\"";
    let rewritten = rewrite_command("cargo build --release");
    assert_eq!(rewritten, Some(expected.to_string()));
}
// `cargo install` should be wrapped in the `log` digest runner like `cargo build`.
#[test]
fn rewrite_cargo_install_to_log_digest_runner() {
    let expected = "tsift --envelope __digest-runner --kind \"log\" --path \".\" --shell-command \"cargo install --path . --force\"";
    let rewritten = rewrite_command("cargo install --path . --force");
    assert_eq!(rewritten, Some(expected.to_string()));
}
// A piped `cargo test` is expected to pass through without a rewrite.
#[test]
fn rewrite_metacharacter_command_passthrough() {
    assert_eq!(rewrite_command("cargo test | head"), None);
}
// A global flag before `search` should not stop the search output cap
// (50 lines, `Strategy:` header stripped) from being selected.
#[test]
fn rewrite_output_cap_detects_search_even_with_global_flag() {
    let search_cap = rewrite_output_cap("tsift --compact search foo").expect("cap");
    assert_eq!(search_cap.max_lines, 50);
    assert_eq!(search_cap.strip_prefix, Some("Strategy:"));
}
// Commands requesting `--json`, `--schema` or `--envelope` output should get no cap.
#[test]
fn rewrite_output_cap_skips_structured_output() {
    let structured_commands = [
        "tsift search foo --json",
        "tsift --schema graph foo",
        "tsift --envelope search foo",
    ];
    for command in structured_commands {
        assert!(rewrite_output_cap(command).is_none());
    }
}
// Applying an envelope output format to a command that already carries
// `--envelope` should leave it with a single `--envelope` flag.
#[test]
fn rewrite_output_format_forwards_envelope_to_digest_runner() {
    let format = OutputFormat {
        json_output: true,
        compact: false,
        pretty: false,
        terse: false,
        schema: false,
        envelope: true,
    };
    let rewritten = rewrite_command("cargo test --lib").expect("rewrite");
    let forwarded = apply_rewrite_output_format(&rewritten, format);
    assert_eq!(
        forwarded,
        "tsift --envelope __digest-runner --kind \"test\" --path \".\" --shell-command \"cargo test --lib\" --runner \"cargo\""
    );
}
// With `json_output` and `pretty` set, the forwarded command should gain
// `--pretty` ahead of the existing `--envelope`.
#[test]
fn rewrite_output_format_forwards_json_when_requested() {
    let format = OutputFormat {
        json_output: true,
        compact: false,
        pretty: true,
        terse: false,
        schema: false,
        envelope: false,
    };
    let rewritten = rewrite_command("cargo build --release").expect("rewrite");
    let forwarded = apply_rewrite_output_format(&rewritten, format);
    assert_eq!(
        forwarded,
        "tsift --pretty --envelope __digest-runner --kind \"log\" --path \".\" --shell-command \"cargo build --release\""
    );
}
// With a two-line cap and the `Strategy:` prefix stripped, the expected output
// is the first two lines plus a "+1 more lines" notice.
#[test]
fn output_cap_strips_search_header_and_truncates() {
    let two_line_cap = OutputCap {
        max_lines: 2,
        strip_prefix: Some("Strategy:"),
    };
    let capped = apply_output_cap(
        b"Strategy: exact | Indexed: 0 | Skipped: 0\n\nline1\nline2\nline3\n",
        two_line_cap,
    );
    assert_eq!(
        capped,
        "line1\nline2\n... (+1 more lines; rerun the underlying tsift command directly for the full output)\n"
    );
}
// The schema overview of the fixture DB should list `posts` then `users`.
#[test]
fn sql_schema_overview_lists_tables() {
    let (_tmp, conn) = setup_test_db();
    let overview = schema_overview(&conn).unwrap();
    let table_names = overview
        .iter()
        .map(|table| table.name.as_str())
        .collect::<Vec<&str>>();
    assert_eq!(table_names, &["posts", "users"]);
}
// The overview should report 2 rows for `users` and 3 rows for `posts`.
#[test]
fn sql_schema_overview_row_counts() {
    let (_tmp, conn) = setup_test_db();
    let overview = schema_overview(&conn).unwrap();
    let table_named = |wanted: &str| overview.iter().find(|table| table.name == wanted).unwrap();
    let users_table = table_named("users");
    let posts_table = table_named("posts");
    assert_eq!(users_table.row_count, 2);
    assert_eq!(posts_table.row_count, 3);
}
// Column metadata for `users`: `id` is the primary key, `name` is NOT NULL,
// and `email` is nullable.
#[test]
fn sql_table_columns_metadata() {
    let (_tmp, conn) = setup_test_db();
    let columns = table_columns(&conn, "users").unwrap();
    assert_eq!(columns.len(), 3);

    let id_column = &columns[0];
    assert_eq!(id_column.name, "id");
    assert!(id_column.pk);

    let name_column = &columns[1];
    assert_eq!(name_column.name, "name");
    assert!(name_column.notnull);

    let email_column = &columns[2];
    assert_eq!(email_column.name, "email");
    assert!(!email_column.notnull);
}
// A plain SELECT should return the column names and JSON values, with SQL
// NULL surfacing as `serde_json::Value::Null`.
#[test]
fn sql_execute_query_returns_rows() {
    let (_tmp, conn) = setup_test_db();
    let query = "SELECT name, email FROM users ORDER BY id";
    let (header, records) = execute_query(&conn, query).unwrap();
    assert_eq!(header, &["name", "email"]);
    assert_eq!(records.len(), 2);
    assert_eq!(records[0][0], serde_json::json!("Alice"));
    assert_eq!(records[0][1], serde_json::json!("alice@example.com"));
    assert_eq!(records[1][1], serde_json::Value::Null);
}
// An aliased aggregate should come back under its alias with the numeric count.
#[test]
fn sql_execute_query_aggregate() {
    let (_tmp, conn) = setup_test_db();
    let (header, records) = execute_query(&conn, "SELECT COUNT(*) as cnt FROM posts").unwrap();
    assert_eq!(header, &["cnt"]);
    assert_eq!(records[0][0], serde_json::json!(3));
}
// Joining users to posts should yield one row per post, ordered by post id.
#[test]
fn sql_execute_query_join() {
    let (_tmp, conn) = setup_test_db();
    let query =
        "SELECT u.name, p.title FROM users u JOIN posts p ON u.id = p.user_id ORDER BY p.id";
    let (_header, records) = execute_query(&conn, query).unwrap();
    assert_eq!(records.len(), 3);
    assert_eq!(records[0][0], serde_json::json!("Alice"));
    assert_eq!(records[2][0], serde_json::json!("Bob"));
}
// A connection obtained through `open_db` must reject writes.
#[test]
fn sql_open_db_read_only() {
    // The fixture connection is used (dropped explicitly below), so it gets a
    // regular name rather than an underscore-prefixed "unused" one.
    let (tmp, conn) = setup_test_db();
    // Close the fixture's own connection before reopening the same file
    // through `open_db`.
    drop(conn);
    let ro_conn = open_db(tmp.path()).unwrap();
    let result = ro_conn.execute("INSERT INTO users VALUES (99, 'Fail', NULL)", []);
    assert!(result.is_err(), "read-only connection should reject writes");
}
// A freshly created table should show up with zero rows and both columns.
#[test]
fn sql_empty_table_schema() {
    let db_file = tempfile::NamedTempFile::new().unwrap();
    let conn = Connection::open(db_file.path()).unwrap();
    let ddl = "CREATE TABLE empty_tbl (id INTEGER PRIMARY KEY, data BLOB)";
    conn.execute_batch(ddl).unwrap();

    let overview = schema_overview(&conn).unwrap();
    let only_table = &overview[0];
    assert_eq!(only_table.row_count, 0);
    assert_eq!(only_table.columns.len(), 2);
}
// --- graph command ---
// Creates a temp project whose `main.rs` has `main` calling `helper`, indexes
// it into `.tsift/index.db`, and returns the temp directory handle.
fn setup_graph_index() -> tempfile::TempDir {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    let source = "fn helper() { println!(\"hi\"); }\nfn main() { helper(); Vec::new(); }";
    std::fs::write(root.join("main.rs"), source).unwrap();
    let index_db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
    index_db.apply_changes(root).unwrap();
    project
}
// Extends the indexed graph fixture with an agent-doc session at
// `tasks/software/tsift.md` containing an exchange, a queue entry and a
// backlog item that all reference `#kgnv`.
fn setup_traversal_project() -> tempfile::TempDir {
    let session_doc = r#"---
agent_doc_session: tsift-v0.1
agent_doc_format: template
---
## Exchange
<!-- agent:exchange patch=append -->
❯ do [#kgnv]
Completed `#kgnv`; touched files `main.rs`; tests `cargo test traversal_graph`; follow-up `#gfix`.
<!-- /agent:exchange -->
<!-- agent:queue -->
dispatch #spec-test-build-install-commit-push
- do [#kgnv]
<!-- /agent:queue -->
## Backlog
<!-- agent:backlog -->
- [ ] [#kgnv] Fix helper traversal handles while preserving graph navigation.
<!-- /agent:backlog -->
"#;
    let project = setup_graph_index();
    let task_dir = project.path().join("tasks/software");
    std::fs::create_dir_all(&task_dir).unwrap();
    std::fs::write(task_dir.join("tsift.md"), session_doc).unwrap();
    project
}
// Creates an indexed temp project (`main.rs` and `Cargo.toml`) plus an
// agent-doc session whose backlog lists `#prep`, `#alpha`, `#beta` and `#gamma`.
fn setup_dependency_dag_project() -> tempfile::TempDir {
    let session_doc = r#"---
agent_doc_session: tsift-dag
agent_doc_format: template
---
## Exchange
<!-- agent:exchange patch=append -->
Completed `#alpha`; touched files `main.rs`; tests `cargo test dependency_dag`; follow-up `#gamma`.
<!-- /agent:exchange -->
## Backlog
<!-- agent:backlog -->
- [ ] [#prep] Prepare Cargo.toml configuration before shared helper work.
- [ ] [#alpha] Update shared_helper in main.rs after #prep.
- [ ] [#beta] Refactor shared_helper tests in main.rs.
- [ ] [#gamma] Follow-up review for graph navigation.
<!-- /agent:backlog -->
"#;
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    std::fs::write(
        root.join("main.rs"),
        "fn shared_helper() {}\nfn main() { shared_helper(); }\n",
    )
    .unwrap();
    std::fs::write(
        root.join("Cargo.toml"),
        "[package]\nname = \"dag-fixture\"\n",
    )
    .unwrap();

    // Index the sources before the task document is written, as the original
    // fixture does.
    let index_db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
    index_db.apply_changes(root).unwrap();

    let task_dir = root.join("tasks/software");
    std::fs::create_dir_all(&task_dir).unwrap();
    std::fs::write(task_dir.join("tsift.md"), session_doc).unwrap();
    project
}
// Extends the indexed graph fixture with a backlog in which `#left` and
// `#right` each mention the other.
fn setup_dependency_dag_cycle_project() -> tempfile::TempDir {
    let session_doc = r#"---
agent_doc_session: tsift-dag-cycle
agent_doc_format: template
---
## Backlog
<!-- agent:backlog -->
- [ ] [#left] Left side depends on #right.
- [ ] [#right] Right side depends on #left.
<!-- /agent:backlog -->
"#;
    let project = setup_graph_index();
    let task_dir = project.path().join("tasks/software");
    std::fs::create_dir_all(&task_dir).unwrap();
    std::fs::write(task_dir.join("tsift.md"), session_doc).unwrap();
    project
}
// Inserts one cached summary for `helper` in `main.rs` into
// `<dir>/.tsift/summaries.db`, with two entities, one `uses` relationship and
// two concept labels.
fn seed_traversal_semantic_summaries(dir: &Path) {
    let summary_db = summarize::SummaryDb::open(&dir.join(".tsift/summaries.db")).unwrap();

    let entities = vec![
        summarize::Entity {
            name: "helper".to_string(),
            kind: "function".to_string(),
            description: "Builds graph navigation handles.".to_string(),
        },
        summarize::Entity {
            name: "TraversalGraph".to_string(),
            kind: "type".to_string(),
            description: "Carries GraphStore-backed traversal rows.".to_string(),
        },
    ];
    let relationships = vec![summarize::Relationship {
        from: "helper".to_string(),
        to: "TraversalGraph".to_string(),
        kind: "uses".to_string(),
    }];
    let concept_labels = vec![
        "graph navigation".to_string(),
        "semantic extraction".to_string(),
    ];

    let helper_summary = summarize::Summary {
        id: 0,
        symbol_name: "helper".to_string(),
        file_path: "main.rs".to_string(),
        content_hash: "hash-main".to_string(),
        summary: "helper builds graph navigation handles for traversal.".to_string(),
        entities: Some(entities),
        relationships: Some(relationships),
        concept_labels: Some(concept_labels),
        extracted_at: "1700000000".to_string(),
        model: "test-model".to_string(),
        tokens_input: Some(10),
        tokens_output: Some(5),
    };
    summary_db.insert(&helper_summary).unwrap();
}
// `helper` should have exactly one caller in the fixture: `main`.
#[test]
fn graph_callers_query() {
    let project = setup_graph_index();
    let index_db = index::IndexDb::open(&project.path().join(".tsift/index.db")).unwrap();
    let helper_callers = index_db.callers_of("helper").unwrap();
    assert_eq!(helper_callers.len(), 1);
    assert_eq!(helper_callers[0].caller_name, "main");
}
// `main` should list both `helper` and `new` (from `Vec::new()`) as callees.
#[test]
fn graph_callees_query() {
    let project = setup_graph_index();
    let index_db = index::IndexDb::open(&project.path().join(".tsift/index.db")).unwrap();
    let main_callees = index_db.callees_of("main").unwrap();
    let callee_names = main_callees
        .iter()
        .map(|edge| edge.callee_name.as_str())
        .collect::<Vec<&str>>();
    assert!(callee_names.contains(&"helper"));
    assert!(callee_names.contains(&"new"));
}
// Looking up callers of an unknown symbol should succeed with an empty result.
#[test]
fn graph_no_callers_returns_empty() {
    let project = setup_graph_index();
    let index_db = index::IndexDb::open(&project.path().join(".tsift/index.db")).unwrap();
    let unknown_callers = index_db.callers_of("nonexistent").unwrap();
    assert!(unknown_callers.is_empty());
}
// Running `cmd_graph` on a project with no index should succeed and leave an
// index with no pending new, modified or deleted files.
#[test]
fn graph_cmd_autoindexes_missing_index_by_default() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    std::fs::write(
        root.join("main.rs"),
        "fn helper() {}\nfn main() { helper(); }\n",
    )
    .unwrap();

    // Positional arguments are kept exactly as in the original call; their
    // meaning is defined by `cmd_graph`'s signature elsewhere in the crate.
    let outcome = cmd_graph(
        "helper",
        root,
        true,
        false,
        None,
        20,
        false,
        true,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());

    let index_db = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let pending = index_db.compute_changes(root).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
// Handles should carry a kind-specific prefix and be identical across two
// builds of the same project.
#[test]
fn traversal_graph_has_stable_typed_handles() {
    let project = setup_traversal_project();
    let root = project.path();
    let first_build = build_traversal_graph(root, root, None).unwrap();
    let second_build = build_traversal_graph(root, root, None).unwrap();

    let file_node = resolve_traversal_node(&first_build, "main.rs").unwrap();
    let symbol_node = resolve_traversal_node(&first_build, "helper").unwrap();
    let backlog_node = resolve_traversal_node(&first_build, "#kgnv").unwrap();
    let session_node = resolve_traversal_node(&first_build, "tsift-v0.1").unwrap();

    assert!(file_node.handle.starts_with("gfil-"));
    assert!(symbol_node.handle.starts_with("gsym-"));
    assert!(backlog_node.handle.starts_with("gbak-"));
    assert!(session_node.handle.starts_with("gses-"));

    let symbol_again = resolve_traversal_node(&second_build, "helper").unwrap();
    assert_eq!(symbol_node.handle, symbol_again.handle);
    let backlog_again = resolve_traversal_node(&second_build, "#kgnv").unwrap();
    assert_eq!(backlog_node.handle, backlog_again.handle);
}
// The `#kgnv` backlog item names `helper`, so a `mentions` edge between the
// two nodes is expected.
#[test]
fn traversal_graph_links_backlog_items_to_code_tokens() {
    let project = setup_traversal_project();
    let graph = build_traversal_graph(project.path(), project.path(), None).unwrap();
    let backlog_node = resolve_traversal_node(&graph, "#kgnv").unwrap();
    let helper_node = resolve_traversal_node(&graph, "helper").unwrap();

    let has_mention = graph.edges.iter().any(|candidate| {
        candidate.relation == "mentions"
            && candidate.from == backlog_node.handle
            && candidate.to == helper_node.handle
    });
    assert!(has_mention);
}
// Building from a session file should keep the backlog-to-symbol mention but
// carry no `calls` edges; building from the project root should still have them.
#[test]
fn session_hinted_traversal_skips_global_call_edges() {
    let project = setup_traversal_project();
    let root = project.path();
    let session_path = root.join("tasks/software/tsift.md");

    let session_graph = build_traversal_graph_source(root, &session_path, None).unwrap();
    let backlog_node = resolve_traversal_node(&session_graph, "#kgnv").unwrap();
    let helper_node = resolve_traversal_node(&session_graph, "helper").unwrap();
    assert!(session_graph.edges.iter().any(|candidate| {
        candidate.relation == "mentions"
            && candidate.from == backlog_node.handle
            && candidate.to == helper_node.handle
    }));
    assert!(
        session_graph
            .edges
            .iter()
            .all(|candidate| candidate.relation != "calls"),
        "session-hinted graph-db projections should not materialize unrelated global call edges"
    );

    let root_graph = build_traversal_graph_source(root, root, None).unwrap();
    assert!(
        root_graph
            .edges
            .iter()
            .any(|candidate| candidate.relation == "calls"),
        "root/full projections still carry the complete indexed call graph"
    );
}
// A task document named `tsift.md` should resolve to the `tsift` scope rooted
// at the `src/tsift` submodule, with a matching per-scope index path.
#[test]
fn agent_doc_task_path_infers_matching_workspace_scope() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    std::fs::create_dir_all(root.join("src/tsift")).unwrap();
    std::fs::create_dir_all(root.join("tasks/software")).unwrap();
    std::fs::write(
        root.join(".gitmodules"),
        "[submodule \"src/tsift\"]\n\tpath = src/tsift\n\turl = https://example.invalid/tsift.git\n",
    )
    .unwrap();
    let task_doc = root.join("tasks/software/tsift.md");
    std::fs::write(&task_doc, "# tsift\n").unwrap();

    let index_targets = resolve_search_index_targets(root, &task_doc, None, false).unwrap();
    let query_db_path = resolve_query_db_path(root, &task_doc, None).unwrap();
    let cfg = config::Config::load(root).unwrap();

    assert_eq!(index_targets.len(), 1);
    let target = &index_targets[0];
    assert_eq!(target.scope_name.as_deref(), Some("tsift"));
    assert_eq!(target.source_root, root.join("src/tsift"));
    assert!(target.db_path.ends_with(".tsift/indexes/tsift/index.db"));
    assert_eq!(query_db_path, cfg.db_path_for(root, "tsift"));
}
// The queued `do [#kgnv]` entry should become a `job_packet` node that
// `targets` the backlog item, and the same packet should be readable from the
// SQLite graph store afterwards.
#[test]
fn traversal_graph_links_agent_doc_queue_job_packets_to_backlog() {
    let project = setup_traversal_project();
    let graph = build_traversal_graph(project.path(), project.path(), None).unwrap();
    let job_node = resolve_traversal_node(&graph, "do #kgnv").unwrap();
    let backlog_node = resolve_traversal_node(&graph, "#kgnv").unwrap();

    assert_eq!(job_node.kind, "job_packet");
    assert!(job_node.handle.starts_with("gjob-"));
    assert!(graph.edges.iter().any(|candidate| {
        candidate.relation == "targets"
            && candidate.from == job_node.handle
            && candidate.to == backlog_node.handle
    }));

    let store = SqliteGraphStore::open(&project.path().join(".tsift/graph.db")).unwrap();
    let jobs = store.nodes_by_kind("job_packet").unwrap();
    let ref_id = "kgnv".to_string();
    assert!(
        jobs.iter()
            .any(|packet| packet.properties.get("ref_id") == Some(&ref_id)),
        "expected queued job packet in graph store, got {jobs:?}"
    );
}
// A decorated Python handler should produce a `route` node for `/items` with a
// `handled_by` edge to `list_items`.
#[test]
fn traversal_graph_includes_routes_and_handler_edges() {
    let api_source = r#"@router.get("/items")
def list_items():
return []
"#;
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    std::fs::write(root.join("api.py"), api_source).unwrap();
    let index_db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
    index_db.apply_changes(root).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();
    let route_node = resolve_traversal_node(&graph, "/items").unwrap();
    let handler_node = resolve_traversal_node(&graph, "list_items").unwrap();
    assert_eq!(route_node.kind, "route");
    assert!(graph.edges.iter().any(|candidate| {
        candidate.relation == "handled_by"
            && candidate.from == route_node.handle
            && candidate.to == handler_node.handle
    }));
}
// With only one neighbour admitted beside the origin, the `mentions` edge
// should win over the `unknown` one even though its target sorts later.
#[test]
fn traversal_neighborhood_handles_prioritizes_high_signal_edges_when_limited() {
    let edge_from_origin = |to: &str, relation: &str| TraversalEdge {
        from: "origin".to_string(),
        to: to.to_string(),
        relation: relation.to_string(),
        label: None,
        weight: 1,
    };
    let edges = vec![
        edge_from_origin("aaa_low", "unknown"),
        edge_from_origin("zzz_high", "mentions"),
    ];

    let handles = traversal_neighborhood_handles(&edges, "origin", 1, 2);
    assert!(handles.contains("origin"));
    assert!(handles.contains("zzz_high"), "{handles:?}");
    assert!(!handles.contains("aaa_low"), "{handles:?}");
}
// After a traversal build, the SQLite graph store should hold the backlog
// node, a projection metadata node, and source_handle / worker_context /
// worker_result rows derived from the session fixture.
#[test]
fn traversal_materializes_provider_neutral_sqlite_graph() {
    let project = setup_traversal_project();
    let graph = build_traversal_graph(project.path(), project.path(), None).unwrap();
    let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
    let store = SqliteGraphStore::open(&project.path().join(".tsift/graph.db")).unwrap();

    // Expected property values, built once and compared by reference below.
    let ref_id = "kgnv".to_string();
    let main_file = "main.rs".to_string();

    let backlog_nodes = store.nodes_by_kind("backlog").unwrap();
    assert!(
        backlog_nodes.iter().any(|row| row.id == backlog.handle
            && row.properties.get("ref_id") == Some(&ref_id)),
        "expected materialized backlog node, got {backlog_nodes:?}"
    );

    let projection_version = GRAPH_PROJECTION_VERSION.to_string();
    assert!(
        store
            .all_nodes()
            .unwrap()
            .iter()
            .any(|row| row.kind == GRAPH_PROJECTION_META_KIND
                && row.properties.get("projection_version") == Some(&projection_version)),
        "expected projection metadata node"
    );

    let source_handles = store.nodes_by_kind("source_handle").unwrap();
    assert!(
        source_handles
            .iter()
            .any(|row| row.properties.get("file") == Some(&main_file)),
        "expected bounded source_handle rows, got {source_handles:?}"
    );

    let worker_context = store.nodes_by_kind("worker_context").unwrap();
    let session_target = "tasks/software/tsift.md".to_string();
    assert!(
        worker_context
            .iter()
            .any(|row| row.properties.get("target") == Some(&session_target)),
        "expected bounded worker_context rows, got {worker_context:?}"
    );

    let worker_results = store.nodes_by_kind("worker_result").unwrap();
    let completed = "completed".to_string();
    let follow_up = "gfix".to_string();
    assert!(
        worker_results.iter().any(|row| {
            let props = &row.properties;
            props.get("ref_id") == Some(&ref_id)
                && props.get("status") == Some(&completed)
                && props.get("touched_files") == Some(&main_file)
                && props.get("follow_up_ids") == Some(&follow_up)
        }),
        "expected worker_result rows, got {worker_results:?}"
    );
}
// Seeded summaries should surface as semantic concept and entity nodes, with
// persisted embeddings and edges linking them to `helper`.
#[test]
fn traversal_projection_materializes_cached_semantic_rows() {
    let project = setup_traversal_project();
    seed_traversal_semantic_summaries(project.path());
    let graph = build_traversal_graph(project.path(), project.path(), None).unwrap();

    let helper_node = resolve_traversal_node(&graph, "helper").unwrap();
    let concept_node = resolve_traversal_node(&graph, "graph navigation").unwrap();
    let entity_node = resolve_traversal_node(&graph, "TraversalGraph").unwrap();
    assert_eq!(concept_node.kind, "semantic_concept");
    assert_eq!(entity_node.kind, "semantic_entity");
    assert!(concept_node.handle.starts_with("gcon-"));
    assert!(entity_node.handle.starts_with("gent-"));

    let store = SqliteGraphStore::open(&project.path().join(".tsift/graph.db")).unwrap();
    assert!(
        store
            .nodes_by_kind("semantic_concept")
            .unwrap()
            .iter()
            .any(|row| row.label == "semantic extraction"
                && row.properties.contains_key("embedding")),
        "expected persisted concept embeddings"
    );
    assert!(
        store
            .outgoing_edges(&helper_node.handle, Some("mentions_concept"))
            .unwrap()
            .iter()
            .any(|link| link.to_id == concept_node.handle),
        "expected helper symbol to link to cached summary concept"
    );

    let uses = "uses".to_string();
    assert!(
        store
            .outgoing_edges(
                &semantic_entity_handle("helper", "function"),
                Some("semantic_relation")
            )
            .unwrap()
            .iter()
            .any(|link| link.to_id == entity_node.handle
                && link.properties.get("relationship_kind") == Some(&uses)),
        "expected LLM relationship rows projected into GraphStore"
    );
}
// A concept-only related query for "graph navigation" should return that
// concept with a score above 0.9 and report the embedding model constant.
#[test]
fn semantic_related_query_uses_persisted_graph_embeddings() {
    let project = setup_traversal_project();
    let root = project.path();
    seed_traversal_semantic_summaries(root);
    refresh_traversal_graph_store(root, root, None).unwrap();
    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let report = semantic_related_report_from_store(
        root,
        None,
        "graph navigation",
        5,
        SemanticRelatedKind::Concept,
        &store,
    )
    .unwrap();

    assert_eq!(report.embedding_model, SEMANTIC_EMBEDDING_MODEL);
    assert!(
        report.items.iter().any(|hit| hit.score > 0.9
            && hit.kind == "semantic_concept"
            && hit.label == "graph navigation"),
        "expected nearest concept match from graph embeddings, got {:?}",
        report.items
    );
}
// A `Related` graph-db query should seed from the matching semantic concept
// and expand along incident edges far enough to reach the `helper` symbol.
#[test]
fn graph_db_related_query_uses_semantic_seeds_and_incident_neighborhoods() {
    let project = setup_traversal_project();
    let root = project.path();
    seed_traversal_semantic_summaries(root);
    refresh_traversal_graph_store(root, root, None).unwrap();
    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let related_query = GraphDbQuery::Related {
        query: "graph navigation".to_string(),
        kind: SemanticRelatedKind::All,
        depth: 1,
        seed_limit: 2,
        limit: 20,
    };
    let freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let report = graph_db_report_from_store(
        root,
        None,
        "sqlite",
        related_query,
        &store,
        freshness,
        Vec::new(),
    )
    .unwrap();

    let knowledge = report.knowledge_retrieval.as_ref().unwrap();
    assert_eq!(knowledge.mode, "semantic_seeded_neighborhood");
    assert_eq!(knowledge.seed_kind, "all");
    assert_eq!(knowledge.depth, 1);
    assert!(
        knowledge
            .diagnostics
            .iter()
            .any(|message| message.contains("incident"))
    );

    assert!(
        report.semantic_related.iter().any(|hit| hit.score > 0.9
            && hit.kind == "semantic_concept"
            && hit.label == "graph navigation"),
        "expected natural-language query to seed the graph navigation concept, got {:?}",
        report.semantic_related
    );
    assert!(
        report
            .nodes
            .iter()
            .any(|row| row.kind == "semantic_concept" && row.label == "graph navigation")
    );
    assert!(
        report
            .nodes
            .iter()
            .any(|row| row.kind == "symbol" && row.label == "helper"),
        "incident expansion from semantic seed should recover source symbols, got {:?}",
        report
            .nodes
            .iter()
            .map(|row| (&row.kind, &row.label))
            .collect::<Vec<_>>()
    );
    assert!(
        report
            .edges
            .iter()
            .any(|link| link.kind == "mentions_concept")
    );
}
// One `mentions_concept` neighbour competes with 24 `weak_link` neighbours
// whose ids sort earlier. Under a three-node limit, the seed and the
// `mentions_concept` neighbour should come first, with truncation reported.
#[test]
fn graph_db_semantic_seeded_neighborhood_scores_before_caps() {
    let low_ids: Vec<String> = (0..24).map(|idx| format!("aaa_low_{idx:02}")).collect();

    let mut nodes = vec![
        SubstrateGraphNode::new("seed", "semantic_concept", "graph budget"),
        SubstrateGraphNode::new("zzz_high", "symbol", "high_signal"),
    ];
    nodes.extend(
        low_ids
            .iter()
            .enumerate()
            .map(|(idx, id)| SubstrateGraphNode::new(id.clone(), "note", format!("low {idx}"))),
    );

    let mut edges = vec![SubstrateGraphEdge::new(
        "zzz_high",
        "seed",
        "mentions_concept",
    )];
    edges.extend(
        low_ids
            .into_iter()
            .map(|id| SubstrateGraphEdge::new(id, "seed", "weak_link")),
    );

    let mut store = SqliteGraphStore::in_memory().unwrap();
    store
        .replace_projection(&GraphProjection { nodes, edges })
        .unwrap();

    let subgraph =
        graph_db_semantic_seeded_neighborhood(&store, &["seed".to_string()], 1, 3).unwrap();
    assert_eq!(subgraph.nodes.len(), 3);
    assert_eq!(subgraph.nodes[0].id, "seed");
    assert_eq!(
        subgraph.nodes[1].id, "zzz_high",
        "expected semantic mention edge to survive caps before lexicographic low-signal nodes: {:?}",
        subgraph.nodes
    );
    assert!(subgraph.truncated);
    assert!(
        subgraph
            .diagnostics
            .iter()
            .any(|message| message.contains("per-node edge scan cap")),
        "{:?}",
        subgraph.diagnostics
    );
    assert!(
        subgraph
            .diagnostics
            .iter()
            .any(|message| message.contains("skipped")),
        "{:?}",
        subgraph.diagnostics
    );
}
/// Builds a conflict-matrix candidate from graph evidence for target `kgnv` and checks that
/// it carries a positive semantic dispatch score with explanations, and that this score
/// breaks ties ahead of an otherwise identical candidate with the semantic data removed.
#[test]
fn conflict_matrix_uses_semantic_rows_as_dispatch_ranking_signal() {
    let project = setup_traversal_project();
    let root = project.path();
    seed_traversal_semantic_summaries(root);
    init_git_repo(root);
    let session = root.join("tasks/software/tsift.md");
    refresh_traversal_graph_store(root, &session, None).unwrap();
    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let freshness = sqlite_graph_freshness(&store, "root").unwrap();
    let evidence_input = GraphDbEvidenceInput {
        root,
        scope: None,
        backend: "sqlite",
        target: "kgnv",
        depth: 4,
        limit: 8,
        store: &store,
        freshness,
        warnings: Vec::new(),
    };
    let evidence = graph_db_evidence_report_from_store(evidence_input).unwrap();
    let has_semantic_row = evidence
        .semantic_related
        .iter()
        .any(|node| node.kind == "semantic_concept" && node.label == "graph navigation");
    assert!(
        has_semantic_row,
        "expected semantic evidence rows, got {:?}",
        evidence
            .semantic_related
            .iter()
            .map(|node| (&node.kind, &node.label))
            .collect::<Vec<_>>()
    );

    // Diff and impact inputs are both computed in `cached` mode.
    let diff_options = diff_digest::DiffDigestOptions {
        cached: true,
        revision: None,
        max_parsed_files: None,
    };
    let cached_diff = diff_digest::compute(root, diff_options).unwrap();
    let impact_options = impact::ImpactOptions {
        cached: true,
        revision: None,
        scope: None,
        limit: 10,
    };
    let impact_report = impact::compute(root, impact_options).unwrap();

    let graph_nodes = store.all_nodes().unwrap();
    let graph_index = conflict_matrix_graph_index(&graph_nodes);
    let semantic_candidate = conflict_matrix_candidate_from_evidence(
        root,
        &evidence,
        &graph_index,
        &cached_diff,
        &impact_report,
    );
    assert!(semantic_candidate.semantic_dispatch_score > 0);
    let explains_ranking = semantic_candidate
        .semantic_dispatch_reasons
        .iter()
        .any(|reason| reason.contains("semantic_concept") && reason.contains("owned file"));
    assert!(
        explains_ranking,
        "expected semantic ranking explanations, got {:?}",
        semantic_candidate.semantic_dispatch_reasons
    );
    assert!(
        semantic_candidate
            .semantic_related
            .iter()
            .any(|item| item.label == "graph navigation")
    );

    // Clone the candidate and strip its semantic data so only the score differs.
    let mut plain_candidate = semantic_candidate.clone();
    plain_candidate.target = String::from("plain");
    plain_candidate.semantic_related.clear();
    plain_candidate.semantic_dispatch_score = 0;
    plain_candidate.semantic_dispatch_reasons.clear();

    let mut ranked = [plain_candidate, semantic_candidate];
    ranked.sort_by(|a, b| {
        a.risk
            .cmp(&b.risk)
            .then_with(|| a.risk_score.cmp(&b.risk_score))
            // Higher semantic score sorts first, hence the reversed operands.
            .then_with(|| b.semantic_dispatch_score.cmp(&a.semantic_dispatch_score))
            .then_with(|| a.target.cmp(&b.target))
    });
    assert_eq!(ranked[0].target, "kgnv");
}
/// Builds the dependency DAG report for the fixture project and checks targets, the three
/// expected edge kinds, the absence of cycles, the first two topological batches and the
/// replay command; finally smoke-runs `cmd_dependency_dag` with JSON output.
#[test]
fn dependency_dag_extracts_explicit_overlap_and_follow_up_edges() {
    let project = setup_dependency_dag_project();
    let root = project.path();
    let session = root.join("tasks/software/tsift.md");
    let report = build_dependency_dag_report(root, None, &[], 4, 12).unwrap();

    assert_eq!(report.contract_version, "dependency-dag-v1");
    assert_eq!(
        report.targets,
        vec![
            String::from("prep"),
            String::from("alpha"),
            String::from("beta"),
            String::from("gamma"),
        ]
    );

    let has_edge = |from: &str, to: &str, kind: &str| {
        report
            .edges
            .iter()
            .any(|edge| edge.from == from && edge.to == to && edge.kind == kind)
    };
    assert!(has_edge("prep", "alpha", "explicit_depends_on"));
    assert!(has_edge("alpha", "gamma", "worker_result_follow_up"));

    // The shared-resource edge must also name the overlapping file and symbol.
    let shared_resource_edge = report.edges.iter().any(|edge| {
        edge.from == "alpha"
            && edge.to == "beta"
            && edge.kind == "shared_resource"
            && edge.shared_files.contains(&String::from("main.rs"))
            && edge.shared_symbols.contains(&String::from("shared_helper"))
    });
    assert!(shared_resource_edge);

    assert!(
        !report.cycle_diagnostics.has_cycles,
        "{:?}",
        report.cycle_diagnostics
    );
    assert_eq!(report.topo_batches[0].targets, vec![String::from("prep")]);
    assert_eq!(report.topo_batches[1].targets, vec![String::from("alpha")]);
    assert!(
        report.replay_commands[0].contains("dependency-dag"),
        "{:?}",
        report.replay_commands
    );

    let requested_targets = [String::from("alpha"), String::from("beta")];
    let json_format = OutputFormat {
        json_output: true,
        compact: false,
        pretty: false,
        terse: false,
        schema: false,
        envelope: false,
    };
    cmd_dependency_dag(&session, None, &requested_targets, 4, 12, json_format).unwrap();
}
/// Checks that the cycle fixture yields cycle diagnostics listing `left` and `right` as
/// blocked nodes, with an `explicit_depends_on` edge in each direction.
#[test]
fn dependency_dag_reports_cycles_from_explicit_depends_on_text() {
    let project = setup_dependency_dag_cycle_project();
    let report = build_dependency_dag_report(project.path(), None, &[], 4, 12).unwrap();
    let cycles = &report.cycle_diagnostics;

    assert!(cycles.has_cycles);
    assert_eq!(
        cycles.blocked_nodes,
        vec![String::from("left"), String::from("right")]
    );

    let has_cycle_edge = |from: &str, to: &str| {
        cycles
            .cycle_edges
            .iter()
            .any(|edge| edge.from == from && edge.to == to && edge.kind == "explicit_depends_on")
    };
    assert!(has_cycle_edge("left", "right"));
    assert!(has_cycle_edge("right", "left"));
}
/// Loads the same traversal projection into an in-memory SQLite store and an in-memory
/// Convex store, then checks that both rebuild graphs with equal node/edge counts and that
/// the Convex graph contains the backlog -> helper `mentions` edge.
#[test]
fn traversal_projection_queries_match_sqlite_and_convex_stores() {
    let project = setup_traversal_project();
    let root = project.path();
    let source_graph = build_traversal_graph_source(root, root, None).unwrap();
    let projection = traversal_projection_from_graph(root, None, &source_graph).unwrap();

    let mut sqlite_store = SqliteGraphStore::in_memory().unwrap();
    sqlite_store.replace_projection(&projection).unwrap();
    let convex_store = ConvexGraphStore::new(MemoryConvexGraphClient::default());
    projection.upsert_into(&convex_store).unwrap();

    let sqlite_graph = traversal_graph_from_store(root, &sqlite_store).unwrap();
    let convex_graph = traversal_graph_from_store(root, &convex_store).unwrap();
    assert_eq!(sqlite_graph.nodes.len(), convex_graph.nodes.len());
    assert_eq!(sqlite_graph.edges.len(), convex_graph.edges.len());

    // Resolve one endpoint in each graph: the handles must line up across backends.
    let backlog_node = resolve_traversal_node(&sqlite_graph, "#kgnv").unwrap();
    let helper_node = resolve_traversal_node(&convex_graph, "helper").unwrap();
    let mention_present = convex_graph.edges.iter().any(|edge| {
        edge.from == backlog_node.handle
            && edge.to == helper_node.handle
            && edge.relation == "mentions"
    });
    assert!(mention_present);
}
// End-to-end check of the graph-db report API against the on-disk SQLite store.
// Exercises, in order: Neighborhood, Edge, Edges, Incident and Schema queries.
// Later queries reuse the backlog handle and the edge id found by the first query.
#[test]
fn graph_db_api_queries_sqlite_neighborhood_and_schema() {
let dir = setup_traversal_project();
let graph = build_traversal_graph(dir.path(), dir.path(), None).unwrap();
let store = SqliteGraphStore::open(&dir.path().join(".tsift/graph.db")).unwrap();
let freshness = sqlite_graph_freshness(&store, "root").unwrap();
assert_eq!(freshness.status, "current");
let backlog = resolve_traversal_node(&graph, "#kgnv").unwrap();
// --- Neighborhood query: depth 1 around the backlog node, `mentions` edges only.
let report = graph_db_report_from_store(
dir.path(),
None,
"sqlite",
GraphDbQuery::Neighborhood {
id: backlog.handle.clone(),
depth: 1,
edge_kind: Some("mentions".to_string()),
cursor: None,
limit: None,
property_filters: Vec::new(),
},
&store,
freshness,
Vec::new(),
)
.unwrap();
assert!(
report
.edges
.iter()
.any(|edge| edge.from_id == backlog.handle && edge.kind == "mentions"),
"expected backlog mention edge, got {:?}",
report.edges
);
// At least one ranked neighbor must be a depth-1 `mentions` neighbor (not the
// backlog node itself) that meets the quality-score thresholds below.
assert!(
report.ranked_neighbors.iter().any(|neighbor| {
neighbor.depth == Some(1)
&& neighbor.edge_kinds.iter().any(|kind| kind == "mentions")
&& neighbor.node_id != backlog.handle
&& neighbor.handle_coverage_pct >= 95.0
&& neighbor.duplicate_name_precision >= 0.99
}),
"expected ranked neighborhood neighbors with quality scores, got {:?}",
report.ranked_neighbors
);
assert!(report.ranked_neighbors.len() <= GRAPH_DB_RANKED_NEIGHBOR_CAP);
// The ranking gate must report ranked output as off by default, with stable
// node-id ordering and both quality metrics listed as required.
let ranking_gate = report.neighborhood_ranking_gate.as_ref().unwrap();
assert!(!ranking_gate.ranked_output_default);
assert_eq!(ranking_gate.default_order, "stable_node_id");
assert!(
ranking_gate
.diagnostics
.iter()
.any(|diagnostic| diagnostic.contains("score-capped")),
"{ranking_gate:?}"
);
assert!(
ranking_gate
.required_metrics
.iter()
.any(|metric| metric == "handle_coverage_pct")
);
assert!(
ranking_gate
.required_metrics
.iter()
.any(|metric| metric == "duplicate_name_precision")
);
// Page diagnostics are expected to name the SQLite index used by the query.
assert!(
report
.page
.as_ref()
.unwrap()
.diagnostics
.iter()
.any(|diagnostic| diagnostic.contains("idx_graph_edges_from_kind")),
"expected SQLite neighborhood query plan diagnostics, got {:?}",
report.page.as_ref().unwrap().diagnostics
);
// --- Edge query: look the backlog mention edge up again by its key.
let edge_id = graph_db_edge_key(
report
.edges
.iter()
.find(|edge| edge.from_id == backlog.handle && edge.kind == "mentions")
.unwrap(),
);
let edge_report = graph_db_report_from_store(
dir.path(),
None,
"sqlite",
GraphDbQuery::Edge {
id: edge_id.clone(),
},
&store,
sqlite_graph_freshness(&store, "root").unwrap(),
Vec::new(),
)
.unwrap();
assert_eq!(
edge_report.edge.as_ref().map(graph_db_edge_key),
Some(edge_id.clone())
);
// --- Edges query: first page (limit 2) of `mentions` edges must include that edge.
let edges_report = graph_db_report_from_store(
dir.path(),
None,
"sqlite",
GraphDbQuery::Edges {
edge_kind: Some("mentions".to_string()),
cursor: None,
limit: Some(2),
property_filters: Vec::new(),
},
&store,
sqlite_graph_freshness(&store, "root").unwrap(),
Vec::new(),
)
.unwrap();
assert!(edges_report.edges.iter().any(|edge| edge.id == edge_id));
assert_eq!(
edges_report.page.as_ref().unwrap().returned_edges,
edges_report.edges.len()
);
// --- Incident query: limit 1, every returned edge must touch the backlog node.
let incident_report = graph_db_report_from_store(
dir.path(),
None,
"sqlite",
GraphDbQuery::Incident {
id: backlog.handle.clone(),
edge_kind: Some("mentions".to_string()),
cursor: None,
limit: Some(1),
property_filters: Vec::new(),
},
&store,
sqlite_graph_freshness(&store, "root").unwrap(),
Vec::new(),
)
.unwrap();
assert_eq!(incident_report.page.as_ref().unwrap().returned_edges, 1);
assert!(
incident_report
.edges
.iter()
.all(|edge| edge.from_id == backlog.handle || edge.to_id == backlog.handle),
"{:?}",
incident_report.edges
);
// --- Schema query: the advertised operations must include a `neighborhood` command.
let schema_report = graph_db_report_from_store(
dir.path(),
None,
"sqlite",
GraphDbQuery::Schema,
&store,
sqlite_graph_freshness(&store, "root").unwrap(),
Vec::new(),
)
.unwrap();
assert!(
schema_report
.schema
.unwrap()
.operations
.iter()
.any(|operation| operation.command.starts_with("neighborhood"))
);
}
/// While a rollback-journal lock is held on `graph.db`, the `status` operator report must
/// still read as `current`, record `SnapshotFallback` recovery, and warn about the lock.
#[test]
fn graph_db_status_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let project = setup_traversal_project();
    let root = project.path();
    refresh_traversal_graph_store(root, root, None).unwrap();
    let graph_db = root.join(".tsift/graph.db");
    // The guard must stay alive until the report has been produced.
    let _lock = hold_rollback_journal_lock(&graph_db);

    let report =
        graph_db_operator_report_from_disk(root, None, &graph_db, "status", None, vec![]).unwrap();

    assert_eq!(report.status, "current");
    assert_eq!(
        report.recovery,
        Some(index::ReadOnlyRecovery::SnapshotFallback)
    );
    let warned_about_lock = report
        .warnings
        .iter()
        .any(|warning| warning.contains("rollback-journal lock"));
    assert!(
        warned_about_lock,
        "expected rollback-journal recovery warning, got {:?}",
        report.warnings
    );
}
/// While a WAL database lock is held on `graph.db`, the `status` operator report must
/// still read as `current`, record `SnapshotFallbackWal` recovery, and emit the WAL-aware
/// fallback warning.
#[test]
fn graph_db_status_copies_wal_sidecars_when_locked() {
    let project = setup_traversal_project();
    let root = project.path();
    refresh_traversal_graph_store(root, root, None).unwrap();
    let graph_db = root.join(".tsift/graph.db");
    // The guard must stay alive until the report has been produced.
    let _lock = hold_wal_database_lock(&graph_db);

    let report =
        graph_db_operator_report_from_disk(root, None, &graph_db, "status", None, vec![]).unwrap();

    assert_eq!(report.status, "current");
    assert_eq!(
        report.recovery,
        Some(index::ReadOnlyRecovery::SnapshotFallbackWal)
    );
    let warned_about_wal = report
        .warnings
        .iter()
        .any(|warning| warning.contains("WAL-aware snapshot fallback"));
    assert!(
        warned_about_wal,
        "expected WAL recovery warning, got {:?}",
        report.warnings
    );
}
/// Runs the `graph-db` evidence command for target `kgnv` while a rollback-journal lock is
/// held on `graph.db` and expects the command to succeed.
///
/// On failure the assertion prints the returned error, so a regression shows why the
/// command failed instead of a bare `assertion failed: result.is_ok()`.
#[test]
fn graph_db_evidence_uses_snapshot_fallback_when_graph_db_is_locked() {
    let dir = setup_traversal_project();
    let session = dir.path().join("tasks/software/tsift.md");
    refresh_traversal_graph_store(dir.path(), &session, None).unwrap();
    let graph_db = dir.path().join(".tsift/graph.db");
    // Keep the lock guard alive for the whole command invocation.
    let _lock = hold_rollback_journal_lock(&graph_db);
    let result = cmd_graph_db(
        &session,
        None,
        GraphDbBackend::Sqlite,
        None,
        GraphDbQuery::Evidence {
            target: "kgnv".to_string(),
            depth: 3,
            limit: 8,
        },
        OutputFormat {
            json_output: false,
            compact: true,
            pretty: false,
            terse: false,
            schema: false,
            envelope: false,
        },
    );
    assert!(
        result.is_ok(),
        "expected graph-db evidence to succeed while graph.db is locked, got {:?}",
        result.err()
    );
}
/// Fixture: a freshness report with status `current`, not failing closed, carrying the
/// current projection version and a placeholder content hash.
fn current_graph_db_freshness() -> GraphDbFreshnessReport {
    let projection_version = String::from(GRAPH_PROJECTION_VERSION);
    let content_hash = String::from("fixture");
    GraphDbFreshnessReport {
        status: String::from("current"),
        fail_closed: false,
        projection_version: Some(projection_version),
        content_hash: Some(content_hash),
        source_watermark: None,
        diagnostics: vec![],
    }
}
/// Feeds a hand-built stale, fail-closed freshness report into the evidence builder and
/// checks that it returns an error whose message mentions failing closed and the
/// `graph-db --path ... refresh --json` repair command.
#[test]
fn graph_db_evidence_fails_closed_with_repair_command_for_stale_freshness() {
    let project = setup_traversal_project();
    let root = project.path();
    refresh_traversal_graph_store(root, root, None).unwrap();
    let store = SqliteGraphStore::open(&root.join(".tsift/graph.db")).unwrap();

    let stale_freshness = GraphDbFreshnessReport {
        status: String::from("stale"),
        fail_closed: true,
        projection_version: Some(String::from("old-v0")),
        content_hash: None,
        source_watermark: None,
        diagnostics: vec![String::from("projection content hash is missing")],
    };
    let input = GraphDbEvidenceInput {
        root,
        scope: None,
        backend: "sqlite",
        target: "kgnv",
        depth: 3,
        limit: 8,
        store: &store,
        freshness: stale_freshness,
        warnings: Vec::new(),
    };

    // Matched by pattern rather than `unwrap_err`, as in the original test.
    let Err(err) = graph_db_evidence_report_from_store(input) else {
        panic!("stale graph freshness should fail closed");
    };
    let message = err.to_string();
    for expected in ["failed closed", "graph-db --path", "refresh --json"] {
        assert!(message.contains(expected), "{message}");
    }
}
/// Test helper: runs a `Kind` query for `backlog` nodes filtered by `phase=open` with a
/// page limit of 2, starting at `cursor`, and returns the node ids together with the page
/// report.
///
/// Panics if the query fails or the report carries no page information.
fn paged_graph_ids(
    store: &impl GraphStore,
    cursor: Option<&str>,
) -> (Vec<String>, GraphDbPageReport) {
    let query = GraphDbQuery::Kind {
        kind: String::from("backlog"),
        cursor: cursor.map(String::from),
        limit: Some(2),
        property_filters: vec![String::from("phase=open")],
    };
    let report = graph_db_report_from_store(
        Path::new("."),
        None,
        "fixture",
        query,
        store,
        current_graph_db_freshness(),
        Vec::new(),
    )
    .unwrap();

    let ids: Vec<String> = report.nodes.iter().map(|node| node.id.clone()).collect();
    let page = report.page.unwrap();
    (ids, page)
}
/// Stores five backlog nodes (one with `phase=closed`) in both SQLite and Convex stores and
/// checks that two consecutive pages of the `phase=open` query agree between backends on
/// ids, cursor, truncation flag, returned counts and echoed property filters.
#[test]
fn graph_db_query_pagination_and_filters_match_sqlite_and_convex() {
    let mut nodes = Vec::new();
    for idx in 0..5 {
        // Only node 01 is closed, so the filter should drop exactly that one.
        let phase = match idx {
            1 => "closed",
            _ => "open",
        };
        let node =
            SubstrateGraphNode::new(format!("gbak-{idx:02}"), "backlog", format!("#{idx:02}"))
                .with_property("phase", phase);
        nodes.push(node);
    }
    let projection = GraphProjection {
        nodes,
        edges: Vec::new(),
    };
    let sqlite = SqliteGraphStore::in_memory().unwrap();
    projection.upsert_into(&sqlite).unwrap();
    let convex = ConvexGraphStore::new(MemoryConvexGraphClient::default());
    projection.upsert_into(&convex).unwrap();

    // First page.
    let (sqlite_ids, sqlite_page) = paged_graph_ids(&sqlite, None);
    let (convex_ids, convex_page) = paged_graph_ids(&convex, None);
    assert_eq!(sqlite_ids, vec!["gbak-00", "gbak-02"]);
    assert_eq!(sqlite_ids, convex_ids);
    assert_eq!(sqlite_page.next_cursor.as_deref(), Some("gbak-02"));
    assert!(sqlite_page.truncated);
    assert_eq!(sqlite_page.returned_nodes, convex_page.returned_nodes);
    assert_eq!(sqlite_page.property_filters, convex_page.property_filters);
    let names_kind_index = sqlite_page
        .diagnostics
        .iter()
        .any(|diagnostic| diagnostic.contains("idx_graph_nodes_kind"));
    assert!(
        names_kind_index,
        "expected SQLite kind query plan diagnostics, got {:?}",
        sqlite_page.diagnostics
    );

    // Second page, resumed from the cursor of the first SQLite page.
    let resume_at = sqlite_page.next_cursor.as_deref();
    let (sqlite_rest_ids, sqlite_rest_page) = paged_graph_ids(&sqlite, resume_at);
    let (convex_rest_ids, convex_rest_page) = paged_graph_ids(&convex, resume_at);
    assert_eq!(sqlite_rest_ids, vec!["gbak-03", "gbak-04"]);
    assert_eq!(sqlite_rest_ids, convex_rest_ids);
    assert_eq!(sqlite_rest_page.next_cursor, None);
    assert!(!sqlite_rest_page.truncated);
    assert_eq!(
        sqlite_rest_page.returned_nodes,
        convex_rest_page.returned_nodes
    );
    assert_eq!(
        sqlite_rest_page.property_filters,
        convex_rest_page.property_filters
    );
}
/// The shortest handle path from backlog item `#kgnv` to symbol `main` must start and end
/// at those nodes and contain at least three handles.
#[test]
fn traversal_shortest_path_crosses_artifacts_and_symbols() {
    let project = setup_traversal_project();
    let root = project.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let start = resolve_traversal_node(&graph, "#kgnv").unwrap();
    let goal = resolve_traversal_node(&graph, "main").unwrap();

    let path = traversal_shortest_handles(&graph.edges, &start.handle, &goal.handle).unwrap();

    assert_eq!(path.first(), Some(&start.handle));
    assert_eq!(path.last(), Some(&goal.handle));
    let hop_count = path.len();
    assert!(
        hop_count >= 3,
        "expected backlog -> symbol -> main, got {path:?}"
    );
}
/// A depth-1 traversal report rooted at `#kgnv` must be in `neighborhood` mode, recommend
/// `helper` with a "matched" reason, and carry exploration source windows plus the
/// no-reread guidance text.
#[test]
fn traversal_report_recommends_next_bugfix_nodes() {
    let project = setup_traversal_project();
    let root = project.path();
    let graph = build_traversal_graph(root, root, None).unwrap();
    let report = traversal_report(root, None, graph, Some("#kgnv"), None, 1, 50).unwrap();

    assert_eq!(report.mode, "neighborhood");
    let recommends_helper = report
        .recommendations
        .iter()
        .any(|rec| rec.label == "helper" && rec.reason.contains("matched"));
    assert!(
        recommends_helper,
        "expected helper recommendation, got {:?}",
        report.recommendations
    );

    let exploration = &report.exploration;
    assert!(
        !exploration.source_windows.is_empty(),
        "expected exploration source windows"
    );
    assert!(
        exploration
            .no_reread_guidance
            .contains("avoid whole-file reads")
    );
}
/// After `main.rs` is rewritten, building the traversal graph must emit an "index
/// refreshed" warning, resolve the new `fresh_helper` symbol, and leave the index with no
/// pending changes.
#[test]
fn traversal_graph_refreshes_stale_index_before_loading_symbols() {
    let project = setup_traversal_project();
    let root = project.path();
    // Pause before rewriting the source file (presumably so the change is
    // distinguishable from the indexed version — TODO confirm).
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten_source =
        "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n";
    std::fs::write(root.join("main.rs"), rewritten_source).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();
    let reported_refresh = graph.warnings.iter().any(|warning| {
        warning.contains("index refreshed") && warning.contains("graph traversal packet")
    });
    assert!(
        reported_refresh,
        "expected refresh diagnostic, got {:?}",
        graph.warnings
    );
    assert!(resolve_traversal_node(&graph, "fresh_helper").is_some());

    // Re-open the index read-only: nothing should be left to re-index.
    let db = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let summary = db.compute_changes(root).unwrap();
    assert_eq!(summary.new + summary.modified + summary.deleted, 0);
}
/// With the index writer lock held and `main.rs` rewritten, building the traversal graph
/// must warn about the raw-source fallback, expose `main.rs` as a raw-source node with a
/// `source-read` expand command, and not resolve the old `helper` symbol.
#[test]
fn traversal_graph_falls_back_to_raw_source_when_stale_refresh_is_blocked() {
    let project = setup_traversal_project();
    let root = project.path();
    let db_path = root.join(".tsift/index.db");
    // Hold the writer lock for the whole test.
    let _writer = hold_writer_lock(&index::writer_lock_path(&db_path));
    std::thread::sleep(std::time::Duration::from_millis(50));
    let rewritten_source =
        "fn fresh_helper() { println!(\"fresh\"); }\nfn main() { fresh_helper(); }\n";
    std::fs::write(root.join("main.rs"), rewritten_source).unwrap();

    let graph = build_traversal_graph(root, root, None).unwrap();
    let file = resolve_traversal_node(&graph, "main.rs").unwrap();

    let warned_about_fallback = graph
        .warnings
        .iter()
        .any(|warning| warning.contains("falling back to raw source file nodes"));
    assert!(
        warned_about_fallback,
        "expected raw-source fallback diagnostic, got {:?}",
        graph.warnings
    );
    assert!(
        matches!(file.detail.as_deref(), Some(detail) if detail.contains("raw source fallback")),
        "expected raw-source detail, got {:?}",
        file.detail
    );
    assert!(
        file.expand.contains("source-read"),
        "expected source-read fallback command, got {}",
        file.expand
    );
    assert!(
        resolve_traversal_node(&graph, "helper").is_none(),
        "stale symbol evidence should be skipped when refresh is blocked"
    );
}
/// Smoke test: `cmd_traverse` must succeed for a `#kgnv` -> `main` query in JSON format and
/// for a query with no endpoints in HTML format.
#[test]
fn traversal_cmd_supports_json_and_html_outputs() {
    let project = setup_traversal_project();
    let root = project.path();

    let json_run = cmd_traverse(
        Some("#kgnv"),
        Some("main"),
        root,
        None,
        1,
        50,
        TraverseFormat::Json,
        false,
        false,
        false,
        None,
    );
    json_run.unwrap();

    let html_run = cmd_traverse(
        None,
        None,
        root,
        None,
        1,
        50,
        TraverseFormat::Html,
        false,
        false,
        false,
        None,
    );
    html_run.unwrap();
}
/// The HTML rendering of a traversal report must contain the graph canvas element, the
/// seeded semantic concept and a `JSON.parse` call.
#[test]
fn traversal_html_renders_inline_graph_visualization() {
    let project = setup_traversal_project();
    let root = project.path();
    seed_traversal_semantic_summaries(root);
    let graph = build_traversal_graph(root, root, None).unwrap();
    let report = traversal_report(root, None, graph, None, None, 1, 50).unwrap();

    let html = traversal_report_html(&report).unwrap();

    for fragment in [
        "id=\"graph-canvas\"",
        "semantic_concept",
        "graph navigation",
        "JSON.parse",
    ] {
        assert!(html.contains(fragment));
    }
}
/// `format_score` prints two decimals in compact mode and four otherwise; `compact_snippet`
/// keeps only the trimmed first line.
#[test]
fn compact_helpers_trim_scores_and_snippets() {
    let score = 0.12345;
    assert_eq!(format_score(score, true), "0.12");
    assert_eq!(format_score(score, false), "0.1235");

    let raw = " first line with useful context\nsecond";
    let trimmed = compact_snippet(raw);
    assert_eq!(trimmed.as_deref(), Some("first line with useful context"));
}
/// With six members and a cap of five, `compact_members` lists the first five names and
/// appends a `(+1 more)` suffix.
#[test]
fn compact_members_caps_list() {
    let names = ["a", "b", "c", "d", "e", "f"];
    let mut members: Vec<graph::CommunityMember> = Vec::new();
    for name in names.iter() {
        members.push(graph::CommunityMember::new(*name));
    }
    let rendered = compact_members(&members, 5);
    assert_eq!(rendered, "a, b, c, d, e (+1 more)");
}
/// Table-driven check of `abbreviate_kind`: known long kinds map to their abbreviations,
/// while short or unknown kinds are returned unchanged.
#[test]
fn abbreviate_kind_maps_common_kinds() {
    let abbreviated = [
        ("function", "fn"),
        ("method", "meth"),
        ("class", "cls"),
        ("interface", "iface"),
        ("type_alias", "type"),
        ("data_class", "data_cls"),
        ("sealed_class", "sealed_cls"),
        ("enum_class", "enum_cls"),
        ("companion_object", "comp_obj"),
        ("object", "obj"),
        ("heading", "h"),
        ("code_block", "code"),
    ];
    for (kind, expected) in abbreviated.iter() {
        assert_eq!(abbreviate_kind(kind), *expected);
    }

    // short kinds pass through
    let unchanged = ["struct", "trait", "enum", "const", "unknown_kind"];
    for kind in unchanged.iter() {
        assert_eq!(abbreviate_kind(kind), *kind);
    }
}
/// `abbreviate_match_type` shortens `exact_name` and `partial_tags` and leaves `all_tags`
/// and unrecognised values unchanged.
#[test]
fn abbreviate_match_type_maps_search_types() {
    let cases = [
        ("exact_name", "exact"),
        ("partial_tags", "partial"),
        ("all_tags", "all_tags"),
        ("other_type", "other_type"),
    ];
    for (match_type, expected) in cases.iter() {
        assert_eq!(abbreviate_match_type(match_type), *expected);
    }
}
/// Two call edges from `main` in `src/main.rs` must be rendered by `format_edge_groups` as
/// a single line grouped by file, with the count and both callee names.
#[test]
fn explain_compact_groups_edges_by_file() {
    // (callee name, call-site line) — both edges share caller `main` at line 1.
    let call_sites = [("helper", 2), ("render", 3)];
    let edges: Vec<index::StoredEdge> = call_sites
        .iter()
        .map(|&(callee, site_line)| index::StoredEdge {
            caller_file: "src/main.rs".to_string(),
            caller_name: "main".to_string(),
            caller_line: 1,
            callee_name: callee.to_string(),
            call_site_line: site_line,
            tagpath_handle: None,
        })
        .collect();

    let rendered = format_edge_groups(&edges, false);
    assert_eq!(rendered, vec!["  src/main.rs (2): helper, render"]);
}
/// Three hits in the same file must be grouped into one entry with a root-relative path, a
/// hit count of 3 and only the first two hits as samples; the same hits must also trigger
/// collapsing.
#[test]
fn search_hit_groups_preserve_file_counts_and_samples() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    let main_rs = root.join("src/main.rs");
    fs::create_dir_all(main_rs.parent().unwrap()).unwrap();
    fs::write(&main_rs, "claudescore-3 anchor\nclaudescore-3 follow-up\n").unwrap();
    let freshness = exact_search_file_timestamp(&main_rs);

    // All hits share the file, provenance and freshness; only these fields vary.
    let make_hit = |id: &str, rank, score, location: &str, snippet: &str| sift::SearchHit {
        artifact_id: id.to_string(),
        artifact_kind: sift::ContextArtifactKind::File,
        path: main_rs.display().to_string(),
        rank,
        score,
        confidence: sift::ScoreConfidence::High,
        location: Some(location.to_string()),
        snippet: snippet.to_string(),
        provenance: sift::ArtifactProvenance {
            adapter: sift::AcquisitionAdapterKind::FileSystem,
            source: "ripgrep -F".to_string(),
            synthetic: false,
        },
        freshness: freshness.clone(),
        budget: sift::ArtifactBudget::from_text(snippet, 1),
    };
    let hits = vec![
        make_hit("a", 1, 10.0, "line 3", "claudescore-3 anchor"),
        make_hit("b", 2, 9.0, "line 7", "claudescore-3 follow-up"),
        make_hit("c", 3, 8.0, "line 9", "claudescore-3 tail"),
    ];

    let groups = group_search_hits(&hits, root, false);
    assert_eq!(groups.len(), 1);
    let group = &groups[0];
    assert_eq!(group.path, "src/main.rs");
    assert_eq!(group.hits, 3);
    assert_eq!(
        group.samples,
        vec![
            String::from("line 3: claudescore-3 anchor"),
            String::from("line 7: claudescore-3 follow-up"),
        ]
    );
    assert!(should_collapse_search_hits(&hits, root, false));
}
/// Three edges from different callers in the same file to the same callee must make
/// `should_collapse_edge_groups` return true.
#[test]
fn dense_edge_groups_trigger_collapse() {
    // (caller name, caller line, call-site line) — all in src/main.rs, all calling `helper`.
    let callers = [("main", 1, 2), ("beta", 5, 6), ("gamma", 9, 10)];
    let edges: Vec<index::StoredEdge> = callers
        .iter()
        .map(|&(caller, caller_line, site_line)| index::StoredEdge {
            caller_file: "src/main.rs".to_string(),
            caller_name: caller.to_string(),
            caller_line,
            callee_name: "helper".to_string(),
            call_site_line: site_line,
            tagpath_handle: None,
        })
        .collect();

    assert!(should_collapse_edge_groups(&edges));
}
// --- workspace indexing ---
/// Fixture: a temp directory with a `.gitmodules` file declaring submodules `src/alpha`
/// and `src/beta`, each containing a small `lib.rs`.
fn setup_workspace() -> tempfile::TempDir {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    let gitmodules = r#"[submodule "src/alpha"]
path = src/alpha
url = https://example.com/alpha
[submodule "src/beta"]
path = src/beta
url = https://example.com/beta
"#;
    std::fs::write(root.join(".gitmodules"), gitmodules).unwrap();

    // alpha has a caller/callee pair; beta has a single function.
    let alpha_dir = root.join("src/alpha");
    std::fs::create_dir_all(&alpha_dir).unwrap();
    std::fs::write(
        alpha_dir.join("lib.rs"),
        "fn alpha_helper() {}\nfn alpha_main() { alpha_helper(); }",
    )
    .unwrap();

    let beta_dir = root.join("src/beta");
    std::fs::create_dir_all(&beta_dir).unwrap();
    std::fs::write(beta_dir.join("lib.rs"), "fn beta_func() {}").unwrap();

    workspace
}
/// Fixture: a temp directory whose `.gitmodules` declares two submodules with the same
/// leaf directory name (`pkg/app/foo` and `vendor/foo`), each defining `shared_name` plus
/// one function unique to that submodule.
fn setup_workspace_with_duplicate_leaf_names() -> tempfile::TempDir {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    let gitmodules = r#"[submodule "pkg/app/foo"]
path = pkg/app/foo
url = https://example.com/pkg-app-foo
[submodule "vendor/foo"]
path = vendor/foo
url = https://example.com/vendor-foo
"#;
    std::fs::write(root.join(".gitmodules"), gitmodules).unwrap();

    let pkg_dir = root.join("pkg/app/foo");
    std::fs::create_dir_all(&pkg_dir).unwrap();
    std::fs::write(
        pkg_dir.join("lib.rs"),
        "fn pkg_only() {}\nfn shared_name() { pkg_only(); }\n",
    )
    .unwrap();

    let vendor_dir = root.join("vendor/foo");
    std::fs::create_dir_all(&vendor_dir).unwrap();
    std::fs::write(
        vendor_dir.join("lib.rs"),
        "fn vendor_only() {}\nfn shared_name() { vendor_only(); }\n",
    )
    .unwrap();

    workspace
}
/// Indexing the two-submodule workspace fixture must create an `index.db` under
/// `.tsift/indexes/` for both `alpha` and `beta`.
#[test]
fn workspace_index_creates_per_submodule_dbs() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let indexes = root.join(".tsift/indexes");
    assert!(indexes.join("alpha/index.db").exists());
    assert!(indexes.join("beta/index.db").exists());
}
/// Indexing with the scope argument set to `alpha` must create the `alpha` index database
/// and must not create one for `beta`.
#[test]
fn workspace_index_single_submodule() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        false,
        Some("alpha"),
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let indexes = root.join(".tsift/indexes");
    assert!(indexes.join("alpha/index.db").exists());
    assert!(!indexes.join("beta/index.db").exists());
}
/// Indexing with an unknown scope (`missing`) must fail with an error that names the
/// unknown scope and lists the available ones, and must not create an index database for
/// it.
///
/// Each assertion prints the actual error text on failure, matching the `"{msg}"` style
/// used by the other workspace error assertions in this file.
#[test]
fn workspace_index_single_submodule_errors_on_unknown_scope() {
    let dir = setup_workspace();
    let err = cmd_index(
        dir.path(),
        false,
        false,
        false,
        false,
        false,
        false,
        Some("missing"),
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();
    let msg = err.to_string();
    assert!(msg.contains("unknown scope `missing`"), "{msg}");
    assert!(msg.contains("Available scopes: alpha, beta"), "{msg}");
    assert!(
        !dir.path().join(".tsift/indexes/missing/index.db").exists(),
        "unknown scope should not create an index database"
    );
}
/// When two submodules share the leaf name `foo`, indexing must create separate index
/// databases under `pkg/app/foo` and `vendor/foo` inside `.tsift/indexes/`.
#[test]
fn workspace_index_uses_unique_scope_ids_when_leaf_names_collide() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let pkg_index = root.join(".tsift/indexes/pkg/app/foo/index.db");
    let vendor_index = root.join(".tsift/indexes/vendor/foo/index.db");
    assert!(pkg_index.exists());
    assert!(vendor_index.exists());
}
/// After indexing the workspace fixture, a federated symbol search for `alpha_helper`
/// (tagpath annotation disabled) must return at least one hit.
#[test]
fn federated_search_across_submodules() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let search_opts = TagpathSearchOpts {
        no_tagpath: true,
        strict: false,
    };
    let (hits, _diag) = federated_symbol_search(root, "alpha_helper", 10, &search_opts).unwrap();
    assert!(
        !hits.is_empty(),
        "should find alpha_helper via federated search"
    );
}
/// With `alpha` configured as tier `isolated` in `.tsift/config.toml`, a federated symbol
/// search for `alpha_helper` must return no hits.
#[test]
fn federated_search_respects_isolation() {
    let workspace = setup_workspace();
    let root = workspace.path();

    // The override has to be on disk before indexing runs.
    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let config_toml = r#"
[overrides.alpha]
tier = "isolated"
"#;
    std::fs::write(tsift_dir.join("config.toml"), config_toml).unwrap();

    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let search_opts = TagpathSearchOpts {
        no_tagpath: true,
        strict: false,
    };
    let (hits, _diag) = federated_symbol_search(root, "alpha_helper", 10, &search_opts).unwrap();
    assert!(
        hits.is_empty(),
        "isolated submodule should not appear in federated search"
    );
}
/// With `alpha` configured as tier `isolated`, a federated lexical search for `fn` must
/// still return hits, and every hit must come from `src/beta/lib.rs`.
#[test]
fn federated_lexical_search_respects_isolation() {
    let workspace = setup_workspace();
    let root = workspace.path();

    // The override has to be on disk before indexing runs.
    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let config_toml = r#"
[overrides.alpha]
tier = "isolated"
"#;
    std::fs::write(tsift_dir.join("config.toml"), config_toml).unwrap();

    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let cache_dir = root.join(".tsift/search-cache");
    let response = federated_sift_search(root, &cache_dir, "fn", 10, 0, "lexical").unwrap();

    assert!(
        !response.hits.is_empty(),
        "shared scopes should still contribute lexical hits"
    );
    let only_beta_hits = response
        .hits
        .iter()
        .all(|hit| hit.path.ends_with("src/beta/lib.rs"));
    assert!(
        only_beta_hits,
        "isolated scope should not leak lexical hits: {:?}",
        response.hits
    );
}
/// With `alpha` configured as tier `private`, a federated lexical search for `fn` must
/// still return hits, and every hit must come from `src/beta/lib.rs`.
#[test]
fn federated_lexical_search_respects_private_tier() {
    let workspace = setup_workspace();
    let root = workspace.path();

    // The override has to be on disk before indexing runs.
    let tsift_dir = root.join(".tsift");
    std::fs::create_dir_all(&tsift_dir).unwrap();
    let config_toml = r#"
[overrides.alpha]
tier = "private"
"#;
    std::fs::write(tsift_dir.join("config.toml"), config_toml).unwrap();

    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    let cache_dir = root.join(".tsift/search-cache");
    let response = federated_sift_search(root, &cache_dir, "fn", 10, 0, "lexical").unwrap();

    assert!(
        !response.hits.is_empty(),
        "shared scopes should still contribute lexical hits"
    );
    let only_beta_hits = response
        .hits
        .iter()
        .all(|hit| hit.path.ends_with("src/beta/lib.rs"));
    assert!(
        only_beta_hits,
        "private scope should not leak lexical hits: {:?}",
        response.hits
    );
}
/// After indexing, the `alpha` scope's index database must return `alpha_main` as the top
/// hit of a symbol search for that name.
#[test]
fn scoped_search_finds_submodule_symbols() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root,
        false,
        false,
        false,
        false,
        false,
        true,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();

    // Resolve the per-scope database path through the loaded config.
    let cfg = config::Config::load(root).unwrap();
    let alpha_db_path = cfg.db_path_for(root, "alpha");
    let alpha_db = index::IndexDb::open(&alpha_db_path).unwrap();

    let hits = alpha_db.symbol_search("alpha_main", 10).unwrap();
    assert!(!hits.is_empty());
    assert_eq!(hits[0].name, "alpha_main");
}
/// `cmd_search` with scope `missing` on the two-submodule workspace must fail with an
/// error that names the unknown scope and lists the available scopes.
///
/// Each assertion prints the actual error text on failure, matching the `"{msg}"` style
/// used by the other workspace error assertions in this file.
#[test]
fn scoped_search_cmd_errors_on_unknown_scope() {
    let dir = setup_workspace();
    let err = cmd_search(
        "alpha_main".to_string(),
        Some(dir.path().to_path_buf()),
        5,
        Some("lexical".to_string()),
        Some("missing".to_string()),
        false,
        false,
        false,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap_err();
    let msg = err.to_string();
    assert!(msg.contains("unknown scope `missing`"), "{msg}");
    assert!(msg.contains("Available scopes: alpha, beta"), "{msg}");
}
/// Uses the fixture with duplicate leaf names: after indexing, a search scoped to the bare name
/// `foo` must fail as ambiguous and mention both `pkg/app/foo` and `vendor/foo`.
#[test]
fn scoped_search_cmd_errors_on_ambiguous_legacy_scope_name() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let outcome = cmd_search(
        "vendor_only".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        Some("foo".to_string()),
        false, false, false, 0, false, false, false, false, false, false,
    );
    let text = outcome.unwrap_err().to_string();
    assert!(text.contains("ambiguous scope `foo`"));
    assert!(text.contains("pkg/app/foo"));
    assert!(text.contains("vendor/foo"));
}
/// Indexes the workspace fixture, opens the `alpha` database, and checks that the callees
/// recorded for `alpha_main` include `alpha_helper`.
#[test]
fn scoped_graph_query() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let cfg = config::Config::load(root).unwrap();
    let alpha_db = index::IndexDb::open(&cfg.db_path_for(root, "alpha")).unwrap();
    let outgoing = alpha_db.callees_of("alpha_main").unwrap();
    let calls_helper = outgoing
        .iter()
        .any(|edge| edge.callee_name.as_str() == "alpha_helper");
    assert!(calls_helper);
}
/// Asserts that `err` is the scope-guidance failure: its message must ask for
/// `--scope <scope>`, list `alpha, beta` as both available and indexed scopes, and must not
/// contain "no index found at". Each failed check prints the full message.
fn assert_workspace_query_requires_scope(err: anyhow::Error) {
    let text = err.to_string();
    let required = [
        "require `--scope <scope>`",
        "Available scopes: alpha, beta",
        "Indexed scopes: alpha, beta",
    ];
    for needle in required {
        assert!(text.contains(needle), "{text}");
    }
    let mentions_missing_index = text.contains("no index found at");
    assert!(
        !mentions_missing_index,
        "workspace query should fail with scope guidance, got: {text}"
    );
}
/// Asserts that `err` tells the caller to pass `--scope <scope>` or `--federated`, lists
/// `alpha, beta` as both available and indexed scopes, and does not mention
/// "autoindexing index". Each failed check prints the full message.
fn assert_workspace_search_requires_explicit_target(err: anyhow::Error) {
    let text = err.to_string();
    let required = [
        "requires `--scope <scope>` or `--federated`",
        "Available scopes: alpha, beta",
        "Indexed scopes: alpha, beta",
    ];
    for needle in required {
        assert!(text.contains(needle), "{text}");
    }
    let attempted_autoindex = text.contains("autoindexing index");
    assert!(
        !attempted_autoindex,
        "workspace search should fail before creating a shared root index: {text}"
    );
}
/// After indexing the workspace fixture, `cmd_graph` for `alpha_main` at the workspace root
/// must fail with the error checked by `assert_workspace_query_requires_scope`.
#[test]
fn graph_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let outcome = cmd_graph(
        "alpha_main",
        root,
        false, false,
        None,
        20,
        false, false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert_workspace_query_requires_scope(outcome.unwrap_err());
}
/// Indexes the workspace fixture, creates `src/alpha/nested`, and expects `cmd_graph` for
/// `alpha_main` to succeed when given that nested directory as its path.
#[test]
fn graph_cmd_infers_scope_from_nested_workspace_path() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let inner_dir = root.join("src/alpha/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    let outcome = cmd_graph(
        "alpha_main",
        &inner_dir,
        false, false,
        None,
        20,
        false, false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// After indexing the workspace fixture, `cmd_communities` at the workspace root must fail
/// with the error checked by `assert_workspace_query_requires_scope`.
#[test]
fn communities_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let outcome = cmd_communities(
        root,
        None,
        1,
        10,
        false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert_workspace_query_requires_scope(outcome.unwrap_err());
}
/// Indexes the workspace fixture, creates `src/alpha/nested`, and expects `cmd_communities`
/// to succeed when pointed at that nested directory.
#[test]
fn communities_cmd_infers_scope_from_nested_workspace_path() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let inner_dir = root.join("src/alpha/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    let outcome = cmd_communities(
        &inner_dir,
        None,
        1,
        10,
        false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// After indexing the workspace fixture, `cmd_path` from `alpha_main` to `alpha_helper` at the
/// workspace root must fail with the error checked by `assert_workspace_query_requires_scope`.
#[test]
fn path_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let outcome = cmd_path(
        "alpha_main",
        "alpha_helper",
        root,
        None,
        false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert_workspace_query_requires_scope(outcome.unwrap_err());
}
/// Indexes the workspace fixture, creates `src/alpha/nested`, and expects `cmd_path` from
/// `alpha_main` to `alpha_helper` to succeed when pointed at that nested directory.
#[test]
fn path_cmd_infers_scope_from_nested_workspace_path() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let inner_dir = root.join("src/alpha/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    let outcome = cmd_path(
        "alpha_main",
        "alpha_helper",
        &inner_dir,
        None,
        false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// While `hold_rollback_journal_lock` keeps `.tsift/index.db` locked, `cmd_path` from `main`
/// to `helper` on the graph fixture must still succeed.
#[test]
fn path_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let fixture = setup_graph_index();
    let index_db = fixture.path().join(".tsift/index.db");
    // The guard has to outlive the command call, so it stays bound to a named local.
    let _journal_guard = hold_rollback_journal_lock(&index_db);
    let outcome = cmd_path(
        "main",
        "helper",
        fixture.path(),
        None,
        false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// After indexing the workspace fixture, `cmd_explain` for `alpha_main` at the workspace root
/// must fail with the error checked by `assert_workspace_query_requires_scope`.
#[test]
fn explain_cmd_requires_scope_for_workspace_root_without_shared_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let outcome = cmd_explain(
        "alpha_main",
        root,
        None,
        15,
        false, false, false, false, false, false, false,
    );
    assert_workspace_query_requires_scope(outcome.unwrap_err());
}
/// Indexes the workspace fixture, creates `src/alpha/nested`, and expects `cmd_explain` for
/// `alpha_main` to succeed when pointed at that nested directory.
#[test]
fn explain_cmd_infers_scope_from_nested_workspace_path() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let inner_dir = root.join("src/alpha/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    let outcome = cmd_explain(
        "alpha_main",
        &inner_dir,
        None,
        15,
        false, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
/// While `hold_rollback_journal_lock` keeps `.tsift/index.db` locked, `cmd_explain` for `main`
/// on the graph fixture must still succeed.
#[test]
fn explain_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let fixture = setup_graph_index();
    let index_db = fixture.path().join(".tsift/index.db");
    // The guard has to outlive the command call, so it stays bound to a named local.
    let _journal_guard = hold_rollback_journal_lock(&index_db);
    let outcome = cmd_explain(
        "main",
        fixture.path(),
        None,
        15,
        false, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
// --- community detection ---
/// Runs `graph::detect_communities` over every edge stored in the graph fixture's index and
/// expects a non-zero node count and at least one community.
#[test]
fn community_detection_groups_related() {
    let fixture = setup_graph_index();
    let index_db = fixture.path().join(".tsift/index.db");
    let all_edges = index::IndexDb::open(&index_db).unwrap().all_edges().unwrap();
    let detected = graph::detect_communities(&all_edges);
    assert!(detected.node_count > 0);
    assert!(!detected.communities.is_empty());
}
/// Runs `cmd_communities` on an empty temporary directory and expects success plus a newly
/// created `.tsift/index.db` inside it.
#[test]
fn community_cmd_autoindexes_missing_index_by_default() {
    let empty_project = tempfile::tempdir().unwrap();
    let root = empty_project.path();
    let outcome = cmd_communities(
        root,
        None,
        2,
        10,
        false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
    assert!(root.join(".tsift/index.db").exists());
}
// --- path ---
/// Loads every edge from the graph fixture's index and checks that `graph::shortest_path`
/// finds a route from `main` to `helper` that is exactly one hop long.
#[test]
fn path_finds_connected_symbols() {
    let dir = setup_graph_index();
    let db = index::IndexDb::open(&dir.path().join(".tsift/index.db")).unwrap();
    let edges = db.all_edges().unwrap();
    // A single `expect` replaces the earlier `assert!(is_some())` + `unwrap()` pair: one check
    // instead of two, and a failure message that states what was expected.
    let path = graph::shortest_path(&edges, "main", "helper")
        .expect("shortest_path should find a route from `main` to `helper`");
    assert_eq!(
        path.hops, 1,
        "expected exactly one hop between `main` and `helper`"
    );
}
/// `graph::shortest_path` must return `None` when the target symbol (`nonexistent`) does not
/// appear in the graph fixture's edges.
#[test]
fn path_returns_none_for_unknown() {
    let fixture = setup_graph_index();
    let index_db = fixture.path().join(".tsift/index.db");
    let all_edges = index::IndexDb::open(&index_db).unwrap().all_edges().unwrap();
    let route = graph::shortest_path(&all_edges, "main", "nonexistent");
    assert!(route.is_none());
}
/// Runs `cmd_path` from `a` to `b` on an empty temporary directory and expects success plus a
/// newly created `.tsift/index.db` inside it.
#[test]
fn path_cmd_autoindexes_missing_index_by_default() {
    let empty_project = tempfile::tempdir().unwrap();
    let root = empty_project.path();
    let outcome = cmd_path(
        "a",
        "b",
        root,
        None,
        false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
    assert!(root.join(".tsift/index.db").exists());
}
// --- explain ---
/// Looks up `main` through `symbol_info` on the graph fixture's index and expects the first
/// record to be named `main` with kind `function`.
#[test]
fn explain_shows_symbol_info() {
    let fixture = setup_graph_index();
    let index_db = fixture.path().join(".tsift/index.db");
    let records = index::IndexDb::open(&index_db)
        .unwrap()
        .symbol_info("main")
        .unwrap();
    assert!(!records.is_empty());
    let first = &records[0];
    assert_eq!(first.name, "main");
    assert_eq!(first.kind, "function");
}
/// Runs `cmd_explain` for `main` on an empty temporary directory and expects success plus a
/// newly created `.tsift/index.db` inside it.
#[test]
fn explain_cmd_autoindexes_missing_index_by_default() {
    let empty_project = tempfile::tempdir().unwrap();
    let root = empty_project.path();
    let outcome = cmd_explain(
        "main",
        root,
        None,
        15,
        false, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
    assert!(root.join(".tsift/index.db").exists());
}
/// Opens a connection to `db_path` and issues `BEGIN IMMEDIATE` on it. The transaction is not
/// finished here; the open connection is returned to the caller.
fn hold_write_lock(db_path: &std::path::Path) -> Connection {
    let holder = Connection::open(db_path).unwrap();
    holder.execute_batch("BEGIN IMMEDIATE").unwrap();
    holder
}
/// Opens (creating if needed, without truncating) the file at `lock_path`, asserts that an
/// exclusive lock on it was acquired, writes this process's id as a line, and returns the
/// file handle.
fn hold_writer_lock(lock_path: &std::path::Path) -> std::fs::File {
    use fs4::fs_std::FileExt;
    use std::io::Write;
    let mut open_opts = std::fs::OpenOptions::new();
    open_opts.read(true).write(true).create(true).truncate(false);
    let mut lock_file = open_opts.open(lock_path).unwrap();
    let acquired = lock_file.try_lock_exclusive().unwrap();
    assert!(acquired);
    writeln!(lock_file, "{}", std::process::id()).unwrap();
    lock_file
}
/// Opens `db_path`, switches it to `journal_mode=DELETE`, begins an exclusive transaction,
/// and writes the text `locked` to the path given by `substrate::rollback_journal_path`.
/// Returns the connection with the transaction still open.
fn hold_rollback_journal_lock(db_path: &std::path::Path) -> Connection {
    let holder = Connection::open(db_path).unwrap();
    holder
        .execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
        .unwrap();
    let journal_file = substrate::rollback_journal_path(db_path);
    std::fs::write(journal_file, "locked").unwrap();
    holder
}
/// Opens `db_path` and runs one SQL batch that switches to WAL journaling, sets
/// `wal_autocheckpoint=0`, inserts a row into `wal_lock_probe`, sets
/// `locking_mode=EXCLUSIVE`, and begins an exclusive transaction. Asserts that the path from
/// `substrate::wal_sidecar_path` exists, then returns the connection with the transaction
/// still open.
fn hold_wal_database_lock(db_path: &std::path::Path) -> Connection {
    // Continuation lines stay at column 0 so the SQL text is exactly what it was before.
    let wal_setup = "PRAGMA journal_mode=WAL;
PRAGMA wal_autocheckpoint=0;
CREATE TABLE IF NOT EXISTS wal_lock_probe (id INTEGER PRIMARY KEY);
INSERT INTO wal_lock_probe DEFAULT VALUES;
PRAGMA locking_mode=EXCLUSIVE;
BEGIN EXCLUSIVE;";
    let holder = Connection::open(db_path).unwrap();
    holder.execute_batch(wal_setup).unwrap();
    let wal_file = substrate::wal_sidecar_path(db_path);
    assert!(wal_file.exists());
    holder
}
/// With the fixture database held by `hold_wal_database_lock`, `cmd_index` must fail and its
/// message must contain the lock diagnostics asserted below (absent lock file, a present WAL
/// or SHM sidecar, the wedged-writer hint, and a mention of snapshot fallback).
#[test]
fn index_cmd_reports_wal_sidecar_diagnostics_without_tsift_writer_lock() {
    let fixture = setup_graph_index();
    let index_db = fixture.path().join(".tsift/index.db");
    let _wal_guard = hold_wal_database_lock(&index_db);
    let outcome = cmd_index(
        fixture.path(),
        false, false, false, false, false, false,
        None,
        false, false, false, false, false, false,
    );
    let text = outcome.unwrap_err().to_string();
    assert!(text.contains("indexing"));
    assert!(text.contains("lock diagnostics:"));
    assert!(text.contains("lock: absent"));
    assert!(text.contains("wal: present") || text.contains("shm: present"));
    assert!(text.contains("wedged writer holding live WAL sidecars"));
    assert!(text.contains("snapshot fallback"));
}
/// While `hold_write_lock` keeps a `BEGIN IMMEDIATE` transaction open on the fixture database,
/// a lexical `cmd_search` for `main` must still succeed.
#[test]
fn search_cmd_succeeds_while_writer_lock_is_held() {
    let fixture = setup_graph_index();
    let index_db = fixture.path().join(".tsift/index.db");
    let _write_guard = hold_write_lock(&index_db);
    let outcome = cmd_search(
        "main".to_string(),
        Some(fixture.path().to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false, false, false, 0, true, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
/// Installs the hook returned by `install_search_post_precheck_lock` for the fixture database
/// and expects a lexical `cmd_search` for `main` to succeed while the hook is alive.
#[test]
fn search_cmd_uses_snapshot_fallback_when_rollback_journal_lock_appears_after_precheck() {
    let fixture = setup_graph_index();
    let index_db = fixture.path().join(".tsift/index.db");
    let _hook_guard = install_search_post_precheck_lock(index_db);
    let outcome = cmd_search(
        "main".to_string(),
        Some(fixture.path().to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false, false, false, 0, true, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
/// Installs the hook returned by `install_search_post_precheck_wal_lock` for the fixture
/// database and expects a lexical `cmd_search` for `main` to succeed while the hook is alive.
#[test]
fn search_cmd_uses_wal_snapshot_fallback_when_lock_appears_after_precheck() {
    let fixture = setup_graph_index();
    let index_db = fixture.path().join(".tsift/index.db");
    let _hook_guard = install_search_post_precheck_wal_lock(index_db);
    let outcome = cmd_search(
        "main".to_string(),
        Some(fixture.path().to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false, false, false, 0, true, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
/// Rewrites `main.rs` after the fixture was indexed, then expects a lexical `cmd_search` to
/// fail with a message containing "search aborted", "index is stale", and "--no-autoindex".
#[test]
fn search_cmd_fails_fast_when_autoindex_disabled_and_index_is_stale() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();
    let outcome = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false, false, false, 0, false, false, false, false, false, false,
    );
    let text = outcome.unwrap_err().to_string();
    assert!(text.contains("search aborted"));
    assert!(text.contains("index is stale"));
    assert!(text.contains("--no-autoindex"));
}
/// Rewrites `main.rs`, locks the fixture database with `hold_rollback_journal_lock`, and
/// expects `cmd_search` to fail with a stale-index message that does not contain
/// "database is locked".
#[test]
fn search_cmd_reports_stale_when_root_index_is_locked_by_rollback_journal() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();
    let _journal_guard = hold_rollback_journal_lock(&root.join(".tsift/index.db"));
    let outcome = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false, false, false, 0, false, false, false, false, false, false,
    );
    let text = outcome.unwrap_err().to_string();
    assert!(text.contains("search aborted"));
    assert!(text.contains("index is stale"));
    assert!(!text.contains("database is locked"));
}
/// Rewrites `main.rs` after indexing, runs a lexical `cmd_search`, and then verifies through
/// `compute_changes` that the index has no new, modified, or deleted files left.
#[test]
fn search_cmd_autoindexes_stale_index_by_default() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();
    let outcome = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false, false, true, 0, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
    let reader = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let pending = reader.compute_changes(root).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
/// Rewrites `main.rs`, holds the `.tsift/index.lock` writer lock, and expects `cmd_search` to
/// succeed while `compute_changes` afterwards still reports exactly one modified file.
#[test]
fn search_cmd_keeps_read_only_results_when_active_writer_blocks_autoindex() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();
    let _writer_guard = hold_writer_lock(&root.join(".tsift/index.lock"));
    let outcome = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false, false, true, 0, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
    let reader = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let pending = reader.compute_changes(root).unwrap();
    assert_eq!(pending.modified, 1);
}
/// Rewrites `main.rs`, locks the fixture database with `hold_rollback_journal_lock`, and
/// expects `cmd_search` to fail with the autoindexing lock diagnostics asserted below.
#[test]
fn search_cmd_autoindex_reports_lock_diagnostics_when_rollback_journal_blocks_writer() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();
    let _journal_guard = hold_rollback_journal_lock(&root.join(".tsift/index.db"));
    let outcome = cmd_search(
        "helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false, false, true, 0, false, false, false, false, false, false,
    );
    let text = outcome.unwrap_err().to_string();
    assert!(text.contains("autoindexing index"));
    assert!(text.contains("lock diagnostics:"));
    assert!(text.contains("journal: present"));
    assert!(text.contains("next: inspect the host for a wedged rollback-journal writer"));
}
/// Searches from `src/nested` inside the graph fixture and expects success without a
/// `.tsift/index.db` being created under the nested directory.
#[test]
fn search_cmd_uses_ancestor_project_root_for_nested_paths() {
    let fixture = setup_graph_index();
    let inner_dir = fixture.path().join("src/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    let outcome = cmd_search(
        "helper".to_string(),
        Some(inner_dir.clone()),
        5,
        Some("lexical".to_string()),
        None,
        false, false, true, 0, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
    let nested_index = inner_dir.join(".tsift/index.db");
    assert!(!nested_index.exists());
}
/// Writes a three-line `notes.txt` and checks that an exact search for `claudescore-3`
/// reports strategy `exact` with a single hit in that file at `line 2`, whose snippet
/// contains the query.
#[test]
fn exact_search_returns_literal_matches() {
    let project = tempfile::tempdir().unwrap();
    let root = project.path();
    std::fs::write(root.join("notes.txt"), "alpha\nclaudescore-3\nbeta\n").unwrap();
    let response = run_exact_search_with_timeout(root, "claudescore-3", 5, 0).unwrap();
    assert_eq!(response.strategy, "exact");
    assert_eq!(response.hits.len(), 1);
    let only_hit = &response.hits[0];
    assert!(only_hit.path.ends_with("notes.txt"));
    assert_eq!(only_hit.location.as_deref(), Some("line 2"));
    assert!(only_hit.snippet.contains("claudescore-3"));
}
/// Rewrites `main.rs` after indexing and expects a `cmd_search` with strategy `exact` to
/// succeed even though the index was not refreshed by this test.
#[test]
fn exact_search_skips_stale_index_precheck() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source = "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();
    let outcome = cmd_search(
        "println!(\"updated\")".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("exact".to_string()),
        None,
        false, false, false, 0, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
/// After indexing the workspace fixture, an `exact` search at the workspace root must succeed
/// and must not leave a `.tsift/index.db` at the root.
#[test]
fn workspace_exact_search_does_not_require_shared_root_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("exact".to_string()),
        None,
        false, false, false, 0, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
    let shared_index = root.join(".tsift/index.db");
    assert!(!shared_index.exists());
}
/// Table-style check of `query_prefers_exact_search`: the first group of queries must be
/// classified as preferring exact search, the second group must not.
#[test]
fn identifier_like_query_prefers_exact_search() {
    let exact_preferred = ["claudescore-3", "alpha_helper", "src/main.rs", "crate::module"];
    for query in exact_preferred {
        assert!(query_prefers_exact_search(query), "query: {query}");
    }
    let exact_not_preferred = ["authenticate", "fn main", "."];
    for query in exact_not_preferred {
        assert!(!query_prefers_exact_search(query), "query: {query}");
    }
}
/// With no requested strategy, `claudescore-3` resolves to `exact` and `authenticate` to
/// `lexical`; an explicitly requested `hybrid` strategy is returned unchanged.
#[test]
fn resolve_search_strategy_auto_promotes_identifier_like_queries() {
    let promoted = resolve_search_strategy("claudescore-3", None);
    assert_eq!(promoted, "exact");
    let plain_word = resolve_search_strategy("authenticate", None);
    assert_eq!(plain_word, "lexical");
    let explicit = resolve_search_strategy("claudescore-3", Some("hybrid".to_string()));
    assert_eq!(explicit, "hybrid");
}
/// After indexing the workspace fixture, a search for `alpha_helper` with no strategy and no
/// scope must succeed and must not leave a `.tsift/index.db` at the workspace root.
#[test]
fn workspace_identifier_like_search_auto_uses_exact_backend() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        None,
        None,
        false, false, false, 0, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
    let shared_index = root.join(".tsift/index.db");
    assert!(!shared_index.exists());
}
/// Runs `cmd_index` on `src/nested` inside the graph fixture (after adding `extra.rs` there)
/// and expects the index to exist at the fixture root and not under the nested directory.
#[test]
fn index_cmd_uses_ancestor_project_root_for_nested_paths() {
    let fixture = setup_graph_index();
    let inner_dir = fixture.path().join("src/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    std::fs::write(inner_dir.join("extra.rs"), "fn nested_helper() {}\n").unwrap();
    let outcome = cmd_index(
        &inner_dir,
        false, false, false, false, false, false,
        None,
        false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
    let root_index = fixture.path().join(".tsift/index.db");
    let nested_index = inner_dir.join(".tsift/index.db");
    assert!(root_index.exists());
    assert!(!nested_index.exists());
}
/// Runs `cmd_index` from `docs/nested` inside the workspace fixture and expects the databases
/// resolved for both `alpha` and `beta` to exist afterwards.
#[test]
fn workspace_index_cmd_uses_ancestor_project_root_for_nested_paths() {
    let workspace = setup_workspace();
    let root = workspace.path();
    let inner_dir = root.join("docs/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    let outcome = cmd_index(
        &inner_dir,
        false, false, false, false, false, true,
        None,
        false, false, false, false, false, false,
    );
    let cfg = config::Config::load(root).unwrap();
    assert!(outcome.is_ok());
    for scope in ["alpha", "beta"] {
        assert!(cfg.db_path_for(root, scope).exists());
    }
}
/// Builds only the `alpha` database by hand, confirms the `beta` database is missing, runs
/// `cmd_status`, and expects the `beta` database to exist and the status report to be `Fresh`.
#[test]
fn status_cmd_autoindexes_missing_workspace_scopes() {
    let workspace = setup_workspace();
    let root = workspace.path();
    let cfg = config::Config::load(root).unwrap();
    let alpha_scope = config::Config::resolve_submodule(root, "alpha").unwrap();
    let alpha_index = cfg.db_path_for(root, &alpha_scope.id);
    // The handle is deliberately kept in a local so it stays open for the rest of the test.
    let alpha_handle = index::IndexDb::open(&alpha_index).unwrap();
    alpha_handle.apply_changes(&alpha_scope.source_root).unwrap();
    let beta_index = cfg.db_path_for(root, "beta");
    assert!(!beta_index.exists());
    cmd_status(root, false, true, false, false, false, false).unwrap();
    assert!(beta_index.exists());
    let report = status::check_status(root).unwrap();
    assert!(matches!(report.index, status::IndexStatus::Fresh { .. }));
}
/// Runs `cmd_status` on a workspace fixture that was never indexed and expects the `alpha`
/// and `beta` databases to exist afterwards, with the status report being `Fresh`.
#[test]
fn status_cmd_autoindexes_workspace_when_all_scopes_are_missing() {
    let workspace = setup_workspace();
    let root = workspace.path();
    let cfg = config::Config::load(root).unwrap();
    cmd_status(root, false, true, false, false, false, false).unwrap();
    for scope in ["alpha", "beta"] {
        assert!(cfg.db_path_for(root, scope).exists());
    }
    let report = status::check_status(root).unwrap();
    assert!(matches!(report.index, status::IndexStatus::Fresh { .. }));
}
/// Rewrites `main.rs` so `check_status` reports `Stale`, runs `cmd_status` with its first flag
/// set to `true`, and expects `check_status` to report `Fresh` afterwards.
#[test]
fn status_cmd_fix_refreshes_stale_index() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();
    let before = status::check_status(root).unwrap();
    assert!(matches!(before.index, status::IndexStatus::Stale { .. }));
    cmd_status(root, true, true, false, false, false, false).unwrap();
    let after = status::check_status(root).unwrap();
    assert!(matches!(after.index, status::IndexStatus::Fresh { .. }));
}
/// With the fixture database held by `hold_wal_database_lock`, `cmd_status` must succeed,
/// `check_status` must report `Fresh` with `SnapshotFallbackWal` recovery, and `check_locks`
/// must show an absent writer lock, a present WAL or SHM sidecar, and the wedged-writer hint.
#[test]
fn status_cmd_reports_wal_snapshot_recovery_without_tsift_writer_lock() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    let index_db = root.join(".tsift/index.db");
    let _wal_guard = hold_wal_database_lock(&index_db);
    cmd_status(root, false, true, false, false, false, false).unwrap();
    let status_report = status::check_status(root).unwrap();
    assert!(matches!(
        status_report.index,
        status::IndexStatus::Fresh {
            recovery: Some(index::ReadOnlyRecovery::SnapshotFallbackWal),
            ..
        }
    ));
    let lock_report = status::check_locks(root, None, None).unwrap();
    assert!(matches!(
        lock_report.writer_lock,
        status::WriterLockStatus::Absent { .. }
    ));
    assert!(lock_report.wal_sidecar.present || lock_report.shared_memory_sidecar.present);
    let advice = &lock_report.recommended_action;
    assert!(advice.contains("wedged writer holding live WAL sidecars"));
}
/// Resolves the project root from `src/nested` inside the graph fixture and expects
/// `check_locks` to report the fixture root as source root and its `.tsift/index.db` as the
/// database path.
#[test]
fn locks_report_uses_ancestor_project_root_for_nested_paths() {
    let fixture = setup_graph_index();
    let base = fixture.path();
    let inner_dir = base.join("src/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    let resolved_root = lint::resolve_project_root_or_canonical_path(&inner_dir).unwrap();
    let lock_report = status::check_locks(&resolved_root, Some(&inner_dir), None).unwrap();
    assert_eq!(lock_report.source_root, base);
    assert_eq!(lock_report.db_path, base.join(".tsift/index.db"));
}
/// Indexes the workspace fixture and calls `check_locks` with `src/alpha/nested` as the
/// query path; the report must describe the `alpha` submodule (label, source root, database
/// path, and reindex command).
#[test]
fn workspace_locks_report_infers_scope_from_nested_path() {
    let workspace = setup_workspace();
    let base = workspace.path();
    cmd_index(
        base, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let inner_dir = base.join("src/alpha/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    let resolved_root = lint::resolve_project_root_or_canonical_path(&inner_dir).unwrap();
    let lock_report = status::check_locks(&resolved_root, Some(&inner_dir), None).unwrap();
    let cfg = config::Config::load(base).unwrap();
    assert_eq!(lock_report.label, "submodule `alpha` index");
    assert_eq!(lock_report.source_root, base.join("src/alpha"));
    assert_eq!(lock_report.db_path, cfg.db_path_for(base, "alpha"));
    let expected_command = format!("tsift index --submodule alpha {}", base.display());
    assert_eq!(lock_report.reindex_command, expected_command);
}
/// Rewrites `src/alpha/lib.rs` after indexing, runs a search scoped to `alpha`, and verifies
/// through `compute_changes` that the `alpha` index has no pending changes left.
#[test]
fn scoped_search_cmd_autoindexes_stale_submodule_index_by_default() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let alpha_lib = root.join("src/alpha/lib.rs");
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }";
    std::fs::write(&alpha_lib, updated_source).unwrap();
    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        Some("alpha".to_string()),
        false, false, true, 0, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
    let cfg = config::Config::load(root).unwrap();
    let reader = index::IndexDb::open_read_only(&cfg.db_path_for(root, "alpha")).unwrap();
    let pending = reader.compute_changes(&root.join("src/alpha")).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
/// Rewrites `src/alpha/lib.rs`, locks the `alpha` database with `hold_rollback_journal_lock`,
/// and expects a search scoped to `alpha` to fail with a "search aborted" message that names
/// the `alpha` submodule index and does not contain "database is locked".
#[test]
fn scoped_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let alpha_lib = root.join("src/alpha/lib.rs");
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }";
    std::fs::write(&alpha_lib, updated_source).unwrap();
    let cfg = config::Config::load(root).unwrap();
    let _journal_guard = hold_rollback_journal_lock(&cfg.db_path_for(root, "alpha"));
    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        Some("alpha".to_string()),
        false, false, false, 0, false, false, false, false, false, false,
    );
    let text = outcome.unwrap_err().to_string();
    assert!(text.contains("search aborted"));
    assert!(text.contains("submodule `alpha` index"));
    assert!(!text.contains("database is locked"));
}
/// Rewrites `src/alpha/lib.rs` after indexing, runs an unscoped search with this test's flag
/// combination, and verifies through `compute_changes` that the `alpha` index has no pending
/// changes left.
#[test]
fn federated_search_cmd_autoindexes_stale_indexes_by_default() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let alpha_lib = root.join("src/alpha/lib.rs");
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }";
    std::fs::write(&alpha_lib, updated_source).unwrap();
    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        true, false, true, 0, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
    let cfg = config::Config::load(root).unwrap();
    let reader = index::IndexDb::open_read_only(&cfg.db_path_for(root, "alpha")).unwrap();
    let pending = reader.compute_changes(&root.join("src/alpha")).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
/// Rewrites `src/alpha/lib.rs`, locks the `alpha` database with `hold_rollback_journal_lock`,
/// and expects the unscoped search to fail with a message that contains "stale", names the
/// `alpha` submodule index, and does not contain "database is locked".
#[test]
fn federated_search_cmd_reports_stale_when_submodule_index_is_locked_by_rollback_journal() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let alpha_lib = root.join("src/alpha/lib.rs");
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn alpha_helper() { println!(\"updated\"); }\nfn alpha_main() { alpha_helper(); }";
    std::fs::write(&alpha_lib, updated_source).unwrap();
    let cfg = config::Config::load(root).unwrap();
    let _journal_guard = hold_rollback_journal_lock(&cfg.db_path_for(root, "alpha"));
    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        true, false, false, 30, false, false, false, false, false, false,
    );
    let text = outcome.unwrap_err().to_string();
    assert!(text.contains("stale"));
    assert!(text.contains("submodule `alpha` index"));
    assert!(!text.contains("database is locked"));
}
/// After indexing the workspace fixture, an unscoped lexical search at the workspace root
/// must fail with the error checked by `assert_workspace_search_requires_explicit_target`
/// and must not leave a `.tsift/index.db` at the root.
#[test]
fn workspace_search_cmd_requires_explicit_target_without_shared_root_index() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(root.to_path_buf()),
        5,
        Some("lexical".to_string()),
        None,
        false, false, true, 0, false, false, false, false, false, false,
    );
    assert_workspace_search_requires_explicit_target(outcome.unwrap_err());
    let shared_index = root.join(".tsift/index.db");
    assert!(!shared_index.exists());
}
/// Indexes the workspace fixture, creates `src/alpha/nested`, and expects an unscoped lexical
/// search started from that nested directory to succeed.
#[test]
fn workspace_search_cmd_infers_scope_from_nested_path() {
    let workspace = setup_workspace();
    let root = workspace.path();
    cmd_index(
        root, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let inner_dir = root.join("src/alpha/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    let outcome = cmd_search(
        "alpha_helper".to_string(),
        Some(inner_dir),
        5,
        Some("lexical".to_string()),
        None,
        false, false, false, 0, false, false, false, false, false, false,
    );
    assert!(outcome.is_ok());
}
/// In the duplicate-leaf-name fixture, resolving the query database from `vendor/foo/nested`
/// with no explicit scope must yield the database path configured for `vendor/foo`.
#[test]
fn resolve_query_db_path_infers_matching_duplicate_leaf_scope_from_nested_path() {
    let workspace = setup_workspace_with_duplicate_leaf_names();
    let base = workspace.path();
    cmd_index(
        base, false, false, false, false, false, true, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let inner_dir = base.join("vendor/foo/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    let resolved_root = lint::resolve_project_root_or_canonical_path(&inner_dir).unwrap();
    let resolved_db = resolve_query_db_path(&resolved_root, &inner_dir, None).unwrap();
    let cfg = config::Config::load(base).unwrap();
    assert_eq!(resolved_db, cfg.db_path_for(base, "vendor/foo"));
}
/// While `hold_write_lock` keeps a `BEGIN IMMEDIATE` transaction open on the fixture database,
/// `cmd_graph` for `main` must still succeed.
#[test]
fn graph_cmd_succeeds_while_writer_lock_is_held() {
    let fixture = setup_graph_index();
    let index_db = fixture.path().join(".tsift/index.db");
    let _write_guard = hold_write_lock(&index_db);
    let outcome = cmd_graph(
        "main",
        fixture.path(),
        false, false,
        None,
        20,
        false, true, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// Rewrites `main.rs` after indexing, runs `cmd_graph` for `helper`, and verifies through
/// `compute_changes` that the index has no new, modified, or deleted files left.
#[test]
fn graph_cmd_autoindexes_stale_index_by_default() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    // Pause before the rewrite — presumably so the file's mtime differs from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let updated_source =
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n";
    std::fs::write(root.join("main.rs"), updated_source).unwrap();
    let outcome = cmd_graph(
        "helper",
        root,
        true, false,
        None,
        20,
        false, true, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
    let reader = index::IndexDb::open_read_only(&root.join(".tsift/index.db")).unwrap();
    let pending = reader.compute_changes(root).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
/// While `hold_rollback_journal_lock` keeps `.tsift/index.db` locked, `cmd_graph` for `main`
/// on the graph fixture must still succeed.
#[test]
fn graph_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let fixture = setup_graph_index();
    let index_db = fixture.path().join(".tsift/index.db");
    let _journal_guard = hold_rollback_journal_lock(&index_db);
    let outcome = cmd_graph(
        "main",
        fixture.path(),
        false, false,
        None,
        20,
        false, true, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// Creates `src/nested` inside the graph fixture and expects `cmd_graph` for `helper` to
/// succeed when pointed at that nested directory.
#[test]
fn graph_cmd_uses_ancestor_project_root_for_nested_paths() {
    let fixture = setup_graph_index();
    let inner_dir = fixture.path().join("src/nested");
    std::fs::create_dir_all(&inner_dir).unwrap();
    let outcome = cmd_graph(
        "helper",
        &inner_dir,
        true, false,
        None,
        20,
        false, false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// While `hold_writer_lock` holds `.tsift/index.lock`, `cmd_communities` on the graph fixture
/// must still succeed.
#[test]
fn communities_cmd_succeeds_while_writer_lock_is_held() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    let _writer_guard = hold_writer_lock(&root.join(".tsift/index.lock"));
    let outcome = cmd_communities(
        root,
        None,
        1,
        10,
        false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// While `hold_rollback_journal_lock` keeps `.tsift/index.db` locked, `cmd_communities` on the
/// graph fixture must still succeed.
#[test]
fn communities_cmd_uses_snapshot_fallback_when_rollback_journal_is_locked() {
    let fixture = setup_graph_index();
    let root = fixture.path();
    let index_db = root.join(".tsift/index.db");
    let _journal_guard = hold_rollback_journal_lock(&index_db);
    let outcome = cmd_communities(
        root,
        None,
        1,
        10,
        false, false, false, false, false, false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
/// Indexes a project containing `fn alpha_helper` and a README that mentions `alpha_helper`,
/// then checks that linting the README with entities collected from the project root's index
/// yields an annotation whose text is `alpha_helper`.
#[test]
fn lint_finds_entities_from_project_root_index_db() {
    let project = tempfile::tempdir().unwrap();
    let base = project.path();
    let readme = base.join("README.md");
    std::fs::write(base.join("main.rs"), "fn alpha_helper() {}\n").unwrap();
    std::fs::write(&readme, "alpha_helper should be backticked.\n").unwrap();
    cmd_index(
        base, false, false, false, false, false, false, None, false, false, false, false, false,
        false,
    )
    .unwrap();
    let project_root = lint::find_project_root_for_path(&readme)
        .unwrap()
        .unwrap();
    let entities = lint::collect_entities_from_index_path(&project_root).unwrap();
    let lint_report = lint::lint_markdown(&readme, &entities).unwrap();
    let flagged = lint_report
        .annotations
        .iter()
        .any(|ann| ann.text == "alpha_helper");
    assert!(flagged);
}
// --- search timeout ---
/// A direct `run_sift_search` over a one-file temp directory must succeed and leave
/// the requested cache directory on disk.
#[test]
fn search_direct_runs_ok() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path().to_path_buf();
    let cache = root.join(".tsift/search-cache");
    std::fs::write(root.join("test.rs"), "fn main() {}").unwrap();
    let outcome = run_sift_search(&root, &cache, "main", 1, "lexical");
    assert!(outcome.is_ok(), "direct search should succeed");
    assert!(
        cache.exists(),
        "search should create the configured cache dir"
    );
}
/// `run_search_with_timeout` called with a timeout argument of `0` must still succeed
/// and leave the requested cache directory on disk.
#[test]
fn search_timeout_zero_disables_timeout() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path().to_path_buf();
    let cache = root.join(".tsift/search-cache");
    std::fs::write(root.join("test.rs"), "fn main() {}").unwrap();
    let outcome = run_search_with_timeout(&root, &cache, "main", 1, 0, "lexical", &[]);
    assert!(outcome.is_ok(), "timeout=0 should still work (no timeout)");
    assert!(
        cache.exists(),
        "timeout=0 should keep using the stable search cache dir"
    );
}
/// Builds an index, deletes its database file, and checks that the timeout message for
/// that target reports the missing index and a `tsift index` hint instead of claiming
/// the search root looks fresh.
#[test]
fn search_timeout_message_reports_missing_index_as_rebuild_needed() {
    let project = tempfile::tempdir().unwrap();
    std::fs::write(project.path().join("main.rs"), "fn main() {}\n").unwrap();
    cmd_index(
        project.path(),
        false,
        false,
        false,
        false,
        false,
        false,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .unwrap();
    let index_db = project.path().join(".tsift/index.db");
    // Remove the database after indexing so the target points at a missing file.
    std::fs::remove_file(&index_db).unwrap();
    let target = SearchIndexTarget {
        label: String::from("index"),
        db_path: index_db,
        source_root: project.path().to_path_buf(),
        scope_name: None,
        reindex_cmd: format!("tsift index {}", project.path().display()),
    };
    let message = search_timeout_message(1, "lexical", &[target]).unwrap();
    for expected in ["timed out after 1s", "index is missing", "Run `tsift index"] {
        assert!(message.contains(expected));
    }
    assert!(!message.contains("search root looks fresh"));
}
/// The path produced by `next_search_worker_output_path` must carry a `json` extension.
#[test]
fn search_worker_output_path_uses_json_suffix() {
    let output_path = next_search_worker_output_path();
    let has_json_extension = matches!(output_path.extension(), Some(ext) if ext == "json");
    assert!(has_json_extension);
}
// --- index quiet mode ---
/// Runs `cmd_index` against the graph fixture with this test's flag combination and
/// requires it to succeed.
///
/// Only success is asserted; the command's printed output is not inspected here.
/// On failure the underlying error is included in the panic message instead of being
/// discarded by a bare `is_ok()` check.
#[test]
fn index_quiet_suppresses_file_list() {
    let dir = setup_graph_index();
    cmd_index(
        dir.path(),
        false,
        true,
        false,
        false,
        true,
        false,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .expect("cmd_index should succeed for the quiet flag combination");
}
/// Runs `cmd_index` against the graph fixture with this test's flag combination and
/// requires it to succeed.
///
/// Only success is asserted; the command's printed output is not inspected here.
/// On failure the underlying error is included in the panic message instead of being
/// discarded by a bare `is_ok()` check.
#[test]
fn index_exit_code_implies_quiet() {
    let dir = setup_graph_index();
    cmd_index(
        dir.path(),
        false,
        true,
        false,
        false,
        false,
        false,
        None,
        false,
        false,
        false,
        false,
        false,
        false,
    )
    .expect("cmd_index should succeed for the exit-code flag combination");
}
/// Runs `cmd_index` against the graph fixture with this test's flag combination and
/// requires it to succeed.
///
/// Only success is asserted; the command's printed output is not inspected here.
/// On failure the underlying error is included in the panic message instead of being
/// discarded by a bare `is_ok()` check.
#[test]
fn index_quiet_json_omits_changes() {
    let dir = setup_graph_index();
    cmd_index(
        dir.path(),
        false,
        true,
        false,
        false,
        true,
        false,
        None,
        true,
        false,
        false,
        false,
        false,
        false,
    )
    .expect("cmd_index should succeed for the quiet JSON flag combination");
}
/// `tsift workflow` with no further arguments parses to `Commands::Workflow` with
/// topic `search` and `json` unset.
#[test]
fn cli_workflow_defaults_to_search_topic() {
    let parsed = parse_cli(["tsift", "workflow"]);
    let Some(Commands::Workflow { topic, json }) = parsed.command else {
        panic!("expected Workflow command");
    };
    assert_eq!(topic, "search");
    assert!(!json);
}
/// Pins the ordered step names of the search workflow recipe and checks that its handle
/// contract and the `preserves` lists of steps 1, 2 and 4 mention the expected handle kinds.
#[test]
fn search_workflow_recipe_preserves_handles_across_expansions() {
    let recipe = search_workflow_recipe();
    let observed_steps: Vec<&str> = recipe.steps.iter().map(|step| step.name).collect();
    let expected_steps = vec![
        "exact-anchor",
        "semantic-search",
        "explain-symbol",
        "summarize-selection",
        "digest-expansion",
    ];
    assert_eq!(observed_steps, expected_steps);

    let contract_mentions_origin = recipe
        .handle_contract
        .iter()
        .any(|item| item.contains("originating command"));
    assert!(contract_mentions_origin);

    // True when the step at `index` lists an entry in `preserves` containing `needle`.
    let step_preserves = |index: usize, needle: &str| {
        recipe.steps[index]
            .preserves
            .iter()
            .any(|item| item.contains(needle))
    };
    assert!(step_preserves(1, "sfam-*"));
    assert!(step_preserves(2, "ecall-*"));
    assert!(step_preserves(4, "artifact handles"));
}
// --- JSON compact vs pretty ---
/// Compact `to_json` output must be a single line and contain the `"a"` key.
#[test]
fn to_json_compact_default() {
    let value = serde_json::json!({"a": 1, "b": [2, 3]});
    let rendered = to_json(&value, false, false).unwrap();
    assert!(!rendered.contains('\n'));
    // Any one of these spellings is accepted; note the last needle is a prefix of the
    // first two, so it alone decides the outcome.
    let accepted = ["\"a\":1", "\"a\": 1", "\"a\":"];
    assert!(accepted.iter().any(|needle| rendered.contains(needle)));
}
/// Pretty `to_json` output must contain at least one newline and at least one space.
#[test]
fn to_json_pretty_indents() {
    let value = serde_json::json!({"a": 1, "b": [2, 3]});
    let rendered = to_json(&value, true, false).unwrap();
    let has_newline = rendered.contains('\n');
    let has_space = rendered.contains(" ");
    assert!(has_newline);
    assert!(has_space);
}
/// For the same value, compact `to_json` output must be strictly shorter than pretty output.
#[test]
fn to_json_compact_is_shorter() {
    let value =
        serde_json::json!({"name": "test", "items": [1, 2, 3], "nested": {"key": "value"}});
    let compact_len = to_json(&value, false, false).unwrap().len();
    let pretty_len = to_json(&value, true, false).unwrap().len();
    assert!(compact_len < pretty_len);
}
/// Terse output wraps the payload as `{"_s": …, "d": …}` and exposes `caller_file`,
/// `caller_name` and `call_site_line` under the short keys `cf`, `cn` and `csl`.
#[test]
fn terse_renames_keys() {
    let input =
        serde_json::json!({"caller_file": "a.rs", "caller_name": "main", "call_site_line": 10});
    let rendered = to_json(&input, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    assert!(decoded["_s"].is_object());
    let data = &decoded["d"];
    assert_eq!(data["cf"], "a.rs");
    assert_eq!(data["cn"], "main");
    assert_eq!(data["csl"], 10);
}
/// The `_s` schema in terse output maps `n`/`sc` back to `name`/`score` and has no
/// entry for `cf`, a short key this payload never uses.
#[test]
fn terse_schema_only_includes_used_keys() {
    let input = serde_json::json!({"name": "test", "score": 0.5});
    let rendered = to_json(&input, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let key_schema = decoded["_s"].as_object().unwrap();
    assert_eq!(key_schema["n"], "name");
    assert_eq!(key_schema["sc"], "score");
    assert!(!key_schema.contains_key("cf"));
}
/// Terse key renaming also applies to objects nested inside arrays: `callers[0]` is
/// reachable as `crs[0]` with `cn`/`cf` fields.
#[test]
fn terse_nested_arrays() {
    let input = serde_json::json!({"callers": [{"caller_name": "a", "caller_file": "b.rs", "caller_line": 1, "callee_name": "c", "call_site_line": 2}]});
    let rendered = to_json(&input, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let first_caller = &decoded["d"]["crs"][0];
    assert_eq!(first_caller["cn"], "a");
    assert_eq!(first_caller["cf"], "b.rs");
}
/// In terse output `custom_field` keeps its original name while `name` is shortened to `n`.
#[test]
fn terse_preserves_unknown_keys() {
    let input = serde_json::json!({"custom_field": "value", "name": "test"});
    let rendered = to_json(&input, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let data = &decoded["d"];
    assert_eq!(data["custom_field"], "value");
    assert_eq!(data["n"], "test");
}
// --- schema-then-values ---
/// With schema mode on, an array of same-shaped objects is rewritten to a column list
/// `_c` plus value rows `_r`.
#[test]
fn schema_converts_homogeneous_arrays() {
    let input = serde_json::json!({"symbols": [
        {"name": "foo", "kind": "fn", "line": 10},
        {"name": "bar", "kind": "fn", "line": 20}
    ]});
    let rendered = to_json_schema(&input, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let table = &decoded["symbols"];
    // Columns are expected in alphabetical key order (serde_json's BTreeMap-backed map).
    let expected_columns = serde_json::json!(["kind", "line", "name"]);
    assert_eq!(table["_c"], expected_columns);
    let rows = &table["_r"];
    assert_eq!(rows[0], serde_json::json!(["fn", 10, "foo"]));
    assert_eq!(rows[1], serde_json::json!(["fn", 20, "bar"]));
}
/// A single-element object array is left as a plain array in schema mode.
#[test]
fn schema_skips_short_arrays() {
    let input = serde_json::json!({"items": [{"name": "only"}]});
    let rendered = to_json_schema(&input, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let items = &decoded["items"];
    assert!(items.is_array());
    assert_eq!(items[0]["name"], "only");
}
/// An array whose objects have different key sets is left as a plain array in schema mode.
#[test]
fn schema_skips_heterogeneous_arrays() {
    let input = serde_json::json!({"items": [{"a": 1}, {"b": 2}]});
    let rendered = to_json_schema(&input, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let items = &decoded["items"];
    assert!(items.is_array());
    assert_eq!(items[0]["a"], 1);
}
/// With terse and schema modes both on, the payload sits under `d`, `callers` becomes
/// `crs`, and that array is rewritten to `_c`/`_r` form.
#[test]
fn schema_with_terse_combines() {
    let input = serde_json::json!({"callers": [
        {"caller_name": "a", "caller_file": "x.rs"},
        {"caller_name": "b", "caller_file": "y.rs"}
    ]});
    let rendered = to_json_schema(&input, false, true, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    assert!(decoded["_s"].is_object());
    let callers_table = &decoded["d"]["crs"];
    assert!(callers_table["_c"].is_array());
    assert!(callers_table["_r"].is_array());
    // Short keys sort as cf < cn, so each row is [caller_file, caller_name].
    let first_row = &callers_table["_r"][0];
    assert_eq!(*first_row, serde_json::json!(["x.rs", "a"]));
}
/// An array of plain strings passes through schema mode unchanged.
#[test]
fn schema_preserves_non_object_arrays() {
    let input = serde_json::json!({"tags": ["a", "b", "c"]});
    let rendered = to_json_schema(&input, false, false, true).unwrap();
    let decoded: serde_json::Value = serde_json::from_str(&rendered).unwrap();
    let expected_tags = serde_json::json!(["a", "b", "c"]);
    assert_eq!(decoded["tags"], expected_tags);
}
/// `--schema` placed before the subcommand sets `cli.schema` and still parses `search`.
#[test]
fn cli_accepts_global_schema_flag() {
    let parsed = parse_cli(["tsift", "--schema", "search", "test"]);
    assert!(parsed.schema);
    let is_search = matches!(parsed.command, Some(Commands::Search { .. }));
    assert!(is_search);
}
/// `--envelope` placed before the subcommand sets `cli.envelope` and still parses
/// `context-pack`.
#[test]
fn cli_accepts_global_envelope_flag() {
    let argv = [
        "tsift",
        "--envelope",
        "context-pack",
        "tasks/software/tsift.md",
    ];
    let parsed = parse_cli(argv);
    assert!(parsed.envelope);
    let is_context_pack = matches!(parsed.command, Some(Commands::ContextPack { .. }));
    assert!(is_context_pack);
}
/// `tsift locks` parses to the `Commands::Locks` variant.
#[test]
fn cli_accepts_locks_command() {
    let parsed = parse_cli(["tsift", "locks"]);
    let is_locks = matches!(parsed.command, Some(Commands::Locks { .. }));
    assert!(is_locks);
}
/// `tsift locks --scope alpha` carries `alpha` through as the `scope` field.
#[test]
fn cli_locks_accepts_scope_flag() {
    let parsed = parse_cli(["tsift", "locks", "--scope", "alpha"]);
    let Some(Commands::Locks { scope, .. }) = parsed.command else {
        panic!("expected Locks command");
    };
    assert_eq!(scope.as_deref(), Some("alpha"));
}
/// `--autoindex` on `search` sets `autoindex` and leaves `no_autoindex` unset.
#[test]
fn cli_search_accepts_autoindex_flag() {
    let parsed = parse_cli(["tsift", "search", "test", "--autoindex"]);
    let Some(Commands::Search {
        autoindex,
        no_autoindex,
        ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };
    assert!(autoindex);
    assert!(!no_autoindex);
}
/// `--exact` on `search` sets `exact` and leaves `strategy` as `None`.
#[test]
fn cli_search_accepts_exact_flag() {
    let parsed = parse_cli(["tsift", "search", "test", "--exact"]);
    let Some(Commands::Search {
        exact, strategy, ..
    }) = parsed.command
    else {
        panic!("expected Search command");
    };
    assert!(exact);
    assert!(strategy.is_none());
}
/// `tsift diff-digest --json .` sets `json` and `path`, leaving `cached` false and
/// `revision` empty.
#[test]
fn cli_parses_diff_digest_command() {
    let parsed = parse_cli(["tsift", "diff-digest", "--json", "."]);
    let Some(Commands::DiffDigest {
        json,
        path,
        cached,
        revision,
    }) = parsed.command
    else {
        panic!("expected DiffDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert!(!cached);
    assert!(revision.is_none());
}
/// Passing both `--cached` and `--revision` to `diff-digest` must be a parse error whose
/// message names both flags.
#[test]
fn cli_rejects_conflicting_diff_digest_modes() {
    let argv = [
        "tsift",
        "diff-digest",
        "--cached",
        "--revision",
        "HEAD",
        ".",
    ];
    let Err(err) = try_parse_cli(argv) else {
        panic!("expected conflicting diff-digest modes to fail");
    };
    assert!(err.to_string().contains("--cached"));
    assert!(err.to_string().contains("--revision"));
}
/// `test-digest` accepts `--path`, `--input`, `--runner` and `--json` and exposes each
/// value on the parsed command.
#[test]
fn cli_parses_test_digest_command() {
    let argv = [
        "tsift",
        "test-digest",
        "--path",
        ".",
        "--input",
        "target/test.log",
        "--runner",
        "cargo",
        "--json",
    ];
    let Some(Commands::TestDigest {
        json,
        path,
        input,
        runner,
    }) = parse_cli(argv).command
    else {
        panic!("expected TestDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/test.log")));
    assert_eq!(runner.as_deref(), Some("cargo"));
}
/// `log-digest` accepts `--path`, `--input` and `--json` and exposes each value on the
/// parsed command.
#[test]
fn cli_parses_log_digest_command() {
    let argv = [
        "tsift",
        "log-digest",
        "--path",
        ".",
        "--input",
        "target/build.log",
        "--json",
    ];
    let Some(Commands::LogDigest { json, path, input }) = parse_cli(argv).command else {
        panic!("expected LogDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/build.log")));
}
/// `metric-digest` accepts input/baseline paths, a metric name, a lower-is-better metric,
/// history and top counts, and `--json`, exposing each on the parsed command.
#[test]
fn cli_parses_metric_digest_command() {
    let argv = [
        "tsift",
        "metric-digest",
        "--input",
        "target/runs.json",
        "--baseline",
        "target/prior.json",
        "--metric",
        "session_mae",
        "--lower-is-better",
        "session_mae",
        "--history",
        "4",
        "--top",
        "2",
        "--json",
    ];
    let Some(Commands::MetricDigest {
        input,
        baseline,
        metrics,
        lower_is_better,
        history,
        top,
        json,
        ..
    }) = parse_cli(argv).command
    else {
        panic!("expected MetricDigest command");
    };
    assert!(json);
    assert_eq!(input, Some(PathBuf::from("target/runs.json")));
    assert_eq!(baseline, Some(PathBuf::from("target/prior.json")));
    assert_eq!(metrics, vec!["session_mae"]);
    assert_eq!(lower_is_better, vec!["session_mae"]);
    assert_eq!(history, 4);
    assert_eq!(top, 2);
}
/// `dci-benchmark` accepts `--fixture` and `--json` and exposes both on the parsed command.
#[test]
fn cli_parses_dci_benchmark_command() {
    let argv = [
        "tsift",
        "dci-benchmark",
        "--fixture",
        "fixtures/dci-search-benchmark.json",
        "--json",
    ];
    let Some(Commands::DciBenchmark { fixture, json }) = parse_cli(argv).command else {
        panic!("expected DciBenchmark command");
    };
    assert!(json);
    assert_eq!(fixture, PathBuf::from("fixtures/dci-search-benchmark.json"));
}
/// `session-digest` accepts `--path`, `--input`, `--source` and `--json` and exposes each
/// value on the parsed command.
#[test]
fn cli_parses_session_digest_command() {
    let argv = [
        "tsift",
        "session-digest",
        "--path",
        ".",
        "--input",
        "target/session.md",
        "--source",
        "markdown",
        "--json",
    ];
    let Some(Commands::SessionDigest {
        json,
        path,
        input,
        source,
    }) = parse_cli(argv).command
    else {
        panic!("expected SessionDigest command");
    };
    assert!(json);
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(input, Some(PathBuf::from("target/session.md")));
    assert_eq!(source.as_deref(), Some("markdown"));
}
/// `session-cost` accepts `--input`, `--source` and `--json` and exposes each value on the
/// parsed command.
#[test]
fn cli_parses_session_cost_command() {
    let argv = [
        "tsift",
        "session-cost",
        "--input",
        "target/session.jsonl",
        "--source",
        "codex-jsonl",
        "--json",
    ];
    let Some(Commands::SessionCost {
        json,
        input,
        source,
    }) = parse_cli(argv).command
    else {
        panic!("expected SessionCost command");
    };
    assert!(json);
    assert_eq!(input, Some(PathBuf::from("target/session.jsonl")));
    assert_eq!(source.as_deref(), Some("codex-jsonl"));
}
/// `session-review <path> --next-context --json` sets `json`, `next_context` and `path`.
#[test]
fn cli_parses_session_review_command() {
    let argv = [
        "tsift",
        "session-review",
        "tasks/software/tsift.md",
        "--next-context",
        "--json",
    ];
    let Some(Commands::SessionReview {
        json,
        next_context,
        path,
        ..
    }) = parse_cli(argv).command
    else {
        panic!("expected SessionReview command");
    };
    assert!(json);
    assert!(next_context);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
}
/// `search` parses `--max-items 3 --max-bytes 96` into `Some(3)` / `Some(96)`.
#[test]
fn cli_search_accepts_budget_flags() {
    let argv = [
        "tsift",
        "search",
        "alpha_helper",
        "--max-items",
        "3",
        "--max-bytes",
        "96",
    ];
    let Some(Commands::Search {
        max_items,
        max_bytes,
        ..
    }) = parse_cli(argv).command
    else {
        panic!("expected Search command");
    };
    assert_eq!(max_items, Some(3));
    assert_eq!(max_bytes, Some(96));
}
/// `search --budget small` parses to `ResponseBudgetPreset::Small`.
#[test]
fn cli_search_accepts_budget_preset() {
    let parsed = parse_cli(["tsift", "search", "alpha_helper", "--budget", "small"]);
    let Some(Commands::Search { budget, .. }) = parsed.command else {
        panic!("expected Search command");
    };
    assert_eq!(budget, Some(ResponseBudgetPreset::Small));
}
/// Pins `ResponseBudget::from_cli` for three inputs: the `Small` preset alone, the same
/// preset with an explicit item cap of 7, and no caps or preset with the final flag set.
#[test]
fn response_budget_presets_fill_defaults_and_preserve_explicit_caps() {
    // Preset only.
    let preset_only =
        ResponseBudget::from_cli(None, None, Some(ResponseBudgetPreset::Small), false);
    assert_eq!(preset_only.preview_items(), 3);
    assert_eq!(preset_only.preview_bytes(), 120);
    assert_eq!(preset_only.follow_up_items(), 4);

    // Explicit item cap alongside the preset: the byte cap is unchanged.
    let with_item_cap =
        ResponseBudget::from_cli(Some(7), None, Some(ResponseBudgetPreset::Small), false);
    assert_eq!(with_item_cap.preview_items(), 7);
    assert_eq!(with_item_cap.preview_bytes(), 120);
    assert_eq!(with_item_cap.follow_up_items(), 7);

    // No caps and no preset, final argument `true`: the budget must report itself active.
    let flag_only = ResponseBudget::from_cli(None, None, None, true);
    assert!(flag_only.is_active());
}
/// `explain` parses `--max-items 2 --max-bytes 80` into `Some(2)` / `Some(80)`.
#[test]
fn cli_explain_accepts_budget_flags() {
    let argv = [
        "tsift",
        "explain",
        "alpha_helper",
        "--max-items",
        "2",
        "--max-bytes",
        "80",
    ];
    let Some(Commands::Explain {
        max_items,
        max_bytes,
        ..
    }) = parse_cli(argv).command
    else {
        panic!("expected Explain command");
    };
    assert_eq!(max_items, Some(2));
    assert_eq!(max_bytes, Some(80));
}
/// `session-review` parses `--max-items 4 --max-bytes 120` into `Some(4)` / `Some(120)`.
#[test]
fn cli_session_review_accepts_budget_flags() {
    let argv = [
        "tsift",
        "session-review",
        "tasks/software/tsift.md",
        "--max-items",
        "4",
        "--max-bytes",
        "120",
    ];
    let Some(Commands::SessionReview {
        max_items,
        max_bytes,
        ..
    }) = parse_cli(argv).command
    else {
        panic!("expected SessionReview command");
    };
    assert_eq!(max_items, Some(4));
    assert_eq!(max_bytes, Some(120));
}
/// `context-pack` accepts a path plus test/log inputs, a runner, item/byte caps and
/// `--json`; `budget` and `convex_snapshot` stay `None` when not supplied.
#[test]
fn cli_parses_context_pack_command() {
    let argv = [
        "tsift",
        "context-pack",
        "tasks/software/tsift.md",
        "--test-input",
        "target/test.log",
        "--runner",
        "cargo",
        "--log-input",
        "target/build.log",
        "--max-items",
        "3",
        "--max-bytes",
        "96",
        "--json",
    ];
    // Every field is destructured (no `..`), so a new field on the variant forces this
    // test to be revisited.
    let Some(Commands::ContextPack {
        path,
        test_input,
        runner,
        log_input,
        json,
        max_items,
        max_bytes,
        budget,
        convex_snapshot,
    }) = parse_cli(argv).command
    else {
        panic!("expected ContextPack command");
    };
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(test_input, Some(PathBuf::from("target/test.log")));
    assert_eq!(runner.as_deref(), Some("cargo"));
    assert_eq!(log_input, Some(PathBuf::from("target/build.log")));
    assert!(json);
    assert_eq!(max_items, Some(3));
    assert_eq!(max_bytes, Some(96));
    assert!(budget.is_none());
    assert!(convex_snapshot.is_none());
}
/// `token-savings` accepts `--fixture`, `--fail-under` and `--json` and exposes each on
/// the parsed command.
#[test]
fn cli_parses_token_savings_command() {
    let argv = [
        "tsift",
        "token-savings",
        "--fixture",
        "fixtures/tsift-token-savings.json",
        "--fail-under",
        "--json",
    ];
    let Some(Commands::TokenSavings {
        fixture,
        fail_under,
        json,
    }) = parse_cli(argv).command
    else {
        panic!("expected TokenSavings command");
    };
    assert_eq!(fixture, PathBuf::from("fixtures/tsift-token-savings.json"));
    assert!(fail_under);
    assert!(json);
}
/// Builds a one-case fixture with twelve raw symbols (six spellings each of two names)
/// and two tagpath families, then checks the report passes the 40% savings threshold and
/// records the expected counts.
#[test]
fn token_savings_report_records_fixture_thresholds() {
    let spellings = [
        "validate_user",
        "validateUser",
        "ValidateUser",
        "validate-user",
        "VALIDATE_USER",
        "Validate_User",
        "raw_symbol",
        "rawSymbol",
        "RawSymbol",
        "raw-symbol",
        "RAW_SYMBOL",
        "Raw_Symbol",
    ];
    // One raw symbol per spelling, each in its own file, on a 1-based line number.
    let raw_symbols = spellings
        .iter()
        .enumerate()
        .map(|(idx, identifier)| TokenSavingsRawSymbol {
            identifier: (*identifier).to_string(),
            file: format!("src/example_{idx}.rs"),
            line: (idx + 1) as u64,
            context: String::from("function"),
        })
        .collect();
    let validate_family = TokenSavingsFamily {
        canonical: String::from("validate_user"),
        count: 6,
        aliases: BTreeMap::new(),
    };
    let raw_family = TokenSavingsFamily {
        canonical: String::from("raw_symbol"),
        count: 6,
        aliases: BTreeMap::new(),
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: String::from("fixture"),
        token_estimate: String::from("ceil(utf8_bytes / 4)"),
        cases: vec![TokenSavingsFixtureCase {
            name: String::from("search-preview"),
            surface: String::from("search"),
            minimum_savings_percent: 40.0,
            raw_symbols,
            tagpath_families: vec![validate_family, raw_family],
            context_pack_inputs: None,
            session_review_inputs: None,
            source_read_inputs: None,
        }],
    };
    let report = build_token_savings_report(&fixture).unwrap();
    assert!(report.pass);
    let case = &report.cases[0];
    assert_eq!(case.raw_symbol_count, 12);
    assert_eq!(case.family_count, 2);
    assert_eq!(case.status, "pass");
    assert!(case.byte_delta > 0);
    assert!(case.raw_estimated_tokens > case.envelope_estimated_tokens);
    assert!(case.savings_percent >= 40.0);
}
/// A `source-read` case whose envelope window (start 40, 121 lines) covers every required
/// anchor (40, 120, 160) must produce a passing report with at least 40% savings.
#[test]
fn token_savings_source_read_inputs_preserve_required_anchors() {
    let read = TokenSavingsSourceReadInput {
        command: String::from("sed -n '40,160p' src/main.rs"),
        file: String::from("src/main.rs"),
        raw_start: 40,
        raw_lines: 121,
        raw_excerpt: "line 40\n".repeat(121),
        envelope_start: 40,
        envelope_lines: 121,
        required_line_anchors: vec![40, 120, 160],
    };
    let case = TokenSavingsFixtureCase {
        name: String::from("source-read"),
        surface: String::from("source-read"),
        minimum_savings_percent: 40.0,
        raw_symbols: Vec::new(),
        tagpath_families: Vec::new(),
        context_pack_inputs: None,
        session_review_inputs: None,
        source_read_inputs: Some(TokenSavingsSourceReadInputs { reads: vec![read] }),
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: String::from("fixture"),
        token_estimate: String::from("ceil(utf8_bytes / 4)"),
        cases: vec![case],
    };
    let report = build_token_savings_report(&fixture).unwrap();
    assert!(report.pass);
    let first_case = &report.cases[0];
    assert_eq!(first_case.surface, "source-read");
    assert!(first_case.savings_percent >= 40.0);
}
/// A `source-read` case whose envelope window (start 1, 80 lines) does not reach required
/// anchor 120 must make `build_token_savings_report` return an error naming that anchor.
#[test]
fn token_savings_source_read_inputs_fail_when_anchor_is_hidden() {
    let read = TokenSavingsSourceReadInput {
        command: String::from("cat src/main.rs"),
        file: String::from("src/main.rs"),
        raw_start: 1,
        raw_lines: 200,
        raw_excerpt: "line\n".repeat(200),
        envelope_start: 1,
        envelope_lines: 80,
        required_line_anchors: vec![120],
    };
    let case = TokenSavingsFixtureCase {
        name: String::from("source-read"),
        surface: String::from("source-read"),
        minimum_savings_percent: 40.0,
        raw_symbols: Vec::new(),
        tagpath_families: Vec::new(),
        context_pack_inputs: None,
        session_review_inputs: None,
        source_read_inputs: Some(TokenSavingsSourceReadInputs { reads: vec![read] }),
    };
    let fixture = TokenSavingsFixture {
        schema_version: 1,
        description: String::from("fixture"),
        token_estimate: String::from("ceil(utf8_bytes / 4)"),
        cases: vec![case],
    };
    let Err(err) = build_token_savings_report(&fixture) else {
        panic!("hidden anchor should fail the source-read fixture");
    };
    assert!(err.to_string().contains("hides required line anchor 120"));
}
/// With a budget of 1 item / 12 bytes, a single long-named symbol hit is reported with an
/// `sfam-` handle, a truncated name and tag alias, a root-relative file, and a
/// `tsift search` expand command.
#[test]
fn search_budget_report_truncates_symbol_preview_and_emits_stable_handle() {
    let repo_root = Path::new("/repo");
    let response = empty_search_response(repo_root, "lexical");
    let hits = vec![index::SymbolHit {
        name: String::from("alpha_helper_with_a_long_name"),
        kind: String::from("function"),
        language: String::from("rust"),
        file: String::from("/repo/src/lib.rs"),
        line: 12,
        end_line: None,
        tags: None,
        score: 0.98,
        match_type: String::from("exact_name"),
        tagpath_handle: None,
    }];
    let report = build_search_budget_report(
        "alpha_helper_with_a_long_name",
        "lexical",
        repo_root,
        &response,
        &hits,
        false,
        ResponseBudget::new(Some(1), Some(12)),
    );
    assert_eq!(report.symbols.len(), 1);
    let preview = &report.symbols[0];
    assert!(preview.handle.starts_with("sfam-"));
    assert_eq!(preview.tag_alias.as_deref(), Some("alpha/hel..."));
    assert_eq!(preview.name, "alpha_hel...");
    assert_eq!(preview.file, "src/lib.rs");
    assert!(preview.expand.contains("tsift search"));
}
/// Three hits sharing the tags `alpha,helper` (two spellings, three files) must collapse
/// into one preview entry that reports the match, surface and file counts.
#[test]
fn search_budget_report_groups_repeated_symbols_by_canonical_tag_family() {
    let repo_root = Path::new("/repo");
    let response = empty_search_response(repo_root, "lexical");
    let snake_in_lib = index::SymbolHit {
        name: String::from("alpha_helper"),
        kind: String::from("function"),
        language: String::from("rust"),
        file: String::from("/repo/src/lib.rs"),
        line: 12,
        end_line: None,
        tags: Some(String::from("alpha,helper")),
        score: 0.98,
        match_type: String::from("exact_name"),
        tagpath_handle: None,
    };
    let camel_in_main = index::SymbolHit {
        name: String::from("alphaHelper"),
        kind: String::from("method"),
        language: String::from("rust"),
        file: String::from("/repo/src/main.rs"),
        line: 34,
        end_line: None,
        tags: Some(String::from("alpha,helper")),
        score: 0.93,
        match_type: String::from("tag_overlap"),
        tagpath_handle: None,
    };
    let snake_in_worker = index::SymbolHit {
        name: String::from("alpha_helper"),
        kind: String::from("function"),
        language: String::from("rust"),
        file: String::from("/repo/src/worker.rs"),
        line: 56,
        end_line: None,
        tags: Some(String::from("alpha,helper")),
        score: 0.91,
        match_type: String::from("tag_overlap"),
        tagpath_handle: None,
    };
    let hits = vec![snake_in_lib, camel_in_main, snake_in_worker];
    let report = build_search_budget_report(
        "alpha helper",
        "lexical",
        repo_root,
        &response,
        &hits,
        false,
        ResponseBudget::new(Some(5), Some(48)),
    );
    assert_eq!(report.symbol_total, 1);
    assert_eq!(report.raw_symbol_total, 3);
    assert_eq!(report.symbols.len(), 1);
    let family = &report.symbols[0];
    assert_eq!(family.tag_alias.as_deref(), Some("alpha/helper"));
    assert_eq!(family.match_count, 3);
    assert_eq!(family.surface_count, 2);
    assert_eq!(family.file_count, 3);
    assert_eq!(
        family.surface_examples,
        vec![String::from("alpha_helper"), String::from("alphaHelper")]
    );
    assert!(family.name.contains("(+1 variant)"));
    assert!(family.file.contains("(+2 files)"));
    assert!(family.expand.contains("tsift search"));
    assert!(family.expand.contains("alpha helper"));
}
/// With 450 indexed artifacts and two raw matches under a 1-item budget, the report must
/// carry a `high-hit` scale guard whose narrowing commands include an `--exact` form, an
/// `alpha helper` form, and end with a `workflow search` command.
#[test]
fn search_budget_report_warns_on_broad_preview_and_lists_narrowing_commands() {
    let repo_root = Path::new("/repo");
    let mut response = empty_search_response(repo_root, "lexical");
    response.indexed_artifacts = 450;
    let alpha_hit = index::SymbolHit {
        name: String::from("alpha_helper"),
        kind: String::from("function"),
        language: String::from("rust"),
        file: String::from("/repo/src/lib.rs"),
        line: 12,
        end_line: None,
        tags: Some(String::from("alpha,helper")),
        score: 0.98,
        match_type: String::from("exact_name"),
        tagpath_handle: None,
    };
    let beta_hit = index::SymbolHit {
        name: String::from("beta_helper"),
        kind: String::from("function"),
        language: String::from("rust"),
        file: String::from("/repo/src/beta.rs"),
        line: 21,
        end_line: None,
        tags: Some(String::from("beta,helper")),
        score: 0.92,
        match_type: String::from("tag_overlap"),
        tagpath_handle: None,
    };
    let hits = vec![alpha_hit, beta_hit];
    let report = build_search_budget_report(
        "helper",
        "lexical",
        repo_root,
        &response,
        &hits,
        false,
        ResponseBudget::new(Some(1), Some(64)),
    );
    let guard = report
        .scale_guard
        .as_ref()
        .expect("broad previews should emit a scale guard");
    assert_eq!(guard.level, "high-hit");
    assert_eq!(guard.signals.indexed_artifacts, 450);
    assert_eq!(guard.signals.raw_symbol_matches, 2);

    let narrowing = &guard.narrow_commands;
    let suggests_exact = narrowing.iter().any(|command| command.contains("--exact"));
    assert!(suggests_exact);
    let suggests_alpha = narrowing
        .iter()
        .any(|command| command.contains("alpha helper"));
    assert!(suggests_alpha);
    let final_command = narrowing.last().unwrap();
    assert!(final_command.contains("workflow search"));
}
/// With a budget of 1 item / 24 bytes, an explain report built from one definition, two
/// callers and a three-member community keeps one of each, is marked truncated, and
/// assigns `ecall-` handles and tag aliases.
#[test]
fn explain_budget_report_limits_edges_and_members() {
    let definition = index::StoredSymbol {
        name: String::from("alpha_helper"),
        kind: String::from("function"),
        language: String::from("rust"),
        signature: None,
        file: String::from("src/lib.rs"),
        line: 10,
        end_line: None,
        parent_module: None,
        visibility: None,
        tags: None,
        tagpath_handle: None,
    };
    let symbols = vec![definition];
    let call_from_main = index::StoredEdge {
        caller_file: String::from("src/main.rs"),
        caller_name: String::from("main"),
        caller_line: 1,
        callee_name: String::from("alpha_helper"),
        call_site_line: 3,
        tagpath_handle: None,
    };
    let call_from_worker = index::StoredEdge {
        caller_file: String::from("src/worker.rs"),
        caller_name: String::from("worker"),
        caller_line: 5,
        callee_name: String::from("alpha_helper"),
        call_site_line: 8,
        tagpath_handle: None,
    };
    let callers = vec![call_from_main, call_from_worker];
    let community = graph::Community {
        id: 1,
        members: vec![
            graph::CommunityMember::new("alpha_helper"),
            graph::CommunityMember::new("main"),
            graph::CommunityMember::new("worker"),
        ],
        modularity_contribution: 0.5,
    };
    let report = build_explain_budget_report(
        "alpha_helper",
        Path::new("/repo"),
        &symbols,
        &callers,
        2,
        false,
        &[],
        0,
        false,
        Some(&community),
        ResponseBudget::new(Some(1), Some(24)),
    );
    assert_eq!(report.definitions.len(), 1);
    assert_eq!(report.callers.len(), 1);
    assert!(report.truncated);
    assert_eq!(report.community.as_ref().unwrap().members.len(), 1);
    let first_definition = &report.definitions[0];
    assert_eq!(first_definition.tag_alias.as_deref(), Some("alpha/helper"));
    let first_caller = &report.callers[0];
    assert!(first_caller.handle.starts_with("ecall-"));
    assert_eq!(first_caller.tag_alias.as_deref(), Some("main"));
}
/// Builds a full `SessionReviewReport` fixture and checks the next-context budget report
/// twice: under a tight 1-item / 12-byte budget (lists trimmed to one entry, digest
/// commands kept whole, one token action) and under a 4-item / 120-byte budget (all four
/// token actions in the expected order with their compact/restart/digest commands).
#[test]
fn session_review_next_context_budget_limits_lists() {
    let target_doc = "tasks/software/tsift.md";

    // --- fixture pieces -------------------------------------------------------------
    let aggregate_cost = session_review::SessionReviewCostSummary {
        scope: String::from("bounded_matched_sessions"),
        sessions: 1,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        cached_input_ratio: Some(40.0),
        largest_turn_total_tokens: 240,
    };
    let latest_session_cost = session_review::SessionReviewCostSummary {
        scope: String::from("latest_matched_session"),
        sessions: 1,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        cached_input_ratio: Some(66.67),
        largest_turn_total_tokens: 240,
    };
    // Guardrails are listed in a different order than the token actions asserted below.
    let guardrails = vec![
        session_cost::SessionCostGuardrail {
            kind: String::from("cache_resend"),
            severity: String::from("warn"),
            message: String::from("cached input ratio was high"),
            guidance: String::from("compact or restart the session"),
        },
        session_cost::SessionCostGuardrail {
            kind: String::from("prompt_budget"),
            severity: String::from("warn"),
            message: String::from("largest prompt turn reached 999999 tokens"),
            guidance: String::from("compact the session before another large turn"),
        },
        session_cost::SessionCostGuardrail {
            kind: String::from("restart_loop"),
            severity: String::from("warn"),
            message: String::from("restart churn detected"),
            guidance: String::from("restart cleanly"),
        },
        session_cost::SessionCostGuardrail {
            kind: String::from("noop_closeout"),
            severity: String::from("warn"),
            message: String::from("commit_already_current appeared 8 times"),
            guidance: String::from("avoid reopening without new edits"),
        },
    ];
    let prompt_targets = vec![
        session_review::SessionReviewPromptTarget {
            text: String::from("do one"),
            occurrences: 1,
        },
        session_review::SessionReviewPromptTarget {
            text: String::from("do two"),
            occurrences: 1,
        },
    ];
    let matched_session = session_review::SessionReviewSession {
        source: String::from("claude_jsonl"),
        path: String::from("/tmp/session.jsonl"),
        matched_by: vec![String::from("path")],
        modified_unix_secs: None,
        prompt_target_count: 2,
        command_groups: 0,
        file_groups: 2,
        symbol_groups: 1,
        failure_groups: 1,
        runtime_event_groups: 0,
        restart_churn_groups: 0,
        closeout_groups: 0,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        largest_turn_total_tokens: 240,
    };
    let next_context = session_review::SessionReviewNextContext {
        target: String::from("tasks/software/tsift.md"),
        active_prompt_targets: vec![String::from("do one"), String::from("do two")],
        last_verification: session_review::SessionReviewVerificationState {
            status: String::from("green"),
            detail: String::from("cargo test"),
        },
        touched_files: vec![String::from("src/lib.rs"), String::from("src/main.rs")],
        touched_symbols: vec![String::from("alpha_helper"), String::from("main")],
        unresolved_failures: vec![session_review::SessionReviewFailure {
            kind: String::from("timeout"),
            message: String::from("search timed out"),
            occurrences: 1,
            command: None,
            session_path: None,
        }],
        next_digest_commands: vec![
            String::from("tsift session-review --next-context tasks/software/tsift.md"),
            String::from("tsift diff-digest ."),
            String::from("tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log"),
            String::from("tsift log-digest --path . < target/very-long-build-output-file-name-that-must-remain-executable.log"),
        ],
    };

    // --- assembled report -----------------------------------------------------------
    let report = session_review::SessionReviewReport {
        root: String::from("/repo"),
        target: String::from(target_doc),
        target_kind: String::from("file"),
        sessions_considered: 1,
        sessions_matched: 1,
        claude_sessions: 1,
        codex_sessions: 0,
        agent_doc_logs: 0,
        prompt_target_count: 2,
        command_groups: 0,
        file_groups: 2,
        symbol_groups: 1,
        failure_groups: 1,
        runtime_event_groups: 0,
        restart_churn_groups: 0,
        closeout_groups: 0,
        usage_samples: 1,
        prompt_tokens: 120,
        cached_input_tokens: 80,
        cache_creation_input_tokens: 0,
        output_tokens: 40,
        reasoning_output_tokens: 0,
        total_tokens: 240,
        cached_input_ratio: Some(40.0),
        largest_turn_total_tokens: 240,
        aggregate_cost,
        latest_session_cost: Some(latest_session_cost),
        guardrails,
        loop_clusters: Vec::new(),
        file_read_diagnostics: Vec::new(),
        prompt_targets,
        commands: Vec::new(),
        touched_files: Vec::new(),
        touched_symbols: Vec::new(),
        failures: Vec::new(),
        runtime_events: Vec::new(),
        restart_churn: Vec::new(),
        closeout: Vec::new(),
        largest_turns: Vec::new(),
        sessions: vec![matched_session],
        next_context,
        warnings: Vec::new(),
    };

    // --- tight budget: 1 item, 12 bytes -----------------------------------------------
    let tight = build_session_review_next_context_budget_report(
        &report,
        ResponseBudget::new(Some(1), Some(12)),
        None,
    );
    assert!(tight.truncated);
    assert_eq!(tight.prompt_targets, vec!["do one"]);
    assert_eq!(tight.touched_files, vec!["src/lib.rs"]);
    let first_symbol_ref = &tight.touched_symbol_refs[0];
    assert!(first_symbol_ref.handle.starts_with("ncsym-"));
    assert_eq!(first_symbol_ref.tag_alias.as_deref(), Some("alpha/helper"));
    assert!(tight.unresolved_failures[0].handle.starts_with("snf-"));
    // All four digest commands survive, and the long third one is not shortened.
    assert_eq!(tight.next_digest_commands.len(), 4);
    assert_eq!(
        tight.next_digest_commands[2],
        "tsift test-digest --path . < target/very-long-test-output-file-name-that-must-remain-executable.log"
    );
    assert_eq!(tight.next_token_actions.len(), 1);
    assert_eq!(tight.next_token_actions[0].kind, "prompt_budget");

    // --- roomy budget: 4 items, 120 bytes ---------------------------------------------
    let roomy = build_session_review_next_context_budget_report(
        &report,
        ResponseBudget::new(Some(4), Some(120)),
        None,
    );
    let action_kinds = roomy
        .next_token_actions
        .iter()
        .map(|action| action.kind.as_str())
        .collect::<Vec<_>>();
    assert_eq!(
        action_kinds,
        vec![
            "prompt_budget",
            "cache_resend",
            "restart_loop",
            "noop_closeout"
        ]
    );
    let lead_action = &roomy.next_token_actions[0];
    assert_eq!(
        lead_action.compact_command.as_deref(),
        Some("agent-doc compact \"tasks/software/tsift.md\" --commit")
    );
    assert_eq!(
        lead_action.restart_command.as_deref(),
        Some("agent-doc start \"tasks/software/tsift.md\"")
    );
    let offers_context_pack = lead_action.digest_commands.iter().any(|command| {
        command == "tsift --envelope context-pack \"tasks/software/tsift.md\" --budget normal"
    });
    assert!(offers_context_pack);
}
// With `ResponseBudget::new(Some(1), Some(11))` the diff preview is expected to
// keep only the first changed file and to shorten symbols, tag aliases and
// summaries with a trailing "...", while the file warnings pass through.
#[test]
fn context_pack_diff_preview_limits_files_and_symbols() {
    let lib_file = diff_digest::DiffDigestFile {
        path: "src/lib.rs".to_string(),
        status: diff_digest::DiffDigestFileStatus::Modified,
        touched_symbols: vec!["alpha_helper".to_string(), "beta_helper".to_string()],
        summary_state: diff_digest::DiffDigestSummaryState::Current,
        current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
            symbol: "alpha_helper".to_string(),
            summary: "alpha helper handles the main alpha workflow".to_string(),
        }],
        added_call_edges: vec!["alpha->beta".to_string()],
        removed_call_edges: vec![],
        warnings: vec!["stale parse".to_string()],
    };
    let main_file = diff_digest::DiffDigestFile {
        path: "src/main.rs".to_string(),
        status: diff_digest::DiffDigestFileStatus::Added,
        touched_symbols: vec!["main".to_string()],
        summary_state: diff_digest::DiffDigestSummaryState::Missing,
        current_summaries: vec![],
        added_call_edges: vec![],
        removed_call_edges: vec![],
        warnings: vec![],
    };
    let digest = diff_digest::DiffDigestReport {
        root: "/repo".to_string(),
        mode: diff_digest::DiffDigestMode::WorkingTree,
        revision: None,
        files_changed: 2,
        files_with_current_summaries: 1,
        symbols_touched: 3,
        call_edges_added: 1,
        call_edges_removed: 0,
        files: vec![lib_file, main_file],
    };

    let budget = ResponseBudget::new(Some(1), Some(11));
    let preview = build_context_pack_diff_preview(&digest, budget, None);

    assert!(preview.truncated);
    assert_eq!(preview.files.len(), 1);

    // Only the first file survives the budget; inspect it in detail.
    let kept = &preview.files[0];
    assert_eq!(kept.path, "src/lib.rs");
    assert_eq!(kept.touched_symbols, vec!["alpha_he..."]);

    assert!(kept.touched_symbol_refs[0].handle.starts_with("cdsym-"));
    assert_eq!(
        kept.touched_symbol_refs[0].tag_alias.as_deref(),
        Some("alpha/he...")
    );

    assert!(kept.summary_refs[0].handle.starts_with("cdsum-"));
    assert_eq!(
        kept.summary_refs[0].tag_alias.as_deref(),
        Some("alpha/he...")
    );
    assert_eq!(kept.summary_refs[0].summary, "alpha he...");
    assert_eq!(
        kept.summary_refs[0].expand,
        "tsift summarize --file \"src/lib.rs\""
    );

    assert_eq!(kept.warnings, vec!["stale parse"]);
}
// Rewriting `main.rs` after the index fixture was built should yield exactly
// one reminder that mentions both "index stale" and the `tsift index .` hint.
#[test]
fn context_pack_status_reminders_include_stale_index_state() {
    let project = setup_graph_index();

    // Pause before touching the file; presumably this keeps the new
    // modification time distinguishable from the indexed one.
    std::thread::sleep(std::time::Duration::from_millis(50));

    let main_rs = project.path().join("main.rs");
    std::fs::write(
        main_rs,
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); Vec::new(); }\n",
    )
    .unwrap();

    let reminders = context_pack_status_reminders(project.path());
    assert_eq!(reminders.len(), 1);

    let only_reminder = &reminders[0];
    assert!(only_reminder.contains("index stale"));
    assert!(only_reminder.contains("tsift index ."));
}
// #gdbgatecold regression-lock: within one trusted context-pack run,
// `prepare_agent_doc_index_gate` and `context_pack_status_reminders` both
// call `IndexDb::inspect_read_only` for the same `(root, .tsift/index.db)`
// key and must share that inspection. While the scope guard in
// `build_context_pack_report_with_profile` is active the second call is
// served from the cache, so one miss and at least one hit are expected.
#[test]
fn build_context_pack_reuses_inspect_within_scope() {
    let project = setup_graph_index();
    init_git_repo(project.path());

    // Keep a guard alive for the whole test so the stats can be read afterwards.
    let _guard = index::InspectScopeGuard::new();

    let budget = ResponseBudget::new(Some(2), Some(96));
    let _ = build_context_pack_report(project.path(), None, None, None, budget).unwrap();

    let (hits, misses) = index::inspect_scope_stats();
    assert!(
        hits >= 1,
        "expected at least one cached inspect within scope (hits={hits}, misses={misses})"
    );
    assert!(
        misses >= 1,
        "expected at least one initial inspect miss (hits={hits}, misses={misses})"
    );
}
// #gdbgatecold scope-isolation: without an active scope, each
// `IndexDb::inspect_read_only` call has to read from disk again. This pins
// the contract that the search/status fast-paths never carry a cached
// inspection from one top-level call to the next.
#[test]
fn inspect_read_only_outside_scope_does_not_cache() {
    let project = setup_graph_index();
    let index_db = project.path().join(".tsift/index.db");

    let _first = index::IndexDb::inspect_read_only(&index_db, project.path(), false).unwrap();
    assert_eq!(
        index::inspect_scope_stats(),
        (0, 0),
        "no scope guard => no hits/misses recorded"
    );

    let _second = index::IndexDb::inspect_read_only(&index_db, project.path(), false).unwrap();
    let cached_hits = index::inspect_scope_stats().0;
    assert_eq!(
        cached_hits, 0,
        "must not reuse inspection outside of any scope"
    );
}
// Building a context pack over a project whose `main.rs` changed after
// indexing should report an "index refreshed" diagnostic, drop the stale
// reminder, and leave the index with no pending changes.
#[test]
fn context_pack_refreshes_stale_index_before_handoff() {
    let project = setup_graph_index();
    init_git_repo(project.path());

    // Pause, then rewrite the source so the existing index no longer matches.
    std::thread::sleep(std::time::Duration::from_millis(50));
    let main_rs = project.path().join("main.rs");
    std::fs::write(
        main_rs,
        "fn helper() { println!(\"updated\"); }\nfn main() { helper(); }\n",
    )
    .unwrap();

    let budget = ResponseBudget::new(Some(2), Some(96));
    let report = build_context_pack_report(project.path(), None, None, None, budget).unwrap();

    let mentions_refresh = report.status_reminders.iter().any(|reminder| {
        reminder.contains("index refreshed") && reminder.contains("context-pack handoff")
    });
    assert!(
        mentions_refresh,
        "expected context-pack refresh diagnostic, got {:?}",
        report.status_reminders
    );

    let no_stale_reminder = report
        .status_reminders
        .iter()
        .all(|reminder| !reminder.contains("index stale"));
    assert!(
        no_stale_reminder,
        "stale reminder should be gone after refresh: {:?}",
        report.status_reminders
    );

    // Re-open the index and confirm nothing is left to pick up.
    let index_db = project.path().join(".tsift/index.db");
    let db = index::IndexDb::open_read_only(&index_db).unwrap();
    let pending = db.compute_changes(project.path()).unwrap();
    assert_eq!(pending.new + pending.modified + pending.deleted, 0);
}
// Materializing an exploration packet should keep its source-window handle
// and write one `source_handle` node, one `worker_context` node, and the
// `touches_symbol` / `scopes_source` edges into `.tsift/graph.db`.
#[test]
fn context_pack_materializes_source_handles_into_graph_store() {
    let workspace = tempfile::tempdir().unwrap();

    let relation = ExplorationRelation {
        from: "file:main.rs".to_string(),
        relation: "touches_symbol".to_string(),
        to: "symbol:helper".to_string(),
        label: Some("modified diff".to_string()),
    };
    let window = ExplorationSourceWindow {
        handle: "xwin-test".to_string(),
        file: "main.rs".to_string(),
        start: 1,
        end: 32,
        reason: "changed file".to_string(),
        expand: "tsift source-read main.rs --path . --start 1 --lines 32".to_string(),
    };
    let worker = ExplorationWorkerContext {
        handle: "xwrk-test".to_string(),
        target: "tasks/software/tsift.md".to_string(),
        summary: "do #kgnv".to_string(),
        expand: "tsift --envelope context-pack tasks/software/tsift.md --budget normal"
            .to_string(),
    };
    let draft = ExplorationPacket {
        budget: exploration_budget_for_counts(2, 1),
        relationship_map: vec![relation],
        source_windows: vec![window],
        worker_context: vec![worker],
        no_reread_guidance: "use windows".to_string(),
    };

    let materialized =
        materialize_context_pack_exploration_packet(workspace.path(), draft).unwrap();
    assert_eq!(materialized.source_windows[0].handle, "xwin-test");

    let graph_db = workspace.path().join(".tsift/graph.db");
    let store = SqliteGraphStore::open(&graph_db).unwrap();

    let source_handles = store.nodes_by_kind("source_handle").unwrap();
    assert_eq!(source_handles.len(), 1);
    assert_eq!(
        source_handles[0].properties.get("file"),
        Some(&"main.rs".to_string())
    );

    let file_node = exploration_ref_id("file:main.rs");
    let touches = store
        .outgoing_edges(&file_node, Some("touches_symbol"))
        .unwrap();
    assert_eq!(touches.len(), 1);

    let worker_nodes = store.nodes_by_kind("worker_context").unwrap();
    assert_eq!(worker_nodes.len(), 1);

    let scoped = store
        .outgoing_edges("xwrk-test", Some("scopes_source"))
        .unwrap();
    assert_eq!(scoped.len(), 1);
}
// After the traversal graph store has been refreshed for a session file, the
// context-pack report should carry graph orchestration data: contract
// version, "current" projection freshness, projection hashes, `gevd-`
// evidence ids, conflict-matrix decisions and follow-ups, and ownership blocks.
#[test]
fn context_pack_records_graph_orchestration_observability() {
    let project = setup_traversal_project();
    init_git_repo(project.path());

    let session = project.path().join("tasks/software/tsift.md");
    refresh_traversal_graph_store(project.path(), &session, None).unwrap();

    let budget = ResponseBudget::new(Some(4), Some(160));
    let report = build_context_pack_report(&session, None, None, None, budget).unwrap();
    let orchestration = &report.graph_orchestration;

    assert_eq!(
        orchestration.contract_version,
        CONTEXT_PACK_GRAPH_ORCHESTRATION_CONTRACT_VERSION
    );
    assert_eq!(orchestration.projection_freshness.status.as_str(), "current");
    assert!(!orchestration.projection_hashes.is_empty());

    let has_evidence_packet = orchestration
        .evidence_packet_ids
        .iter()
        .any(|id| id.starts_with("gevd-"));
    assert!(
        has_evidence_packet,
        "{:?}",
        orchestration.evidence_packet_ids
    );

    let recommends_conflict_matrix = orchestration
        .conflict_matrix_decisions
        .iter()
        .any(|decision| decision.contains("run conflict-matrix"));
    assert!(
        recommends_conflict_matrix,
        "{:?}",
        orchestration.conflict_matrix_decisions
    );

    let follows_up_with_conflict_matrix = orchestration
        .follow_up_commands
        .iter()
        .any(|command| command.contains("conflict-matrix"));
    assert!(
        follows_up_with_conflict_matrix,
        "{:?}",
        orchestration.follow_up_commands
    );

    assert!(!orchestration.worker_ownership_blocks.is_empty());
}
// A snapshot with one extra stale node and only a stale edge should produce
// a stale, fail-closed report whose plan tombstones both stale rows, upserts
// the local edges in chunks of at most two, and deletes edges first.
#[test]
fn convex_sync_report_chunks_upserts_and_tombstones() {
    let project = setup_traversal_project();
    let root = project.path();

    let source_graph = build_traversal_graph_source(root, root, None).unwrap();
    let projection = traversal_projection_from_graph(root, None, &source_graph).unwrap();

    // Start from the local projection, then make the snapshot diverge.
    let mut snapshot = projection.to_convex_rows();
    let stale_node = ConvexNodeRow {
        external_id: "stale-node".to_string(),
        kind: "backlog".to_string(),
        label: "stale".to_string(),
        properties: BTreeMap::new(),
        provenance: Vec::new(),
        freshness: None,
    };
    snapshot.nodes.push(stale_node);

    let stale_edge = ConvexEdgeRow {
        edge_key: "stale-edge".to_string(),
        from_external_id: "stale-node".to_string(),
        to_external_id: "stale-node".to_string(),
        kind: "mentions".to_string(),
        properties: BTreeMap::new(),
        provenance: Vec::new(),
        freshness: None,
    };
    snapshot.edges.clear();
    snapshot.edges.push(stale_edge);

    let snapshot_path = root.join("convex-snapshot.json");
    let snapshot_json = serde_json::to_string(&snapshot).unwrap();
    fs::write(&snapshot_path, snapshot_json).unwrap();

    let report = build_convex_sync_report(root, None, Some(&snapshot_path), 2).unwrap();

    assert_eq!(report.freshness.status, "stale");
    assert!(report.freshness.fail_closed);
    assert_eq!(report.node_tombstones, vec!["stale-node".to_string()]);
    assert!(
        report.edge_upserts.len() > 1,
        "snapshot without edges should upsert local edges"
    );
    assert_eq!(report.edge_tombstones, vec!["stale-edge".to_string()]);

    let first_operation = report.chunks.first().map(|chunk| chunk.operation.as_str());
    assert_eq!(
        first_operation,
        Some("delete_edges"),
        "edge tombstones should be planned before node tombstones"
    );

    let has_small_edge_upsert_chunk = report
        .chunks
        .iter()
        .any(|chunk| chunk.operation == "upsert_edges" && chunk.count <= 2);
    assert!(
        has_small_edge_upsert_chunk,
        "expected chunked edge upserts, got {:?}",
        report.chunks
    );
}
// Verifying an empty (default) snapshot against a built traversal graph must
// fail with the "Convex graph projection is not current" error.
#[test]
fn convex_snapshot_validation_fails_closed_when_stale() {
    let project = setup_traversal_project();
    let root = project.path();
    build_traversal_graph(root, root, None).unwrap();

    let empty_snapshot = ConvexProjectionRows::default();
    let snapshot_path = root.join("empty-convex-snapshot.json");
    let snapshot_json = serde_json::to_string(&empty_snapshot).unwrap();
    fs::write(&snapshot_path, snapshot_json).unwrap();

    let err = verify_convex_projection_snapshot(root, None, &snapshot_path).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("Convex graph projection is not current"),
        "{err}"
    );
}
// A report built with the final flag set to `false` must not be marked as a
// dry run, must not carry "dry-run only" diagnostics, and must still plan
// `upsert_nodes` chunks.
#[test]
fn convex_sync_report_marks_live_apply_mode_without_network() {
    let project = setup_traversal_project();
    let report =
        build_convex_sync_report_with_snapshot(project.path(), None, None, 100, false).unwrap();

    assert!(!report.dry_run);

    let free_of_dry_run_notes = report
        .diagnostics
        .iter()
        .all(|diagnostic| !diagnostic.contains("dry-run only"));
    assert!(
        free_of_dry_run_notes,
        "apply-mode report should not claim dry-run diagnostics"
    );

    let plans_node_upserts = report
        .chunks
        .iter()
        .any(|chunk| chunk.operation == "upsert_nodes");
    assert!(
        plans_node_upserts,
        "live apply mode should still expose chunked idempotent operations"
    );
}
// Runs `cmd_convex_sync` in apply mode against a throwaway local HTTP server
// and checks that node and edge upsert operations were both posted.
//
// The server thread accepts one connection per planned chunk, parses the
// request line, headers and JSON body by hand, records the `operation`
// field, and answers with a fixed 200 response.
#[test]
fn convex_sync_apply_round_trips_with_http_backend() {
    use std::net::TcpListener;
    use std::sync::{Arc, Mutex};
    let dir = setup_traversal_project();
    // Build the same plan up front so the server knows how many requests to expect.
    let report =
        build_convex_sync_report_with_snapshot(dir.path(), None, None, 100, false).unwrap();
    let expected_chunks = report.chunks.len();
    assert!(expected_chunks > 0);
    // Port 0 lets the OS pick a free port; the endpoint is derived from it.
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
    let endpoint = format!("http://{}", listener.local_addr().unwrap());
    let operations = Arc::new(Mutex::new(Vec::<String>::new()));
    let server_operations = Arc::clone(&operations);
    let server = std::thread::spawn(move || {
        for _ in 0..expected_chunks {
            let (mut stream, _) = listener.accept().unwrap();
            let mut reader = BufReader::new(stream.try_clone().unwrap());
            let mut request_line = String::new();
            reader.read_line(&mut request_line).unwrap();
            assert!(request_line.starts_with("POST "));
            let mut content_length = 0usize;
            loop {
                let mut line = String::new();
                let bytes_read = reader.read_line(&mut line).unwrap();
                // `read_line` returns 0 at EOF; without this check a client
                // that disconnects mid-headers would leave the loop spinning
                // forever and hang `server.join()`.
                assert!(
                    bytes_read > 0,
                    "connection closed before the end of the request headers"
                );
                if line == "\r\n" {
                    break;
                }
                if let Some(value) = line.to_ascii_lowercase().strip_prefix("content-length:") {
                    content_length = value.trim().parse().unwrap();
                }
            }
            let mut body = vec![0u8; content_length];
            reader.read_exact(&mut body).unwrap();
            let request: serde_json::Value = serde_json::from_slice(&body).unwrap();
            server_operations
                .lock()
                .unwrap()
                .push(request["operation"].as_str().unwrap().to_string());
            let response = br#"{"status":"ok","message":"accepted"}"#;
            write!(
                stream,
                "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
                response.len()
            )
            .unwrap();
            stream.write_all(response).unwrap();
        }
    });
    cmd_convex_sync(
        ConvexSyncOptions {
            path: dir.path(),
            scope: None,
            snapshot: None,
            chunk_size: 100,
            remote_snapshot: false,
            apply: true,
            endpoint: Some(&endpoint),
            auth_token_env: "TSIFT_TEST_CONVEX_AUTH_TOKEN",
        },
        OutputFormat {
            json_output: false,
            compact: true,
            pretty: false,
            terse: false,
            schema: false,
            envelope: false,
        },
    )
    .unwrap();
    // Joining propagates any assertion failure raised inside the server thread.
    server.join().unwrap();
    let operations = operations.lock().unwrap().clone();
    assert!(operations.contains(&"upsert_nodes".to_string()));
    assert!(operations.contains(&"upsert_edges".to_string()));
}
// When a tag ontology context is supplied, touched-symbol and summary refs in
// the diff preview should link to the matching `.naming/tags/alpha.md` entry
// with its tag, title, domain and a `tont-` handle.
#[test]
fn context_pack_diff_preview_attaches_tag_ontology_refs() {
    let workspace = tempfile::tempdir().unwrap();
    let tags_dir = workspace.path().join(".naming/tags");
    fs::create_dir_all(tags_dir).unwrap();
    fs::write(
        workspace.path().join(".naming/tags/alpha.md"),
        "+++\ntag = \"alpha\"\ntitle = \"Alpha Domain\"\ndomain = \"fixture\"\n+++\n\nAlpha definition.\n",
    )
    .unwrap();
    let ontology = load_tag_ontology_preview_context(workspace.path()).unwrap();

    let changed_file = diff_digest::DiffDigestFile {
        path: "src/lib.rs".to_string(),
        status: diff_digest::DiffDigestFileStatus::Modified,
        touched_symbols: vec!["alpha_helper".to_string()],
        summary_state: diff_digest::DiffDigestSummaryState::Current,
        current_summaries: vec![diff_digest::DiffDigestSummarySnippet {
            symbol: "alpha_helper".to_string(),
            summary: "alpha helper summary".to_string(),
        }],
        added_call_edges: vec![],
        removed_call_edges: vec![],
        warnings: vec![],
    };
    let digest = diff_digest::DiffDigestReport {
        root: workspace.path().display().to_string(),
        mode: diff_digest::DiffDigestMode::WorkingTree,
        revision: None,
        files_changed: 1,
        files_with_current_summaries: 1,
        symbols_touched: 1,
        call_edges_added: 0,
        call_edges_removed: 0,
        files: vec![changed_file],
    };

    let budget = ResponseBudget::new(Some(1), Some(80));
    let preview = build_context_pack_diff_preview(&digest, budget, Some(&ontology));

    let previewed_file = &preview.files[0];
    let symbol_ontology = &previewed_file.touched_symbol_refs[0].ontology_refs[0];
    assert!(symbol_ontology.handle.starts_with("tont-"));
    assert_eq!(symbol_ontology.tag, "alpha");
    assert_eq!(symbol_ontology.path, ".naming/tags/alpha.md");
    assert_eq!(symbol_ontology.title.as_deref(), Some("Alpha Domain"));
    assert_eq!(symbol_ontology.domain.as_deref(), Some("fixture"));

    assert_eq!(
        previewed_file.summary_refs[0].ontology_refs[0].path,
        ".naming/tags/alpha.md"
    );
}
// With `ResponseBudget::new(Some(1), Some(14))` the test-digest preview is
// expected to keep one failure group, shorten test names and messages with
// "...", expose a `ctsum-` summary ref, and pass warnings through.
#[test]
fn context_pack_test_preview_limits_failure_groups() {
    let alpha_failure = test_digest::TestDigestFailure {
        tests: vec!["suite::alpha_failure".to_string()],
        message: "assertion failed".to_string(),
        path: Some("src/lib.rs".to_string()),
        line: Some(42),
        column: None,
        occurrences: 1,
        summary_state: test_digest::TestDigestSummaryState::Current,
        current_summaries: vec![test_digest::TestDigestSummarySnippet {
            symbol: "alpha_failure".to_string(),
            summary: "failure summary for alpha test".to_string(),
        }],
    };
    let beta_failure = test_digest::TestDigestFailure {
        tests: vec!["suite::beta_failure".to_string()],
        message: "panic".to_string(),
        path: Some("src/main.rs".to_string()),
        line: Some(7),
        column: None,
        occurrences: 1,
        summary_state: test_digest::TestDigestSummaryState::Missing,
        current_summaries: vec![],
    };
    let digest = test_digest::TestDigestReport {
        root: "/repo".to_string(),
        runner: "cargo".to_string(),
        failures: 2,
        grouped_failures: 2,
        counts: test_digest::TestDigestCounts {
            passed: Some(8),
            failed: Some(2),
            skipped: Some(1),
        },
        failure_groups: vec![alpha_failure, beta_failure],
        warnings: vec!["warning text".to_string()],
    };

    let budget = ResponseBudget::new(Some(1), Some(14));
    let preview = build_context_pack_test_preview(&digest, budget, None);

    assert!(preview.truncated);
    assert_eq!(preview.failure_groups.len(), 1);

    let kept = &preview.failure_groups[0];
    assert_eq!(kept.tests, vec!["suite::alph..."]);
    assert_eq!(kept.message, "assertion f...");
    assert!(kept.summary_refs[0].handle.starts_with("ctsum-"));
    assert_eq!(
        kept.summary_refs[0].expand,
        "tsift summarize --file \"src/lib.rs\""
    );

    assert_eq!(preview.warnings, vec!["warning text"]);
}
// With `ResponseBudget::new(Some(1), Some(14))` the log-digest preview is
// expected to keep one signal and one file ref, shorten long messages and
// repeated lines with "...", and expose `clsum-` / `clfsum-` / `clssum-`
// summary handles together with the symbol's tag alias and expand command.
#[test]
fn context_pack_log_preview_limits_signals_and_refs() {
    let signals = vec![
        log_digest::LogDigestSignal {
            severity: "error".to_string(),
            message: "src/lib.rs:42 boom".to_string(),
            path: Some("src/lib.rs".to_string()),
            line: Some(42),
            column: None,
            occurrences: 2,
            summary_state: log_digest::LogDigestSummaryState::Current,
            current_summaries: vec![log_digest::LogDigestSummarySnippet {
                symbol: "alpha_helper".to_string(),
                summary: "alpha helper cached log summary".to_string(),
            }],
        },
        log_digest::LogDigestSignal {
            severity: "warn".to_string(),
            message: "slow path".to_string(),
            path: None,
            line: None,
            column: None,
            occurrences: 1,
            summary_state: log_digest::LogDigestSummaryState::Unavailable,
            current_summaries: vec![],
        },
    ];
    let repeated_lines = vec![
        log_digest::LogDigestRepeatedLine {
            line: "retrying work item alpha".to_string(),
            occurrences: 3,
        },
        log_digest::LogDigestRepeatedLine {
            line: "retrying work item beta".to_string(),
            occurrences: 2,
        },
    ];
    let file_refs = vec![
        log_digest::LogDigestFileRef {
            path: "src/lib.rs".to_string(),
            line: Some(42),
            column: None,
            occurrences: 2,
            summary_state: log_digest::LogDigestSummaryState::Current,
            current_summaries: vec![log_digest::LogDigestSummarySnippet {
                symbol: "alpha_helper".to_string(),
                summary: "alpha helper cached file summary".to_string(),
            }],
        },
        log_digest::LogDigestFileRef {
            path: "src/main.rs".to_string(),
            line: Some(7),
            column: None,
            occurrences: 1,
            summary_state: log_digest::LogDigestSummaryState::Missing,
            current_summaries: vec![],
        },
    ];
    let symbol_refs = vec![
        log_digest::LogDigestSymbolRef {
            symbol: "alpha_helper".to_string(),
            occurrences: 2,
            summary_state: log_digest::LogDigestSummaryState::Current,
            current_summaries: vec![log_digest::LogDigestSummarySnippet {
                symbol: "alpha_helper".to_string(),
                summary: "alpha helper cached symbol summary".to_string(),
            }],
        },
        log_digest::LogDigestSymbolRef {
            symbol: "beta_helper".to_string(),
            occurrences: 1,
            summary_state: log_digest::LogDigestSummaryState::Missing,
            current_summaries: vec![],
        },
    ];
    let stack_traces = vec![log_digest::LogDigestStackGroup {
        frames: vec!["frame one".to_string()],
        occurrences: 1,
    }];
    let digest = log_digest::LogDigestReport {
        root: "/repo".to_string(),
        total_lines: 12,
        non_empty_lines: 10,
        signal_groups: 2,
        repeated_line_groups: 2,
        repeated_line_occurrences: 3,
        file_ref_groups: 2,
        symbol_ref_groups: 2,
        stack_groups: 1,
        signals,
        repeated_lines,
        file_refs,
        symbol_refs,
        stack_traces,
        warnings: vec!["warning text".to_string()],
    };

    let budget = ResponseBudget::new(Some(1), Some(14));
    let preview = build_context_pack_log_preview(&digest, budget, None);

    assert!(preview.truncated);
    assert_eq!(preview.signals.len(), 1);
    assert_eq!(preview.signals[0].message, "src/lib.rs:...");
    assert_eq!(preview.repeated_lines[0].line, "retrying wo...");
    assert_eq!(preview.file_refs.len(), 1);
    assert_eq!(preview.symbol_refs[0].symbol, "alpha_helper");

    let signal_summary = &preview.signals[0].summary_refs[0];
    assert!(signal_summary.handle.starts_with("clsum-"));

    let file_summary = &preview.file_refs[0].summary_refs[0];
    assert!(file_summary.handle.starts_with("clfsum-"));

    let symbol_summary = &preview.symbol_refs[0].summary_refs[0];
    assert!(symbol_summary.handle.starts_with("clssum-"));
    assert_eq!(symbol_summary.tag_alias.as_deref(), Some("alpha/helper"));
    assert_eq!(symbol_summary.expand, "tsift summarize \"alpha_helper\"");

    assert_eq!(preview.warnings, vec!["warning text"]);
}
// `search --exact` combined with `--strategy` must be a parse error.
#[test]
fn cli_search_rejects_exact_with_strategy_flag() {
    let args = [
        "tsift",
        "search",
        "test",
        "--exact",
        "--strategy",
        "lexical",
    ];
    let parsed = try_parse_cli(args);
    assert!(parsed.is_err());
}
// A bare `search` sets neither `--autoindex` nor `--no-autoindex`; the last
// assertion spells out the combination `autoindex || !no_autoindex`, which
// presumably is how the effective default is derived.
#[test]
fn cli_search_autoindexes_by_default() {
    let cli = parse_cli(["tsift", "search", "test"]);
    let Some(Commands::Search {
        autoindex,
        no_autoindex,
        ..
    }) = cli.command
    else {
        panic!("expected Search command");
    };
    assert!(!autoindex);
    assert!(!no_autoindex);
    assert!(autoindex || !no_autoindex);
}
// `--no-autoindex` sets its own flag and leaves `autoindex` unset.
#[test]
fn cli_search_accepts_no_autoindex_flag() {
    let cli = parse_cli(["tsift", "search", "test", "--no-autoindex"]);
    let Some(Commands::Search {
        autoindex,
        no_autoindex,
        ..
    }) = cli.command
    else {
        panic!("expected Search command");
    };
    assert!(!autoindex);
    assert!(no_autoindex);
}
// Passing `--autoindex` together with `--no-autoindex` must be a parse error.
#[test]
fn cli_search_rejects_conflicting_autoindex_flags() {
    let args = ["tsift", "search", "test", "--autoindex", "--no-autoindex"];
    let parsed = try_parse_cli(args);
    assert!(parsed.is_err());
}
// --- relativize paths ---
// `--absolute` is accepted ahead of the subcommand and sets `cli.absolute`.
#[test]
fn cli_accepts_global_absolute_flag() {
    let cli = parse_cli(["tsift", "--absolute", "status"]);
    assert!(cli.absolute);
    let is_status = matches!(cli.command, Some(Commands::Status { .. }));
    assert!(is_status);
}
// `--tabular` is accepted ahead of `search` and sets `cli.tabular`.
#[test]
fn cli_accepts_global_tabular_flag() {
    let cli = parse_cli(["tsift", "--tabular", "search", "test"]);
    assert!(cli.tabular);
    let is_search = matches!(cli.command, Some(Commands::Search { .. }));
    assert!(is_search);
}
// `--tabular` combines with the `graph` subcommand.
#[test]
fn cli_tabular_with_graph() {
    let cli = parse_cli(["tsift", "--tabular", "graph", "main"]);
    assert!(cli.tabular);
    let is_graph = matches!(cli.command, Some(Commands::Graph { .. }));
    assert!(is_graph);
}
// `--tabular` combines with the `communities` subcommand.
#[test]
fn cli_tabular_with_communities() {
    let cli = parse_cli(["tsift", "--tabular", "communities"]);
    assert!(cli.tabular);
    let is_communities = matches!(cli.command, Some(Commands::Communities { .. }));
    assert!(is_communities);
}
// `--tabular` combines with the `explain` subcommand.
#[test]
fn cli_tabular_with_explain() {
    let cli = parse_cli(["tsift", "--tabular", "explain", "main"]);
    assert!(cli.tabular);
    let is_explain = matches!(cli.command, Some(Commands::Explain { .. }));
    assert!(is_explain);
}
// `traverse` parses the start node, `--to` target, `--path`, and `--format html`.
#[test]
fn cli_traverse_accepts_path_target_and_html_format() {
    let cli = parse_cli([
        "tsift", "traverse", "#kgnv", "--to", "main", "--path", ".", "--format", "html",
    ]);
    let Some(Commands::Traverse {
        node,
        to,
        path,
        format,
        ..
    }) = cli.command
    else {
        panic!("expected Traverse command");
    };
    assert_eq!(node.as_deref(), Some("#kgnv"));
    assert_eq!(to.as_deref(), Some("main"));
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(format, TraverseFormat::Html);
}
// `semantic` parses the query text plus `--path`, `--kind`, `--limit`, `--json`.
#[test]
fn cli_parses_semantic_related_command() {
    let args = [
        "tsift",
        "semantic",
        "graph navigation",
        "--path",
        ".",
        "--kind",
        "all",
        "--limit",
        "3",
        "--json",
    ];
    let cli = parse_cli(args);
    let Some(Commands::Semantic {
        query,
        path,
        kind,
        limit,
        json,
        ..
    }) = cli.command
    else {
        panic!("expected Semantic command");
    };
    assert_eq!(query, "graph navigation");
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(kind, SemanticRelatedKind::All);
    assert_eq!(limit, 3);
    assert!(json);
}
// `convex-sync` parses the positional path, `--snapshot`, `--chunk-size`, `--json`.
#[test]
fn cli_parses_convex_sync_command() {
    let args = [
        "tsift",
        "convex-sync",
        ".",
        "--snapshot",
        "rows.json",
        "--chunk-size",
        "25",
        "--json",
    ];
    let cli = parse_cli(args);
    let Some(Commands::ConvexSync {
        path,
        snapshot,
        chunk_size,
        json,
        ..
    }) = cli.command
    else {
        panic!("expected ConvexSync command");
    };
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(snapshot, Some(PathBuf::from("rows.json")));
    assert_eq!(chunk_size, 25);
    assert!(json);
}
// `convex-sync` parses the live-mode flags: `--remote-snapshot`, `--apply`,
// `--endpoint`, and `--auth-token-env`.
#[test]
fn cli_parses_convex_sync_live_flags() {
    let args = [
        "tsift",
        "convex-sync",
        ".",
        "--remote-snapshot",
        "--apply",
        "--endpoint",
        "https://example.test/convex-graph",
        "--auth-token-env",
        "TSIFT_TEST_TOKEN",
    ];
    let cli = parse_cli(args);
    let Some(Commands::ConvexSync {
        remote_snapshot,
        apply,
        endpoint,
        auth_token_env,
        ..
    }) = cli.command
    else {
        panic!("expected ConvexSync command");
    };
    assert!(remote_snapshot);
    assert!(apply);
    assert_eq!(
        endpoint.as_deref(),
        Some("https://example.test/convex-graph")
    );
    assert_eq!(auth_token_env, "TSIFT_TEST_TOKEN");
}
// `graph-db` parses its own options (`--backend`, `--convex-snapshot`,
// `--json`) followed by a `neighborhood` query with all of its options.
#[test]
fn cli_parses_graph_db_query() {
    let args = [
        "tsift",
        "graph-db",
        "--backend",
        "convex-snapshot",
        "--convex-snapshot",
        "rows.json",
        "--json",
        "neighborhood",
        "gbak-kgnv",
        "--depth",
        "2",
        "--edge-kind",
        "mentions",
        "--property",
        "path=tasks/software/tsift.md",
        "--cursor",
        "gbak-old",
        "--limit",
        "10",
    ];
    let cli = parse_cli(args);

    let Some(Commands::GraphDb {
        backend,
        convex_snapshot,
        json,
        query,
        ..
    }) = cli.command
    else {
        panic!("expected GraphDb command");
    };
    assert_eq!(backend, GraphDbBackend::ConvexSnapshot);
    assert_eq!(convex_snapshot, Some(PathBuf::from("rows.json")));
    assert!(json);

    let GraphDbQuery::Neighborhood {
        id,
        depth,
        edge_kind,
        cursor,
        limit,
        property_filters,
    } = query
    else {
        panic!("expected graph-db neighborhood query");
    };
    assert_eq!(id, "gbak-kgnv");
    assert_eq!(depth, 2);
    assert_eq!(edge_kind.as_deref(), Some("mentions"));
    assert_eq!(cursor.as_deref(), Some("gbak-old"));
    assert_eq!(limit, Some(10));
    assert_eq!(
        property_filters,
        vec!["path=tasks/software/tsift.md".to_string()]
    );
}
// `graph-db --backend tokensave` parses together with a `node` query.
#[test]
fn cli_parses_graph_db_tokensave_backend() {
    let args = [
        "tsift",
        "graph-db",
        "--backend",
        "tokensave",
        "--json",
        "node",
        "fn:main",
    ];
    let cli = parse_cli(args);

    let Some(Commands::GraphDb {
        backend,
        json,
        query,
        ..
    }) = cli.command
    else {
        panic!("expected GraphDb command");
    };
    assert_eq!(backend, GraphDbBackend::Tokensave);
    assert!(json);

    let GraphDbQuery::Node { id } = query else {
        panic!("expected graph-db node query");
    };
    assert_eq!(id, "fn:main");
}
// `analyze` parses the path, `--scope`, repeated `--entry`, `--limit`, `--json`.
#[test]
fn cli_parses_analyze_command() {
    let cli = parse_cli([
        "tsift", "analyze", ".", "--scope", "core", "--entry", "main", "--entry", "run",
        "--limit", "7", "--json",
    ]);
    // The pattern names every field (no `..`), so a new field on the variant
    // forces this test to be revisited.
    let Some(Commands::Analyze {
        path,
        scope,
        entry_points,
        limit,
        json,
    }) = cli.command
    else {
        panic!("expected Analyze command");
    };
    assert_eq!(path, PathBuf::from("."));
    assert_eq!(scope.as_deref(), Some("core"));
    assert_eq!(entry_points, vec!["main".to_string(), "run".to_string()]);
    assert_eq!(limit, 7);
    assert!(json);
}
// `graph-db related` parses the query text plus `--kind`, `--depth`,
// `--seed-limit`, and `--limit`.
#[test]
fn cli_parses_graph_db_related_query() {
    let args = [
        "tsift",
        "graph-db",
        "--json",
        "related",
        "voice avatar memory retrieval",
        "--kind",
        "all",
        "--depth",
        "3",
        "--seed-limit",
        "4",
        "--limit",
        "12",
    ];
    let cli = parse_cli(args);

    let Some(Commands::GraphDb {
        json,
        query: graph_query,
        ..
    }) = cli.command
    else {
        panic!("expected GraphDb command");
    };
    assert!(json);

    let GraphDbQuery::Related {
        query,
        kind,
        depth,
        seed_limit,
        limit,
    } = graph_query
    else {
        panic!("expected graph-db related query");
    };
    assert_eq!(query, "voice avatar memory retrieval");
    assert_eq!(kind, SemanticRelatedKind::All);
    assert_eq!(depth, 3);
    assert_eq!(seed_limit, 4);
    assert_eq!(limit, 12);
}
// `graph-db compact` parses `--apply`, `--prune-tombstones`, and
// `--confirmed-convex-reconciled`.
#[test]
fn cli_parses_graph_db_compact_query() {
    let args = [
        "tsift",
        "graph-db",
        "--path",
        ".",
        "compact",
        "--apply",
        "--prune-tombstones",
        "--confirmed-convex-reconciled",
    ];
    let cli = parse_cli(args);

    let Some(Commands::GraphDb { query, .. }) = cli.command else {
        panic!("expected GraphDb command");
    };
    let GraphDbQuery::Compact {
        apply,
        prune_tombstones,
        confirmed_convex_reconciled,
    } = query
    else {
        panic!("expected graph-db compact query");
    };
    assert!(apply);
    assert!(prune_tombstones);
    assert!(confirmed_convex_reconciled);
}
// `impact` parses the positional path, `--cached`, and `--limit`.
#[test]
fn cli_parses_impact_command() {
    let cli = parse_cli(["tsift", "impact", ".", "--cached", "--limit", "5"]);
    let Some(Commands::Impact {
        path,
        cached,
        limit,
        ..
    }) = cli.command
    else {
        panic!("expected Impact command");
    };
    assert_eq!(path, PathBuf::from("."));
    assert!(cached);
    assert_eq!(limit, 5);
}
// `conflict-matrix` parses its options followed by several positional targets.
#[test]
fn cli_parses_conflict_matrix_command() {
    let args = [
        "tsift",
        "conflict-matrix",
        "--path",
        "tasks/software/tsift.md",
        "--depth",
        "4",
        "--limit",
        "12",
        "--impact-limit",
        "6",
        "--json",
        "pwcm",
        "#g6kf",
    ];
    let cli = parse_cli(args);
    let Some(Commands::ConflictMatrix {
        targets,
        path,
        depth,
        limit,
        impact_limit,
        json,
        ..
    }) = cli.command
    else {
        panic!("expected ConflictMatrix command");
    };
    assert_eq!(targets, vec!["pwcm".to_string(), "#g6kf".to_string()]);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(depth, 4);
    assert_eq!(limit, 12);
    assert_eq!(impact_limit, 6);
    assert!(json);
}
// `dispatch-trace` parses `--path`, `--format html`, `--depth`, and its targets.
#[test]
fn cli_parses_dispatch_trace_command() {
    let args = [
        "tsift",
        "dispatch-trace",
        "--path",
        "tasks/software/tsift.md",
        "--format",
        "html",
        "--depth",
        "4",
        "pwcm",
        "#g6kf",
    ];
    let cli = parse_cli(args);
    let Some(Commands::DispatchTrace {
        targets,
        path,
        format,
        depth,
        ..
    }) = cli.command
    else {
        panic!("expected DispatchTrace command");
    };
    assert_eq!(targets, vec!["pwcm".to_string(), "#g6kf".to_string()]);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(format, DispatchTraceFormat::Html);
    assert_eq!(depth, 4);
}
// `dependency-dag` parses `--path`, `--depth`, `--limit`, `--json`, and its targets.
#[test]
fn cli_parses_dependency_dag_command() {
    let args = [
        "tsift",
        "dependency-dag",
        "--path",
        "tasks/software/tsift.md",
        "--depth",
        "5",
        "--limit",
        "20",
        "--json",
        "alpha",
        "#beta",
    ];
    let cli = parse_cli(args);
    let Some(Commands::DependencyDag {
        targets,
        path,
        depth,
        limit,
        json,
        ..
    }) = cli.command
    else {
        panic!("expected DependencyDag command");
    };
    assert_eq!(targets, vec!["alpha".to_string(), "#beta".to_string()]);
    assert_eq!(path, PathBuf::from("tasks/software/tsift.md"));
    assert_eq!(depth, 5);
    assert_eq!(limit, 20);
    assert!(json);
}
// A path under the root is returned relative to it.
#[test]
fn relativize_strips_root_prefix() {
    let project_root = std::path::Path::new("/home/user/project");
    let relative = relativize("/home/user/project/src/main.rs", project_root);
    assert_eq!(relative, "src/main.rs");
}
// A path outside the root comes back unchanged.
#[test]
fn relativize_leaves_non_matching_path() {
    let project_root = std::path::Path::new("/home/user/project");
    let unchanged = relativize("/other/path/file.rs", project_root);
    assert_eq!(unchanged, "/other/path/file.rs");
}
// An already-relative path comes back unchanged.
#[test]
fn relativize_leaves_already_relative() {
    let project_root = std::path::Path::new("/home/user/project");
    let unchanged = relativize("src/main.rs", project_root);
    assert_eq!(unchanged, "src/main.rs");
}
// The `Path`-based variant strips the root prefix and yields a `PathBuf`.
#[test]
fn relativize_pathbuf_strips_prefix() {
    let project_root = std::path::Path::new("/home/user/project");
    let absolute = std::path::Path::new("/home/user/project/src/lib.rs");
    let relative = relativize_pathbuf(absolute, project_root);
    assert_eq!(relative, PathBuf::from("src/lib.rs"));
}
// `relativize_edges` rewrites each edge's `caller_file` in place.
#[test]
fn relativize_edges_strips_caller_file() {
    let project_root = std::path::Path::new("/tmp/proj");
    let edge = index::StoredEdge {
        caller_file: "/tmp/proj/src/main.rs".to_string(),
        caller_name: "main".to_string(),
        caller_line: 1,
        callee_name: "helper".to_string(),
        call_site_line: 5,
        tagpath_handle: None,
    };
    let mut edges = vec![edge];

    relativize_edges(&mut edges, project_root);

    assert_eq!(edges[0].caller_file, "src/main.rs");
}
// `file` and `path` values are relativized, including inside nested arrays,
// while a path-like value under another key (`name`) is left untouched.
#[test]
fn relativize_json_paths_strips_known_keys() {
    let project_root = std::path::Path::new("/tmp/proj");
    let mut document = serde_json::json!({
        "file": "/tmp/proj/src/main.rs",
        "path": "/tmp/proj/test.rs",
        "name": "/tmp/proj/not-a-path",
        "hits": [{"path": "/tmp/proj/nested.rs", "score": 1.0}]
    });

    relativize_json_paths(&mut document, project_root);

    assert_eq!(document["file"], "src/main.rs");
    assert_eq!(document["path"], "test.rs");
    assert_eq!(document["name"], "/tmp/proj/not-a-path");
    assert_eq!(document["hits"][0]["path"], "nested.rs");
}
// --- limit caps ---
// `graph --limit 5` parses the explicit limit.
#[test]
fn cli_graph_accepts_limit_flag() {
    let cli = parse_cli(["tsift", "graph", "main", "--limit", "5"]);
    let Some(Commands::Graph { limit, .. }) = cli.command else {
        panic!("expected Graph command");
    };
    assert_eq!(limit, 5);
}
// Without `--limit`, `graph` defaults to 20.
#[test]
fn cli_graph_default_limit_is_20() {
    let cli = parse_cli(["tsift", "graph", "main"]);
    let Some(Commands::Graph { limit, .. }) = cli.command else {
        panic!("expected Graph command");
    };
    assert_eq!(limit, 20);
}
// `communities --limit 3` parses the explicit limit.
#[test]
fn cli_communities_accepts_limit_flag() {
    let cli = parse_cli(["tsift", "communities", "--limit", "3"]);
    let Some(Commands::Communities { limit, .. }) = cli.command else {
        panic!("expected Communities command");
    };
    assert_eq!(limit, 3);
}
#[test]
fn cli_communities_default_limit_is_10() {
    // Without `--limit`, `communities` falls back to a cap of 10.
    let parsed = parse_cli(["tsift", "communities"]);
    let Some(Commands::Communities { limit, .. }) = parsed.command else {
        panic!("expected Communities command");
    };
    assert_eq!(limit, 10);
}
#[test]
fn cli_explain_accepts_limit_flag() {
    // An explicit `--limit` on `explain` is parsed into the `limit` field.
    let parsed = parse_cli(["tsift", "explain", "main", "--limit", "7"]);
    let Some(Commands::Explain { limit, .. }) = parsed.command else {
        panic!("expected Explain command");
    };
    assert_eq!(limit, 7);
}
#[test]
fn cli_explain_default_limit_is_15() {
    // Without `--limit`, `explain` falls back to a cap of 15.
    let parsed = parse_cli(["tsift", "explain", "main"]);
    let Some(Commands::Explain { limit, .. }) = parsed.command else {
        panic!("expected Explain command");
    };
    assert_eq!(limit, 15);
}
#[test]
fn cli_limit_zero_means_unlimited() {
    // `--limit 0` must survive parsing as a literal zero; this test only pins
    // the parsed value, not how the command later interprets it.
    let parsed = parse_cli(["tsift", "graph", "main", "--limit", "0"]);
    let Some(Commands::Graph { limit, .. }) = parsed.command else {
        panic!("expected Graph command");
    };
    assert_eq!(limit, 0);
}
#[test]
fn graph_cmd_limit_runs_ok() {
    // Smoke test: `cmd_graph` succeeds against the fixture index.
    // NOTE(review): the bare `1` is presumably the limit under test — confirm
    // against `cmd_graph`'s parameter order.
    let fixture = setup_graph_index();
    let outcome = cmd_graph(
        "main",
        fixture.path(),
        false,
        false,
        None,
        1,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
#[test]
fn graph_cmd_unlimited_runs_ok() {
    // Smoke test: `cmd_graph` succeeds when the numeric argument is zero.
    // NOTE(review): the bare `0` is presumably the "unlimited" limit — confirm
    // against `cmd_graph`'s parameter order.
    let fixture = setup_graph_index();
    let outcome = cmd_graph(
        "main",
        fixture.path(),
        false,
        false,
        None,
        0,
        false,
        false,
        false,
        false,
        false,
        false,
        false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
#[test]
fn graph_cmd_tabular_runs_ok() {
    // Smoke test: `cmd_graph` succeeds with one output flag switched on.
    // NOTE(review): the lone `true` is presumably the tabular flag — confirm
    // against `cmd_graph`'s parameter order.
    let fixture = setup_graph_index();
    let outcome = cmd_graph(
        "main",
        fixture.path(),
        false,
        false,
        None,
        20,
        false,
        false,
        false,
        false,
        false,
        true,
        false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
#[test]
fn communities_cmd_tabular_runs_ok() {
    // Smoke test: `cmd_communities` succeeds against the fixture index.
    // NOTE(review): the lone `true` is presumably the tabular flag — confirm
    // against `cmd_communities`'s parameter order.
    let fixture = setup_graph_index();
    let outcome = cmd_communities(
        fixture.path(),
        None,
        1,
        10,
        false,
        false,
        false,
        false,
        true,
        false,
        TagpathSearchOpts::default(),
    );
    assert!(outcome.is_ok());
}
#[test]
fn explain_cmd_tabular_runs_ok() {
    // Smoke test: `cmd_explain` succeeds against the fixture index.
    // NOTE(review): the lone `true` is presumably the tabular flag — confirm
    // against `cmd_explain`'s parameter order.
    let fixture = setup_graph_index();
    let outcome = cmd_explain(
        "main",
        fixture.path(),
        None,
        15,
        false,
        false,
        false,
        false,
        false,
        true,
        false,
    );
    assert!(outcome.is_ok());
}
#[test]
fn traversal_excludes_agent_doc_runtime_paths_from_source_watermark() {
    // #gdbcacheprove: .agent-doc runtime markdown (snapshots, baselines, archives,
    // session docs, runtime logs) must not contribute to the source watermark, or
    // every agent-doc cycle would invalidate the graph-db backend-eval cache and
    // force a full rebuild on the next run.
    let generated_artifacts = [
        ".agent-doc",
        ".agent-doc/snapshots/abc.md",
        ".agent-doc/baselines/abc.md",
        ".agent-doc/archives/2026.md",
        ".agent-doc/runtime/run.jsonl",
        "src/foo/.agent-doc",
        "src/foo/.agent-doc/snapshots/x.md",
        "./.agent-doc/snapshots/x.md",
    ];
    // Real source paths must NOT be excluded.
    let real_sources = [
        "src/main.rs",
        "tests/perf_gate.rs",
        "fixtures/x.json",
        "agent-doc/src/lib.rs", // sibling dir without the leading dot
        "src/.agent-doc-helper.rs",
    ];
    for path in generated_artifacts {
        let excluded = traversal_relative_path_is_generated_artifact(path);
        assert!(
            excluded,
            "expected `{path}` to be excluded from source watermark"
        );
    }
    for path in real_sources {
        let excluded = traversal_relative_path_is_generated_artifact(path);
        assert!(
            !excluded,
            "expected `{path}` to be included in source watermark"
        );
    }
}
#[test]
fn traversal_excludes_tsift_and_target_runtime_paths_from_source_watermark() {
    // #cachelookupshift: the conflict-matrix preparation cache key hashes
    // file_state snapshot rows + every markdown file under the root. Any
    // .tsift/, target/, or .agent-doc/ path slipping past the filter would
    // shift the watermark every run because those directories mutate as a
    // side effect of running tsift itself. This test locks the artifact
    // filter against regressions for each prefix variant
    // (bare, root-anchored, nested, and './' leading).
    let generated_artifacts = [
        ".tsift",
        ".tsift/index.db",
        ".tsift/indexes/foo/index.db",
        ".tsift/conflict-matrix-cache/inputs/abc.json",
        ".tsift/summaries.db",
        "src/foo/.tsift",
        "src/foo/.tsift/graph.db",
        "./.tsift/index.db",
        "target",
        "target/debug/build/x",
        "target/release/tsift",
        "src/foo/target/debug/x",
        "./target/release/x",
    ];
    // Look-alike paths must NOT be excluded — only true artifact dirs.
    let look_alikes = [
        "src/ctx-core-dev/lib/a__target/CHANGELOG.md",
        "src/ctx-core-dev/lib/a__target/A__Target/index.d.ts",
        "src/tsift-extras/lib.rs",
        "tsift/README.md",
        "src/targeting.rs",
        "src/.tsiftrc",
        "src/agent-doc-helper.rs",
    ];
    for path in generated_artifacts {
        let excluded = traversal_relative_path_is_generated_artifact(path);
        assert!(
            excluded,
            "expected `{path}` to be excluded from source watermark"
        );
    }
    for path in look_alikes {
        let excluded = traversal_relative_path_is_generated_artifact(path);
        assert!(
            !excluded,
            "expected `{path}` to be included in source watermark"
        );
    }
}
// Verifies three properties of `traversal_source_watermark` on a temp root:
// repeated calls agree, writes under `.tsift/` and `target/` are ignored, and
// rewriting the hinted markdown file changes the result.
#[test]
fn traversal_source_watermark_is_stable_across_invocations_on_quiescent_root() {
// #cachelookupshift: the conflict-matrix preparation cache only hits
// when traversal_source_watermark returns the same hash for two
// consecutive calls on identical source state. Lock that invariant so
// a future change that folds wall-clock time, a directory mtime, or
// any other non-content input into the hash trips this test before
// regressing the preparation_cache_lookup hit rate. We exercise the
// session_only=true path with a hinted markdown file so the test does
// not need a full index DB to drive the index-snapshot branch.
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
// Fixture: one source file plus the markdown file used as the hint.
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/main.rs"), "fn main() {}\n").unwrap();
let hint = root.join("README.md");
std::fs::write(&hint, "# stable\n").unwrap();
// Add a generated-artifact directory that must NOT affect the watermark.
std::fs::create_dir_all(root.join(".tsift")).unwrap();
std::fs::write(root.join(".tsift/index.db"), b"placeholder").unwrap();
std::fs::create_dir_all(root.join("target/debug")).unwrap();
std::fs::write(root.join("target/debug/marker"), b"placeholder").unwrap();
// Two back-to-back calls with nothing changed in between.
let first = traversal_source_watermark(root, &hint, None, true)
.expect("first watermark call must succeed")
.expect("first watermark must produce a hash for hinted markdown");
let second = traversal_source_watermark(root, &hint, None, true)
.expect("second watermark call must succeed")
.expect("second watermark must produce a hash for hinted markdown");
assert_eq!(
first, second,
"watermark must be identical across back-to-back invocations on a quiescent root"
);
// Mutating a generated-artifact file must NOT shift the hash.
std::fs::write(root.join(".tsift/index.db"), b"changed").unwrap();
std::fs::write(root.join("target/debug/marker"), b"changed").unwrap();
let third = traversal_source_watermark(root, &hint, None, true)
.expect("third watermark call must succeed")
.expect("third watermark must produce a hash for hinted markdown");
assert_eq!(
first, third,
"watermark must ignore mutations under .tsift/ and target/"
);
// Mutating the hinted markdown file MUST shift the hash so the
// preparation cache invalidates correctly when user state changes.
// Sleep briefly to push the file mtime past the original even on
// coarse-resolution filesystems.
std::thread::sleep(std::time::Duration::from_millis(20));
std::fs::write(&hint, "# stable edited with longer content\n").unwrap();
let fourth = traversal_source_watermark(root, &hint, None, true)
.expect("fourth watermark call must succeed")
.expect("fourth watermark must produce a hash for hinted markdown");
assert_ne!(
first, fourth,
"watermark must invalidate when the hinted markdown file changes"
);
}
// Verifies that the watermark survives a `user_version` pragma round-trip on
// `.tsift/summaries.db` but changes once a stored summary row is replaced.
#[test]
fn traversal_source_watermark_uses_summary_rows_not_summaries_db_metadata() {
// #gcachemiss: full-projection cache keys must not miss just because the
// SQLite summary cache file header or mtime churned. Only the semantic rows
// that feed traversal projection should participate in the source watermark.
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::write(root.join("README.md"), "# stable\n").unwrap();
// Seed the summary database with a single row for `src/main.rs`.
let summaries_db_path = root.join(".tsift/summaries.db");
let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
let mut summary = summarize::Summary {
id: 0,
symbol_name: "main".to_string(),
file_path: "src/main.rs".to_string(),
content_hash: "hash-main".to_string(),
summary: "main wires the CLI".to_string(),
entities: Some(vec![summarize::Entity {
name: "Cli".to_string(),
kind: "type".to_string(),
description: "Command-line interface".to_string(),
}]),
relationships: None,
concept_labels: Some(vec!["cli".to_string()]),
extracted_at: "1700000000".to_string(),
model: "test-model".to_string(),
tokens_input: Some(10),
tokens_output: Some(5),
};
summary_db.insert(&summary).unwrap();
drop(summary_db);
let hint = root.join("README.md");
let first = traversal_source_watermark(root, &hint, None, true)
.expect("first watermark call must succeed")
.expect("first watermark must produce a hash");
// Churn the database file without touching any summary row: flip
// `user_version` to 1 and back to 0 through a raw connection.
std::thread::sleep(std::time::Duration::from_millis(20));
let conn = Connection::open(&summaries_db_path).unwrap();
conn.pragma_update(None, "user_version", 1).unwrap();
conn.pragma_update(None, "user_version", 0).unwrap();
drop(conn);
let second = traversal_source_watermark(root, &hint, None, true)
.expect("second watermark call must succeed")
.expect("second watermark must produce a hash");
assert_eq!(
first, second,
"metadata-only summaries.db churn must not invalidate the source watermark"
);
// Replace the stored row with one whose entities differ.
summary.entities = Some(vec![summarize::Entity {
name: "GraphCache".to_string(),
kind: "type".to_string(),
description: "Stable full-projection cache input".to_string(),
}]);
let summary_db = summarize::SummaryDb::open(&summaries_db_path).unwrap();
summary_db.delete_by_file("src/main.rs").unwrap();
summary_db.insert(&summary).unwrap();
drop(summary_db);
let third = traversal_source_watermark(root, &hint, None, true)
.expect("third watermark call must succeed")
.expect("third watermark must produce a hash");
assert_ne!(
first, third,
"semantic summary row changes must invalidate the source watermark"
);
}
// Verifies that rewriting a source file with identical bytes and re-applying
// index changes leaves the full-projection source watermark unchanged.
#[test]
fn full_projection_source_watermark_ignores_source_mtime_when_index_rows_unchanged() {
// #gfullhot: backend-eval full-projection cache keys should be based on
// the indexed graph inputs, not file_state mtimes. Touching a source file
// without changing extracted symbols/call edges must still hit the cache.
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::create_dir_all(root.join(".tsift")).unwrap();
let source = root.join("src/lib.rs");
let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
std::fs::write(&source, source_body).unwrap();
// Build the index once and take the baseline watermark.
let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
db.rebuild(root).unwrap();
drop(db);
let first = graph_db_backend_eval_full_projection_source_watermark(root, None)
.unwrap()
.value;
// Rewrite the same bytes after a short sleep, then re-apply index changes.
std::thread::sleep(std::time::Duration::from_millis(20));
std::fs::write(&source, source_body).unwrap();
let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
db.apply_changes(root).unwrap();
drop(db);
let second = graph_db_backend_eval_full_projection_source_watermark(root, None)
.unwrap()
.value;
assert_eq!(
first, second,
"mtime-only source index churn must not invalidate the full-projection cache"
);
}
// Verifies that editing a task markdown document, with no re-index, leaves the
// full-projection source watermark unchanged.
#[test]
fn full_projection_source_watermark_ignores_session_markdown_churn() {
// #gfullhot: the full-projection performance cache isolates code graph
// and semantic-summary inputs. Current session evidence is measured by
// the bounded real dataset, so unrelated task-doc edits must not force a
// million-row full-projection rebuild.
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::create_dir_all(root.join("tasks/software")).unwrap();
std::fs::create_dir_all(root.join(".tsift")).unwrap();
std::fs::write(root.join("src/lib.rs"), "pub fn alpha() {}\n").unwrap();
// A task document carrying `agent_doc_session` front matter.
let task_doc = root.join("tasks/software/tsift.md");
std::fs::write(
&task_doc,
"---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Initial item\n",
)
.unwrap();
let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
db.rebuild(root).unwrap();
drop(db);
let first = graph_db_backend_eval_full_projection_source_watermark(root, None)
.unwrap()
.value;
// Change only the backlog item text in the task document.
std::fs::write(
&task_doc,
"---\nagent_doc_session: tsift-v0.1\n---\n\n## Backlog\n\n- [ ] [#one] Edited item\n",
)
.unwrap();
let second = graph_db_backend_eval_full_projection_source_watermark(root, None)
.unwrap()
.value;
assert_eq!(
first, second,
"session markdown churn must not invalidate the full-projection code/summary cache"
);
}
// Verifies that a second full-projection run after an identical-bytes rewrite
// reports a cache hit and zero-duration build phases.
#[test]
fn full_projection_cache_hit_skips_provider_neutral_rebuild_after_mtime_churn() {
// #gfullhot: once a full-project projection is cached, repeated samples
// with unchanged graph inputs must report zero source_graph_build and
// projection_rows work even if indexed file mtimes changed.
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::create_dir_all(root.join(".tsift")).unwrap();
let source = root.join("src/lib.rs");
let source_body = "pub fn alpha() { beta(); }\npub fn beta() {}\n";
std::fs::write(&source, source_body).unwrap();
let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
db.rebuild(root).unwrap();
drop(db);
// First run: expected to miss and populate the cache.
let (_projection, _warnings, _phases, first_stats) =
graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
assert!(
!first_stats.hit,
"the first full-projection run should populate the cache"
);
// Rewrite the same bytes after a short sleep, then re-apply index changes.
std::thread::sleep(std::time::Duration::from_millis(20));
std::fs::write(&source, source_body).unwrap();
let db = index::IndexDb::open(&root.join(".tsift/index.db")).unwrap();
db.apply_changes(root).unwrap();
drop(db);
// Second run: must hit, and both build phases must still be reported.
let (_projection, _warnings, phases, second_stats) =
graph_db_backend_eval_full_projection_with_profile(root, None).unwrap();
assert!(second_stats.hit, "mtime-only churn should still cache-hit");
let source_graph_build = phases
.iter()
.find(|phase| phase.name == "full_projection.source_graph_build")
.expect("cache hit must report source_graph_build");
let projection_rows = phases
.iter()
.find(|phase| phase.name == "full_projection.projection_rows")
.expect("cache hit must report projection_rows");
assert_eq!(source_graph_build.duration_micros, 0);
assert_eq!(projection_rows.duration_micros, 0);
}
}
// --- SQL introspection ---
/// Serializable description of one database table, as produced by
/// `schema_overview`.
#[derive(Serialize)]
struct TableInfo {
/// Table name as listed in `sqlite_master`.
name: String,
/// Column metadata gathered by `table_columns`.
columns: Vec<ColumnInfo>,
/// Result of `SELECT COUNT(*)` on the table.
row_count: i64,
}
/// Serializable description of one column, filled from a `PRAGMA table_info` row.
#[derive(Serialize)]
struct ColumnInfo {
/// Column name (result column 1 of the pragma).
name: String,
/// Declared column type (result column 2); serialized under the key `type`.
#[serde(rename = "type")]
col_type: String,
/// Not-null flag (result column 3).
notnull: bool,
/// True when the pragma's `pk` value (result column 5) is greater than zero.
pk: bool,
/// Default value (result column 4); omitted from the output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
default_value: Option<String>,
}
/// Opens the SQLite database at `path` without write access.
///
/// # Errors
/// Returns the underlying open failure, annotated with the path that was
/// being opened.
pub(crate) fn open_db(path: &std::path::Path) -> Result<Connection> {
    let read_only_flags =
        rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX;
    Connection::open_with_flags(path, read_only_flags)
        .with_context(|| format!("opening database: {}", path.display()))
}
/// List all user tables with column metadata and row counts.
///
/// Tables are read from `sqlite_master` (names starting with `sqlite_` are
/// skipped) and returned in name order.
///
/// # Errors
/// Propagates any failure from listing tables, reading column metadata, or
/// counting rows.
pub(crate) fn schema_overview(conn: &Connection) -> Result<Vec<TableInfo>> {
    let mut stmt = conn.prepare(
        "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name",
    )?;
    let table_names: Vec<String> = stmt
        .query_map([], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;
    let mut tables = Vec::with_capacity(table_names.len());
    for tbl in table_names {
        let columns = table_columns(conn, &tbl)?;
        // Table names cannot be bound as parameters, so the name is spliced in
        // as a quoted identifier. Double any embedded `"` so a name containing
        // a quote cannot terminate the identifier early.
        let quoted = tbl.replace('"', "\"\"");
        let row_count: i64 =
            conn.query_row(&format!("SELECT COUNT(*) FROM \"{quoted}\""), [], |row| {
                row.get(0)
            })?;
        tables.push(TableInfo {
            name: tbl,
            columns,
            row_count,
        });
    }
    Ok(tables)
}
/// Get column metadata for a single table.
///
/// Runs `PRAGMA table_info` for `table` and maps each result row to a
/// `ColumnInfo`. The type, not-null, and primary-key columns fall back to
/// empty / `false` / `0` when they cannot be read; the name and default value
/// propagate read errors.
///
/// # Errors
/// Propagates statement preparation and row-mapping failures.
pub(crate) fn table_columns(conn: &Connection, table: &str) -> Result<Vec<ColumnInfo>> {
    // Pragma arguments cannot be bound as parameters, so the name is spliced in
    // as a quoted identifier. Double any embedded `"` so a name containing a
    // quote cannot terminate the identifier early.
    let quoted = table.replace('"', "\"\"");
    let mut stmt = conn.prepare(&format!("PRAGMA table_info(\"{quoted}\")"))?;
    let cols = stmt
        .query_map([], |row| {
            Ok(ColumnInfo {
                name: row.get(1)?,
                col_type: row.get::<_, String>(2).unwrap_or_default(),
                notnull: row.get::<_, bool>(3).unwrap_or(false),
                pk: row.get::<_, i32>(5).unwrap_or(0) > 0,
                default_value: row.get(4)?,
            })
        })?
        .collect::<std::result::Result<Vec<_>, _>>()?;
    Ok(cols)
}
/// Runs `sql` with no bound parameters and returns `(column names, rows)`.
///
/// Each cell is converted to JSON: NULL becomes `null`, integers and reals
/// become numbers, text becomes a (lossily decoded) string, and blobs become
/// a `<blob N bytes>` placeholder string.
///
/// # Errors
/// Fails when the statement cannot be prepared or a row cannot be read.
pub(crate) fn execute_query(
    conn: &Connection,
    sql: &str,
) -> Result<(Vec<String>, Vec<Vec<serde_json::Value>>)> {
    /// Converts one borrowed SQLite cell into its JSON representation.
    fn cell_to_json(cell: rusqlite::types::ValueRef<'_>) -> serde_json::Value {
        match cell {
            rusqlite::types::ValueRef::Null => serde_json::Value::Null,
            rusqlite::types::ValueRef::Integer(int) => serde_json::json!(int),
            rusqlite::types::ValueRef::Real(real) => serde_json::json!(real),
            rusqlite::types::ValueRef::Text(text) => {
                serde_json::Value::String(String::from_utf8_lossy(text).into_owned())
            }
            rusqlite::types::ValueRef::Blob(blob) => {
                serde_json::Value::String(format!("<blob {} bytes>", blob.len()))
            }
        }
    }

    let mut stmt = conn.prepare(sql).context("preparing SQL query")?;
    let col_names: Vec<String> = stmt
        .column_names()
        .into_iter()
        .map(str::to_owned)
        .collect();
    let width = col_names.len();
    let mut table = Vec::new();
    let mut cursor = stmt.query([])?;
    while let Some(row) = cursor.next()? {
        let mut record = Vec::with_capacity(width);
        for idx in 0..width {
            record.push(cell_to_json(row.get_ref(idx)?));
        }
        table.push(record);
    }
    Ok((col_names, table))
}
// --- Command rewriting for hook integrations and manual bounded execution ---
/// Line budget applied to the captured stdout of a rewritten command.
#[derive(Clone, Copy)]
struct OutputCap {
/// Maximum number of lines kept before a "more lines" notice is appended.
max_lines: usize,
/// When set, lines starting with this prefix are dropped before counting.
strip_prefix: Option<&'static str>,
}
pub(crate) fn execute_rewritten_command(command: &str) -> Result<i32> {
let effective_command = effective_rewrite_run_command(command);
let parts = shell_split(&effective_command);
let Some(program) = parts.first().map(|part| strip_shell_quotes(part)) else {
bail!("rewritten command was empty");
};
let args: Vec<String> = parts[1..]
.iter()
.map(|part| strip_shell_quotes(part).to_string())
.collect();
let mut command = if program == "tsift" {
Command::new(std::env::current_exe().context("resolving current tsift executable")?)
} else {
Command::new(program)
};
let output = command
.args(&args)
.output()
.with_context(|| format!("executing rewritten command `{effective_command}`"))?;
let stdout = if let Some(cap) = rewrite_output_cap(&effective_command) {
apply_output_cap(&output.stdout, cap)
} else {
String::from_utf8_lossy(&output.stdout).into_owned()
};
if !stdout.is_empty() {
print!("{stdout}");
}
if !output.stderr.is_empty() {
eprint!("{}", String::from_utf8_lossy(&output.stderr));
}
Ok(output
.status
.code()
.unwrap_or_else(|| if output.status.success() { 0 } else { 1 }))
}
/// Returns the command line that `--run` should actually execute.
///
/// Only `tsift search` invocations are altered: when no timeout has been
/// given, ` --timeout 0` is appended. A timeout counts as given in either the
/// `--timeout <N>` or the `--timeout=<N>` spelling, so a caller-supplied value
/// is never duplicated. Every other command is returned unchanged.
fn effective_rewrite_run_command(command: &str) -> String {
    let parts = shell_split(command);
    if parts.first().map(|part| strip_shell_quotes(part)) != Some("tsift") {
        return command.to_string();
    }
    let has_timeout = parts.iter().skip(1).any(|part| {
        let value = strip_shell_quotes(part);
        value == "--timeout" || value.starts_with("--timeout=")
    });
    // The subcommand is taken to be the first token that is not a flag.
    let subcommand = parts
        .iter()
        .skip(1)
        .map(|part| strip_shell_quotes(part))
        .find(|part| !part.starts_with('-'));
    if matches!(subcommand, Some("search")) && !has_timeout {
        format!("{command} --timeout 0")
    } else {
        command.to_string()
    }
}
/// Forwards the caller's output-format flags onto a rewritten `tsift` command.
///
/// Flags enabled in `format` are inserted right after the program name unless
/// the command already carries them among its leading global flags.
/// `--envelope` takes precedence over `--json`: `--json` is only added when the
/// envelope is neither requested nor already present.
///
/// Commands whose program is not exactly `tsift` are returned unchanged. That
/// includes look-alikes such as `tsift-sim-world`, which merely start with the
/// same letters.
pub(crate) fn apply_rewrite_output_format(command: &str, format: OutputFormat) -> String {
    let trimmed = command.trim_start();
    let Some(rest) = trimmed.strip_prefix("tsift") else {
        return command.to_string();
    };
    // Require a word boundary after the program name; otherwise flags would be
    // spliced into the middle of a different program's name.
    if !(rest.is_empty() || rest.starts_with(char::is_whitespace)) {
        return command.to_string();
    }
    let existing_parts = shell_split(rest);
    let mut flags = Vec::new();
    if format.compact && !rewrite_has_global_flag(&existing_parts, "--compact") {
        flags.push("--compact");
    }
    if format.pretty && !rewrite_has_global_flag(&existing_parts, "--pretty") {
        flags.push("--pretty");
    }
    if format.terse && !rewrite_has_global_flag(&existing_parts, "--terse") {
        flags.push("--terse");
    }
    if format.schema && !rewrite_has_global_flag(&existing_parts, "--schema") {
        flags.push("--schema");
    }
    if format.envelope {
        if !rewrite_has_global_flag(&existing_parts, "--envelope") {
            flags.push("--envelope");
        }
    } else if format.json_output
        && !rewrite_has_global_flag(&existing_parts, "--json")
        && !rewrite_has_global_flag(&existing_parts, "--envelope")
    {
        flags.push("--json");
    }
    if flags.is_empty() {
        return command.to_string();
    }
    let forwarded = flags.join(" ");
    if rest.trim().is_empty() {
        format!("tsift {forwarded}")
    } else {
        // `rest` still begins with its original whitespace separator.
        format!("tsift {forwarded}{rest}")
    }
}
fn rewrite_has_global_flag(parts: &[&str], flag: &str) -> bool {
parts
.iter()
.take_while(|part| {
let value = strip_shell_quotes(part);
value.starts_with('-') || value == "tsift"
})
.any(|part| strip_shell_quotes(part) == flag)
}
/// Chooses the stdout line cap for a `tsift` command, if any.
///
/// No cap applies when the program is not `tsift`, when any structured-output
/// flag (`--json`, `--terse`, `--schema`, `--tabular`, `--envelope`) is
/// present, or when the subcommand (the first non-flag token) has no entry in
/// the table below.
fn rewrite_output_cap(command: &str) -> Option<OutputCap> {
    let parts = shell_split(command);
    let (program, args) = parts.split_first()?;
    if strip_shell_quotes(program) != "tsift" {
        return None;
    }

    let mut subcommand = None;
    for arg in args {
        let value = strip_shell_quotes(arg);
        if matches!(
            value,
            "--json" | "--terse" | "--schema" | "--tabular" | "--envelope"
        ) {
            return None;
        }
        if subcommand.is_none() && !value.starts_with('-') {
            subcommand = Some(value);
        }
    }

    let (max_lines, strip_prefix) = match subcommand? {
        "communities" => (80, None),
        "explain" => (40, None),
        "graph" => (50, None),
        "index" => (30, None),
        "search" => (50, Some("Strategy:")),
        _ => return None,
    };
    Some(OutputCap {
        max_lines,
        strip_prefix,
    })
}
/// Cleans captured stdout and truncates it to the cap's line budget.
///
/// ANSI sequences are stripped, trailing whitespace is trimmed, blank lines
/// are dropped, and lines starting with the cap's `strip_prefix` are removed.
/// When more than `max_lines` lines remain, the surplus is replaced by a
/// single notice line. Non-empty output always ends with a newline.
fn apply_output_cap(stdout: &[u8], cap: OutputCap) -> String {
    let text = strip_ansi_codes(&String::from_utf8_lossy(stdout));
    let mut kept: Vec<&str> = Vec::new();
    for raw in text.lines() {
        let line = raw.trim_end();
        if line.trim().is_empty() {
            continue;
        }
        if let Some(prefix) = cap.strip_prefix {
            if line.starts_with(prefix) {
                continue;
            }
        }
        kept.push(line);
    }

    let total = kept.len();
    if total == 0 {
        return String::new();
    }

    let mut capped = String::new();
    for line in kept.iter().take(cap.max_lines) {
        capped.push_str(line);
        capped.push('\n');
    }
    if total > cap.max_lines {
        let hidden = total - cap.max_lines;
        capped.push_str(&format!(
            "... (+{hidden} more lines; rerun the underlying tsift command directly for the full output)"
        ));
        capped.push('\n');
    }
    capped
}
/// Removes `ESC [` … sequences from `input`.
///
/// Everything from the `ESC [` introducer up to and including the first
/// character in the range `'@'..='~'` is dropped; an unterminated sequence
/// swallows the rest of the input. An `ESC` that is not followed by `[` is
/// kept as-is.
fn strip_ansi_codes(input: &str) -> String {
    let mut cleaned = String::with_capacity(input.len());
    let mut remaining = input;
    while let Some(intro_at) = remaining.find("\u{1b}[") {
        cleaned.push_str(&remaining[..intro_at]);
        // Skip the two-byte introducer, then look for the terminating character.
        let sequence = &remaining[intro_at + 2..];
        let terminator = sequence
            .char_indices()
            .find(|(_, ch)| ('@'..='~').contains(ch));
        remaining = match terminator {
            Some((offset, ch)) => &sequence[offset + ch.len_utf8()..],
            None => "",
        };
    }
    cleaned.push_str(remaining);
    cleaned
}
/// Attempt to rewrite a shell command to use tsift.
/// Returns Some(rewritten) if applicable, None if no match.
///
/// Commands that already invoke `tsift` are passed through untouched. All
/// other commands are offered to each rewriter in a fixed order and the first
/// match wins.
///
/// `pub` (not `pub(crate)`) so the `tsift-sim-world` test-harness crate can
/// exercise the rewrite surface as a dev-dependency.
pub fn rewrite_command(command: &str) -> Option<String> {
    let trimmed = command.trim();
    // Already a tsift command — pass through (exit 0, identical)
    if trimmed == "tsift" || trimmed.starts_with("tsift ") {
        return Some(command.to_string());
    }
    // rg <pattern> [path] [flags] → tsift search "<pattern>" --exact [--path <path>]
    rewrite_rg(trimmed)
        // grep -r <pattern> [path] → tsift search "<pattern>" --exact [--path <path>]
        .or_else(|| rewrite_grep(trimmed))
        // git diff / git show / patch-style history → tsift diff-digest
        .or_else(|| rewrite_git_diff(trimmed))
        .or_else(|| rewrite_git_show(trimmed))
        .or_else(|| rewrite_git_patch_history(trimmed))
        // long session/doc transcript reads → tsift session-digest
        .or_else(|| rewrite_session_read_command(trimmed))
        // large source-file reads inside indexed repos → tsift source-read windows
        .or_else(|| rewrite_source_read_command(trimmed))
        // cargo test / pytest → tsift-owned test digest wrapper that preserves exit status
        .or_else(|| rewrite_test_command(trimmed))
        // verbose build/check/install commands → tsift-owned log digest wrapper
        .or_else(|| rewrite_log_command(trimmed))
}
/// Builds the diagnostic shown when a command was not rewritten.
///
/// The message names the first applicable reason (empty command, shell
/// metacharacters, file-listing command, or no matching rewrite) followed by
/// advice that depends on whether `--run` was requested.
pub(crate) fn no_rewrite_message(command: &str, run: bool) -> String {
    let trimmed = command.trim();
    let parts = shell_split(trimmed);
    let reason = match () {
        () if trimmed.is_empty() => "empty command",
        () if has_shell_metacharacters(trimmed) => {
            "shell metacharacters such as pipes, redirection, or background operators are not rewritten"
        }
        () if is_file_listing_command(&parts) => {
            "file-listing commands keep original shell/find/rg semantics"
        }
        () => "no supported tsift rewrite matched this command",
    };
    let action = match run {
        true => {
            "`--run` executes only rewritten commands; run the original command directly if intended"
        }
        false => "run the original command unchanged",
    };
    format!("tsift rewrite: no rewrite: {reason}; {action}")
}
/// Reports whether the tokenized command lists files rather than searching.
///
/// Any `find` invocation qualifies, as does `rg` when one of its arguments is
/// exactly `--files` or `--type-list`.
fn is_file_listing_command(parts: &[&str]) -> bool {
    let Some((&program, args)) = parts.split_first() else {
        return false;
    };
    if program == "find" {
        return true;
    }
    program == "rg"
        && args
            .iter()
            .any(|arg| *arg == "--files" || *arg == "--type-list")
}
/// Rewrite `rg` (ripgrep) commands to tsift search.
///
/// Returns `None` when the command is not `rg`, is a file-listing form,
/// contains a pipe or output redirection, uses a flag that changes what rg
/// prints in a way tsift search cannot mirror (replace, count,
/// files-with/without-matches), supplies more than one `-e` pattern, or has no
/// pattern at all.
///
/// Flags are inspected token by token and only in flag position, so a pattern
/// or path that merely contains `-c` or `-l` (for example `my-class` or
/// `src/my-lib`) no longer blocks the rewrite. Unrelated flags such as
/// `--ignore-case` or `--line-number` no longer block it either.
///
/// When the pattern is given through `-e`/`--regexp`, every positional
/// argument is a path, matching rg's own rule.
fn rewrite_rg(cmd: &str) -> Option<String> {
    let parts: Vec<&str> = shell_split(cmd);
    if parts.first().copied() != Some("rg") {
        return None;
    }
    // File-listing forms do not have a search pattern. Leave them to the
    // original command so roots, globs, and ignore rules keep rg semantics.
    if is_file_listing_command(&parts) {
        return None;
    }
    // Pipe chains and output redirection are never rewritten.
    if cmd.contains('|') || cmd.contains('>') {
        return None;
    }
    let mut explicit_patterns: Vec<&str> = Vec::new();
    let mut positionals: Vec<&str> = Vec::new();
    let mut args = parts[1..].iter().copied();
    while let Some(part) = args.next() {
        // `-e PATTERN` / `--regexp PATTERN` carries the pattern itself.
        if matches!(part, "-e" | "--regexp") {
            explicit_patterns.push(args.next()?);
            continue;
        }
        if let Some(value) = part.strip_prefix("--regexp=") {
            explicit_patterns.push(value);
            continue;
        }
        // Flags that take a value: skip the value so it is not mistaken for
        // the pattern or the path.
        if matches!(
            part,
            "-t" | "--type"
                | "-g"
                | "--glob"
                | "-A"
                | "-B"
                | "-C"
                | "--context"
                | "--context-separator"
                | "--color"
                | "--colors"
                | "--max-columns"
                | "--max-count"
                | "--max-depth"
                | "-m"
        ) {
            args.next();
            continue;
        }
        if part.starts_with('-') {
            if rg_flag_is_untranslatable(part) {
                return None;
            }
            // Any other standalone flag is ignored.
            continue;
        }
        positionals.push(part);
    }
    let (pattern, path) = match explicit_patterns.as_slice() {
        // First positional = pattern, second = path.
        [] => (*positionals.first()?, positionals.get(1).copied()),
        // Explicit pattern: the first positional is already a path.
        [only] => (*only, positionals.first().copied()),
        // Several patterns cannot be expressed as one exact search.
        _ => return None,
    };
    Some(build_agent_search_preview_command(pattern, path))
}

/// Reports whether an rg flag token changes rg's output in a way the tsift
/// search rewrite cannot reproduce (replace, count, files-with/without-matches).
fn rg_flag_is_untranslatable(flag: &str) -> bool {
    if let Some(long) = flag.strip_prefix("--") {
        return ["replace", "count", "files-with-matches", "files-without-match"]
            .iter()
            .any(|name| long.starts_with(name));
    }
    let Some(cluster) = flag.strip_prefix('-') else {
        return false;
    };
    for short in cluster.chars() {
        match short {
            // -c (count), -l (files with matches), -r (replace).
            'c' | 'l' | 'r' => return true,
            // A value-taking short flag consumes the rest of the cluster as
            // its value (e.g. `-tcpp`), so stop scanning here.
            'A' | 'B' | 'C' | 'E' | 'M' | 'T' | 'd' | 'e' | 'f' | 'g' | 'j' | 'm' | 't' => {
                return false;
            }
            _ => {}
        }
    }
    false
}
/// Rewrite `grep -r` commands to tsift search.
///
/// Only recursive invocations are rewritten: `--recursive`, or a short-flag
/// cluster containing `r` or `R` (e.g. `-r`, `-R`, `-rn`, `-Rn`). Commands
/// with a pipe or output redirection are left alone.
///
/// When the pattern is given through `-e`/`--regexp`, it is used as the search
/// pattern and the first positional argument is the path. More than one
/// explicit pattern cannot be expressed as a single exact search, so such
/// commands are not rewritten.
fn rewrite_grep(cmd: &str) -> Option<String> {
    let parts: Vec<&str> = shell_split(cmd);
    if parts.first().copied() != Some("grep") {
        return None;
    }
    // Only rewrite recursive grep
    let has_recursive = parts.iter().any(|p| {
        *p == "--recursive"
            || (p.starts_with('-') && !p.starts_with("--") && p.contains(['r', 'R']))
    });
    if !has_recursive {
        return None;
    }
    // Skip pipe chains
    if cmd.contains('|') || cmd.contains('>') {
        return None;
    }
    let mut explicit_patterns: Vec<&str> = Vec::new();
    let mut positionals: Vec<&str> = Vec::new();
    let mut args = parts[1..].iter().copied();
    while let Some(part) = args.next() {
        // `-e PATTERN` / `--regexp PATTERN` carries the pattern itself.
        if matches!(part, "-e" | "--regexp") {
            explicit_patterns.push(args.next()?);
            continue;
        }
        if let Some(value) = part.strip_prefix("--regexp=") {
            explicit_patterns.push(value);
            continue;
        }
        // Flags whose value must not be mistaken for the pattern or path.
        if matches!(part, "--include" | "--exclude" | "--exclude-dir") {
            args.next();
            continue;
        }
        if part.starts_with('-') {
            continue;
        }
        positionals.push(part);
    }
    let (pattern, path) = match explicit_patterns.as_slice() {
        // First positional = pattern, second = path.
        [] => (*positionals.first()?, positionals.get(1).copied()),
        // Explicit pattern: the first positional is already a path.
        [only] => (*only, positionals.first().copied()),
        _ => return None,
    };
    Some(build_agent_search_preview_command(pattern, path))
}
/// Formats the `tsift search` command used in place of an rg/grep search.
///
/// The pattern is always shell-quoted; a `--path` argument is appended only
/// when a path was supplied.
fn build_agent_search_preview_command(pattern: &str, path: Option<&str>) -> String {
    let base = format!(
        "tsift --envelope search {} --exact --budget normal",
        shell_quote(pattern)
    );
    match path {
        Some(p) => base + &format!(" --path {}", shell_quote(p)),
        None => base,
    }
}
/// Rewrites a plain `git diff` into `tsift diff-digest`.
///
/// Accepted arguments are `--cached`/`--staged`, at most one path selector,
/// and `--` followed by at most one path. Anything else (including shell
/// metacharacters) makes the command ineligible. The path defaults to `.`.
fn rewrite_git_diff(cmd: &str) -> Option<String> {
    if has_shell_metacharacters(cmd) {
        return None;
    }
    let parts: Vec<&str> = shell_split(cmd);
    let ["git", "diff", args @ ..] = parts.as_slice() else {
        return None;
    };
    let mut cached = false;
    let mut path = None;
    let mut paths_only = false;
    for &token in args {
        if paths_only {
            // After `--` only a single, non-flag path is tolerated.
            if path.is_some() || token.starts_with('-') {
                return None;
            }
            path = Some(token);
        } else if token == "--cached" || token == "--staged" {
            cached = true;
        } else if token == "--" {
            paths_only = true;
        } else if looks_like_path_selector(token) {
            if path.is_some() {
                return None;
            }
            path = Some(token);
        } else {
            return None;
        }
    }
    Some(build_diff_digest_command(path.unwrap_or("."), cached, None))
}
/// Rewrites `git show` into `tsift diff-digest --revision <rev>`.
///
/// `-p`, `--patch`, `--stat`, and `--format=…` are accepted and ignored. A
/// non-flag token becomes the revision as long as the revision is still the
/// default `HEAD`; `--` may be followed by at most one path. Any other
/// argument makes the command ineligible. The path defaults to `.`.
fn rewrite_git_show(cmd: &str) -> Option<String> {
    if has_shell_metacharacters(cmd) {
        return None;
    }
    let parts: Vec<&str> = shell_split(cmd);
    let ["git", "show", args @ ..] = parts.as_slice() else {
        return None;
    };
    let mut revision = "HEAD";
    let mut path = None;
    let mut paths_only = false;
    for &token in args {
        if paths_only {
            // After `--` only a single, non-flag path is tolerated.
            if path.is_some() || token.starts_with('-') {
                return None;
            }
            path = Some(token);
        } else if token == "--" {
            paths_only = true;
        } else if matches!(token, "-p" | "--patch" | "--stat") || token.starts_with("--format=") {
            // Presentation-only flags: accepted, no effect on the rewrite.
        } else if token.starts_with('-') || revision != "HEAD" {
            // Unknown flag, or a second revision-like token.
            return None;
        } else {
            revision = token;
        }
    }
    Some(build_diff_digest_command(
        path.unwrap_or("."),
        false,
        Some(revision),
    ))
}
/// Rewrites a single-commit patch view (`git log -p -1 …`) into
/// `tsift diff-digest --revision <rev>`.
///
/// The rewrite only applies when both a patch flag (`-p`/`--patch`) and a
/// single-commit limit (`-1`, `-n1`, `--max-count=1`, or `-n 1` /
/// `--max-count 1`) are present. Any unrecognised flag, a count other than
/// `1`, a second revision-like token, or shell metacharacters yields `None`.
fn rewrite_git_patch_history(cmd: &str) -> Option<String> {
if has_shell_metacharacters(cmd) {
return None;
}
let parts: Vec<&str> = shell_split(cmd);
if parts.len() < 2 || parts[0] != "git" || parts[1] != "log" {
return None;
}
let mut saw_patch = false;
let mut saw_single_commit = false;
// Defaults to HEAD; replaced by the first non-flag token.
let mut revision = "HEAD";
let mut path = None;
let mut after_double_dash = false;
// Set after `-n` / `--max-count`: the next token must be the count.
let mut skip_next = false;
for part in &parts[2..] {
if skip_next {
skip_next = false;
// Only a count of exactly 1 is eligible.
if *part == "1" {
saw_single_commit = true;
continue;
}
return None;
}
if after_double_dash {
// After `--` only a single, non-flag path is tolerated.
if path.is_none() && !part.starts_with('-') {
path = Some(*part);
continue;
}
return None;
}
match *part {
"--" => after_double_dash = true,
"-p" | "--patch" => saw_patch = true,
"-1" | "-n1" | "--max-count=1" => saw_single_commit = true,
"-n" | "--max-count" => skip_next = true,
raw if !raw.starts_with('-') => {
// A second revision-like token is not supported.
if revision != "HEAD" {
return None;
}
revision = raw;
}
_ => return None,
}
}
// Both the patch flag and the single-commit limit are required.
if !saw_patch || !saw_single_commit {
return None;
}
Some(build_diff_digest_command(
path.unwrap_or("."),
false,
Some(revision),
))
}
/// Formats a `tsift diff-digest` command line.
///
/// `--cached` and `--revision <rev>` are emitted in that order when
/// requested. The path comes last: a bare `.` is written unquoted, any other
/// path is shell-quoted.
fn build_diff_digest_command(path: &str, cached: bool, revision: Option<&str>) -> String {
    let cached_flag = if cached { " --cached" } else { "" };
    let revision_flag = revision
        .map(|rev| format!(" --revision {}", shell_quote(rev)))
        .unwrap_or_default();
    let target = if path == "." {
        " .".to_string()
    } else {
        format!(" {}", shell_quote(path))
    };
    [
        "tsift diff-digest",
        cached_flag,
        revision_flag.as_str(),
        target.as_str(),
    ]
    .concat()
}
/// Minimum line count for a session/doc read to be rewritten: reads that
/// request fewer lines, or whole-file reads of files with fewer lines, are
/// left alone.
const SESSION_READ_LINE_THRESHOLD: usize = 80;
/// Minimum line count for a source-file read to be rewritten: reads that
/// request fewer lines, or whole-file reads of files with fewer lines, are
/// left alone.
const SOURCE_READ_LINE_THRESHOLD: usize = 80;
/// Which part of a file a read command asks for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FileReadWindow {
/// The whole file (produced for `cat`-like commands).
FullFile,
/// NOTE(review): presumably the first `lines` lines, as with `head` — confirm against the parser.
FromStart { lines: usize },
/// NOTE(review): presumably the last `lines` lines, as with `tail` — confirm against the parser.
FromEnd { lines: usize },
/// NOTE(review): presumably `lines` lines beginning at line `start`, as with `sed -n` — confirm against the parser.
Range { start: usize, lines: usize },
}
/// Parsed form of a file-reading shell command (`cat`, `head`, `tail`, `sed`, …).
struct FileReadTarget {
/// The file argument, with surrounding shell quotes removed.
input: String,
/// Explicit line count requested by the command; `None` for whole-file reads.
requested_lines: Option<usize>,
/// The portion of the file the command reads.
window: FileReadWindow,
}
/// Rewrites a long read of a session/doc transcript into a session digest.
///
/// The command must be free of shell metacharacters, parse as a file read,
/// and target a file recognised as a session-digest source. The read must
/// also be long: an explicit line request has to reach
/// `SESSION_READ_LINE_THRESHOLD`, and without one the file itself must have
/// at least that many lines.
fn rewrite_session_read_command(cmd: &str) -> Option<String> {
    if has_shell_metacharacters(cmd) {
        return None;
    }
    let target = parse_file_read_target(cmd)?;
    let input_path = Path::new(&target.input);
    let source = detect_session_digest_source(input_path)?;
    let long_enough = match target.requested_lines {
        Some(requested) => requested >= SESSION_READ_LINE_THRESHOLD,
        None => file_has_at_least_lines(input_path, SESSION_READ_LINE_THRESHOLD),
    };
    if !long_enough {
        return None;
    }
    let digest_path = resolve_digest_context_path(input_path);
    let rewritten = build_session_digest_command(&digest_path, &target.input, source);
    Some(rewritten)
}
/// Rewrites a long read of a source file in an indexed project into a bounded
/// source-read window.
///
/// The command must be free of shell metacharacters, parse as a file read, and
/// target a supported source file. The read must be long (explicit request or
/// file length of at least `SOURCE_READ_LINE_THRESHOLD` lines), and the file
/// must belong to a project root that has an index. Any failure along the way
/// leaves the command untouched.
fn rewrite_source_read_command(cmd: &str) -> Option<String> {
    if has_shell_metacharacters(cmd) {
        return None;
    }
    let target = parse_file_read_target(cmd)?;
    let input_path = Path::new(&target.input);
    if !file_is_supported_source(input_path) {
        return None;
    }
    let long_enough = match target.requested_lines {
        Some(requested) => requested >= SOURCE_READ_LINE_THRESHOLD,
        None => file_has_at_least_lines(input_path, SOURCE_READ_LINE_THRESHOLD),
    };
    if !long_enough {
        return None;
    }
    // Both a lookup error and "no root found" mean no rewrite.
    let root = lint::find_project_root_for_path(input_path).ok()??;
    if !project_has_index(&root) {
        return None;
    }
    let file_abs = input_path.canonicalize().ok()?;
    let relative = relativize_pathbuf(&file_abs, &root);
    let file_display = relative.to_string_lossy().to_string();
    let total_lines = count_file_lines(&file_abs)?;
    let (start, lines) = source_window_for_read(target.window, total_lines)?;
    let rewritten = build_source_read_rewrite_command(&root, &file_display, start, lines);
    Some(rewritten)
}
/// Splits `cmd` into shell words and dispatches on the program name to the matching
/// read-command parser. Unknown programs (and empty commands) yield `None`.
fn parse_file_read_target(cmd: &str) -> Option<FileReadTarget> {
    let words = shell_split(cmd);
    match *words.first()? {
        "cat" | "bat" | "batcat" => parse_cat_like_read_target(&words),
        "head" | "tail" => parse_head_tail_read_target(&words),
        "sed" => parse_sed_read_target(&words),
        _ => None,
    }
}
/// Parses a `cat`-style command: every word starting with `-` is ignored and exactly one
/// remaining operand (the file) is required. The result always describes a full-file read.
fn parse_cat_like_read_target(parts: &[&str]) -> Option<FileReadTarget> {
    let mut operands = parts[1..].iter().filter(|word| !word.starts_with('-'));
    let file = strip_shell_quotes(operands.next()?);
    // More than one file operand is not something we can rewrite.
    if operands.next().is_some() {
        return None;
    }
    Some(FileReadTarget {
        input: file.to_string(),
        requested_lines: None,
        window: FileReadWindow::FullFile,
    })
}
/// Parses a `head`/`tail` command into a [`FileReadTarget`].
///
/// Recognised count spellings are `-n N`, `--lines N`, `-nN`, `--lines=N` and the `-N`
/// shorthand; when several are given the last one wins. Without a count the default is 10
/// lines. Exactly one file operand is required; any other word (including unknown flags)
/// counts as an operand, so commands with extra flags are rejected.
///
/// `tail` with a `+N` count means "start at line N", not "the last N lines". That case is
/// reported as a [`FileReadWindow::Range`] starting at line `N` (clamped to 1) whose length
/// is the line count returned by `parse_requested_line_count`, instead of a window anchored
/// at the end of the file. For `head`, a `+N` count keeps its previous meaning.
fn parse_head_tail_read_target(parts: &[&str]) -> Option<FileReadTarget> {
    // Extracts `N` from a `+N` count argument (after removing surrounding quotes).
    fn from_line_offset(raw: &str) -> Option<usize> {
        strip_shell_quotes(raw)
            .strip_prefix('+')?
            .parse::<usize>()
            .ok()
    }

    let is_tail = match parts[0] {
        "head" => false,
        "tail" => true,
        _ => return None,
    };
    let mut requested_lines = 10;
    let mut tail_start_line: Option<usize> = None;
    let mut input = None;
    let mut index = 1;
    while index < parts.len() {
        let part = parts[index];
        // Figure out whether this word (possibly with the following one) is a line count.
        let count_arg = if part == "-n" || part == "--lines" {
            index += 1;
            Some(parts.get(index).copied()?)
        } else {
            part.strip_prefix("-n")
                .filter(|raw| !raw.is_empty())
                .or_else(|| part.strip_prefix("--lines="))
                .or_else(|| {
                    (part.starts_with('-') && part[1..].chars().all(|ch| ch.is_ascii_digit()))
                        .then(|| &part[1..])
                })
        };
        match count_arg {
            Some(raw) => {
                requested_lines = parse_requested_line_count(raw)?;
                // Reset on every count so that the last count argument decides the window.
                tail_start_line = from_line_offset(raw);
            }
            None => {
                if input.replace(strip_shell_quotes(part)).is_some() {
                    return None;
                }
            }
        }
        index += 1;
    }
    let window = match (is_tail, tail_start_line) {
        (false, _) => FileReadWindow::FromStart {
            lines: requested_lines,
        },
        (true, Some(start)) => FileReadWindow::Range {
            // `tail -n +0` behaves like `tail -n +1`: both start at the first line.
            start: start.max(1),
            lines: requested_lines,
        },
        (true, None) => FileReadWindow::FromEnd {
            lines: requested_lines,
        },
    };
    Some(FileReadTarget {
        input: input?.to_string(),
        requested_lines: Some(requested_lines),
        window,
    })
}
/// Parses the exact form `sed -n '<start>,<end>p' <file>`; any other shape yields `None`.
fn parse_sed_read_target(parts: &[&str]) -> Option<FileReadTarget> {
    let [_, "-n", script, file] = parts else {
        return None;
    };
    let (start, lines) = parse_sed_print_window(script)?;
    Some(FileReadTarget {
        input: strip_shell_quotes(file).to_string(),
        requested_lines: Some(lines),
        window: FileReadWindow::Range { start, lines },
    })
}
/// Parses the numeric argument of a `head`/`tail` line-count option.
///
/// A `+N` argument is validated as a number but reported as
/// `SESSION_READ_LINE_THRESHOLD` lines; a plain `N` is returned as-is. Anything that is
/// not an unsigned integer yields `None`.
fn parse_requested_line_count(raw: &str) -> Option<usize> {
    let unquoted = strip_shell_quotes(raw);
    match unquoted.strip_prefix('+') {
        Some(offset) => offset
            .parse::<usize>()
            .ok()
            .map(|_| SESSION_READ_LINE_THRESHOLD),
        None => unquoted.parse::<usize>().ok(),
    }
}
/// Parses a sed print script of the form `<start>,<end>p` (optionally quoted).
///
/// Returns `(start, line_count)` where `line_count = end - start + 1`. Yields `None` when
/// the script has another shape, when `end < start`, or when the line count does not fit
/// in `usize` (previously that case overflowed the arithmetic).
fn parse_sed_print_window(raw: &str) -> Option<(usize, usize)> {
    let script = strip_shell_quotes(raw);
    let (start, end) = script.strip_suffix('p')?.split_once(',')?;
    let start = start.parse::<usize>().ok()?;
    let end = end.parse::<usize>().ok()?;
    // `checked_sub` rejects reversed ranges, `checked_add` rejects `0,usize::MAX`.
    let lines = end.checked_sub(start)?.checked_add(1)?;
    Some((start, lines))
}
/// Returns `true` when the file extension of `path` is valid UTF-8 and maps to a language
/// known to `graph::lang::Lang::from_extension`.
fn file_is_supported_source(path: &Path) -> bool {
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    graph::lang::Lang::from_extension(extension).is_some()
}
/// Counts the physical lines of the file at `path`.
///
/// Lines are delimited by `\n`; a final line without a trailing newline counts too.
/// Returns `None` when the file cannot be opened or a read fails.
///
/// Lines are read as raw bytes so that lines containing invalid UTF-8 are still counted,
/// and a read error ends the scan instead of being retried: iterating `lines()` and
/// skipping `Err` items can spin forever when every read fails (for example when `path`
/// is a directory that the OS allows to be opened).
fn count_file_lines(path: &Path) -> Option<usize> {
    let mut reader = BufReader::new(fs::File::open(path).ok()?);
    let mut line = Vec::new();
    let mut count = 0usize;
    loop {
        line.clear();
        match reader.read_until(b'\n', &mut line) {
            Ok(0) => return Some(count),
            Ok(_) => count += 1,
            Err(_) => return None,
        }
    }
}
/// Resolves a requested read window against a file of `total_lines` lines.
///
/// Returns `(start, lines)` with a 1-based `start`, clamped so the window never extends
/// past the end of the file. Full-file reads are capped at `SOURCE_READ_LINE_THRESHOLD`
/// lines. Yields `None` for an empty file or a range that starts outside the file.
fn source_window_for_read(window: FileReadWindow, total_lines: usize) -> Option<(usize, usize)> {
    if total_lines == 0 {
        return None;
    }
    let span = match window {
        FileReadWindow::FullFile => (1, SOURCE_READ_LINE_THRESHOLD.min(total_lines)),
        FileReadWindow::FromStart { lines } => (1, lines.min(total_lines)),
        FileReadWindow::FromEnd { lines } => {
            let shown = lines.min(total_lines);
            (total_lines - shown + 1, shown)
        }
        FileReadWindow::Range { start, lines } => {
            if !(1..=total_lines).contains(&start) {
                return None;
            }
            (start, lines.min(total_lines - start + 1))
        }
    };
    Some(span)
}
/// Formats the `tsift --envelope source-read` command for `file` (shown relative to the
/// project), passing the project `root` as `--path` and the resolved line window.
fn build_source_read_rewrite_command(
    root: &Path,
    file: &str,
    start: usize,
    lines: usize,
) -> String {
    let quoted_file = shell_quote(file);
    let quoted_root = shell_quote(&root.to_string_lossy());
    format!(
        "tsift --envelope source-read {quoted_file} --path {quoted_root} --start {start} --lines {lines} --budget normal"
    )
}
/// Returns `true` when `root` has a `.tsift/index.db` file or any `index.db` file below
/// `.tsift/indexes`.
fn project_has_index(root: &Path) -> bool {
    let tsift_dir = root.join(".tsift");
    if tsift_dir.join("index.db").is_file() {
        return true;
    }
    directory_contains_index_db(&tsift_dir.join("indexes"))
}
/// Returns `true` when a regular file named `index.db` exists anywhere below `path`.
/// Directories that cannot be listed and unreadable entries are skipped.
fn directory_contains_index_db(path: &Path) -> bool {
    // Explicit work list instead of recursion; the answer does not depend on visit order.
    let mut pending = vec![path.to_path_buf()];
    while let Some(dir) = pending.pop() {
        let Ok(entries) = fs::read_dir(&dir) else {
            continue;
        };
        for entry in entries.flatten() {
            let candidate = entry.path();
            let named_index_db = candidate
                .file_name()
                .is_some_and(|name| name == "index.db");
            if named_index_db && candidate.is_file() {
                return true;
            }
            if candidate.is_dir() {
                pending.push(candidate);
            }
        }
    }
    false
}
/// Classifies `path` as a session transcript by extension plus a content sniff.
///
/// `.md`, `.jsonl` and `.log` files are candidates; for `.jsonl` the Claude check runs
/// before the Codex check. Anything else, or a candidate whose content does not match,
/// yields `None`.
fn detect_session_digest_source(path: &Path) -> Option<session_digest::SessionDigestSource> {
    let extension = path.extension().and_then(|ext| ext.to_str())?;
    match extension {
        "md" => file_looks_like_agent_doc_session(path)
            .then_some(session_digest::SessionDigestSource::Markdown),
        "jsonl" => {
            if file_looks_like_claude_jsonl(path) {
                Some(session_digest::SessionDigestSource::ClaudeJsonl)
            } else if file_looks_like_codex_jsonl(path) {
                Some(session_digest::SessionDigestSource::CodexJsonl)
            } else {
                None
            }
        }
        "log" => file_looks_like_agent_doc_log(path)
            .then_some(session_digest::SessionDigestSource::AgentDocLog),
        _ => None,
    }
}
/// Sniffs the first 16 KiB of `path` for any of the markers that identify an agent-doc
/// session document. Unreadable files are never a match.
fn file_looks_like_agent_doc_session(path: &Path) -> bool {
    let Some(prefix) = read_file_prefix(path, 16 * 1024) else {
        return false;
    };
    [
        "agent_doc_session:",
        "<!-- agent:exchange",
        "\n## Exchange",
    ]
    .into_iter()
    .any(|marker| prefix.contains(marker))
}
/// Sniffs the first 16 KiB of `path`: it matches when any of the first three non-empty
/// lines is valid JSON with a top-level `message`, `role` or `content` key.
fn file_looks_like_claude_jsonl(path: &Path) -> bool {
    let Some(prefix) = read_file_prefix(path, 16 * 1024) else {
        return false;
    };
    let mut sampled = prefix
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .take(3);
    sampled.any(|line| {
        serde_json::from_str::<serde_json::Value>(line).is_ok_and(|value| {
            ["message", "role", "content"]
                .into_iter()
                .any(|key| value.get(key).is_some())
        })
    })
}
/// Sniffs the first 16 KiB of `path`: it matches when any of the first eight non-empty
/// lines is valid JSON whose string `type` field is `session_meta`, `response_item` or
/// `event_msg`.
fn file_looks_like_codex_jsonl(path: &Path) -> bool {
    let Some(prefix) = read_file_prefix(path, 16 * 1024) else {
        return false;
    };
    let mut sampled = prefix
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .take(8);
    sampled.any(|line| {
        let Ok(value) = serde_json::from_str::<serde_json::Value>(line) else {
            return false;
        };
        let event_type = value.get("type").and_then(serde_json::Value::as_str);
        matches!(
            event_type,
            Some("session_meta" | "response_item" | "event_msg")
        )
    })
}
/// Sniffs the first 16 KiB of `path`: it matches when there is at least one non-empty
/// line and each of the first eight non-empty lines starts with `[` and contains `"] "`.
///
/// Unreadable files never match. An empty or whitespace-only file does not match either;
/// without the explicit "at least one line" check the `all` test would be vacuously true
/// for such files.
fn file_looks_like_agent_doc_log(path: &Path) -> bool {
    let Some(prefix) = read_file_prefix(path, 16 * 1024) else {
        return false;
    };
    let mut sampled = prefix
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .take(8)
        .peekable();
    sampled.peek().is_some() && sampled.all(|line| line.starts_with('[') && line.contains("] "))
}
/// Reads at most `max_bytes` bytes from the start of `path` and returns them as text,
/// replacing invalid UTF-8 sequences. Returns `None` when the file cannot be opened or read.
fn read_file_prefix(path: &Path, max_bytes: usize) -> Option<String> {
    let mut bytes = Vec::new();
    fs::File::open(path)
        .ok()?
        .take(max_bytes as u64)
        .read_to_end(&mut bytes)
        .ok()?;
    Some(String::from_utf8_lossy(&bytes).into_owned())
}
/// Returns `true` when the first `min_lines` lines of `path` can all be read as text.
/// Stops reading after `min_lines` lines; an unopenable file yields `false`.
fn file_has_at_least_lines(path: &Path, min_lines: usize) -> bool {
    let Ok(file) = fs::File::open(path) else {
        return false;
    };
    let readable = BufReader::new(file)
        .lines()
        .take(min_lines)
        .filter(Result::is_ok)
        .count();
    readable >= min_lines
}
/// Formats the `tsift session-digest` command for the transcript `input`, using `path` as
/// the `--path` argument and the detected `source` kind as `--source`.
fn build_session_digest_command(
    path: &str,
    input: &str,
    source: session_digest::SessionDigestSource,
) -> String {
    let quoted_path = shell_quote(path);
    let quoted_input = shell_quote(input);
    let source_arg = source.cli_arg();
    format!("tsift session-digest --path {quoted_path} --input {quoted_input} --source {source_arg}")
}
/// Resolves the `--path` argument for a digest command via
/// `lint::resolve_harness_root_or_canonical_path`, falling back to `.` when that fails.
fn resolve_digest_context_path(path: &Path) -> String {
    match lint::resolve_harness_root_or_canonical_path(path) {
        Ok(root) => root.display().to_string(),
        Err(_) => ".".to_string(),
    }
}
/// Rewrites `cargo test ...`, `pytest ...` and `python -m pytest ...` into a test digest
/// runner command that wraps the original command line. Commands containing shell
/// metacharacters, and all other commands, yield `None`.
fn rewrite_test_command(cmd: &str) -> Option<String> {
    if has_shell_metacharacters(cmd) {
        return None;
    }
    let words = shell_split(cmd);
    let runner = match words.as_slice() {
        ["cargo", "test", ..] => "cargo",
        ["pytest", ..] | ["python", "-m", "pytest", ..] => "pytest",
        _ => return None,
    };
    Some(build_digest_runner_command("test", ".", Some(runner), cmd))
}
/// Rewrites `cargo build|check|clippy|install ...` into a log digest runner command that
/// wraps the original command line. Commands containing shell metacharacters, and all
/// other commands, yield `None`.
fn rewrite_log_command(cmd: &str) -> Option<String> {
    if has_shell_metacharacters(cmd) {
        return None;
    }
    match shell_split(cmd).as_slice() {
        ["cargo", "build" | "check" | "clippy" | "install", ..] => {
            Some(build_digest_runner_command("log", ".", None, cmd))
        }
        _ => None,
    }
}
/// Formats the internal `tsift --envelope __digest-runner` command that runs
/// `shell_command` and digests its output as `kind`. `--runner` is appended only when a
/// runner is given.
fn build_digest_runner_command(
    kind: &str,
    path: &str,
    runner: Option<&str>,
    shell_command: &str,
) -> String {
    let runner_suffix = match runner {
        Some(name) => format!(" --runner {}", shell_quote(name)),
        None => String::new(),
    };
    format!(
        "tsift --envelope __digest-runner --kind {} --path {} --shell-command {}{}",
        shell_quote(kind),
        shell_quote(path),
        shell_quote(shell_command),
        runner_suffix
    )
}
/// Returns `true` when `cmd` contains a pipe, a redirection or an ampersand (`|`, `>`,
/// `<`, `&`), i.e. when it is more than one simple command as far as the rewriters care.
fn has_shell_metacharacters(cmd: &str) -> bool {
    cmd.contains(['|', '>', '<', '&'])
}
/// Removes one pair of matching surrounding quotes (`"…"` or `'…'`) from `s`.
/// Strings shorter than two bytes, or without a matching pair, are returned unchanged.
fn strip_shell_quotes(s: &str) -> &str {
    if s.len() < 2 {
        return s;
    }
    for quote in ['"', '\''] {
        let inner = s
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner;
        }
    }
    s
}
/// Heuristic: `raw` looks like a path when it contains a `/` or a `.` anywhere. This
/// covers trailing slashes, `./` and `../` prefixes, nested paths and names with an
/// extension.
fn looks_like_path_selector(raw: &str) -> bool {
    raw.contains(['/', '.'])
}
/// Kind of digest the internal digest runner produces; spelled `test` or `log` on the
/// command line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DigestRunnerKind {
/// Spelled `test`.
Test,
/// Spelled `log`.
Log,
}
impl DigestRunnerKind {
    /// Parses a runner kind, ignoring surrounding whitespace and ASCII case.
    ///
    /// # Errors
    /// Fails for anything other than `test` or `log`.
    fn parse(raw: &str) -> Result<Self> {
        let normalized = raw.trim().to_ascii_lowercase();
        let kind = match normalized.as_str() {
            "test" => Self::Test,
            "log" => Self::Log,
            other => bail!("unsupported digest runner kind `{other}`; expected test or log"),
        };
        Ok(kind)
    }

    /// Returns the command-line spelling of this kind.
    fn as_str(self) -> &'static str {
        match self {
            Self::Log => "log",
            Self::Test => "test",
        }
    }
}
/// Minimal shell word splitter.
///
/// Words are separated by ASCII whitespace. A word that begins with `"` or `'` extends to
/// the matching closing quote (or to the end of the input when unterminated) and keeps
/// its quotes. Quotes in the middle of a word and backslash escapes get no special
/// treatment.
fn shell_split(s: &str) -> Vec<&str> {
    let bytes = s.as_bytes();
    let mut words = Vec::new();
    let mut cursor = 0;
    loop {
        // Advance past the whitespace in front of the next word.
        cursor += bytes[cursor..]
            .iter()
            .take_while(|byte| byte.is_ascii_whitespace())
            .count();
        if cursor >= bytes.len() {
            break;
        }
        let start = cursor;
        let end = match bytes[start] {
            quote @ (b'"' | b'\'') => {
                match bytes[start + 1..].iter().position(|&byte| byte == quote) {
                    // Include the closing quote in the word.
                    Some(offset) => start + 1 + offset + 1,
                    None => bytes.len(),
                }
            }
            _ => {
                start
                    + bytes[start..]
                        .iter()
                        .take_while(|byte| !byte.is_ascii_whitespace())
                        .count()
            }
        };
        // Both ends sit next to ASCII bytes (or the string ends), so slicing is UTF-8 safe.
        words.push(&s[start..end]);
        cursor = end;
    }
    words
}
/// Wraps `s` in double quotes for use as a single word on a shell command line.
///
/// One pair of matching surrounding quotes (`"…"` or `'…'`) is removed first; the result
/// is always double-quoted, whether or not it contains special characters. Every
/// character that stays special inside double quotes (`\`, `"`, `$` and the backtick) is
/// backslash-escaped, so the shell passes the text through literally.
///
/// A lone quote character (length 1) is treated as ordinary text; stripping a "pair"
/// from it used to slice out of bounds and panic.
pub(crate) fn shell_quote(s: &str) -> String {
    let has_quote_pair = s.len() >= 2
        && ((s.starts_with('"') && s.ends_with('"'))
            || (s.starts_with('\'') && s.ends_with('\'')));
    let unquoted = if has_quote_pair {
        &s[1..s.len() - 1]
    } else {
        s
    };
    let mut quoted = String::with_capacity(unquoted.len() + 2);
    quoted.push('"');
    for ch in unquoted.chars() {
        if matches!(ch, '\\' | '"' | '$' | '`') {
            quoted.push('\\');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
/// Builds a coverage snapshot with every sector counter at zero, `Sealed` mode and no
/// active rebuild. Used for responses that did not search anything.
fn empty_search_coverage() -> sift::SearchCoverageSnapshot {
sift::SearchCoverageSnapshot {
mode: sift::SearchCoverageMode::Sealed,
total_sector_count: 0,
mounted_sector_count: 0,
reused_sector_count: 0,
dirty_sector_count: 0,
completed_dirty_sector_count: 0,
rebuilding_sector_count: 0,
resumed_sector_count: 0,
active_rebuild: None,
}
}
/// Combines the coverage snapshots of several responses into one.
///
/// All sector counters are summed and the first active rebuild found (in response order)
/// is kept. The mode is derived from the summed counters: `Sealed` when nothing is dirty
/// or rebuilding, `Converging` when dirty sectors have completed or anything is
/// rebuilding or resumed, otherwise `Frontier`.
fn aggregate_search_coverage(responses: &[sift::SearchResponse]) -> sift::SearchCoverageSnapshot {
    let mut total_sector_count = 0;
    let mut mounted_sector_count = 0;
    let mut reused_sector_count = 0;
    let mut dirty_sector_count = 0;
    let mut completed_dirty_sector_count = 0;
    let mut rebuilding_sector_count = 0;
    let mut resumed_sector_count = 0;
    let mut active_rebuild = None;
    for response in responses {
        let coverage = &response.coverage;
        total_sector_count += coverage.total_sector_count;
        mounted_sector_count += coverage.mounted_sector_count;
        reused_sector_count += coverage.reused_sector_count;
        dirty_sector_count += coverage.dirty_sector_count;
        completed_dirty_sector_count += coverage.completed_dirty_sector_count;
        rebuilding_sector_count += coverage.rebuilding_sector_count;
        resumed_sector_count += coverage.resumed_sector_count;
        if active_rebuild.is_none() {
            active_rebuild = coverage.active_rebuild.clone();
        }
    }
    let nothing_pending = dirty_sector_count == 0 && rebuilding_sector_count == 0;
    let making_progress = completed_dirty_sector_count > 0
        || rebuilding_sector_count > 0
        || resumed_sector_count > 0;
    let mode = if nothing_pending {
        sift::SearchCoverageMode::Sealed
    } else if making_progress {
        sift::SearchCoverageMode::Converging
    } else {
        sift::SearchCoverageMode::Frontier
    };
    sift::SearchCoverageSnapshot {
        mode,
        total_sector_count,
        mounted_sector_count,
        reused_sector_count,
        dirty_sector_count,
        completed_dirty_sector_count,
        rebuilding_sector_count,
        resumed_sector_count,
        active_rebuild,
    }
}
/// Builds a response for `root` and `strategy` that contains no hits, zero artifact
/// counters and an empty coverage snapshot.
fn empty_search_response(root: &Path, strategy: &str) -> sift::SearchResponse {
sift::SearchResponse {
strategy: strategy.to_string(),
root: root.display().to_string(),
indexed_artifacts: 0,
skipped_artifacts: 0,
coverage: empty_search_coverage(),
hits: Vec::new(),
}
}
/// Rewrites every relative hit path in `response` so that it is joined onto
/// `search_root`; paths that are already absolute are left untouched.
fn absolutize_search_hit_paths(response: &mut sift::SearchResponse, search_root: &Path) {
    let relative_hits = response
        .hits
        .iter_mut()
        .filter(|hit| Path::new(&hit.path).is_relative());
    for hit in relative_hits {
        hit.path = search_root.join(&hit.path).display().to_string();
    }
}
/// Merges per-scope search responses into a single response for `root`.
///
/// Artifact counters are summed and coverage is aggregated (or empty when there are no
/// responses). Hits are sorted by descending score, then path, then location, truncated
/// to `limit` and re-ranked starting at 1.
///
/// NaN scores sort after every real score and compare equal to each other. This keeps
/// the comparator a total order, which `sort_by` requires; falling back to `Equal`
/// whenever `partial_cmp` fails does not give one as soon as a NaN is present.
fn merge_search_responses(
    root: &Path,
    strategy: &str,
    limit: usize,
    responses: Vec<sift::SearchResponse>,
) -> sift::SearchResponse {
    let indexed_artifacts = responses
        .iter()
        .map(|response| response.indexed_artifacts)
        .sum();
    let skipped_artifacts = responses
        .iter()
        .map(|response| response.skipped_artifacts)
        .sum();
    let coverage = if responses.is_empty() {
        empty_search_coverage()
    } else {
        aggregate_search_coverage(&responses)
    };
    let mut hits: Vec<sift::SearchHit> = responses
        .into_iter()
        .flat_map(|response| response.hits)
        .collect();
    hits.sort_by(|left, right| {
        let by_score = match (left.score.is_nan(), right.score.is_nan()) {
            // Both are real numbers: higher score first.
            (false, false) => right
                .score
                .partial_cmp(&left.score)
                .unwrap_or(Ordering::Equal),
            (true, true) => Ordering::Equal,
            // NaN goes last.
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
        };
        by_score
            .then_with(|| left.path.cmp(&right.path))
            .then_with(|| left.location.cmp(&right.location))
    });
    hits.truncate(limit);
    for (index, hit) in hits.iter_mut().enumerate() {
        hit.rank = index + 1;
    }
    sift::SearchResponse {
        strategy: strategy.to_string(),
        root: root.display().to_string(),
        indexed_artifacts,
        skipped_artifacts,
        coverage,
        hits,
    }
}
/// Runs a sift search across every resolved search index target below `root` and merges
/// the results.
///
/// When no targets resolve, the search falls back to a single search of `root` if the
/// workspace has no submodule directories, and to an empty response otherwise. Hit paths
/// of each per-target response are made absolute against that target's source root
/// before merging.
///
/// # Errors
/// Propagates target resolution, configuration and search failures; the first failing
/// target aborts the whole search.
pub(crate) fn federated_sift_search(
    root: &Path,
    cache_dir: &Path,
    query: &str,
    limit: usize,
    timeout_secs: u64,
    strategy: &str,
) -> Result<sift::SearchResponse> {
    let targets = resolve_search_index_targets(root, root, None, true)?;
    if targets.is_empty() {
        let has_submodules = !config::Config::submodule_dirs(root)?.is_empty();
        if has_submodules {
            return Ok(empty_search_response(root, strategy));
        }
        return run_search_with_timeout(
            root,
            cache_dir,
            query,
            limit,
            timeout_secs,
            strategy,
            &[],
        );
    }
    let root_display = root.display().to_string();
    let responses = targets
        .iter()
        .map(|target| -> Result<sift::SearchResponse> {
            let mut response = run_search_with_timeout(
                &target.source_root,
                cache_dir,
                query,
                limit,
                timeout_secs,
                strategy,
                std::slice::from_ref(target),
            )?;
            absolutize_search_hit_paths(&mut response, &target.source_root);
            response.root = root_display.clone();
            Ok(response)
        })
        .collect::<Result<Vec<_>>>()?;
    Ok(merge_search_responses(root, strategy, limit, responses))
}
/// Federated symbol search across every scoped `.tsift/indexes/<scope>/index.db`
/// in the workspace. Per-scope tagpath annotation runs inside the per-scope
/// loop so each scope's adapter resolves against its own `.naming.toml` /
/// `.naming/index.json` (the workspace root usually has no tagpath of its
/// own). The merged `TagpathAnnotationDiagnostic` reports `loaded=true` when
/// at least one scope loaded, and `stale=true` with the first stale reason
/// when any scope was stale.
pub(crate) fn federated_symbol_search(
root: &std::path::Path,
query: &str,
limit: usize,
tagpath_opts: &TagpathSearchOpts,
) -> Result<(Vec<index::SymbolHit>, TagpathAnnotationDiagnostic)> {
let cfg = config::Config::load(root)?;
let submodules = config::Config::submodule_dirs(root)?;
let mut all_hits: Vec<index::SymbolHit> = Vec::new();
let mut combined = TagpathAnnotationDiagnostic::default();
for scope in &submodules {
if !cfg.federation_for_scope(scope) {
continue;
}
let db_path = cfg.db_path_for(root, &scope.id);
if !db_path.exists() {
continue;
}
let db = index::IndexDb::open_read_only(&db_path)?;
let mut hits = db.symbol_search(query, limit)?;
let diag = annotate_hits_with_tagpath(&mut hits, &scope.source_root, tagpath_opts)?;
combined.loaded |= diag.loaded;
if diag.stale && !combined.stale {
combined.stale = true;
combined.reason = diag.reason;
}
all_hits.append(&mut hits);
}
all_hits.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
all_hits.truncate(limit);
Ok((all_hits, combined))
}
/// One line of ripgrep `--json` output, discriminated by its lowercase `type` field.
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
enum RipgrepJsonEvent {
/// A `match` event carrying the matched line.
Match {
data: RipgrepMatchData,
},
/// Any other event type; ignored by the parser.
#[serde(other)]
Other,
}
/// Payload of a ripgrep `match` event, limited to the fields the exact search reads.
#[derive(Debug, Deserialize)]
struct RipgrepMatchData {
// File the match was found in.
path: RipgrepTextField,
// Text of the matching line(s); trailing line breaks are trimmed by the parser.
lines: RipgrepTextField,
// Line number of the match, when ripgrep reports one.
line_number: Option<usize>,
}
/// A ripgrep JSON value that may carry a `text` member.
#[derive(Debug, Deserialize)]
struct RipgrepTextField {
// `None` when the `text` member is absent; the parser skips such matches.
text: Option<String>,
}
/// Runs the ripgrep-backed exact search in every federated submodule scope of `root` and
/// merges the results under the `exact` strategy.
///
/// Hit paths of each per-scope response are made absolute against that scope's source
/// root before merging.
///
/// # Errors
/// Propagates configuration and search failures; the first failing scope aborts the
/// whole search.
pub(crate) fn federated_exact_search(
    root: &Path,
    query: &str,
    limit: usize,
    timeout_secs: u64,
) -> Result<sift::SearchResponse> {
    let cfg = config::Config::load(root)?;
    let scopes = config::Config::submodule_dirs(root)?;
    let root_display = root.display().to_string();
    let mut responses = Vec::new();
    for scope in scopes
        .into_iter()
        .filter(|scope| cfg.federation_for_scope(scope))
    {
        let mut response =
            run_exact_search_with_timeout(&scope.source_root, query, limit, timeout_secs)?;
        absolutize_search_hit_paths(&mut response, &scope.source_root);
        response.root = root_display.clone();
        responses.push(response);
    }
    Ok(merge_search_responses(root, "exact", limit, responses))
}
/// Runs a single in-process sift search of `search_path` with the given cache directory,
/// result limit and strategy.
///
/// # Errors
/// Returns the engine's error wrapped with the context `sift search failed`.
pub(crate) fn run_sift_search(
    search_path: &Path,
    cache_dir: &Path,
    query: &str,
    limit: usize,
    strategy: &str,
) -> Result<sift::SearchResponse> {
    let engine = Sift::builder().with_cache_dir(cache_dir).build();
    let input = SearchInput::new(search_path, query).with_options(
        SearchOptions::default()
            .with_limit(limit)
            .with_strategy(strategy.to_string()),
    );
    engine.search(input).context("sift search failed")
}
/// Formats the user-facing error shown when the exact search exceeds its timeout.
fn exact_search_timeout_message(timeout_secs: u64) -> String {
    format!(
        "tsift search timed out after {timeout_secs}s (strategy: exact). \
         Re-run with `--timeout 0` to disable the timeout or narrow `--path` / `--scope`."
    )
}
/// Builds the ripgrep invocation for an exact search: JSON output, fixed-string matching,
/// line numbers and hidden files included. `--` precedes the query so that a query
/// starting with `-` is not parsed as an option.
fn exact_search_command(search_path: &Path, query: &str) -> Command {
    let mut rg = Command::new("rg");
    rg.args([
        "--json",
        "--fixed-strings",
        "--line-number",
        "--hidden",
        "--",
    ]);
    rg.arg(query);
    rg.arg(search_path);
    rg
}
/// Captures freshness information for an exact-search hit: the current wall-clock time
/// and, when it can be read, the file's modification time, both as whole seconds since
/// the Unix epoch.
fn exact_search_file_timestamp(path: &Path) -> sift::ArtifactFreshness {
    let unix_secs = |time: SystemTime| {
        time.duration_since(UNIX_EPOCH)
            .ok()
            .map(|since_epoch| since_epoch.as_secs() as i64)
    };
    // A clock before the epoch is reported as 0 rather than as an error.
    let observed_unix_secs = unix_secs(SystemTime::now()).unwrap_or(0);
    let modified_unix_secs = fs::metadata(path)
        .ok()
        .and_then(|metadata| metadata.modified().ok())
        .and_then(unix_secs);
    sift::ArtifactFreshness {
        observed_unix_secs,
        modified_unix_secs,
    }
}
fn parse_exact_search_output(
search_path: &Path,
limit: usize,
raw: &str,
) -> Result<sift::SearchResponse> {
if limit == 0 {
return Ok(sift::SearchResponse {
strategy: "exact".to_string(),
root: search_path.display().to_string(),
indexed_artifacts: 0,
skipped_artifacts: 0,
coverage: empty_search_coverage(),
hits: Vec::new(),
});
}
let mut hits = Vec::new();
for line in raw.lines() {
let event: RipgrepJsonEvent =
serde_json::from_str(line).context("parsing ripgrep exact-search output")?;
let RipgrepJsonEvent::Match { data } = event else {
continue;
};
let Some(path_text) = data.path.text else {
continue;
};
let Some(lines_text) = data.lines.text else {
continue;
};
let path = PathBuf::from(path_text);
let snippet = lines_text.trim_end_matches(['\r', '\n']).to_string();
let rank = hits.len() + 1;
hits.push(sift::SearchHit {
artifact_id: format!(
"exact:{}:{}:{}",
path.display(),
data.line_number.unwrap_or(0),
rank
),
artifact_kind: sift::ContextArtifactKind::File,
path: path.display().to_string(),
rank,
score: (limit.saturating_sub(rank).saturating_add(1)) as f64,
confidence: sift::ScoreConfidence::High,
location: data.line_number.map(|line| format!("line {}", line)),
snippet: snippet.clone(),
provenance: sift::ArtifactProvenance {
adapter: sift::AcquisitionAdapterKind::FileSystem,
source: "ripgrep -F".to_string(),
synthetic: false,
},
freshness: exact_search_file_timestamp(&path),
budget: sift::ArtifactBudget::from_text(&snippet, 1),
});
if hits.len() >= limit {
break;
}
}
Ok(sift::SearchResponse {
strategy: "exact".to_string(),
root: search_path.display().to_string(),
indexed_artifacts: hits.len(),
skipped_artifacts: 0,
coverage: empty_search_coverage(),
hits,
})
}
/// Turns the exit status and captured output of a finished ripgrep process into a search
/// response.
///
/// Exit code 1 is accepted alongside success; any other failure becomes an error.
///
/// # Errors
/// Fails with ripgrep's trimmed stderr (or a generic status message when stderr is
/// empty) on a failing exit, when stdout is not valid UTF-8, or when the output cannot
/// be parsed.
fn exact_search_response_from_process(
    search_path: &Path,
    limit: usize,
    status: std::process::ExitStatus,
    stdout: &[u8],
    stderr: &[u8],
) -> Result<sift::SearchResponse> {
    let failed = !status.success() && status.code() != Some(1);
    if failed {
        let message = String::from_utf8_lossy(stderr);
        let detail = message.trim();
        if !detail.is_empty() {
            bail!("{}", detail);
        }
        bail!("ripgrep exact search exited with status {}", status);
    }
    let raw = String::from_utf8(stdout.to_vec()).context("decoding ripgrep exact-search output")?;
    parse_exact_search_output(search_path, limit, &raw)
}
/// Runs ripgrep to completion without a timeout and converts its output into a search
/// response.
///
/// # Errors
/// Fails when ripgrep cannot be started or when its result cannot be interpreted.
fn run_exact_search(search_path: &Path, query: &str, limit: usize) -> Result<sift::SearchResponse> {
    let finished = exact_search_command(search_path, query)
        .output()
        .context("running exact search with ripgrep")?;
    let std::process::Output {
        status,
        stdout,
        stderr,
    } = finished;
    exact_search_response_from_process(search_path, limit, status, &stdout, &stderr)
}
pub(crate) fn run_exact_search_with_timeout(
search_path: &Path,
query: &str,
limit: usize,
timeout_secs: u64,
) -> Result<sift::SearchResponse> {
if timeout_secs == 0 {
return run_exact_search(search_path, query, limit);
}
let mut child = exact_search_command(search_path, query)
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.context("spawning timed exact search worker")?;
let timeout = Duration::from_secs(timeout_secs);
let status = wait_for_child_exit(&mut child, timeout)
.context("waiting for timed exact search worker")?;
if status.is_none() {
let _ = child.kill();
let _ = child.wait();
bail!("{}", exact_search_timeout_message(timeout_secs));
}
let status = status.unwrap();
let stdout = read_child_stdout(&mut child)?;
let stderr = read_child_stderr(&mut child)?;
exact_search_response_from_process(
search_path,
limit,
status,
stdout.as_bytes(),
stderr.as_bytes(),
)
}
pub(crate) fn run_search_with_timeout(
search_path: &Path,
cache_dir: &Path,
query: &str,
limit: usize,
timeout_secs: u64,
strategy: &str,
search_targets: &[SearchIndexTarget],
) -> Result<sift::SearchResponse> {
if timeout_secs == 0 {
return run_sift_search(search_path, cache_dir, query, limit, strategy);
}
let output_path = next_search_worker_output_path();
let mut child = Command::new(
std::env::current_exe().context("resolving tsift executable for timed search")?,
)
.arg("__search-worker")
.arg("--path")
.arg(search_path)
.arg("--cache-dir")
.arg(cache_dir)
.arg("--query")
.arg(query)
.arg("--limit")
.arg(limit.to_string())
.arg("--strategy")
.arg(strategy)
.arg("--output")
.arg(&output_path)
.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::piped())
.spawn()
.context("spawning timed sift search worker")?;
let timeout = Duration::from_secs(timeout_secs);
let status =
wait_for_child_exit(&mut child, timeout).context("waiting for timed sift search worker")?;
if status.is_none() {
let _ = child.kill();
let _ = child.wait();
let _ = fs::remove_file(&output_path);
bail!(
"{}",
search_timeout_message(timeout_secs, strategy, search_targets)?
);
}
let status = status.unwrap();
let stderr = read_child_stderr(&mut child)?;
if !status.success() {
let _ = fs::remove_file(&output_path);
let message = stderr.trim();
if message.is_empty() {
bail!("sift search worker exited with status {}", status);
}
bail!("{}", message);
}
let raw = fs::read_to_string(&output_path)
.with_context(|| format!("reading search worker output: {}", output_path.display()))?;
let _ = fs::remove_file(&output_path);
serde_json::from_str(&raw).context("parsing search worker output")
}
/// Picks the temporary file a search worker writes its response to:
/// `<temp dir>/tsift-search-<pid>-<nanoseconds since the Unix epoch>.json`.
fn next_search_worker_output_path() -> PathBuf {
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_nanos();
    let pid = std::process::id();
    let mut path = std::env::temp_dir();
    path.push(format!("tsift-search-{pid}-{nanos}.json"));
    path
}
/// Polls `child` until it exits or `timeout` has elapsed.
///
/// Returns `Ok(Some(status))` when the child exited and `Ok(None)` on timeout; the child
/// is left running in that case. Between polls the thread sleeps for at most 10 ms and
/// never past the deadline.
///
/// # Errors
/// Propagates failures from `try_wait`.
fn wait_for_child_exit(
    child: &mut std::process::Child,
    timeout: Duration,
) -> Result<Option<std::process::ExitStatus>> {
    let poll_interval = Duration::from_millis(10);
    let started = Instant::now();
    loop {
        match child.try_wait()? {
            Some(status) => return Ok(Some(status)),
            None if started.elapsed() >= timeout => return Ok(None),
            None => {
                let remaining = timeout.saturating_sub(started.elapsed());
                std::thread::sleep(remaining.min(poll_interval));
            }
        }
    }
}
/// Takes the child's stderr pipe and reads it to the end as UTF-8 text. Returns an empty
/// string when there is no pipe (not captured, or already taken).
///
/// # Errors
/// Fails when reading the pipe fails or the data is not valid UTF-8.
fn read_child_stderr(child: &mut std::process::Child) -> Result<String> {
    let Some(mut pipe) = child.stderr.take() else {
        return Ok(String::new());
    };
    let mut text = String::new();
    pipe.read_to_string(&mut text)
        .context("reading search worker stderr")?;
    Ok(text)
}
/// Takes the child's stdout pipe and reads it to the end as UTF-8 text. Returns an empty
/// string when there is no pipe (not captured, or already taken).
///
/// # Errors
/// Fails when reading the pipe fails or the data is not valid UTF-8.
fn read_child_stdout(child: &mut std::process::Child) -> Result<String> {
    let Some(mut pipe) = child.stdout.take() else {
        return Ok(String::new());
    };
    let mut text = String::new();
    pipe.read_to_string(&mut text)
        .context("reading search worker stdout")?;
    Ok(text)
}
/// Applies the environment-driven hooks that tests use to observe and slow down the
/// search worker.
///
/// * `TSIFT_TEST_SEARCH_WORKER_PID_FILE`: the current process id is written to this path.
/// * `TSIFT_TEST_SEARCH_WORKER_SLEEP_MS`: the worker sleeps for this many milliseconds.
///
/// Both are optional; without them this is a no-op.
///
/// # Errors
/// Fails when the pid file cannot be written or the sleep value is not a `u64`.
pub(crate) fn maybe_apply_search_worker_test_hooks() -> Result<()> {
    if let Ok(pid_file) = std::env::var("TSIFT_TEST_SEARCH_WORKER_PID_FILE") {
        let pid = std::process::id().to_string();
        fs::write(&pid_file, pid)
            .with_context(|| format!("writing search worker pid file: {pid_file}"))?;
    }
    if let Ok(raw_delay) = std::env::var("TSIFT_TEST_SEARCH_WORKER_SLEEP_MS") {
        let delay = raw_delay
            .parse::<u64>()
            .map(Duration::from_millis)
            .with_context(|| format!("parsing TSIFT_TEST_SEARCH_WORKER_SLEEP_MS={raw_delay}"))?;
        std::thread::sleep(delay);
    }
    Ok(())
}
// Test-only, per-thread slot holding the lock hook that the next call to
// `maybe_apply_search_post_precheck_test_hooks` on this thread will consume.
#[cfg(test)]
thread_local! {
static SEARCH_POST_PRECHECK_LOCK_HOOK: RefCell<Option<SearchPostPrecheckLockHook>> = const { RefCell::new(None) };
}
/// Test-only: which SQLite journal mode the lock hook uses when it takes its exclusive
/// lock.
#[cfg(test)]
enum SearchPostPrecheckLockMode {
/// Lock with `journal_mode=DELETE` and write a rollback-journal marker file.
RollbackJournal,
/// Lock with `journal_mode=WAL` in exclusive locking mode.
Wal,
}
/// Test-only description of a pending lock hook.
#[cfg(test)]
struct SearchPostPrecheckLockHook {
// Database file the hook opens and locks.
db_path: PathBuf,
// Journal mode used while holding the lock.
mode: SearchPostPrecheckLockMode,
}
/// Test-only guard returned when a lock hook is installed; dropping it clears any hook
/// that is still pending on the current thread.
#[cfg(test)]
struct SearchPostPrecheckLockGuard;
#[cfg(test)]
impl Drop for SearchPostPrecheckLockGuard {
    /// Clears the current thread's hook slot so that an unconsumed hook does not leak
    /// into a later test on the same thread.
    fn drop(&mut self) {
        SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| {
            *slot.borrow_mut() = None;
        });
    }
}
/// Test-only: installs a lock hook for `db_path` that locks in rollback-journal mode.
/// The hook stays pending until it is consumed or the returned guard is dropped.
#[cfg(test)]
fn install_search_post_precheck_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
install_search_post_precheck_lock_hook(db_path, SearchPostPrecheckLockMode::RollbackJournal)
}
/// Test-only: installs a lock hook for `db_path` that locks in WAL mode.
/// The hook stays pending until it is consumed or the returned guard is dropped.
#[cfg(test)]
fn install_search_post_precheck_wal_lock(db_path: PathBuf) -> SearchPostPrecheckLockGuard {
install_search_post_precheck_lock_hook(db_path, SearchPostPrecheckLockMode::Wal)
}
/// Test-only: stores a lock hook for the current thread and returns the guard that
/// clears it again.
///
/// # Panics
/// Panics when a hook is already installed on this thread.
#[cfg(test)]
fn install_search_post_precheck_lock_hook(
    db_path: PathBuf,
    mode: SearchPostPrecheckLockMode,
) -> SearchPostPrecheckLockGuard {
    SEARCH_POST_PRECHECK_LOCK_HOOK.with(|slot| {
        let mut slot = slot.borrow_mut();
        assert!(
            slot.is_none(),
            "search post-precheck lock hook already installed"
        );
        *slot = Some(SearchPostPrecheckLockHook { db_path, mode });
    });
    SearchPostPrecheckLockGuard
}
/// Test build only: consumes the lock hook pending on the current thread, if any, and
/// has a background thread hold an exclusive SQLite lock on the hook's database for
/// about 200 ms.
///
/// Returns once the background thread has signalled that the lock is held. Without a
/// pending hook this is a no-op.
///
/// # Errors
/// Fails when the background thread does not signal readiness within one second.
#[cfg(test)]
pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
// Taking the hook out of the slot makes it one-shot.
let Some(hook) = SEARCH_POST_PRECHECK_LOCK_HOOK.with(|hook| hook.borrow_mut().take()) else {
return Ok(());
};
let (ready_tx, ready_rx) = std::sync::mpsc::sync_channel(1);
std::thread::spawn(move || {
let conn = Connection::open(&hook.db_path).expect("opening db for search lock hook");
match hook.mode {
SearchPostPrecheckLockMode::RollbackJournal => {
conn.execute_batch("PRAGMA journal_mode=DELETE; BEGIN EXCLUSIVE;")
.expect("acquiring rollback-journal hook lock");
// Write a marker file at the rollback-journal path of the database.
fs::write(substrate::rollback_journal_path(&hook.db_path), "locked")
.expect("writing rollback journal marker");
}
SearchPostPrecheckLockMode::Wal => {
// The insert happens before the exclusive transaction begins; the assertion
// below then checks that the WAL sidecar file exists.
conn.execute_batch(
"PRAGMA journal_mode=WAL;
PRAGMA wal_autocheckpoint=0;
CREATE TABLE IF NOT EXISTS search_wal_lock_probe (id INTEGER PRIMARY KEY);
INSERT INTO search_wal_lock_probe DEFAULT VALUES;
PRAGMA locking_mode=EXCLUSIVE;
BEGIN EXCLUSIVE;",
)
.expect("acquiring WAL hook lock");
assert!(substrate::wal_sidecar_path(&hook.db_path).exists());
}
}
// Tell the caller the lock is in place, then keep holding it for a while.
ready_tx.send(()).expect("signaling search lock hook");
std::thread::sleep(Duration::from_millis(200));
// Dropping the connection ends the hold on the database.
drop(conn);
// Best-effort cleanup of the marker file; the result is ignored in both modes.
let _ = fs::remove_file(substrate::rollback_journal_path(&hook.db_path));
});
ready_rx
.recv_timeout(Duration::from_secs(1))
.context("waiting for search post-precheck lock hook")?;
Ok(())
}
/// Non-test build: there are no post-precheck hooks, so this always succeeds without
/// doing anything.
#[cfg(not(test))]
pub(crate) fn maybe_apply_search_post_precheck_test_hooks() -> Result<()> {
Ok(())
}